[tip:smp/urgent] irqchip/armada-xp: Consolidate hotplug state space
Commit-ID: 504dcba246a5bc451bd7f37d8da3de11310cad71 Gitweb: http://git.kernel.org/tip/504dcba246a5bc451bd7f37d8da3de11310cad71 Author: Thomas Gleixner AuthorDate: Wed, 21 Dec 2016 20:19:57 +0100 Committer: Thomas Gleixner CommitDate: Thu, 22 Dec 2016 11:37:56 +0100 irqchip/armada-xp: Consolidate hotplug state space The mpic is either the main interrupt controller or is cascaded behind a GIC. The mpic is single instance and the modes are mutually exclusive, so there is no reason to have separate cpu hotplug states. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Thomas Petazzoni Cc: Sebastian Siewior Cc: Marc Zyngier Link: http://lkml.kernel.org/r/20161221192112.333161...@linutronix.de Signed-off-by: Thomas Gleixner --- drivers/irqchip/irq-armada-370-xp.c | 2 +- include/linux/cpuhotplug.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c index 9d9c2c4..eb0d4d4 100644 --- a/drivers/irqchip/irq-armada-370-xp.c +++ b/drivers/irqchip/irq-armada-370-xp.c @@ -583,7 +583,7 @@ static int __init armada_370_xp_mpic_of_init(struct device_node *node, #endif } else { #ifdef CONFIG_SMP - cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_ARMADA_CASC_STARTING, + cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_ARMADA_XP_STARTING, "irqchip/armada/cascade:starting", mpic_cascaded_starting_cpu, NULL); #endif diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 45c786c..20bfefb 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -82,7 +82,6 @@ enum cpuhp_state { CPUHP_AP_IRQ_GIC_STARTING, CPUHP_AP_IRQ_HIP04_STARTING, CPUHP_AP_IRQ_ARMADA_XP_STARTING, - CPUHP_AP_IRQ_ARMADA_CASC_STARTING, CPUHP_AP_IRQ_BCM2836_STARTING, CPUHP_AP_ARM_MVEBU_COHERENCY, CPUHP_AP_PERF_X86_UNCORE_STARTING,
[tip:smp/urgent] ARM/imx/mmcd: Fix broken cpu hotplug handling
Commit-ID: a5d3bb6a5ba9727a02b328a249652f3ea655a064 Gitweb: http://git.kernel.org/tip/a5d3bb6a5ba9727a02b328a249652f3ea655a064 Author: Thomas Gleixner AuthorDate: Wed, 21 Dec 2016 20:19:48 +0100 Committer: Thomas Gleixner CommitDate: Thu, 22 Dec 2016 11:37:53 +0100 ARM/imx/mmcd: Fix broken cpu hotplug handling The cpu hotplug support of this perf driver is broken in several ways: 1) It adds an instance before setting up the state. 2) The state for the instance is different from the state of the callback. It's just a randomly chosen state. 3) The instance registration is not error checked so nobody noticed that the call can never succeed. 4) The state for the multi install callbacks is chosen randomly and overwrites existing state. This is now prevented by the core code so the call is guaranteed to fail. 5) The error exit path in the init function leaves the instance registered and then frees the memory which contains the enqueued hlist node. 6) The remove function is removing the state and not the instance. Fix it by: - Setting up the state before adding instances. Use a dynamically allocated state for it. - Installing instances after the state has been set up - Removing the instance in the error path before freeing memory - Removing the instance not the state in the driver remove callback While at it use raw_smp_processor_id(), because smp_processor_id() cannot be used in preemptible context, and set the driver data after successful registration of the pmu. 
Signed-off-by: Thomas Gleixner Acked-by: Shawn Guo Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Frank Li Cc: Zhengyu Shen Link: http://lkml.kernel.org/r/20161221192111.596204...@linutronix.de Signed-off-by: Thomas Gleixner --- arch/arm/mach-imx/mmdc.c | 34 ++ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/arch/arm/mach-imx/mmdc.c b/arch/arm/mach-imx/mmdc.c index ba96bf9..6991577 100644 --- a/arch/arm/mach-imx/mmdc.c +++ b/arch/arm/mach-imx/mmdc.c @@ -60,6 +60,7 @@ #define to_mmdc_pmu(p) container_of(p, struct mmdc_pmu, pmu) +static enum cpuhp_state cpuhp_mmdc_state; static int ddr_type; struct fsl_mmdc_devtype_data { @@ -451,8 +452,8 @@ static int imx_mmdc_remove(struct platform_device *pdev) { struct mmdc_pmu *pmu_mmdc = platform_get_drvdata(pdev); + cpuhp_state_remove_instance_nocalls(cpuhp_mmdc_state, _mmdc->node); perf_pmu_unregister(_mmdc->pmu); - cpuhp_remove_state_nocalls(CPUHP_ONLINE); kfree(pmu_mmdc); return 0; } @@ -472,6 +473,18 @@ static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_b return -ENOMEM; } + /* The first instance registers the hotplug state */ + if (!cpuhp_mmdc_state) { + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, + "perf/arm/mmdc:online", NULL, + mmdc_pmu_offline_cpu); + if (ret < 0) { + pr_err("cpuhp_setup_state_multi failed\n"); + goto pmu_free; + } + cpuhp_mmdc_state = ret; + } + mmdc_num = mmdc_pmu_init(pmu_mmdc, mmdc_base, >dev); if (mmdc_num == 0) name = "mmdc"; @@ -485,26 +498,23 @@ static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_b HRTIMER_MODE_REL); pmu_mmdc->hrtimer.function = mmdc_pmu_timer_handler; - cpuhp_state_add_instance_nocalls(CPUHP_ONLINE, -_mmdc->node); - cpumask_set_cpu(smp_processor_id(), _mmdc->cpu); - ret = cpuhp_setup_state_multi(CPUHP_AP_NOTIFY_ONLINE, - "MMDC_ONLINE", NULL, - mmdc_pmu_offline_cpu); - if (ret) { - pr_err("cpuhp_setup_state_multi failure\n"); - goto pmu_register_err; - } + cpumask_set_cpu(raw_smp_processor_id(), 
_mmdc->cpu); + + /* Register the pmu instance for cpu hotplug */ + cpuhp_state_add_instance_nocalls(cpuhp_mmdc_state, _mmdc->node); ret = perf_pmu_register(&(pmu_mmdc->pmu), name, -1); - platform_set_drvdata(pdev, pmu_mmdc); if (ret) goto pmu_register_err; + + platform_set_drvdata(pdev, pmu_mmdc); return 0; pmu_register_err: pr_warn("MMDC Perf PMU failed (%d), disabled\n", ret); + cpuhp_state_remove_instance_nocalls(cpuhp_mmdc_state, _mmdc->node); hrtimer_cancel(_mmdc->hrtimer); +pmu_free: kfree(pmu_mmdc); return ret; }
[tip:smp/urgent] cpu/hotplug: Prevent overwriting of callbacks
Commit-ID: ab92ca2014a32ea5dc7f954f84661f6d96f0ba26 Gitweb: http://git.kernel.org/tip/ab92ca2014a32ea5dc7f954f84661f6d96f0ba26 Author: Thomas Gleixner AuthorDate: Wed, 21 Dec 2016 20:19:49 +0100 Committer: Thomas Gleixner CommitDate: Thu, 22 Dec 2016 11:37:54 +0100 cpu/hotplug: Prevent overwriting of callbacks Developers manage to overwrite states blindly without thought. That's fatal and hard to debug. Add sanity checks to make it fail. This requires restructuring the code so that the dynamic state allocation happens in the same lock protected section as the actual store. Otherwise the previous assignment of 'Reserved' to the name field would trigger the overwrite check. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Sebastian Siewior Link: http://lkml.kernel.org/r/20161221192111.675234...@linutronix.de Signed-off-by: Thomas Gleixner --- kernel/cpu.c | 96 +++- 1 file changed, 50 insertions(+), 46 deletions(-) diff --git a/kernel/cpu.c b/kernel/cpu.c index 5339aca..3ff0ea5 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1432,23 +1432,53 @@ static int cpuhp_cb_check(enum cpuhp_state state) return 0; } -static void cpuhp_store_callbacks(enum cpuhp_state state, - const char *name, - int (*startup)(unsigned int cpu), - int (*teardown)(unsigned int cpu), - bool multi_instance) +/* + * Returns a free for dynamic slot assignment of the Online state. The states + * are protected by the cpuhp_slot_states mutex and an empty slot is identified + * by having no name assigned. 
+ */ +static int cpuhp_reserve_state(enum cpuhp_state state) +{ + enum cpuhp_state i; + + for (i = CPUHP_AP_ONLINE_DYN; i <= CPUHP_AP_ONLINE_DYN_END; i++) { + if (!cpuhp_ap_states[i].name) + return i; + } + WARN(1, "No more dynamic states available for CPU hotplug\n"); + return -ENOSPC; +} + +static int cpuhp_store_callbacks(enum cpuhp_state state, const char *name, +int (*startup)(unsigned int cpu), +int (*teardown)(unsigned int cpu), +bool multi_instance) { /* (Un)Install the callbacks for further cpu hotplug operations */ struct cpuhp_step *sp; + int ret = 0; mutex_lock(_state_mutex); + + if (state == CPUHP_AP_ONLINE_DYN) { + ret = cpuhp_reserve_state(state); + if (ret < 0) + goto out; + state = ret; + } sp = cpuhp_get_step(state); + if (name && sp->name) { + ret = -EBUSY; + goto out; + } sp->startup.single = startup; sp->teardown.single = teardown; sp->name = name; sp->multi_instance = multi_instance; INIT_HLIST_HEAD(>list); +out: mutex_unlock(_state_mutex); + return ret; } static void *cpuhp_get_teardown_cb(enum cpuhp_state state) @@ -1509,29 +1539,6 @@ static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state, } } -/* - * Returns a free for dynamic slot assignment of the Online state. The states - * are protected by the cpuhp_slot_states mutex and an empty slot is identified - * by having no name assigned. 
- */ -static int cpuhp_reserve_state(enum cpuhp_state state) -{ - enum cpuhp_state i; - - mutex_lock(_state_mutex); - for (i = CPUHP_AP_ONLINE_DYN; i <= CPUHP_AP_ONLINE_DYN_END; i++) { - if (cpuhp_ap_states[i].name) - continue; - - cpuhp_ap_states[i].name = "Reserved"; - mutex_unlock(_state_mutex); - return i; - } - mutex_unlock(_state_mutex); - WARN(1, "No more dynamic states available for CPU hotplug\n"); - return -ENOSPC; -} - int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node, bool invoke) { @@ -1580,11 +1587,13 @@ EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance); /** * __cpuhp_setup_state - Setup the callbacks for an hotplug machine state - * @state: The state to setup - * @invoke:If true, the startup function is invoked for cpus where - * cpu state >= @state - * @startup: startup callback function - * @teardown: teardown callback function + * @state: The state to setup + * @invoke:If true, the startup function is invoked for cpus where + * cpu state >= @state + * @startup: startup callback function + * @teardown: teardown callback function + * @multi_instance:State is set up for multiple instances which get + *
[tip:smp/urgent] cpu/hotplug: Cleanup state names
Commit-ID: 8ea29129e7cd926901ee3396b963453c7f5c3c4b Gitweb: http://git.kernel.org/tip/8ea29129e7cd926901ee3396b963453c7f5c3c4b Author: Thomas GleixnerAuthorDate: Wed, 21 Dec 2016 20:19:54 +0100 Committer: Thomas Gleixner CommitDate: Thu, 22 Dec 2016 11:37:55 +0100 cpu/hotplug: Cleanup state names When the state names got added a script was used to add the extra argument to the calls. The script basically converted the state constant to a string, but the cleanup to convert these strings into meaningful ones did not happen. Replace all the useless strings with 'subsys/xxx/yyy:state' strings which are used in all the other places already. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Sebastian Siewior Link: http://lkml.kernel.org/r/20161221192112.085444...@linutronix.de Signed-off-by: Thomas Gleixner --- arch/arm/kernel/smp_twd.c | 2 +- arch/arm/mach-mvebu/coherency.c | 2 +- arch/arm/mm/cache-l2x0-pmu.c | 2 +- arch/arm/mm/cache-l2x0.c | 2 +- arch/arm/vfp/vfpmodule.c | 2 +- arch/arm/xen/enlighten.c | 2 +- arch/arm64/kernel/armv8_deprecated.c | 2 +- arch/arm64/kernel/debug-monitors.c| 2 +- arch/arm64/kernel/hw_breakpoint.c | 2 +- arch/blackfin/kernel/perf_event.c | 2 +- arch/metag/kernel/perf/perf_event.c | 2 +- arch/mips/kernel/pm-cps.c | 2 +- arch/mips/oprofile/op_model_loongson3.c | 2 +- arch/powerpc/mm/numa.c| 2 +- arch/powerpc/perf/core-book3s.c | 2 +- arch/s390/kernel/perf_cpum_cf.c | 2 +- arch/s390/kernel/perf_cpum_sf.c | 2 +- arch/x86/entry/vdso/vma.c | 2 +- arch/x86/events/amd/ibs.c | 2 +- arch/x86/events/amd/power.c | 2 +- arch/x86/events/amd/uncore.c | 6 +++--- arch/x86/events/core.c| 6 +++--- arch/x86/events/intel/cqm.c | 4 ++-- arch/x86/events/intel/rapl.c | 4 ++-- arch/x86/events/intel/uncore.c| 10 +- arch/x86/kernel/apb_timer.c | 2 +- arch/x86/kernel/apic/x2apic_cluster.c | 2 +- arch/x86/kernel/hpet.c| 4 ++-- arch/x86/kernel/tboot.c | 2 +- arch/x86/kvm/x86.c| 2 +- arch/x86/xen/enlighten.c | 4 ++-- arch/xtensa/kernel/perf_event.c | 2 +- 
drivers/bus/arm-cci.c | 2 +- drivers/bus/arm-ccn.c | 2 +- drivers/clocksource/arc_timer.c | 2 +- drivers/clocksource/arm_arch_timer.c | 2 +- drivers/clocksource/arm_global_timer.c| 2 +- drivers/clocksource/dummy_timer.c | 2 +- drivers/clocksource/exynos_mct.c | 2 +- drivers/clocksource/jcore-pit.c | 2 +- drivers/clocksource/metag_generic.c | 2 +- drivers/clocksource/mips-gic-timer.c | 4 ++-- drivers/clocksource/qcom-timer.c | 2 +- drivers/clocksource/time-armada-370-xp.c | 2 +- drivers/clocksource/timer-atlas7.c| 2 +- drivers/hwtracing/coresight/coresight-etm3x.c | 4 ++-- drivers/hwtracing/coresight/coresight-etm4x.c | 4 ++-- drivers/irqchip/irq-armada-370-xp.c | 4 ++-- drivers/irqchip/irq-bcm2836.c | 2 +- drivers/irqchip/irq-gic-v3.c | 4 ++-- drivers/irqchip/irq-gic.c | 2 +- drivers/irqchip/irq-hip04.c | 2 +- drivers/leds/trigger/ledtrig-cpu.c| 2 +- drivers/net/virtio_net.c | 4 ++-- drivers/perf/arm_pmu.c| 2 +- drivers/xen/events/events_fifo.c | 2 +- virt/kvm/arm/arch_timer.c | 2 +- virt/kvm/arm/vgic/vgic-init.c | 2 +- virt/kvm/kvm_main.c | 2 +- 59 files changed, 77 insertions(+), 77 deletions(-) diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c index 02d5e5e..895ae51 100644 --- a/arch/arm/kernel/smp_twd.c +++ b/arch/arm/kernel/smp_twd.c @@ -339,7 +339,7 @@ static int __init twd_local_timer_common_register(struct device_node *np) } cpuhp_setup_state_nocalls(CPUHP_AP_ARM_TWD_STARTING, - "AP_ARM_TWD_STARTING", + "arm/timer/twd:starting", twd_timer_starting_cpu, twd_timer_dying_cpu); twd_get_clock(np); diff --git a/arch/arm/mach-mvebu/coherency.c b/arch/arm/mach-mvebu/coherency.c index ae2a018..8f8748a 100644 ---
[tip:smp/urgent] irqchip/gic: Consolidate hotplug state space
Commit-ID: 8fe1c3c1b44a5afe44279178afec705fad55c8c4 Gitweb: http://git.kernel.org/tip/8fe1c3c1b44a5afe44279178afec705fad55c8c4 Author: Thomas Gleixner AuthorDate: Wed, 21 Dec 2016 20:19:56 +0100 Committer: Thomas Gleixner CommitDate: Thu, 22 Dec 2016 11:37:56 +0100 irqchip/gic: Consolidate hotplug state space Even if both drivers are compiled in only one instance can run on a given system depending on the available GIC version. So having separate hotplug states for them is pointless. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Marc Zyngier Cc: Sebastian Siewior Link: http://lkml.kernel.org/r/20161221192112.252416...@linutronix.de Signed-off-by: Thomas Gleixner --- drivers/irqchip/irq-gic-v3.c | 2 +- include/linux/cpuhotplug.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index d696de1..c132f29 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -632,7 +632,7 @@ static void gic_raise_softirq(const struct cpumask *mask, unsigned int irq) static void gic_smp_init(void) { set_smp_cross_call(gic_raise_softirq); - cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_GICV3_STARTING, + cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_GIC_STARTING, "irqchip/arm/gicv3:starting", gic_starting_cpu, NULL); } diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 4a938be..45c786c 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -80,7 +80,6 @@ enum cpuhp_state { CPUHP_AP_SCHED_STARTING, CPUHP_AP_RCUTREE_DYING, CPUHP_AP_IRQ_GIC_STARTING, - CPUHP_AP_IRQ_GICV3_STARTING, CPUHP_AP_IRQ_HIP04_STARTING, CPUHP_AP_IRQ_ARMADA_XP_STARTING, CPUHP_AP_IRQ_ARMADA_CASC_STARTING,
[tip:smp/urgent] cpu/hotplug: Remove obsolete cpu hotplug register/unregister functions
Commit-ID: 8e38db753d952410bb0928da921885900184391b Gitweb: http://git.kernel.org/tip/8e38db753d952410bb0928da921885900184391b Author: Thomas GleixnerAuthorDate: Wed, 21 Dec 2016 20:19:53 +0100 Committer: Thomas Gleixner CommitDate: Thu, 22 Dec 2016 11:37:55 +0100 cpu/hotplug: Remove obsolete cpu hotplug register/unregister functions hotcpu_notifier(), cpu_notifier(), __hotcpu_notifier(), __cpu_notifier(), register_hotcpu_notifier(), register_cpu_notifier(), __register_hotcpu_notifier(), __register_cpu_notifier(), unregister_hotcpu_notifier(), unregister_cpu_notifier(), __unregister_hotcpu_notifier(), __unregister_cpu_notifier() are unused now. Remove them and all related code. Remove also the now pointless cpu notifier error injection mechanism. The states can be executed step by step and error rollback is the same as cpu down, so any state transition can be tested w/o requiring the notifier error injection. Some CPU hotplug states are kept as they are (ab)used for hotplug state tracking. 
Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: r...@linutronix.de Link: http://lkml.kernel.org/r/20161221192112.005642...@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpu.h | 90 -- include/linux/cpuhotplug.h | 3 - kernel/cpu.c| 139 +--- lib/Kconfig.debug | 24 --- lib/Makefile| 1 - lib/cpu-notifier-error-inject.c | 84 6 files changed, 1 insertion(+), 340 deletions(-) diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 09807c2..21f9c74 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -57,9 +57,6 @@ struct notifier_block; #define CPU_ONLINE 0x0002 /* CPU (unsigned)v is up */ #define CPU_UP_PREPARE 0x0003 /* CPU (unsigned)v coming up */ -#define CPU_UP_CANCELED0x0004 /* CPU (unsigned)v NOT coming up */ -#define CPU_DOWN_PREPARE 0x0005 /* CPU (unsigned)v going down */ -#define CPU_DOWN_FAILED0x0006 /* CPU (unsigned)v NOT going down */ #define CPU_DEAD 0x0007 /* CPU (unsigned)v dead */ #define CPU_POST_DEAD 0x0009 /* CPU (unsigned)v dead, cpu_hotplug * lock is dropped */ @@ -80,80 +77,14 @@ struct notifier_block; #ifdef CONFIG_SMP extern bool cpuhp_tasks_frozen; -/* Need to know about CPUs going up/down? 
*/ -#if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) -#define cpu_notifier(fn, pri) {\ - static struct notifier_block fn##_nb = \ - { .notifier_call = fn, .priority = pri }; \ - register_cpu_notifier(##_nb);\ -} - -#define __cpu_notifier(fn, pri) { \ - static struct notifier_block fn##_nb = \ - { .notifier_call = fn, .priority = pri }; \ - __register_cpu_notifier(##_nb); \ -} - -extern int register_cpu_notifier(struct notifier_block *nb); -extern int __register_cpu_notifier(struct notifier_block *nb); -extern void unregister_cpu_notifier(struct notifier_block *nb); -extern void __unregister_cpu_notifier(struct notifier_block *nb); - -#else /* #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) */ -#define cpu_notifier(fn, pri) do { (void)(fn); } while (0) -#define __cpu_notifier(fn, pri)do { (void)(fn); } while (0) - -static inline int register_cpu_notifier(struct notifier_block *nb) -{ - return 0; -} - -static inline int __register_cpu_notifier(struct notifier_block *nb) -{ - return 0; -} - -static inline void unregister_cpu_notifier(struct notifier_block *nb) -{ -} - -static inline void __unregister_cpu_notifier(struct notifier_block *nb) -{ -} -#endif - int cpu_up(unsigned int cpu); void notify_cpu_starting(unsigned int cpu); extern void cpu_maps_update_begin(void); extern void cpu_maps_update_done(void); -#define cpu_notifier_register_begincpu_maps_update_begin -#define cpu_notifier_register_done cpu_maps_update_done - #else /* CONFIG_SMP */ #define cpuhp_tasks_frozen 0 -#define cpu_notifier(fn, pri) do { (void)(fn); } while (0) -#define __cpu_notifier(fn, pri)do { (void)(fn); } while (0) - -static inline int register_cpu_notifier(struct notifier_block *nb) -{ - return 0; -} - -static inline int __register_cpu_notifier(struct notifier_block *nb) -{ - return 0; -} - -static inline void unregister_cpu_notifier(struct notifier_block *nb) -{ -} - -static inline void __unregister_cpu_notifier(struct notifier_block *nb) -{ -} - static inline void 
cpu_maps_update_begin(void) { } @@ -162,14 +93,6 @@ static inline void
[tip:smp/urgent] coresight/etm3/4x: Consolidate hotplug state space
Commit-ID: 3ee93ef2527389b5e83039239e2a754dabd3a5e5 Gitweb: http://git.kernel.org/tip/3ee93ef2527389b5e83039239e2a754dabd3a5e5 Author: Thomas Gleixner AuthorDate: Wed, 21 Dec 2016 20:19:55 +0100 Committer: Thomas Gleixner CommitDate: Thu, 22 Dec 2016 11:37:56 +0100 coresight/etm3/4x: Consolidate hotplug state space Even if both drivers are compiled in only one instance can run on a given system depending on the available tracer cell. So having separate hotplug states for them is pointless. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Mathieu Poirier Link: http://lkml.kernel.org/r/20161221192112.162765...@linutronix.de Signed-off-by: Thomas Gleixner --- drivers/hwtracing/coresight/coresight-etm4x.c | 4 ++-- include/linux/cpuhotplug.h| 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c index 5edc63f..031480f 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.c +++ b/drivers/hwtracing/coresight/coresight-etm4x.c @@ -986,7 +986,7 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id) dev_err(dev, "ETM arch init failed\n"); if (!etm4_count++) { - cpuhp_setup_state_nocalls(CPUHP_AP_ARM_CORESIGHT4_STARTING, + cpuhp_setup_state_nocalls(CPUHP_AP_ARM_CORESIGHT_STARTING, "arm/coresight4:starting", etm4_starting_cpu, etm4_dying_cpu); ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, @@ -1037,7 +1037,7 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id) err_arch_supported: if (--etm4_count == 0) { - cpuhp_remove_state_nocalls(CPUHP_AP_ARM_CORESIGHT4_STARTING); + cpuhp_remove_state_nocalls(CPUHP_AP_ARM_CORESIGHT_STARTING); if (hp_online) cpuhp_remove_state_nocalls(hp_online); } diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 0d5ef85..4a938be 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -119,7 +119,6 @@ enum cpuhp_state { 
CPUHP_AP_DUMMY_TIMER_STARTING, CPUHP_AP_ARM_XEN_STARTING, CPUHP_AP_ARM_CORESIGHT_STARTING, - CPUHP_AP_ARM_CORESIGHT4_STARTING, CPUHP_AP_ARM64_ISNDEP_STARTING, CPUHP_AP_SMPCFD_DYING, CPUHP_AP_X86_TBOOT_DYING,
[tip:locking/core] locking/lockdep: Handle statically initialized PER_CPU locks properly
Commit-ID: 383776fa7527745224446337f2dcfb0f0d1b8b56 Gitweb: http://git.kernel.org/tip/383776fa7527745224446337f2dcfb0f0d1b8b56 Author: Thomas Gleixner AuthorDate: Mon, 27 Feb 2017 15:37:36 +0100 Committer: Ingo Molnar CommitDate: Thu, 16 Mar 2017 09:57:08 +0100 locking/lockdep: Handle statically initialized PER_CPU locks properly If a PER_CPU struct which contains a spin_lock is statically initialized via: DEFINE_PER_CPU(struct foo, bla) = { .lock = __SPIN_LOCK_UNLOCKED(bla.lock) }; then lockdep assigns a separate key to each lock because the logic for assigning a key to statically initialized locks is to use the address as the key. With per CPU locks the address is obviously different on each CPU. That's wrong, because all locks should have the same key. To solve this the following modifications are required: 1) Extend the is_kernel/module_percpu_addr() functions to hand back the canonical address of the per CPU address, i.e. the per CPU address minus the per CPU offset. 2) Check the lock address with these functions and if the per CPU check matches use the returned canonical address as the lock key, so all per CPU locks have the same key. 3) Move the static_obj(key) check into look_up_lock_class() so this check can be avoided for statically initialized per CPU locks. That's required because the canonical address fails the static_obj(key) check for obvious reasons. Reported-by: Mike Galbraith Signed-off-by: Thomas Gleixner [ Merged Dan's fixups for !MODULES and !SMP into this patch. ] Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Dan Murphy Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170227143736.pectaimkjkan5...@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/module.h | 6 ++ include/linux/percpu.h | 1 + kernel/locking/lockdep.c | 33 +++-- kernel/module.c | 36 mm/percpu.c | 37 +++-- 5 files changed, 77 insertions(+), 36 deletions(-) diff --git a/include/linux/module.h b/include/linux/module.h index 0297c5c..9ad6856 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -493,6 +493,7 @@ static inline int module_is_live(struct module *mod) struct module *__module_text_address(unsigned long addr); struct module *__module_address(unsigned long addr); bool is_module_address(unsigned long addr); +bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr); bool is_module_percpu_address(unsigned long addr); bool is_module_text_address(unsigned long addr); @@ -660,6 +661,11 @@ static inline bool is_module_percpu_address(unsigned long addr) return false; } +static inline bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr) +{ + return false; +} + static inline bool is_module_text_address(unsigned long addr) { return false; diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 56939d3..491b3f5 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -110,6 +110,7 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size, #endif extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align); +extern bool __is_kernel_percpu_address(unsigned long addr, unsigned long *can_addr); extern bool is_kernel_percpu_address(unsigned long addr); #if !defined(CONFIG_SMP) || !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index b1a1cef..98dd623 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -660,6 +660,7 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) struct lockdep_subclass_key *key; struct hlist_head 
*hash_head; struct lock_class *class; + bool is_static = false; if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) { debug_locks_off(); @@ -673,10 +674,23 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) /* * Static locks do not have their class-keys yet - for them the key -* is the lock object itself: +* is the lock object itself. If the lock is in the per cpu area, +* the canonical address of the lock (per cpu offset removed) is +* used. */ - if (unlikely(!lock->key)) - lock->key = (void *)lock; + if (unlikely(!lock->key)) { + unsigned long can_addr, addr = (unsigned long)lock; + + if (__is_kernel_percpu_address(addr, _addr)) +
[tip:x86/urgent] x86/vdso: Plug race between mapping and ELF header setup
Commit-ID: 6fdc6dd90272ce7e75d744f71535cfbd8d77da81 Gitweb: http://git.kernel.org/tip/6fdc6dd90272ce7e75d744f71535cfbd8d77da81 Author: Thomas Gleixner AuthorDate: Mon, 10 Apr 2017 17:14:28 +0200 Committer: Thomas Gleixner CommitDate: Mon, 10 Apr 2017 18:31:41 +0200 x86/vdso: Plug race between mapping and ELF header setup The vsyscall32 sysctl can race against a concurrent fork when it switches from disabled to enabled: arch_setup_additional_pages() if (vdso32_enabled) --> No mapping sysctl.vsyscall32() --> vdso32_enabled = true create_elf_tables() ARCH_DLINFO_IA32 if (vdso32_enabled) { --> Add VDSO entry with NULL pointer Make ARCH_DLINFO_IA32 check whether the VDSO mapping has been set up for the newly forked process or not. Signed-off-by: Thomas Gleixner Acked-by: Andy Lutomirski Cc: Peter Zijlstra Cc: Mathias Krause Cc: sta...@vger.kernel.org Link: http://lkml.kernel.org/r/20170410151723.602367...@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/elf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 9d49c18..3762536 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -287,7 +287,7 @@ struct task_struct; #define ARCH_DLINFO_IA32 \ do { \ - if (vdso32_enabled) { \ + if (VDSO_CURRENT_BASE) {\ NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY);\ NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE);\ } \
[tip:sched/core] workqueue: Provide work_on_cpu_safe()
Commit-ID: 0e8d6a9336b487a1dd6f1991ff376e669d4c87c6 Gitweb: http://git.kernel.org/tip/0e8d6a9336b487a1dd6f1991ff376e669d4c87c6 Author: Thomas GleixnerAuthorDate: Wed, 12 Apr 2017 22:07:28 +0200 Committer: Thomas Gleixner CommitDate: Sat, 15 Apr 2017 12:20:53 +0200 workqueue: Provide work_on_cpu_safe() work_on_cpu() is not protected against CPU hotplug. For code which requires to be either executed on an online CPU or to fail if the CPU is not available the callsite would have to protect against CPU hotplug. Provide a function which does get/put_online_cpus() around the call to work_on_cpu() and fails the call with -ENODEV if the target CPU is not online. Preparatory patch to convert several racy task affinity manipulations. Signed-off-by: Thomas Gleixner Acked-by: Tejun Heo Cc: Fenghua Yu Cc: Tony Luck Cc: Herbert Xu Cc: "Rafael J. Wysocki" Cc: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: Sebastian Siewior Cc: Lai Jiangshan Cc: Viresh Kumar Cc: Michael Ellerman Cc: "David S. Miller" Cc: Len Brown Link: http://lkml.kernel.org/r/20170412201042.262610...@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/workqueue.h | 5 + kernel/workqueue.c| 23 +++ 2 files changed, 28 insertions(+) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index bde063c..c102ef6 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -608,8 +608,13 @@ static inline long work_on_cpu(int cpu, long (*fn)(void *), void *arg) { return fn(arg); } +static inline long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg) +{ + return fn(arg); +} #else long work_on_cpu(int cpu, long (*fn)(void *), void *arg); +long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg); #endif /* CONFIG_SMP */ #ifdef CONFIG_FREEZER diff --git a/kernel/workqueue.c b/kernel/workqueue.c index c0168b7..5bf1be0 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4735,6 +4735,29 @@ long work_on_cpu(int cpu, long (*fn)(void *), void *arg) return wfc.ret; } 
EXPORT_SYMBOL_GPL(work_on_cpu); + +/** + * work_on_cpu_safe - run a function in thread context on a particular cpu + * @cpu: the cpu to run on + * @fn: the function to run + * @arg: the function argument + * + * Disables CPU hotplug and calls work_on_cpu(). The caller must not hold + * any locks which would prevent @fn from completing. + * + * Return: The value @fn returns. + */ +long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg) +{ + long ret = -ENODEV; + + get_online_cpus(); + if (cpu_online(cpu)) + ret = work_on_cpu(cpu, fn, arg); + put_online_cpus(); + return ret; +} +EXPORT_SYMBOL_GPL(work_on_cpu_safe); #endif /* CONFIG_SMP */ #ifdef CONFIG_FREEZER
[tip:sched/core] ia64/topology: Remove cpus_allowed manipulation
Commit-ID: 048c9b954e20396e0c45ee778466994d1be2e612 Gitweb: http://git.kernel.org/tip/048c9b954e20396e0c45ee778466994d1be2e612 Author: Thomas GleixnerAuthorDate: Wed, 12 Apr 2017 22:07:27 +0200 Committer: Thomas Gleixner CommitDate: Sat, 15 Apr 2017 12:20:52 +0200 ia64/topology: Remove cpus_allowed manipulation The CPU hotplug callback fiddles with the cpus_allowed pointer to pin the calling thread on the plugged CPU. That's already guaranteed by the hotplug core code. Remove it. Signed-off-by: Thomas Gleixner Cc: Fenghua Yu Cc: Tony Luck Cc: linux-i...@vger.kernel.org Cc: Herbert Xu Cc: "Rafael J. Wysocki" Cc: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: Sebastian Siewior Cc: Lai Jiangshan Cc: Viresh Kumar Cc: Michael Ellerman Cc: Tejun Heo Cc: "David S. Miller" Cc: Len Brown Link: http://lkml.kernel.org/r/20170412201042.174518...@linutronix.de Signed-off-by: Thomas Gleixner --- arch/ia64/kernel/topology.c | 6 -- 1 file changed, 6 deletions(-) diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index 1a68f01..d76529c 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -355,18 +355,12 @@ static int cache_add_dev(unsigned int cpu) unsigned long i, j; struct cache_info *this_object; int retval = 0; - cpumask_t oldmask; if (all_cpu_cache_info[cpu].kobj.parent) return 0; - oldmask = current->cpus_allowed; - retval = set_cpus_allowed_ptr(current, cpumask_of(cpu)); - if (unlikely(retval)) - return retval; retval = cpu_cache_sysfs_init(cpu); - set_cpus_allowed_ptr(current, ); if (unlikely(retval < 0)) return retval;
[tip:sched/core] ia64/salinfo: Replace racy task affinity logic
Commit-ID: 67cb85fdcee7fbc61c09c00360d1a4ae37641db4 Gitweb: http://git.kernel.org/tip/67cb85fdcee7fbc61c09c00360d1a4ae37641db4 Author: Thomas GleixnerAuthorDate: Wed, 12 Apr 2017 22:07:29 +0200 Committer: Thomas Gleixner CommitDate: Sat, 15 Apr 2017 12:20:53 +0200 ia64/salinfo: Replace racy task affinity logic Some of the file operations in /proc/sal require to run code on the requested cpu. This is achieved by temporarily setting the affinity of the calling user space thread to the requested CPU and reset it to the original affinity afterwards. That's racy vs. CPU hotplug and concurrent affinity settings for that thread resulting in code executing on the wrong CPU and overwriting the new affinity setting. Replace it by using work_on_cpu_safe() which guarantees to run the code on the requested CPU or to fail in case the CPU is offline. Signed-off-by: Thomas Gleixner Cc: Fenghua Yu Cc: Tony Luck Cc: linux-i...@vger.kernel.org Cc: Herbert Xu Cc: "Rafael J. Wysocki" Cc: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: Sebastian Siewior Cc: Lai Jiangshan Cc: Viresh Kumar Cc: Michael Ellerman Cc: Tejun Heo Cc: "David S. 
Miller" Cc: Len Brown Link: http://lkml.kernel.org/r/20170412201042.341863...@linutronix.de Signed-off-by: Thomas Gleixner --- arch/ia64/kernel/salinfo.c | 31 --- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c index d194d5c..63dc9cd 100644 --- a/arch/ia64/kernel/salinfo.c +++ b/arch/ia64/kernel/salinfo.c @@ -179,14 +179,14 @@ struct salinfo_platform_oemdata_parms { const u8 *efi_guid; u8 **oemdata; u64 *oemdata_size; - int ret; }; -static void +static long salinfo_platform_oemdata_cpu(void *context) { struct salinfo_platform_oemdata_parms *parms = context; - parms->ret = salinfo_platform_oemdata(parms->efi_guid, parms->oemdata, parms->oemdata_size); + + return salinfo_platform_oemdata(parms->efi_guid, parms->oemdata, parms->oemdata_size); } static void @@ -380,16 +380,7 @@ salinfo_log_release(struct inode *inode, struct file *file) return 0; } -static void -call_on_cpu(int cpu, void (*fn)(void *), void *arg) -{ - cpumask_t save_cpus_allowed = current->cpus_allowed; - set_cpus_allowed_ptr(current, cpumask_of(cpu)); - (*fn)(arg); - set_cpus_allowed_ptr(current, _cpus_allowed); -} - -static void +static long salinfo_log_read_cpu(void *context) { struct salinfo_data *data = context; @@ -399,6 +390,7 @@ salinfo_log_read_cpu(void *context) /* Clear corrected errors as they are read from SAL */ if (rh->severity == sal_log_severity_corrected) ia64_sal_clear_state_info(data->type); + return 0; } static void @@ -430,7 +422,7 @@ retry: spin_unlock_irqrestore(_saved_lock, flags); if (!data->saved_num) - call_on_cpu(cpu, salinfo_log_read_cpu, data); + work_on_cpu_safe(cpu, salinfo_log_read_cpu, data); if (!data->log_size) { data->state = STATE_NO_DATA; cpumask_clear_cpu(cpu, >cpu_event); @@ -459,11 +451,13 @@ salinfo_log_read(struct file *file, char __user *buffer, size_t count, loff_t *p return simple_read_from_buffer(buffer, count, ppos, buf, bufsize); } -static void +static long 
salinfo_log_clear_cpu(void *context) { struct salinfo_data *data = context; + ia64_sal_clear_state_info(data->type); + return 0; } static int @@ -486,7 +480,7 @@ salinfo_log_clear(struct salinfo_data *data, int cpu) rh = (sal_log_record_header_t *)(data->log_buffer); /* Corrected errors have already been cleared from SAL */ if (rh->severity != sal_log_severity_corrected) - call_on_cpu(cpu, salinfo_log_clear_cpu, data); + work_on_cpu_safe(cpu, salinfo_log_clear_cpu, data); /* clearing a record may make a new record visible */ salinfo_log_new_read(cpu, data); if (data->state == STATE_LOG_RECORD) { @@ -531,9 +525,8 @@ salinfo_log_write(struct file *file, const char __user *buffer, size_t count, lo .oemdata = >oemdata, .oemdata_size = >oemdata_size }; - call_on_cpu(cpu, salinfo_platform_oemdata_cpu, ); - if (parms.ret) - count = parms.ret; + count = work_on_cpu_safe(cpu, salinfo_platform_oemdata_cpu, +
[tip:sched/core] ia64/sn/hwperf: Replace racy task affinity logic
Commit-ID: 9feb42ac88b516e378b9782e82b651ca5bed95c4 Gitweb: http://git.kernel.org/tip/9feb42ac88b516e378b9782e82b651ca5bed95c4 Author: Thomas GleixnerAuthorDate: Thu, 6 Apr 2017 14:56:18 +0200 Committer: Thomas Gleixner CommitDate: Sat, 15 Apr 2017 12:20:53 +0200 ia64/sn/hwperf: Replace racy task affinity logic sn_hwperf_op_cpu() which is invoked from an ioctl requires to run code on the requested cpu. This is achieved by temporarily setting the affinity of the calling user space thread to the requested CPU and reset it to the original affinity afterwards. That's racy vs. CPU hotplug and concurrent affinity settings for that thread resulting in code executing on the wrong CPU and overwriting the new affinity setting. Replace it by using work_on_cpu_safe() which guarantees to run the code on the requested CPU or to fail in case the CPU is offline. Signed-off-by: Thomas Gleixner Cc: Fenghua Yu Cc: Tony Luck Cc: linux-i...@vger.kernel.org Cc: Herbert Xu Cc: "Rafael J. Wysocki" Cc: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: Sebastian Siewior Cc: Lai Jiangshan Cc: Viresh Kumar Cc: Michael Ellerman Cc: Tejun Heo Cc: "David S. 
Miller" Cc: Len Brown Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1704122251450.2548@nanos Signed-off-by: Thomas Gleixner --- arch/ia64/sn/kernel/sn2/sn_hwperf.c | 17 + 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c index 52704f1..55febd6 100644 --- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c +++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c @@ -598,12 +598,17 @@ static void sn_hwperf_call_sal(void *info) op_info->ret = r; } +static long sn_hwperf_call_sal_work(void *info) +{ + sn_hwperf_call_sal(info); + return 0; +} + static int sn_hwperf_op_cpu(struct sn_hwperf_op_info *op_info) { u32 cpu; u32 use_ipi; int r = 0; - cpumask_t save_allowed; cpu = (op_info->a->arg & SN_HWPERF_ARG_CPU_MASK) >> 32; use_ipi = op_info->a->arg & SN_HWPERF_ARG_USE_IPI_MASK; @@ -629,13 +634,9 @@ static int sn_hwperf_op_cpu(struct sn_hwperf_op_info *op_info) /* use an interprocessor interrupt to call SAL */ smp_call_function_single(cpu, sn_hwperf_call_sal, op_info, 1); - } - else { - /* migrate the task before calling SAL */ - save_allowed = current->cpus_allowed; - set_cpus_allowed_ptr(current, cpumask_of(cpu)); - sn_hwperf_call_sal(op_info); - set_cpus_allowed_ptr(current, _allowed); + } else { + /* Call on the target CPU */ + work_on_cpu_safe(cpu, sn_hwperf_call_sal_work, op_info); } } r = op_info->ret;
[tip:sched/core] powerpc/smp: Replace open coded task affinity logic
Commit-ID: 6d11b87d55eb75007a3721c2de5938f5bbf607fb Gitweb: http://git.kernel.org/tip/6d11b87d55eb75007a3721c2de5938f5bbf607fb Author: Thomas GleixnerAuthorDate: Wed, 12 Apr 2017 22:07:31 +0200 Committer: Thomas Gleixner CommitDate: Sat, 15 Apr 2017 12:20:54 +0200 powerpc/smp: Replace open coded task affinity logic Init task invokes smp_ops->setup_cpu() from smp_cpus_done(). Init task can run on any online CPU at this point, but the setup_cpu() callback must be invoked on the boot CPU. This is achieved by temporarily setting the affinity of the calling user space thread to the requested CPU and reset it to the original affinity afterwards. That's racy vs. CPU hotplug and concurrent affinity settings for that thread resulting in code executing on the wrong CPU and overwriting the new affinity setting. That's actually not a problem in this context as neither CPU hotplug nor affinity settings can happen, but the access to task_struct::cpus_allowed is about to be restricted. Replace it with a call to work_on_cpu_safe() which achieves the same result. Signed-off-by: Thomas Gleixner Acked-by: Michael Ellerman Cc: Fenghua Yu Cc: Tony Luck Cc: Herbert Xu Cc: "Rafael J. Wysocki" Cc: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: Sebastian Siewior Cc: Lai Jiangshan Cc: Viresh Kumar Cc: Tejun Heo Cc: Paul Mackerras Cc: linuxppc-...@lists.ozlabs.org Cc: "David S. 
Miller" Cc: Len Brown Link: http://lkml.kernel.org/r/20170412201042.518053...@linutronix.de Signed-off-by: Thomas Gleixner --- arch/powerpc/kernel/smp.c | 26 +++--- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 46f89e6..d68ed1f 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -787,24 +787,21 @@ static struct sched_domain_topology_level powerpc_topology[] = { { NULL, }, }; -void __init smp_cpus_done(unsigned int max_cpus) +static __init long smp_setup_cpu_workfn(void *data __always_unused) { - cpumask_var_t old_mask; + smp_ops->setup_cpu(boot_cpuid); + return 0; +} - /* We want the setup_cpu() here to be called from CPU 0, but our -* init thread may have been "borrowed" by another CPU in the meantime -* se we pin us down to CPU 0 for a short while +void __init smp_cpus_done(unsigned int max_cpus) +{ + /* +* We want the setup_cpu() here to be called on the boot CPU, but +* init might run on any CPU, so make sure it's invoked on the boot +* CPU. */ - alloc_cpumask_var(_mask, GFP_NOWAIT); - cpumask_copy(old_mask, >cpus_allowed); - set_cpus_allowed_ptr(current, cpumask_of(boot_cpuid)); - if (smp_ops && smp_ops->setup_cpu) - smp_ops->setup_cpu(boot_cpuid); - - set_cpus_allowed_ptr(current, old_mask); - - free_cpumask_var(old_mask); + work_on_cpu_safe(boot_cpuid, smp_setup_cpu_workfn, NULL); if (smp_ops && smp_ops->bringup_done) smp_ops->bringup_done(); @@ -812,7 +809,6 @@ void __init smp_cpus_done(unsigned int max_cpus) dump_numa_cpu_topology(); set_sched_topology(powerpc_topology); - } #ifdef CONFIG_HOTPLUG_CPU
[tip:sched/core] cpufreq/ia64: Replace racy task affinity logic
Commit-ID: 38f05ed04beb276f780fcd2b5c0b78c76d0b3c0c Gitweb: http://git.kernel.org/tip/38f05ed04beb276f780fcd2b5c0b78c76d0b3c0c Author: Thomas GleixnerAuthorDate: Wed, 12 Apr 2017 22:55:03 +0200 Committer: Thomas Gleixner CommitDate: Sat, 15 Apr 2017 12:20:55 +0200 cpufreq/ia64: Replace racy task affinity logic The get() and target() callbacks must run on the affected cpu. This is achieved by temporarily setting the affinity of the calling thread to the requested CPU and reset it to the original affinity afterwards. That's racy vs. concurrent affinity settings for that thread resulting in code executing on the wrong CPU and overwriting the new affinity setting. Replace it by work_on_cpu(). All call paths which invoke the callbacks are already protected against CPU hotplug. Signed-off-by: Thomas Gleixner Acked-by: Viresh Kumar Cc: Fenghua Yu Cc: Tony Luck Cc: Herbert Xu Cc: "Rafael J. Wysocki" Cc: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: Sebastian Siewior Cc: linux...@vger.kernel.org Cc: Lai Jiangshan Cc: Michael Ellerman Cc: Tejun Heo Cc: "David S. 
Miller" Cc: Len Brown Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1704122231100.2548@nanos Signed-off-by: Thomas Gleixner --- drivers/cpufreq/ia64-acpi-cpufreq.c | 92 - 1 file changed, 39 insertions(+), 53 deletions(-) diff --git a/drivers/cpufreq/ia64-acpi-cpufreq.c b/drivers/cpufreq/ia64-acpi-cpufreq.c index e28a31a..a757c0a 100644 --- a/drivers/cpufreq/ia64-acpi-cpufreq.c +++ b/drivers/cpufreq/ia64-acpi-cpufreq.c @@ -34,6 +34,11 @@ struct cpufreq_acpi_io { unsigned intresume; }; +struct cpufreq_acpi_req { + unsigned intcpu; + unsigned intstate; +}; + static struct cpufreq_acpi_io *acpi_io_data[NR_CPUS]; static struct cpufreq_driver acpi_cpufreq_driver; @@ -83,8 +88,7 @@ processor_get_pstate ( static unsigned extract_clock ( struct cpufreq_acpi_io *data, - unsigned value, - unsigned int cpu) + unsigned value) { unsigned long i; @@ -98,60 +102,43 @@ extract_clock ( } -static unsigned int +static long processor_get_freq ( - struct cpufreq_acpi_io *data, - unsigned intcpu) + void *arg) { - int ret = 0; - u32 value = 0; - cpumask_t saved_mask; - unsigned long clock_freq; + struct cpufreq_acpi_req *req = arg; + unsigned intcpu = req->cpu; + struct cpufreq_acpi_io *data = acpi_io_data[cpu]; + u32 value; + int ret; pr_debug("processor_get_freq\n"); - - saved_mask = current->cpus_allowed; - set_cpus_allowed_ptr(current, cpumask_of(cpu)); if (smp_processor_id() != cpu) - goto migrate_end; + return -EAGAIN; /* processor_get_pstate gets the instantaneous frequency */ ret = processor_get_pstate(); - if (ret) { - set_cpus_allowed_ptr(current, _mask); pr_warn("get performance failed with error %d\n", ret); - ret = 0; - goto migrate_end; + return ret; } - clock_freq = extract_clock(data, value, cpu); - ret = (clock_freq*1000); - -migrate_end: - set_cpus_allowed_ptr(current, _mask); - return ret; + return 1000 * extract_clock(data, value); } -static int +static long processor_set_freq ( - struct cpufreq_acpi_io *data, - struct cpufreq_policy *policy, - int state) + void *arg) 
{ - int ret = 0; - u32 value = 0; - cpumask_t saved_mask; - int retval; + struct cpufreq_acpi_req *req = arg; + unsigned intcpu = req->cpu; + struct cpufreq_acpi_io *data = acpi_io_data[cpu]; + int ret, state = req->state; + u32 value; pr_debug("processor_set_freq\n"); - - saved_mask = current->cpus_allowed; - set_cpus_allowed_ptr(current, cpumask_of(policy->cpu)); - if (smp_processor_id() != policy->cpu) { - retval = -EAGAIN; - goto migrate_end; - } + if (smp_processor_id() != cpu) + return -EAGAIN; if (state == data->acpi_data.state) { if (unlikely(data->resume)) { @@ -159,8 +146,7 @@ processor_set_freq (
[tip:sched/core] ACPI/processor: Fix error handling in __acpi_processor_start()
Commit-ID: a5cbdf693a60d5b86d4d21dfedd90f17754eb273 Gitweb: http://git.kernel.org/tip/a5cbdf693a60d5b86d4d21dfedd90f17754eb273 Author: Thomas GleixnerAuthorDate: Wed, 12 Apr 2017 22:07:33 +0200 Committer: Thomas Gleixner CommitDate: Sat, 15 Apr 2017 12:20:54 +0200 ACPI/processor: Fix error handling in __acpi_processor_start() When acpi_install_notify_handler() fails the cooling device stays registered and the sysfs files created via acpi_pss_perf_init() are leaked and the function returns success. Undo acpi_pss_perf_init() and return a proper error code. Signed-off-by: Thomas Gleixner Cc: Fenghua Yu Cc: Tony Luck Cc: Herbert Xu Cc: "Rafael J. Wysocki" Cc: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: Sebastian Siewior Cc: Lai Jiangshan Cc: linux-a...@vger.kernel.org Cc: Viresh Kumar Cc: Michael Ellerman Cc: Tejun Heo Cc: "David S. Miller" Cc: Len Brown Link: http://lkml.kernel.org/r/20170412201042.695499...@linutronix.de Signed-off-by: Thomas Gleixner --- drivers/acpi/processor_driver.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c index 9d5f0c7..eab8cda 100644 --- a/drivers/acpi/processor_driver.c +++ b/drivers/acpi/processor_driver.c @@ -251,6 +251,9 @@ static int __acpi_processor_start(struct acpi_device *device) if (ACPI_SUCCESS(status)) return 0; + result = -ENODEV; + acpi_pss_perf_exit(pr, device); + err_power_exit: acpi_processor_power_exit(pr); return result;
[tip:sched/core] cpufreq/sparc-us2e: Replace racy task affinity logic
Commit-ID: 12699ac53a2e5fbd1fd7c164b11685d55c8aa28b Gitweb: http://git.kernel.org/tip/12699ac53a2e5fbd1fd7c164b11685d55c8aa28b Author: Thomas GleixnerAuthorDate: Thu, 13 Apr 2017 10:22:43 +0200 Committer: Thomas Gleixner CommitDate: Sat, 15 Apr 2017 12:20:56 +0200 cpufreq/sparc-us2e: Replace racy task affinity logic The access to the HBIRD_ESTAR_MODE register in the cpu frequency control functions must happen on the target CPU. This is achieved by temporarily setting the affinity of the calling user space thread to the requested CPU and reset it to the original affinity afterwards. That's racy vs. CPU hotplug and concurrent affinity settings for that thread resulting in code executing on the wrong CPU and overwriting the new affinity setting. Replace it by a straightforward smp function call. Signed-off-by: Thomas Gleixner Acked-by: Viresh Kumar Cc: Fenghua Yu Cc: Tony Luck Cc: Herbert Xu Cc: "Rafael J. Wysocki" Cc: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: Sebastian Siewior Cc: linux...@vger.kernel.org Cc: Lai Jiangshan Cc: Michael Ellerman Cc: Tejun Heo Cc: "David S. Miller" Cc: Len Brown Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1704131020280.2408@nanos Signed-off-by: Thomas Gleixner --- drivers/cpufreq/sparc-us2e-cpufreq.c | 45 +--- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/drivers/cpufreq/sparc-us2e-cpufreq.c b/drivers/cpufreq/sparc-us2e-cpufreq.c index 35ddb6d..90f33ef 100644 --- a/drivers/cpufreq/sparc-us2e-cpufreq.c +++ b/drivers/cpufreq/sparc-us2e-cpufreq.c @@ -118,10 +118,6 @@ static void us2e_transition(unsigned long estar, unsigned long new_bits, unsigned long clock_tick, unsigned long old_divisor, unsigned long divisor) { - unsigned long flags; - - local_irq_save(flags); - estar &= ~ESTAR_MODE_DIV_MASK; /* This is based upon the state transition diagram in the IIe manual. 
*/ @@ -152,8 +148,6 @@ static void us2e_transition(unsigned long estar, unsigned long new_bits, } else { BUG(); } - - local_irq_restore(flags); } static unsigned long index_to_estar_mode(unsigned int index) @@ -229,48 +223,51 @@ static unsigned long estar_to_divisor(unsigned long estar) return ret; } +static void __us2e_freq_get(void *arg) +{ + unsigned long *estar = arg; + + *estar = read_hbreg(HBIRD_ESTAR_MODE_ADDR); +} + static unsigned int us2e_freq_get(unsigned int cpu) { - cpumask_t cpus_allowed; unsigned long clock_tick, estar; - cpumask_copy(_allowed, >cpus_allowed); - set_cpus_allowed_ptr(current, cpumask_of(cpu)); - clock_tick = sparc64_get_clock_tick(cpu) / 1000; - estar = read_hbreg(HBIRD_ESTAR_MODE_ADDR); - - set_cpus_allowed_ptr(current, _allowed); + if (smp_call_function_single(cpu, __us2e_freq_get, , 1)) + return 0; return clock_tick / estar_to_divisor(estar); } -static int us2e_freq_target(struct cpufreq_policy *policy, unsigned int index) +static void __us2e_freq_target(void *arg) { - unsigned int cpu = policy->cpu; + unsigned int cpu = smp_processor_id(); + unsigned int *index = arg; unsigned long new_bits, new_freq; unsigned long clock_tick, divisor, old_divisor, estar; - cpumask_t cpus_allowed; - - cpumask_copy(_allowed, >cpus_allowed); - set_cpus_allowed_ptr(current, cpumask_of(cpu)); new_freq = clock_tick = sparc64_get_clock_tick(cpu) / 1000; - new_bits = index_to_estar_mode(index); - divisor = index_to_divisor(index); + new_bits = index_to_estar_mode(*index); + divisor = index_to_divisor(*index); new_freq /= divisor; estar = read_hbreg(HBIRD_ESTAR_MODE_ADDR); old_divisor = estar_to_divisor(estar); - if (old_divisor != divisor) + if (old_divisor != divisor) { us2e_transition(estar, new_bits, clock_tick * 1000, old_divisor, divisor); + } +} - set_cpus_allowed_ptr(current, _allowed); +static int us2e_freq_target(struct cpufreq_policy *policy, unsigned int index) +{ + unsigned int cpu = policy->cpu; - return 0; + return 
smp_call_function_single(cpu, __us2e_freq_target, , 1); } static int __init us2e_freq_cpu_init(struct cpufreq_policy *policy)
[tip:sched/core] ACPI/processor: Replace racy task affinity logic
Commit-ID: 8153f9ac43897f9f4786b30badc134fcc1a4fb11 Gitweb: http://git.kernel.org/tip/8153f9ac43897f9f4786b30badc134fcc1a4fb11 Author: Thomas GleixnerAuthorDate: Wed, 12 Apr 2017 22:07:34 +0200 Committer: Thomas Gleixner CommitDate: Sat, 15 Apr 2017 12:20:54 +0200 ACPI/processor: Replace racy task affinity logic acpi_processor_get_throttling() requires to invoke the getter function on the target CPU. This is achieved by temporarily setting the affinity of the calling user space thread to the requested CPU and reset it to the original affinity afterwards. That's racy vs. CPU hotplug and concurrent affinity settings for that thread resulting in code executing on the wrong CPU and overwriting the new affinity setting. acpi_processor_get_throttling() is invoked in two ways: 1) The CPU online callback, which is already running on the target CPU and obviously protected against hotplug and not affected by affinity settings. 2) The ACPI driver probe function, which is not protected against hotplug during modprobe. Switch it over to work_on_cpu() and protect the probe function against CPU hotplug. Signed-off-by: Thomas Gleixner Cc: Fenghua Yu Cc: Tony Luck Cc: Herbert Xu Cc: "Rafael J. Wysocki" Cc: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: Sebastian Siewior Cc: Lai Jiangshan Cc: linux-a...@vger.kernel.org Cc: Viresh Kumar Cc: Michael Ellerman Cc: Tejun Heo Cc: "David S. 
Miller" Cc: Len Brown Link: http://lkml.kernel.org/r/20170412201042.785920...@linutronix.de Signed-off-by: Thomas Gleixner --- drivers/acpi/processor_driver.c | 7 - drivers/acpi/processor_throttling.c | 62 + 2 files changed, 42 insertions(+), 27 deletions(-) diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c index eab8cda..8697a82 100644 --- a/drivers/acpi/processor_driver.c +++ b/drivers/acpi/processor_driver.c @@ -262,11 +262,16 @@ err_power_exit: static int acpi_processor_start(struct device *dev) { struct acpi_device *device = ACPI_COMPANION(dev); + int ret; if (!device) return -ENODEV; - return __acpi_processor_start(device); + /* Protect against concurrent CPU hotplug operations */ + get_online_cpus(); + ret = __acpi_processor_start(device); + put_online_cpus(); + return ret; } static int acpi_processor_stop(struct device *dev) diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c index a12f96c..3de34633 100644 --- a/drivers/acpi/processor_throttling.c +++ b/drivers/acpi/processor_throttling.c @@ -62,8 +62,8 @@ struct acpi_processor_throttling_arg { #define THROTTLING_POSTCHANGE (2) static int acpi_processor_get_throttling(struct acpi_processor *pr); -int acpi_processor_set_throttling(struct acpi_processor *pr, - int state, bool force); +static int __acpi_processor_set_throttling(struct acpi_processor *pr, + int state, bool force, bool direct); static int acpi_processor_update_tsd_coord(void) { @@ -891,7 +891,8 @@ static int acpi_processor_get_throttling_ptc(struct acpi_processor *pr) ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Invalid throttling state, reset\n")); state = 0; - ret = acpi_processor_set_throttling(pr, state, true); + ret = __acpi_processor_set_throttling(pr, state, true, + true); if (ret) return ret; } @@ -901,36 +902,31 @@ static int acpi_processor_get_throttling_ptc(struct acpi_processor *pr) return 0; } -static int acpi_processor_get_throttling(struct acpi_processor *pr) +static long 
__acpi_processor_get_throttling(void *data) { - cpumask_var_t saved_mask; - int ret; + struct acpi_processor *pr = data; + + return pr->throttling.acpi_processor_get_throttling(pr); +} +static int acpi_processor_get_throttling(struct acpi_processor *pr) +{ if (!pr) return -EINVAL; if (!pr->flags.throttling) return -ENODEV; - if (!alloc_cpumask_var(_mask, GFP_KERNEL)) - return -ENOMEM; - /* -* Migrate task to the cpu pointed by pr. +* This is either called from the CPU hotplug callback of +* processor_driver or via the ACPI probe function. In the latter +*
[tip:sched/core] cpufreq/sh: Replace racy task affinity logic
Commit-ID: 205dcc1ecbc566cbc20acf246e68de3b080b3ecf Gitweb: http://git.kernel.org/tip/205dcc1ecbc566cbc20acf246e68de3b080b3ecf Author: Thomas GleixnerAuthorDate: Wed, 12 Apr 2017 22:07:36 +0200 Committer: Thomas Gleixner CommitDate: Sat, 15 Apr 2017 12:20:55 +0200 cpufreq/sh: Replace racy task affinity logic The target() callback must run on the affected cpu. This is achieved by temporarily setting the affinity of the calling thread to the requested CPU and reset it to the original affinity afterwards. That's racy vs. concurrent affinity settings for that thread resulting in code executing on the wrong CPU. Replace it by work_on_cpu(). All call paths which invoke the callbacks are already protected against CPU hotplug. Signed-off-by: Thomas Gleixner Acked-by: Viresh Kumar Cc: Fenghua Yu Cc: Tony Luck Cc: Herbert Xu Cc: "Rafael J. Wysocki" Cc: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: Sebastian Siewior Cc: linux...@vger.kernel.org Cc: Lai Jiangshan Cc: Michael Ellerman Cc: Tejun Heo Cc: "David S. Miller" Cc: Len Brown Link: http://lkml.kernel.org/r/20170412201042.958216...@linutronix.de Signed-off-by: Thomas Gleixner --- drivers/cpufreq/sh-cpufreq.c | 45 ++-- 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/drivers/cpufreq/sh-cpufreq.c b/drivers/cpufreq/sh-cpufreq.c index 86628e2..719c3d9 100644 --- a/drivers/cpufreq/sh-cpufreq.c +++ b/drivers/cpufreq/sh-cpufreq.c @@ -30,54 +30,63 @@ static DEFINE_PER_CPU(struct clk, sh_cpuclk); +struct cpufreq_target { + struct cpufreq_policy *policy; + unsigned intfreq; +}; + static unsigned int sh_cpufreq_get(unsigned int cpu) { return (clk_get_rate(_cpu(sh_cpuclk, cpu)) + 500) / 1000; } -/* - * Here we notify other drivers of the proposed change and the final change. 
- */ -static int sh_cpufreq_target(struct cpufreq_policy *policy, -unsigned int target_freq, -unsigned int relation) +static long __sh_cpufreq_target(void *arg) { - unsigned int cpu = policy->cpu; + struct cpufreq_target *target = arg; + struct cpufreq_policy *policy = target->policy; + int cpu = policy->cpu; struct clk *cpuclk = _cpu(sh_cpuclk, cpu); - cpumask_t cpus_allowed; struct cpufreq_freqs freqs; struct device *dev; long freq; - cpus_allowed = current->cpus_allowed; - set_cpus_allowed_ptr(current, cpumask_of(cpu)); - - BUG_ON(smp_processor_id() != cpu); + if (smp_processor_id() != cpu) + return -ENODEV; dev = get_cpu_device(cpu); /* Convert target_freq from kHz to Hz */ - freq = clk_round_rate(cpuclk, target_freq * 1000); + freq = clk_round_rate(cpuclk, target->freq * 1000); if (freq < (policy->min * 1000) || freq > (policy->max * 1000)) return -EINVAL; - dev_dbg(dev, "requested frequency %u Hz\n", target_freq * 1000); + dev_dbg(dev, "requested frequency %u Hz\n", target->freq * 1000); freqs.old = sh_cpufreq_get(cpu); freqs.new = (freq + 500) / 1000; freqs.flags = 0; - cpufreq_freq_transition_begin(policy, ); - set_cpus_allowed_ptr(current, _allowed); + cpufreq_freq_transition_begin(target->policy, ); clk_set_rate(cpuclk, freq); - cpufreq_freq_transition_end(policy, , 0); + cpufreq_freq_transition_end(target->policy, , 0); dev_dbg(dev, "set frequency %lu Hz\n", freq); - return 0; } +/* + * Here we notify other drivers of the proposed change and the final change. + */ +static int sh_cpufreq_target(struct cpufreq_policy *policy, +unsigned int target_freq, +unsigned int relation) +{ + struct cpufreq_target data = { .policy = policy, .freq = target_freq }; + + return work_on_cpu(policy->cpu, __sh_cpufreq_target, ); +} + static int sh_cpufreq_verify(struct cpufreq_policy *policy) { struct clk *cpuclk = _cpu(sh_cpuclk, policy->cpu);
[tip:sched/core] cpufreq/sparc-us3: Replace racy task affinity logic
Commit-ID: 9fe24c4e92d3963d92d7d383e28ed098bd5689d8 Gitweb: http://git.kernel.org/tip/9fe24c4e92d3963d92d7d383e28ed098bd5689d8 Author: Thomas GleixnerAuthorDate: Wed, 12 Apr 2017 22:07:37 +0200 Committer: Thomas Gleixner CommitDate: Sat, 15 Apr 2017 12:20:55 +0200 cpufreq/sparc-us3: Replace racy task affinity logic The access to the safari config register in the CPU frequency functions must be executed on the target CPU. This is achieved by temporarily setting the affinity of the calling user space thread to the requested CPU and reset it to the original affinity afterwards. That's racy vs. CPU hotplug and concurrent affinity settings for that thread resulting in code executing on the wrong CPU and overwriting the new affinity setting. Replace it by a straightforward smp function call. Signed-off-by: Thomas Gleixner Acked-by: Viresh Kumar Cc: Fenghua Yu Cc: Tony Luck Cc: Herbert Xu Cc: "Rafael J. Wysocki" Cc: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: Sebastian Siewior Cc: linux...@vger.kernel.org Cc: Lai Jiangshan Cc: Michael Ellerman Cc: Tejun Heo Cc: "David S. 
Miller" Cc: Len Brown Link: http://lkml.kernel.org/r/20170412201043.047558...@linutronix.de Signed-off-by: Thomas Gleixner --- drivers/cpufreq/sparc-us3-cpufreq.c | 46 + 1 file changed, 16 insertions(+), 30 deletions(-) diff --git a/drivers/cpufreq/sparc-us3-cpufreq.c b/drivers/cpufreq/sparc-us3-cpufreq.c index a8d86a4..30645b0 100644 --- a/drivers/cpufreq/sparc-us3-cpufreq.c +++ b/drivers/cpufreq/sparc-us3-cpufreq.c @@ -35,22 +35,28 @@ static struct us3_freq_percpu_info *us3_freq_table; #define SAFARI_CFG_DIV_32 0x8000UL #define SAFARI_CFG_DIV_MASK0xC000UL -static unsigned long read_safari_cfg(void) +static void read_safari_cfg(void *arg) { - unsigned long ret; + unsigned long ret, *val = arg; __asm__ __volatile__("ldxa [%%g0] %1, %0" : "=" (ret) : "i" (ASI_SAFARI_CONFIG)); - return ret; + *val = ret; } -static void write_safari_cfg(unsigned long val) +static void update_safari_cfg(void *arg) { + unsigned long reg, *new_bits = arg; + + read_safari_cfg(); + reg &= ~SAFARI_CFG_DIV_MASK; + reg |= *new_bits; + __asm__ __volatile__("stxa %0, [%%g0] %1\n\t" "membar#Sync" : /* no outputs */ -: "r" (val), "i" (ASI_SAFARI_CONFIG) +: "r" (reg), "i" (ASI_SAFARI_CONFIG) : "memory"); } @@ -78,29 +84,17 @@ static unsigned long get_current_freq(unsigned int cpu, unsigned long safari_cfg static unsigned int us3_freq_get(unsigned int cpu) { - cpumask_t cpus_allowed; unsigned long reg; - unsigned int ret; - - cpumask_copy(_allowed, >cpus_allowed); - set_cpus_allowed_ptr(current, cpumask_of(cpu)); - - reg = read_safari_cfg(); - ret = get_current_freq(cpu, reg); - - set_cpus_allowed_ptr(current, _allowed); - return ret; + if (smp_call_function_single(cpu, read_safari_cfg, , 1)) + return 0; + return get_current_freq(cpu, reg); } static int us3_freq_target(struct cpufreq_policy *policy, unsigned int index) { unsigned int cpu = policy->cpu; - unsigned long new_bits, new_freq, reg; - cpumask_t cpus_allowed; - - cpumask_copy(_allowed, >cpus_allowed); - set_cpus_allowed_ptr(current, 
cpumask_of(cpu)); + unsigned long new_bits, new_freq; new_freq = sparc64_get_clock_tick(cpu) / 1000; switch (index) { @@ -121,15 +115,7 @@ static int us3_freq_target(struct cpufreq_policy *policy, unsigned int index) BUG(); } - reg = read_safari_cfg(); - - reg &= ~SAFARI_CFG_DIV_MASK; - reg |= new_bits; - write_safari_cfg(reg); - - set_cpus_allowed_ptr(current, _allowed); - - return 0; + return smp_call_function_single(cpu, update_safari_cfg, _bits, 1); } static int __init us3_freq_cpu_init(struct cpufreq_policy *policy)
[tip:sched/core] sparc/sysfs: Replace racy task affinity logic
Commit-ID: ea875ec94eafb858990f3fe9528501f983105653 Gitweb: http://git.kernel.org/tip/ea875ec94eafb858990f3fe9528501f983105653 Author: Thomas GleixnerAuthorDate: Thu, 13 Apr 2017 10:17:07 +0200 Committer: Thomas Gleixner CommitDate: Sat, 15 Apr 2017 12:20:54 +0200 sparc/sysfs: Replace racy task affinity logic The mmustat_enable sysfs file accessor functions must run code on the target CPU. This is achieved by temporarily setting the affinity of the calling user space thread to the requested CPU and reset it to the original affinity afterwards. That's racy vs. concurrent affinity settings for that thread resulting in code executing on the wrong CPU and overwriting the new affinity setting. Replace it by using work_on_cpu() which guarantees to run the code on the requested CPU. Protection against CPU hotplug is not required as the open sysfs file already prevents the removal from the CPU offline callback. Using the hotplug protected version would actually be wrong because it would deadlock against a CPU hotplug operation of the CPU associated to the sysfs file in progress. Signed-off-by: Thomas Gleixner Acked-by: David S. 
Miller Cc: fenghua...@intel.com Cc: tony.l...@intel.com Cc: herb...@gondor.apana.org.au Cc: r...@rjwysocki.net Cc: pet...@infradead.org Cc: b...@kernel.crashing.org Cc: bige...@linutronix.de Cc: jiangshan...@gmail.com Cc: sparcli...@vger.kernel.org Cc: viresh.ku...@linaro.org Cc: m...@ellerman.id.au Cc: t...@kernel.org Cc: l...@kernel.org Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1704131001270.2408@nanos Signed-off-by: Thomas Gleixner --- arch/sparc/kernel/sysfs.c | 39 +++ 1 file changed, 11 insertions(+), 28 deletions(-) diff --git a/arch/sparc/kernel/sysfs.c b/arch/sparc/kernel/sysfs.c index d63fc61..5fd352b 100644 --- a/arch/sparc/kernel/sysfs.c +++ b/arch/sparc/kernel/sysfs.c @@ -98,27 +98,7 @@ static struct attribute_group mmu_stat_group = { .name = "mmu_stats", }; -/* XXX convert to rusty's on_one_cpu */ -static unsigned long run_on_cpu(unsigned long cpu, - unsigned long (*func)(unsigned long), - unsigned long arg) -{ - cpumask_t old_affinity; - unsigned long ret; - - cpumask_copy(_affinity, >cpus_allowed); - /* should return -EINVAL to userspace */ - if (set_cpus_allowed_ptr(current, cpumask_of(cpu))) - return 0; - - ret = func(arg); - - set_cpus_allowed_ptr(current, _affinity); - - return ret; -} - -static unsigned long read_mmustat_enable(unsigned long junk) +static long read_mmustat_enable(void *data __maybe_unused) { unsigned long ra = 0; @@ -127,11 +107,11 @@ static unsigned long read_mmustat_enable(unsigned long junk) return ra != 0; } -static unsigned long write_mmustat_enable(unsigned long val) +static long write_mmustat_enable(void *data) { - unsigned long ra, orig_ra; + unsigned long ra, orig_ra, *val = data; - if (val) + if (*val) ra = __pa(_cpu(mmu_stats, smp_processor_id())); else ra = 0UL; @@ -142,7 +122,8 @@ static unsigned long write_mmustat_enable(unsigned long val) static ssize_t show_mmustat_enable(struct device *s, struct device_attribute *attr, char *buf) { - unsigned long val = run_on_cpu(s->id, read_mmustat_enable, 0); + long val 
= work_on_cpu(s->id, read_mmustat_enable, NULL); + return sprintf(buf, "%lx\n", val); } @@ -150,13 +131,15 @@ static ssize_t store_mmustat_enable(struct device *s, struct device_attribute *attr, const char *buf, size_t count) { - unsigned long val, err; - int ret = sscanf(buf, "%lu", ); + unsigned long val; + long err; + int ret; + ret = sscanf(buf, "%lu", ); if (ret != 1) return -EINVAL; - err = run_on_cpu(s->id, write_mmustat_enable, val); + err = work_on_cpu(s->id, write_mmustat_enable, ); if (err) return -EIO;
[tip:locking/core] futex: Avoid freeing an active timer
Commit-ID: 97181f9bd57405b879403763284537e27d46963d Gitweb: http://git.kernel.org/tip/97181f9bd57405b879403763284537e27d46963d Author: Thomas GleixnerAuthorDate: Mon, 10 Apr 2017 18:03:36 +0200 Committer: Ingo Molnar CommitDate: Fri, 14 Apr 2017 10:29:53 +0200 futex: Avoid freeing an active timer Alexander reported a hrtimer debug_object splat: ODEBUG: free active (active state 0) object type: hrtimer hint: hrtimer_wakeup (kernel/time/hrtimer.c:1423) debug_object_free (lib/debugobjects.c:603) destroy_hrtimer_on_stack (kernel/time/hrtimer.c:427) futex_lock_pi (kernel/futex.c:2740) do_futex (kernel/futex.c:3399) SyS_futex (kernel/futex.c:3447 kernel/futex.c:3415) do_syscall_64 (arch/x86/entry/common.c:284) entry_SYSCALL64_slow_path (arch/x86/entry/entry_64.S:249) Which was caused by commit: cfafcd117da0 ("futex: Rework futex_lock_pi() to use rt_mutex_*_proxy_lock()") ... losing the hrtimer_cancel() in the shuffle. Where previously the hrtimer_cancel() was done by rt_mutex_slowlock() we now need to do it manually. Reported-by: Alexander Levin Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Fixes: cfafcd117da0 ("futex: Rework futex_lock_pi() to use rt_mutex_*_proxy_lock()") Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1704101802370.2906@nanos Signed-off-by: Ingo Molnar --- kernel/futex.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/futex.c b/kernel/futex.c index c3eebcd..7ac1676 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2736,8 +2736,10 @@ out_unlock_put_key: out_put_key: put_futex_key(); out: - if (to) + if (to) { + hrtimer_cancel(>timer); destroy_hrtimer_on_stack(>timer); + } return ret != -EINTR ? ret : -ERESTARTNOINTR; uaddr_faulted:
[tip:smp/hotplug] padata: Make padata_alloc() static
Commit-ID: a792e10c4cb42b8508364c7e187caac2409e6166 Gitweb: http://git.kernel.org/tip/a792e10c4cb42b8508364c7e187caac2409e6166 Author: Thomas GleixnerAuthorDate: Tue, 18 Apr 2017 19:04:45 +0200 Committer: Thomas Gleixner CommitDate: Thu, 20 Apr 2017 13:08:51 +0200 padata: Make padata_alloc() static No users outside of padata.c Signed-off-by: Thomas Gleixner Cc: Steffen Klassert Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Steven Rostedt Cc: linux-cry...@vger.kernel.org Link: http://lkml.kernel.org/r/20170418170552.619297...@linutronix.de --- include/linux/padata.h | 3 --- kernel/padata.c| 32 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/include/linux/padata.h b/include/linux/padata.h index 0f9e567..2f9c1f9 100644 --- a/include/linux/padata.h +++ b/include/linux/padata.h @@ -166,9 +166,6 @@ struct padata_instance { extern struct padata_instance *padata_alloc_possible( struct workqueue_struct *wq); -extern struct padata_instance *padata_alloc(struct workqueue_struct *wq, - const struct cpumask *pcpumask, - const struct cpumask *cbcpumask); extern void padata_free(struct padata_instance *pinst); extern int padata_do_parallel(struct padata_instance *pinst, struct padata_priv *padata, int cb_cpu); diff --git a/kernel/padata.c b/kernel/padata.c index 3202aa1..18992bf 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -939,19 +939,6 @@ static struct kobj_type padata_attr_type = { }; /** - * padata_alloc_possible - Allocate and initialize padata instance. - * Use the cpu_possible_mask for serial and - * parallel workers. - * - * @wq: workqueue to use for the allocated padata instance - */ -struct padata_instance *padata_alloc_possible(struct workqueue_struct *wq) -{ - return padata_alloc(wq, cpu_possible_mask, cpu_possible_mask); -} -EXPORT_SYMBOL(padata_alloc_possible); - -/** * padata_alloc - allocate and initialize a padata instance and specify *cpumasks for serial and parallel workers. 
* @@ -959,9 +946,9 @@ EXPORT_SYMBOL(padata_alloc_possible); * @pcpumask: cpumask that will be used for padata parallelization * @cbcpumask: cpumask that will be used for padata serialization */ -struct padata_instance *padata_alloc(struct workqueue_struct *wq, -const struct cpumask *pcpumask, -const struct cpumask *cbcpumask) +static struct padata_instance *padata_alloc(struct workqueue_struct *wq, + const struct cpumask *pcpumask, + const struct cpumask *cbcpumask) { struct padata_instance *pinst; struct parallel_data *pd = NULL; @@ -1016,6 +1003,19 @@ err: } /** + * padata_alloc_possible - Allocate and initialize padata instance. + * Use the cpu_possible_mask for serial and + * parallel workers. + * + * @wq: workqueue to use for the allocated padata instance + */ +struct padata_instance *padata_alloc_possible(struct workqueue_struct *wq) +{ + return padata_alloc(wq, cpu_possible_mask, cpu_possible_mask); +} +EXPORT_SYMBOL(padata_alloc_possible); + +/** * padata_free - free a padata instance * * @padata_inst: padata instance to free
[tip:smp/hotplug] ACPI/processor: Use cpu_hotplug_disable() instead of get_online_cpus()
Commit-ID: 16ab1d27c5040ae369c4652e6ce0a9a82bdbb6c5 Gitweb: http://git.kernel.org/tip/16ab1d27c5040ae369c4652e6ce0a9a82bdbb6c5 Author: Thomas Gleixner AuthorDate: Tue, 18 Apr 2017 19:05:01 +0200 Committer: Thomas Gleixner CommitDate: Thu, 20 Apr 2017 13:08:56 +0200 ACPI/processor: Use cpu_hotplug_disable() instead of get_online_cpus() Converting the hotplug locking, i.e. get_online_cpus(), to a percpu rwsem unearthed a circular lock dependency which was hidden from lockdep due to the lockdep annotation of get_online_cpus() which prevents lockdep from creating full dependency chains. CPU0 CPU1 lock((&wfc.work)); lock(cpu_hotplug_lock.rw_sem); lock((&wfc.work)); lock(cpu_hotplug_lock.rw_sem); This dependency is established via acpi_processor_start() which calls into the work queue code. And the work queue code establishes the reverse dependency. This is not a problem of get_online_cpus() recursion, it's a possible deadlock undetected by lockdep so far. The cure is to use cpu_hotplug_disable() instead of get_online_cpus() to protect the probing from acpi_processor_start(). There is a side effect to this: cpu_hotplug_disable() makes a concurrent cpu hotplug attempt via the sysfs interfaces fail with -EBUSY, but that probing usually happens during the boot process where no interaction is possible. Any later invocations are infrequent enough and concurrent hotplug attempts are so unlikely that the danger of user space visible regressions is very close to zero. Anyway, that's preferable over a real deadlock. Signed-off-by: Thomas Gleixner Acked-by: Rafael J. 
Wysocki Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Steven Rostedt Cc: linux-a...@vger.kernel.org Cc: Len Brown Link: http://lkml.kernel.org/r/20170418170553.964555...@linutronix.de --- drivers/acpi/processor_driver.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c index 8697a82..591d1dd 100644 --- a/drivers/acpi/processor_driver.c +++ b/drivers/acpi/processor_driver.c @@ -268,9 +268,9 @@ static int acpi_processor_start(struct device *dev) return -ENODEV; /* Protect against concurrent CPU hotplug operations */ - get_online_cpus(); + cpu_hotplug_disable(); ret = __acpi_processor_start(device); - put_online_cpus(); + cpu_hotplug_enable(); return ret; }
[tip:smp/hotplug] PCI: Replace the racy recursion prevention
Commit-ID: 81edd60135c58f893bcc07f8e633ea8efe72084d Gitweb: http://git.kernel.org/tip/81edd60135c58f893bcc07f8e633ea8efe72084d Author: Thomas Gleixner AuthorDate: Tue, 18 Apr 2017 19:05:00 +0200 Committer: Thomas Gleixner CommitDate: Thu, 20 Apr 2017 13:08:56 +0200 PCI: Replace the racy recursion prevention pci_call_probe() can be called recursively when a physical function is probed and the probing creates virtual functions, which are populated via pci_bus_add_device() which in turn can end up calling pci_call_probe() again. The code has an interesting way to prevent recursing into the workqueue code. That's accomplished by a check whether the current task runs already on the numa node which is associated with the device. While that works to prevent the recursion into the workqueue code, it's racy versus normal execution as there is no guarantee that the node does not vanish after the check. There is another issue with this code. It dereferences cpumask_of_node() unconditionally without checking whether the node is available. Make the detection reliable by: - Mark a probed device as 'is_probed' in pci_call_probe() - Check in pci_call_probe() for a virtual function. If it's a virtual function and the associated physical function device is marked 'is_probed' then this is a recursive call, so the call can be invoked in the calling context. - Add a check whether the node is online before dereferencing it. 
Signed-off-by: Thomas Gleixner Acked-by: Bjorn Helgaas Cc: Peter Zijlstra Cc: linux-...@vger.kernel.org Cc: Sebastian Siewior Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170418170553.885818...@linutronix.de --- drivers/pci/pci-driver.c | 47 +-- include/linux/pci.h | 1 + 2 files changed, 26 insertions(+), 22 deletions(-) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index f00e4d9..f84d2a8 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -320,10 +320,19 @@ static long local_pci_probe(void *_ddi) return 0; } +static bool pci_physfn_is_probed(struct pci_dev *dev) +{ +#ifdef CONFIG_PCI_IOV + return dev->is_virtfn && dev->physfn->is_probed; +#else + return false; +#endif +} + static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev, const struct pci_device_id *id) { - int error, node; + int error, node, cpu; struct drv_dev_and_id ddi = { drv, dev, id }; /* @@ -332,33 +341,27 @@ static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev, * on the right node. */ node = dev_to_node(>dev); + dev->is_probed = 1; + + cpu_hotplug_disable(); /* -* On NUMA systems, we are likely to call a PF probe function using -* work_on_cpu(). If that probe calls pci_enable_sriov() (which -* adds the VF devices via pci_bus_add_device()), we may re-enter -* this function to call the VF probe function. Calling -* work_on_cpu() again will cause a lockdep warning. Since VFs are -* always on the same node as the PF, we can work around this by -* avoiding work_on_cpu() when we're already on the correct node. -* -* Preemption is enabled, so it's theoretically unsafe to use -* numa_node_id(), but even if we run the probe function on the -* wrong node, it should be functionally correct. +* Prevent nesting work_on_cpu() for the case where a Virtual Function +* device is probed from work_on_cpu() of the Physical device. 
*/ - if (node >= 0 && node != numa_node_id()) { - int cpu; - - cpu_hotplug_disable(); + if (node < 0 || node >= MAX_NUMNODES || !node_online(node) || + pci_physfn_is_probed(dev)) + cpu = nr_cpu_ids; + else cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask); - if (cpu < nr_cpu_ids) - error = work_on_cpu(cpu, local_pci_probe, ); - else - error = local_pci_probe(); - cpu_hotplug_enable(); - } else + + if (cpu < nr_cpu_ids) + error = work_on_cpu(cpu, local_pci_probe, ); + else error = local_pci_probe(); + dev->is_probed = 0; + cpu_hotplug_enable(); return error; } diff --git a/include/linux/pci.h b/include/linux/pci.h index eb3da1a..3efe145 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -365,6 +365,7 @@ struct pci_dev { unsigned intirq_managed:1; unsigned inthas_secondary_link:1; unsigned intnon_compliant_bars:1; /* broken BARs; ignore them */ + unsigned intis_probed:1;
[tip:smp/hotplug] perf/core: Remove redundant get_online_cpus()
Commit-ID: 3e27bdd5a4fc7954af4027f1a77e9556deed653d Gitweb: http://git.kernel.org/tip/3e27bdd5a4fc7954af4027f1a77e9556deed653d Author: Thomas GleixnerAuthorDate: Tue, 18 Apr 2017 19:05:02 +0200 Committer: Thomas Gleixner CommitDate: Thu, 20 Apr 2017 13:08:56 +0200 perf/core: Remove redundant get_online_cpus() SyS_perf_event_open() calls get_online_cpus() and eventually invokes swevent_hlist_get() which does it again. All callchains leading to swevent_hlist_get() originate from SyS_perf_event_open() so the extra protection is redundant. Remove it. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170418170554.043759...@linutronix.de --- kernel/events/core.c | 5 - 1 file changed, 5 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index ff01cba..634dd95 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7592,7 +7592,6 @@ static int swevent_hlist_get(void) { int err, cpu, failed_cpu; - get_online_cpus(); for_each_possible_cpu(cpu) { err = swevent_hlist_get_cpu(cpu); if (err) { @@ -7600,8 +7599,6 @@ static int swevent_hlist_get(void) goto fail; } } - put_online_cpus(); - return 0; fail: for_each_possible_cpu(cpu) { @@ -7609,8 +7606,6 @@ fail: break; swevent_hlist_put_cpu(cpu); } - - put_online_cpus(); return err; }
[tip:smp/hotplug] x86/perf: Drop EXPORT of perf_check_microcode
Commit-ID: 1516643d0831ac6807aee360206cf0f0691c5da0 Gitweb: http://git.kernel.org/tip/1516643d0831ac6807aee360206cf0f0691c5da0 Author: Thomas GleixnerAuthorDate: Tue, 18 Apr 2017 19:04:57 +0200 Committer: Thomas Gleixner CommitDate: Thu, 20 Apr 2017 13:08:55 +0200 x86/perf: Drop EXPORT of perf_check_microcode The only caller is the microcode update, which cannot be modular. Drop the export. Signed-off-by: Thomas Gleixner Acked-by: Borislav Petkov Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Steven Rostedt Cc: Borislav Petkov Link: http://lkml.kernel.org/r/20170418170553.620260...@linutronix.de --- arch/x86/events/core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 580b60f..ac650d5 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2224,7 +2224,6 @@ void perf_check_microcode(void) if (x86_pmu.check_microcode) x86_pmu.check_microcode(); } -EXPORT_SYMBOL_GPL(perf_check_microcode); static struct pmu pmu = { .pmu_enable = x86_pmu_enable,
[tip:smp/hotplug] PCI: Use cpu_hotplug_disable() instead of get_online_cpus()
Commit-ID: b4d1673371196dd9aebdd2f61d946165c777b931 Gitweb: http://git.kernel.org/tip/b4d1673371196dd9aebdd2f61d946165c777b931 Author: Thomas Gleixner AuthorDate: Tue, 18 Apr 2017 19:04:59 +0200 Committer: Thomas Gleixner CommitDate: Thu, 20 Apr 2017 13:08:55 +0200 PCI: Use cpu_hotplug_disable() instead of get_online_cpus() Converting the hotplug locking, i.e. get_online_cpus(), to a percpu rwsem unearthed a circular lock dependency which was hidden from lockdep due to the lockdep annotation of get_online_cpus() which prevents lockdep from creating full dependency chains. There are several variants of this. An example is: Chain exists of: cpu_hotplug_lock.rw_sem --> drm_global_mutex --> &item->mutex CPU0 CPU1 lock(&item->mutex); lock(drm_global_mutex); lock(&item->mutex); lock(cpu_hotplug_lock.rw_sem); because there are dependencies through workqueues. The call chain is: get_online_cpus apply_workqueue_attrs __alloc_workqueue_key ttm_mem_global_init ast_ttm_mem_global_init drm_global_item_ref ast_mm_init ast_driver_load drm_dev_register drm_get_pci_dev ast_pci_probe local_pci_probe work_for_cpu_fn process_one_work worker_thread This is not a problem of get_online_cpus() recursion, it's a possible deadlock undetected by lockdep so far. The cure is to use cpu_hotplug_disable() instead of get_online_cpus() to protect the PCI probing. There is a side effect to this: cpu_hotplug_disable() makes a concurrent cpu hotplug attempt via the sysfs interfaces fail with -EBUSY, but PCI probing usually happens during the boot process where no interaction is possible. Any later invocations are infrequent enough and concurrent hotplug attempts are so unlikely that the danger of user space visible regressions is very close to zero. Anyway, that's preferable over a real deadlock. 
Signed-off-by: Thomas Gleixner Acked-by: Bjorn Helgaas Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Steven Rostedt Cc: linux-...@vger.kernel.org Link: http://lkml.kernel.org/r/20170418170553.806707...@linutronix.de --- drivers/pci/pci-driver.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index afa7271..f00e4d9 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -349,13 +349,13 @@ static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev, if (node >= 0 && node != numa_node_id()) { int cpu; - get_online_cpus(); + cpu_hotplug_disable(); cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask); if (cpu < nr_cpu_ids) error = work_on_cpu(cpu, local_pci_probe, ); else error = local_pci_probe(); - put_online_cpus(); + cpu_hotplug_enable(); } else error = local_pci_probe();
[tip:smp/hotplug] cpu/hotplug: Convert hotplug locking to percpu rwsem
Commit-ID: d215aab82d81974f438bfbc80aa437132f3c37c3 Gitweb: http://git.kernel.org/tip/d215aab82d81974f438bfbc80aa437132f3c37c3 Author: Thomas GleixnerAuthorDate: Tue, 18 Apr 2017 19:05:06 +0200 Committer: Thomas Gleixner CommitDate: Thu, 20 Apr 2017 13:08:58 +0200 cpu/hotplug: Convert hotplug locking to percpu rwsem There are no more (known) nested calls to get_online_cpus() so it's possible to remove the nested call magic and convert the mutex to a percpu-rwsem, which speeds up get/put_online_cpus() significantly for the uncontended case. The contended case (write locked for hotplug operations) is slow anyway, so the slightly more expensive down_write of the percpu rwsem does not matter. [ peterz: Add lockdep assertions ] Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170418170554.382344...@linutronix.de --- include/linux/cpu.h | 2 + kernel/cpu.c | 110 -- kernel/jump_label.c | 2 + kernel/padata.c | 1 + kernel/stop_machine.c | 2 + 5 files changed, 23 insertions(+), 94 deletions(-) diff --git a/include/linux/cpu.h b/include/linux/cpu.h index f920812..83010c3 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -105,6 +105,7 @@ extern void cpu_hotplug_begin(void); extern void cpu_hotplug_done(void); extern void get_online_cpus(void); extern void put_online_cpus(void); +extern void lockdep_assert_hotplug_held(void); extern void cpu_hotplug_disable(void); extern void cpu_hotplug_enable(void); void clear_tasks_mm_cpumask(int cpu); @@ -118,6 +119,7 @@ static inline void cpu_hotplug_done(void) {} #define put_online_cpus() do { } while (0) #define cpu_hotplug_disable() do { } while (0) #define cpu_hotplug_enable() do { } while (0) +static inline void lockdep_assert_hotplug_held(void) {} #endif /* CONFIG_HOTPLUG_CPU */ #ifdef CONFIG_PM_SLEEP_SMP diff --git a/kernel/cpu.c b/kernel/cpu.c index f932e68..05341f7 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -27,6 +27,7 @@ #include #include 
#include +#include #include #define CREATE_TRACE_POINTS @@ -196,121 +197,41 @@ void cpu_maps_update_done(void) mutex_unlock(_add_remove_lock); } -/* If set, cpu_up and cpu_down will return -EBUSY and do nothing. +/* + * If set, cpu_up and cpu_down will return -EBUSY and do nothing. * Should always be manipulated under cpu_add_remove_lock */ static int cpu_hotplug_disabled; #ifdef CONFIG_HOTPLUG_CPU -static struct { - struct task_struct *active_writer; - /* wait queue to wake up the active_writer */ - wait_queue_head_t wq; - /* verifies that no writer will get active while readers are active */ - struct mutex lock; - /* -* Also blocks the new readers during -* an ongoing cpu hotplug operation. -*/ - atomic_t refcount; - -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map dep_map; -#endif -} cpu_hotplug = { - .active_writer = NULL, - .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq), - .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock), -#ifdef CONFIG_DEBUG_LOCK_ALLOC - .dep_map = STATIC_LOCKDEP_MAP_INIT("cpu_hotplug.dep_map", _hotplug.dep_map), -#endif -}; - -/* Lockdep annotations for get/put_online_cpus() and cpu_hotplug_begin/end() */ -#define cpuhp_lock_acquire_read() lock_map_acquire_read(_hotplug.dep_map) -#define cpuhp_lock_acquire_tryread() \ - lock_map_acquire_tryread(_hotplug.dep_map) -#define cpuhp_lock_acquire() lock_map_acquire(_hotplug.dep_map) -#define cpuhp_lock_release() lock_map_release(_hotplug.dep_map) - +DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock); void get_online_cpus(void) { - might_sleep(); - if (cpu_hotplug.active_writer == current) - return; - cpuhp_lock_acquire_read(); - mutex_lock(_hotplug.lock); - atomic_inc(_hotplug.refcount); - mutex_unlock(_hotplug.lock); + percpu_down_read(_hotplug_lock); } EXPORT_SYMBOL_GPL(get_online_cpus); void put_online_cpus(void) { - int refcount; - - if (cpu_hotplug.active_writer == current) - return; - - refcount = atomic_dec_return(_hotplug.refcount); - if (WARN_ON(refcount < 0)) /* try to fix things up */ - 
atomic_inc(_hotplug.refcount); - - if (refcount <= 0 && waitqueue_active(_hotplug.wq)) - wake_up(_hotplug.wq); - - cpuhp_lock_release(); - + percpu_up_read(_hotplug_lock); } EXPORT_SYMBOL_GPL(put_online_cpus); -/* - * This ensures that the hotplug operation can begin only when the - * refcount goes to zero. - * - * Note that during a cpu-hotplug operation, the new readers, if any, - * will be blocked by the
[tip:sched/core] crypto: N2 - Replace racy task affinity logic
Commit-ID: 73810a069120aa831debb4d967310ab900f628ad Gitweb: http://git.kernel.org/tip/73810a069120aa831debb4d967310ab900f628ad Author: Thomas GleixnerAuthorDate: Thu, 13 Apr 2017 10:20:23 +0200 Committer: Thomas Gleixner CommitDate: Sat, 15 Apr 2017 12:20:56 +0200 crypto: N2 - Replace racy task affinity logic spu_queue_register() needs to invoke setup functions on a particular CPU. This is achieved by temporarily setting the affinity of the calling user space thread to the requested CPU and reset it to the original affinity afterwards. That's racy vs. CPU hotplug and concurrent affinity settings for that thread resulting in code executing on the wrong CPU and overwriting the new affinity setting. Replace it by using work_on_cpu_safe() which guarantees to run the code on the requested CPU or to fail in case the CPU is offline. Signed-off-by: Thomas Gleixner Acked-by: Herbert Xu Acked-by: "David S. Miller" Cc: Fenghua Yu Cc: Tony Luck Cc: "Rafael J. Wysocki" Cc: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: Sebastian Siewior Cc: Lai Jiangshan Cc: Viresh Kumar Cc: linux-cry...@vger.kernel.org Cc: Michael Ellerman Cc: Tejun Heo Cc: Len Brown Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1704131019420.2408@nanos Signed-off-by: Thomas Gleixner --- drivers/crypto/n2_core.c | 31 --- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c index c5aac25..4ecb77a 100644 --- a/drivers/crypto/n2_core.c +++ b/drivers/crypto/n2_core.c @@ -65,6 +65,11 @@ struct spu_queue { struct list_headlist; }; +struct spu_qreg { + struct spu_queue*queue; + unsigned long type; +}; + static struct spu_queue **cpu_to_cwq; static struct spu_queue **cpu_to_mau; @@ -1631,31 +1636,27 @@ static void queue_cache_destroy(void) kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_CWQ - 1]); } -static int spu_queue_register(struct spu_queue *p, unsigned long q_type) +static long spu_queue_register_workfn(void *arg) { - cpumask_var_t old_allowed; + struct 
spu_qreg *qr = arg; + struct spu_queue *p = qr->queue; + unsigned long q_type = qr->type; unsigned long hv_ret; - if (cpumask_empty(>sharing)) - return -EINVAL; - - if (!alloc_cpumask_var(_allowed, GFP_KERNEL)) - return -ENOMEM; - - cpumask_copy(old_allowed, >cpus_allowed); - - set_cpus_allowed_ptr(current, >sharing); - hv_ret = sun4v_ncs_qconf(q_type, __pa(p->q), CWQ_NUM_ENTRIES, >qhandle); if (!hv_ret) sun4v_ncs_sethead_marker(p->qhandle, 0); - set_cpus_allowed_ptr(current, old_allowed); + return hv_ret ? -EINVAL : 0; +} - free_cpumask_var(old_allowed); +static int spu_queue_register(struct spu_queue *p, unsigned long q_type) +{ + int cpu = cpumask_any_and(>sharing, cpu_online_mask); + struct spu_qreg qr = { .queue = p, .type = q_type }; - return (hv_ret ? -EINVAL : 0); + return work_on_cpu_safe(cpu, spu_queue_register_workfn, ); } static int spu_queue_setup(struct spu_queue *p)
[tip:x86/urgent] kexec, x86/purgatory: Unbreak it and clean it up
Commit-ID: 40c50c1fecdf012a3bf055ec813f0ef2eda2749c Gitweb: http://git.kernel.org/tip/40c50c1fecdf012a3bf055ec813f0ef2eda2749c Author: Thomas Gleixner AuthorDate: Fri, 10 Mar 2017 13:17:18 +0100 Committer: Thomas Gleixner CommitDate: Fri, 10 Mar 2017 20:55:09 +0100 kexec, x86/purgatory: Unbreak it and clean it up The purgatory code defines global variables which are referenced via a symbol lookup in the kexec code (core and arch). A recent commit addressing sparse warnings made these static and thereby broke kexec_file. Why did this happen? Simply because the whole machinery is undocumented and lacks any form of forward declarations. The variable names are unspecific and lack a prefix, so adding forward declarations creates shadow variables in the core code. Aside from that the code relies on magic constants and duplicate struct definitions with no way to ensure that these things stay in sync. The section placement of the purgatory variables happened by chance and not by design. Unbreak kexec and clean up the mess: - Add proper forward declarations and document the usage - Use common struct definition - Use the proper common defines instead of magic constants - Add a purgatory_ prefix to have a proper name space - Use ARRAY_SIZE() instead of a homebrewed reimplementation - Add proper sections to the purgatory variables [ From Mike ] Fixes: 72042a8c7b01 ("x86/purgatory: Make functions and variables static") Reported-by: Mike Galbraith Signed-off-by: Thomas Gleixner Cc: Nicholas Mc Guire Cc: Borislav Petkov Cc: Vivek Goyal Cc: "Tobin C. 
Harding" Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1703101315140.3681@nanos Signed-off-by: Thomas Gleixner --- arch/powerpc/purgatory/trampoline.S | 12 ++-- arch/x86/include/asm/purgatory.h| 20 arch/x86/kernel/machine_kexec_64.c | 9 ++--- arch/x86/purgatory/purgatory.c | 35 +-- arch/x86/purgatory/purgatory.h | 8 arch/x86/purgatory/setup-x86_64.S | 2 +- arch/x86/purgatory/sha256.h | 1 - include/linux/purgatory.h | 23 +++ kernel/kexec_file.c | 8 kernel/kexec_internal.h | 6 +- 10 files changed, 78 insertions(+), 46 deletions(-) diff --git a/arch/powerpc/purgatory/trampoline.S b/arch/powerpc/purgatory/trampoline.S index f9760cc..3696ea6 100644 --- a/arch/powerpc/purgatory/trampoline.S +++ b/arch/powerpc/purgatory/trampoline.S @@ -116,13 +116,13 @@ dt_offset: .data .balign 8 -.globl sha256_digest -sha256_digest: +.globl purgatory_sha256_digest +purgatory_sha256_digest: .skip 32 - .size sha256_digest, . - sha256_digest + .size purgatory_sha256_digest, . - purgatory_sha256_digest .balign 8 -.globl sha_regions -sha_regions: +.globl purgatory_sha_regions +purgatory_sha_regions: .skip 8 * 2 * 16 - .size sha_regions, . - sha_regions + .size purgatory_sha_regions, . 
- purgatory_sha_regions diff --git a/arch/x86/include/asm/purgatory.h b/arch/x86/include/asm/purgatory.h new file mode 100644 index 000..d7da272 --- /dev/null +++ b/arch/x86/include/asm/purgatory.h @@ -0,0 +1,20 @@ +#ifndef _ASM_X86_PURGATORY_H +#define _ASM_X86_PURGATORY_H + +#ifndef __ASSEMBLY__ +#include + +extern void purgatory(void); +/* + * These forward declarations serve two purposes: + * + * 1) Make sparse happy when checking arch/purgatory + * 2) Document that these are required to be global so the symbol + *lookup in kexec works + */ +extern unsigned long purgatory_backup_dest; +extern unsigned long purgatory_backup_src; +extern unsigned long purgatory_backup_sz; +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_PURGATORY_H */ diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 307b1f4..857cdbd 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -194,19 +194,22 @@ static int arch_update_purgatory(struct kimage *image) /* Setup copying of backup region */ if (image->type == KEXEC_TYPE_CRASH) { - ret = kexec_purgatory_get_set_symbol(image, "backup_dest", + ret = kexec_purgatory_get_set_symbol(image, + "purgatory_backup_dest", >arch.backup_load_addr, sizeof(image->arch.backup_load_addr), 0); if (ret) return ret; - ret = kexec_purgatory_get_set_symbol(image, "backup_src", + ret = kexec_purgatory_get_set_symbol(image, + "purgatory_backup_src", >arch.backup_src_start,
[tip:x86/process] x86/process: Optimize TIF_NOTSC switch
Commit-ID: 5a920155e388ec22a22e0532fb695b9215c9b34d Gitweb: http://git.kernel.org/tip/5a920155e388ec22a22e0532fb695b9215c9b34d Author: Thomas GleixnerAuthorDate: Tue, 14 Feb 2017 00:11:04 -0800 Committer: Thomas Gleixner CommitDate: Sat, 11 Mar 2017 12:45:18 +0100 x86/process: Optimize TIF_NOTSC switch Provide and use a toggle helper instead of doing it with a branch. x86_64: arch/x86/kernel/process.o text data bss dec hex 3008 8577 16 116012d51 Before 2976 8577 16 115692d31 After i386: arch/x86/kernel/process.o text data bss dec hex 2925 8673 8 116062d56 Before 2893 8673 8 115742d36 After Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Andy Lutomirski Link: http://lkml.kernel.org/r/20170214081104.9244-4-kh...@kylehuey.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/tlbflush.h | 10 ++ arch/x86/kernel/process.c | 22 -- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 6fa8594..ff4923a 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -110,6 +110,16 @@ static inline void cr4_clear_bits(unsigned long mask) } } +static inline void cr4_toggle_bits(unsigned long mask) +{ + unsigned long cr4; + + cr4 = this_cpu_read(cpu_tlbstate.cr4); + cr4 ^= mask; + this_cpu_write(cpu_tlbstate.cr4, cr4); + __write_cr4(cr4); +} + /* Read the CR4 shadow. */ static inline unsigned long cr4_read_shadow(void) { diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 83fa3cb..366db77 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -124,11 +124,6 @@ void flush_thread(void) fpu__clear(>thread.fpu); } -static void hard_disable_TSC(void) -{ - cr4_set_bits(X86_CR4_TSD); -} - void disable_TSC(void) { preempt_disable(); @@ -137,15 +132,10 @@ void disable_TSC(void) * Must flip the CPU state synchronously with * TIF_NOTSC in the current running context. 
*/ - hard_disable_TSC(); + cr4_set_bits(X86_CR4_TSD); preempt_enable(); } -static void hard_enable_TSC(void) -{ - cr4_clear_bits(X86_CR4_TSD); -} - static void enable_TSC(void) { preempt_disable(); @@ -154,7 +144,7 @@ static void enable_TSC(void) * Must flip the CPU state synchronously with * TIF_NOTSC in the current running context. */ - hard_enable_TSC(); + cr4_clear_bits(X86_CR4_TSD); preempt_enable(); } @@ -233,12 +223,8 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); } - if ((tifp ^ tifn) & _TIF_NOTSC) { - if (tifn & _TIF_NOTSC) - hard_disable_TSC(); - else - hard_enable_TSC(); - } + if ((tifp ^ tifn) & _TIF_NOTSC) + cr4_toggle_bits(X86_CR4_TSD); } /*
[tip:x86/urgent] x86/mtrr: Prevent CPU hotplug lock recursion
Commit-ID: 84393817db09bb436e934f8f8cc981cbca9ea4dc Gitweb: http://git.kernel.org/tip/84393817db09bb436e934f8f8cc981cbca9ea4dc Author: Thomas GleixnerAuthorDate: Tue, 15 Aug 2017 13:03:47 +0200 Committer: Thomas Gleixner CommitDate: Tue, 15 Aug 2017 13:03:47 +0200 x86/mtrr: Prevent CPU hotplug lock recursion Larry reported a CPU hotplug lock recursion in the MTRR code. WARNING: possible recursive locking detected systemd-udevd/153 is trying to acquire lock: (cpu_hotplug_lock.rw_sem){.+.+.+}, at: [] stop_machine+0x16/0x30 but task is already holding lock: (cpu_hotplug_lock.rw_sem){.+.+.+}, at: [] mtrr_add_page+0x83/0x470 cpus_read_lock+0x48/0x90 stop_machine+0x16/0x30 mtrr_add_page+0x18b/0x470 mtrr_add+0x3e/0x70 mtrr_add_page() holds the hotplug rwsem already and calls stop_machine() which acquires it again. Call stop_machine_cpuslocked() instead. Reported-and-tested-by: Larry Finger Reported-by: Dmitry Vyukov Signed-off-by: Thomas Gleixner Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1708140920250.1865@nanos Cc: "Paul E. 
McKenney" Cc: Borislav Petkov --- arch/x86/kernel/cpu/mtrr/main.c | 18 +++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index c5bb63b..40d5a8a 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -237,6 +237,18 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ stop_machine(mtrr_rendezvous_handler, , cpu_online_mask); } +static void set_mtrr_cpuslocked(unsigned int reg, unsigned long base, + unsigned long size, mtrr_type type) +{ + struct set_mtrr_data data = { .smp_reg = reg, + .smp_base = base, + .smp_size = size, + .smp_type = type + }; + + stop_machine_cpuslocked(mtrr_rendezvous_handler, , cpu_online_mask); +} + static void set_mtrr_from_inactive_cpu(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type) { @@ -370,7 +382,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, /* Search for an empty MTRR */ i = mtrr_if->get_free_region(base, size, replace); if (i >= 0) { - set_mtrr(i, base, size, type); + set_mtrr_cpuslocked(i, base, size, type); if (likely(replace < 0)) { mtrr_usage_table[i] = 1; } else { @@ -378,7 +390,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, if (increment) mtrr_usage_table[i]++; if (unlikely(replace != i)) { - set_mtrr(replace, 0, 0, 0); + set_mtrr_cpuslocked(replace, 0, 0, 0); mtrr_usage_table[replace] = 0; } } @@ -506,7 +518,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) goto out; } if (--mtrr_usage_table[reg] < 1) - set_mtrr(reg, 0, 0, 0); + set_mtrr_cpuslocked(reg, 0, 0, 0); error = reg; out: mutex_unlock(_mutex);
[tip:irq/urgent] genirq/cpuhotplug: Revert "Set force affinity flag on hotplug migration"
Commit-ID: 8397913303abc9333f376a518a8368fa22ca5e6e Gitweb: http://git.kernel.org/tip/8397913303abc9333f376a518a8368fa22ca5e6e Author: Thomas Gleixner AuthorDate: Thu, 27 Jul 2017 12:21:11 +0200 Committer: Thomas Gleixner CommitDate: Thu, 27 Jul 2017 15:40:02 +0200 genirq/cpuhotplug: Revert "Set force affinity flag on hotplug migration" That commit was part of the changes moving x86 to the generic CPU hotplug interrupt migration code. The force flag was required on x86 before the hierarchical irqdomain rework, but invoking set_affinity() with force=true stayed and had no side effects. At some point in the past, the force flag got repurposed to support the exynos timer interrupt affinity setting to a not yet online CPU, so the interrupt controller callback does not verify the supplied affinity mask against cpu_online_mask. Setting the flag in the CPU hotplug code causes the cpu online masking to be blocked on these irq controllers and results in potentially affining an interrupt to the CPU which is unplugged, i.e. instead of moving it away, it's just reassigned to it. As the force flag is no longer needed on x86, it's safe to revert that patch so the ARM irqchips which use the force flag work again. Add comments to that effect, so this won't happen again. Note: The online mask handling should be done in the generic code and the force flag and the masking in the irq chips removed altogether, but that's not a change possible for 4.13.
Fixes: 77f85e66aa8b ("genirq/cpuhotplug: Set force affinity flag on hotplug migration") Reported-by: Will Deacon Signed-off-by: Thomas Gleixner Acked-by: Will Deacon Cc: Marc Zyngier Cc: Russell King Cc: LAK Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1707271217590.3109@nanos Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 7 ++- kernel/irq/cpuhotplug.c | 9 +++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/include/linux/irq.h b/include/linux/irq.h index 00db35b..d2d54379 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -388,7 +388,12 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) * @irq_mask_ack: ack and mask an interrupt source * @irq_unmask:unmask an interrupt source * @irq_eoi: end of interrupt - * @irq_set_affinity: set the CPU affinity on SMP machines + * @irq_set_affinity: Set the CPU affinity on SMP machines. If the force + * argument is true, it tells the driver to + * unconditionally apply the affinity setting. Sanity + * checks against the supplied affinity mask are not + * required. This is used for CPU hotplug where the + * target CPU is not yet set in the cpu_online_mask. * @irq_retrigger: resend an IRQ to the CPU * @irq_set_type: set the flow type (IRQ_TYPE_LEVEL/etc.) of an IRQ * @irq_set_wake: enable/disable power-management wake-on of an IRQ diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c index aee8f7e..638eb9c 100644 --- a/kernel/irq/cpuhotplug.c +++ b/kernel/irq/cpuhotplug.c @@ -95,8 +95,13 @@ static bool migrate_one_irq(struct irq_desc *desc) affinity = cpu_online_mask; brokeaff = true; } - - err = irq_do_set_affinity(d, affinity, true); + /* +* Do not set the force argument of irq_do_set_affinity() as this +* disables the masking of offline CPUs from the supplied affinity +* mask and therefore might keep/reassign the irq to the outgoing +* CPU. 
+*/ + err = irq_do_set_affinity(d, affinity, false); if (err) { pr_warn_ratelimited("IRQ%u: set affinity failed(%d).\n", d->irq, err);
[tip:x86/platform] x86/PCI: Remove duplicate defines
Commit-ID: 9304d1621e6019c63497f8a4aad09d003916dbe9 Gitweb: http://git.kernel.org/tip/9304d1621e6019c63497f8a4aad09d003916dbe9 Author: Thomas Gleixner AuthorDate: Thu, 16 Mar 2017 22:50:03 +0100 Committer: Thomas Gleixner CommitDate: Wed, 28 Jun 2017 22:32:55 +0200 x86/PCI: Remove duplicate defines For some historic reason these defines are duplicated and also available in arch/x86/include/asm/pci_x86.h. Remove them. Signed-off-by: Thomas Gleixner Acked-by: Bjorn Helgaas Cc: Andi Kleen Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Borislav Petkov Cc: linux-...@vger.kernel.org Link: http://lkml.kernel.org/r/20170316215056.967808...@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/pci.h | 8 +--- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index f513cc2..473a729 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -77,14 +77,8 @@ static inline bool is_vmd(struct pci_bus *bus) extern unsigned int pcibios_assign_all_busses(void); extern int pci_legacy_init(void); -# ifdef CONFIG_ACPI -# define x86_default_pci_init pci_acpi_init -# else -# define x86_default_pci_init pci_legacy_init -# endif #else -# define pcibios_assign_all_busses() 0 -# define x86_default_pci_init NULL +static inline int pcibios_assign_all_busses(void) { return 0; } #endif extern unsigned long pci_mem_start;
[tip:x86/platform] x86/PCI: Select CONFIG_PCI_LOCKLESS_CONFIG
Commit-ID: df65c1bcd9b7b639177a5a15da1b8dc3bee4f5fa Gitweb: http://git.kernel.org/tip/df65c1bcd9b7b639177a5a15da1b8dc3bee4f5fa Author: Thomas GleixnerAuthorDate: Thu, 16 Mar 2017 22:50:07 +0100 Committer: Thomas Gleixner CommitDate: Wed, 28 Jun 2017 22:32:56 +0200 x86/PCI: Select CONFIG_PCI_LOCKLESS_CONFIG All x86 PCI configuration space accessors have either their own serialization or can operate completely lockless (ECAM). Disable the global lock in the generic PCI configuration space accessors. Signed-off-by: Thomas Gleixner Acked-by: Bjorn Helgaas Cc: Andi Kleen Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Borislav Petkov Cc: linux-...@vger.kernel.org Link: http://lkml.kernel.org/r/20170316215057.295079...@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig | 1 + arch/x86/pci/common.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0efb4c9..0652c9f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -166,6 +166,7 @@ config X86 select HAVE_UNSTABLE_SCHED_CLOCK select HAVE_USER_RETURN_NOTIFIER select IRQ_FORCED_THREADING + select PCI_LOCKLESS_CONFIG select PERF_EVENTS select RTC_LIB select RTC_MC146818_LIB diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 190e718..cfd1a89 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -75,8 +75,8 @@ struct pci_ops pci_root_ops = { }; /* - * This interrupt-safe spinlock protects all accesses to PCI - * configuration space. + * This interrupt-safe spinlock protects all accesses to PCI configuration + * space, except for the mmconfig (ECAM) based operations. */ DEFINE_RAW_SPINLOCK(pci_config_lock);
[tip:x86/platform] x86/PCI: Abort if legacy init fails
Commit-ID: aae3e318d012e76211f34bb65754f3d4d2a8c93d Gitweb: http://git.kernel.org/tip/aae3e318d012e76211f34bb65754f3d4d2a8c93d Author: Thomas Gleixner AuthorDate: Thu, 16 Mar 2017 22:50:04 +0100 Committer: Thomas Gleixner CommitDate: Wed, 28 Jun 2017 22:32:55 +0200 x86/PCI: Abort if legacy init fails If the legacy PCI init fails, then there are no PCI config space accessors available, but the code continues and tries to scan the busses, which fails due to the lack of config space accessors. Return right away, if the last init fallback fails. Switch the few printks to pr_info while at it. Signed-off-by: Thomas Gleixner Acked-by: Bjorn Helgaas Cc: Andi Kleen Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Borislav Petkov Cc: linux-...@vger.kernel.org Link: http://lkml.kernel.org/r/20170316215057.047576...@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/pci/legacy.c | 18 ++ 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c index ea6f380..1cb01ab 100644 --- a/arch/x86/pci/legacy.c +++ b/arch/x86/pci/legacy.c @@ -24,12 +24,10 @@ static void pcibios_fixup_peer_bridges(void) int __init pci_legacy_init(void) { - if (!raw_pci_ops) { - printk("PCI: System does not support PCI\n"); - return 0; - } + if (!raw_pci_ops) + return 1; - printk("PCI: Probing PCI hardware\n"); + pr_info("PCI: Probing PCI hardware\n"); pcibios_scan_root(0); return 0; } @@ -46,7 +44,7 @@ void pcibios_scan_specific_bus(int busn) if (!raw_pci_read(0, busn, devfn, PCI_VENDOR_ID, 2, ) && l != 0x && l != 0x) { DBG("Found device at %02x:%02x [%04x]\n", busn, devfn, l); - printk(KERN_INFO "PCI: Discovered peer bus %02x\n", busn); + pr_info("PCI: Discovered peer bus %02x\n", busn); pcibios_scan_root(busn); return; } @@ -60,8 +58,12 @@ static int __init pci_subsys_init(void) * The init function returns an non zero value when * pci_legacy_init should be invoked.
*/ - if (x86_init.pci.init()) - pci_legacy_init(); + if (x86_init.pci.init()) { + if (pci_legacy_init()) { + pr_info("PCI: System does not support PCI\n"); + return -ENODEV; + } + } pcibios_fixup_peer_bridges(); x86_init.pci.init_irq();
[tip:x86/platform] x86/PCI/ce4100: Properly lock accessor functions
Commit-ID: bb290fda879ffd1f6f6b0869bf7335554093f4bd Gitweb: http://git.kernel.org/tip/bb290fda879ffd1f6f6b0869bf7335554093f4bd Author: Thomas GleixnerAuthorDate: Thu, 16 Mar 2017 22:50:05 +0100 Committer: Thomas Gleixner CommitDate: Wed, 28 Jun 2017 22:32:55 +0200 x86/PCI/ce4100: Properly lock accessor functions x86 wants to get rid of the global pci_lock protecting the config space accessors so ECAM mode can operate completely lockless, but the CE4100 PCI code relies on that to protect the simulation registers. Restructure the code so it uses the x86 specific pci_config_lock to serialize the inner workings of the CE4100 PCI magic. That allows to remove the global locking via pci_lock later. Signed-off-by: Thomas Gleixner Acked-by: Bjorn Helgaas Cc: Andi Kleen Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Borislav Petkov Cc: linux-...@vger.kernel.org Link: http://lkml.kernel.org/r/20170316215057.126873...@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/pci/ce4100.c | 87 --- 1 file changed, 48 insertions(+), 39 deletions(-) diff --git a/arch/x86/pci/ce4100.c b/arch/x86/pci/ce4100.c index b914e20..3353b76d 100644 --- a/arch/x86/pci/ce4100.c +++ b/arch/x86/pci/ce4100.c @@ -65,6 +65,9 @@ struct sim_reg_op { { PCI_DEVFN(device, func), offset, init_op, read_op, write_op,\ {0, SIZE_TO_MASK(size)} }, +/* + * All read/write functions are called with pci_config_lock held. 
+ */ static void reg_init(struct sim_dev_reg *reg) { pci_direct_conf1.read(0, 1, reg->dev_func, reg->reg, 4, @@ -73,21 +76,13 @@ static void reg_init(struct sim_dev_reg *reg) static void reg_read(struct sim_dev_reg *reg, u32 *value) { - unsigned long flags; - - raw_spin_lock_irqsave(_config_lock, flags); *value = reg->sim_reg.value; - raw_spin_unlock_irqrestore(_config_lock, flags); } static void reg_write(struct sim_dev_reg *reg, u32 value) { - unsigned long flags; - - raw_spin_lock_irqsave(_config_lock, flags); reg->sim_reg.value = (value & reg->sim_reg.mask) | (reg->sim_reg.value & ~reg->sim_reg.mask); - raw_spin_unlock_irqrestore(_config_lock, flags); } static void sata_reg_init(struct sim_dev_reg *reg) @@ -117,12 +112,8 @@ static void sata_revid_read(struct sim_dev_reg *reg, u32 *value) static void reg_noirq_read(struct sim_dev_reg *reg, u32 *value) { - unsigned long flags; - - raw_spin_lock_irqsave(_config_lock, flags); /* force interrupt pin value to 0 */ *value = reg->sim_reg.value & 0xfff00ff; - raw_spin_unlock_irqrestore(_config_lock, flags); } static struct sim_dev_reg bus1_fixups[] = { @@ -265,24 +256,33 @@ int bridge_read(unsigned int devfn, int reg, int len, u32 *value) return retval; } -static int ce4100_conf_read(unsigned int seg, unsigned int bus, - unsigned int devfn, int reg, int len, u32 *value) +static int ce4100_bus1_read(unsigned int devfn, int reg, int len, u32 *value) { + unsigned long flags; int i; - WARN_ON(seg); - if (bus == 1) { - for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) { - if (bus1_fixups[i].dev_func == devfn && - bus1_fixups[i].reg == (reg & ~3) && - bus1_fixups[i].read) { - bus1_fixups[i].read(&(bus1_fixups[i]), - value); - extract_bytes(value, reg, len); - return 0; - } + for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) { + if (bus1_fixups[i].dev_func == devfn && + bus1_fixups[i].reg == (reg & ~3) && + bus1_fixups[i].read) { + + raw_spin_lock_irqsave(_config_lock, flags); + bus1_fixups[i].read(&(bus1_fixups[i]), value); + 
raw_spin_unlock_irqrestore(_config_lock, flags); + extract_bytes(value, reg, len); + return 0; } } + return -1; +} + +static int ce4100_conf_read(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, u32 *value) +{ + WARN_ON(seg); + + if (bus == 1 && !ce4100_bus1_read(devfn, reg, len, value)) + return 0; if (bus == 0 && (PCI_DEVFN(1, 0) == devfn) && !bridge_read(devfn, reg, len, value)) @@ -291,23 +291,32 @@ static int ce4100_conf_read(unsigned int seg, unsigned int bus, return pci_direct_conf1.read(seg, bus, devfn, reg, len, value); } -static int ce4100_conf_write(unsigned int seg,
[tip:x86/platform] x86/PCI/mmcfg: Switch to ECAM config mode if possible
Commit-ID: 5d381c2e053918bd67c2d1cc50fc73c35bd547f7 Gitweb: http://git.kernel.org/tip/5d381c2e053918bd67c2d1cc50fc73c35bd547f7 Author: Thomas Gleixner AuthorDate: Thu, 16 Mar 2017 22:50:09 +0100 Committer: Thomas Gleixner CommitDate: Wed, 28 Jun 2017 22:32:57 +0200 x86/PCI/mmcfg: Switch to ECAM config mode if possible To allow lockless access to the whole PCI configuration space the mmconfig based accessor functions need to be propagated to the pci_root_ops. Unfortunately, this cannot be done before the PCI subsystem initialization happens even if mmconfig access is already available. The reason is that some of the special platform PCI implementations must be able to overrule that setting before further accesses happen. The earliest possible point is after x86_init.pci.init() has been run. This is at a point in the boot process where nothing actually uses the PCI devices so the accessor function pointers can be updated lockless w/o risk. The switch to full ECAM mode depends on the availability of mmconfig and unchanged default accessors.
Signed-off-by: Thomas Gleixner Acked-by: Bjorn Helgaas Cc: Andi Kleen Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Borislav Petkov Cc: linux-...@vger.kernel.org Link: http://lkml.kernel.org/r/20170316215057.452220...@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/pci_x86.h | 15 +++ arch/x86/pci/common.c | 16 arch/x86/pci/legacy.c | 1 + arch/x86/pci/mmconfig-shared.c | 30 ++ 4 files changed, 54 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index 9f1b21f..ad518a9 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -48,20 +48,14 @@ enum pci_bf_sort_state { pci_dmi_bf, }; -/* pci-i386.c */ - void pcibios_resource_survey(void); void pcibios_set_cache_line_size(void); -/* pci-pc.c */ - extern int pcibios_last_bus; extern struct pci_ops pci_root_ops; void pcibios_scan_specific_bus(int busn); -/* pci-irq.c */ - struct irq_info { u8 bus, devfn; /* Bus, device and function */ struct { @@ -122,11 +116,10 @@ extern void __init dmi_check_skip_isa_align(void); extern int __init pci_acpi_init(void); extern void __init pcibios_irq_init(void); extern int __init pcibios_init(void); +extern void __init pcibios_select_ops(void); extern int pci_legacy_init(void); extern void pcibios_fixup_irqs(void); -/* pci-mmconfig.c */ - /* "PCI MMCONFIG %04x [bus %02x-%02x]" */ #define PCI_MMCFG_RESOURCE_NAME_LEN (22 + 4 + 2 + 2) @@ -141,6 +134,12 @@ struct pci_mmcfg_region { char name[PCI_MMCFG_RESOURCE_NAME_LEN]; }; +#ifdef CONFIG_PCI_MMCONFIG +extern void __init pci_mmcfg_select_ops(void); +#else +static inline void pci_mmcfg_select_ops(void) { } +#endif + extern int __init pci_mmcfg_arch_init(void); extern void __init pci_mmcfg_arch_free(void); extern int pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg); diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index cfd1a89..81e4d21 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -157,6 +157,22 @@ static void 
pcibios_fixup_device_resources(struct pci_dev *dev) } /* + * Called after the last possible modification to raw_pci_[ext_]ops. + * + * Verify that root_pci_ops have not been overwritten by any implementation + * of x86_init.pci.arch_init() and x86_init.pci.init(). + * + * If not, let the mmconfig code decide whether the ops can be switched + * over to the ECAM accessor functions. + */ +void __init pcibios_select_ops(void) +{ + if (pci_root_ops.read != pci_read || pci_root_ops.write != pci_write) + return; + pci_mmcfg_select_ops(); +} + +/* * Called after each bus is probed, but before its children * are examined. */ diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c index 1cb01ab..80ea40e 100644 --- a/arch/x86/pci/legacy.c +++ b/arch/x86/pci/legacy.c @@ -65,6 +65,7 @@ static int __init pci_subsys_init(void) } } + pcibios_select_ops(); pcibios_fixup_peer_bridges(); x86_init.pci.init_irq(); pcibios_init(); diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index d1b47d5..6af6351 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -816,3 +816,33 @@ int pci_mmconfig_delete(u16 seg, u8 start, u8 end) return -ENOENT; } + +static int pci_ecam_read(struct pci_bus *bus, unsigned int devfn, int reg, +int size, u32 *value) +{ + return pci_mmcfg_read(pci_domain_nr(bus), bus->number, devfn, reg, + size, value); +} + +static int
[tip:x86/platform] PCI: Provide Kconfig option for lockless config space accessors
Commit-ID: 714fe383d6c9bd95d0d2cad8cbeff3688342d025 Gitweb: http://git.kernel.org/tip/714fe383d6c9bd95d0d2cad8cbeff3688342d025 Author: Thomas GleixnerAuthorDate: Thu, 16 Mar 2017 22:50:06 +0100 Committer: Thomas Gleixner CommitDate: Wed, 28 Jun 2017 22:32:56 +0200 PCI: Provide Kconfig option for lockless config space accessors The generic PCI configuration space accessors are globally serialized via pci_lock. On larger systems this causes massive lock contention when the configuration space has to be accessed frequently. One such access pattern is the Intel Uncore performance counter unit. Provide a kernel config option which can be selected by an architecture when the low level PCI configuration space accessors in the architecture use their own serialization or can operate completely lockless. Signed-off-by: Thomas Gleixner Acked-by: Bjorn Helgaas Cc: Andi Kleen Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Borislav Petkov Cc: linux-...@vger.kernel.org Link: http://lkml.kernel.org/r/20170316215057.205961...@linutronix.de Signed-off-by: Thomas Gleixner --- drivers/pci/Kconfig | 3 +++ drivers/pci/access.c | 16 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index e0cacb7..c32a77f 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -86,6 +86,9 @@ config PCI_ATS config PCI_ECAM bool +config PCI_LOCKLESS_CONFIG + bool + config PCI_IOV bool "PCI IOV support" depends on PCI diff --git a/drivers/pci/access.c b/drivers/pci/access.c index c80e37a..913d672 100644 --- a/drivers/pci/access.c +++ b/drivers/pci/access.c @@ -25,6 +25,14 @@ DEFINE_RAW_SPINLOCK(pci_lock); #define PCI_word_BAD (pos & 1) #define PCI_dword_BAD (pos & 3) +#ifdef CONFIG_PCI_LOCKLESS_CONFIG +# define pci_lock_config(f)do { (void)(f); } while (0) +# define pci_unlock_config(f) do { (void)(f); } while (0) +#else +# define pci_lock_config(f)raw_spin_lock_irqsave(_lock, f) +# define pci_unlock_config(f) raw_spin_unlock_irqrestore(_lock, f) 
+#endif + #define PCI_OP_READ(size, type, len) \ int pci_bus_read_config_##size \ (struct pci_bus *bus, unsigned int devfn, int pos, type *value) \ @@ -33,10 +41,10 @@ int pci_bus_read_config_##size \ unsigned long flags;\ u32 data = 0; \ if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER; \ - raw_spin_lock_irqsave(_lock, flags);\ + pci_lock_config(flags); \ res = bus->ops->read(bus, devfn, pos, len, ); \ *value = (type)data;\ - raw_spin_unlock_irqrestore(_lock, flags); \ + pci_unlock_config(flags); \ return res; \ } @@ -47,9 +55,9 @@ int pci_bus_write_config_##size \ int res;\ unsigned long flags;\ if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER; \ - raw_spin_lock_irqsave(_lock, flags);\ + pci_lock_config(flags); \ res = bus->ops->write(bus, devfn, pos, len, value); \ - raw_spin_unlock_irqrestore(_lock, flags); \ + pci_unlock_config(flags); \ return res; \ }
[tip:irq/urgent] genirq/timings: Move free timings out of spinlocked region
Commit-ID: 2343877fbda701599653e63f8dcc318aa1bf15ee Gitweb: http://git.kernel.org/tip/2343877fbda701599653e63f8dcc318aa1bf15ee Author: Thomas Gleixner AuthorDate: Thu, 29 Jun 2017 23:33:39 +0200 Committer: Thomas Gleixner CommitDate: Tue, 4 Jul 2017 12:46:16 +0200 genirq/timings: Move free timings out of spinlocked region There is no point in doing memory management from an interrupt-disabled, spinlocked region. Signed-off-by: Thomas Gleixner Reviewed-by: Marc Zyngier Cc: Daniel Lezcano Cc: Heiko Stuebner Cc: Julia Cartwright Cc: Linus Walleij Cc: Brian Norris Cc: Doug Anderson Cc: linux-rockc...@lists.infradead.org Cc: John Keeping Cc: linux-g...@vger.kernel.org Link: http://lkml.kernel.org/r/20170629214344.196130...@linutronix.de --- kernel/irq/manage.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 3e69343..91e1f23 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1489,7 +1489,6 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) if (!desc->action) { irq_settings_clr_disable_unlazy(desc); irq_shutdown(desc); - irq_remove_timings(desc); } #ifdef CONFIG_SMP @@ -1531,8 +1530,10 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) } } - if (!desc->action) + if (!desc->action) { irq_release_resources(desc); + irq_remove_timings(desc); + } mutex_unlock(>request_mutex);
[tip:irq/urgent] genirq: Add mutex to irq desc to serialize request/free_irq()
Commit-ID: 9114014cf4e6df0b22d764380ae1fc54f1a7a8b2 Gitweb: http://git.kernel.org/tip/9114014cf4e6df0b22d764380ae1fc54f1a7a8b2 Author: Thomas Gleixner AuthorDate: Thu, 29 Jun 2017 23:33:37 +0200 Committer: Thomas Gleixner CommitDate: Tue, 4 Jul 2017 12:46:16 +0200 genirq: Add mutex to irq desc to serialize request/free_irq() The irq_request/release_resources() callbacks are currently invoked under desc->lock with interrupts disabled. This is a source of problems on RT and conceptually not required. Add a separate mutex to struct irq_desc which allows serializing request/free_irq(), which can be used to move the resource functions out of the desc->lock held region. Signed-off-by: Thomas Gleixner Reviewed-by: Marc Zyngier Cc: Heiko Stuebner Cc: Julia Cartwright Cc: Linus Walleij Cc: Brian Norris Cc: Doug Anderson Cc: linux-rockc...@lists.infradead.org Cc: John Keeping Cc: linux-g...@vger.kernel.org Link: http://lkml.kernel.org/r/20170629214344.039220...@linutronix.de --- include/linux/irqdesc.h | 3 +++ kernel/irq/irqdesc.c| 1 + kernel/irq/manage.c | 8 3 files changed, 12 insertions(+) diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index d425a3a..3e90a09 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -3,6 +3,7 @@ #include #include +#include /* * Core internal functions to deal with irq descriptors @@ -45,6 +46,7 @@ struct pt_regs; * IRQF_FORCE_RESUME set * @rcu: rcu head for delayed free * @kobj: kobject used to represent this struct in sysfs + * @request_mutex: mutex to protect request/free before locking desc->lock * @dir: /proc/irq/ procfs entry * @debugfs_file: dentry for the debugfs file * @name: flow handler name for /proc/interrupts output @@ -96,6 +98,7 @@ struct irq_desc { struct rcu_head rcu; struct kobject kobj; #endif + struct mutexrequest_mutex; int parent_irq; struct module *owner; const char *name; diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 948b50e..906a67e 100644 --- a/kernel/irq/irqdesc.c +++ 
b/kernel/irq/irqdesc.c @@ -373,6 +373,7 @@ static struct irq_desc *alloc_desc(int irq, int node, unsigned int flags, raw_spin_lock_init(>lock); lockdep_set_class(>lock, _desc_lock_class); + mutex_init(>request_mutex); init_rcu_head(>rcu); desc_set_defaults(irq, desc, node, affinity, owner); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 0934e02..0139908 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1167,6 +1167,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) if (desc->irq_data.chip->flags & IRQCHIP_ONESHOT_SAFE) new->flags &= ~IRQF_ONESHOT; + mutex_lock(>request_mutex); + chip_bus_lock(desc); /* @@ -1350,6 +1352,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) raw_spin_unlock_irqrestore(>lock, flags); chip_bus_sync_unlock(desc); + mutex_unlock(>request_mutex); irq_setup_timings(desc, new); @@ -1383,6 +1386,8 @@ out_unlock: chip_bus_sync_unlock(desc); + mutex_unlock(>request_mutex); + out_thread: if (new->thread) { struct task_struct *t = new->thread; @@ -1446,6 +1451,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) if (!desc) return NULL; + mutex_lock(>request_mutex); chip_bus_lock(desc); raw_spin_lock_irqsave(>lock, flags); @@ -1521,6 +1527,8 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) } } + mutex_unlock(>request_mutex); + irq_chip_pm_put(>irq_data); module_put(desc->owner); kfree(action->secondary);
[tip:irq/urgent] genirq: Move irq resource handling out of spinlocked region
Commit-ID: 46e48e257360f0845fe17089713cbad4db611e70 Gitweb: http://git.kernel.org/tip/46e48e257360f0845fe17089713cbad4db611e70 Author: Thomas GleixnerAuthorDate: Thu, 29 Jun 2017 23:33:38 +0200 Committer: Thomas Gleixner CommitDate: Tue, 4 Jul 2017 12:46:16 +0200 genirq: Move irq resource handling out of spinlocked region Aside of being conceptually wrong, there is also an actual (hard to trigger and mostly theoretical) problem. CPU0CPU1 free_irq(X) interrupt X spin_lock(desc->lock) wake irq thread() spin_unlock(desc->lock) spin_lock(desc->lock) remove action() shutdown_irq() release_resources() thread_handler() spin_unlock(desc->lock) access released resources. synchronize_irq() Move the release resources invocation after synchronize_irq() so it's guaranteed that the threaded handler has finished. Move the resource request call out of the desc->lock held region as well, so the invocation context is the same for both request and release. This solves the problems with those functions on RT as well. 
Signed-off-by: Thomas Gleixner Reviewed-by: Marc Zyngier Cc: Heiko Stuebner Cc: Julia Cartwright Cc: Linus Walleij Cc: Brian Norris Cc: Doug Anderson Cc: linux-rockc...@lists.infradead.org Cc: John Keeping Cc: linux-g...@vger.kernel.org Link: http://lkml.kernel.org/r/20170629214344.117028...@linutronix.de --- kernel/irq/manage.c | 23 +++ 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 0139908..3e69343 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1168,6 +1168,14 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) new->flags &= ~IRQF_ONESHOT; mutex_lock(>request_mutex); + if (!desc->action) { + ret = irq_request_resources(desc); + if (ret) { + pr_err("Failed to request resources for %s (irq %d) on irqchip %s\n", + new->name, irq, desc->irq_data.chip->name); + goto out_mutex; + } + } chip_bus_lock(desc); @@ -1271,13 +1279,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) } if (!shared) { - ret = irq_request_resources(desc); - if (ret) { - pr_err("Failed to request resources for %s (irq %d) on irqchip %s\n", - new->name, irq, desc->irq_data.chip->name); - goto out_unlock; - } - init_waitqueue_head(>wait_for_threads); /* Setup the type (level, edge polarity) if configured: */ @@ -1386,6 +1387,10 @@ out_unlock: chip_bus_sync_unlock(desc); + if (!desc->action) + irq_release_resources(desc); + +out_mutex: mutex_unlock(>request_mutex); out_thread: @@ -1484,7 +1489,6 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) if (!desc->action) { irq_settings_clr_disable_unlazy(desc); irq_shutdown(desc); - irq_release_resources(desc); irq_remove_timings(desc); } @@ -1527,6 +1531,9 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) } } + if (!desc->action) + irq_release_resources(desc); + mutex_unlock(>request_mutex); irq_chip_pm_put(>irq_data);
[tip:irq/urgent] genirq: Move bus locking into __setup_irq()
Commit-ID: 3a90795e1e885167209056a1a90be965add30e25 Gitweb: http://git.kernel.org/tip/3a90795e1e885167209056a1a90be965add30e25 Author: Thomas GleixnerAuthorDate: Thu, 29 Jun 2017 23:33:36 +0200 Committer: Thomas Gleixner CommitDate: Tue, 4 Jul 2017 12:46:15 +0200 genirq: Move bus locking into __setup_irq() There is no point in having the irq_bus_lock() protection around all callers to __setup_irq(). Move it into __setup_irq(). This is also a preparatory patch for addressing the issues with the irq resource callbacks. Signed-off-by: Thomas Gleixner Reviewed-by: Marc Zyngier Cc: Heiko Stuebner Cc: Julia Cartwright Cc: Linus Walleij Cc: Brian Norris Cc: Doug Anderson Cc: linux-rockc...@lists.infradead.org Cc: John Keeping Cc: linux-g...@vger.kernel.org Link: http://lkml.kernel.org/r/20170629214343.960949...@linutronix.de --- kernel/irq/manage.c | 13 + 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 5c11c17..0934e02 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1167,6 +1167,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) if (desc->irq_data.chip->flags & IRQCHIP_ONESHOT_SAFE) new->flags &= ~IRQF_ONESHOT; + chip_bus_lock(desc); + /* * The following block of code has to be executed atomically */ @@ -1347,6 +1349,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) } raw_spin_unlock_irqrestore(>lock, flags); + chip_bus_sync_unlock(desc); irq_setup_timings(desc, new); @@ -1378,6 +1381,8 @@ mismatch: out_unlock: raw_spin_unlock_irqrestore(>lock, flags); + chip_bus_sync_unlock(desc); + out_thread: if (new->thread) { struct task_struct *t = new->thread; @@ -1417,9 +1422,7 @@ int setup_irq(unsigned int irq, struct irqaction *act) if (retval < 0) return retval; - chip_bus_lock(desc); retval = __setup_irq(irq, desc, act); - chip_bus_sync_unlock(desc); if (retval) irq_chip_pm_put(>irq_data); @@ -1674,9 +1677,7 @@ int 
request_threaded_irq(unsigned int irq, irq_handler_t handler, return retval; } - chip_bus_lock(desc); retval = __setup_irq(irq, desc, action); - chip_bus_sync_unlock(desc); if (retval) { irq_chip_pm_put(>irq_data); @@ -1924,9 +1925,7 @@ int setup_percpu_irq(unsigned int irq, struct irqaction *act) if (retval < 0) return retval; - chip_bus_lock(desc); retval = __setup_irq(irq, desc, act); - chip_bus_sync_unlock(desc); if (retval) irq_chip_pm_put(>irq_data); @@ -1980,9 +1979,7 @@ int request_percpu_irq(unsigned int irq, irq_handler_t handler, return retval; } - chip_bus_lock(desc); retval = __setup_irq(irq, desc, action); - chip_bus_sync_unlock(desc); if (retval) { irq_chip_pm_put(>irq_data);
[tip:smp/urgent] smp/hotplug: Move unparking of percpu threads to the control CPU
Commit-ID: 9cd4f1a4e7a858849e889a081a99adff83e08e4c Gitweb: http://git.kernel.org/tip/9cd4f1a4e7a858849e889a081a99adff83e08e4c Author: Thomas GleixnerAuthorDate: Tue, 4 Jul 2017 22:20:23 +0200 Committer: Thomas Gleixner CommitDate: Thu, 6 Jul 2017 10:55:10 +0200 smp/hotplug: Move unparking of percpu threads to the control CPU Vikram reported the following backtrace: BUG: scheduling while atomic: swapper/7/0/0x0002 CPU: 7 PID: 0 Comm: swapper/7 Not tainted 4.9.32-perf+ #680 schedule schedule_hrtimeout_range_clock schedule_hrtimeout wait_task_inactive __kthread_bind_mask __kthread_bind __kthread_unpark kthread_unpark cpuhp_online_idle cpu_startup_entry secondary_start_kernel He analyzed correctly that a parked cpu hotplug thread of an offlined CPU was still on the runqueue when the CPU came back online and tried to unpark it. This causes the thread which invoked kthread_unpark() to call wait_task_inactive() and subsequently schedule() with preemption disabled. His proposed workaround was to "make sure" that a parked thread has scheduled out when the CPU goes offline, so the situation cannot happen. But that's still wrong because the root cause is not the fact that the percpu thread is still on the runqueue and neither that preemption is disabled, which could be simply solved by enabling preemption before calling kthread_unpark(). The real issue is that the calling thread is the idle task of the upcoming CPU, which is not supposed to call anything which might sleep. The moron, who wrote that code, missed completely that kthread_unpark() might end up in schedule(). The solution is simpler than expected. The thread which controls the hotplug operation is waiting for the CPU to call complete() on the hotplug state completion. So the idle task of the upcoming CPU can set its state to CPUHP_AP_ONLINE_IDLE and invoke complete(). 
This in turn wakes the control task on a different CPU, which then can safely do the unpark and kick the now unparked hotplug thread of the upcoming CPU to complete the bringup to the final target state. Control CPU AP bringup_cpu(); __cpu_up() > bringup_ap(); bringup_wait_for_ap() wait_for_completion(); cpuhp_online_idle(); stopper); unpark(AP->hotplugthread); while(1) do_idle(); kick(AP->hotplugthread); wait_for_completion(); hotplug_thread() run_online_callbacks(); complete(); Fixes: 8df3e07e7f21 ("cpu/hotplug: Let upcoming cpu bring itself fully up") Reported-by: Vikram Mulukutla Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: Sebastian Sewior Cc: Rusty Russell Cc: Tejun Heo Cc: Andrew Morton Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1707042218020.2131@nanos Signed-off-by: Thomas Gleixner --- kernel/cpu.c | 37 +++-- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/kernel/cpu.c b/kernel/cpu.c index b03a325..ab86045 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -271,11 +271,25 @@ void cpu_hotplug_enable(void) EXPORT_SYMBOL_GPL(cpu_hotplug_enable); #endif /* CONFIG_HOTPLUG_CPU */ +static void __cpuhp_kick_ap_work(struct cpuhp_cpu_state *st); + static int bringup_wait_for_ap(unsigned int cpu) { struct cpuhp_cpu_state *st = per_cpu_ptr(_state, cpu); + /* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */ wait_for_completion(>done); + BUG_ON(!cpu_online(cpu)); + + /* Unpark the stopper thread and the hotplug thread of the target cpu */ + stop_machine_unpark(cpu); + kthread_unpark(st->thread); + + /* Should we go further up ? 
*/ + if (st->target > CPUHP_AP_ONLINE_IDLE) { + __cpuhp_kick_ap_work(st); + wait_for_completion(>done); + } return st->result; } @@ -296,9 +310,7 @@ static int bringup_cpu(unsigned int cpu) irq_unlock_sparse(); if (ret) return ret; - ret = bringup_wait_for_ap(cpu); - BUG_ON(!cpu_online(cpu)); - return ret; + return bringup_wait_for_ap(cpu); } /* @@ -767,31 +779,20 @@ void notify_cpu_starting(unsigned int cpu) } /* - * Called from the idle task. We need to set active here, so we can kick off - * the stopper thread and unpark the smpboot threads. If the target state is - * beyond CPUHP_AP_ONLINE_IDLE we kick cpuhp thread and let it bring up the - * cpu further. + * Called from the idle task. Wake up the controlling task which brings the +
[tip:x86/platform] x86/PCI/mmcfg: Switch to ECAM config mode if possible
Commit-ID: b5b0f00c760b6e9673ab79b88ede2f3c7a039f74 Gitweb: http://git.kernel.org/tip/b5b0f00c760b6e9673ab79b88ede2f3c7a039f74 Author: Thomas GleixnerAuthorDate: Thu, 16 Mar 2017 22:50:09 +0100 Committer: Thomas Gleixner CommitDate: Thu, 29 Jun 2017 08:41:54 +0200 x86/PCI/mmcfg: Switch to ECAM config mode if possible To allow lockless access to the whole PCI configuration space the mmconfig based accessor functions need to be propagated to the pci_root_ops. Unfortunately this cannot be done before the PCI subsystem initialization happens even if mmconfig access is already available. The reason is that some of the special platform PCI implementations must be able to overrule that setting before further accesses happen. The earliest possible point is after x86_init.pci.init() has been run. This is at a point in the boot process where nothing actually uses the PCI devices so the accessor function pointers can be updated lockless w/o risk. The switch to full ECAM mode depends on the availability of mmconfig and unchanged default accessors. 
Signed-off-by: Thomas Gleixner Acked-by: Bjorn Helgaas Cc: Andi Kleen Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Borislav Petkov Cc: linux-...@vger.kernel.org Link: http://lkml.kernel.org/r/20170316215057.452220...@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/pci_x86.h | 20 arch/x86/pci/common.c | 16 arch/x86/pci/legacy.c | 1 + arch/x86/pci/mmconfig-shared.c | 30 ++ arch/x86/pci/mmconfig_32.c | 8 arch/x86/pci/mmconfig_64.c | 8 6 files changed, 67 insertions(+), 16 deletions(-) diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index 9f1b21f..65e1303 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -48,20 +48,14 @@ enum pci_bf_sort_state { pci_dmi_bf, }; -/* pci-i386.c */ - void pcibios_resource_survey(void); void pcibios_set_cache_line_size(void); -/* pci-pc.c */ - extern int pcibios_last_bus; extern struct pci_ops pci_root_ops; void pcibios_scan_specific_bus(int busn); -/* pci-irq.c */ - struct irq_info { u8 bus, devfn; /* Bus, device and function */ struct { @@ -122,11 +116,10 @@ extern void __init dmi_check_skip_isa_align(void); extern int __init pci_acpi_init(void); extern void __init pcibios_irq_init(void); extern int __init pcibios_init(void); +extern void __init pcibios_select_ops(void); extern int pci_legacy_init(void); extern void pcibios_fixup_irqs(void); -/* pci-mmconfig.c */ - /* "PCI MMCONFIG %04x [bus %02x-%02x]" */ #define PCI_MMCFG_RESOURCE_NAME_LEN (22 + 4 + 2 + 2) @@ -141,6 +134,12 @@ struct pci_mmcfg_region { char name[PCI_MMCFG_RESOURCE_NAME_LEN]; }; +#ifdef CONFIG_PCI_MMCONFIG +extern void __init pci_mmcfg_select_ops(void); +#else +static inline void pci_mmcfg_select_ops(void) { } +#endif + extern int __init pci_mmcfg_arch_init(void); extern void __init pci_mmcfg_arch_free(void); extern int pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg); @@ -197,6 +196,11 @@ static inline void mmio_config_writel(void __iomem *pos, u32 val) asm volatile("movl %%eax,(%1)" : : 
"a" (val), "r" (pos) : "memory"); } +int pci_mmcfg_read(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, u32 *value); +int pci_mmcfg_write(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, u32 value); + #ifdef CONFIG_PCI # ifdef CONFIG_ACPI # define x86_default_pci_init pci_acpi_init diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index cfd1a89..81e4d21 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -157,6 +157,22 @@ static void pcibios_fixup_device_resources(struct pci_dev *dev) } /* + * Called after the last possible modification to raw_pci_[ext_]ops. + * + * Verify that root_pci_ops have not been overwritten by any implementation + * of x86_init.pci.arch_init() and x86_init.pci.init(). + * + * If not, let the mmconfig code decide whether the ops can be switched + * over to the ECAM accessor functions. + */ +void __init pcibios_select_ops(void) +{ + if (pci_root_ops.read != pci_read || pci_root_ops.write != pci_write) + return; + pci_mmcfg_select_ops(); +} + +/* * Called after each bus is probed, but before its children * are examined. */ diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c index 1cb01ab..80ea40e 100644 --- a/arch/x86/pci/legacy.c +++ b/arch/x86/pci/legacy.c @@ -65,6 +65,7 @@ static int __init pci_subsys_init(void) } } + pcibios_select_ops(); pcibios_fixup_peer_bridges();
[tip:x86/urgent] x86/hpet: Cure interface abuse in the resume path
Commit-ID: bb68cfe2f5a7f43058aed299fdbb73eb281734ed Gitweb: http://git.kernel.org/tip/bb68cfe2f5a7f43058aed299fdbb73eb281734ed Author: Thomas GleixnerAuthorDate: Mon, 31 Jul 2017 22:07:09 +0200 Committer: Thomas Gleixner CommitDate: Tue, 1 Aug 2017 13:02:37 +0200 x86/hpet: Cure interface abuse in the resume path The HPET resume path abuses irq_domain_[de]activate_irq() to restore the MSI message in the HPET chip for the boot CPU on resume and it relies on an implementation detail of the interrupt core code, which magically makes the HPET unmask call invoked via an irq_disable/enable pair. This worked as long as the irq code did unconditionally invoke the unmask() callback. With the recent changes which keep track of the masked state to avoid expensive hardware access, this no longer works. As a consequence the HPET timer interrupts are not unmasked which breaks resume as the boot CPU waits forever for a timer interrupt to arrive. Make the restore of the MSI message explicit and invoke the unmask() function directly. While at it get rid of the pointless affinity setting as nothing can change the affinity of the interrupt and the vector across suspend/resume. The restore of the MSI message reestablishes the previous affinity setting which is the correct one. Fixes: bf22ff45bed6 ("genirq: Avoid unnecessary low level irq function calls") Reported-and-tested-by: Tomi Sarvela Reported-by: Martin Peres Signed-off-by: Thomas Gleixner Acked-by: "Rafael J. 
Wysocki" Cc: jeffy.c...@rock-chips.com Cc: Peter Zijlstra Cc: Marc Zyngier Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1707312158590.2287@nanos --- arch/x86/kernel/hpet.c | 27 +++ 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 16f82a3..8ce4212 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -345,21 +345,10 @@ static int hpet_shutdown(struct clock_event_device *evt, int timer) return 0; } -static int hpet_resume(struct clock_event_device *evt, int timer) -{ - if (!timer) { - hpet_enable_legacy_int(); - } else { - struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); - - irq_domain_deactivate_irq(irq_get_irq_data(hdev->irq)); - irq_domain_activate_irq(irq_get_irq_data(hdev->irq)); - disable_hardirq(hdev->irq); - irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu)); - enable_irq(hdev->irq); - } +static int hpet_resume(struct clock_event_device *evt) +{ + hpet_enable_legacy_int(); hpet_print_config(); - return 0; } @@ -417,7 +406,7 @@ static int hpet_legacy_set_periodic(struct clock_event_device *evt) static int hpet_legacy_resume(struct clock_event_device *evt) { - return hpet_resume(evt, 0); + return hpet_resume(evt); } static int hpet_legacy_next_event(unsigned long delta, @@ -510,8 +499,14 @@ static int hpet_msi_set_periodic(struct clock_event_device *evt) static int hpet_msi_resume(struct clock_event_device *evt) { struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); + struct irq_data *data = irq_get_irq_data(hdev->irq); + struct msi_msg msg; - return hpet_resume(evt, hdev->num); + /* Restore the MSI msg and unmask the interrupt */ + irq_chip_compose_msi_msg(data, ); + hpet_msi_write(hdev, ); + hpet_msi_unmask(data); + return 0; } static int hpet_msi_next_event(unsigned long delta,
[tip:core/urgent] kernel/watchdog: Prevent false positives with turbo modes
Commit-ID: 7edaeb6841dfb27e362288ab8466ebdc4972e867 Gitweb: http://git.kernel.org/tip/7edaeb6841dfb27e362288ab8466ebdc4972e867 Author: Thomas GleixnerAuthorDate: Tue, 15 Aug 2017 09:50:13 +0200 Committer: Thomas Gleixner CommitDate: Fri, 18 Aug 2017 12:35:02 +0200 kernel/watchdog: Prevent false positives with turbo modes The hardlockup detector on x86 uses a performance counter based on unhalted CPU cycles and a periodic hrtimer. The hrtimer period is about 2/5 of the performance counter period, so the hrtimer should fire 2-3 times before the performance counter NMI fires. The NMI code checks whether the hrtimer fired since the last invocation. If not, it assumes a hard lockup. The calculation of those periods is based on the nominal CPU frequency. Turbo modes increase the CPU clock frequency and therefore shorten the period of the perf/NMI watchdog. With extreme Turbo-modes (3x nominal frequency) the perf/NMI period is shorter than the hrtimer period which leads to false positives. A simple fix would be to shorten the hrtimer period, but that comes with the side effect of more frequent hrtimer and softlockup thread wakeups, which is not desired. Implement a low pass filter, which checks the perf/NMI period against kernel time. If the perf/NMI fires before 4/5 of the watchdog period has elapsed then the event is ignored and postponed to the next perf/NMI. That solves the problem and avoids the overhead of shorter hrtimer periods and more frequent softlockup thread wakeups. 
Fixes: 58687acba592 ("lockup_detector: Combine nmi_watchdog and softlockup detector") Reported-and-tested-by: Kan Liang Signed-off-by: Thomas Gleixner Cc: dzic...@redhat.com Cc: pra...@redhat.com Cc: a...@linux.intel.com Cc: babu.mo...@oracle.com Cc: pet...@infradead.org Cc: eran...@google.com Cc: a...@redhat.com Cc: sta...@vger.kernel.org Cc: atom...@redhat.com Cc: a...@linux-foundation.org Cc: torva...@linux-foundation.org Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1708150931310.1886@nanos --- arch/x86/Kconfig | 1 + include/linux/nmi.h | 8 +++ kernel/watchdog.c | 1 + kernel/watchdog_hld.c | 59 +++ lib/Kconfig.debug | 7 ++ 5 files changed, 76 insertions(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 781521b..9101bfc 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -100,6 +100,7 @@ config X86 select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL + select HARDLOCKUP_CHECK_TIMESTAMP if X86_64 select HAVE_ACPI_APEI if ACPI select HAVE_ACPI_APEI_NMI if ACPI select HAVE_ALIGNED_STRUCT_PAGE if SLUB diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 8aa01fd..a36abe2 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -168,6 +168,14 @@ extern int sysctl_hardlockup_all_cpu_backtrace; #define sysctl_softlockup_all_cpu_backtrace 0 #define sysctl_hardlockup_all_cpu_backtrace 0 #endif + +#if defined(CONFIG_HARDLOCKUP_CHECK_TIMESTAMP) && \ +defined(CONFIG_HARDLOCKUP_DETECTOR) +void watchdog_update_hrtimer_threshold(u64 period); +#else +static inline void watchdog_update_hrtimer_threshold(u64 period) { } +#endif + extern bool is_hardlockup(void); struct ctl_table; extern int proc_watchdog(struct ctl_table *, int , diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 06d3389..f5d5202 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -240,6 +240,7 @@ static void set_sample_period(void) * hardlockup detector generates a warning */ sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 
5); + watchdog_update_hrtimer_threshold(sample_period); } /* Commands for resetting the watchdog */ diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index 295a0d8..3a09ea1 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -37,6 +37,62 @@ void arch_touch_nmi_watchdog(void) } EXPORT_SYMBOL(arch_touch_nmi_watchdog); +#ifdef CONFIG_HARDLOCKUP_CHECK_TIMESTAMP +static DEFINE_PER_CPU(ktime_t, last_timestamp); +static DEFINE_PER_CPU(unsigned int, nmi_rearmed); +static ktime_t watchdog_hrtimer_sample_threshold __read_mostly; + +void watchdog_update_hrtimer_threshold(u64 period) +{ + /* +* The hrtimer runs with a period of (watchdog_threshold * 2) / 5 +* +* So it runs effectively with 2.5 times the rate of the NMI +* watchdog. That means the hrtimer should fire 2-3 times before +* the NMI watchdog expires. The NMI watchdog on x86 is based on +* unhalted CPU cycles, so if Turbo-Mode is enabled the CPU cycles +* might run way faster than expected and the NMI fires in a +* smaller period than the one deduced from the nominal CPU +* frequency. Depending on
[tip:timers/core] posix-timers: Fix inverted SIGEV_NONE logic in common_timer_get()
Commit-ID: c6503be587e9c5c0aac4e2b45de982352f676a5b Gitweb: http://git.kernel.org/tip/c6503be587e9c5c0aac4e2b45de982352f676a5b Author: Thomas GleixnerAuthorDate: Mon, 12 Jun 2017 17:21:26 +0200 Committer: Thomas Gleixner CommitDate: Mon, 12 Jun 2017 17:29:07 +0200 posix-timers: Fix inverted SIGEV_NONE logic in common_timer_get() The refactoring of the posix-timer core to allow better code sharing introduced inverted logic vs. SIGEV_NONE timers in common_timer_get(). That causes hrtimer_forward() to be called on active timers, which rightfully triggers the warning in hrtimer_forward(). Make sig_none what it says: signal mode == SIGEV_NONE. Fixes: 91d57bae0868 ("posix-timers: Make use of forward/remaining callbacks") Reported-by: Ye Xiaolong Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: John Stultz Link: http://lkml.kernel.org/r/20170609104457.ga39...@inn.lkp.intel.com --- kernel/time/posix-timers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 6e7a70b1..b53a0b5 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -644,7 +644,7 @@ void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting) struct timespec64 ts64; bool sig_none; - sig_none = (timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE; + sig_none = (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE; iv = timr->it_interval; /* interval timer ? */
[tip:timers/core] posix-timers: Handle relative posix-timers correctly
Commit-ID: 67edab48caeb75d412706f4b9d3107afd1e07623 Gitweb: http://git.kernel.org/tip/67edab48caeb75d412706f4b9d3107afd1e07623 Author: Thomas GleixnerAuthorDate: Mon, 12 Jun 2017 19:39:49 +0200 Committer: Thomas Gleixner CommitDate: Mon, 12 Jun 2017 21:07:41 +0200 posix-timers: Handle relative posix-timers correctly The recent rework of the posix timer internals broke the magic posix mechanism, which requires that relative timers are not affected by modifications of the underlying clock. That means relative CLOCK_REALTIME timers cannot use CLOCK_REALTIME, because that can be set and adjusted. The underlying hrtimer switches the clock for these timers to CLOCK_MONOTONIC. That still works, but reading the remaining time of such a timer has been broken in the rework. The old code used the hrtimer internals directly and avoided the posix clock callbacks. Now common_timer_get() uses the underlying kclock->timer_get() callback, which is still CLOCK_REALTIME based. So the remaining time of such a timer is calculated against the wrong time base. Handle it by switching the k_itimer->kclock pointer according to the resulting hrtimer mode. k_itimer->it_clock still contains CLOCK_REALTIME because the timer might be set with ABSTIME later and then it needs to switch back to the realtime posix clock implementation. 
Fixes: eae1c4ae275f ("posix-timers: Make use of cancel/arm callbacks") Reported-by: Andrei Vagin Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: John Stultz Cc: Cyrill Gorcunov Link: http://lkml.kernel.org/r/20170609201156.gb21...@outlook.office365.com --- kernel/time/posix-timers.c | 13 + 1 file changed, 13 insertions(+) diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 88517dc..58c0f60 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -72,6 +72,7 @@ static DEFINE_SPINLOCK(hash_lock); static const struct k_clock * const posix_clocks[]; static const struct k_clock *clockid_to_kclock(const clockid_t id); +static const struct k_clock clock_realtime, clock_monotonic; /* * we assume that the new SIGEV_THREAD_ID shares no bits with the other @@ -750,6 +751,18 @@ static void common_hrtimer_arm(struct k_itimer *timr, ktime_t expires, enum hrtimer_mode mode; mode = absolute ? HRTIMER_MODE_ABS : HRTIMER_MODE_REL; + /* +* Posix magic: Relative CLOCK_REALTIME timers are not affected by +* clock modifications, so they become CLOCK_MONOTONIC based under the +* hood. See hrtimer_init(). Update timr->kclock, so the generic +* functions which use timr->kclock->clock_get() work. +* +* Note: it_clock stays unmodified, because the next timer_set() might +* use ABSTIME, so it needs to switch back. +*/ + if (timr->it_clock == CLOCK_REALTIME) + timr->kclock = absolute ? _realtime : _monotonic; + hrtimer_init(>it.real.timer, timr->it_clock, mode); timr->it.real.timer.function = posix_timer_fn;
[tip:timers/core] posix-timers: Zero out oldval itimerspec
Commit-ID: 5c7a3a3d20a4e175304c0e23809e3d70be8fed8a Gitweb: http://git.kernel.org/tip/5c7a3a3d20a4e175304c0e23809e3d70be8fed8a Author: Thomas GleixnerAuthorDate: Mon, 12 Jun 2017 19:44:09 +0200 Committer: Thomas Gleixner CommitDate: Mon, 12 Jun 2017 21:07:40 +0200 posix-timers: Zero out oldval itimerspec The recent posix timer rework moved the clearing of the itimerspec to the real syscall implementation, but forgot that the kclock->timer_get() is used by timer_settime() as well. That results in an uninitialized variable and bogus values returned to user space. Add the missing memset to timer_settime(). Fixes: eabdec043853 ("posix-timers: Zero settings value in common code") Reported-by: Andrei Vagin Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: John Stultz Cc: Cyrill Gorcunov Link: http://lkml.kernel.org/r/20170609201156.gb21...@outlook.office365.com --- kernel/time/posix-timers.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index b53a0b5..88517dc 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -828,6 +828,8 @@ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags, if (!timespec64_valid(_spec64.it_interval) || !timespec64_valid(_spec64.it_value)) return -EINVAL; + if (rtn) + memset(rtn, 0, sizeof(*rtn)); retry: timr = lock_timer(timer_id, ); if (!timr)
[tip:timers/core] posix-cpu-timers: Make timespec to nsec conversion safe
Commit-ID: 098b0e01a91c42aaaf0425605cd126b03fcb0bcf Gitweb: http://git.kernel.org/tip/098b0e01a91c42aaaf0425605cd126b03fcb0bcf Author: Thomas GleixnerAuthorDate: Tue, 20 Jun 2017 17:37:36 +0200 Committer: Thomas Gleixner CommitDate: Tue, 20 Jun 2017 21:33:56 +0200 posix-cpu-timers: Make timespec to nsec conversion safe The expiry time of a posix cpu timer is supplied through sys_timer_set() via a struct timespec. The timespec is validated for correctness. In the actual set timer implementation the timespec is converted to a scalar nanoseconds value. If the tv_sec part of the time spec is large enough the conversion to nanoseconds (sec * NSEC_PER_SEC) overflows 64bit. Mitigate that by using the timespec_to_ktime() conversion function, which checks the tv_sec part for a potential mult overflow and clamps the result to KTIME_MAX, which is about 292 years. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Xishi Qiu Cc: John Stultz Link: http://lkml.kernel.org/r/20170620154113.588276...@linutronix.de --- kernel/time/posix-cpu-timers.c | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 9df618e..60cb24a 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -580,7 +580,11 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, WARN_ON_ONCE(p == NULL); - new_expires = timespec64_to_ns(>it_value); + /* +* Use the to_ktime conversion because that clamps the maximum +* value to KTIME_MAX and avoid multiplication overflows. +*/ + new_expires = ktime_to_ns(timespec64_to_ktime(new->it_value)); /* * Protect against sighand release/switch in exit/exec and p->cpu_timers
[tip:timers/core] itimer: Make timeval to nsec conversion range limited
Commit-ID: 35eb7258c009dc478338e674a5a84d25d0929c56 Gitweb: http://git.kernel.org/tip/35eb7258c009dc478338e674a5a84d25d0929c56 Author: Thomas GleixnerAuthorDate: Tue, 20 Jun 2017 17:37:35 +0200 Committer: Thomas Gleixner CommitDate: Tue, 20 Jun 2017 21:33:56 +0200 itimer: Make timeval to nsec conversion range limited The expiry time of an itimer is supplied through sys_setitimer() via a struct timeval. The timeval is validated for correctness. In the actual set timer implementation the timeval is converted to a scalar nanoseconds value. If the tv_sec part of the time spec is large enough the conversion to nanoseconds (sec * NSEC_PER_SEC) overflows 64bit. Mitigate that by using the timeval_to_ktime() conversion function, which checks the tv_sec part for a potential mult overflow and clamps the result to KTIME_MAX, which is about 292 years. Reported-by: Xishi Qiu Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: John Stultz Link: http://lkml.kernel.org/r/20170620154113.505981...@linutronix.de --- kernel/time/itimer.c | 8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c index 9dd7ff5..2ef98a0 100644 --- a/kernel/time/itimer.c +++ b/kernel/time/itimer.c @@ -152,8 +152,12 @@ static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id, u64 oval, nval, ointerval, ninterval; struct cpu_itimer *it = >signal->it[clock_id]; - nval = timeval_to_ns(>it_value); - ninterval = timeval_to_ns(>it_interval); + /* +* Use the to_ktime conversion because that clamps the maximum +* value to KTIME_MAX and avoid multiplication overflows. +*/ + nval = ktime_to_ns(timeval_to_ktime(value->it_value)); + ninterval = ktime_to_ns(timeval_to_ktime(value->it_interval)); spin_lock_irq(>sighand->siglock);
[tip:irq/core] genirq: Add missing comment for IRQD_STARTED
Commit-ID: 1bb0401680da156ce1549e915e711bf5b2534cc5 Gitweb: http://git.kernel.org/tip/1bb0401680da156ce1549e915e711bf5b2534cc5 Author: Thomas GleixnerAuthorDate: Tue, 20 Jun 2017 01:37:18 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:13 +0200 genirq: Add missing comment for IRQD_STARTED Signed-off-by: Thomas Gleixner Cc: Marc Zyngier Cc: Jens Axboe Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235444.614913...@linutronix.de --- include/linux/irq.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/irq.h b/include/linux/irq.h index d996314..7e62e10 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -200,6 +200,7 @@ struct irq_data { * IRQD_WAKEUP_ARMED - Wakeup mode armed * IRQD_FORWARDED_TO_VCPU - The interrupt is forwarded to a VCPU * IRQD_AFFINITY_MANAGED - Affinity is auto-managed by the kernel + * IRQD_IRQ_STARTED- Startup state of the interrupt */ enum { IRQD_TRIGGER_MASK = 0xf,
[tip:irq/core] genirq: Provide irq_fixup_move_pending()
Commit-ID: cdd16365b0bd7c0cd19e2cc768b6bdc8021f32c3 Gitweb: http://git.kernel.org/tip/cdd16365b0bd7c0cd19e2cc768b6bdc8021f32c3 Author: Thomas GleixnerAuthorDate: Tue, 20 Jun 2017 01:37:19 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:13 +0200 genirq: Provide irq_fixup_move_pending() If a CPU goes offline, the interrupts are migrated away, but an eventually pending interrupt move, which has not yet been made effective, is kept pending even if the outgoing CPU is the sole target of the pending affinity mask. What's worse is, that the pending affinity mask is discarded even if it would contain a valid subset of the online CPUs. Implement a helper function which allows to avoid these issues. Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235444.691345...@linutronix.de --- include/linux/irq.h| 5 + kernel/irq/migration.c | 30 ++ 2 files changed, 35 insertions(+) diff --git a/include/linux/irq.h b/include/linux/irq.h index 7e62e10..d008065 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -491,9 +491,14 @@ extern void irq_migrate_all_off_this_cpu(void); #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ) void irq_move_irq(struct irq_data *data); void irq_move_masked_irq(struct irq_data *data); +bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear); #else static inline void irq_move_irq(struct irq_data *data) { } static inline void irq_move_masked_irq(struct irq_data *data) { } +static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear) +{ + return false; +} #endif extern int no_irq_affinity; diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c index 37ddb7b..6ca054a 100644 --- a/kernel/irq/migration.c +++ b/kernel/irq/migration.c @@ -4,6 +4,36 @@ #include "internals.h" +/** + * irq_fixup_move_pending - Cleanup irq move pending from a dying CPU + * @desc: 
Interrupt descpriptor to clean up + * @force_clear: If set clear the move pending bit unconditionally. + * If not set, clear it only when the dying CPU is the + * last one in the pending mask. + * + * Returns true if the pending bit was set and the pending mask contains an + * online CPU other than the dying CPU. + */ +bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear) +{ + struct irq_data *data = irq_desc_get_irq_data(desc); + + if (!irqd_is_setaffinity_pending(data)) + return false; + + /* +* The outgoing CPU might be the last online target in a pending +* interrupt move. If that's the case clear the pending move bit. +*/ + if (cpumask_any_and(desc->pending_mask, cpu_online_mask) >= nr_cpu_ids) { + irqd_clr_move_pending(data); + return false; + } + if (force_clear) + irqd_clr_move_pending(data); + return true; +} + void irq_move_masked_irq(struct irq_data *idata) { struct irq_desc *desc = irq_data_to_desc(idata);
[tip:irq/core] genirq/proc: Replace ever repeating type cast
Commit-ID: c1a80386965e9fa3c2f8d1d57966216fe02c9124 Gitweb: http://git.kernel.org/tip/c1a80386965e9fa3c2f8d1d57966216fe02c9124 Author: Thomas GleixnerAuthorDate: Tue, 20 Jun 2017 01:37:37 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:20 +0200 genirq/proc: Replace ever repeating type cast The proc file setup repeats the same ugly type cast for the irq number over and over. Do it once and hand in the local void pointer. Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235446.160866...@linutronix.de --- kernel/irq/proc.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index eff7c0c..cbc4c5e 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -326,6 +326,7 @@ void register_handler_proc(unsigned int irq, struct irqaction *action) void register_irq_proc(unsigned int irq, struct irq_desc *desc) { static DEFINE_MUTEX(register_lock); + void __maybe_unused *irqp = (void *)(unsigned long) irq; char name [MAX_NAMELEN]; if (!root_irq_dir || (desc->irq_data.chip == _irq_chip)) @@ -351,20 +352,19 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc) #ifdef CONFIG_SMP /* create /proc/irq//smp_affinity */ proc_create_data("smp_affinity", 0644, desc->dir, -_affinity_proc_fops, (void *)(long)irq); +_affinity_proc_fops, irqp); /* create /proc/irq//affinity_hint */ proc_create_data("affinity_hint", 0444, desc->dir, -_affinity_hint_proc_fops, (void *)(long)irq); +_affinity_hint_proc_fops, irqp); /* create /proc/irq//smp_affinity_list */ proc_create_data("smp_affinity_list", 0644, desc->dir, -_affinity_list_proc_fops, (void *)(long)irq); +_affinity_list_proc_fops, irqp); proc_create_data("node", 0444, desc->dir, -_node_proc_fops, (void *)(long)irq); +_node_proc_fops, irqp); #endif - proc_create_data("spurious", 0444, desc->dir, _spurious_proc_fops, (void 
*)(long)irq);
[tip:irq/core] genirq: Introduce effective affinity mask
Commit-ID: 0d3f54257dc300f2db480d6a46b34bdb87f18c1b Gitweb: http://git.kernel.org/tip/0d3f54257dc300f2db480d6a46b34bdb87f18c1b Author: Thomas Gleixner AuthorDate: Tue, 20 Jun 2017 01:37:38 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:20 +0200 genirq: Introduce effective affinity mask There is currently no way to evaluate the effective affinity mask of a given interrupt. Many irq chips allow only a single target CPU or a subset of CPUs in the affinity mask. Updating the mask at the time of setting the affinity to the subset would be counterproductive because information for cpu hotplug about assigned interrupt affinities gets lost. On CPU hotplug it's also pointless to force migrate an interrupt, which is not targeted at the CPU effectively. But currently the information is not available. Provide a separate mask to be updated by the irq_chip->irq_set_affinity() implementations. Implement the read only proc files so the user can see the effective mask as well w/o trying to deduce it from /proc/interrupts. Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235446.247834...@linutronix.de --- include/linux/irq.h | 29 + kernel/irq/Kconfig | 4 +++ kernel/irq/debugfs.c | 4 +++ kernel/irq/irqdesc.c | 14 kernel/irq/proc.c| 90 5 files changed, 134 insertions(+), 7 deletions(-) diff --git a/include/linux/irq.h b/include/linux/irq.h index 2b7e5a7..4087ef2 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -137,6 +137,9 @@ struct irq_domain; * @affinity: IRQ affinity on SMP. If this is an IPI * related irq, then this is the mask of the * CPUs to which an IPI can be sent. + * @effective_affinity:The effective IRQ affinity on SMP as some irq + * chips do not allow multi CPU destinations. + * A subset of @affinity. * @msi_desc: MSI descriptor * @ipi_offset:Offset of first IPI target cpu in @affinity. Optional. 
*/ @@ -148,6 +151,9 @@ struct irq_common_data { void*handler_data; struct msi_desc *msi_desc; cpumask_var_t affinity; +#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK + cpumask_var_t effective_affinity; +#endif #ifdef CONFIG_GENERIC_IRQ_IPI unsigned intipi_offset; #endif @@ -737,6 +743,29 @@ static inline struct cpumask *irq_data_get_affinity_mask(struct irq_data *d) return d->common->affinity; } +#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK +static inline +struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d) +{ + return d->common->effective_affinity; +} +static inline void irq_data_update_effective_affinity(struct irq_data *d, + const struct cpumask *m) +{ + cpumask_copy(d->common->effective_affinity, m); +} +#else +static inline void irq_data_update_effective_affinity(struct irq_data *d, + const struct cpumask *m) +{ +} +static inline +struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d) +{ + return d->common->affinity; +} +#endif + unsigned int arch_dynirq_lower_bound(unsigned int from); int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index 8d9498e..fcbb1d6 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -21,6 +21,10 @@ config GENERIC_IRQ_SHOW config GENERIC_IRQ_SHOW_LEVEL bool +# Supports effective affinity mask +config GENERIC_IRQ_EFFECTIVE_AFF_MASK + bool + # Facility to allocate a hardware interrupt. This is legacy support # and should not be used in new code. Use irq domains instead. 
config GENERIC_IRQ_LEGACY_ALLOC_HWIRQ diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c index 50ee2f6..edbef25 100644 --- a/kernel/irq/debugfs.c +++ b/kernel/irq/debugfs.c @@ -36,6 +36,10 @@ static void irq_debug_show_masks(struct seq_file *m, struct irq_desc *desc) msk = irq_data_get_affinity_mask(data); seq_printf(m, "affinity: %*pbl\n", cpumask_pr_args(msk)); +#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK + msk = irq_data_get_effective_affinity_mask(data); + seq_printf(m, "effectiv: %*pbl\n", cpumask_pr_args(msk)); +#endif #ifdef CONFIG_GENERIC_PENDING_IRQ msk = desc->pending_mask; seq_printf(m, "pending: %*pbl\n", cpumask_pr_args(msk)); diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index
[tip:irq/core] genirq: Remove pointless arg from show_irq_affinity
Commit-ID: 047dc6331de58da51818582c0db0dbfcb837e614 Gitweb: http://git.kernel.org/tip/047dc6331de58da51818582c0db0dbfcb837e614 Author: Thomas GleixnerAuthorDate: Tue, 20 Jun 2017 01:37:35 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:19 +0200 genirq: Remove pointless arg from show_irq_affinity The third argument of the internal helper function is unused. Remove it. Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235446.004958...@linutronix.de --- kernel/irq/proc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index d35bb8d..eff7c0c 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -37,7 +37,7 @@ static struct proc_dir_entry *root_irq_dir; #ifdef CONFIG_SMP -static int show_irq_affinity(int type, struct seq_file *m, void *v) +static int show_irq_affinity(int type, struct seq_file *m) { struct irq_desc *desc = irq_to_desc((long)m->private); const struct cpumask *mask = desc->irq_common_data.affinity; @@ -80,12 +80,12 @@ static int irq_affinity_hint_proc_show(struct seq_file *m, void *v) int no_irq_affinity; static int irq_affinity_proc_show(struct seq_file *m, void *v) { - return show_irq_affinity(0, m, v); + return show_irq_affinity(0, m); } static int irq_affinity_list_proc_show(struct seq_file *m, void *v) { - return show_irq_affinity(1, m, v); + return show_irq_affinity(1, m); }
[tip:irq/core] x86/apic: Add name to irq chip
Commit-ID: 8947dfb257eb91d7487e06b7d2a069d82e7c19a2 Gitweb: http://git.kernel.org/tip/8947dfb257eb91d7487e06b7d2a069d82e7c19a2 Author: Thomas Gleixner AuthorDate: Tue, 20 Jun 2017 01:37:01 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:06 +0200 x86/apic: Add name to irq chip Add the missing name, so debugging will work properly. Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235443.266561...@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/vector.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index f3557a1..6b21b9e 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -534,6 +534,7 @@ static int apic_set_affinity(struct irq_data *irq_data, } static struct irq_chip lapic_controller = { + .name = "APIC", .irq_ack= apic_ack_edge, .irq_set_affinity = apic_set_affinity, .irq_retrigger = apic_retrigger_irq,
[tip:irq/core] genirq/cpuhotplug: Add support for cleaning up move in progress
Commit-ID: f0383c24b4855f6a4b5a358c7b2d2c16e0437e9b Gitweb: http://git.kernel.org/tip/f0383c24b4855f6a4b5a358c7b2d2c16e0437e9b Author: Thomas GleixnerAuthorDate: Tue, 20 Jun 2017 01:37:29 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:17 +0200 genirq/cpuhotplug: Add support for cleaning up move in progress In order to move x86 to the generic hotplug migration code, add support for cleaning up move in progress bits. On architectures which have this x86 specific (mis)feature not enabled, this is optimized out by the compiler. Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235445.525817...@linutronix.de --- arch/x86/include/asm/irq.h | 1 - include/linux/irq.h| 2 ++ kernel/irq/cpuhotplug.c| 28 ++-- kernel/irq/internals.h | 10 +- 4 files changed, 37 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 16d3fa2..668cca5 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -29,7 +29,6 @@ struct irq_desc; #include extern int check_irq_vectors_for_cpu_disable(void); extern void fixup_irqs(void); -extern void irq_force_complete_move(struct irq_desc *desc); #endif #ifdef CONFIG_HAVE_KVM diff --git a/include/linux/irq.h b/include/linux/irq.h index d008065..299271a 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -491,10 +491,12 @@ extern void irq_migrate_all_off_this_cpu(void); #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ) void irq_move_irq(struct irq_data *data); void irq_move_masked_irq(struct irq_data *data); +void irq_force_complete_move(struct irq_desc *desc); bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear); #else static inline void irq_move_irq(struct irq_data *data) { } static inline void irq_move_masked_irq(struct irq_data *data) { } +static inline void irq_force_complete_move(struct irq_desc 
*desc) { } static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear) { return false; diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c index 09b20e1..4be4bd6 100644 --- a/kernel/irq/cpuhotplug.c +++ b/kernel/irq/cpuhotplug.c @@ -18,7 +18,7 @@ static bool migrate_one_irq(struct irq_desc *desc) { struct irq_data *d = irq_desc_get_irq_data(desc); struct irq_chip *chip = irq_data_get_irq_chip(d); - const struct cpumask *affinity = d->common->affinity; + const struct cpumask *affinity; bool brokeaff = false; int err; @@ -41,9 +41,33 @@ static bool migrate_one_irq(struct irq_desc *desc) * Note: Do not check desc->action as this might be a chained * interrupt. */ + affinity = irq_data_get_affinity_mask(d); if (irqd_is_per_cpu(d) || !irqd_is_started(d) || - !cpumask_test_cpu(smp_processor_id(), affinity)) + !cpumask_test_cpu(smp_processor_id(), affinity)) { + /* +* If an irq move is pending, abort it if the dying CPU is +* the sole target. +*/ + irq_fixup_move_pending(desc, false); return false; + } + + /* +* Complete an eventually pending irq move cleanup. If this +* interrupt was moved in hard irq context, then the vectors need +* to be cleaned up. It can't wait until this interrupt actually +* happens and this CPU was involved. +*/ + irq_force_complete_move(desc); + + /* +* If there is a setaffinity pending, then try to reuse the pending +* mask, so the last change of the affinity does not get lost. If +* there is no move pending or the pending mask does not contain +* any online CPU, use the current affinity mask. 
+*/ + if (irq_fixup_move_pending(desc, true)) + affinity = irq_desc_get_pending_mask(desc); if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { affinity = cpu_online_mask; diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 20b197f..fd4fa83 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -268,6 +268,10 @@ irq_get_pending(struct cpumask *mask, struct irq_desc *desc) { cpumask_copy(mask, desc->pending_mask); } +static inline struct cpumask *irq_desc_get_pending_mask(struct irq_desc *desc) +{ + return desc->pending_mask; +} #else /* CONFIG_GENERIC_PENDING_IRQ */ static inline bool irq_can_move_pcntxt(struct irq_data *data) { @@ -285,7 +289,11 @@ static inline void irq_get_pending(struct cpumask *mask, struct irq_desc *desc)
[tip:irq/core] genirq/cpuhotplug: Do not migrate shutdown irqs
Commit-ID: 91f26cb4cd3c22bd656ab46c49329aacaaab5504 Gitweb: http://git.kernel.org/tip/91f26cb4cd3c22bd656ab46c49329aacaaab5504 Author: Thomas Gleixner AuthorDate: Tue, 20 Jun 2017 01:37:28 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:17 +0200 genirq/cpuhotplug: Do not migrate shutdown irqs The hotplug code tries to migrate interrupts which are shut down as well. That's pointless because the interrupt cannot fire and the next startup will move it to the proper place anyway. Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235445.447550...@linutronix.de --- kernel/irq/cpuhotplug.c | 11 --- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c index 41fe1e0..09b20e1 100644 --- a/kernel/irq/cpuhotplug.c +++ b/kernel/irq/cpuhotplug.c @@ -33,10 +33,15 @@ static bool migrate_one_irq(struct irq_desc *desc) } /* -* If this is a per-CPU interrupt, or the affinity does not -* include this CPU, then we have nothing to do. +* No move required, if: +* - Interrupt is per cpu +* - Interrupt is not started +* - Affinity mask does not include this CPU. +* +* Note: Do not check desc->action as this might be a chained +* interrupt. */ - if (irqd_is_per_cpu(d) || + if (irqd_is_per_cpu(d) || !irqd_is_started(d) || !cpumask_test_cpu(smp_processor_id(), affinity)) return false;
[tip:irq/core] xen/events: Add support for effective affinity mask
Commit-ID: ef1c2cc88531a967fa97d1ac1f3f8a64ee6910b4 Gitweb: http://git.kernel.org/tip/ef1c2cc88531a967fa97d1ac1f3f8a64ee6910b4 Author: Thomas GleixnerAuthorDate: Tue, 20 Jun 2017 01:37:45 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:23 +0200 xen/events: Add support for effective affinity mask Update the effective affinity mask when an interrupt was successfully targeted to a CPU. Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235446.799944...@linutronix.de --- drivers/xen/events/events_base.c | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index b52852f..2e567d8 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -1343,8 +1343,12 @@ static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest, bool force) { unsigned tcpu = cpumask_first_and(dest, cpu_online_mask); + int ret = rebind_irq_to_cpu(data->irq, tcpu); - return rebind_irq_to_cpu(data->irq, tcpu); + if (!ret) + irq_data_update_effective_affinity(data, cpumask_of(tcpu)); + + return ret; } static void enable_dynirq(struct irq_data *data)
[tip:irq/core] x86/apic: Add irq_data argument to apic->cpu_mask_to_apicid()
Commit-ID: 0e24f7c9f67e218546ad44160d2a12d9d8be0171 Gitweb: http://git.kernel.org/tip/0e24f7c9f67e218546ad44160d2a12d9d8be0171 Author: Thomas GleixnerAuthorDate: Tue, 20 Jun 2017 01:37:44 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:22 +0200 x86/apic: Add irq_data argument to apic->cpu_mask_to_apicid() The decision to which CPUs an interrupt is effectively routed happens in the various apic->cpu_mask_to_apicid() implementations To support effective affinity masks this information needs to be updated in irq_data. Add a pointer to irq_data to the callbacks and feed it through the call chain. Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235446.720739...@linutronix.de --- arch/x86/include/asm/apic.h | 5 + arch/x86/kernel/apic/apic.c | 9 +++-- arch/x86/kernel/apic/vector.c | 25 +++-- arch/x86/kernel/apic/x2apic_cluster.c | 3 ++- arch/x86/kernel/apic/x2apic_uv_x.c| 5 +++-- 5 files changed, 32 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 3e64e99..5f01671 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -252,6 +252,8 @@ static inline int x2apic_enabled(void) { return 0; } #definex2apic_supported() (0) #endif /* !CONFIG_X86_X2APIC */ +struct irq_data; + /* * Copyright 2004 James Cleverdon, IBM. 
* Subject to the GNU Public License, v.2 @@ -297,6 +299,7 @@ struct apic { unsigned long (*set_apic_id)(unsigned int id); int (*cpu_mask_to_apicid)(const struct cpumask *cpumask, + struct irq_data *irqdata, unsigned int *apicid); /* ipi */ @@ -540,8 +543,10 @@ static inline int default_phys_pkg_id(int cpuid_apic, int index_msb) #endif extern int flat_cpu_mask_to_apicid(const struct cpumask *cpumask, + struct irq_data *irqdata, unsigned int *apicid); extern int default_cpu_mask_to_apicid(const struct cpumask *cpumask, + struct irq_data *irqdata, unsigned int *apicid); static inline void diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 169dd42..14e5a47 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2201,7 +2201,9 @@ void default_init_apic_ldr(void) apic_write(APIC_LDR, val); } -int default_cpu_mask_to_apicid(const struct cpumask *mask, unsigned int *apicid) +int default_cpu_mask_to_apicid(const struct cpumask *mask, + struct irq_data *irqdata, + unsigned int *apicid) { unsigned int cpu = cpumask_first(mask); @@ -2211,7 +2213,10 @@ int default_cpu_mask_to_apicid(const struct cpumask *mask, unsigned int *apicid) return 0; } -int flat_cpu_mask_to_apicid(const struct cpumask *mask, unsigned int *apicid) +int flat_cpu_mask_to_apicid(const struct cpumask *mask, + struct irq_data *irqdata, + unsigned int *apicid) + { unsigned long cpu_mask = cpumask_bits(mask)[0] & APIC_ALL_CPUS; diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 1f57f5a..b270a76 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -103,7 +103,8 @@ static void free_apic_chip_data(struct apic_chip_data *data) } static int __assign_irq_vector(int irq, struct apic_chip_data *d, - const struct cpumask *mask) + const struct cpumask *mask, + struct irq_data *irqdata) { /* * NOTE! The local APIC isn't very good at handling @@ -226,32 +227,35 @@ success: * cpus masked out. 
*/ cpumask_and(vector_searchmask, vector_searchmask, mask); - BUG_ON(apic->cpu_mask_to_apicid(vector_searchmask, >cfg.dest_apicid)); + BUG_ON(apic->cpu_mask_to_apicid(vector_searchmask, irqdata, + >cfg.dest_apicid)); return 0; } static int assign_irq_vector(int irq, struct apic_chip_data *data, -const struct cpumask *mask) +const struct cpumask *mask, +struct irq_data *irqdata) { int err; unsigned long flags; raw_spin_lock_irqsave(_lock, flags); - err = __assign_irq_vector(irq, data, mask); + err = __assign_irq_vector(irq, data, mask, irqdata);
[tip:irq/core] x86/apic: Implement effective irq mask update
Commit-ID: c7d6c9dd871f42c4e0ce5563d2f684e78ea673cf Gitweb: http://git.kernel.org/tip/c7d6c9dd871f42c4e0ce5563d2f684e78ea673cf Author: Thomas GleixnerAuthorDate: Tue, 20 Jun 2017 01:37:46 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:23 +0200 x86/apic: Implement effective irq mask update Add the effective irq mask update to the apic implementations and enable effective irq masks for x86. Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235446.878370...@linutronix.de --- arch/x86/Kconfig | 1 + arch/x86/kernel/apic/apic.c | 3 +++ arch/x86/kernel/apic/x2apic_cluster.c | 4 3 files changed, 8 insertions(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index fcf1dad..0172c0b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -87,6 +87,7 @@ config X86 select GENERIC_EARLY_IOREMAP select GENERIC_FIND_FIRST_BIT select GENERIC_IOMAP + select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP select GENERIC_IRQ_MIGRATIONif SMP select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 14e5a47..e740946 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2210,6 +2210,7 @@ int default_cpu_mask_to_apicid(const struct cpumask *mask, if (cpu >= nr_cpu_ids) return -EINVAL; *apicid = per_cpu(x86_cpu_to_apicid, cpu); + irq_data_update_effective_affinity(irqdata, cpumask_of(cpu)); return 0; } @@ -2218,11 +2219,13 @@ int flat_cpu_mask_to_apicid(const struct cpumask *mask, unsigned int *apicid) { + struct cpumask *effmsk = irq_data_get_effective_affinity_mask(irqdata); unsigned long cpu_mask = cpumask_bits(mask)[0] & APIC_ALL_CPUS; if (!cpu_mask) return -EINVAL; *apicid = (unsigned int)cpu_mask; + cpumask_bits(effmsk)[0] = cpu_mask; return 0; } diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 
305031e..481237c 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -107,6 +108,7 @@ static int x2apic_cpu_mask_to_apicid(const struct cpumask *mask, struct irq_data *irqdata, unsigned int *apicid) { + struct cpumask *effmsk = irq_data_get_effective_affinity_mask(irqdata); unsigned int cpu; u32 dest = 0; u16 cluster; @@ -118,10 +120,12 @@ x2apic_cpu_mask_to_apicid(const struct cpumask *mask, struct irq_data *irqdata, dest = per_cpu(x86_cpu_to_logical_apicid, cpu); cluster = x2apic_cluster(cpu); + cpumask_clear(effmsk); for_each_cpu(cpu, mask) { if (cluster != x2apic_cluster(cpu)) continue; dest |= per_cpu(x86_cpu_to_logical_apicid, cpu); + cpumask_set_cpu(cpu, effmsk); } *apicid = dest;
[tip:irq/core] iommu/vt-d: Add name to irq chip
Commit-ID: 1bb3a5a76386ba2886ee44b903eeff5765bd71d4 Gitweb: http://git.kernel.org/tip/1bb3a5a76386ba2886ee44b903eeff5765bd71d4 Author: Thomas Gleixner AuthorDate: Tue, 20 Jun 2017 01:37:03 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:07 +0200 iommu/vt-d: Add name to irq chip Add the missing name, so debugging will work properly. Signed-off-by: Thomas Gleixner Acked-by: Joerg Roedel Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: io...@lists.linux-foundation.org Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235443.431939...@linutronix.de --- drivers/iommu/intel_irq_remapping.c | 9 + 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index a190cbd..ba5b580 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -1205,10 +1205,11 @@ static int intel_ir_set_vcpu_affinity(struct irq_data *data, void *info) } static struct irq_chip intel_ir_chip = { - .irq_ack = ir_ack_apic_edge, - .irq_set_affinity = intel_ir_set_affinity, - .irq_compose_msi_msg = intel_ir_compose_msi_msg, - .irq_set_vcpu_affinity = intel_ir_set_vcpu_affinity, + .name = "INTEL-IR", + .irq_ack= ir_ack_apic_edge, + .irq_set_affinity = intel_ir_set_affinity, + .irq_compose_msi_msg= intel_ir_compose_msi_msg, + .irq_set_vcpu_affinity = intel_ir_set_vcpu_affinity, }; static void intel_irq_remapping_prepare_irte(struct intel_ir_data *data,
[tip:irq/core] x86/vector: Create named irq domain
Commit-ID: 9d35f859590efa48be51b8ccded6550e0440e2c7 Gitweb: http://git.kernel.org/tip/9d35f859590efa48be51b8ccded6550e0440e2c7 Author: Thomas GleixnerAuthorDate: Tue, 20 Jun 2017 01:37:06 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:08 +0200 x86/vector: Create named irq domain Use the fwnode to create a named domain so diagnosis works. Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235443.673635...@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/vector.c | 9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 6b21b9e..47c5d01 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -429,11 +429,16 @@ static void init_legacy_irqs(void) { } int __init arch_early_irq_init(void) { + struct fwnode_handle *fn; + init_legacy_irqs(); - x86_vector_domain = irq_domain_add_tree(NULL, _vector_domain_ops, - NULL); + fn = irq_domain_alloc_named_fwnode("VECTOR"); + BUG_ON(!fn); + x86_vector_domain = irq_domain_create_tree(fn, _vector_domain_ops, + NULL); BUG_ON(x86_vector_domain == NULL); + irq_domain_free_fwnode(fn); irq_set_default_host(x86_vector_domain); arch_init_msi_domain(x86_vector_domain);
[tip:irq/core] genirq/irqdomain: Add map counter
Commit-ID: 9dc6be3d419398eae9a19cd09b7969ceff8eaf10 Gitweb: http://git.kernel.org/tip/9dc6be3d419398eae9a19cd09b7969ceff8eaf10 Author: Thomas GleixnerAuthorDate: Tue, 20 Jun 2017 01:37:16 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:12 +0200 genirq/irqdomain: Add map counter Add a map counter instead of counting radix tree entries for diagnosis. That also gives correct information for linear domains. Signed-off-by: Thomas Gleixner Acked-by: Marc Zyngier Cc: Jens Axboe Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235444.459397...@linutronix.de --- include/linux/irqdomain.h | 2 ++ kernel/irq/irqdomain.c| 4 2 files changed, 6 insertions(+) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 9cf32a2..17ccd54 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -130,6 +130,7 @@ struct irq_domain_chip_generic; * @host_data: private data pointer for use by owner. Not touched by irq_domain * core code. * @flags: host per irq_domain flags + * @mapcount: The number of mapped interrupts * * Optional elements * @of_node: Pointer to device tree nodes associated with the irq_domain. 
Used @@ -152,6 +153,7 @@ struct irq_domain { const struct irq_domain_ops *ops; void *host_data; unsigned int flags; + unsigned int mapcount; /* Optional data */ struct fwnode_handle *fwnode; diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index e1b925b..8d5805c 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -423,6 +423,7 @@ void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq) irq_data->domain = NULL; irq_data->hwirq = 0; + domain->mapcount--; /* Clear reverse map for this hwirq */ if (hwirq < domain->revmap_size) { @@ -474,6 +475,7 @@ int irq_domain_associate(struct irq_domain *domain, unsigned int virq, domain->name = irq_data->chip->name; } + domain->mapcount++; if (hwirq < domain->revmap_size) { domain->linear_revmap[hwirq] = virq; } else { @@ -1081,6 +1083,7 @@ static void irq_domain_insert_irq(int virq) struct irq_domain *domain = data->domain; irq_hw_number_t hwirq = data->hwirq; + domain->mapcount++; if (hwirq < domain->revmap_size) { domain->linear_revmap[hwirq] = virq; } else { @@ -1110,6 +1113,7 @@ static void irq_domain_remove_irq(int virq) struct irq_domain *domain = data->domain; irq_hw_number_t hwirq = data->hwirq; + domain->mapcount--; if (hwirq < domain->revmap_size) { domain->linear_revmap[hwirq] = 0; } else {
[tip:irq/core] genirq/debugfs: Add proper debugfs interface
Commit-ID: 087cdfb662ae50e3826e7cd2e54b6519d07b60f0 Gitweb: http://git.kernel.org/tip/087cdfb662ae50e3826e7cd2e54b6519d07b60f0 Author: Thomas Gleixner AuthorDate: Tue, 20 Jun 2017 01:37:17 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:13 +0200 genirq/debugfs: Add proper debugfs interface Debugging (hierarchical) interrupt domains is tedious as there is no information about the hierarchy and no information about states of interrupts in the various domain levels. Add a debugfs directory 'irq' and subdirectories 'domains' and 'irqs'. The domains directory contains the domain files. The content is information about the domain. If the domain is part of a hierarchy then the parent domains are printed as well. # ls /sys/kernel/debug/irq/domains/ default INTEL-IR-2 INTEL-IR-MSI-2 IO-APIC-IR-2 PCI-MSI DMAR-MSI INTEL-IR-3 INTEL-IR-MSI-3 IO-APIC-IR-3 unknown-1 INTEL-IR-0 INTEL-IR-MSI-0 IO-APIC-IR-0 IO-APIC-IR-4 VECTOR INTEL-IR-1 INTEL-IR-MSI-1 IO-APIC-IR-1 PCI-HT # cat /sys/kernel/debug/irq/domains/VECTOR name: VECTOR size: 0 mapped: 216 flags: 0x0041 # cat /sys/kernel/debug/irq/domains/IO-APIC-IR-0 name: IO-APIC-IR-0 size: 24 mapped: 19 flags: 0x0041 parent: INTEL-IR-3 name: INTEL-IR-3 size: 65536 mapped: 167 flags: 0x0041 parent: VECTOR name: VECTOR size: 0 mapped: 216 flags: 0x0041 Unfortunately there is no per cpu information about the VECTOR domain (yet). The irqs directory contains detailed information about mapped interrupts. # cat /sys/kernel/debug/irq/irqs/3 handler: handle_edge_irq status: 0x4000 istate: 0x ddepth: 1 wdepth: 0 dstate: 0x01018000 IRQD_IRQ_DISABLED IRQD_SINGLE_TARGET IRQD_MOVE_PCNTXT node: 0 affinity: 0-143 effectiv: 0 pending: domain: IO-APIC-IR-0 hwirq: 0x3 chip: IR-IO-APIC flags: 0x10 IRQCHIP_SKIP_SET_WAKE parent: domain: INTEL-IR-3 hwirq: 0x2 chip: INTEL-IR flags: 0x0 parent: domain: VECTOR hwirq: 0x3 chip: APIC flags: 0x0 This was developed to simplify the debugging of the managed affinity changes. 
Signed-off-by: Thomas Gleixner Acked-by: Marc Zyngier Cc: Jens Axboe Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235444.537566...@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/irqdesc.h | 4 + include/linux/irqdomain.h | 4 + kernel/irq/Kconfig| 11 +++ kernel/irq/Makefile | 1 + kernel/irq/debugfs.c | 215 ++ kernel/irq/internals.h| 22 + kernel/irq/irqdesc.c | 1 + kernel/irq/irqdomain.c| 87 ++- kernel/irq/manage.c | 1 + 9 files changed, 345 insertions(+), 1 deletion(-) diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index c9be579..d425a3a 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -46,6 +46,7 @@ struct pt_regs; * @rcu: rcu head for delayed free * @kobj: kobject used to represent this struct in sysfs * @dir: /proc/irq/ procfs entry + * @debugfs_file: dentry for the debugfs file * @name: flow handler name for /proc/interrupts output */ struct irq_desc { @@ -88,6 +89,9 @@ struct irq_desc { #ifdef CONFIG_PROC_FS struct proc_dir_entry *dir; #endif +#ifdef CONFIG_GENERIC_IRQ_DEBUGFS + struct dentry *debugfs_file; +#endif #ifdef CONFIG_SPARSE_IRQ struct rcu_head rcu; struct kobject kobj; diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 17ccd54..914b0c3 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -139,6 +139,7 @@ struct irq_domain_chip_generic; * setting up one or more generic chips for interrupt controllers * drivers using the generic chip library which uses this pointer. 
* @parent: Pointer to parent irq_domain to support hierarchy irq_domains + * @debugfs_file: dentry for the domain debugfs file * * Revmap data, used internally by irq_domain * @revmap_direct_max_irq: The largest hwirq that can be set for controllers that @@ -162,6 +163,9 @@ struct irq_domain { #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY struct irq_domain *parent; #endif +#ifdef CONFIG_GENERIC_IRQ_DEBUGFS + struct dentry *debugfs_file; +#endif /* reverse map data. The linear map gets appended to the irq_domain */ irq_hw_number_t hwirq_max; diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
[tip:irq/core] PCI/vmd: Create named irq domain
Commit-ID: ae904cafd59d7120ef2afb97b252eadeba45e95f Gitweb: http://git.kernel.org/tip/ae904cafd59d7120ef2afb97b252eadeba45e95f Author: Thomas GleixnerAuthorDate: Tue, 20 Jun 2017 01:37:15 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:12 +0200 PCI/vmd: Create named irq domain Use the fwnode to create a named domain so diagnosis works. Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: linux-...@vger.kernel.org Cc: Bjorn Helgaas Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235444.379861...@linutronix.de --- drivers/pci/host/vmd.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/pci/host/vmd.c b/drivers/pci/host/vmd.c index e27ad2a..31203d6 100644 --- a/drivers/pci/host/vmd.c +++ b/drivers/pci/host/vmd.c @@ -554,6 +554,7 @@ static int vmd_find_free_domain(void) static int vmd_enable_domain(struct vmd_dev *vmd) { struct pci_sysdata *sd = >sysdata; + struct fwnode_handle *fn; struct resource *res; u32 upper_bits; unsigned long flags; @@ -617,8 +618,13 @@ static int vmd_enable_domain(struct vmd_dev *vmd) sd->node = pcibus_to_node(vmd->dev->bus); - vmd->irq_domain = pci_msi_create_irq_domain(NULL, _msi_domain_info, + fn = irq_domain_alloc_named_id_fwnode("VMD-MSI", vmd->sysdata.domain); + if (!fn) + return -ENODEV; + + vmd->irq_domain = pci_msi_create_irq_domain(fn, _msi_domain_info, x86_vector_domain); + irq_domain_free_fwnode(fn); if (!vmd->irq_domain) return -ENODEV;
[tip:irq/core] genirq: Add force argument to irq_startup()
Commit-ID: 4cde9c6b826834b861a2b58653ab33150f562064 Gitweb: http://git.kernel.org/tip/4cde9c6b826834b861a2b58653ab33150f562064 Author: Thomas Gleixner AuthorDate: Tue, 20 Jun 2017 01:37:49 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:24 +0200 genirq: Add force argument to irq_startup() In order to handle managed interrupts gracefully on irq_startup() so they won't lose their assigned affinity, it's necessary to allow startups which keep the interrupts in managed shutdown state, if none of the assigned CPUs is online. This allows drivers to request interrupts w/o the CPUs being online, which avoids online/offline churn in drivers. Add a force argument which can override that decision and let only request_irq() and enable_irq() allow the managed shutdown handling. enable_irq() is required, because the interrupt might be requested with IRQF_NOAUTOEN and enable_irq() invokes irq_startup() which would then wreck the assignment again. All other callers force startup and potentially break the assigned affinity. No functional change as this only adds the function argument. 
Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235447.112094...@linutronix.de --- kernel/irq/autoprobe.c | 4 ++-- kernel/irq/chip.c | 4 ++-- kernel/irq/internals.h | 9 - kernel/irq/manage.c| 4 ++-- 4 files changed, 14 insertions(+), 7 deletions(-) diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c index 0119b9d..d30a0dd 100644 --- a/kernel/irq/autoprobe.c +++ b/kernel/irq/autoprobe.c @@ -53,7 +53,7 @@ unsigned long probe_irq_on(void) if (desc->irq_data.chip->irq_set_type) desc->irq_data.chip->irq_set_type(>irq_data, IRQ_TYPE_PROBE); - irq_startup(desc, false); + irq_startup(desc, IRQ_NORESEND, IRQ_START_FORCE); } raw_spin_unlock_irq(>lock); } @@ -70,7 +70,7 @@ unsigned long probe_irq_on(void) raw_spin_lock_irq(>lock); if (!desc->action && irq_settings_can_probe(desc)) { desc->istate |= IRQS_AUTODETECT | IRQS_WAITING; - if (irq_startup(desc, false)) + if (irq_startup(desc, IRQ_NORESEND, IRQ_START_FORCE)) desc->istate |= IRQS_PENDING; } raw_spin_unlock_irq(>lock); diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 1163089..b7599e9 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -212,7 +212,7 @@ static int __irq_startup(struct irq_desc *desc) return ret; } -int irq_startup(struct irq_desc *desc, bool resend) +int irq_startup(struct irq_desc *desc, bool resend, bool force) { int ret = 0; @@ -892,7 +892,7 @@ __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle, irq_settings_set_norequest(desc); irq_settings_set_nothread(desc); desc->action = _action; - irq_startup(desc, true); + irq_startup(desc, IRQ_RESEND, IRQ_START_FORCE); } } diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index ca4666b..5fd105e 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -66,7 +66,14 @@ extern int __irq_set_trigger(struct irq_desc *desc, unsigned long flags); extern void 
__disable_irq(struct irq_desc *desc); extern void __enable_irq(struct irq_desc *desc); -extern int irq_startup(struct irq_desc *desc, bool resend); +#define IRQ_RESEND true +#define IRQ_NORESEND false + +#define IRQ_START_FORCEtrue +#define IRQ_START_COND false + +extern int irq_startup(struct irq_desc *desc, bool resend, bool force); + extern void irq_shutdown(struct irq_desc *desc); extern void irq_enable(struct irq_desc *desc); extern void irq_disable(struct irq_desc *desc); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 7dcf193..3577c09 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -509,7 +509,7 @@ void __enable_irq(struct irq_desc *desc) * time. If it was already started up, then irq_startup() * will invoke irq_enable() under the hood. */ - irq_startup(desc, true); + irq_startup(desc, IRQ_RESEND, IRQ_START_COND); break; } default: @@ -1306,7 +1306,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) } if (irq_settings_can_autoenable(desc)) { -
[tip:irq/core] genirq: Handle managed irqs gracefully in irq_startup()
Commit-ID: 761ea388e8c4e3ac883a94e16bcc8c51fa419d4f Gitweb: http://git.kernel.org/tip/761ea388e8c4e3ac883a94e16bcc8c51fa419d4f Author: Thomas Gleixner AuthorDate: Tue, 20 Jun 2017 01:37:50 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:24 +0200 genirq: Handle managed irqs gracefully in irq_startup() Affinity managed interrupts should keep their assigned affinity across CPU hotplug. To avoid magic hackery in device drivers, the core code shall manage them transparently and set these interrupts into a managed shutdown state when the last CPU of the assigned affinity mask goes offline. The interrupt will be restarted when one of the CPUs in the assigned affinity mask comes back online. Add the necessary logic to irq_startup(). If an interrupt is requested and started up, the code checks whether it is affinity managed and if so, it checks whether a CPU in the interrupt's affinity mask is online. If not, it puts the interrupt into managed shutdown state. Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235447.189851...@linutronix.de --- include/linux/irq.h | 2 +- kernel/irq/chip.c | 64 ++--- 2 files changed, 62 insertions(+), 4 deletions(-) diff --git a/include/linux/irq.h b/include/linux/irq.h index 0e37276..807042b 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -346,7 +346,7 @@ static inline bool irqd_is_started(struct irq_data *d) return __irqd_to_state(d) & IRQD_IRQ_STARTED; } -static inline bool irqd_is_managed_shutdown(struct irq_data *d) +static inline bool irqd_is_managed_and_shutdown(struct irq_data *d) { return __irqd_to_state(d) & IRQD_MANAGED_SHUTDOWN; } diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index b7599e9..fc89eeb 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -195,6 +195,52 @@ static void irq_state_set_started(struct irq_desc *desc) irqd_set(>irq_data, IRQD_IRQ_STARTED); 
} +enum { + IRQ_STARTUP_NORMAL, + IRQ_STARTUP_MANAGED, + IRQ_STARTUP_ABORT, +}; + +#ifdef CONFIG_SMP +static int +__irq_startup_managed(struct irq_desc *desc, struct cpumask *aff, bool force) +{ + struct irq_data *d = irq_desc_get_irq_data(desc); + + if (!irqd_affinity_is_managed(d)) + return IRQ_STARTUP_NORMAL; + + irqd_clr_managed_shutdown(d); + + if (cpumask_any_and(aff, cpu_online_mask) > nr_cpu_ids) { + /* +* Catch code which fiddles with enable_irq() on a managed +* and potentially shutdown IRQ. Chained interrupt +* installment or irq auto probing should not happen on +* managed irqs either. Emit a warning, break the affinity +* and start it up as a normal interrupt. +*/ + if (WARN_ON_ONCE(force)) + return IRQ_STARTUP_NORMAL; + /* +* The interrupt was requested, but there is no online CPU +* in it's affinity mask. Put it into managed shutdown +* state and let the cpu hotplug mechanism start it up once +* a CPU in the mask becomes available. +*/ + irqd_set_managed_shutdown(d); + return IRQ_STARTUP_ABORT; + } + return IRQ_STARTUP_MANAGED; +} +#else +static int +__irq_startup_managed(struct irq_desc *desc, struct cpumask *aff, bool force) +{ + return IRQ_STARTUP_NORMAL; +} +#endif + static int __irq_startup(struct irq_desc *desc) { struct irq_data *d = irq_desc_get_irq_data(desc); @@ -214,15 +260,27 @@ static int __irq_startup(struct irq_desc *desc) int irq_startup(struct irq_desc *desc, bool resend, bool force) { + struct irq_data *d = irq_desc_get_irq_data(desc); + struct cpumask *aff = irq_data_get_affinity_mask(d); int ret = 0; desc->depth = 0; - if (irqd_is_started(>irq_data)) { + if (irqd_is_started(d)) { irq_enable(desc); } else { - ret = __irq_startup(desc); - irq_setup_affinity(desc); + switch (__irq_startup_managed(desc, aff, force)) { + case IRQ_STARTUP_NORMAL: + ret = __irq_startup(desc); + irq_setup_affinity(desc); + break; + case IRQ_STARTUP_MANAGED: + ret = __irq_startup(desc); + irq_set_affinity_locked(d, aff, false); + break; + case 
IRQ_STARTUP_ABORT: + return 0; + } }
[tip:irq/core] genirq: Split out irq_startup() code
Commit-ID: 708d174b6c32bffc5d73793bc7a267bcafeb6558 Gitweb: http://git.kernel.org/tip/708d174b6c32bffc5d73793bc7a267bcafeb6558 Author: Thomas GleixnerAuthorDate: Tue, 20 Jun 2017 01:37:48 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:24 +0200 genirq: Split out irq_startup() code Split out the inner workings of irq_startup() so it can be reused to handle managed interrupts gracefully. Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235447.033235...@linutronix.de --- kernel/irq/chip.c | 29 ++--- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index e290d73..1163089 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -195,6 +195,23 @@ static void irq_state_set_started(struct irq_desc *desc) irqd_set(>irq_data, IRQD_IRQ_STARTED); } +static int __irq_startup(struct irq_desc *desc) +{ + struct irq_data *d = irq_desc_get_irq_data(desc); + int ret = 0; + + irq_domain_activate_irq(d); + if (d->chip->irq_startup) { + ret = d->chip->irq_startup(d); + irq_state_clr_disabled(desc); + irq_state_clr_masked(desc); + } else { + irq_enable(desc); + } + irq_state_set_started(desc); + return ret; +} + int irq_startup(struct irq_desc *desc, bool resend) { int ret = 0; @@ -204,19 +221,9 @@ int irq_startup(struct irq_desc *desc, bool resend) if (irqd_is_started(>irq_data)) { irq_enable(desc); } else { - irq_domain_activate_irq(>irq_data); - if (desc->irq_data.chip->irq_startup) { - ret = desc->irq_data.chip->irq_startup(>irq_data); - irq_state_clr_disabled(desc); - irq_state_clr_masked(desc); - } else { - irq_enable(desc); - } - irq_state_set_started(desc); - /* Set default affinity mask once everything is setup */ + ret = __irq_startup(desc); irq_setup_affinity(desc); } - if (resend) check_irq_resend(desc);
[tip:irq/core] x86/uv: Create named irq domain
Commit-ID: f8409a6a4bf86e2d90ec8460df2874e4e19ebb27 Gitweb: http://git.kernel.org/tip/f8409a6a4bf86e2d90ec8460df2874e4e19ebb27 Author: Thomas GleixnerAuthorDate: Tue, 20 Jun 2017 01:37:09 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:10 +0200 x86/uv: Create named irq domain Use the fwnode to create a named domain so diagnosis works. Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235443.907511...@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/platform/uv/uv_irq.c | 18 +- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c index 776c659..03fc397 100644 --- a/arch/x86/platform/uv/uv_irq.c +++ b/arch/x86/platform/uv/uv_irq.c @@ -160,13 +160,21 @@ static struct irq_domain *uv_get_irq_domain(void) { static struct irq_domain *uv_domain; static DEFINE_MUTEX(uv_lock); + struct fwnode_handle *fn; mutex_lock(_lock); - if (uv_domain == NULL) { - uv_domain = irq_domain_add_tree(NULL, _domain_ops, NULL); - if (uv_domain) - uv_domain->parent = x86_vector_domain; - } + if (uv_domain) + goto out; + + fn = irq_domain_alloc_named_fwnode("UV-CORE"); + if (!fn) + goto out; + + uv_domain = irq_domain_create_tree(fn, _domain_ops, NULL); + irq_domain_free_fwnode(fn); + if (uv_domain) + uv_domain->parent = x86_vector_domain; +out: mutex_unlock(_lock); return uv_domain;
[tip:irq/core] genirq: Remove mask argument from setup_affinity()
Commit-ID: cba4235e6031e9318d68186f6d765c531cbea4e1 Gitweb: http://git.kernel.org/tip/cba4235e6031e9318d68186f6d765c531cbea4e1 Author: Thomas GleixnerAuthorDate: Tue, 20 Jun 2017 01:37:21 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:14 +0200 genirq: Remove mask argument from setup_affinity() No point to have this alloc/free dance of cpumasks. Provide a static mask for setup_affinity() and protect it proper. Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235444.851571...@linutronix.de --- kernel/irq/internals.h | 2 +- kernel/irq/manage.c| 53 ++ kernel/irq/proc.c | 8 +--- 3 files changed, 29 insertions(+), 34 deletions(-) diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 094db5b..33ca838 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -109,7 +109,7 @@ static inline void unregister_handler_proc(unsigned int irq, extern bool irq_can_set_affinity_usr(unsigned int irq); -extern int irq_select_affinity_usr(unsigned int irq, struct cpumask *mask); +extern int irq_select_affinity_usr(unsigned int irq); extern void irq_set_thread_affinity(struct irq_desc *desc); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 284f4eb..e2f20d5 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -345,15 +345,18 @@ EXPORT_SYMBOL_GPL(irq_set_affinity_notifier); /* * Generic version of the affinity autoselector. 
*/ -static int setup_affinity(struct irq_desc *desc, struct cpumask *mask) +static int irq_setup_affinity(struct irq_desc *desc) { struct cpumask *set = irq_default_affinity; - int node = irq_desc_get_node(desc); + int ret, node = irq_desc_get_node(desc); + static DEFINE_RAW_SPINLOCK(mask_lock); + static struct cpumask mask; /* Excludes PER_CPU and NO_BALANCE interrupts */ if (!__irq_can_set_affinity(desc)) return 0; + raw_spin_lock(_lock); /* * Preserve the managed affinity setting and a userspace affinity * setup, but make sure that one of the targets is online. @@ -367,43 +370,42 @@ static int setup_affinity(struct irq_desc *desc, struct cpumask *mask) irqd_clear(>irq_data, IRQD_AFFINITY_SET); } - cpumask_and(mask, cpu_online_mask, set); + cpumask_and(, cpu_online_mask, set); if (node != NUMA_NO_NODE) { const struct cpumask *nodemask = cpumask_of_node(node); /* make sure at least one of the cpus in nodemask is online */ - if (cpumask_intersects(mask, nodemask)) - cpumask_and(mask, mask, nodemask); + if (cpumask_intersects(, nodemask)) + cpumask_and(, , nodemask); } - irq_do_set_affinity(>irq_data, mask, false); - return 0; + ret = irq_do_set_affinity(>irq_data, , false); + raw_spin_unlock(_lock); + return ret; } #else /* Wrapper for ALPHA specific affinity selector magic */ -static inline int setup_affinity(struct irq_desc *d, struct cpumask *mask) +int irq_setup_affinity(struct irq_desc *desc) { - return irq_select_affinity(irq_desc_get_irq(d)); + return irq_select_affinity(irq_desc_get_irq(desc)); } #endif /* - * Called when affinity is set via /proc/irq + * Called when a bogus affinity is set via /proc/irq */ -int irq_select_affinity_usr(unsigned int irq, struct cpumask *mask) +int irq_select_affinity_usr(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); unsigned long flags; int ret; raw_spin_lock_irqsave(>lock, flags); - ret = setup_affinity(desc, mask); + ret = irq_setup_affinity(desc); raw_spin_unlock_irqrestore(>lock, flags); return ret; } - 
#else -static inline int -setup_affinity(struct irq_desc *desc, struct cpumask *mask) +static inline int setup_affinity(struct irq_desc *desc) { return 0; } @@ -1128,7 +1130,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) struct irqaction *old, **old_ptr; unsigned long flags, thread_mask = 0; int ret, nested, shared = 0; - cpumask_var_t mask; if (!desc) return -EINVAL; @@ -1187,11 +1188,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) } } - if (!alloc_cpumask_var(, GFP_KERNEL)) { - ret = -ENOMEM; - goto out_thread; - } - /* * Drivers are often written to work w/o knowledge about the
[tip:irq/core] genirq: Rename setup_affinity() to irq_setup_affinity()
Commit-ID: 43564bd97d0e6182bbd43b51b33254c728832551 Gitweb: http://git.kernel.org/tip/43564bd97d0e6182bbd43b51b33254c728832551 Author: Thomas GleixnerAuthorDate: Tue, 20 Jun 2017 01:37:22 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:14 +0200 genirq: Rename setup_affinity() to irq_setup_affinity() Rename it with a proper irq_ prefix and make it available for other files in the core code. Preparatory patch for moving the irq affinity setup around. Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235444.928501...@linutronix.de --- kernel/irq/internals.h | 6 ++ kernel/irq/manage.c| 7 +-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 33ca838..2d7927d 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -116,6 +116,12 @@ extern void irq_set_thread_affinity(struct irq_desc *desc); extern int irq_do_set_affinity(struct irq_data *data, const struct cpumask *dest, bool force); +#ifdef CONFIG_SMP +extern int irq_setup_affinity(struct irq_desc *desc); +#else +static inline int irq_setup_affinity(struct irq_desc *desc) { return 0; } +#endif + /* Inline functions for support of irq chips on slow busses */ static inline void chip_bus_lock(struct irq_desc *desc) { diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index e2f20d5..907fb79 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -345,7 +345,7 @@ EXPORT_SYMBOL_GPL(irq_set_affinity_notifier); /* * Generic version of the affinity autoselector. 
*/ -static int irq_setup_affinity(struct irq_desc *desc) +int irq_setup_affinity(struct irq_desc *desc) { struct cpumask *set = irq_default_affinity; int ret, node = irq_desc_get_node(desc); @@ -404,11 +404,6 @@ int irq_select_affinity_usr(unsigned int irq) raw_spin_unlock_irqrestore(>lock, flags); return ret; } -#else -static inline int setup_affinity(struct irq_desc *desc) -{ - return 0; -} #endif /**
[tip:irq/core] genirq/cpuhotplug: Add support for conditional masking
Commit-ID: 47a06d3a783217acae02976f15ca07ddc1ac024f Gitweb: http://git.kernel.org/tip/47a06d3a783217acae02976f15ca07ddc1ac024f Author: Thomas GleixnerAuthorDate: Tue, 20 Jun 2017 01:37:30 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:17 +0200 genirq/cpuhotplug: Add support for conditional masking Interrupts which cannot be migrated in process context, need to be masked before the affinity is changed forcefully. Add support for that. Will be compiled out for architectures which do not have this x86 specific issue. Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235445.604565...@linutronix.de --- kernel/irq/cpuhotplug.c | 11 ++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c index 4be4bd6..6f46587 100644 --- a/kernel/irq/cpuhotplug.c +++ b/kernel/irq/cpuhotplug.c @@ -18,6 +18,7 @@ static bool migrate_one_irq(struct irq_desc *desc) { struct irq_data *d = irq_desc_get_irq_data(desc); struct irq_chip *chip = irq_data_get_irq_chip(d); + bool maskchip = !irq_can_move_pcntxt(d) && !irqd_irq_masked(d); const struct cpumask *affinity; bool brokeaff = false; int err; @@ -69,6 +70,10 @@ static bool migrate_one_irq(struct irq_desc *desc) if (irq_fixup_move_pending(desc, true)) affinity = irq_desc_get_pending_mask(desc); + /* Mask the chip for interrupts which cannot move in process context */ + if (maskchip && chip->irq_mask) + chip->irq_mask(d); + if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { affinity = cpu_online_mask; brokeaff = true; @@ -78,8 +83,12 @@ static bool migrate_one_irq(struct irq_desc *desc) if (err) { pr_warn_ratelimited("IRQ%u: set affinity failed(%d).\n", d->irq, err); - return false; + brokeaff = false; } + + if (maskchip && chip->irq_unmask) + chip->irq_unmask(d); + return brokeaff; }
[tip:irq/core] genirq/cpuhotplug: Set force affinity flag on hotplug migration
Commit-ID: 77f85e66aa8be563ae5804eebf74a78ec6ef Gitweb: http://git.kernel.org/tip/77f85e66aa8be563ae5804eebf74a78ec6ef Author: Thomas GleixnerAuthorDate: Tue, 20 Jun 2017 01:37:31 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:18 +0200 genirq/cpuhotplug: Set force affinity flag on hotplug migration Set the force migration flag when migrating interrupts away from an outgoing CPU. Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235445.681874...@linutronix.de --- kernel/irq/cpuhotplug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c index 6f46587..e09cb91 100644 --- a/kernel/irq/cpuhotplug.c +++ b/kernel/irq/cpuhotplug.c @@ -79,7 +79,7 @@ static bool migrate_one_irq(struct irq_desc *desc) brokeaff = true; } - err = irq_do_set_affinity(d, affinity, false); + err = irq_do_set_affinity(d, affinity, true); if (err) { pr_warn_ratelimited("IRQ%u: set affinity failed(%d).\n", d->irq, err);
[tip:irq/core] x86/irq: Restructure fixup_irqs()
Commit-ID: 654abd0a7baf144998147787121da0f9422dafc8 Gitweb: http://git.kernel.org/tip/654abd0a7baf144998147787121da0f9422dafc8 Author: Thomas GleixnerAuthorDate: Tue, 20 Jun 2017 01:37:32 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:18 +0200 x86/irq: Restructure fixup_irqs() Reorder fixup_irqs() so it matches the flow in the generic migration code. Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235445.774272...@linutronix.de --- arch/x86/kernel/irq.c | 46 -- 1 file changed, 20 insertions(+), 26 deletions(-) diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 9696007d..78bd2b8 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -433,7 +433,6 @@ int check_irq_vectors_for_cpu_disable(void) void fixup_irqs(void) { unsigned int irq, vector; - static int warned; struct irq_desc *desc; struct irq_data *data; struct irq_chip *chip; @@ -441,18 +440,27 @@ void fixup_irqs(void) for_each_irq_desc(irq, desc) { const struct cpumask *affinity; - int break_affinity = 0; - int set_affinity = 1; + bool break_affinity = false; if (!desc) continue; - if (irq == 2) - continue; /* interrupt's are disabled at this point */ raw_spin_lock(>lock); data = irq_desc_get_irq_data(desc); + chip = irq_data_get_irq_chip(data); + /* +* The interrupt descriptor might have been cleaned up +* already, but it is not yet removed from the radix +* tree. If the chip does not have an affinity setter, +* nothing to do here. +*/ + if (!chip && !chip->irq_set_affinity) { + raw_spin_unlock(>lock); + continue; + } + affinity = irq_data_get_affinity_mask(data); if (!irq_has_action(irq) || irqd_is_per_cpu(data) || @@ -485,30 +493,18 @@ void fixup_irqs(void) * affinity and use cpu_online_mask as fall back. 
*/ if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { - break_affinity = 1; + broke_affinity = true; affinity = cpu_online_mask; } - chip = irq_data_get_irq_chip(data); - /* -* The interrupt descriptor might have been cleaned up -* already, but it is not yet removed from the radix tree -*/ - if (!chip) { - raw_spin_unlock(>lock); - continue; - } - if (!irqd_can_move_in_process_context(data) && chip->irq_mask) chip->irq_mask(data); - if (chip->irq_set_affinity) { - ret = chip->irq_set_affinity(data, affinity, true); - if (ret == -ENOSPC) - pr_crit("IRQ %d set affinity failed because there are no available vectors. The device assigned to this IRQ is unstable.\n", irq); - } else { - if (!(warned++)) - set_affinity = 0; + ret = chip->irq_set_affinity(data, affinity, true); + if (ret) { + pr_crit("IRQ %u: Force affinity failed (%d)\n", + d->irq, ret); + broke_affinity = false; } /* @@ -522,10 +518,8 @@ void fixup_irqs(void) raw_spin_unlock(>lock); - if (break_affinity && set_affinity) + if (broke_affinity) pr_notice("Broke affinity for irq %i\n", irq); - else if (!set_affinity) - pr_notice("Cannot set affinity for irq %i\n", irq); } /*
[tip:irq/core] genirq: Introduce IRQD_MANAGED_SHUTDOWN
Commit-ID: 54fdf6a0875ca380647ac1cc9b5b8f2dbbbfa131 Gitweb: http://git.kernel.org/tip/54fdf6a0875ca380647ac1cc9b5b8f2dbbbfa131 Author: Thomas Gleixner AuthorDate: Tue, 20 Jun 2017 01:37:47 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:23 +0200 genirq: Introduce IRQD_MANAGED_SHUTDOWN Affinity managed interrupts should keep their assigned affinity across CPU hotplug. To avoid magic hackery in device drivers, the core code shall manage them transparently. This will set these interrupts into a managed shutdown state when the last CPU of the assigned affinity mask goes offline. The interrupt will be restarted when one of the CPUs in the assigned affinity mask comes back online. Introduce the necessary state flag and the accessor functions. Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235446.954523...@linutronix.de --- include/linux/irq.h| 8 kernel/irq/internals.h | 10 ++ 2 files changed, 18 insertions(+) diff --git a/include/linux/irq.h b/include/linux/irq.h index 4087ef2..0e37276 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -207,6 +207,8 @@ struct irq_data { * IRQD_FORWARDED_TO_VCPU - The interrupt is forwarded to a VCPU * IRQD_AFFINITY_MANAGED - Affinity is auto-managed by the kernel * IRQD_IRQ_STARTED- Startup state of the interrupt + * IRQD_MANAGED_SHUTDOWN - Interrupt was shutdown due to empty affinity + * mask. Applies only to affinity managed irqs. 
*/ enum { IRQD_TRIGGER_MASK = 0xf, @@ -225,6 +227,7 @@ enum { IRQD_FORWARDED_TO_VCPU = (1 << 20), IRQD_AFFINITY_MANAGED = (1 << 21), IRQD_IRQ_STARTED= (1 << 22), + IRQD_MANAGED_SHUTDOWN = (1 << 23), }; #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) @@ -343,6 +346,11 @@ static inline bool irqd_is_started(struct irq_data *d) return __irqd_to_state(d) & IRQD_IRQ_STARTED; } +static inline bool irqd_is_managed_shutdown(struct irq_data *d) +{ + return __irqd_to_state(d) & IRQD_MANAGED_SHUTDOWN; +} + #undef __irqd_to_state static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 040806f..ca4666b 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -193,6 +193,16 @@ static inline void irqd_clr_move_pending(struct irq_data *d) __irqd_to_state(d) &= ~IRQD_SETAFFINITY_PENDING; } +static inline void irqd_set_managed_shutdown(struct irq_data *d) +{ + __irqd_to_state(d) |= IRQD_MANAGED_SHUTDOWN; +} + +static inline void irqd_clr_managed_shutdown(struct irq_data *d) +{ + __irqd_to_state(d) &= ~IRQD_MANAGED_SHUTDOWN; +} + static inline void irqd_clear(struct irq_data *d, unsigned int mask) { __irqd_to_state(d) &= ~mask;
[tip:irq/core] genirq: Allow fwnode to carry name information only
Commit-ID: d59f6617eef0f76e34f7a9993f5645c5ef467e42 Gitweb: http://git.kernel.org/tip/d59f6617eef0f76e34f7a9993f5645c5ef467e42 Author: Thomas Gleixner AuthorDate: Tue, 20 Jun 2017 01:37:05 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:08 +0200 genirq: Allow fwnode to carry name information only In order to provide a proper debug interface it's required to have domain names available when the domain is added. Non fwnode based architectures like x86 have no way to do so. It's not possible to use domain ops or host data for this as domain ops might be the same for several instances, but the names have to be unique. Extend the irqchip fwnode to allow transporting the domain name. If no node is supplied, create an 'unknown-N' placeholder. Warn if an invalid node is supplied and treat it like no node. This happens e.g. with i2c devices on x86 which hand in an ACPI type node which has no interface for retrieving the name. [ Folded a fix from Marc to make DT name parsing work ] Signed-off-by: Thomas Gleixner Acked-by: Marc Zyngier Cc: Jens Axboe Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235443.588784...@linutronix.de --- include/linux/irqdomain.h | 31 +- kernel/irq/irqdomain.c| 105 -- 2 files changed, 122 insertions(+), 14 deletions(-) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 9f36160..9cf32a2 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -189,6 +189,9 @@ enum { /* Irq domain implements MSI remapping */ IRQ_DOMAIN_FLAG_MSI_REMAP = (1 << 5), + /* Irq domain name was allocated in __irq_domain_add() */ + IRQ_DOMAIN_NAME_ALLOCATED = (1 << 6), + /* * Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved * for implementation specific purposes and ignored by the @@ -203,7 +206,33 @@ static inline struct device_node *irq_domain_get_of_node(struct irq_domain *d) } #ifdef CONFIG_IRQ_DOMAIN -struct fwnode_handle 
*irq_domain_alloc_fwnode(void *data); +struct fwnode_handle *__irq_domain_alloc_fwnode(unsigned int type, int id, + const char *name, void *data); + +enum { + IRQCHIP_FWNODE_REAL, + IRQCHIP_FWNODE_NAMED, + IRQCHIP_FWNODE_NAMED_ID, +}; + +static inline +struct fwnode_handle *irq_domain_alloc_named_fwnode(const char *name) +{ + return __irq_domain_alloc_fwnode(IRQCHIP_FWNODE_NAMED, 0, name, NULL); +} + +static inline +struct fwnode_handle *irq_domain_alloc_named_id_fwnode(const char *name, int id) +{ + return __irq_domain_alloc_fwnode(IRQCHIP_FWNODE_NAMED_ID, id, name, +NULL); +} + +static inline struct fwnode_handle *irq_domain_alloc_fwnode(void *data) +{ + return __irq_domain_alloc_fwnode(IRQCHIP_FWNODE_REAL, 0, NULL, data); +} + void irq_domain_free_fwnode(struct fwnode_handle *fwnode); struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size, irq_hw_number_t hwirq_max, int direct_max, diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 70b9da7..e1b925b 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -26,39 +26,61 @@ static struct irq_domain *irq_default_domain; static void irq_domain_check_hierarchy(struct irq_domain *domain); struct irqchip_fwid { - struct fwnode_handle fwnode; - char *name; - void *data; + struct fwnode_handlefwnode; + unsigned inttype; + char*name; + void*data; }; /** * irq_domain_alloc_fwnode - Allocate a fwnode_handle suitable for * identifying an irq domain - * @data: optional user-provided data + * @type: Type of irqchip_fwnode. See linux/irqdomain.h + * @name: Optional user provided domain name + * @id:Optional user provided id if name != NULL + * @data: Optional user-provided data * - * Allocate a struct device_node, and return a poiner to the embedded + * Allocate a struct irqchip_fwid, and return a poiner to the embedded * fwnode_handle (or NULL on failure). 
+ * + * Note: The types IRQCHIP_FWNODE_NAMED and IRQCHIP_FWNODE_NAMED_ID are + * solely to transport name information to irqdomain creation code. The + * node is not stored. For other types the pointer is kept in the irq + * domain struct. */ -struct fwnode_handle *irq_domain_alloc_fwnode(void *data) +struct fwnode_handle *__irq_domain_alloc_fwnode(unsigned int type, int id, + const char *name, void *data) {
[tip:irq/core] genirq/msi: Prevent overwriting domain name
Commit-ID: 0165308a2f994939d2e1b36624f5a8f57746bc88 Gitweb: http://git.kernel.org/tip/0165308a2f994939d2e1b36624f5a8f57746bc88 Author: Thomas GleixnerAuthorDate: Tue, 20 Jun 2017 01:37:04 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:08 +0200 genirq/msi: Prevent overwriting domain name Prevent overwriting an already assigned domain name. Remove the extra check for chip->name, because if domain->name is NULL overwriting it with NULL is not a problem. Signed-off-by: Thomas Gleixner Acked-by: Marc Zyngier Cc: Jens Axboe Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235443.510684...@linutronix.de --- kernel/irq/msi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index fe4d48e..9e3f185 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -274,7 +274,8 @@ struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode, domain = irq_domain_create_hierarchy(parent, IRQ_DOMAIN_FLAG_MSI, 0, fwnode, _domain_ops, info); - if (domain && info->chip && info->chip->name) + + if (domain && !domain->name && info->chip) domain->name = info->chip->name; return domain;
[tip:irq/core] iommu/amd: Add name to irq chip
Commit-ID: 290be194ba9d489e1857cc45d0dd24bf3429156b Gitweb: http://git.kernel.org/tip/290be194ba9d489e1857cc45d0dd24bf3429156b Author: Thomas Gleixner AuthorDate: Tue, 20 Jun 2017 01:37:02 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:07 +0200 iommu/amd: Add name to irq chip Add the missing name, so debugging will work properly. Signed-off-by: Thomas Gleixner Acked-by: Joerg Roedel Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: io...@lists.linux-foundation.org Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235443.343236...@linutronix.de --- drivers/iommu/amd_iommu.c | 9 + 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 63cacf5..590e1e8 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -4386,10 +4386,11 @@ static void ir_compose_msi_msg(struct irq_data *irq_data, struct msi_msg *msg) } static struct irq_chip amd_ir_chip = { - .irq_ack = ir_ack_apic_edge, - .irq_set_affinity = amd_ir_set_affinity, - .irq_set_vcpu_affinity = amd_ir_set_vcpu_affinity, - .irq_compose_msi_msg = ir_compose_msi_msg, + .name = "AMD-IR", + .irq_ack= ir_ack_apic_edge, + .irq_set_affinity = amd_ir_set_affinity, + .irq_set_vcpu_affinity = amd_ir_set_vcpu_affinity, + .irq_compose_msi_msg= ir_compose_msi_msg, }; int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
[tip:irq/core] genirq: Move initial affinity setup to irq_startup()
Commit-ID: 2e051552df69af6d134c2592d0d6f1ac80f01190 Gitweb: http://git.kernel.org/tip/2e051552df69af6d134c2592d0d6f1ac80f01190 Author: Thomas Gleixner AuthorDate: Tue, 20 Jun 2017 01:37:23 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:15 +0200 genirq: Move initial affinity setup to irq_startup() The startup vs. setaffinity ordering of interrupts depends on the IRQF_NOAUTOEN flag. Chained interrupts are not getting any affinity assignment at all. A regular interrupt is started up and then the affinity is set. An IRQF_NOAUTOEN marked interrupt is not started up, but the affinity is set nevertheless. Move the affinity setup to irq_startup() so the ordering is always the same and chained interrupts get the proper default affinity assigned as well. Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235445.020534...@linutronix.de --- kernel/irq/chip.c | 2 ++ kernel/irq/manage.c | 15 ++- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index bc1331f..e290d73 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -213,6 +213,8 @@ int irq_startup(struct irq_desc *desc, bool resend) irq_enable(desc); } irq_state_set_started(desc); + /* Set default affinity mask once everything is setup */ + irq_setup_affinity(desc); } if (resend) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 907fb79..1e28307 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1327,6 +1327,12 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) if (new->flags & IRQF_ONESHOT) desc->istate |= IRQS_ONESHOT; + /* Exclude IRQ from balancing if requested */ + if (new->flags & IRQF_NOBALANCING) { + irq_settings_set_no_balancing(desc); + irqd_set(&desc->irq_data, IRQD_NO_BALANCING); + } + if (irq_settings_can_autoenable(desc)) { irq_startup(desc, true); } else { @@ 
-1341,15 +1347,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) desc->depth = 1; } - /* Exclude IRQ from balancing if requested */ - if (new->flags & IRQF_NOBALANCING) { - irq_settings_set_no_balancing(desc); - irqd_set(&desc->irq_data, IRQD_NO_BALANCING); - } - - /* Set default affinity mask once everything is setup */ - irq_setup_affinity(desc); - } else if (new->flags & IRQF_TRIGGER_MASK) { unsigned int nmsk = new->flags & IRQF_TRIGGER_MASK; unsigned int omsk = irqd_get_trigger_type(&desc->irq_data);
[tip:irq/core] genirq/cpuhotplug: Use effective affinity mask
Commit-ID: 415fcf1a2293046e0c1f4ab8558a87bad66652b1 Gitweb: http://git.kernel.org/tip/415fcf1a2293046e0c1f4ab8558a87bad66652b1 Author: Thomas Gleixner AuthorDate: Tue, 20 Jun 2017 01:37:39 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:21 +0200 genirq/cpuhotplug: Use effective affinity mask If the architecture supports the effective affinity mask, migrating interrupts away which are not targeted by the effective mask is pointless. They can stay in the user or system supplied affinity mask, but won't be targeted at any given point as the affinity setter functions need to validate against the online cpu mask anyway. Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235446.328488...@linutronix.de --- kernel/irq/cpuhotplug.c | 14 +++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c index e09cb91..0b093db 100644 --- a/kernel/irq/cpuhotplug.c +++ b/kernel/irq/cpuhotplug.c @@ -14,6 +14,14 @@ #include "internals.h" +/* For !GENERIC_IRQ_EFFECTIVE_AFF_MASK this looks at general affinity mask */ +static inline bool irq_needs_fixup(struct irq_data *d) +{ + const struct cpumask *m = irq_data_get_effective_affinity_mask(d); + + return cpumask_test_cpu(smp_processor_id(), m); +} + static bool migrate_one_irq(struct irq_desc *desc) { struct irq_data *d = irq_desc_get_irq_data(desc); @@ -42,9 +50,7 @@ static bool migrate_one_irq(struct irq_desc *desc) * Note: Do not check desc->action as this might be a chained * interrupt. */ - affinity = irq_data_get_affinity_mask(d); - if (irqd_is_per_cpu(d) || !irqd_is_started(d) || - !cpumask_test_cpu(smp_processor_id(), affinity)) { + if (irqd_is_per_cpu(d) || !irqd_is_started(d) || !irq_needs_fixup(d)) { /* * If an irq move is pending, abort it if the dying CPU is * the sole target. 
@@ -69,6 +75,8 @@ static bool migrate_one_irq(struct irq_desc *desc) */ if (irq_fixup_move_pending(desc, true)) affinity = irq_desc_get_pending_mask(desc); + else + affinity = irq_data_get_affinity_mask(d); /* Mask the chip for interrupts which cannot move in process context */ if (maskchip && chip->irq_mask)
[tip:irq/core] x86/apic: Move flat_cpu_mask_to_apicid_and() into C source
Commit-ID: ad95212ee6e0b62f38b287b40c9ab6a1ba3e892b Gitweb: http://git.kernel.org/tip/ad95212ee6e0b62f38b287b40c9ab6a1ba3e892b Author: Thomas GleixnerAuthorDate: Tue, 20 Jun 2017 01:37:40 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:21 +0200 x86/apic: Move flat_cpu_mask_to_apicid_and() into C source No point in having inlines assigned to function pointers at multiple places. Just bloats the text. Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235446.405975...@linutronix.de --- arch/x86/include/asm/apic.h | 28 ++-- arch/x86/kernel/apic/apic.c | 16 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index bdffcd9..a86be0a 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -540,28 +540,12 @@ static inline int default_phys_pkg_id(int cpuid_apic, int index_msb) #endif -static inline int -flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask, - unsigned int *apicid) -{ - unsigned long cpu_mask = cpumask_bits(cpumask)[0] & -cpumask_bits(andmask)[0] & -cpumask_bits(cpu_online_mask)[0] & -APIC_ALL_CPUS; - - if (likely(cpu_mask)) { - *apicid = (unsigned int)cpu_mask; - return 0; - } else { - return -EINVAL; - } -} - -extern int -default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask, - unsigned int *apicid); +extern int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask, + unsigned int *apicid); +extern int default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask, + unsigned int *apicid); static inline void flat_vector_allocation_domain(int cpu, struct cpumask *retmask, diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 2d75faf..e9b322f 100644 --- 
a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2220,6 +2220,22 @@ int default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, return -EINVAL; } +int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask, + unsigned int *apicid) +{ + unsigned long cpu_mask = cpumask_bits(cpumask)[0] & +cpumask_bits(andmask)[0] & +cpumask_bits(cpu_online_mask)[0] & +APIC_ALL_CPUS; + + if (likely(cpu_mask)) { + *apicid = (unsigned int)cpu_mask; + return 0; + } + return -EINVAL; +} + /* * Override the generic EOI implementation with an optimized version. * Only called during early boot when only one CPU is active and with
[tip:irq/core] x86/msi: Create named irq domains
Commit-ID: f8f37ca78915b51a73bf240409fcda30d811b76b Gitweb: http://git.kernel.org/tip/f8f37ca78915b51a73bf240409fcda30d811b76b Author: Thomas GleixnerAuthorDate: Tue, 20 Jun 2017 01:37:14 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:11 +0200 x86/msi: Create named irq domains Use the fwnode to create named irq domains so diagnosis works. Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235444.299024...@linutronix.de --- arch/x86/kernel/apic/msi.c | 42 +- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index d79dc2a..9b18be7 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -136,13 +136,20 @@ static struct msi_domain_info pci_msi_domain_info = { .handler_name = "edge", }; -void arch_init_msi_domain(struct irq_domain *parent) +void __init arch_init_msi_domain(struct irq_domain *parent) { + struct fwnode_handle *fn; + if (disable_apic) return; - msi_default_domain = pci_msi_create_irq_domain(NULL, - _msi_domain_info, parent); + fn = irq_domain_alloc_named_fwnode("PCI-MSI"); + if (fn) { + msi_default_domain = + pci_msi_create_irq_domain(fn, _msi_domain_info, + parent); + irq_domain_free_fwnode(fn); + } if (!msi_default_domain) pr_warn("failed to initialize irqdomain for MSI/MSI-x.\n"); } @@ -230,13 +237,20 @@ static struct irq_domain *dmar_get_irq_domain(void) { static struct irq_domain *dmar_domain; static DEFINE_MUTEX(dmar_lock); + struct fwnode_handle *fn; mutex_lock(_lock); - if (dmar_domain == NULL) - dmar_domain = msi_create_irq_domain(NULL, _msi_domain_info, + if (dmar_domain) + goto out; + + fn = irq_domain_alloc_named_fwnode("DMAR-MSI"); + if (fn) { + dmar_domain = msi_create_irq_domain(fn, _msi_domain_info, x86_vector_domain); + irq_domain_free_fwnode(fn); + } +out: mutex_unlock(_lock); - return dmar_domain; } @@ 
-326,9 +340,10 @@ static struct msi_domain_info hpet_msi_domain_info = { struct irq_domain *hpet_create_irq_domain(int hpet_id) { - struct irq_domain *parent; - struct irq_alloc_info info; struct msi_domain_info *domain_info; + struct irq_domain *parent, *d; + struct irq_alloc_info info; + struct fwnode_handle *fn; if (x86_vector_domain == NULL) return NULL; @@ -349,7 +364,16 @@ struct irq_domain *hpet_create_irq_domain(int hpet_id) else hpet_msi_controller.name = "IR-HPET-MSI"; - return msi_create_irq_domain(NULL, domain_info, parent); + fn = irq_domain_alloc_named_id_fwnode(hpet_msi_controller.name, + hpet_id); + if (!fn) { + kfree(domain_info); + return NULL; + } + + d = msi_create_irq_domain(fn, domain_info, parent); + irq_domain_free_fwnode(fn); + return d; } int hpet_assign_irq(struct irq_domain *domain, struct hpet_dev *dev,
[tip:irq/core] x86/msi: Provide new iommu irqdomain interface
Commit-ID: 667724c5a3109675cf3bfe7d75795b8608d1bcbe Gitweb: http://git.kernel.org/tip/667724c5a3109675cf3bfe7d75795b8608d1bcbe Author: Thomas GleixnerAuthorDate: Tue, 20 Jun 2017 01:37:10 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:10 +0200 x86/msi: Provide new iommu irqdomain interface Provide a new interface for creating the iommu remapping domains, so that the caller can supply a name and a id in order to create named irqdomains. Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Joerg Roedel Cc: Keith Busch Cc: Peter Zijlstra Cc: io...@lists.linux-foundation.org Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235443.986661...@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/irq_remapping.h | 2 ++ arch/x86/kernel/apic/msi.c | 15 +++ 2 files changed, 17 insertions(+) diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index a210eba..0398675 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -56,6 +56,8 @@ irq_remapping_get_irq_domain(struct irq_alloc_info *info); /* Create PCI MSI/MSIx irqdomain, use @parent as the parent irqdomain. 
*/ extern struct irq_domain *arch_create_msi_irq_domain(struct irq_domain *parent); +extern struct irq_domain * +arch_create_remap_msi_irq_domain(struct irq_domain *par, const char *n, int id); /* Get parent irqdomain for interrupt remapping irqdomain */ static inline struct irq_domain *arch_get_ir_parent_domain(void) diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index c61aec7..0e6618e 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -167,10 +167,25 @@ static struct msi_domain_info pci_msi_ir_domain_info = { .handler_name = "edge", }; +struct irq_domain *arch_create_remap_msi_irq_domain(struct irq_domain *parent, + const char *name, int id) +{ + struct fwnode_handle *fn; + struct irq_domain *d; + + fn = irq_domain_alloc_named_id_fwnode(name, id); + if (!fn) + return NULL; + d = pci_msi_create_irq_domain(fn, _msi_ir_domain_info, parent); + irq_domain_free_fwnode(fn); + return d; +} + struct irq_domain *arch_create_msi_irq_domain(struct irq_domain *parent) { return pci_msi_create_irq_domain(NULL, _msi_ir_domain_info, parent); } + #endif #ifdef CONFIG_DMAR_TABLE
[tip:irq/core] iommu/amd: Use named irq domain interface
Commit-ID: 3e49a8182277ea57736285aede5f43bfa6aa11b1 Gitweb: http://git.kernel.org/tip/3e49a8182277ea57736285aede5f43bfa6aa11b1 Author: Thomas GleixnerAuthorDate: Tue, 20 Jun 2017 01:37:12 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:11 +0200 iommu/amd: Use named irq domain interface Signed-off-by: Thomas Gleixner Acked-by: Joerg Roedel Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: io...@lists.linux-foundation.org Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235444.142270...@linutronix.de --- drivers/iommu/amd_iommu.c | 13 ++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 590e1e8..503849d 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -4395,13 +4395,20 @@ static struct irq_chip amd_ir_chip = { int amd_iommu_create_irq_domain(struct amd_iommu *iommu) { - iommu->ir_domain = irq_domain_add_tree(NULL, _ir_domain_ops, iommu); + struct fwnode_handle *fn; + + fn = irq_domain_alloc_named_id_fwnode("AMD-IR", iommu->index); + if (!fn) + return -ENOMEM; + iommu->ir_domain = irq_domain_create_tree(fn, _ir_domain_ops, iommu); + irq_domain_free_fwnode(fn); if (!iommu->ir_domain) return -ENOMEM; iommu->ir_domain->parent = arch_get_ir_parent_domain(); - iommu->msi_domain = arch_create_msi_irq_domain(iommu->ir_domain); - + iommu->msi_domain = arch_create_remap_msi_irq_domain(iommu->ir_domain, +"AMD-IR-MSI", +iommu->index); return 0; }
[tip:irq/core] iommu/vt-d: Use named irq domain interface
Commit-ID: cea29b656a5e5f1a7b7de42795c3ae6fc417ab0b Gitweb: http://git.kernel.org/tip/cea29b656a5e5f1a7b7de42795c3ae6fc417ab0b Author: Thomas GleixnerAuthorDate: Tue, 20 Jun 2017 01:37:11 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:10 +0200 iommu/vt-d: Use named irq domain interface Signed-off-by: Thomas Gleixner Acked-by: Joerg Roedel Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: io...@lists.linux-foundation.org Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235444.063083...@linutronix.de --- drivers/iommu/intel_irq_remapping.c | 22 -- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index ba5b580..8fc641e 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -500,8 +500,9 @@ static void iommu_enable_irq_remapping(struct intel_iommu *iommu) static int intel_setup_irq_remapping(struct intel_iommu *iommu) { struct ir_table *ir_table; - struct page *pages; + struct fwnode_handle *fn; unsigned long *bitmap; + struct page *pages; if (iommu->ir_table) return 0; @@ -525,15 +526,24 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) goto out_free_pages; } - iommu->ir_domain = irq_domain_add_hierarchy(arch_get_ir_parent_domain(), - 0, INTR_REMAP_TABLE_ENTRIES, - NULL, _ir_domain_ops, - iommu); + fn = irq_domain_alloc_named_id_fwnode("INTEL-IR", iommu->seq_id); + if (!fn) + goto out_free_bitmap; + + iommu->ir_domain = + irq_domain_create_hierarchy(arch_get_ir_parent_domain(), + 0, INTR_REMAP_TABLE_ENTRIES, + fn, _ir_domain_ops, + iommu); + irq_domain_free_fwnode(fn); if (!iommu->ir_domain) { pr_err("IR%d: failed to allocate irqdomain\n", iommu->seq_id); goto out_free_bitmap; } - iommu->ir_msi_domain = arch_create_msi_irq_domain(iommu->ir_domain); + iommu->ir_msi_domain = + arch_create_remap_msi_irq_domain(iommu->ir_domain, +"INTEL-IR-MSI", 
+iommu->seq_id); ir_table->base = page_address(pages); ir_table->bitmap = bitmap;
[tip:irq/core] x86/irq: Use irq_migrate_all_off_this_cpu()
Commit-ID: ad7a929fa4bb1143357aa83043a149d5c27c68fd Gitweb: http://git.kernel.org/tip/ad7a929fa4bb1143357aa83043a149d5c27c68fd Author: Thomas GleixnerAuthorDate: Tue, 20 Jun 2017 01:37:33 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:18 +0200 x86/irq: Use irq_migrate_all_off_this_cpu() The generic migration code supports all the required features already. Remove the x86 specific implementation and use the generic one. Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235445.851311...@linutronix.de --- arch/x86/Kconfig | 1 + arch/x86/kernel/irq.c | 89 ++- 2 files changed, 3 insertions(+), 87 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0efb4c9..fcf1dad 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -87,6 +87,7 @@ config X86 select GENERIC_EARLY_IOREMAP select GENERIC_FIND_FIRST_BIT select GENERIC_IOMAP + select GENERIC_IRQ_MIGRATIONif SMP select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW select GENERIC_PENDING_IRQ if SMP diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 78bd2b8..4aa03c5 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -432,95 +432,12 @@ int check_irq_vectors_for_cpu_disable(void) /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ void fixup_irqs(void) { - unsigned int irq, vector; + unsigned int irr, vector; struct irq_desc *desc; struct irq_data *data; struct irq_chip *chip; - int ret; - for_each_irq_desc(irq, desc) { - const struct cpumask *affinity; - bool break_affinity = false; - - if (!desc) - continue; - - /* interrupt's are disabled at this point */ - raw_spin_lock(>lock); - - data = irq_desc_get_irq_data(desc); - chip = irq_data_get_irq_chip(data); - /* -* The interrupt descriptor might have been cleaned up -* already, but it is not yet removed from the radix -* tree. 
If the chip does not have an affinity setter, -* nothing to do here. -*/ - if (!chip && !chip->irq_set_affinity) { - raw_spin_unlock(>lock); - continue; - } - - affinity = irq_data_get_affinity_mask(data); - - if (!irq_has_action(irq) || irqd_is_per_cpu(data) || - cpumask_subset(affinity, cpu_online_mask)) { - irq_fixup_move_pending(desc, false); - raw_spin_unlock(>lock); - continue; - } - - /* -* Complete an eventually pending irq move cleanup. If this -* interrupt was moved in hard irq context, then the -* vectors need to be cleaned up. It can't wait until this -* interrupt actually happens and this CPU was involved. -*/ - irq_force_complete_move(desc); - - /* -* If there is a setaffinity pending, then try to reuse the -* pending mask, so the last change of the affinity does -* not get lost. If there is no move pending or the pending -* mask does not contain any online CPU, use the current -* affinity mask. -*/ - if (irq_fixup_move_pending(desc, true)) - affinity = desc->pending_mask; - - /* -* If the mask does not contain an offline CPU, break -* affinity and use cpu_online_mask as fall back. -*/ - if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { - broke_affinity = true; - affinity = cpu_online_mask; - } - - if (!irqd_can_move_in_process_context(data) && chip->irq_mask) - chip->irq_mask(data); - - ret = chip->irq_set_affinity(data, affinity, true); - if (ret) { - pr_crit("IRQ %u: Force affinity failed (%d)\n", - d->irq, ret); - broke_affinity = false; - } - - /* -* We unmask if the irq was not marked masked by the -* core code. That respects the lazy irq disable -* behaviour. -
[tip:irq/core] x86/apic: Mark single target interrupts
Commit-ID: 3ca57222c36ba31b80aa25de313f3c8ab26a8102 Gitweb: http://git.kernel.org/tip/3ca57222c36ba31b80aa25de313f3c8ab26a8102 Author: Thomas GleixnerAuthorDate: Tue, 20 Jun 2017 01:37:54 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:26 +0200 x86/apic: Mark single target interrupts If the interrupt destination mode of the APIC is physical then the effective affinity is restricted to a single CPU. Mark the interrupt accordingly in the domain allocation code, so the core code can avoid pointless affinity setting attempts. Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235447.508846...@linutronix.de --- arch/x86/kernel/apic/vector.c | 7 +++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index b270a76..2567dc0 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -371,6 +371,13 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, irq_data); if (err) goto error; + /* +* If the apic destination mode is physical, then the +* effective affinity is restricted to a single target +* CPU. Mark the interrupt accordingly. +*/ + if (!apic->irq_dest_mode) + irqd_set_single_target(irq_data); } return 0;
[tip:irq/core] genirq: Introduce IRQD_SINGLE_TARGET flag
Commit-ID: d52dd44175bd27ad9d8e34a994fb80877c1f6d61 Gitweb: http://git.kernel.org/tip/d52dd44175bd27ad9d8e34a994fb80877c1f6d61 Author: Thomas GleixnerAuthorDate: Tue, 20 Jun 2017 01:37:52 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:25 +0200 genirq: Introduce IRQD_SINGLE_TARGET flag Many interrupt chips allow only a single CPU as interrupt target. The core code has no knowledge about that. That's unfortunate as it could avoid trying to readd a newly online CPU to the effective affinity mask. Add the status flag and the necessary accessors. Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235447.352343...@linutronix.de --- include/linux/irq.h | 16 kernel/irq/debugfs.c | 1 + 2 files changed, 17 insertions(+) diff --git a/include/linux/irq.h b/include/linux/irq.h index 19cea63..00db35b 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -209,6 +209,7 @@ struct irq_data { * IRQD_IRQ_STARTED- Startup state of the interrupt * IRQD_MANAGED_SHUTDOWN - Interrupt was shutdown due to empty affinity * mask. Applies only to affinity managed irqs. + * IRQD_SINGLE_TARGET - IRQ allows only a single affinity target */ enum { IRQD_TRIGGER_MASK = 0xf, @@ -228,6 +229,7 @@ enum { IRQD_AFFINITY_MANAGED = (1 << 21), IRQD_IRQ_STARTED= (1 << 22), IRQD_MANAGED_SHUTDOWN = (1 << 23), + IRQD_SINGLE_TARGET = (1 << 24), }; #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) @@ -276,6 +278,20 @@ static inline bool irqd_is_level_type(struct irq_data *d) return __irqd_to_state(d) & IRQD_LEVEL; } +/* + * Must only be called of irqchip.irq_set_affinity() or low level + * hieararchy domain allocation functions. 
+ */ +static inline void irqd_set_single_target(struct irq_data *d) +{ + __irqd_to_state(d) |= IRQD_SINGLE_TARGET; +} + +static inline bool irqd_is_single_target(struct irq_data *d) +{ + return __irqd_to_state(d) & IRQD_SINGLE_TARGET; +} + static inline bool irqd_is_wakeup_set(struct irq_data *d) { return __irqd_to_state(d) & IRQD_WAKEUP_STATE; diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c index edbef25..dbd6e78 100644 --- a/kernel/irq/debugfs.c +++ b/kernel/irq/debugfs.c @@ -105,6 +105,7 @@ static const struct irq_bit_descr irqdata_states[] = { BIT_MASK_DESCR(IRQD_PER_CPU), BIT_MASK_DESCR(IRQD_NO_BALANCING), + BIT_MASK_DESCR(IRQD_SINGLE_TARGET), BIT_MASK_DESCR(IRQD_MOVE_PCNTXT), BIT_MASK_DESCR(IRQD_AFFINITY_SET), BIT_MASK_DESCR(IRQD_SETAFFINITY_PENDING),
[tip:irq/core] genirq/cpuhotplug: Handle managed IRQs on CPU hotplug
Commit-ID: c5cb83bb337c25caae995d992d1cdf9b317f83de Gitweb: http://git.kernel.org/tip/c5cb83bb337c25caae995d992d1cdf9b317f83de Author: Thomas Gleixner AuthorDate: Tue, 20 Jun 2017 01:37:51 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:25 +0200 genirq/cpuhotplug: Handle managed IRQs on CPU hotplug If a CPU goes offline, interrupts affine to the CPU are moved away. If the outgoing CPU is the last CPU in the affinity mask the migration code breaks the affinity and sets it to all online CPUs. This is a problem for affinity managed interrupts as CPU hotplug is often used for power management purposes. If the affinity is broken, the interrupt is no longer affine to the CPUs to which it was allocated. The affinity spreading allows to lay out multi queue devices in a way that they are assigned to a single CPU or a group of CPUs. If the last CPU goes offline, then the queue is no longer used, so the interrupt can be shutdown gracefully and parked until one of the assigned CPUs comes online again. Add a graceful shutdown mechanism into the irq affinity breaking code path, mark the irq as MANAGED_SHUTDOWN and leave the affinity mask unmodified. In the online path, scan the active interrupts for managed interrupts and if the interrupt is functional and the newly online CPU is part of the affinity mask, restart the interrupt if it is marked MANAGED_SHUTDOWN or if the interrupt is started up, try to add the CPU back to the effective affinity mask. 
Originally-by: Christoph Hellwig Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170619235447.273417...@linutronix.de --- include/linux/cpuhotplug.h | 1 + include/linux/irq.h| 5 + kernel/cpu.c | 5 + kernel/irq/cpuhotplug.c| 45 + 4 files changed, 56 insertions(+) diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 0f2a803..c15f22c 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -124,6 +124,7 @@ enum cpuhp_state { CPUHP_AP_ONLINE_IDLE, CPUHP_AP_SMPBOOT_THREADS, CPUHP_AP_X86_VDSO_VMA_ONLINE, + CPUHP_AP_IRQ_AFFINITY_ONLINE, CPUHP_AP_PERF_ONLINE, CPUHP_AP_PERF_X86_ONLINE, CPUHP_AP_PERF_X86_UNCORE_ONLINE, diff --git a/include/linux/irq.h b/include/linux/irq.h index 807042b..19cea63 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -500,7 +500,12 @@ extern int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *cpumask, bool force); extern int irq_set_vcpu_affinity(unsigned int irq, void *vcpu_info); +#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_IRQ_MIGRATION) extern void irq_migrate_all_off_this_cpu(void); +extern int irq_affinity_online_cpu(unsigned int cpu); +#else +# define irq_affinity_online_cpu NULL +#endif #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ) void irq_move_irq(struct irq_data *data); diff --git a/kernel/cpu.c b/kernel/cpu.c index cb51034..b86b32e 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1252,6 +1252,11 @@ static struct cpuhp_step cpuhp_ap_states[] = { .startup.single = smpboot_unpark_threads, .teardown.single= NULL, }, + [CPUHP_AP_IRQ_AFFINITY_ONLINE] = { + .name = "irq/affinity:online", + .startup.single = irq_affinity_online_cpu, + .teardown.single= NULL, + }, [CPUHP_AP_PERF_ONLINE] = { .name = "perf:online", .startup.single = perf_event_init_cpu, diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c index 0b093db..b7964e7 100644 --- 
a/kernel/irq/cpuhotplug.c +++ b/kernel/irq/cpuhotplug.c @@ -83,6 +83,15 @@ static bool migrate_one_irq(struct irq_desc *desc) chip->irq_mask(d); if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { + /* +* If the interrupt is managed, then shut it down and leave +* the affinity untouched. +*/ + if (irqd_affinity_is_managed(d)) { + irqd_set_managed_shutdown(d); + irq_shutdown(desc); + return false; + } affinity = cpu_online_mask; brokeaff = true; } @@ -129,3 +138,39 @@ void irq_migrate_all_off_this_cpu(void) } } } + +static void irq_restore_affinity_of_irq(struct irq_desc *desc, unsigned int cpu) +{ + struct irq_data *data =
[tip:irq/core] genirq/cpuhotplug: Avoid irq affinity setting for single targets
Commit-ID: 8f31a9845db348f5781df47ce04c79e4cfe90016 Gitweb: http://git.kernel.org/tip/8f31a9845db348f5781df47ce04c79e4cfe90016 Author: Thomas Gleixner AuthorDate: Tue, 20 Jun 2017 01:37:53 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:25 +0200 genirq/cpuhotplug: Avoid irq affinity setting for single targets Avoid trying to add a newly online CPU to the effective affinity mask of a started up interrupt. That interrupt will either stay on the already online CPU or move around for no value. Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235447.431321...@linutronix.de --- kernel/irq/cpuhotplug.c | 12 ++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c index b7964e7..aee8f7e 100644 --- a/kernel/irq/cpuhotplug.c +++ b/kernel/irq/cpuhotplug.c @@ -148,9 +148,17 @@ static void irq_restore_affinity_of_irq(struct irq_desc *desc, unsigned int cpu) !irq_data_get_irq_chip(data) || !cpumask_test_cpu(cpu, affinity)) return; - if (irqd_is_managed_and_shutdown(data)) + if (irqd_is_managed_and_shutdown(data)) { irq_startup(desc, IRQ_RESEND, IRQ_START_COND); - else + return; + } + + /* +* If the interrupt can only be directed to a single target +* CPU then it is already assigned to a CPU in the affinity +* mask. No point in trying to move it around. +*/ + if (!irqd_is_single_target(data)) irq_set_affinity_locked(data, affinity, false); }
[tip:irq/core] x86/ioapic: Create named irq domain
Commit-ID: 1b604745c8474c76e5fd1682ea5b7da0a1c6d440 Gitweb: http://git.kernel.org/tip/1b604745c8474c76e5fd1682ea5b7da0a1c6d440 Author: Thomas Gleixner AuthorDate: Tue, 20 Jun 2017 01:37:07 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:09 +0200 x86/ioapic: Create named irq domain Use the fwnode to create a named domain so diagnosis works, but only when the ioapic is not device tree based. Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235443.752782...@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/io_apic.c | 22 -- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 347bb9f..444ae92 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2223,6 +2223,8 @@ static int mp_irqdomain_create(int ioapic) struct ioapic *ip = &ioapics[ioapic]; struct ioapic_domain_cfg *cfg = &ip->irqdomain_cfg; struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(ioapic); + struct fwnode_handle *fn; + char *name = "IO-APIC"; if (cfg->type == IOAPIC_DOMAIN_INVALID) return 0; @@ -2233,9 +2235,25 @@ static int mp_irqdomain_create(int ioapic) parent = irq_remapping_get_ir_irq_domain(); if (!parent) parent = x86_vector_domain; + else + name = "IO-APIC-IR"; + + /* Handle device tree enumerated APICs proper */ + if (cfg->dev) { + fn = of_node_to_fwnode(cfg->dev); + } else { + fn = irq_domain_alloc_named_id_fwnode(name, ioapic); + if (!fn) + return -ENOMEM; + } + + ip->irqdomain = irq_domain_create_linear(fn, hwirqs, cfg->ops, +(void *)(long)ioapic); + + /* Release fw handle if it was allocated above */ + if (!cfg->dev) + irq_domain_free_fwnode(fn); - ip->irqdomain = irq_domain_add_linear(cfg->dev, hwirqs, cfg->ops, - (void *)(long)ioapic); if (!ip->irqdomain) return -ENOMEM;
[tip:irq/core] x86/htirq: Create named domain
Commit-ID: 5f432711ba94400fb39e9be81913ced81c141758 Gitweb: http://git.kernel.org/tip/5f432711ba94400fb39e9be81913ced81c141758 Author: Thomas Gleixner AuthorDate: Tue, 20 Jun 2017 01:37:08 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:09 +0200 x86/htirq: Create named domain Use the fwnode to create a named domain so diagnosis works. Mark the init function __init while at it. Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235443.829047...@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/htirq.c | 21 - 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/apic/htirq.c b/arch/x86/kernel/apic/htirq.c index ae50d34..56ccf93 100644 --- a/arch/x86/kernel/apic/htirq.c +++ b/arch/x86/kernel/apic/htirq.c @@ -150,16 +150,27 @@ static const struct irq_domain_ops htirq_domain_ops = { .deactivate = htirq_domain_deactivate, }; -void arch_init_htirq_domain(struct irq_domain *parent) +void __init arch_init_htirq_domain(struct irq_domain *parent) { + struct fwnode_handle *fn; + if (disable_apic) return; - htirq_domain = irq_domain_add_tree(NULL, &htirq_domain_ops, NULL); + fn = irq_domain_alloc_named_fwnode("PCI-HT"); + if (!fn) + goto warn; + + htirq_domain = irq_domain_create_tree(fn, &htirq_domain_ops, NULL); + irq_domain_free_fwnode(fn); if (!htirq_domain) - pr_warn("failed to initialize irqdomain for HTIRQ.\n"); - else - htirq_domain->parent = parent; + goto warn; + + htirq_domain->parent = parent; + return; + +warn: + pr_warn("Failed to initialize irqdomain for HTIRQ.\n"); } int arch_setup_ht_irq(int idx, int pos, struct pci_dev *dev,
[tip:irq/core] x86/irq: Cleanup pending irq move in fixup_irqs()
Commit-ID: 8e7b632237df8b17526411d1d98f838580bb6aa3 Gitweb: http://git.kernel.org/tip/8e7b632237df8b17526411d1d98f838580bb6aa3 Author: Thomas Gleixner AuthorDate: Tue, 20 Jun 2017 01:37:20 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:13 +0200 x86/irq: Cleanup pending irq move in fixup_irqs() If a CPU goes offline, the interrupts are migrated away, but an eventually pending interrupt move, which has not yet been made effective, is kept pending even if the outgoing CPU is the sole target of the pending affinity mask. What's worse is that the pending affinity mask is discarded even if it would contain a valid subset of the online CPUs. Use the newly introduced helper to: - Discard a pending move when the outgoing CPU is the only target in the pending mask. - Use the pending mask instead of the affinity mask to find a valid target for the CPU if the pending mask intersects with the online CPUs. Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235444.774068...@linutronix.de --- arch/x86/kernel/irq.c | 25 + 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index f34fe74..9696007d 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -440,9 +440,9 @@ void fixup_irqs(void) int ret; for_each_irq_desc(irq, desc) { + const struct cpumask *affinity; int break_affinity = 0; int set_affinity = 1; - const struct cpumask *affinity; if (!desc) continue; @@ -454,19 +454,36 @@ void fixup_irqs(void) data = irq_desc_get_irq_data(desc); affinity = irq_data_get_affinity_mask(data); + if (!irq_has_action(irq) || irqd_is_per_cpu(data) || cpumask_subset(affinity, cpu_online_mask)) { + irq_fixup_move_pending(desc, false); raw_spin_unlock(&desc->lock); continue; } /* -* Complete the irq move. 
This cpu is going down and for -* non intr-remapping case, we can't wait till this interrupt -* arrives at this cpu before completing the irq move. +* Complete an eventually pending irq move cleanup. If this +* interrupt was moved in hard irq context, then the +* vectors need to be cleaned up. It can't wait until this +* interrupt actually happens and this CPU was involved. */ irq_force_complete_move(desc); + /* +* If there is a setaffinity pending, then try to reuse the +* pending mask, so the last change of the affinity does +* not get lost. If there is no move pending or the pending +* mask does not contain any online CPU, use the current +* affinity mask. +*/ + if (irq_fixup_move_pending(desc, true)) + affinity = desc->pending_mask; + + /* +* If the mask does not contain an offline CPU, break +* affinity and use cpu_online_mask as fall back. +*/ if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { break_affinity = 1; affinity = cpu_online_mask;
[tip:irq/core] genirq: Move irq_fixup_move_pending() to core
Commit-ID: 36d84fb45140f151fa4e145381dbce5e5ffed24d Gitweb: http://git.kernel.org/tip/36d84fb45140f151fa4e145381dbce5e5ffed24d Author: Thomas GleixnerAuthorDate: Tue, 20 Jun 2017 01:37:34 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:19 +0200 genirq: Move irq_fixup_move_pending() to core Now that x86 uses the generic code, the function declaration and inline stub can move to the core internal header. Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235445.928156...@linutronix.de --- include/linux/irq.h| 5 - kernel/irq/internals.h | 5 + 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/linux/irq.h b/include/linux/irq.h index 299271a..2b7e5a7 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -492,15 +492,10 @@ extern void irq_migrate_all_off_this_cpu(void); void irq_move_irq(struct irq_data *data); void irq_move_masked_irq(struct irq_data *data); void irq_force_complete_move(struct irq_desc *desc); -bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear); #else static inline void irq_move_irq(struct irq_data *data) { } static inline void irq_move_masked_irq(struct irq_data *data) { } static inline void irq_force_complete_move(struct irq_desc *desc) { } -static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear) -{ - return false; -} #endif extern int no_irq_affinity; diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index fd4fa83..040806f 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -272,6 +272,7 @@ static inline struct cpumask *irq_desc_get_pending_mask(struct irq_desc *desc) { return desc->pending_mask; } +bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear); #else /* CONFIG_GENERIC_PENDING_IRQ */ static inline bool irq_can_move_pcntxt(struct irq_data *data) { @@ -293,6 +294,10 @@ static inline struct 
cpumask *irq_desc_get_pending_mask(struct irq_desc *desc) { return NULL; } +static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear) +{ + return false; +} #endif /* !CONFIG_GENERIC_PENDING_IRQ */ #ifdef CONFIG_GENERIC_IRQ_DEBUGFS
[tip:irq/core] genirq: Remove pointless gfp argument
Commit-ID: 4ab764c336123157690eea1dcf81851c58d1 Gitweb: http://git.kernel.org/tip/4ab764c336123157690eea1dcf81851c58d1 Author: Thomas Gleixner AuthorDate: Tue, 20 Jun 2017 01:37:36 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:19 +0200 genirq: Remove pointless gfp argument All callers hand in GFP_KERNEL. No point in having an extra argument for that. Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235446.082544...@linutronix.de --- kernel/irq/irqdesc.c | 15 +++ 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index feade53..48d4f03 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -54,14 +54,14 @@ static void __init init_irq_default_affinity(void) #endif #ifdef CONFIG_SMP -static int alloc_masks(struct irq_desc *desc, gfp_t gfp, int node) +static int alloc_masks(struct irq_desc *desc, int node) { if (!zalloc_cpumask_var_node(&desc->irq_common_data.affinity, -gfp, node)) +GFP_KERNEL, node)) return -ENOMEM; #ifdef CONFIG_GENERIC_PENDING_IRQ - if (!zalloc_cpumask_var_node(&desc->pending_mask, gfp, node)) { + if (!zalloc_cpumask_var_node(&desc->pending_mask, GFP_KERNEL, node)) { free_cpumask_var(desc->irq_common_data.affinity); return -ENOMEM; } @@ -86,7 +86,7 @@ static void desc_smp_init(struct irq_desc *desc, int node, #else static inline int -alloc_masks(struct irq_desc *desc, gfp_t gfp, int node) { return 0; } +alloc_masks(struct irq_desc *desc, int node) { return 0; } static inline void desc_smp_init(struct irq_desc *desc, int node, const struct cpumask *affinity) { } #endif @@ -344,9 +344,8 @@ static struct irq_desc *alloc_desc(int irq, int node, unsigned int flags, struct module *owner) { struct irq_desc *desc; - gfp_t gfp = GFP_KERNEL; - desc = kzalloc_node(sizeof(*desc), gfp, node); + desc = kzalloc_node(sizeof(*desc), GFP_KERNEL, node); if (!desc) return NULL; 
/* allocate based on nr_cpu_ids */ @@ -354,7 +353,7 @@ static struct irq_desc *alloc_desc(int irq, int node, unsigned int flags, if (!desc->kstat_irqs) goto err_desc; - if (alloc_masks(desc, gfp, node)) + if (alloc_masks(desc, node)) goto err_kstat; raw_spin_lock_init(&desc->lock); @@ -525,7 +524,7 @@ int __init early_irq_init(void) for (i = 0; i < count; i++) { desc[i].kstat_irqs = alloc_percpu(unsigned int); - alloc_masks(&desc[i], GFP_KERNEL, node); + alloc_masks(&desc[i], node); raw_spin_lock_init(&desc[i].lock); lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); desc_set_defaults(i, &desc[i], node, NULL, NULL);
[tip:irq/core] x86/msi: Remove unused remap irq domain interface
Commit-ID: 0323b9690448e1d1ada91dac9d8fa62f7285751a Gitweb: http://git.kernel.org/tip/0323b9690448e1d1ada91dac9d8fa62f7285751a Author: Thomas GleixnerAuthorDate: Tue, 20 Jun 2017 01:37:13 +0200 Committer: Thomas Gleixner CommitDate: Thu, 22 Jun 2017 18:21:11 +0200 x86/msi: Remove unused remap irq domain interface Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235444.221049...@linutronix.de --- arch/x86/include/asm/irq_remapping.h | 1 - arch/x86/kernel/apic/msi.c | 6 -- 2 files changed, 7 deletions(-) diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 0398675..023b4a9 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -55,7 +55,6 @@ extern struct irq_domain * irq_remapping_get_irq_domain(struct irq_alloc_info *info); /* Create PCI MSI/MSIx irqdomain, use @parent as the parent irqdomain. */ -extern struct irq_domain *arch_create_msi_irq_domain(struct irq_domain *parent); extern struct irq_domain * arch_create_remap_msi_irq_domain(struct irq_domain *par, const char *n, int id); diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index 0e6618e..d79dc2a 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -180,12 +180,6 @@ struct irq_domain *arch_create_remap_msi_irq_domain(struct irq_domain *parent, irq_domain_free_fwnode(fn); return d; } - -struct irq_domain *arch_create_msi_irq_domain(struct irq_domain *parent) -{ - return pci_msi_create_irq_domain(NULL, _msi_ir_domain_info, parent); -} - #endif #ifdef CONFIG_DMAR_TABLE