[PATCH 0/3 v2] xen: Implement EFI reset_system callback
Hi all, This small patch series implements EFI reset_system callback when using EFI Xen. Without this, it will not be possible to reboot/power off ARM64 DOM0 when using ACPI. Cheers, Cc: Boris Ostrovsky Cc: Juergen Gross Julien Grall (3): xen: Export xen_reboot arm/xen: Consolidate calls to shutdown hypercall in a single helper xen: Implement EFI reset_system callback arch/arm/xen/efi.c | 2 +- arch/arm/xen/enlighten.c | 16 ++-- arch/x86/xen/efi.c | 2 +- arch/x86/xen/enlighten.c | 2 +- drivers/xen/efi.c| 18 ++ include/xen/xen-ops.h| 5 + 6 files changed, 36 insertions(+), 9 deletions(-) -- 2.11.0
[PATCH 3/3 v2] xen: Implement EFI reset_system callback
93557] 7bc0: [ 37.401477] 7be0: 08c97dd8 08cc6fb0 08cc6fb8 [ 37.409396] 7c00: 6974726174736552 800902067a50 05f5e0ff 08e7 [ 37.417318] 7c20: 08e706c0 08f42bfd 88f42bef 0006 [ 37.425234] 7c40: deadbeef a7c27470 [ 37.430190] [< (null)>] (null) [ 37.434982] [] machine_restart+0x6c/0x70 [ 37.440550] [] kernel_restart+0x6c/0x78 [ 37.446030] [] SyS_reboot+0x130/0x228 [ 37.451337] [] el0_svc_naked+0x24/0x28 [ 37.456737] Code: bad PC value [ 37.459891] ---[ end trace 76e2fc17e050aecd ]--- Signed-off-by: Julien Grall -- Cc: Boris Ostrovsky Cc: Juergen Gross Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: x...@kernel.org The x86 code has theoritically a similar issue, altought EFI does not seem to be the preferred method. I have only built test it on x86. This should also probably be fixed in stable tree. Changes in v2: - Implement xen_efi_reset_system using xen_reboot - Move xen_efi_reset_system in drivers/xen/efi.c --- arch/arm/xen/efi.c| 2 +- arch/x86/xen/efi.c| 2 +- drivers/xen/efi.c | 18 ++ include/xen/xen-ops.h | 3 +++ 4 files changed, 23 insertions(+), 2 deletions(-) diff --git a/arch/arm/xen/efi.c b/arch/arm/xen/efi.c index 16db419f9e90..b4d78959cadf 100644 --- a/arch/arm/xen/efi.c +++ b/arch/arm/xen/efi.c @@ -35,6 +35,6 @@ void __init xen_efi_runtime_setup(void) efi.update_capsule = xen_efi_update_capsule; efi.query_capsule_caps = xen_efi_query_capsule_caps; efi.get_next_high_mono_count = xen_efi_get_next_high_mono_count; - efi.reset_system = NULL; /* Functionality provided by Xen. */ + efi.reset_system = xen_efi_reset_system; } EXPORT_SYMBOL_GPL(xen_efi_runtime_setup); diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c index 3be012115853..30bb2e80cfe7 100644 --- a/arch/x86/xen/efi.c +++ b/arch/x86/xen/efi.c @@ -81,7 +81,7 @@ static const struct efi efi_xen __initconst = { .update_capsule = xen_efi_update_capsule, .query_capsule_caps = xen_efi_query_capsule_caps, .get_next_high_mono_count = xen_efi_get_next_high_mono_count, - .reset_system = NULL, /* Functionality provided by Xen. */ + .reset_system = xen_efi_reset_system, .set_virtual_address_map = NULL, /* Not used under Xen. */ .flags= 0 /* Initialized later. */ }; diff --git a/drivers/xen/efi.c b/drivers/xen/efi.c index 22f71ffd3406..9243a9051078 100644 --- a/drivers/xen/efi.c +++ b/drivers/xen/efi.c @@ -26,6 +26,7 @@ #include #include #include +#include #include @@ -263,3 +264,20 @@ efi_status_t xen_efi_query_capsule_caps(efi_capsule_header_t **capsules, return efi_data(op).status; } EXPORT_SYMBOL_GPL(xen_efi_query_capsule_caps); + +void xen_efi_reset_system(int reset_type, efi_status_t status, + unsigned long data_size, efi_char16_t *data) +{ + switch (reset_type) { + case EFI_RESET_COLD: + case EFI_RESET_WARM: + xen_reboot(SHUTDOWN_reboot); + break; + case EFI_RESET_SHUTDOWN: + xen_reboot(SHUTDOWN_poweroff); + break; + default: + BUG(); + } +} +EXPORT_SYMBOL_GPL(xen_efi_reset_system); diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index fc5ddb472f86..197bb4866327 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -122,6 +122,9 @@ efi_status_t xen_efi_update_capsule(efi_capsule_header_t **capsules, efi_status_t xen_efi_query_capsule_caps(efi_capsule_header_t **capsules, unsigned long count, u64 *max_size, int *reset_type); +void xen_efi_reset_system(int reset_type, efi_status_t status, + unsigned long data_size, efi_char16_t *data); + #ifdef CONFIG_PREEMPT -- 2.11.0
[PATCH 2/3 v2] arm/xen: Consolidate calls to shutdown hypercall in a single helper
Signed-off-by: Julien Grall --- Changes in v2: - Patch added --- arch/arm/xen/enlighten.c | 16 ++-- 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 81e3217b12d3..ba7f4c8f5c3e 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -191,20 +191,24 @@ static int xen_dying_cpu(unsigned int cpu) return 0; } -static void xen_restart(enum reboot_mode reboot_mode, const char *cmd) +void xen_reboot(int reason) { - struct sched_shutdown r = { .reason = SHUTDOWN_reboot }; + struct sched_shutdown r = { .reason = reason }; int rc; + rc = HYPERVISOR_sched_op(SCHEDOP_shutdown, &r); BUG_ON(rc); } +static void xen_restart(enum reboot_mode reboot_mode, const char *cmd) +{ + xen_reboot(SHUTDOWN_reboot); +} + + static void xen_power_off(void) { - struct sched_shutdown r = { .reason = SHUTDOWN_poweroff }; - int rc; - rc = HYPERVISOR_sched_op(SCHEDOP_shutdown, &r); - BUG_ON(rc); + xen_reboot(SHUTDOWN_poweroff); } static irqreturn_t xen_arm_callback(int irq, void *arg) -- 2.11.0
[PATCH 1/3 v2] xen: Export xen_reboot
The helper xen_reboot will be called by the EFI code in a later patch. Note that the ARM version does not yet exist and will be added in a later patch too. Signed-off-by: Julien Grall --- Cc: Boris Ostrovsky Cc: Juergen Gross Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: x...@kernel.org Changes in v2: - Patch added --- arch/x86/xen/enlighten.c | 2 +- include/xen/xen-ops.h| 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index ec1d5c46e58f..563f2d963a04 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1294,7 +1294,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .end_context_switch = xen_end_context_switch, }; -static void xen_reboot(int reason) +void xen_reboot(int reason) { struct sched_shutdown r = { .reason = reason }; int cpu; diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index b5486e648607..fc5ddb472f86 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -22,6 +22,8 @@ void xen_timer_resume(void); void xen_arch_resume(void); void xen_arch_suspend(void); +void xen_reboot(int reason); + void xen_resume_notifier_register(struct notifier_block *nb); void xen_resume_notifier_unregister(struct notifier_block *nb); -- 2.11.0
Re: [PATCH 1/2] xen/arm, arm64: fix xen_dma_ops after 815dd18 "Consolidate get_dma_ops..."
Hi Stefano, On 24/04/17 20:16, Stefano Stabellini wrote: Given the outstanding regression we need to fix as soon as possible, I'll queue these patches on the xentip tree for 4.12. It looks like there is another rc for 4.11. I am wondering whether you could try to send a pull request to Linus so it can be fixed in 4.11? Cheers, -- Julien Grall
Re: irqchip/irq-gic: BAD_MADT_GICC_ENTRY may fail when booting with ACPI 5.1
Hi Lorenzo, On 05/23/2017 06:06 PM, Lorenzo Pieralisi wrote: [+Al] On Tue, May 23, 2017 at 05:40:28PM +0100, Julien Grall wrote: Hi all, I am currently looking at adding support of ACPI 5.1 in Xen. When trying to boot DOM00 I get a panic in Linux (for the full log see [1]): (XEN) DOM0: [0.00] No valid GICC entries exist The error message is coming from gic_v2_acpi_init. Digging down in the code, it is failing because of BAD_MADT_GICC_ENTRY is returning false in gic_acpi_parse_madt_cpu: /* Macros for consistency checks of the GICC subtable of MADT */ #define ACPI_MADT_GICC_LENGTH \ (acpi_gbl_FADT.header.revision < 6 ? 76 : 80) #define BAD_MADT_GICC_ENTRY(entry, end) \ (!(entry) || (unsigned long)(entry) + sizeof(*(entry)) > (end) ||\ (entry)->header.length != ACPI_MADT_GICC_LENGTH) The 'end' parameter corresponds to the end of the MADT table. In the case of ACPI 5.1, the size of GICC is smaller compare to 6.0+ (76 vs 80 bytes) but the parameter 'entry' is type of acpi_madt_generic_interrupt (sizeof(...) = 80). #define BAD_MADT_GICC_ENTRY(entry, end) \ (!(entry) || (entry)->header.length != ACPI_MADT_GICC_LENGTH || \ ((unsigned long)(entry) + ACPI_MADT_GICC_LENGTH) > (end)) Would this solve it ? Yes, I am now able to boot DOM0 up to the prompt. My concern with this solution is the code will still use the acpi_madt_generic_interrupt code. If someone tries to access field not existing in 5.1 (such as efficiency_class), it may return wrong value or even worst crash. Although, I don't see any user of efficiency_class in Linux so far. Cheers, -- Julien Grall
Re: [Xen-devel] [PATCH v1] xen/grant-table: log the lack of grants
On 07/13/2017 06:42 PM, Wengang wrote: Hi, Hello, Anyone can you please review this patch? Most of the developer community were at Xen Summit this week, so you may expect some delay in review. However, in general it is better to CC relevant maintainers of your code to raise attention on your patch. I did it for you this time. Cheers, thanks, wengang On 07/07/2017 11:23 AM, Wengang Wang wrote: log a message when we enter this situation: 1) we already allocated the max number of available grants from hypervisor and 2) we still need more (but the request fails because of 1)). Sometimes the lack of grants causes IO hangs in xen_blkfront devices. Adding this log would help debuging. Signed-off-by: Wengang Wang Reviewed-by: Konrad Rzeszutek Wilk Reviewed-by: Junxiao Bi --- drivers/xen/grant-table.c | 9 - 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index d6786b8..2c6a911 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -1072,8 +1073,14 @@ static int gnttab_expand(unsigned int req_entries) cur = nr_grant_frames; extra = ((req_entries + (grefs_per_grant_frame-1)) / grefs_per_grant_frame); -if (cur + extra > gnttab_max_grant_frames()) +if (cur + extra > gnttab_max_grant_frames()) { +pr_warn_ratelimited("xen/grant-table: max_grant_frames reached" +" cur=%u extra=%u limit=%u" +" gnttab_free_count=%u req_entries=%u\n", +cur, extra, gnttab_max_grant_frames(), +gnttab_free_count, req_entries); return -ENOSPC; +} rc = gnttab_map(cur, cur + extra - 1); if (rc == 0) ___ Xen-devel mailing list xen-de...@lists.xen.org https://lists.xen.org/xen-devel -- Julien Grall
Re: [Xen-devel] [PATCH] xen: use hvc console for dom0
Hi, On 26/02/18 12:32, Juergen Gross wrote: On 26/02/18 13:06, Andrii Anisov wrote: Hello Juergen, On 26.02.18 13:08, Juergen Gross wrote: Today the hvc console is added as a preferred console for pv domUs only. As this requires a boot parameter for getting dom0 messages per default add it for dom0, too. Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten_pv.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c Is this something x86 specific? Could it be a generic approach? In case ARM wants something similar I guess the test for xen_initial_domain() should be dropped in xen_early_init(). I am pretty sure we discussed to remove !xen_initial_domain() for Arm in the past. But I don't remember why the patch was not sent to remove it. Anyway, I guess this should be fine to have hvc as a preferred console for the initial domain as well. Cheers, -- Julien Grall
Re: [Xen-devel] [PATCH] xen: use hvc console for dom0
Hi, On 27/02/2018 20:03, Stefano Stabellini wrote: On Tue, 27 Feb 2018, Julien Grall wrote: Hi, On 26/02/18 12:32, Juergen Gross wrote: On 26/02/18 13:06, Andrii Anisov wrote: Hello Juergen, On 26.02.18 13:08, Juergen Gross wrote: Today the hvc console is added as a preferred console for pv domUs only. As this requires a boot parameter for getting dom0 messages per default add it for dom0, too. Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten_pv.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c Is this something x86 specific? Could it be a generic approach? In case ARM wants something similar I guess the test for xen_initial_domain() should be dropped in xen_early_init(). I am pretty sure we discussed to remove !xen_initial_domain() for Arm in the past. But I don't remember why the patch was not sent to remove it. Anyway, I guess this should be fine to have hvc as a preferred console for the initial domain as well. Usually, Dom0 has access to several physical UARTs and/or VGA, making this patch less obviously desirable. I think that for Dom0 the best behavior would be to add "hvc0" as first console rather than last console, so that if the user specified something else, this call won't interfere with it. Well, that's exactly the goal of add_preferred_console. It will use hvc0 unless specified otherwise by the user on the command line. Cheers, -- Julien Grall
Re: [PATCH v6 05/11] arm64/mm: Add support for XPFO
Hi, CC Juergen, Boris and Stefano. On 08/09/17 18:24, Tycho Andersen wrote: On Fri, Sep 08, 2017 at 12:53:47AM -0700, Christoph Hellwig wrote: +/* + * Lookup the page table entry for a virtual address and return a pointer to + * the entry. Based on x86 tree. + */ +static pte_t *lookup_address(unsigned long addr) Seems like this should be moved to common arm64 mm code and used by kernel_page_present. Sounds good, I'll include something like the patch below in the next series. Unfortunately, adding an implementation of lookup_address seems to be slightly more complicated than necessary, because of the xen piece. We have to define lookup_address() with the level parameter, but it's not obvious to me to name the page levels. So for now I've just left it as a WARN() if someone supplies it. It seems like xen still does need this to be defined, because if I define it without level: drivers/xen/xenbus/xenbus_client.c: In function ‘xenbus_unmap_ring_vfree_pv’: drivers/xen/xenbus/xenbus_client.c:760:4: error: too many arguments to function ‘lookup_address’ lookup_address(addr, &level)).maddr; ^~ In file included from ./arch/arm64/include/asm/page.h:37:0, from ./include/linux/mmzone.h:20, from ./include/linux/gfp.h:5, from ./include/linux/mm.h:9, from drivers/xen/xenbus/xenbus_client.c:33: ./arch/arm64/include/asm/pgtable-types.h:67:15: note: declared here extern pte_t *lookup_address(unsigned long addr); ^~ I've cc-d the xen folks, maybe they can suggest a way to untangle it? Alternatively, if someone can suggest a good naming scheme for the page levels, I can just do that. The implementation of lookup_address(...) on ARM for Xen (see include/xen/arm/page.h) is just a BUG(). This is because this code should never be called (only used for x86 PV code). Furthermore, xenbus client does not use at all the level. It is just to cope with the x86 version of lookup_address. So one way to solve the problem would be to introduce xen_lookup_address(addr) that would be implemented as: - on x86 unsigned int level; return lookup_address(addr, &level).maddr; - on ARM BUG(); With that there would be no prototype clash and avoid introducing a level parameter. Cheers, -- Julien Grall
Re: [PATCH] xen/events: events_fifo: Don't use {get,put}_cpu() in xen_evtchn_fifo_init()
Hi, Gentle ping. This patch was reviewed but not queued. Are we waiting for other reviewed? Cheers, On 18/08/17 11:15, Julien Grall wrote: Hi Boris, On 17/08/17 18:36, Boris Ostrovsky wrote: On 08/17/2017 12:14 PM, Julien Grall wrote: When booting Linux as Xen guest with CONFIG_DEBUG_ATOMIC, the following splat appears: [0.002323] Mountpoint-cache hash table entries: 1024 (order: 1, 8192 bytes) [0.019717] ASID allocator initialised with 65536 entries [0.020019] xen:grant_table: Grant tables using version 1 layout [0.020051] Grant table initialized [0.020069] BUG: sleeping function called from invalid context at /data/src/linux/mm/page_alloc.c:4046 [0.020100] in_atomic(): 1, irqs_disabled(): 0, pid: 1, name: swapper/0 [0.020123] no locks held by swapper/0/1. [0.020143] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.13.0-rc5 #598 [0.020166] Hardware name: FVP Base (DT) [0.020182] Call trace: [0.020199] [] dump_backtrace+0x0/0x270 [0.020222] [] show_stack+0x24/0x30 [0.020244] [] dump_stack+0xb8/0xf0 [0.020267] [] ___might_sleep+0x1c8/0x1f8 [0.020291] [] __might_sleep+0x58/0x90 [0.020313] [] __alloc_pages_nodemask+0x1c0/0x12e8 [0.020338] [] alloc_page_interleave+0x38/0x88 [0.020363] [] alloc_pages_current+0xdc/0xf0 [0.020387] [] __get_free_pages+0x28/0x50 [0.020411] [] evtchn_fifo_alloc_control_block+0x2c/0xa0 [0.020437] [] xen_evtchn_fifo_init+0x38/0xb4 [0.020461] [] xen_init_IRQ+0x44/0xc8 [0.020484] [] xen_guest_init+0x250/0x300 [0.020507] [] do_one_initcall+0x44/0x130 [0.020531] [] kernel_init_freeable+0x120/0x288 [0.020556] [] kernel_init+0x18/0x110 [0.020578] [] ret_from_fork+0x10/0x40 [0.020606] xen:events: Using FIFO-based ABI [0.020658] Xen: initializing cpu0 [0.027727] Hierarchical SRCU implementation. [0.036235] EFI services will not be available. [0.043810] smp: Bringing up secondary CPUs ... This is because get_cpu() in xen_evtchn_fifo_init() will disable preemption, but __get_free_page() might sleep (GFP_ATOMIC is not set). xen_evtchn_fifo_init() will always be called before SMP is initialized, so {get,put}_cpu() could be replaced by a simple smp_processor_id(). On x86 this will be called out of init_IRQ(), which is already preceded by preempt_disable(). Well the main problem is preempt_disable() itself. in_atomic() will check preempt_count and return 1 if it is non-zero. __get_free_page might sleep if GFP_ATOMIC is not set and therefore you will see the splat when CONFIG_DEBUG_ATOMIC is enabled. However, those checks don't happen before the scheduler is setup. Hence why you don't see the error on x86. Cheers, Reviewed-by: Boris Ostrovsky -- Julien Grall
[PATCH] xen/privcmd: Support correctly 64KB page granularity when mapping memory
Commit 5995a68 "xen/privcmd: Add support for Linux 64KB page granularity" did not go far enough to support 64KB in mmap_batch_fn. The variable 'nr' is the number of 4KB chunk to map. However, when Linux is using 64KB page granularity the array of pages (vma->vm_private_data) contain one page per 64KB. Fix it by incrementing st->index correctly. Furthermore, st->va is not correctly incremented as PAGE_SIZE != XEN_PAGE_SIZE. Fixes: 5995a68 ("xen/privcmd: Add support for Linux 64KB page granularity") CC: sta...@vger.kernel.org Reported-by: Feng Kan Signed-off-by: Julien Grall --- drivers/xen/privcmd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 7a92a5e1d40c..feca75b07fdd 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -362,8 +362,8 @@ static int mmap_batch_fn(void *data, int nr, void *state) st->global_error = 1; } } - st->va += PAGE_SIZE * nr; - st->index += nr; + st->va += XEN_PAGE_SIZE * nr; + st->index += nr / XEN_PFN_PER_PAGE; return 0; } -- 2.11.0
Re: [PATCH] xen/privcmd: Support correctly 64KB page granularity when mapping memory
Hi Boris, On 31/05/17 14:54, Boris Ostrovsky wrote: On 05/31/2017 09:03 AM, Julien Grall wrote: Commit 5995a68 "xen/privcmd: Add support for Linux 64KB page granularity" did not go far enough to support 64KB in mmap_batch_fn. The variable 'nr' is the number of 4KB chunk to map. However, when Linux is using 64KB page granularity the array of pages (vma->vm_private_data) contain one page per 64KB. Fix it by incrementing st->index correctly. Furthermore, st->va is not correctly incremented as PAGE_SIZE != XEN_PAGE_SIZE. Fixes: 5995a68 ("xen/privcmd: Add support for Linux 64KB page granularity") CC: sta...@vger.kernel.org Reported-by: Feng Kan Signed-off-by: Julien Grall --- drivers/xen/privcmd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 7a92a5e1d40c..feca75b07fdd 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -362,8 +362,8 @@ static int mmap_batch_fn(void *data, int nr, void *state) st->global_error = 1; } } - st->va += PAGE_SIZE * nr; - st->index += nr; + st->va += XEN_PAGE_SIZE * nr; + st->index += nr / XEN_PFN_PER_PAGE; return 0; } Are we still using PAGE_MASK for xen_remap_domain_gfn_array()? Do you mean in the xen_xlate_remap_gfn_array implementation? If so there are no use of PAGE_MASK as the code has been converted to support 64K page granularity. If you mean the x86 version of xen_remap_domain_gfn_array, then we don't really care as x86 only use 4KB page granularity. Cheers, -- Julien Grall
Re: [PATCH] xen/privcmd: Support correctly 64KB page granularity when mapping memory
Hi Boris, On 01/06/17 14:33, Boris Ostrovsky wrote: On 06/01/2017 08:50 AM, Julien Grall wrote: Hi Boris, On 31/05/17 14:54, Boris Ostrovsky wrote: On 05/31/2017 09:03 AM, Julien Grall wrote: Commit 5995a68 "xen/privcmd: Add support for Linux 64KB page granularity" did not go far enough to support 64KB in mmap_batch_fn. The variable 'nr' is the number of 4KB chunk to map. However, when Linux is using 64KB page granularity the array of pages (vma->vm_private_data) contain one page per 64KB. Fix it by incrementing st->index correctly. Furthermore, st->va is not correctly incremented as PAGE_SIZE != XEN_PAGE_SIZE. Fixes: 5995a68 ("xen/privcmd: Add support for Linux 64KB page granularity") CC: sta...@vger.kernel.org Reported-by: Feng Kan Signed-off-by: Julien Grall --- drivers/xen/privcmd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 7a92a5e1d40c..feca75b07fdd 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -362,8 +362,8 @@ static int mmap_batch_fn(void *data, int nr, void *state) st->global_error = 1; } } -st->va += PAGE_SIZE * nr; -st->index += nr; +st->va += XEN_PAGE_SIZE * nr; +st->index += nr / XEN_PFN_PER_PAGE; return 0; } Are we still using PAGE_MASK for xen_remap_domain_gfn_array()? Do you mean in the xen_xlate_remap_gfn_array implementation? If so there are no use of PAGE_MASK as the code has been converted to support 64K page granularity. If you mean the x86 version of xen_remap_domain_gfn_array, then we don't really care as x86 only use 4KB page granularity. I meant right above the change that you made. Should it also be replaced with XEN_PAGE_MASK? (Sorry for being unclear.) Oh. The code in xen_remap_domain_gfn_array is relying on st->va to be page aligned. So I think we want to keep PAGE_MASK here. Cheers, -- Julien Grall
Re: [PATCH] xen/privcmd: Support correctly 64KB page granularity when mapping memory
Hi Boris, On 01/06/17 16:16, Boris Ostrovsky wrote: On 06/01/2017 10:01 AM, Julien Grall wrote: Hi Boris, On 01/06/17 14:33, Boris Ostrovsky wrote: On 06/01/2017 08:50 AM, Julien Grall wrote: Hi Boris, On 31/05/17 14:54, Boris Ostrovsky wrote: On 05/31/2017 09:03 AM, Julien Grall wrote: Commit 5995a68 "xen/privcmd: Add support for Linux 64KB page granularity" did not go far enough to support 64KB in mmap_batch_fn. The variable 'nr' is the number of 4KB chunk to map. However, when Linux is using 64KB page granularity the array of pages (vma->vm_private_data) contain one page per 64KB. Fix it by incrementing st->index correctly. Furthermore, st->va is not correctly incremented as PAGE_SIZE != XEN_PAGE_SIZE. Fixes: 5995a68 ("xen/privcmd: Add support for Linux 64KB page granularity") CC: sta...@vger.kernel.org Reported-by: Feng Kan Signed-off-by: Julien Grall --- drivers/xen/privcmd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 7a92a5e1d40c..feca75b07fdd 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -362,8 +362,8 @@ static int mmap_batch_fn(void *data, int nr, void *state) st->global_error = 1; } } -st->va += PAGE_SIZE * nr; -st->index += nr; +st->va += XEN_PAGE_SIZE * nr; +st->index += nr / XEN_PFN_PER_PAGE; return 0; } Are we still using PAGE_MASK for xen_remap_domain_gfn_array()? Do you mean in the xen_xlate_remap_gfn_array implementation? If so there are no use of PAGE_MASK as the code has been converted to support 64K page granularity. If you mean the x86 version of xen_remap_domain_gfn_array, then we don't really care as x86 only use 4KB page granularity. I meant right above the change that you made. Should it also be replaced with XEN_PAGE_MASK? (Sorry for being unclear.) Oh. The code in xen_remap_domain_gfn_array is relying on st->va to be page aligned. So I think we want to keep PAGE_MASK here. Doe this imply then that 'nr' 4K pages is integral number of PAGE_SIZE (i.e. (nr*XEN_PAGE_SIZE) % PAGE_SIZE == 0) and if yes --- do we test this somewhere? I don't see it. nr might be smaller for the last batch. But all the intermediate batch should have ((nr * XEN_PAGE_SIZE) % PAGE_SIZE == 0). I think the BUILD_BUG_ON in privcmd_ioctl_mmap_batch ensure that all the intermediate batch will always be an integral number of PAGE_SIZE. Cheers, -- Julien Grall
Re: [PATCH] xen/privcmd: Support correctly 64KB page granularity when mapping memory
On 01/06/2017 21:41, Boris Ostrovsky wrote: On 06/01/2017 11:38 AM, Julien Grall wrote: Hi Boris, On 01/06/17 16:16, Boris Ostrovsky wrote: On 06/01/2017 10:01 AM, Julien Grall wrote: Hi Boris, On 01/06/17 14:33, Boris Ostrovsky wrote: On 06/01/2017 08:50 AM, Julien Grall wrote: Hi Boris, On 31/05/17 14:54, Boris Ostrovsky wrote: On 05/31/2017 09:03 AM, Julien Grall wrote: Commit 5995a68 "xen/privcmd: Add support for Linux 64KB page granularity" did not go far enough to support 64KB in mmap_batch_fn. The variable 'nr' is the number of 4KB chunk to map. However, when Linux is using 64KB page granularity the array of pages (vma->vm_private_data) contain one page per 64KB. Fix it by incrementing st->index correctly. Furthermore, st->va is not correctly incremented as PAGE_SIZE != XEN_PAGE_SIZE. Fixes: 5995a68 ("xen/privcmd: Add support for Linux 64KB page granularity") CC: sta...@vger.kernel.org Reported-by: Feng Kan Signed-off-by: Julien Grall --- drivers/xen/privcmd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 7a92a5e1d40c..feca75b07fdd 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -362,8 +362,8 @@ static int mmap_batch_fn(void *data, int nr, void *state) st->global_error = 1; } } -st->va += PAGE_SIZE * nr; -st->index += nr; +st->va += XEN_PAGE_SIZE * nr; +st->index += nr / XEN_PFN_PER_PAGE; return 0; } Are we still using PAGE_MASK for xen_remap_domain_gfn_array()? Do you mean in the xen_xlate_remap_gfn_array implementation? If so there are no use of PAGE_MASK as the code has been converted to support 64K page granularity. If you mean the x86 version of xen_remap_domain_gfn_array, then we don't really care as x86 only use 4KB page granularity. I meant right above the change that you made. Should it also be replaced with XEN_PAGE_MASK? (Sorry for being unclear.) Oh. The code in xen_remap_domain_gfn_array is relying on st->va to be page aligned. So I think we want to keep PAGE_MASK here. Doe this imply then that 'nr' 4K pages is integral number of PAGE_SIZE (i.e. (nr*XEN_PAGE_SIZE) % PAGE_SIZE == 0) and if yes --- do we test this somewhere? I don't see it. I now see that this should (obviously) stay as PAGE_MASK, so Reviewed-by: Boris Ostrovsky Thank you! but nr might be smaller for the last batch. But all the intermediate batch should have ((nr * XEN_PAGE_SIZE) % PAGE_SIZE == 0). how can we have nr not covering full PAGE_SIZEs? If you are using 64K pages, how can you map, say, only 4K (if nr==1)? Xen is using 4K page granularity and so does the toolstack and the hypercall interface. They are unaware of the page size of the kernel. So even if Linux is using 64K pages, the resulting mapping will be broken down in 4K chunk to issue the hypercall. To give you a concrete example, if the toolstack requests to map 4K, Linux will allocate a 64K page. Only the first 4K chunk (0-4K) will have effective mapping to host RAM. The rest (4-64K) will not be mapped. Cheers, -- Julien Grall
irqchip/irq-gic: BAD_MADT_GICC_ENTRY may fail when booting with ACPI 5.1
fff80006afff000-80006effefff] (XEN) DOM0: [0.00] Memory: 2901652K/3146372K available (9212K kernel code, 1016K rwdata, 3844K rodata, 1088K init, 305K bss, 22833 (XEN) DOM0: 6K reserved, 16384K cma-reserved) (XEN) DOM0: [0.00] Virtual kernel memory layout: (XEN) DOM0: [0.00] modules : 0x - 0x0800 ( 128 MB) (XEN) DOM0: [0.00] vmalloc : 0x0800 - 0x7dffbfff (129022 GB) (XEN) DOM0: [0.00] .text : 0x0808 - 0x0898 ( 9216 KB) (XEN) DOM0: [0.00] .rodata : 0x0898 - 0x08d5 ( 3904 KB) (XEN) DOM0: [0.00] .init : 0x08d5 - 0x08e6 ( 1088 KB) (XEN) DOM0: [0.00] .data : 0x08e6 - 0x08f5e200 ( 1017 KB) (XEN) DOM0: [0.00].bss : 0x08f5e200 - 0x08faa95c ( 306 KB) (XEN) DOM0: [0.00] fixed : 0x7dfffe7fd000 - 0x7dfffec0 ( 4108 KB) (XEN) DOM0: [0.00] PCI I/O : 0x7dfffee0 - 0x7de0 (16 MB) (XEN) DOM0: [0.00] vmemmap : 0x7e00 - 0x8000 ( 2048 GB maximum) (XEN) DOM0: [0.00] 0x7e00 - 0x7e00246f8040 ( 582 MB actual) (XEN) DOM0: [0.00] memory : 0x8000 - 0x80091be01000 ( 37310 MB) (XEN) DOM0: [0.00] SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=1, Nodes=1 (XEN) DOM0: [0.00] Preemptible hierarchical RCU implementation. (XEN) DOM0: [0.00] RCU restricting CPUs from NR_CPUS=64 to nr_cpu_ids=1. (XEN) DOM0: [0.00] RCU: Adjusting geometry for rcu_fanout_leaf=16, nr_cpu_ids=1 (XEN) DOM0: [0.00] NR_IRQS:64 nr_irqs:64 0 (XEN) DOM0: [0.00] No valid GICC entries exist (XEN) DOM0: [0.00] Kernel panic - not syncing: No interrupt controller found. (XEN) DOM0: [0.00] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.12.0-rc2-00049-gfde8e33d1068 #547 (XEN) DOM0: [0.00] Hardware name: (null) (DT) (XEN) DOM0: [0.00] Call trace: (XEN) DOM0: [0.00] [] dump_backtrace+0x0/0x228 (XEN) DOM0: [0.00] [] show_stack+0x14/0x20 (XEN) DOM0: [0.00] [] dump_stack+0x98/0xb8 (XEN) DOM0: [0.00] [] panic+0x114/0x284 (XEN) DOM0: [0.00] [] init_IRQ+0x24/0x2c (XEN) DOM0: [0.00] [] start_kernel+0x230/0x390 (XEN) DOM0: [0.00] [] __primary_switched+0x64/0x6c (XEN) DOM0: [0.00] ---[ end Kernel panic - not syncing: No interrupt controller found. Cheers, -- Julien Grall
Request backporting 374d446d25d6 and 985626564eed in stable 4.9
Hello all, We are looking at using Linux 4.9 in Xen automatic testing and noticed regression when booting is as Xen Dom0 on some boards (e.g Arndale). After investigation, the following 2 patches resolved the booting regression: - 374d446d25d6271ee615952a3b7f123ba4983c35 "ARM: 8636/1: Cleanup sanity_check_meminfo" - 985626564eedc470ce2866e53938303368ad41b7 "ARM: 8637/1: Adjust memory boundaries after reservations" Would it be possible to backport them on Linux 4.9 stable? Cheers, -- Julien Grall
Re: [PATCH] xen/privcmd: Support correctly 64KB page granularity when mapping memory
Hi, It has been reviewed-by Boris but I don't see the patch queued. Would it be possible to queue it for 4.12? Cheers, On 01/06/17 21:41, Boris Ostrovsky wrote: On 06/01/2017 11:38 AM, Julien Grall wrote: Hi Boris, On 01/06/17 16:16, Boris Ostrovsky wrote: On 06/01/2017 10:01 AM, Julien Grall wrote: Hi Boris, On 01/06/17 14:33, Boris Ostrovsky wrote: On 06/01/2017 08:50 AM, Julien Grall wrote: Hi Boris, On 31/05/17 14:54, Boris Ostrovsky wrote: On 05/31/2017 09:03 AM, Julien Grall wrote: Commit 5995a68 "xen/privcmd: Add support for Linux 64KB page granularity" did not go far enough to support 64KB in mmap_batch_fn. The variable 'nr' is the number of 4KB chunk to map. However, when Linux is using 64KB page granularity the array of pages (vma->vm_private_data) contain one page per 64KB. Fix it by incrementing st->index correctly. Furthermore, st->va is not correctly incremented as PAGE_SIZE != XEN_PAGE_SIZE. Fixes: 5995a68 ("xen/privcmd: Add support for Linux 64KB page granularity") CC: sta...@vger.kernel.org Reported-by: Feng Kan Signed-off-by: Julien Grall --- drivers/xen/privcmd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 7a92a5e1d40c..feca75b07fdd 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -362,8 +362,8 @@ static int mmap_batch_fn(void *data, int nr, void *state) st->global_error = 1; } } -st->va += PAGE_SIZE * nr; -st->index += nr; +st->va += XEN_PAGE_SIZE * nr; +st->index += nr / XEN_PFN_PER_PAGE; return 0; } Are we still using PAGE_MASK for xen_remap_domain_gfn_array()? Do you mean in the xen_xlate_remap_gfn_array implementation? If so there are no use of PAGE_MASK as the code has been converted to support 64K page granularity. If you mean the x86 version of xen_remap_domain_gfn_array, then we don't really care as x86 only use 4KB page granularity. I meant right above the change that you made. Should it also be replaced with XEN_PAGE_MASK? (Sorry for being unclear.) Oh. The code in xen_remap_domain_gfn_array is relying on st->va to be page aligned. So I think we want to keep PAGE_MASK here. Doe this imply then that 'nr' 4K pages is integral number of PAGE_SIZE (i.e. (nr*XEN_PAGE_SIZE) % PAGE_SIZE == 0) and if yes --- do we test this somewhere? I don't see it. I now see that this should (obviously) stay as PAGE_MASK, so Reviewed-by: Boris Ostrovsky but nr might be smaller for the last batch. But all the intermediate batch should have ((nr * XEN_PAGE_SIZE) % PAGE_SIZE == 0). how can we have nr not covering full PAGE_SIZEs? If you are using 64K pages, how can you map, say, only 4K (if nr==1)? -boris I think the BUILD_BUG_ON in privcmd_ioctl_mmap_batch ensure that all the intermediate batch will always be an integral number of PAGE_SIZE. Cheers, -- Julien Grall
Re: [PATCH 0/3 v2] xen: Implement EFI reset_system callback
Hi all, It looks like the series has fully been acked, can someone merge this into xentip? Cheers, On 04/24/2017 06:58 PM, Julien Grall wrote: Hi all, This small patch series implements EFI reset_system callback when using EFI Xen. Without this, it will not be possible to reboot/power off ARM64 DOM0 when using ACPI. Cheers, Cc: Boris Ostrovsky Cc: Juergen Gross Julien Grall (3): xen: Export xen_reboot arm/xen: Consolidate calls to shutdown hypercall in a single helper xen: Implement EFI reset_system callback arch/arm/xen/efi.c | 2 +- arch/arm/xen/enlighten.c | 16 ++-- arch/x86/xen/efi.c | 2 +- arch/x86/xen/enlighten.c | 2 +- drivers/xen/efi.c| 18 ++ include/xen/xen-ops.h| 5 + 6 files changed, 36 insertions(+), 9 deletions(-) -- Julien Grall
Re: [PATCH 0/3 v2] xen: Implement EFI reset_system callback
Hi Juergen, On 02/05/17 10:30, Juergen Gross wrote: On 02/05/17 11:08, Julien Grall wrote: Hi all, It looks like the series has fully been acked, can someone merge this into xentip? As I already wrote: patch 1 doesn't apply any longer. As there were other conflicts between xentip and Linus' tree I'm doing a rebase of for-linus-4.12 right now, so I can do the rebase of this patch for you. Sorry I missed your answer on patch #1. I saw you just rebased it, thank you for that! Cheers, -- Julien Grall
Fail to build Linux with O= in master
Hello, I have tried to build the latest master and got the following error: 42sh> make O=~/works/linux-build/aarch64-4k -C ~/works/linux make: Entering directory '/home/julgra01/works/linux' Makefile:135: *** failed to create output directory "~/works/linux-build/aarch64-4k". Stop. make: Leaving directory '/home/julgra01/works/linux' The bisector found the commit 8e9b466 "kbuild: use $(abspath ...) instead of $(shell cd ... && /bin/pwd)". Looking at the code, it seems that it is because the code is now using $(realpath ...). Indeed when I replace ~ by $HOME the kernel is now built. I am not a Makefile expert, so I would appreciate if anyone could suggest a patch for this. Regards, -- Julien Grall
Re: Fail to build Linux with O= in master
Hello, On 30/09/17 01:37, Masahiro Yamada wrote: 2017-09-30 8:59 GMT+09:00 Julien Grall : Hello, I have tried to build the latest master and got the following error: 42sh> make O=~/works/linux-build/aarch64-4k -C ~/works/linux make: Entering directory '/home/julgra01/works/linux' Makefile:135: *** failed to create output directory "~/works/linux-build/aarch64-4k". Stop. make: Leaving directory '/home/julgra01/works/linux' The bisector found the commit 8e9b466 "kbuild: use $(abspath ...) instead of $(shell cd ... && /bin/pwd)". Looking at the code, it seems that it is because the code is now using $(realpath ...). Indeed when I replace ~ by $HOME the kernel is now built. I am not a Makefile expert, so I would appreciate if anyone could suggest a patch for this. Thanks for the report. Which shell are you using? I am using dash. I use bash and I have no problem. If I try dash, indeed, it fails. I will take a look. Thank you! Cheers, -- Julien Grall
Re: [PATCH] kbuild: revert $(realpath ...) to $(shell cd ... && /bin/pwd)
Hello, On 02/10/17 09:07, Masahiro Yamada wrote: I thought commit 8e9b46679923 ("kbuild: use $(abspath ...) instead of $(shell cd ... && /bin/pwd)") was a safe conversion, but it changed the behavior. $(abspath ...) / $(realpath ...) does not expand shell special characters, such as '~'. Here is a simple Makefile example: >8 $(info /bin/pwd: $(shell cd ~/; /bin/pwd)) $(info abspath: $(abspath ~/)) $(info realpath: $(realpath ~/)) all: @: >8 $ make /bin/pwd: /home/masahiro abspath: /home/masahiro/workspace/~ realpath: This can be a real problem if 'make O=~/foo' is invoked from another Makefile or primitive shell like dash. This commit partially reverts 8e9b46679923. Fixes: 8e9b46679923 ("kbuild: use $(abspath ...) instead of $(shell cd ... && /bin/pwd)") Reported-by: Julien Grall Signed-off-by: Masahiro Yamada Tested-by: Julien Grall Regards, --- Makefile | 4 ++-- tools/power/cpupower/Makefile | 2 +- tools/scripts/Makefile.include | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index cf007a3..1b48717 100644 --- a/Makefile +++ b/Makefile @@ -130,8 +130,8 @@ endif ifneq ($(KBUILD_OUTPUT),) # check that the output directory actually exists saved-output := $(KBUILD_OUTPUT) -$(shell [ -d $(KBUILD_OUTPUT) ] || mkdir -p $(KBUILD_OUTPUT)) -KBUILD_OUTPUT := $(realpath $(KBUILD_OUTPUT)) +KBUILD_OUTPUT := $(shell mkdir -p $(KBUILD_OUTPUT) && cd $(KBUILD_OUTPUT) \ + && /bin/pwd) $(if $(KBUILD_OUTPUT),, \ $(error failed to create output directory "$(saved-output)")) diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index 4c5a481..d6e1c02 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -26,7 +26,7 @@ endif ifneq ($(OUTPUT),) # check that the output directory actually exists -OUTDIR := $(realpath $(OUTPUT)) +OUTDIR := $(shell cd $(OUTPUT) && /bin/pwd) $(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist)) endif diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index 9dc8f07..1e8b611 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -1,7 +1,7 @@ ifneq ($(O),) ifeq ($(origin O), command line) - ABSOLUTE_O := $(realpath $(O)) - dummy := $(if $(ABSOLUTE_O),,$(error O=$(O) does not exist)) + dummy := $(if $(shell test -d $(O) || echo $(O)),$(error O=$(O) does not exist),) + ABSOLUTE_O := $(shell cd $(O) ; pwd) OUTPUT := $(ABSOLUTE_O)/$(if $(subdir),$(subdir)/) COMMAND_O := O=$(ABSOLUTE_O) ifeq ($(objtree),) @@ -12,7 +12,7 @@ endif # check that the output directory actually exists ifneq ($(OUTPUT),) -OUTDIR := $(realpath $(OUTPUT)) +OUTDIR := $(shell cd $(OUTPUT) && /bin/pwd) $(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist)) endif -- Julien Grall
[PATCH] DT/arm,gic-v3: Update the ITS size in the examples
Currently, the examples are using 2MB for the ITS size. Per the specification (section 8.18 in ARM IHI 0069D), the ITS address map is 128KB. Update the examples to match the specification. Signed-off-by: Julien Grall --- .../devicetree/bindings/interrupt-controller/arm,gic-v3.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.txt b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.txt index 4c29cdab0ea5..5eb108e180fa 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.txt +++ b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.txt @@ -99,7 +99,7 @@ Examples: compatible = "arm,gic-v3-its"; msi-controller; #msi-cells = <1>; - reg = <0x0 0x2c20 0 0x20>; + reg = <0x0 0x2c20 0 0x2>; }; }; @@ -124,14 +124,14 @@ Examples: compatible = "arm,gic-v3-its"; msi-controller; #msi-cells = <1>; - reg = <0x0 0x2c20 0 0x20>; + reg = <0x0 0x2c20 0 0x2>; }; gic-its@2c40 { compatible = "arm,gic-v3-its"; msi-controller; #msi-cells = <1>; - reg = <0x0 0x2c40 0 0x20>; + reg = <0x0 0x2c40 0 0x2>; }; ppi-partitions { -- 2.11.0
[tip: timers/core] hrtimer: Don't take expiry_lock when timer is currently migrated
The following commit has been merged into the timers/core branch of tip: Commit-ID: 68b2c8c1e421096f4b46ac2ac502d25ca067a2a6 Gitweb: https://git.kernel.org/tip/68b2c8c1e421096f4b46ac2ac502d25ca067a2a6 Author:Julien Grall AuthorDate:Wed, 21 Aug 2019 10:24:09 +01:00 Committer: Thomas Gleixner CommitterDate: Wed, 21 Aug 2019 16:10:01 +02:00 hrtimer: Don't take expiry_lock when timer is currently migrated migration_base is used as a placeholder when an hrtimer is migrated to a different CPU. In the case that hrtimer_cancel_wait_running() hits a timer which is currently migrated it would pointlessly acquire the expiry lock of the migration base, which is even not initialized. Surely it could be initialized, but there is absolutely no point in acquiring this lock because the timer is guaranteed not to run it's callback for which the caller waits to finish on that base. So it would just do the inc/lock/dec/unlock dance for nothing. As the base switch is short and non-preemptible, there is no issue when the wait function returns immediately. The timer base and base->cpu_base cannot be NULL in the code path which is invoking that, so just replace those checks with a check whether base is migration base. [ tglx: Updated from RT patch. Massaged changelog. Added comment. ] Signed-off-by: Julien Grall Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20190821092409.13225-4-julien.gr...@arm.com --- kernel/time/hrtimer.c | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index f48864e..ebbd0fb 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1217,7 +1217,11 @@ void hrtimer_cancel_wait_running(const struct hrtimer *timer) /* Lockless read. Prevent the compiler from reloading it below */ struct hrtimer_clock_base *base = READ_ONCE(timer->base); - if (!timer->is_soft || !base || !base->cpu_base) { + /* +* Just relax if the timer expires in hard interrupt context or if +* it is currently on the migration base. +*/ + if (!timer->is_soft || base == &migration_base) cpu_relax(); return; }
[tip: timers/core] hrtimer: Protect lockless access to timer->base
The following commit has been merged into the timers/core branch of tip: Commit-ID: dd2261ed45aaeddeb77768f291d604179bcab096 Gitweb: https://git.kernel.org/tip/dd2261ed45aaeddeb77768f291d604179bcab096 Author:Julien Grall AuthorDate:Wed, 21 Aug 2019 10:24:07 +01:00 Committer: Thomas Gleixner CommitterDate: Wed, 21 Aug 2019 16:10:01 +02:00 hrtimer: Protect lockless access to timer->base The update to timer->base is protected by the base->cpu_base->lock(). However, hrtimer_cancel_wait_running() does access it lockless. So the compiler is allowed to refetch timer->base which can cause havoc when the timer base is changed concurrently. Use READ_ONCE() to prevent this. [ tglx: Adapted from a RT patch ] Signed-off-by: Julien Grall Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20190821092409.13225-2-julien.gr...@arm.com --- kernel/time/hrtimer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 8333537..f48864e 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1214,7 +1214,8 @@ static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base, */ void hrtimer_cancel_wait_running(const struct hrtimer *timer) { - struct hrtimer_clock_base *base = timer->base; + /* Lockless read. Prevent the compiler from reloading it below */ + struct hrtimer_clock_base *base = READ_ONCE(timer->base); if (!timer->is_soft || !base || !base->cpu_base) { cpu_relax();