[tip: x86/fpu] x86/fpu: Handle FPU-related and clearcpuid command line arguments earlier
The following commit has been merged into the x86/fpu branch of tip: Commit-ID: 1ef5423a55c2ac6f1361811efe75b6e46d1023ed Gitweb: https://git.kernel.org/tip/1ef5423a55c2ac6f1361811efe75b6e46d1023ed Author:Mike Hommey AuthorDate:Tue, 22 Sep 2020 06:56:38 +09:00 Committer: Borislav Petkov CommitterDate: Tue, 22 Sep 2020 00:24:27 +02:00 x86/fpu: Handle FPU-related and clearcpuid command line arguments earlier FPU initialization handles them currently. However, in the case of clearcpuid=, some other early initialization code may check for features before the FPU initialization code is called. Handling the argument earlier allows the command line to influence those early initializations. Signed-off-by: Mike Hommey Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20200921215638.37980-1...@glandium.org --- arch/x86/kernel/cpu/common.c | 55 +++- arch/x86/kernel/fpu/init.c | 55 +--- 2 files changed, 55 insertions(+), 55 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c5d6f17..3c75193 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -23,6 +23,7 @@ #include #include +#include #include #include #include @@ -1221,6 +1222,59 @@ static void detect_nopl(void) } /* + * We parse cpu parameters early because fpu__init_system() is executed + * before parse_early_param(). 
+ */ +static void __init cpu_parse_early_param(void) +{ + char arg[128]; + char *argptr = arg; + int arglen, res, bit; + +#ifdef CONFIG_X86_32 + if (cmdline_find_option_bool(boot_command_line, "no387")) +#ifdef CONFIG_MATH_EMULATION + setup_clear_cpu_cap(X86_FEATURE_FPU); +#else + pr_err("Option 'no387' required CONFIG_MATH_EMULATION enabled.\n"); +#endif + + if (cmdline_find_option_bool(boot_command_line, "nofxsr")) + setup_clear_cpu_cap(X86_FEATURE_FXSR); +#endif + + if (cmdline_find_option_bool(boot_command_line, "noxsave")) + setup_clear_cpu_cap(X86_FEATURE_XSAVE); + + if (cmdline_find_option_bool(boot_command_line, "noxsaveopt")) + setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); + + if (cmdline_find_option_bool(boot_command_line, "noxsaves")) + setup_clear_cpu_cap(X86_FEATURE_XSAVES); + + arglen = cmdline_find_option(boot_command_line, "clearcpuid", arg, sizeof(arg)); + if (arglen <= 0) + return; + + pr_info("Clearing CPUID bits:"); + do { + res = get_option(&argptr, &bit); + if (res == 0 || res == 3) + break; + + /* If the argument was too long, the last bit may be cut off */ + if (res == 1 && arglen >= sizeof(arg)) + break; + + if (bit >= 0 && bit < NCAPINTS * 32) { + pr_cont(" " X86_CAP_FMT, x86_cap_flag(bit)); + setup_clear_cpu_cap(bit); + } + } while (res == 2); + pr_cont("\n"); +} + +/* * Do minimum CPU detection early. * Fields really needed: vendor, cpuid_level, family, model, mask, * cache alignment. 
@@ -1255,6 +1309,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) get_cpu_cap(c); get_cpu_address_sizes(c); setup_force_cpu_cap(X86_FEATURE_CPUID); + cpu_parse_early_param(); if (this_cpu->c_early_init) this_cpu->c_early_init(c); diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index f8ff895..701f196 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -5,7 +5,6 @@ #include #include #include -#include #include #include @@ -238,65 +237,11 @@ static void __init fpu__init_system_ctx_switch(void) } /* - * We parse fpu parameters early because fpu__init_system() is executed - * before parse_early_param(). - */ -static void __init fpu__init_parse_early_param(void) -{ - char arg[128]; - char *argptr = arg; - int arglen, res, bit; - -#ifdef CONFIG_X86_32 - if (cmdline_find_option_bool(boot_command_line, "no387")) -#ifdef CONFIG_MATH_EMULATION - setup_clear_cpu_cap(X86_FEATURE_FPU); -#else - pr_err("Option 'no387' required CONFIG_MATH_EMULATION enabled.\n"); -#endif - - if (cmdline_find_option_bool(boot_command_line, "nofxsr")) - setup_clear_cpu_cap(X86_FEATURE_FXSR); -#endif - - if (cmdline_find_option_bool(boot_command_line, "noxsave")) - setup_clear_cpu_cap(X86_FEATURE_XSAVE); - - if (cmdline_find_option_bool(boot_command_line, "noxsaveopt"))
[PATCH v3] x86/boot: Handle fpu-related and clearcpuid command line arguments earlier
FPU initialization handles them currently. However, in the case of clearcpuid, some other early initialization code may check for features before the FPU initialization code is called. Handling the argument earlier allows the command line to influence those early initializations. Signed-off-by: Mike Hommey --- arch/x86/kernel/cpu/common.c | 55 arch/x86/kernel/fpu/init.c | 55 2 files changed, 55 insertions(+), 55 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8d4715e84268..6220fae87263 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -23,6 +23,7 @@ #include #include +#include #include #include #include @@ -1220,6 +1221,59 @@ static void detect_nopl(void) #endif } +/* + * We parse cpu parameters early because fpu__init_system() is executed + * before parse_early_param(). + */ +static void __init cpu_parse_early_param(void) +{ + char arg[128]; + char *argptr = arg; + int arglen, res, bit; + +#ifdef CONFIG_X86_32 + if (cmdline_find_option_bool(boot_command_line, "no387")) +#ifdef CONFIG_MATH_EMULATION + setup_clear_cpu_cap(X86_FEATURE_FPU); +#else + pr_err("Option 'no387' required CONFIG_MATH_EMULATION enabled.\n"); +#endif + + if (cmdline_find_option_bool(boot_command_line, "nofxsr")) + setup_clear_cpu_cap(X86_FEATURE_FXSR); +#endif + + if (cmdline_find_option_bool(boot_command_line, "noxsave")) + setup_clear_cpu_cap(X86_FEATURE_XSAVE); + + if (cmdline_find_option_bool(boot_command_line, "noxsaveopt")) + setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); + + if (cmdline_find_option_bool(boot_command_line, "noxsaves")) + setup_clear_cpu_cap(X86_FEATURE_XSAVES); + + arglen = cmdline_find_option(boot_command_line, "clearcpuid", arg, sizeof(arg)); + if (arglen <= 0) + return; + + pr_info("Clearing CPUID bits:"); + do { + res = get_option(&argptr, &bit); + if (res == 0 || res == 3) + break; + + /* If the argument was too long, the last bit may be cut off */ + if (res == 1 && arglen >= sizeof(arg)) + break; + + if 
(bit >= 0 && bit < NCAPINTS * 32) { + pr_cont(" " X86_CAP_FMT, x86_cap_flag(bit)); + setup_clear_cpu_cap(bit); + } + } while (res == 2); + pr_cont("\n"); +} + /* * Do minimum CPU detection early. * Fields really needed: vendor, cpuid_level, family, model, mask, @@ -1255,6 +1309,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) get_cpu_cap(c); get_cpu_address_sizes(c); setup_force_cpu_cap(X86_FEATURE_CPUID); + cpu_parse_early_param(); if (this_cpu->c_early_init) this_cpu->c_early_init(c); diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index f8ff895aaf7e..701f196d7c68 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -5,7 +5,6 @@ #include #include #include -#include #include #include @@ -237,66 +236,12 @@ static void __init fpu__init_system_ctx_switch(void) on_boot_cpu = 0; } -/* - * We parse fpu parameters early because fpu__init_system() is executed - * before parse_early_param(). - */ -static void __init fpu__init_parse_early_param(void) -{ - char arg[128]; - char *argptr = arg; - int arglen, res, bit; - -#ifdef CONFIG_X86_32 - if (cmdline_find_option_bool(boot_command_line, "no387")) -#ifdef CONFIG_MATH_EMULATION - setup_clear_cpu_cap(X86_FEATURE_FPU); -#else - pr_err("Option 'no387' required CONFIG_MATH_EMULATION enabled.\n"); -#endif - - if (cmdline_find_option_bool(boot_command_line, "nofxsr")) - setup_clear_cpu_cap(X86_FEATURE_FXSR); -#endif - - if (cmdline_find_option_bool(boot_command_line, "noxsave")) - setup_clear_cpu_cap(X86_FEATURE_XSAVE); - - if (cmdline_find_option_bool(boot_command_line, "noxsaveopt")) - setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); - - if (cmdline_find_option_bool(boot_command_line, "noxsaves")) - setup_clear_cpu_cap(X86_FEATURE_XSAVES); - - arglen = cmdline_find_option(boot_command_line, "clearcpuid", arg, sizeof(arg)); - if (arglen <= 0) - return; - - pr_info("Clearing CPUID bits:"); - do { - res = get_option(&argptr, &bit); - if (res == 0 || res == 3) -
[PATCH v2] x86/boot: Handle fpu-related and clearcpuid command line arguments earlier
FPU initialization handles them currently. However, in the case of clearcpuid, some other early initialization code may check for features before the FPU initialization code is called. Handling the argument earlier allows the command line to influence those early initializations. Signed-off-by: Mike Hommey --- arch/x86/kernel/cpu/common.c | 41 arch/x86/kernel/fpu/init.c | 41 2 files changed, 41 insertions(+), 41 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c5d6f17d9b9d..5e2e4d3621bd 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -23,6 +23,7 @@ #include #include +#include #include #include #include @@ -1220,6 +1221,45 @@ static void detect_nopl(void) #endif } +/* + * We parse cpu parameters early because early_identify_cpu() is executed + * before parse_early_param(). + */ +static void __init cpu__init_parse_early_param(void) +{ + char arg[32]; + char *argptr = arg; + int bit; + +#ifdef CONFIG_X86_32 + if (cmdline_find_option_bool(boot_command_line, "no387")) +#ifdef CONFIG_MATH_EMULATION + setup_clear_cpu_cap(X86_FEATURE_FPU); +#else + pr_err("Option 'no387' required CONFIG_MATH_EMULATION enabled.\n"); +#endif + + if (cmdline_find_option_bool(boot_command_line, "nofxsr")) + setup_clear_cpu_cap(X86_FEATURE_FXSR); +#endif + + if (cmdline_find_option_bool(boot_command_line, "noxsave")) + setup_clear_cpu_cap(X86_FEATURE_XSAVE); + + if (cmdline_find_option_bool(boot_command_line, "noxsaveopt")) + setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); + + if (cmdline_find_option_bool(boot_command_line, "noxsaves")) + setup_clear_cpu_cap(X86_FEATURE_XSAVES); + + if (cmdline_find_option(boot_command_line, "clearcpuid", arg, + sizeof(arg)) && + get_option(&argptr, &bit) && + bit >= 0 && + bit < NCAPINTS * 32) + setup_clear_cpu_cap(bit); +} + /* * Do minimum CPU detection early. 
* Fields really needed: vendor, cpuid_level, family, model, mask, @@ -1255,6 +1295,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) get_cpu_cap(c); get_cpu_address_sizes(c); setup_force_cpu_cap(X86_FEATURE_CPUID); + cpu__init_parse_early_param(); if (this_cpu->c_early_init) this_cpu->c_early_init(c); diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 61ddc3a5e5c2..701f196d7c68 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -5,7 +5,6 @@ #include #include #include -#include #include #include @@ -237,52 +236,12 @@ static void __init fpu__init_system_ctx_switch(void) on_boot_cpu = 0; } -/* - * We parse fpu parameters early because fpu__init_system() is executed - * before parse_early_param(). - */ -static void __init fpu__init_parse_early_param(void) -{ - char arg[32]; - char *argptr = arg; - int bit; - -#ifdef CONFIG_X86_32 - if (cmdline_find_option_bool(boot_command_line, "no387")) -#ifdef CONFIG_MATH_EMULATION - setup_clear_cpu_cap(X86_FEATURE_FPU); -#else - pr_err("Option 'no387' required CONFIG_MATH_EMULATION enabled.\n"); -#endif - - if (cmdline_find_option_bool(boot_command_line, "nofxsr")) - setup_clear_cpu_cap(X86_FEATURE_FXSR); -#endif - - if (cmdline_find_option_bool(boot_command_line, "noxsave")) - setup_clear_cpu_cap(X86_FEATURE_XSAVE); - - if (cmdline_find_option_bool(boot_command_line, "noxsaveopt")) - setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); - - if (cmdline_find_option_bool(boot_command_line, "noxsaves")) - setup_clear_cpu_cap(X86_FEATURE_XSAVES); - - if (cmdline_find_option(boot_command_line, "clearcpuid", arg, - sizeof(arg)) && - get_option(&argptr, &bit) && - bit >= 0 && - bit < NCAPINTS * 32) - setup_clear_cpu_cap(bit); -} - /* * Called on the boot CPU once per system bootup, to set up the initial * FPU state that is later cloned into all processes: */ void __init fpu__init_system(struct cpuinfo_x86 *c) { - fpu__init_parse_early_param(); fpu__init_system_early_generic(c); /* -- 2.28.0
[PATCH] x86/boot: Delay BSP init until after FPU initialization
FPU initialization handles the clearcpuid command line argument. If it comes after BSP init, clearcpuid cannot be used to disable features that trigger some parts of the BSP init code. Signed-off-by: Mike Hommey --- arch/x86/kernel/cpu/common.c | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) I was trying to use clearcpuid=440 to disable X86_FEATURE_AMD_SSBD to reproduce the behavior that happens on Zen/Zen+ on a Zen2 machine, but that didn't work because the command line is handled after the setup for X86_FEATURE_LS_CFG_SSBD. I thought about moving the command line handling earlier, but it seems there wasn't a specific reason for BSP init being earlier than FPU initialization so I went with reordering those instead. diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c5d6f17d9b9d..c3bbca91a14b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1261,9 +1261,6 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) c->cpu_index = 0; filter_cpuid_features(c, false); - - if (this_cpu->c_bsp_init) - this_cpu->c_bsp_init(c); } else { setup_clear_cpu_cap(X86_FEATURE_CPUID); } @@ -1276,6 +1273,10 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) fpu__init_system(c); + if (have_cpuid_p()) { + if (this_cpu->c_bsp_init) + this_cpu->c_bsp_init(c); + } #ifdef CONFIG_X86_32 /* * Regardless of whether PCID is enumerated, the SDM says -- 2.28.0
Re: [PATCH 08/17] mm: madvise MADV_USERFAULT
On Fri, Oct 03, 2014 at 07:07:58PM +0200, Andrea Arcangeli wrote: > MADV_USERFAULT is a new madvise flag that will set VM_USERFAULT in the > vma flags. Whenever VM_USERFAULT is set in an anonymous vma, if > userland touches a still unmapped virtual address, a sigbus signal is > sent instead of allocating a new page. The sigbus signal handler will > then resolve the page fault in userland by calling the > remap_anon_pages syscall. What does "unmapped virtual address" mean in this context? Mike -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 08/17] mm: madvise MADV_USERFAULT
On Fri, Oct 03, 2014 at 07:07:58PM +0200, Andrea Arcangeli wrote: MADV_USERFAULT is a new madvise flag that will set VM_USERFAULT in the vma flags. Whenever VM_USERFAULT is set in an anonymous vma, if userland touches a still unmapped virtual address, a sigbus signal is sent instead of allocating a new page. The sigbus signal handler will then resolve the page fault in userland by calling the remap_anon_pages syscall. What does unmapped virtual address mean in this context? Mike -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
What could cause siginfo_t.si_addr to always be NULL?
Hi, We're running into an interesting issue with Firefox on Android, in which a segfault signal handler always gets a value of 0 for siginfo_t.si_addr. The most interesting part is that this only happens on a few devices/android version combinations. Catching a segfault in gdb also shows the problem in $_siginfo, which suggests this would be the kernel doing something weird. Thus my question, what in the kernel could cause this behavior? Cheers, Mike -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
What could cause siginfo_t.si_addr to always be NULL?
Hi, We're running into an interesting issue with Firefox on Android, in which a segfault signal handler always gets a value of 0 for siginfo_t.si_addr. The most interesting part is that this only happens on a few devices/android version combinations. Catching a segfault in gdb also shows the problem in $_siginfo, which suggests this would be the kernel doing something weird. Thus my question, what in the kernel could cause this behavior? Cheers, Mike -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC/PATCH 0/2] ext4: Transparent Decompression Support
On Fri, Jul 26, 2013 at 09:20:34AM -0400, Jörn Engel wrote: > On Fri, 26 July 2013 12:01:23 +0400, Vyacheslav Dubeyko wrote: > > > > We are discussing not about good or bad idea. We need to elaborate a > > right solution. I think that suggested idea is not clear. Do you > > want to support compression in ext4? Or do you want to add some new > > compression feature (likewise file-oriented compression)? If we are > > talking about compression in ext4 then it needs to use e2compr patch > > set. Otherwise, if we are talking about file compression then it is > > not question of concrete filesystem. And we need to make > > implementation on VFS level. It is only architectural point of view. > > I don't think the e2compr patches are strictly necessary. They are a > good option, but not the only one. > > One trick to simplify the problem is to make Dhaval's compressed files > strictly read-only. It will require some dance to load the compressed > content, flip the switch, then uncompress data on the fly and disallow > writes. Not the most pleasing of interfaces, but yet another option. > > > Why do you try to implement likewise concept on kernel level? It > > looks like you try to move some user-space concept in kernel-space. > > The kernel controls the page cache. Once the page cache is filled > with uncompressed file content, you can do mmap, regular file io, etc. > Putting uncompression code into the kernel makes sense to me. Whether > a solution different from e2compr makes sense is yet to be seen. > > Whatever you do, it will require support from the on-disk format and > the userspace ABI. Setting the compression bit on a file has the > clear advantage that it is an established interface and also supported > by other filesystems. Introducing yet another interface requires a > fairly strong case to be made. But who knows, maybe Dhaval can pull > it off. Come to think of it, the whole thing could be handled entirely in user space through fuse. 
While this is probably a workable solution on desktop/server environments, it doesn't pan out on Android: /dev/fuse is rarely available, and even if it were, fusermount needs to be there and be a setuid program (or have the right capabilities). So, another angle could be to allow some things to happen without privileges, such as mounting filesystems in a private namespace. That wouldn't solve the lack of /dev/fuse, though. Mike -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC/PATCH 0/2] ext4: Transparent Decompression Support
On Fri, Jul 26, 2013 at 09:20:34AM -0400, Jörn Engel wrote: On Fri, 26 July 2013 12:01:23 +0400, Vyacheslav Dubeyko wrote: We are discussing not about good or bad idea. We need to elaborate a right solution. I think that suggested idea is not clear. Do you want to support compression in ext4? Or do you want to add some new compression feature (likewise file-oriented compression)? If we are talking about compression in ext4 then it needs to use e2compr patch set. Otherwise, if we are talking about file compression then it is not question of concrete filesystem. And we need to make implementation on VFS level. It is only architectural point of view. I don't think the e2compr patches are strictly necessary. They are a good option, but not the only one. One trick to simplify the problem is to make Dhaval's compressed files strictly read-only. It will require some dance to load the compressed content, flip the switch, then uncompress data on the fly and disallow writes. Not the most pleasing of interfaces, but yet another option. Why do you try to implement likewise concept on kernel level? It looks like you try to move some user-space concept in kernel-space. The kernel controls the page cache. Once the page cache is filled with uncompressed file content, you can do mmap, regular file io, etc. Putting uncompression code into the kernel makes sense to me. Whether a solution different from e2compr makes sense is yet to be seen. Whatever you do, it will require support from the on-disk format and the userspace ABI. Setting the compression bit on a file has the clear advantage that it is an established interface and also supported by other filesystems. Introducing yet another interface requires a fairly strong case to be made. But who knows, maybe Dhaval can pull it off. Come to think of it, the whole thing could be handled entirely in user space through fuse. 
While this is probably a workable solution on desktop/server environments, it doesn't pan out on Android: /dev/fuse is rarely available, and even if it were, fusermount needs to be there and be a setuid program (or have the right capabilities). So, another angle could be to allow some things to happen without privileges, such as mounting filesystems in a private namespace. That wouldn't solve the lack of /dev/fuse, though. Mike -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC v3] Support volatile range for anon vma
On Tue, Dec 11, 2012 at 05:11:17PM +0900, Minchan Kim wrote: > On Tue, Dec 11, 2012 at 08:59:50AM +0100, Mike Hommey wrote: > > On Tue, Dec 11, 2012 at 04:37:44PM +0900, Minchan Kim wrote: > > > On Tue, Dec 11, 2012 at 08:17:42AM +0100, Mike Hommey wrote: > > > > On Tue, Dec 11, 2012 at 11:41:04AM +0900, Minchan Kim wrote: > > > > > - What's the madvise(addr, length, MADV_VOLATILE)? > > > > > > > > > > It's a hint that user deliver to kernel so kernel can *discard* > > > > > pages in a range anytime. > > > > > > > > > > - What happens if user access page(ie, virtual address) discarded > > > > > by kernel? > > > > > > > > > > The user can see zero-fill-on-demand pages as if madvise(DONTNEED). > > > > > > > > What happened to getting SIGBUS? > > > > > > I thought it could force for user to handle signal. > > > If user can receive signal, what can he do? > > > Maybe he can call madivse(NOVOLATILE) in my old version but I removed it > > > in this version so user don't need handle signal handling. > > > > NOVOLATILE and signal throwing are two different and not necessarily > > related needs. We (Mozilla) could probably live without NOVOLATILE, > > but certainly not without signal throwing. > > What's shortcoming if we don't provide signal handling? > Could you explain how you want to signal in your allocator? The main use case we have for signals is not an allocator. We're currently using ashmem to decompress libraries on Android. We would like to use volatile memory for that instead, so that unused pages can be discarded. With NOVOLATILE, or when getting zero-filled pages, that just doesn't pan out: you may well be jumping in the volatile memory from anywhere, and you can't check the status of the page you're jumping into before jumping. Thus you need to be signaled when reaching a discarded page. 
Mike -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC v3] Support volatile range for anon vma
On Tue, Dec 11, 2012 at 04:37:44PM +0900, Minchan Kim wrote: > On Tue, Dec 11, 2012 at 08:17:42AM +0100, Mike Hommey wrote: > > On Tue, Dec 11, 2012 at 11:41:04AM +0900, Minchan Kim wrote: > > > - What's the madvise(addr, length, MADV_VOLATILE)? > > > > > > It's a hint that user deliver to kernel so kernel can *discard* > > > pages in a range anytime. > > > > > > - What happens if user access page(ie, virtual address) discarded > > > by kernel? > > > > > > The user can see zero-fill-on-demand pages as if madvise(DONTNEED). > > > > What happened to getting SIGBUS? > > I thought it could force for user to handle signal. > If user can receive signal, what can he do? > Maybe he can call madivse(NOVOLATILE) in my old version but I removed it > in this version so user don't need handle signal handling. NOVOLATILE and signal throwing are two different and not necessarily related needs. We (Mozilla) could probably live without NOVOLATILE, but certainly not without signal throwing. Mike -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC v3] Support volatile range for anon vma
On Tue, Dec 11, 2012 at 04:37:44PM +0900, Minchan Kim wrote: On Tue, Dec 11, 2012 at 08:17:42AM +0100, Mike Hommey wrote: On Tue, Dec 11, 2012 at 11:41:04AM +0900, Minchan Kim wrote: - What's the madvise(addr, length, MADV_VOLATILE)? It's a hint that user deliver to kernel so kernel can *discard* pages in a range anytime. - What happens if user access page(ie, virtual address) discarded by kernel? The user can see zero-fill-on-demand pages as if madvise(DONTNEED). What happened to getting SIGBUS? I thought it could force for user to handle signal. If user can receive signal, what can he do? Maybe he can call madivse(NOVOLATILE) in my old version but I removed it in this version so user don't need handle signal handling. NOVOLATILE and signal throwing are two different and not necessarily related needs. We (Mozilla) could probably live without NOVOLATILE, but certainly not without signal throwing. Mike -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC v3] Support volatile range for anon vma
On Tue, Dec 11, 2012 at 05:11:17PM +0900, Minchan Kim wrote: On Tue, Dec 11, 2012 at 08:59:50AM +0100, Mike Hommey wrote: On Tue, Dec 11, 2012 at 04:37:44PM +0900, Minchan Kim wrote: On Tue, Dec 11, 2012 at 08:17:42AM +0100, Mike Hommey wrote: On Tue, Dec 11, 2012 at 11:41:04AM +0900, Minchan Kim wrote: - What's the madvise(addr, length, MADV_VOLATILE)? It's a hint that user deliver to kernel so kernel can *discard* pages in a range anytime. - What happens if user access page(ie, virtual address) discarded by kernel? The user can see zero-fill-on-demand pages as if madvise(DONTNEED). What happened to getting SIGBUS? I thought it could force for user to handle signal. If user can receive signal, what can he do? Maybe he can call madivse(NOVOLATILE) in my old version but I removed it in this version so user don't need handle signal handling. NOVOLATILE and signal throwing are two different and not necessarily related needs. We (Mozilla) could probably live without NOVOLATILE, but certainly not without signal throwing. What's shortcoming if we don't provide signal handling? Could you explain how you want to signal in your allocator? The main use case we have for signals is not an allocator. We're currently using ashmem to decompress libraries on Android. We would like to use volatile memory for that instead, so that unused pages can be discarded. With NOVOLATILE, or when getting zero-filled pages, that just doesn't pan out: you may well be jumping in the volatile memory from anywhere, and you can't check the status of the page you're jumping into before jumping. Thus you need to be signaled when reaching a discarded page. Mike -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC v3] Support volatile range for anon vma
On Tue, Dec 11, 2012 at 11:41:04AM +0900, Minchan Kim wrote: > - What's the madvise(addr, length, MADV_VOLATILE)? > > It's a hint that user deliver to kernel so kernel can *discard* > pages in a range anytime. > > - What happens if user access page(ie, virtual address) discarded > by kernel? > > The user can see zero-fill-on-demand pages as if madvise(DONTNEED). What happened to getting SIGBUS? Mike -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC v3] Support volatile range for anon vma
On Tue, Dec 11, 2012 at 11:41:04AM +0900, Minchan Kim wrote: - What's the madvise(addr, length, MADV_VOLATILE)? It's a hint that user deliver to kernel so kernel can *discard* pages in a range anytime. - What happens if user access page(ie, virtual address) discarded by kernel? The user can see zero-fill-on-demand pages as if madvise(DONTNEED). What happened to getting SIGBUS? Mike -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 0/3] Volatile Ranges (v7) & Lots of words
On Fri, Nov 02, 2012 at 09:59:07PM +0100, Michael Kerrisk wrote: > John, > > A question at on one point: > > On Wed, Oct 3, 2012 at 12:38 AM, John Stultz wrote: > > On 10/02/2012 12:39 AM, NeilBrown wrote: > [...] > >> The SIGBUS interface could have some merit if it really reduces > >> overhead. I > >> worry about app bugs that could result from the non-deterministic > >> behaviour. A range could get unmapped while it is in use and testing > >> for > >> the case of "get a SIGBUS half way though accessing something" would not > >> be straight forward (SIGBUS on first step of access should be easy). > >> I guess that is up to the app writer, but I have never liked anything > >> about > >> the signal interface and encouraging further use doesn't feel wise. > > > > Initially I didn't like the idea, but have warmed considerably to it. Mainly > > due to the concern that the constant unmark/access/mark pattern would be too > > much overhead, and having a lazy method will be much nicer for performance. > > But yes, at the cost of additional complexity of handling the signal, > > marking the faulted address range as non-volatile, restoring the data and > > continuing. > > At a finer level of detail, how do you see this as happening in the > application. I mean: in the general case, repopulating the purged > volatile page would have to be done outside the signal handler (I > think, because async-signal-safety considerations would preclude too > much compdex stuff going on inside the handler). That implies > longjumping out of the handler, repopulating the pages with data, and > then restarting whatever work was being done when the SIGBUS was > generated. There are different strategies that can be used to repopulate the pages, within or outside the signal handler, and I'd say it's not that important of a detail. That being said, if the kernel could be helpful and avoid people shooting themselves in the foot, that would be great, too. 
I don't know how possible this would be but being able to get the notification on a signalfd in a dedicated thread would certainly improve things (I guess other use cases of SIGSEGV/SIGBUS handlers could appreciate something like this). The kernel would pause the faulting thread while sending the notification on the signalfd, and the notified thread would be allowed to resume the faulting thread when it's done doing its job. Mike -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 0/3] Volatile Ranges (v7) Lots of words
On Fri, Nov 02, 2012 at 09:59:07PM +0100, Michael Kerrisk wrote: John, A question at on one point: On Wed, Oct 3, 2012 at 12:38 AM, John Stultz john.stu...@linaro.org wrote: On 10/02/2012 12:39 AM, NeilBrown wrote: [...] The SIGBUS interface could have some merit if it really reduces overhead. I worry about app bugs that could result from the non-deterministic behaviour. A range could get unmapped while it is in use and testing for the case of get a SIGBUS half way through accessing something would not be straight forward (SIGBUS on first step of access should be easy). I guess that is up to the app writer, but I have never liked anything about the signal interface and encouraging further use doesn't feel wise. Initially I didn't like the idea, but have warmed considerably to it. Mainly due to the concern that the constant unmark/access/mark pattern would be too much overhead, and having a lazy method will be much nicer for performance. But yes, at the cost of additional complexity of handling the signal, marking the faulted address range as non-volatile, restoring the data and continuing. At a finer level of detail, how do you see this as happening in the application. I mean: in the general case, repopulating the purged volatile page would have to be done outside the signal handler (I think, because async-signal-safety considerations would preclude too much complex stuff going on inside the handler). That implies longjumping out of the handler, repopulating the pages with data, and then restarting whatever work was being done when the SIGBUS was generated. There are different strategies that can be used to repopulate the pages, within or outside the signal handler, and I'd say it's not that important of a detail. That being said, if the kernel could be helpful and avoid people shooting themselves in the foot, that would be great, too. 
I don't know how possible this would be but being able to get the notification on a signalfd in a dedicated thread would certainly improve things (I guess other use cases of SIGSEGV/SIGBUS handlers could appreciate something like this). The kernel would pause the faulting thread while sending the notification on the signalfd, and the notified thread would be allowed to resume the faulting thread when it's done doing its job. Mike -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 0/3] Volatile Ranges (v7) & Lots of words
On Fri, Sep 28, 2012 at 11:16:30PM -0400, John Stultz wrote: > fd based interfaces vs madvise: > In talking with Taras Glek, he pointed out that for his > needs, the fd based interface is a little annoying, as it > requires having to get access to tmpfs file and mmap it in, > then instead of just referencing a pointer to the data he > wants to mark volatile, he has to calculate the offset from > start of the mmap and pass those file offsets to the interface. > Instead he mentioned that using something like madvise would be > much nicer, since they could just pass a pointer to the object > in memory they want to make volatile and avoid the extra work. > > I'm not opposed to adding an madvise interface for this as > well, but since we have a existing use case with Android's > ashmem, I want to make sure we support this existing behavior. > Specifically as with ashmem applications can be sharing > these tmpfs fds, and so file-relative volatile ranges make > more sense if you need to coordinate what data is volatile > between two applications. > > Also, while I agree that having an madvise interface for > volatile ranges would be nice, it does open up some more > complex implementation issues, since with files, there is a > fixed relationship between pages and the files' address_space > mapping, where you can't have pages shared between different > mappings. This makes it easy to hang the volatile-range tree > off of the mapping (well, indirectly via a hash table). With > general anonymous memory, pages can be shared between multiple > processes, and as far as I understand, don't have any grouping > structure we could use to determine if the page is in a > volatile range or not. We would also need to determine more > complex questions like: What are the semantics of volatility > with copy-on-write pages? I'm hoping to investigate this > idea more deeply soon so I can be sure whatever is pushed has > a clear plan of how to address this idea. 
Further thoughts > here would be appreciated. Note it doesn't have to be a vs. situation. madvise could be an additional way to interface with volatile ranges on a given fd. That is, madvise doesn't have to mean anonymous memory. As a matter of fact, MADV_WILLNEED/MADV_DONTNEED are usually used on mmaped files. Similarly, there could be a way to use madvise to mark volatile ranges, without the application having to track what memory ranges are associated to what part of what file, which the kernel already tracks. Mike -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 0/3] Volatile Ranges (v7) Lots of words
On Fri, Sep 28, 2012 at 11:16:30PM -0400, John Stultz wrote: fd based interfaces vs madvise: In talking with Taras Glek, he pointed out that for his needs, the fd based interface is a little annoying, as it requires having to get access to tmpfs file and mmap it in, then instead of just referencing a pointer to the data he wants to mark volatile, he has to calculate the offset from start of the mmap and pass those file offsets to the interface. Instead he mentioned that using something like madvise would be much nicer, since they could just pass a pointer to the object in memory they want to make volatile and avoid the extra work. I'm not opposed to adding an madvise interface for this as well, but since we have a existing use case with Android's ashmem, I want to make sure we support this existing behavior. Specifically as with ashmem applications can be sharing these tmpfs fds, and so file-relative volatile ranges make more sense if you need to coordinate what data is volatile between two applications. Also, while I agree that having an madvise interface for volatile ranges would be nice, it does open up some more complex implementation issues, since with files, there is a fixed relationship between pages and the files' address_space mapping, where you can't have pages shared between different mappings. This makes it easy to hang the volatile-range tree off of the mapping (well, indirectly via a hash table). With general anonymous memory, pages can be shared between multiple processes, and as far as I understand, don't have any grouping structure we could use to determine if the page is in a volatile range or not. We would also need to determine more complex questions like: What are the semantics of volatility with copy-on-write pages? I'm hoping to investigate this idea more deeply soon so I can be sure whatever is pushed has a clear plan of how to address this idea. Further thoughts here would be appreciated. Note it doesn't have to be a vs. situation. 
madvise could be an additional way to interface with volatile ranges on a given fd. That is, madvise doesn't have to mean anonymous memory. As a matter of fact, MADV_WILLNEED/MADV_DONTNEED are usually used on mmaped files. Similarly, there could be a way to use madvise to mark volatile ranges, without the application having to track what memory ranges are associated to what part of what file, which the kernel already tracks. Mike -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: Question about your git habits
On Fri, Feb 22, 2008 at 11:10:48PM -0500, Daniel Barkalow wrote: > I find that the sequence of changes I make is pretty much unrelated to the > sequence of changes that end up in the project's history, because my > changes as I make them involve writing a lot of stubs (so I can build) and > then filling them out. It's beneficial to have version control on this so > that, if I screw up filling out a stub, I can get back to where I was. > > Having made a complete series, I then generate a new series of commits, > each of which does one thing, without any bugs that I've resolved, such > that the net result is the end of the messy history, except with any > debugging or useless stuff skipped. It's this series that gets merged into > the project history, and I discard the other history. > > The real trick is that the early patches in a lot of series often refactor > existing code in ways that are generally good and necessary for your > eventual outcome, but which you'd never think of until you've written more > of the series. Generating a new commit sequence is necessary to end up > with a history where it looks from the start like you know where you're > going and have everything done that needs to be done when you get to the > point of needing it. Furthermore, you want to be able to test these > commits in isolation, without the distraction of the changes that actually > prompted them, which means that you want to have your working tree is a > state that you never actually had it in as you were developing the end > result. > > This means that you'll usually want to rewrite commits for any series that > isn't a single obvious patch, so it's not a big deal to commit any time > you want to work on some different branch. I do that so much that I have this alias: reorder = !sh -c 'git rebase -i --onto $0 $0 $1' ... and actually pass it only one argument most of the time. 
Mike -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: Question about your git habits
On Fri, Feb 22, 2008 at 11:10:48PM -0500, Daniel Barkalow wrote: I find that the sequence of changes I make is pretty much unrelated to the sequence of changes that end up in the project's history, because my changes as I make them involve writing a lot of stubs (so I can build) and then filling them out. It's beneficial to have version control on this so that, if I screw up filling out a stub, I can get back to where I was. Having made a complete series, I then generate a new series of commits, each of which does one thing, without any bugs that I've resolved, such that the net result is the end of the messy history, except with any debugging or useless stuff skipped. It's this series that gets merged into the project history, and I discard the other history. The real trick is that the early patches in a lot of series often refactor existing code in ways that are generally good and necessary for your eventual outcome, but which you'd never think of until you've written more of the series. Generating a new commit sequence is necessary to end up with a history where it looks from the start like you know where you're going and have everything done that needs to be done when you get to the point of needing it. Furthermore, you want to be able to test these commits in isolation, without the distraction of the changes that actually prompted them, which means that you want to have your working tree is a state that you never actually had it in as you were developing the end result. This means that you'll usually want to rewrite commits for any series that isn't a single obvious patch, so it's not a big deal to commit any time you want to work on some different branch. I do that so much that I have this alias: reorder = !sh -c 'git rebase -i --onto $0 $0 $1' ... and actually pass it only one argument most of the time. 
Mike -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/