[tip: x86/fpu] x86/fpu: Handle FPU-related and clearcpuid command line arguments earlier

2020-09-22 Thread tip-bot2 for Mike Hommey
The following commit has been merged into the x86/fpu branch of tip:

Commit-ID: 1ef5423a55c2ac6f1361811efe75b6e46d1023ed
Gitweb:
https://git.kernel.org/tip/1ef5423a55c2ac6f1361811efe75b6e46d1023ed
Author:Mike Hommey 
AuthorDate:Tue, 22 Sep 2020 06:56:38 +09:00
Committer: Borislav Petkov 
CommitterDate: Tue, 22 Sep 2020 00:24:27 +02:00

x86/fpu: Handle FPU-related and clearcpuid command line arguments earlier

FPU initialization handles them currently. However, in the case
of clearcpuid=, some other early initialization code may check for
features before the FPU initialization code is called. Handling the
argument earlier allows the command line to influence those early
initializations.

Signed-off-by: Mike Hommey 
Signed-off-by: Borislav Petkov 
Link: https://lkml.kernel.org/r/20200921215638.37980-1...@glandium.org
---
 arch/x86/kernel/cpu/common.c | 55 +++-
 arch/x86/kernel/fpu/init.c   | 55 +---
 2 files changed, 55 insertions(+), 55 deletions(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c5d6f17..3c75193 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -23,6 +23,7 @@
 #include 
 #include 
 
+#include 
 #include 
 #include 
 #include 
@@ -1221,6 +1222,59 @@ static void detect_nopl(void)
 }
 
 /*
+ * We parse cpu parameters early because fpu__init_system() is executed
+ * before parse_early_param().
+ */
+static void __init cpu_parse_early_param(void)
+{
+   char arg[128];
+   char *argptr = arg;
+   int arglen, res, bit;
+
+#ifdef CONFIG_X86_32
+   if (cmdline_find_option_bool(boot_command_line, "no387"))
+#ifdef CONFIG_MATH_EMULATION
+   setup_clear_cpu_cap(X86_FEATURE_FPU);
+#else
+   pr_err("Option 'no387' required CONFIG_MATH_EMULATION 
enabled.\n");
+#endif
+
+   if (cmdline_find_option_bool(boot_command_line, "nofxsr"))
+   setup_clear_cpu_cap(X86_FEATURE_FXSR);
+#endif
+
+   if (cmdline_find_option_bool(boot_command_line, "noxsave"))
+   setup_clear_cpu_cap(X86_FEATURE_XSAVE);
+
+   if (cmdline_find_option_bool(boot_command_line, "noxsaveopt"))
+   setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
+
+   if (cmdline_find_option_bool(boot_command_line, "noxsaves"))
+   setup_clear_cpu_cap(X86_FEATURE_XSAVES);
+
+   arglen = cmdline_find_option(boot_command_line, "clearcpuid", arg, 
sizeof(arg));
+   if (arglen <= 0)
+   return;
+
+   pr_info("Clearing CPUID bits:");
+   do {
+   res = get_option(&argptr, &bit);
+   if (res == 0 || res == 3)
+   break;
+
+   /* If the argument was too long, the last bit may be cut off */
+   if (res == 1 && arglen >= sizeof(arg))
+   break;
+
+   if (bit >= 0 && bit < NCAPINTS * 32) {
+   pr_cont(" " X86_CAP_FMT, x86_cap_flag(bit));
+   setup_clear_cpu_cap(bit);
+   }
+   } while (res == 2);
+   pr_cont("\n");
+}
+
+/*
  * Do minimum CPU detection early.
  * Fields really needed: vendor, cpuid_level, family, model, mask,
  * cache alignment.
@@ -1255,6 +1309,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 
*c)
get_cpu_cap(c);
get_cpu_address_sizes(c);
setup_force_cpu_cap(X86_FEATURE_CPUID);
+   cpu_parse_early_param();
 
if (this_cpu->c_early_init)
this_cpu->c_early_init(c);
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index f8ff895..701f196 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -5,7 +5,6 @@
 #include 
 #include 
 #include 
-#include 
 
 #include 
 #include 
@@ -238,65 +237,11 @@ static void __init fpu__init_system_ctx_switch(void)
 }
 
 /*
- * We parse fpu parameters early because fpu__init_system() is executed
- * before parse_early_param().
- */
-static void __init fpu__init_parse_early_param(void)
-{
-   char arg[128];
-   char *argptr = arg;
-   int arglen, res, bit;
-
-#ifdef CONFIG_X86_32
-   if (cmdline_find_option_bool(boot_command_line, "no387"))
-#ifdef CONFIG_MATH_EMULATION
-   setup_clear_cpu_cap(X86_FEATURE_FPU);
-#else
-   pr_err("Option 'no387' required CONFIG_MATH_EMULATION 
enabled.\n");
-#endif
-
-   if (cmdline_find_option_bool(boot_command_line, "nofxsr"))
-   setup_clear_cpu_cap(X86_FEATURE_FXSR);
-#endif
-
-   if (cmdline_find_option_bool(boot_command_line, "noxsave"))
-   setup_clear_cpu_cap(X86_FEATURE_XSAVE);
-
-   if (cmdline_find_option_bool(boot_command_line, "noxsaveopt"))

[PATCH v3] x86/boot: Handle fpu-related and clearcpuid command line arguments earlier

2020-09-21 Thread Mike Hommey
FPU initialization handles them currently. However, in the case of
clearcpuid, some other early initialization code may check for features
before the FPU initialization code is called. Handling the argument
earlier allows the command line to influence those early
initializations.

Signed-off-by: Mike Hommey 
---
 arch/x86/kernel/cpu/common.c | 55 
 arch/x86/kernel/fpu/init.c   | 55 
 2 files changed, 55 insertions(+), 55 deletions(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 8d4715e84268..6220fae87263 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -23,6 +23,7 @@
 #include 
 #include 
 
+#include 
 #include 
 #include 
 #include 
@@ -1220,6 +1221,59 @@ static void detect_nopl(void)
 #endif
 }
 
+/*
+ * We parse cpu parameters early because fpu__init_system() is executed
+ * before parse_early_param().
+ */
+static void __init cpu_parse_early_param(void)
+{
+   char arg[128];
+   char *argptr = arg;
+   int arglen, res, bit;
+
+#ifdef CONFIG_X86_32
+   if (cmdline_find_option_bool(boot_command_line, "no387"))
+#ifdef CONFIG_MATH_EMULATION
+   setup_clear_cpu_cap(X86_FEATURE_FPU);
+#else
+   pr_err("Option 'no387' required CONFIG_MATH_EMULATION 
enabled.\n");
+#endif
+
+   if (cmdline_find_option_bool(boot_command_line, "nofxsr"))
+   setup_clear_cpu_cap(X86_FEATURE_FXSR);
+#endif
+
+   if (cmdline_find_option_bool(boot_command_line, "noxsave"))
+   setup_clear_cpu_cap(X86_FEATURE_XSAVE);
+
+   if (cmdline_find_option_bool(boot_command_line, "noxsaveopt"))
+   setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
+
+   if (cmdline_find_option_bool(boot_command_line, "noxsaves"))
+   setup_clear_cpu_cap(X86_FEATURE_XSAVES);
+
+   arglen = cmdline_find_option(boot_command_line, "clearcpuid", arg, 
sizeof(arg));
+   if (arglen <= 0)
+   return;
+
+   pr_info("Clearing CPUID bits:");
+   do {
+   res = get_option(&argptr, &bit);
+   if (res == 0 || res == 3)
+   break;
+
+   /* If the argument was too long, the last bit may be cut off */
+   if (res == 1 && arglen >= sizeof(arg))
+   break;
+
+   if (bit >= 0 && bit < NCAPINTS * 32) {
+   pr_cont(" " X86_CAP_FMT, x86_cap_flag(bit));
+   setup_clear_cpu_cap(bit);
+   }
+   } while (res == 2);
+   pr_cont("\n");
+}
+
 /*
  * Do minimum CPU detection early.
  * Fields really needed: vendor, cpuid_level, family, model, mask,
@@ -1255,6 +1309,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 
*c)
get_cpu_cap(c);
get_cpu_address_sizes(c);
setup_force_cpu_cap(X86_FEATURE_CPUID);
+   cpu_parse_early_param();
 
if (this_cpu->c_early_init)
this_cpu->c_early_init(c);
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index f8ff895aaf7e..701f196d7c68 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -5,7 +5,6 @@
 #include 
 #include 
 #include 
-#include 
 
 #include 
 #include 
@@ -237,66 +236,12 @@ static void __init fpu__init_system_ctx_switch(void)
on_boot_cpu = 0;
 }
 
-/*
- * We parse fpu parameters early because fpu__init_system() is executed
- * before parse_early_param().
- */
-static void __init fpu__init_parse_early_param(void)
-{
-   char arg[128];
-   char *argptr = arg;
-   int arglen, res, bit;
-
-#ifdef CONFIG_X86_32
-   if (cmdline_find_option_bool(boot_command_line, "no387"))
-#ifdef CONFIG_MATH_EMULATION
-   setup_clear_cpu_cap(X86_FEATURE_FPU);
-#else
-   pr_err("Option 'no387' required CONFIG_MATH_EMULATION 
enabled.\n");
-#endif
-
-   if (cmdline_find_option_bool(boot_command_line, "nofxsr"))
-   setup_clear_cpu_cap(X86_FEATURE_FXSR);
-#endif
-
-   if (cmdline_find_option_bool(boot_command_line, "noxsave"))
-   setup_clear_cpu_cap(X86_FEATURE_XSAVE);
-
-   if (cmdline_find_option_bool(boot_command_line, "noxsaveopt"))
-   setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
-
-   if (cmdline_find_option_bool(boot_command_line, "noxsaves"))
-   setup_clear_cpu_cap(X86_FEATURE_XSAVES);
-
-   arglen = cmdline_find_option(boot_command_line, "clearcpuid", arg, 
sizeof(arg));
-   if (arglen <= 0)
-   return;
-
-   pr_info("Clearing CPUID bits:");
-   do {
-   res = get_option(&argptr, &bit);
-   if (res == 0 || res == 3)
-  

[PATCH v2] x86/boot: Handle fpu-related and clearcpuid command line arguments earlier

2020-09-20 Thread Mike Hommey
FPU initialization handles them currently. However, in the case of
clearcpuid, some other early initialization code may check for features
before the FPU initialization code is called. Handling the argument
earlier allows the command line to influence those early
initializations.

Signed-off-by: Mike Hommey 
---
 arch/x86/kernel/cpu/common.c | 41 
 arch/x86/kernel/fpu/init.c   | 41 
 2 files changed, 41 insertions(+), 41 deletions(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c5d6f17d9b9d..5e2e4d3621bd 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -23,6 +23,7 @@
 #include 
 #include 
 
+#include 
 #include 
 #include 
 #include 
@@ -1220,6 +1221,45 @@ static void detect_nopl(void)
 #endif
 }
 
+/*
+ * We parse cpu parameters early because early_identify_cpu() is executed
+ * before parse_early_param().
+ */
+static void __init cpu__init_parse_early_param(void)
+{
+   char arg[32];
+   char *argptr = arg;
+   int bit;
+
+#ifdef CONFIG_X86_32
+   if (cmdline_find_option_bool(boot_command_line, "no387"))
+#ifdef CONFIG_MATH_EMULATION
+   setup_clear_cpu_cap(X86_FEATURE_FPU);
+#else
+   pr_err("Option 'no387' required CONFIG_MATH_EMULATION 
enabled.\n");
+#endif
+
+   if (cmdline_find_option_bool(boot_command_line, "nofxsr"))
+   setup_clear_cpu_cap(X86_FEATURE_FXSR);
+#endif
+
+   if (cmdline_find_option_bool(boot_command_line, "noxsave"))
+   setup_clear_cpu_cap(X86_FEATURE_XSAVE);
+
+   if (cmdline_find_option_bool(boot_command_line, "noxsaveopt"))
+   setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
+
+   if (cmdline_find_option_bool(boot_command_line, "noxsaves"))
+   setup_clear_cpu_cap(X86_FEATURE_XSAVES);
+
+   if (cmdline_find_option(boot_command_line, "clearcpuid", arg,
+   sizeof(arg)) &&
+   get_option(&argptr, &bit) &&
+   bit >= 0 &&
+   bit < NCAPINTS * 32)
+   setup_clear_cpu_cap(bit);
+}
+
 /*
  * Do minimum CPU detection early.
  * Fields really needed: vendor, cpuid_level, family, model, mask,
@@ -1255,6 +1295,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 
*c)
get_cpu_cap(c);
get_cpu_address_sizes(c);
setup_force_cpu_cap(X86_FEATURE_CPUID);
+   cpu__init_parse_early_param();
 
if (this_cpu->c_early_init)
this_cpu->c_early_init(c);
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 61ddc3a5e5c2..701f196d7c68 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -5,7 +5,6 @@
 #include 
 #include 
 #include 
-#include 
 
 #include 
 #include 
@@ -237,52 +236,12 @@ static void __init fpu__init_system_ctx_switch(void)
on_boot_cpu = 0;
 }
 
-/*
- * We parse fpu parameters early because fpu__init_system() is executed
- * before parse_early_param().
- */
-static void __init fpu__init_parse_early_param(void)
-{
-   char arg[32];
-   char *argptr = arg;
-   int bit;
-
-#ifdef CONFIG_X86_32
-   if (cmdline_find_option_bool(boot_command_line, "no387"))
-#ifdef CONFIG_MATH_EMULATION
-   setup_clear_cpu_cap(X86_FEATURE_FPU);
-#else
-   pr_err("Option 'no387' required CONFIG_MATH_EMULATION 
enabled.\n");
-#endif
-
-   if (cmdline_find_option_bool(boot_command_line, "nofxsr"))
-   setup_clear_cpu_cap(X86_FEATURE_FXSR);
-#endif
-
-   if (cmdline_find_option_bool(boot_command_line, "noxsave"))
-   setup_clear_cpu_cap(X86_FEATURE_XSAVE);
-
-   if (cmdline_find_option_bool(boot_command_line, "noxsaveopt"))
-   setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
-
-   if (cmdline_find_option_bool(boot_command_line, "noxsaves"))
-   setup_clear_cpu_cap(X86_FEATURE_XSAVES);
-
-   if (cmdline_find_option(boot_command_line, "clearcpuid", arg,
-   sizeof(arg)) &&
-   get_option(&argptr, &bit) &&
-   bit >= 0 &&
-   bit < NCAPINTS * 32)
-   setup_clear_cpu_cap(bit);
-}
-
 /*
  * Called on the boot CPU once per system bootup, to set up the initial
  * FPU state that is later cloned into all processes:
  */
 void __init fpu__init_system(struct cpuinfo_x86 *c)
 {
-   fpu__init_parse_early_param();
fpu__init_system_early_generic(c);
 
/*
-- 
2.28.0



[PATCH] x86/boot: Delay BSP init until after FPU initialization

2020-09-19 Thread Mike Hommey
FPU initialization handles the clearcpuid command line argument. If it
comes after BSP init, clearcpuid cannot be used to disable features that
trigger some parts of the BSP init code.

Signed-off-by: Mike Hommey 
---
 arch/x86/kernel/cpu/common.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

I was trying to use clearcpuid=440 to disable X86_FEATURE_AMD_SSBD to
reproduce the behavior that happens on Zen/Zen+ on a Zen2 machine, but
that didn't work because the command line is handled after the setup for
X86_FEATURE_LS_CFG_SSBD.

I thought about moving the command line handling earlier, but it
seems there wasn't a specific reason for BSP init being earlier than FPU
initialization so I went with reordering those instead.

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c5d6f17d9b9d..c3bbca91a14b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1261,9 +1261,6 @@ static void __init early_identify_cpu(struct cpuinfo_x86 
*c)
 
c->cpu_index = 0;
filter_cpuid_features(c, false);
-
-   if (this_cpu->c_bsp_init)
-   this_cpu->c_bsp_init(c);
} else {
setup_clear_cpu_cap(X86_FEATURE_CPUID);
}
@@ -1276,6 +1273,10 @@ static void __init early_identify_cpu(struct cpuinfo_x86 
*c)
 
fpu__init_system(c);
 
+   if (have_cpuid_p()) {
+   if (this_cpu->c_bsp_init)
+   this_cpu->c_bsp_init(c);
+   }
 #ifdef CONFIG_X86_32
/*
 * Regardless of whether PCID is enumerated, the SDM says
-- 
2.28.0



Re: [PATCH 08/17] mm: madvise MADV_USERFAULT

2014-10-03 Thread Mike Hommey
On Fri, Oct 03, 2014 at 07:07:58PM +0200, Andrea Arcangeli wrote:
> MADV_USERFAULT is a new madvise flag that will set VM_USERFAULT in the
> vma flags. Whenever VM_USERFAULT is set in an anonymous vma, if
> userland touches a still unmapped virtual address, a sigbus signal is
> sent instead of allocating a new page. The sigbus signal handler will
> then resolve the page fault in userland by calling the
> remap_anon_pages syscall.

What does "unmapped virtual address" mean in this context?

Mike
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 08/17] mm: madvise MADV_USERFAULT

2014-10-03 Thread Mike Hommey
On Fri, Oct 03, 2014 at 07:07:58PM +0200, Andrea Arcangeli wrote:
 MADV_USERFAULT is a new madvise flag that will set VM_USERFAULT in the
 vma flags. Whenever VM_USERFAULT is set in an anonymous vma, if
 userland touches a still unmapped virtual address, a sigbus signal is
 sent instead of allocating a new page. The sigbus signal handler will
 then resolve the page fault in userland by calling the
 remap_anon_pages syscall.

What does unmapped virtual address mean in this context?

Mike
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


What could cause siginfo_t.si_addr to always be NULL?

2013-08-27 Thread Mike Hommey
Hi,

We're running into an interesting issue with Firefox on Android, in
which a segfault signal handler always gets a value of 0 for
siginfo_t.si_addr. The most interesting part is that this only happens on
a few devices/android version combinations. Catching a segfault in gdb
also shows the problem in $_siginfo, which suggests this would be the
kernel doing something weird. Thus my question, what in the kernel could
cause this behavior?

Cheers,

Mike
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


What could cause siginfo_t.si_addr to always be NULL?

2013-08-27 Thread Mike Hommey
Hi,

We're running into an interesting issue with Firefox on Android, in
which a segfault signal handler always gets a value of 0 for
siginfo_t.si_addr. The most interesting part is that this only happens on
a few devices/android version combinations. Catching a segfault in gdb
also shows the problem in $_siginfo, which suggests this would be the
kernel doing something weird. Thus my question, what in the kernel could
cause this behavior?

Cheers,

Mike
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC/PATCH 0/2] ext4: Transparent Decompression Support

2013-07-29 Thread Mike Hommey
On Fri, Jul 26, 2013 at 09:20:34AM -0400, Jörn Engel wrote:
> On Fri, 26 July 2013 12:01:23 +0400, Vyacheslav Dubeyko wrote:
> > 
> > We are discussing not about good or bad idea. We need to elaborate a
> > right solution. I think that suggested idea is not clear. Do you
> > want to support compression in ext4? Or do you want to add some new
> > compression feature (likewise file-oriented compression)? If we are
> > talking about compression in ext4 then it needs to use e2compr patch
> > set. Otherwise, if we are talking about file compression then it is
> > not question of concrete filesystem. And we need to make
> > implementation on VFS level. It is only architectural point of view.
> 
> I don't think the e2compr patches are strictly necessary.  They are a
> good option, but not the only one.
> 
> One trick to simplify the problem is to make Dhaval's compressed files
> strictly read-only.  It will require some dance to load the compressed
> content, flip the switch, then uncompress data on the fly and disallow
> writes.  Not the most pleasing of interfaces, but yet another option.
> 
> > Why do you try to implement likewise concept on kernel level? It
> > looks like you try to move some user-space concept in kernel-space.
> 
> The kernel controls the page cache.  Once the page cache is filled
> with uncompressed file content, you can do mmap, regular file io, etc.
> Putting uncompression code into the kernel makes sense to me.  Whether
> a solution different from e2compr makes sense is yet to be seen.
> 
> Whatever you do, it will require support from the on-disk format and
> the userspace ABI.  Setting the compression bit on a file has the
> clear advantage that it is an established interface and also supported
> by other filesystems.  Introducing yet another interface requires a
> fairly strong case to be made.  But who knows, maybe Dhaval can pull
> it off.

Come to think of it, the whole thing could be handled entirely in user
space through fuse. While this is probably a workable solution on
desktop/server environments, it doesn't pan out on Android: /dev/fuse is
rarely available, and even if it were, fusermount needs to be there and
be a setuid program (or have the right capabilities). So, another angle
could be to allow some things to happen without privileges, such as
mounting filesystems in a private namespace. That wouldn't solve the
lack of /dev/fuse, though.

Mike
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC/PATCH 0/2] ext4: Transparent Decompression Support

2013-07-29 Thread Mike Hommey
On Fri, Jul 26, 2013 at 09:20:34AM -0400, Jörn Engel wrote:
 On Fri, 26 July 2013 12:01:23 +0400, Vyacheslav Dubeyko wrote:
  
  We are discussing not about good or bad idea. We need to elaborate a
  right solution. I think that suggested idea is not clear. Do you
  want to support compression in ext4? Or do you want to add some new
  compression feature (likewise file-oriented compression)? If we are
  talking about compression in ext4 then it needs to use e2compr patch
  set. Otherwise, if we are talking about file compression then it is
  not question of concrete filesystem. And we need to make
  implementation on VFS level. It is only architectural point of view.
 
 I don't think the e2compr patches are strictly necessary.  They are a
 good option, but not the only one.
 
 One trick to simplify the problem is to make Dhaval's compressed files
 strictly read-only.  It will require some dance to load the compressed
 content, flip the switch, then uncompress data on the fly and disallow
 writes.  Not the most pleasing of interfaces, but yet another option.
 
  Why do you try to implement likewise concept on kernel level? It
  looks like you try to move some user-space concept in kernel-space.
 
 The kernel controls the page cache.  Once the page cache is filled
 with uncompressed file content, you can do mmap, regular file io, etc.
 Putting uncompression code into the kernel makes sense to me.  Whether
 a solution different from e2compr makes sense is yet to be seen.
 
 Whatever you do, it will require support from the on-disk format and
 the userspace ABI.  Setting the compression bit on a file has the
 clear advantage that it is an established interface and also supported
 by other filesystems.  Introducing yet another interface requires a
 fairly strong case to be made.  But who knows, maybe Dhaval can pull
 it off.

Come to think of it, the whole thing could be handled entirely in user
space through fuse. While this is probably a workable solution on
desktop/server environments, it doesn't pan out on Android: /dev/fuse is
rarely available, and even if it were, fusermount needs to be there and
be a setuid program (or have the right capabilities). So, another angle
could be to allow some things to happen without privileges, such as
mounting filesystems in a private namespace. That wouldn't solve the
lack of /dev/fuse, though.

Mike
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC v3] Support volatile range for anon vma

2012-12-11 Thread Mike Hommey
On Tue, Dec 11, 2012 at 05:11:17PM +0900, Minchan Kim wrote:
> On Tue, Dec 11, 2012 at 08:59:50AM +0100, Mike Hommey wrote:
> > On Tue, Dec 11, 2012 at 04:37:44PM +0900, Minchan Kim wrote:
> > > On Tue, Dec 11, 2012 at 08:17:42AM +0100, Mike Hommey wrote:
> > > > On Tue, Dec 11, 2012 at 11:41:04AM +0900, Minchan Kim wrote:
> > > > > - What's the madvise(addr, length, MADV_VOLATILE)?
> > > > > 
> > > > >   It's a hint that user deliver to kernel so kernel can *discard*
> > > > >   pages in a range anytime.
> > > > > 
> > > > > - What happens if user access page(ie, virtual address) discarded
> > > > >   by kernel?
> > > > > 
> > > > >   The user can see zero-fill-on-demand pages as if madvise(DONTNEED).
> > > > 
> > > > What happened to getting SIGBUS?
> > > 
> > > I thought it could force for user to handle signal.
> > > If user can receive signal, what can he do?
> > > Maybe he can call madvise(NOVOLATILE) in my old version but I removed it
> > > in this version so user don't need handle signal handling.
> > 
> > NOVOLATILE and signal throwing are two different and not necessarily
> > related needs. We (Mozilla) could probably live without NOVOLATILE,
> > but certainly not without signal throwing.
> 
> What's shortcoming if we don't provide signal handling?
> Could you explain how you want to signal in your allocator?

The main use case we have for signals is not an allocator. We're
currently using ashmem to decompress libraries on Android. We would like
to use volatile memory for that instead, so that unused pages can be
discarded. With NOVOLATILE, or when getting zero-filled pages, that just
doesn't pan out: you may well be jumping in the volatile memory from
anywhere, and you can't check the status of the page you're jumping into
before jumping. Thus you need to be signaled when reaching a discarded
page.

Mike
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC v3] Support volatile range for anon vma

2012-12-11 Thread Mike Hommey
On Tue, Dec 11, 2012 at 04:37:44PM +0900, Minchan Kim wrote:
> On Tue, Dec 11, 2012 at 08:17:42AM +0100, Mike Hommey wrote:
> > On Tue, Dec 11, 2012 at 11:41:04AM +0900, Minchan Kim wrote:
> > > - What's the madvise(addr, length, MADV_VOLATILE)?
> > > 
> > >   It's a hint that user deliver to kernel so kernel can *discard*
> > >   pages in a range anytime.
> > > 
> > > - What happens if user access page(ie, virtual address) discarded
> > >   by kernel?
> > > 
> > >   The user can see zero-fill-on-demand pages as if madvise(DONTNEED).
> > 
> > What happened to getting SIGBUS?
> 
> I thought it could force for user to handle signal.
> If user can receive signal, what can he do?
> Maybe he can call madvise(NOVOLATILE) in my old version but I removed it
> in this version so user don't need handle signal handling.

NOVOLATILE and signal throwing are two different and not necessarily
related needs. We (Mozilla) could probably live without NOVOLATILE,
but certainly not without signal throwing.

Mike
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC v3] Support volatile range for anon vma

2012-12-11 Thread Mike Hommey
On Tue, Dec 11, 2012 at 04:37:44PM +0900, Minchan Kim wrote:
 On Tue, Dec 11, 2012 at 08:17:42AM +0100, Mike Hommey wrote:
  On Tue, Dec 11, 2012 at 11:41:04AM +0900, Minchan Kim wrote:
   - What's the madvise(addr, length, MADV_VOLATILE)?
   
 It's a hint that user deliver to kernel so kernel can *discard*
 pages in a range anytime.
   
   - What happens if user access page(ie, virtual address) discarded
 by kernel?
   
 The user can see zero-fill-on-demand pages as if madvise(DONTNEED).
  
  What happened to getting SIGBUS?
 
 I thought it could force for user to handle signal.
 If user can receive signal, what can he do?
 Maybe he can call madvise(NOVOLATILE) in my old version but I removed it
 in this version so user don't need handle signal handling.

NOVOLATILE and signal throwing are two different and not necessarily
related needs. We (Mozilla) could probably live without NOVOLATILE,
but certainly not without signal throwing.

Mike
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC v3] Support volatile range for anon vma

2012-12-11 Thread Mike Hommey
On Tue, Dec 11, 2012 at 05:11:17PM +0900, Minchan Kim wrote:
 On Tue, Dec 11, 2012 at 08:59:50AM +0100, Mike Hommey wrote:
  On Tue, Dec 11, 2012 at 04:37:44PM +0900, Minchan Kim wrote:
   On Tue, Dec 11, 2012 at 08:17:42AM +0100, Mike Hommey wrote:
On Tue, Dec 11, 2012 at 11:41:04AM +0900, Minchan Kim wrote:
 - What's the madvise(addr, length, MADV_VOLATILE)?
 
   It's a hint that user deliver to kernel so kernel can *discard*
   pages in a range anytime.
 
 - What happens if user access page(ie, virtual address) discarded
   by kernel?
 
   The user can see zero-fill-on-demand pages as if madvise(DONTNEED).

What happened to getting SIGBUS?
   
   I thought it could force for user to handle signal.
   If user can receive signal, what can he do?
   Maybe he can call madvise(NOVOLATILE) in my old version but I removed it
   in this version so user don't need handle signal handling.
  
  NOVOLATILE and signal throwing are two different and not necessarily
  related needs. We (Mozilla) could probably live without NOVOLATILE,
  but certainly not without signal throwing.
 
 What's shortcoming if we don't provide signal handling?
 Could you explain how you want to signal in your allocator?

The main use case we have for signals is not an allocator. We're
currently using ashmem to decompress libraries on Android. We would like
to use volatile memory for that instead, so that unused pages can be
discarded. With NOVOLATILE, or when getting zero-filled pages, that just
doesn't pan out: you may well be jumping in the volatile memory from
anywhere, and you can't check the status of the page you're jumping into
before jumping. Thus you need to be signaled when reaching a discarded
page.

Mike
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC v3] Support volatile range for anon vma

2012-12-10 Thread Mike Hommey
On Tue, Dec 11, 2012 at 11:41:04AM +0900, Minchan Kim wrote:
> - What's the madvise(addr, length, MADV_VOLATILE)?
> 
>   It's a hint that user deliver to kernel so kernel can *discard*
>   pages in a range anytime.
> 
> - What happens if user access page(ie, virtual address) discarded
>   by kernel?
> 
>   The user can see zero-fill-on-demand pages as if madvise(DONTNEED).

What happened to getting SIGBUS?

Mike
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC v3] Support volatile range for anon vma

2012-12-10 Thread Mike Hommey
On Tue, Dec 11, 2012 at 11:41:04AM +0900, Minchan Kim wrote:
 - What's the madvise(addr, length, MADV_VOLATILE)?
 
   It's a hint that user deliver to kernel so kernel can *discard*
   pages in a range anytime.
 
 - What happens if user access page(ie, virtual address) discarded
   by kernel?
 
   The user can see zero-fill-on-demand pages as if madvise(DONTNEED).

What happened to getting SIGBUS?

Mike
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 0/3] Volatile Ranges (v7) & Lots of words

2012-11-29 Thread Mike Hommey
On Fri, Nov 02, 2012 at 09:59:07PM +0100, Michael Kerrisk wrote:
> John,
> 
> A question on one point:
> 
> On Wed, Oct 3, 2012 at 12:38 AM, John Stultz  wrote:
> > On 10/02/2012 12:39 AM, NeilBrown wrote:
> [...]
> >>   The SIGBUS interface could have some merit if it really reduces
> >> overhead.  I
> >>   worry about app bugs that could result from the non-deterministic
> >>   behaviour.   A range could get unmapped while it is in use and testing
> >> for
> >>   the case of "get a SIGBUS half way through accessing something" would not
> >>   be straight forward (SIGBUS on first step of access should be easy).
> >>   I guess that is up to the app writer, but I have never liked anything
> >> about
> >>   the signal interface and encouraging further use doesn't feel wise.
> >
> > Initially I didn't like the idea, but have warmed considerably to it. Mainly
> > due to the concern that the constant unmark/access/mark pattern would be too
> > much overhead, and having a lazy method will be much nicer for performance.
> > But yes, at the cost of additional complexity of handling the signal,
> > marking the faulted address range as non-volatile, restoring the data and
> > continuing.
> 
> At a finer level of detail, how do you see this as happening in the
> application. I mean: in the general case, repopulating the purged
> volatile page would have to be done outside the signal handler (I
> think, because async-signal-safety considerations would preclude too
> much complex stuff going on inside the handler). That implies
> longjumping out of the handler, repopulating the pages with data, and
> then restarting whatever work was being done when the SIGBUS was
> generated.

There are different strategies that can be used to repopulate the pages,
within or outside the signal handler, and I'd say it's not that
important of a detail.

That being said, if the kernel could be helpful and avoid people
shooting themselves in the foot, that would be great, too.

I don't know how possible this would be but being able to get the
notification on a signalfd in a dedicated thread would certainly improve
things (I guess other usecases of SIGSEGV/SIGBUS handlers could
appreciate something like this). The kernel would pause the faulting
thread while sending the notification on the signalfd, and the notified
thread would be allowed to resume the faulting thread when it's done
doing its job.

Mike
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 0/3] Volatile Ranges (v7) & Lots of words

2012-11-29 Thread Mike Hommey
On Fri, Nov 02, 2012 at 09:59:07PM +0100, Michael Kerrisk wrote:
> John,
> 
> A question on one point:
> 
> On Wed, Oct 3, 2012 at 12:38 AM, John Stultz john.stu...@linaro.org wrote:
> > On 10/02/2012 12:39 AM, NeilBrown wrote:
> [...]
> >>   The SIGBUS interface could have some merit if it really reduces
> >> overhead.  I
> >>   worry about app bugs that could result from the non-deterministic
> >>   behaviour.   A range could get unmapped while it is in use and testing
> >> for
> >>   the case of "get a SIGBUS half way through accessing something" would not
> >>   be straightforward (SIGBUS on first step of access should be easy).
> >>   I guess that is up to the app writer, but I have never liked anything
> >> about
> >>   the signal interface and encouraging further use doesn't feel wise.
> >
> > Initially I didn't like the idea, but have warmed considerably to it. Mainly
> > due to the concern that the constant unmark/access/mark pattern would be too
> > much overhead, and having a lazy method will be much nicer for performance.
> > But yes, at the cost of additional complexity of handling the signal,
> > marking the faulted address range as non-volatile, restoring the data and
> > continuing.
> 
> At a finer level of detail, how do you see this as happening in the
> application. I mean: in the general case, repopulating the purged
> volatile page would have to be done outside the signal handler (I
> think, because async-signal-safety considerations would preclude too
> much complex stuff going on inside the handler). That implies
> longjumping out of the handler, repopulating the pages with data, and
> then restarting whatever work was being done when the SIGBUS was
> generated.

There are different strategies that can be used to repopulate the pages,
within or outside the signal handler, and I'd say it's not that
important of a detail.

That being said, if the kernel could be helpful and avoid people
shooting themselves in the foot, that would be great, too.

I don't know how possible this would be but being able to get the
notification on a signalfd in a dedicated thread would certainly improve
things (I guess other usecases of SIGSEGV/SIGBUS handlers could
appreciate something like this). The kernel would pause the faulting
thread while sending the notification on the signalfd, and the notified
thread would be allowed to resume the faulting thread when it's done
doing its job.

Mike
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 0/3] Volatile Ranges (v7) & Lots of words

2012-10-09 Thread Mike Hommey
On Fri, Sep 28, 2012 at 11:16:30PM -0400, John Stultz wrote:
> fd based interfaces vs madvise:
>   In talking with Taras Glek, he pointed out that for his
>   needs, the fd based interface is a little annoying, as it
>   requires having to get access to tmpfs file and mmap it in,
>   then instead of just referencing a pointer to the data he
>   wants to mark volatile, he has to calculate the offset from
>   start of the mmap and pass those file offsets to the interface.
>   Instead he mentioned that using something like madvise would be
>   much nicer, since they could just pass a pointer to the object
>   in memory they want to make volatile and avoid the extra work.
> 
>   I'm not opposed to adding an madvise interface for this as
>   well, but since we have an existing use case with Android's
>   ashmem, I want to make sure we support this existing behavior.
>   Specifically as with ashmem  applications can be sharing
>   these tmpfs fds, and so file-relative volatile ranges make
>   more sense if you need to coordinate what data is volatile
>   between two applications.
> 
>   Also, while I agree that having an madvise interface for
>   volatile ranges would be nice, it does open up some more
>   complex implementation issues, since with files, there is a
>   fixed relationship between pages and the files' address_space
>   mapping, where you can't have pages shared between different
>   mappings. This makes it easy to hang the volatile-range tree
>   off of the mapping (well, indirectly via a hash table). With
>   general anonymous memory, pages can be shared between multiple
>   processes, and as far as I understand, don't have any grouping
>   structure we could use to determine if the page is in a
>   volatile range or not. We would also need to determine more
>   complex questions like: What are the semantics of volatility
>   with copy-on-write pages?  I'm hoping to investigate this
>   idea more deeply soon so I can be sure whatever is pushed has
>   a clear plan of how to address this idea. Further thoughts
>   here would be appreciated.

Note it doesn't have to be a vs. situation. madvise could be an
additional way to interface with volatile ranges on a given fd.

That is, madvise doesn't have to mean anonymous memory. As a matter of
fact, MADV_WILLNEED/MADV_DONTNEED are usually used on mmaped files.
Similarly, there could be a way to use madvise to mark volatile ranges,
without the application having to track what memory ranges are
associated to what part of what file, which the kernel already tracks.

Mike
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 0/3] Volatile Ranges (v7) & Lots of words

2012-10-09 Thread Mike Hommey
On Fri, Sep 28, 2012 at 11:16:30PM -0400, John Stultz wrote:
> fd based interfaces vs madvise:
>   In talking with Taras Glek, he pointed out that for his
>   needs, the fd based interface is a little annoying, as it
>   requires having to get access to tmpfs file and mmap it in,
>   then instead of just referencing a pointer to the data he
>   wants to mark volatile, he has to calculate the offset from
>   start of the mmap and pass those file offsets to the interface.
>   Instead he mentioned that using something like madvise would be
>   much nicer, since they could just pass a pointer to the object
>   in memory they want to make volatile and avoid the extra work.
> 
>   I'm not opposed to adding an madvise interface for this as
>   well, but since we have an existing use case with Android's
>   ashmem, I want to make sure we support this existing behavior.
>   Specifically as with ashmem  applications can be sharing
>   these tmpfs fds, and so file-relative volatile ranges make
>   more sense if you need to coordinate what data is volatile
>   between two applications.
> 
>   Also, while I agree that having an madvise interface for
>   volatile ranges would be nice, it does open up some more
>   complex implementation issues, since with files, there is a
>   fixed relationship between pages and the files' address_space
>   mapping, where you can't have pages shared between different
>   mappings. This makes it easy to hang the volatile-range tree
>   off of the mapping (well, indirectly via a hash table). With
>   general anonymous memory, pages can be shared between multiple
>   processes, and as far as I understand, don't have any grouping
>   structure we could use to determine if the page is in a
>   volatile range or not. We would also need to determine more
>   complex questions like: What are the semantics of volatility
>   with copy-on-write pages?  I'm hoping to investigate this
>   idea more deeply soon so I can be sure whatever is pushed has
>   a clear plan of how to address this idea. Further thoughts
>   here would be appreciated.

Note it doesn't have to be a vs. situation. madvise could be an
additional way to interface with volatile ranges on a given fd.

That is, madvise doesn't have to mean anonymous memory. As a matter of
fact, MADV_WILLNEED/MADV_DONTNEED are usually used on mmaped files.
Similarly, there could be a way to use madvise to mark volatile ranges,
without the application having to track what memory ranges are
associated to what part of what file, which the kernel already tracks.

Mike
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Question about your git habits

2008-02-23 Thread Mike Hommey
On Fri, Feb 22, 2008 at 11:10:48PM -0500, Daniel Barkalow wrote:
> I find that the sequence of changes I make is pretty much unrelated to the 
> sequence of changes that end up in the project's history, because my 
> changes as I make them involve writing a lot of stubs (so I can build) and 
> then filling them out. It's beneficial to have version control on this so 
> that, if I screw up filling out a stub, I can get back to where I was.
> 
> Having made a complete series, I then generate a new series of commits, 
> each of which does one thing, without any bugs that I've resolved, such 
> that the net result is the end of the messy history, except with any 
> debugging or useless stuff skipped. It's this series that gets merged into 
> the project history, and I discard the other history.
> 
> The real trick is that the early patches in a lot of series often refactor 
> existing code in ways that are generally good and necessary for your 
> eventual outcome, but which you'd never think of until you've written more 
> of the series. Generating a new commit sequence is necessary to end up 
> with a history where it looks from the start like you know where you're 
> going and have everything done that needs to be done when you get to the 
> point of needing it. Furthermore, you want to be able to test these 
> commits in isolation, without the distraction of the changes that actually 
> prompted them, which means that you want to have your working tree in a 
> state that you never actually had it in as you were developing the end 
> result.
> 
> This means that you'll usually want to rewrite commits for any series that 
> isn't a single obvious patch, so it's not a big deal to commit any time 
> you want to work on some different branch.

I do that so much that I have this alias:
reorder = !sh -c 'git rebase -i --onto $0 $0 $1'

... and actually pass it only one argument most of the time.

Mike
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Question about your git habits

2008-02-23 Thread Mike Hommey
On Fri, Feb 22, 2008 at 11:10:48PM -0500, Daniel Barkalow wrote:
> I find that the sequence of changes I make is pretty much unrelated to the 
> sequence of changes that end up in the project's history, because my 
> changes as I make them involve writing a lot of stubs (so I can build) and 
> then filling them out. It's beneficial to have version control on this so 
> that, if I screw up filling out a stub, I can get back to where I was.
> 
> Having made a complete series, I then generate a new series of commits, 
> each of which does one thing, without any bugs that I've resolved, such 
> that the net result is the end of the messy history, except with any 
> debugging or useless stuff skipped. It's this series that gets merged into 
> the project history, and I discard the other history.
> 
> The real trick is that the early patches in a lot of series often refactor 
> existing code in ways that are generally good and necessary for your 
> eventual outcome, but which you'd never think of until you've written more 
> of the series. Generating a new commit sequence is necessary to end up 
> with a history where it looks from the start like you know where you're 
> going and have everything done that needs to be done when you get to the 
> point of needing it. Furthermore, you want to be able to test these 
> commits in isolation, without the distraction of the changes that actually 
> prompted them, which means that you want to have your working tree in a 
> state that you never actually had it in as you were developing the end 
> result.
> 
> This means that you'll usually want to rewrite commits for any series that 
> isn't a single obvious patch, so it's not a big deal to commit any time 
> you want to work on some different branch.

I do that so much that I have this alias:
reorder = !sh -c 'git rebase -i --onto $0 $0 $1'

... and actually pass it only one argument most of the time.

Mike
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/