Re: [ndctl PATCH 07/10] daxctl: detect races when onlining memory blocks

2019-10-18 Thread Dan Williams
On Wed, Oct 2, 2019 at 4:49 PM Vishal Verma  wrote:
>
> Sometimes, system configuration can result in new memory blocks getting
> onlined automatically. Often, these auto-onlining mechanisms don't
> provide a choice or configurability in the matter of which zone is used
> for these new blocks, and they just end up in ZONE_NORMAL.
>
> Usually, for hot-plugged memory, ZONE_NORMAL is undesirable because:
>  - An application might want total control over this memory
>  - ZONE_NORMAL precludes hot-removal of this memory
>  - Having kernel data structures in this memory, especially performance
>sensitive ones, such as page tables, may be undesirable.
>
> Thus report if a race condition is encountered while onlining memory,
> and provide the user options to remedy it.
>
> Clarify the default zone expectations, and the race detection behavior
> in the daxctl-reconfigure-device man page, and move the relevant section
> under the 'Description' header, instead of hidden away under the
> '--no-online' option.
>
> Cc: Ben Olson 
> Cc: Dave Hansen 
> Cc: Dan Williams 
> Signed-off-by: Vishal Verma 
> ---
>  daxctl/device.c|  9 ++
>  daxctl/lib/libdaxctl.c | 62 +-
>  2 files changed, 52 insertions(+), 19 deletions(-)
>
> diff --git a/daxctl/device.c b/daxctl/device.c
> index 920efc6..28698bf 100644
> --- a/daxctl/device.c
> +++ b/daxctl/device.c
> @@ -174,6 +174,15 @@ static int dev_online_memory(struct daxctl_dev *dev)
> devname, strerror(-num_on));
> return num_on;
> }
> +   if (num_on)
> +   fprintf(stderr,
> +   "%s:\n  WARNING: detected a race while onlining memory\n"
> +   "  Some memory may not be in the expected zone. It is\n"
> +   "  recommended to disable any other onlining 
> mechanisms,\n"
> +   "  and retry. If onlining is to be left to other 
> agents,\n"
> +   "  use the --no-online option to suppress this warning\n",
> +   devname);
> +
> if (num_on == num_sections) {
> fprintf(stderr, "%s: all memory sections (%d) already 
> online\n",
> devname, num_on);
> diff --git a/daxctl/lib/libdaxctl.c b/daxctl/lib/libdaxctl.c
> index 617887c..5a7e37c 100644
> --- a/daxctl/lib/libdaxctl.c
> +++ b/daxctl/lib/libdaxctl.c
> @@ -1079,10 +1079,10 @@ static int memblock_is_online(struct daxctl_memory 
> *mem, char *memblock)
> return 0;
>  }
>
> -static int online_one_memblock(struct daxctl_memory *mem, char *memblock)
> +static int online_one_memblock(struct daxctl_memory *mem, char *memblock,
> +   int *status)
>  {
> struct daxctl_dev *dev = daxctl_memory_get_dev(mem);
> -   const char *devname = daxctl_dev_get_devname(dev);
> struct daxctl_ctx *ctx = daxctl_dev_get_ctx(dev);
> const char *mode = "online_movable";
> int len = mem->buf_len, rc;
> @@ -1097,10 +1097,6 @@ static int online_one_memblock(struct daxctl_memory 
> *mem, char *memblock)
> if (rc < 0)
> return -ENOMEM;
>
> -   /*
> -* if already online, possibly due to kernel config or a udev rule,
> -* there is nothing to do and we can skip over the memblock
> -*/
> rc = memblock_is_online(mem, memblock);
> if (rc)
> return rc;
> @@ -1108,18 +1104,14 @@ static int online_one_memblock(struct daxctl_memory 
> *mem, char *memblock)
> rc = sysfs_write_attr_quiet(ctx, path, mode);
> if (rc) {
> /*
> -* While we performed an already-online check above, there
> -* is still a TOCTOU hole where someone (such as a udev rule)
> -* may have raced to online the memory. In such a case,
> -* the sysfs store will fail, however we can check for this
> -* by simply reading the state again. If it changed to the
> -* desired state, then we don't have to error out.
> +* If the block got onlined, potentially by some other agent,
> +* do nothing for now. There will be a full scan for zone
> +* correctness later.
>  */
> -   if (memblock_is_online(mem, memblock))
> -   return 1;
> -   err(ctx, "%s: Failed to online %s: %s\n",
> -   devname, path, strerror(-rc));
> +   if (memblock_is_online(mem, memblock) == 1)
> +   return 0;
> }
> +
> return rc;
>  }
>
> @@ -1150,7 +1142,7 @@ static int offline_one_memblock(struct daxctl_memory 
> *mem, char *memblock)
>
> rc = sysfs_write_attr_quiet(ctx, path, mode);
> if (rc) {
> -   /* Close the TOCTOU hole like in online_one_memblock() above 
> */
> +   /* check if something raced us to offline (unlikely) */

[ndctl PATCH 07/10] daxctl: detect races when onlining memory blocks

2019-10-02 Thread Vishal Verma
Sometimes, system configuration can result in new memory blocks getting
onlined automatically. Often, these auto-onlining mechanisms don't
provide a choice or configurability in the matter of which zone is used
for these new blocks, and they just end up in ZONE_NORMAL.

Usually, for hot-plugged memory, ZONE_NORMAL is undesirable because:
 - An application might want total control over this memory
 - ZONE_NORMAL precludes hot-removal of this memory
 - Having kernel data structures in this memory, especially performance
   sensitive ones, such as page tables, may be undesirable.

Thus report if a race condition is encountered while onlining memory,
and provide the user options to remedy it.

Clarify the default zone expectations, and the race detection behavior
in the daxctl-reconfigure-device man page, and move the relevant section
under the 'Description' header, instead of hidden away under the
'--no-online' option.

Cc: Ben Olson 
Cc: Dave Hansen 
Cc: Dan Williams 
Signed-off-by: Vishal Verma 
---
 daxctl/device.c|  9 ++
 daxctl/lib/libdaxctl.c | 62 +-
 2 files changed, 52 insertions(+), 19 deletions(-)

diff --git a/daxctl/device.c b/daxctl/device.c
index 920efc6..28698bf 100644
--- a/daxctl/device.c
+++ b/daxctl/device.c
@@ -174,6 +174,15 @@ static int dev_online_memory(struct daxctl_dev *dev)
devname, strerror(-num_on));
return num_on;
}
+   if (num_on)
+   fprintf(stderr,
+   "%s:\n  WARNING: detected a race while onlining memory\n"
+   "  Some memory may not be in the expected zone. It is\n"
+   "  recommended to disable any other onlining mechanisms,\n"
+   "  and retry. If onlining is to be left to other agents,\n"
+   "  use the --no-online option to suppress this warning\n",
+   devname);
+
if (num_on == num_sections) {
fprintf(stderr, "%s: all memory sections (%d) already online\n",
devname, num_on);
diff --git a/daxctl/lib/libdaxctl.c b/daxctl/lib/libdaxctl.c
index 617887c..5a7e37c 100644
--- a/daxctl/lib/libdaxctl.c
+++ b/daxctl/lib/libdaxctl.c
@@ -1079,10 +1079,10 @@ static int memblock_is_online(struct daxctl_memory 
*mem, char *memblock)
return 0;
 }
 
-static int online_one_memblock(struct daxctl_memory *mem, char *memblock)
+static int online_one_memblock(struct daxctl_memory *mem, char *memblock,
+   int *status)
 {
struct daxctl_dev *dev = daxctl_memory_get_dev(mem);
-   const char *devname = daxctl_dev_get_devname(dev);
struct daxctl_ctx *ctx = daxctl_dev_get_ctx(dev);
const char *mode = "online_movable";
int len = mem->buf_len, rc;
@@ -1097,10 +1097,6 @@ static int online_one_memblock(struct daxctl_memory 
*mem, char *memblock)
if (rc < 0)
return -ENOMEM;
 
-   /*
-* if already online, possibly due to kernel config or a udev rule,
-* there is nothing to do and we can skip over the memblock
-*/
rc = memblock_is_online(mem, memblock);
if (rc)
return rc;
@@ -1108,18 +1104,14 @@ static int online_one_memblock(struct daxctl_memory 
*mem, char *memblock)
rc = sysfs_write_attr_quiet(ctx, path, mode);
if (rc) {
/*
-* While we performed an already-online check above, there
-* is still a TOCTOU hole where someone (such as a udev rule)
-* may have raced to online the memory. In such a case,
-* the sysfs store will fail, however we can check for this
-* by simply reading the state again. If it changed to the
-* desired state, then we don't have to error out.
+* If the block got onlined, potentially by some other agent,
+* do nothing for now. There will be a full scan for zone
+* correctness later.
 */
-   if (memblock_is_online(mem, memblock))
-   return 1;
-   err(ctx, "%s: Failed to online %s: %s\n",
-   devname, path, strerror(-rc));
+   if (memblock_is_online(mem, memblock) == 1)
+   return 0;
}
+
return rc;
 }
 
@@ -1150,7 +1142,7 @@ static int offline_one_memblock(struct daxctl_memory 
*mem, char *memblock)
 
rc = sysfs_write_attr_quiet(ctx, path, mode);
if (rc) {
-   /* Close the TOCTOU hole like in online_one_memblock() above */
+   /* check if something raced us to offline (unlikely) */
if (!memblock_is_online(mem, memblock))
return 1;
err(ctx, "%s: Failed to offline %s: %s\n",
@@ -1274,7 +1266,7 @@ static int op_for_one_memblock(struct daxctl_memory *mem, 
char *memblock,
 
switch (op) {