On Wed, Oct 2, 2019 at 4:49 PM Vishal Verma <[email protected]> wrote:
>
> Sometimes, system configuration can result in new memory blocks getting
> onlined automatically. Often, these auto-onlining mechanisms don't
> provide a choice or configurability in the matter of which zone is used
> for these new blocks, and they just end up in ZONE_NORMAL.
>
> Usually, for hot-plugged memory, ZONE_NORMAL is undesirable because:
>  - An application might want total control over this memory
>  - ZONE_NORMAL precludes hot-removal of this memory
>  - Having kernel data structures in this memory, especially performance
>    sensitive ones, such as page tables, may be undesirable.
>
> Thus report if a race condition is encountered while onlining memory,
> and provide the user options to remedy it.
>
> Clarify the default zone expectations, and the race detection behavior
> in the daxctl-reconfigure-device man page, and move the relevant section
> under the 'Description' header, instead of hidden away under the
> '--no-online' option.
>
> Cc: Ben Olson <[email protected]>
> Cc: Dave Hansen <[email protected]>
> Cc: Dan Williams <[email protected]>
> Signed-off-by: Vishal Verma <[email protected]>
> ---
>  daxctl/device.c        |  9 ++++++
>  daxctl/lib/libdaxctl.c | 62 +++++++++++++++++++++++++++++-------------
>  2 files changed, 52 insertions(+), 19 deletions(-)
>
> diff --git a/daxctl/device.c b/daxctl/device.c
> index 920efc6..28698bf 100644
> --- a/daxctl/device.c
> +++ b/daxctl/device.c
> @@ -174,6 +174,15 @@ static int dev_online_memory(struct daxctl_dev *dev)
>                         devname, strerror(-num_on));
>                 return num_on;
>         }
> +       if (num_on)
> +               fprintf(stderr,
> +                   "%s:\n  WARNING: detected a race while onlining memory\n"
> +                   "  Some memory may not be in the expected zone. It is\n"
>                   "  recommended to disable any other onlining mechanisms,\n"
>                   "  and retry. If onlining is to be left to other agents,\n"
> +                   "  use the --no-online option to suppress this warning\n",
> +                   devname);
> +
>         if (num_on == num_sections) {
>                 fprintf(stderr, "%s: all memory sections (%d) already online\n",
>                         devname, num_on);
> diff --git a/daxctl/lib/libdaxctl.c b/daxctl/lib/libdaxctl.c
> index 617887c..5a7e37c 100644
> --- a/daxctl/lib/libdaxctl.c
> +++ b/daxctl/lib/libdaxctl.c
> @@ -1079,10 +1079,10 @@ static int memblock_is_online(struct daxctl_memory *mem, char *memblock)
>         return 0;
>  }
>
> -static int online_one_memblock(struct daxctl_memory *mem, char *memblock)
> +static int online_one_memblock(struct daxctl_memory *mem, char *memblock,
> +               int *status)
>  {
>         struct daxctl_dev *dev = daxctl_memory_get_dev(mem);
> -       const char *devname = daxctl_dev_get_devname(dev);
>         struct daxctl_ctx *ctx = daxctl_dev_get_ctx(dev);
>         const char *mode = "online_movable";
>         int len = mem->buf_len, rc;
> @@ -1097,10 +1097,6 @@ static int online_one_memblock(struct daxctl_memory *mem, char *memblock)
>         if (rc < 0)
>                 return -ENOMEM;
>
> -       /*
> -        * if already online, possibly due to kernel config or a udev rule,
> -        * there is nothing to do and we can skip over the memblock
> -        */
>         rc = memblock_is_online(mem, memblock);
>         if (rc)
>                 return rc;
> @@ -1108,18 +1104,14 @@ static int online_one_memblock(struct daxctl_memory *mem, char *memblock)
>         rc = sysfs_write_attr_quiet(ctx, path, mode);
>         if (rc) {
>                 /*
> -                * While we performed an already-online check above, there
> -                * is still a TOCTOU hole where someone (such as a udev rule)
> -                * may have raced to online the memory. In such a case,
> -                * the sysfs store will fail, however we can check for this
> -                * by simply reading the state again. If it changed to the
> -                * desired state, then we don't have to error out.
> +                * If the block got onlined, potentially by some other agent,
> +                * do nothing for now. There will be a full scan for zone
> +                * correctness later.
>                  */
> -               if (memblock_is_online(mem, memblock))
> -                       return 1;
> -               err(ctx, "%s: Failed to online %s: %s\n",
> -                       devname, path, strerror(-rc));
> +               if (memblock_is_online(mem, memblock) == 1)
> +                       return 0;
>         }
> +
>         return rc;
>  }
>
> @@ -1150,7 +1142,7 @@ static int offline_one_memblock(struct daxctl_memory *mem, char *memblock)
>
>         rc = sysfs_write_attr_quiet(ctx, path, mode);
>         if (rc) {
> -               /* Close the TOCTOU hole like in online_one_memblock() above */
> +               /* check if something raced us to offline (unlikely) */
>                 if (!memblock_is_online(mem, memblock))
>                         return 1;
>                 err(ctx, "%s: Failed to offline %s: %s\n",
> @@ -1274,7 +1266,7 @@ static int op_for_one_memblock(struct daxctl_memory *mem, char *memblock,
>
>         switch (op) {
>         case MEM_SET_ONLINE:
> -               return online_one_memblock(mem, memblock);
> +               return online_one_memblock(mem, memblock, status);
>         case MEM_SET_OFFLINE:
>                 return offline_one_memblock(mem, memblock);
>         case MEM_IS_ONLINE:
> @@ -1349,9 +1341,41 @@ out_dir:
>         return rc;
>  }
>
> +/*
> + * daxctl_memory_online() will online to ZONE_MOVABLE by default
> + */
>  DAXCTL_EXPORT int daxctl_memory_online(struct daxctl_memory *mem)
>  {
> -       return daxctl_memory_op(mem, MEM_SET_ONLINE);
> +       struct daxctl_dev *dev = daxctl_memory_get_dev(mem);
> +       const char *devname = daxctl_dev_get_devname(dev);
> +       struct daxctl_ctx *ctx = daxctl_dev_get_ctx(dev);
> +       int rc;
> +
> +       rc = daxctl_memory_op(mem, MEM_SET_ONLINE);
> +       if (rc)
> +               return rc;
> +
> +       /*
> +        * Detect any potential races when blocks were being brought online by
> +        * checking the zone in which the memory blocks are at this point. If
> +        * any of the blocks are not in ZONE_MOVABLE, emit a warning.
> +        */
> +       mem->zone = 0;
> +       rc = daxctl_memory_op(mem, MEM_FIND_ZONE);
> +       if (rc)
> +               return rc;
> +       if (mem->zone != MEM_ZONE_MOVABLE) {
> +               err(ctx,
> +                   "%s:\n  WARNING: detected a race while onlining memory\n"
> +                   "  Some memory may not be in the expected zone. It is\n"
> +                   "  recommended to disable any other onlining mechanisms,\n"
> +                   "  and retry. If onlining is to be left to other agents,\n"
> +                   "  use the --no-online option to suppress this warning\n",
> +                   devname);

Rather than duplicating this largish warning message, what about a
smaller one that refers to the man page for remediation
possibilities? Then you can also name distro udev rules as one of the
usual suspects that cause this.
_______________________________________________
Linux-nvdimm mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to