Re: [dm-devel] Make the mq policy an alias for smq

2016-02-26 Thread Mike Snitzer
On Thu, Feb 11 2016 at  5:41am -0500,
Joe Thornber  wrote:

> On Wed, Feb 10, 2016 at 09:13:05PM -0500, Mike Snitzer wrote:
> > Shouldn't get_policy()'s call to get_policy_once() resolve "mq" to be
> > "smq" if we just add this to the bottom of dm-cache-policy-smq?:
> > 
> > MODULE_ALIAS("dm-cache-mq");
> 
> Yes, you're right.  See my fixup patch.

Hmm, not seeing what you consider to be the latest fixed up version..
(nothing that sets: MODULE_ALIAS("dm-cache-mq") in smq.c)

But I've staged this in linux-next for 4.6, see:
https://git.kernel.org/cgit/linux/kernel/git/device-mapper/linux-dm.git/commit/?h=for-next=8cbd0cb8cca67baa3e430fb6d636613fdc1cee1a

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel


Re: [dm-devel] [PATCH V2]multipath-tools: prevent unnecessary reinstate of stand-by paths with implicit tpgs mode and no active array paths

2016-02-26 Thread Shiva Krishna


On 2/26/16, 11:00 AM, "Benjamin Marzinski"  wrote:

>On Fri, Feb 26, 2016 at 02:25:08AM +, Shiva Krishna wrote:
>> --- 
>>  libmultipath/propsel.c |2 +-
>>  libmultipath/structs.h |1 +
>>  multipathd/main.c  |   19 ---
>>  3 files changed, 18 insertions(+), 4 deletions(-)
>> 
>> diff --git a/libmultipath/propsel.c b/libmultipath/propsel.c
>> index f64d5e4..b9e389f 100644
>> --- a/libmultipath/propsel.c
>> +++ b/libmultipath/propsel.c
>> @@ -374,7 +374,7 @@ detect_prio(struct path * pp)
>>  int ret;
>>  struct prio *p = >prio;
>>  
>> -if (get_target_port_group_support(pp->fd) <= 0)
>> +if ((pp->tpgs = get_target_port_group_support(pp->fd)) <= 0)
>>  return;
>>  ret = get_target_port_group(pp->fd);
>>  if (ret < 0)
>
>Again, setting this in detect_prio will make this useless for all
>devices that don't use detect_prio. After the "out:" label in
>select_prio, you can check if the alua prioritizer is being used,
>and set this there.
Thanks Ben, Hannes for the comments. I hesitated to put alua specific
checks in select_prio. Also, this condition is specific to targets
which support alua and implicit tpgs mode. Hence creating a config
option might not be necessary. I will post an updated patch with the
suggested change.
> 
>
>-Ben
>
>> diff --git a/libmultipath/structs.h b/libmultipath/structs.h
>> index 5f68a8e..ef5fb7e 100644
>> --- a/libmultipath/structs.h
>> +++ b/libmultipath/structs.h
>> @@ -193,6 +193,7 @@ struct path {
>>  int detect_prio;
>>  int watch_checks;
>>  int wait_checks;
>> +int tpgs;
>>  char * uid_attribute;
>>  char * getuid;
>>  struct prio prio;
>> diff --git a/multipathd/main.c b/multipathd/main.c
>> index 04f6d02..b6b0053 100644
>> --- a/multipathd/main.c
>> +++ b/multipathd/main.c
>> @@ -62,6 +62,7 @@ static int use_watchdog;
>>  #include 
>>  #include 
>>  #include 
>> +#include "prioritizers/alua_rtpg.h"
>>  
>>  #include "main.h"
>>  #include "pidfile.h"
>> @@ -1161,6 +1162,7 @@ check_path (struct vectors * vecs, struct path *
>>pp)
>>  int new_path_up = 0;
>>  int chkr_new_path_up = 0;
>>  int add_active;
>> +int ignore_reinstate = 0;
>>  int oldchkrstate = pp->chkrstate;
>>  
>>  if (pp->initialized && !pp->mpp)
>> @@ -1235,6 +1237,16 @@ check_path (struct vectors * vecs, struct path *
>>pp)
>>  pp->wait_checks = 0;
>>  }
>>  
>> +/*
>> + * don't reinstate failed path, if its in stand-by
>> + * and if target supports only implicit tpgs mode.
>> + * this will prevent unnecessary i/o by dm on stand-by
>> + * paths if there are no other active paths in map.
>> + */
>> +ignore_reinstate = (newstate == PATH_GHOST &&
>> +pp->mpp->nr_active == 0 &&
>> +pp->tpgs == TPGS_IMPLICIT) ? 1 : 0;
>> +
>>  pp->chkrstate = newstate;
>>  if (newstate != pp->state) {
>>  int oldstate = pp->state;
>> @@ -1297,7 +1309,7 @@ check_path (struct vectors * vecs, struct path *
>>pp)
>>  pp->watch_checks--;
>>  add_active = 0;
>>  }
>> -if (reinstate_path(pp, add_active)) {
>> +if (!ignore_reinstate && reinstate_path(pp, add_active)) {
>>  condlog(3, "%s: reload map", pp->dev);
>>  ev_add_path(pp, vecs);
>>  pp->tick = 1;
>> @@ -1316,8 +1328,9 @@ check_path (struct vectors * vecs, struct path *
>>pp)
>>  enable_group(pp);
>>  }
>>  else if (newstate == PATH_UP || newstate == PATH_GHOST) {
>> -if (pp->dmstate == PSTATE_FAILED ||
>> -pp->dmstate == PSTATE_UNDEF) {
>> +if ((pp->dmstate == PSTATE_FAILED ||
>> +pp->dmstate == PSTATE_UNDEF) &&
>> +!ignore_reinstate) {
>>  /* Clear IO errors */
>>  if (reinstate_path(pp, 0)) {
>>  condlog(3, "%s: reload map", pp->dev);
>> --
>> 


--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel


Re: [dm-devel] [PATCH v5 0/3] init: add support to directly boot to a mapped device

2016-02-26 Thread Mike Snitzer
On Fri, Feb 26 2016 at  2:59pm -0500,
Kees Cook  wrote:

> On Fri, Feb 26, 2016 at 11:21 AM, Mike Snitzer  wrote:
> > On Fri, Feb 26 2016 at  1:52pm -0500,
> > Kees Cook  wrote:
> >
> >> On Fri, Feb 26, 2016 at 8:53 AM, Mike Snitzer  wrote:
> >> > On Mon, Feb 22 2016 at  1:55pm -0500,
> >> > Kees Cook  wrote:
> >> >
> >> >> On Sun, Feb 21, 2016 at 2:08 PM, Alasdair G Kergon  
> >> >> wrote:
> >> >> > On Sat, Feb 20, 2016 at 10:13:49AM -0800, Kees Cook wrote:
> >> >> >> This is a resurrection of a patch series from a few years back, first
> >> >> >> brought to the dm maintainers in 2010. It creates a way to define dm
> >> >> >> devices on the kernel command line for systems that do not use an
> >> >> >> initramfs, or otherwise need a dm running before init starts.
> >> >> >>
> >> >> >> This has been used by Chrome OS for several years, and now by Brillo
> >> >> >> (and likely Android soon).
> >> >> >>
> >> >> >> The last version was v4:
> >> >> >> https://patchwork.kernel.org/patch/104860/
> >> >> >> https://patchwork.kernel.org/patch/104861/
> >> >> >
> >> >> > Inconsistencies in the terminology here can be sorted out during 
> >> >> > review,
> >> >> > and I see that you've taken on board some of my review comments from
> >> >> > 2010, but what are your responses to the rest of them?
> >> >>
> >> >> Ah, sorry, the threads I could find were incomplete, so I wasn't able
> >> >> to find those comments that were made to Will's 2010 submission. In
> >> >> some of the cleanups I did I was very confused about "target" vs
> >> >> "table", and tried to fix that. Regardless, I'm open to fixing
> >> >> whatever is needed. :)
> >> >>
> >> >> Thanks for looking at this again!
> >> >
> >> > This work isn't going to fly as is.  I appreciate the effort and the
> >> > goal (without understanding _why_) but: you're open-coding, duplicating
> >> > and/or reinventing way too much in do_mounts_dm.c
> >> >
> >> > 1) You first need to answer: _why_ is using a proper initramfs not
> >> > viable?  A very simple initramfs that issues dmsetup commands, etc,
> >> > isn't so daunting is it?  Why is it so important for the kernel to
> >> > natively provide a dmsetup interface?  Chrome, Android, etc cannot use
> >> > initramfs?
> >>
> >> That is correct: Chrome OS does not (and won't) use an initramfs. This
> >> is mainly for reasons of boot speed, verified boot block size, and
> >> maybe some other things I don't remember.
> >
> > Not sure what "verified boot block size" means but...
> 
> Chrome OS uses coreboot as its boot firmware and a coreboot module
> known as "depthcharge" for doing the crypto-verification and booting
> of the Chrome OS system. This is the static root of trust Chrome OS
> extends from its read-only boot firmware through to the kernel it
> loads and the dm-verity partition it mounts as the read-only root
> filesystem. To keep the boot speed fast and the kernel size small,
> there is no initramfs.
> 
> > Sorry I really don't buy that using a custom initramfs would be the
> > source of slow boot.  initramfs is _not_ this hugely inefficient
> > mechanism you'd have us believe.
> 
> I didn't say it was hugely inefficient, but for Chrome OS it's not
> needed, and was a measurable source of boot time. We just got rid of
> it since it was redundant. I can't change that design decision; I'm
> just here to help bring the dm= boot support upstream. :)
> 
> > And if that is the justification for this early boot dm= support then
> > the Chrome OS project/team will have to continue to carry the hack
> > locally.  It has no place upstream.  But I'm open to revisiting this if
> > it can be implemented in a very cheap way.
> 
> Yeah, I'm open to whatever suggestions you have.
> 
> >> > 2) If you are able to adequately justify the need for dm=:
> >> > I'd much rather the dm= kernel commandline be a simple series of
> >> > comma-delimited dmsetup-like commands.
> >> >
> >> > You'd handle each command with extremely basic parsing:
> >> >[,  ]
> >> > (inventing a special token to denote , to support tables with
> >> > multiple entries, rather than relying on commas and counts, etc)
> >>
> >> Sure, changing the syntax is fine by me. We'd need to plumb access to
> >> the ioctl interface, though.
> >
> > I was hoping to avoid any extra hacks but yes... seems you'd need a new
> > API to issue the equivalent of a DM ioctl programatically.  Hopefully
> > it'd be quite a small wrapper.
> 
> Seems like it shouldn't be too bad.

OK, I'm waiting on you to give it a shot.  I'll do my best to help.

Thanks,
Mike

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel


Re: [dm-devel] [PATCH v5 0/3] init: add support to directly boot to a mapped device

2016-02-26 Thread Kees Cook
On Fri, Feb 26, 2016 at 11:21 AM, Mike Snitzer  wrote:
> On Fri, Feb 26 2016 at  1:52pm -0500,
> Kees Cook  wrote:
>
>> On Fri, Feb 26, 2016 at 8:53 AM, Mike Snitzer  wrote:
>> > On Mon, Feb 22 2016 at  1:55pm -0500,
>> > Kees Cook  wrote:
>> >
>> >> On Sun, Feb 21, 2016 at 2:08 PM, Alasdair G Kergon  
>> >> wrote:
>> >> > On Sat, Feb 20, 2016 at 10:13:49AM -0800, Kees Cook wrote:
>> >> >> This is a resurrection of a patch series from a few years back, first
>> >> >> brought to the dm maintainers in 2010. It creates a way to define dm
>> >> >> devices on the kernel command line for systems that do not use an
>> >> >> initramfs, or otherwise need a dm running before init starts.
>> >> >>
>> >> >> This has been used by Chrome OS for several years, and now by Brillo
>> >> >> (and likely Android soon).
>> >> >>
>> >> >> The last version was v4:
>> >> >> https://patchwork.kernel.org/patch/104860/
>> >> >> https://patchwork.kernel.org/patch/104861/
>> >> >
>> >> > Inconsistencies in the terminology here can be sorted out during review,
>> >> > and I see that you've taken on board some of my review comments from
>> >> > 2010, but what are your responses to the rest of them?
>> >>
>> >> Ah, sorry, the threads I could find were incomplete, so I wasn't able
>> >> to find those comments that were made to Will's 2010 submission. In
>> >> some of the cleanups I did I was very confused about "target" vs
>> >> "table", and tried to fix that. Regardless, I'm open to fixing
>> >> whatever is needed. :)
>> >>
>> >> Thanks for looking at this again!
>> >
>> > This work isn't going to fly as is.  I appreciate the effort and the
>> > goal (without understanding _why_) but: you're open-coding, duplicating
>> > and/or reinventing way too much in do_mounts_dm.c
>> >
>> > 1) You first need to answer: _why_ is using a proper initramfs not
>> > viable?  A very simple initramfs that issues dmsetup commands, etc,
>> > isn't so daunting is it?  Why is it so important for the kernel to
>> > natively provide a dmsetup interface?  Chrome, Android, etc cannot use
>> > initramfs?
>>
>> That is correct: Chrome OS does not (and won't) use an initramfs. This
>> is mainly for reasons of boot speed, verified boot block size, and
>> maybe some other things I don't remember.
>
> Not sure what "verified boot block size" means but...

Chrome OS uses coreboot as its boot firmware and a coreboot module
known as "depthcharge" for doing the crypto-verification and booting
of the Chrome OS system. This is the static root of trust Chrome OS
extends from its read-only boot firmware through to the kernel it
loads and the dm-verity partition it mounts as the read-only root
filesystem. To keep the boot speed fast and the kernel size small,
there is no initramfs.

> Sorry I really don't buy that using a custom initramfs would be the
> source of slow boot.  initramfs is _not_ this hugely inefficient
> mechanism you'd have us believe.

I didn't say it was hugely inefficient, but for Chrome OS it's not
needed, and was a measurable source of boot time. We just got rid of
it since it was redundant. I can't change that design decision; I'm
just here to help bring the dm= boot support upstream. :)

> And if that is the justification for this early boot dm= support then
> the Chrome OS project/team will have to continue to carry the hack
> locally.  It has no place upstream.  But I'm open to revisiting this if
> it can be implemented in a very cheap way.

Yeah, I'm open to whatever suggestions you have.

>> > 2) If you are able to adequately justify the need for dm=:
>> > I'd much rather the dm= kernel commandline be a simple series of
>> > comma-delimited dmsetup-like commands.
>> >
>> > You'd handle each command with extremely basic parsing:
>> >[,  ]
>> > (inventing a special token to denote , to support tables with
>> > multiple entries, rather than relying on commas and counts, etc)
>>
>> Sure, changing the syntax is fine by me. We'd need to plumb access to
>> the ioctl interface, though.
>
> I was hoping to avoid any extra hacks but yes... seems you'd need a new
> API to issue the equivalent of a DM ioctl programatically.  Hopefully
> it'd be quite a small wrapper.

Seems like it shouldn't be too bad.

>> > and you'd then have do_mounts_dm.c open /dev/mapper/control directly and
>> > issue proper DM ioctls rather than adding all your shim code.  This last
>> > bit of opening /dev/mapper/control from init needs more research -- not
>> > sure if doing such a thing from kernel is viable/safe/acceptable.
>>
>> Well, there's no /dev and no init since our dm is the root device
>> (dm-verity). We need everything up and running before we mount the
>> root filesystem, very similar to do_mount_md.c's purpose.
>
> Ah yes, microoptimization associated with no udev or normal Linux boot
> comes full circle and limits the use of existing standard interfaces.

Chrome OS 

Re: [dm-devel] [PATCH v5 0/3] init: add support to directly boot to a mapped device

2016-02-26 Thread Mike Snitzer
On Fri, Feb 26 2016 at  1:52pm -0500,
Kees Cook  wrote:

> On Fri, Feb 26, 2016 at 8:53 AM, Mike Snitzer  wrote:
> > On Mon, Feb 22 2016 at  1:55pm -0500,
> > Kees Cook  wrote:
> >
> >> On Sun, Feb 21, 2016 at 2:08 PM, Alasdair G Kergon  wrote:
> >> > On Sat, Feb 20, 2016 at 10:13:49AM -0800, Kees Cook wrote:
> >> >> This is a resurrection of a patch series from a few years back, first
> >> >> brought to the dm maintainers in 2010. It creates a way to define dm
> >> >> devices on the kernel command line for systems that do not use an
> >> >> initramfs, or otherwise need a dm running before init starts.
> >> >>
> >> >> This has been used by Chrome OS for several years, and now by Brillo
> >> >> (and likely Android soon).
> >> >>
> >> >> The last version was v4:
> >> >> https://patchwork.kernel.org/patch/104860/
> >> >> https://patchwork.kernel.org/patch/104861/
> >> >
> >> > Inconsistencies in the terminology here can be sorted out during review,
> >> > and I see that you've taken on board some of my review comments from
> >> > 2010, but what are your responses to the rest of them?
> >>
> >> Ah, sorry, the threads I could find were incomplete, so I wasn't able
> >> to find those comments that were made to Will's 2010 submission. In
> >> some of the cleanups I did I was very confused about "target" vs
> >> "table", and tried to fix that. Regardless, I'm open to fixing
> >> whatever is needed. :)
> >>
> >> Thanks for looking at this again!
> >
> > This work isn't going to fly as is.  I appreciate the effort and the
> > goal (without understanding _why_) but: you're open-coding, duplicating
> > and/or reinventing way too much in do_mounts_dm.c
> >
> > 1) You first need to answer: _why_ is using a proper initramfs not
> > viable?  A very simple initramfs that issues dmsetup commands, etc,
> > isn't so daunting is it?  Why is it so important for the kernel to
> > natively provide a dmsetup interface?  Chrome, Android, etc cannot use
> > initramfs?
> 
> That is correct: Chrome OS does not (and won't) use an initramfs. This
> is mainly for reasons of boot speed, verified boot block size, and
> maybe some other things I don't remember.

Not sure what "verified boot block size" means but...

Sorry I really don't buy that using a custom initramfs would be the
source of slow boot.  initramfs is _not_ this hugely inefficient
mechanism you'd have us believe.

And if that is the justification for this early boot dm= support then
the Chrome OS project/team will have to continue to carry the hack
locally.  It has no place upstream.  But I'm open to revisiting this if
it can be implemented in a very cheap way.

> > 2) If you are able to adequately justify the need for dm=:
> > I'd much rather the dm= kernel commandline be a simple series of
> > comma-delimited dmsetup-like commands.
> >
> > You'd handle each command with extremely basic parsing:
> >[,  ]
> > (inventing a special token to denote , to support tables with
> > multiple entries, rather than relying on commas and counts, etc)
> 
> Sure, changing the syntax is fine by me. We'd need to plumb access to
> the ioctl interface, though.

I was hoping to avoid any extra hacks but yes... seems you'd need a new
API to issue the equivalent of a DM ioctl programatically.  Hopefully
it'd be quite a small wrapper.

> > and you'd then have do_mounts_dm.c open /dev/mapper/control directly and
> > issue proper DM ioctls rather than adding all your shim code.  This last
> > bit of opening /dev/mapper/control from init needs more research -- not
> > sure if doing such a thing from kernel is viable/safe/acceptable.
> 
> Well, there's no /dev and no init since our dm is the root device
> (dm-verity). We need everything up and running before we mount the
> root filesystem, very similar to do_mount_md.c's purpose.

Ah yes, microoptimization associated with no udev or normal Linux boot
comes full circle and limits the use of existing standard interfaces.

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel


Re: [dm-devel] [PATCH V2]multipath-tools: prevent unnecessary reinstate of stand-by paths with implicit tpgs mode and no active array paths

2016-02-26 Thread Benjamin Marzinski
On Fri, Feb 26, 2016 at 02:25:08AM +, Shiva Krishna wrote:
> --- 
>  libmultipath/propsel.c |2 +-
>  libmultipath/structs.h |1 +
>  multipathd/main.c  |   19 ---
>  3 files changed, 18 insertions(+), 4 deletions(-)
> 
> diff --git a/libmultipath/propsel.c b/libmultipath/propsel.c
> index f64d5e4..b9e389f 100644
> --- a/libmultipath/propsel.c
> +++ b/libmultipath/propsel.c
> @@ -374,7 +374,7 @@ detect_prio(struct path * pp)
>   int ret;
>   struct prio *p = >prio;
>  
> - if (get_target_port_group_support(pp->fd) <= 0)
> + if ((pp->tpgs = get_target_port_group_support(pp->fd)) <= 0)
>   return;
>   ret = get_target_port_group(pp->fd);
>   if (ret < 0)

Again, setting this in detect_prio will make this useless for all
devices that don't use detect_prio. After the "out:" label in
select_prio, you can check if the alua prioritizer is being used,
and set this there. 

-Ben

> diff --git a/libmultipath/structs.h b/libmultipath/structs.h
> index 5f68a8e..ef5fb7e 100644
> --- a/libmultipath/structs.h
> +++ b/libmultipath/structs.h
> @@ -193,6 +193,7 @@ struct path {
>   int detect_prio;
>   int watch_checks;
>   int wait_checks;
> + int tpgs;
>   char * uid_attribute;
>   char * getuid;
>   struct prio prio;
> diff --git a/multipathd/main.c b/multipathd/main.c
> index 04f6d02..b6b0053 100644
> --- a/multipathd/main.c
> +++ b/multipathd/main.c
> @@ -62,6 +62,7 @@ static int use_watchdog;
>  #include 
>  #include 
>  #include 
> +#include "prioritizers/alua_rtpg.h"
>  
>  #include "main.h"
>  #include "pidfile.h"
> @@ -1161,6 +1162,7 @@ check_path (struct vectors * vecs, struct path * pp)
>   int new_path_up = 0;
>   int chkr_new_path_up = 0;
>   int add_active;
> + int ignore_reinstate = 0;
>   int oldchkrstate = pp->chkrstate;
>  
>   if (pp->initialized && !pp->mpp)
> @@ -1235,6 +1237,16 @@ check_path (struct vectors * vecs, struct path * pp)
>   pp->wait_checks = 0;
>   }
>  
> + /*
> +  * don't reinstate failed path, if its in stand-by
> +  * and if target supports only implicit tpgs mode.
> +  * this will prevent unnecessary i/o by dm on stand-by
> +  * paths if there are no other active paths in map.
> +  */
> + ignore_reinstate = (newstate == PATH_GHOST &&
> + pp->mpp->nr_active == 0 &&
> + pp->tpgs == TPGS_IMPLICIT) ? 1 : 0;
> +
>   pp->chkrstate = newstate;
>   if (newstate != pp->state) {
>   int oldstate = pp->state;
> @@ -1297,7 +1309,7 @@ check_path (struct vectors * vecs, struct path * pp)
>   pp->watch_checks--;
>   add_active = 0;
>   }
> - if (reinstate_path(pp, add_active)) {
> + if (!ignore_reinstate && reinstate_path(pp, add_active)) {
>   condlog(3, "%s: reload map", pp->dev);
>   ev_add_path(pp, vecs);
>   pp->tick = 1;
> @@ -1316,8 +1328,9 @@ check_path (struct vectors * vecs, struct path * pp)
>   enable_group(pp);
>   }
>   else if (newstate == PATH_UP || newstate == PATH_GHOST) {
> - if (pp->dmstate == PSTATE_FAILED ||
> - pp->dmstate == PSTATE_UNDEF) {
> + if ((pp->dmstate == PSTATE_FAILED ||
> + pp->dmstate == PSTATE_UNDEF) &&
> + !ignore_reinstate) {
>   /* Clear IO errors */
>   if (reinstate_path(pp, 0)) {
>   condlog(3, "%s: reload map", pp->dev);
> --
> 

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel


Re: [dm-devel] [PATCH]multipath-tools: prevent unnecessary reinstate of stand-by paths with implicit tpgs mode and no active array paths.

2016-02-26 Thread Benjamin Marzinski
On Fri, Feb 26, 2016 at 12:32:51AM +, Shiva Krishna wrote:
> 
> 
> On 2/25/16, 12:49 PM, "Benjamin Marzinski"  wrote:
> 
> >On Sat, Feb 20, 2016 at 08:23:29PM +, Shiva Krishna wrote:
> >
> >I understand your problem, but this isn't the right patch to fix it. For
> >one this check
> >
> >+if (newstate != PATH_GHOST || pp->mpp->nr_active > 0 ||
> >+pp->tpgs != TPGS_IMPLICIT) {
> >
> >is pretty problematic.  Every path that isn't alua or doesn't enable
> >detect_prio will have pp->tpgs != TPGS_IMPLICIT (because this will be
> >zeroed out on path creation for all paths, and TPGS_IMPLICIT == 0x1). If
> >you need special handling for PATH_GHOST, and you don't want to copy and
> >modify an existing checker, then you should probably go with a config
> >option to enable this on your device, perhaps ghost_is_standby.  Then
> >you (and anyone else who needs this) can set that in your builtin device
> >config, and you don't need to try to craft a complicated check to get
> >this right.
> That was exactly my intention. To allow reinstate in all those cases and
> prevent this only if pp->tpgs == TPGS_IMPLICIT. Even if we add a new config
> parameter to avoid this, I think a check like this is still needed, because
> Even if ghost is stand-by we want reinstate when there are other active
> paths
> present in the map. That is to get them out of the failed state. My
> intention
> was to keep them failed as long as there are no active paths
> present/discovered.

I have no objection to the (pp->mpp->nr_active > 0) check.  My point is
that with your code (pp->tpgs != TPGS_IMPLICT) does not correctly weed
out the implicit alua paths.  Since you set pp->tpgs in detect_prio, all
implict alua devices that don't use detect_prio (and this is most of
them), will not have pp->tpgs set to TPGS_IMPLICIT. I also wonder if
this way of dealing with PATH_GHOST paths would be useful to other
device setups, not just ones where you have standby paths with implicit
alua. In this case, making it a seperate config option would make it
available to any device. If this is really only useful for this specific
case, then I'm not against doing some alua specific checks in
select_prio.

-Ben

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel


Re: [dm-devel] [PATCH v5 0/3] init: add support to directly boot to a mapped device

2016-02-26 Thread Mike Snitzer
On Mon, Feb 22 2016 at  1:55pm -0500,
Kees Cook  wrote:

> On Sun, Feb 21, 2016 at 2:08 PM, Alasdair G Kergon  wrote:
> > On Sat, Feb 20, 2016 at 10:13:49AM -0800, Kees Cook wrote:
> >> This is a resurrection of a patch series from a few years back, first
> >> brought to the dm maintainers in 2010. It creates a way to define dm
> >> devices on the kernel command line for systems that do not use an
> >> initramfs, or otherwise need a dm running before init starts.
> >>
> >> This has been used by Chrome OS for several years, and now by Brillo
> >> (and likely Android soon).
> >>
> >> The last version was v4:
> >> https://patchwork.kernel.org/patch/104860/
> >> https://patchwork.kernel.org/patch/104861/
> >
> > Inconsistencies in the terminology here can be sorted out during review,
> > and I see that you've taken on board some of my review comments from
> > 2010, but what are your responses to the rest of them?
> 
> Ah, sorry, the threads I could find were incomplete, so I wasn't able
> to find those comments that were made to Will's 2010 submission. In
> some of the cleanups I did I was very confused about "target" vs
> "table", and tried to fix that. Regardless, I'm open to fixing
> whatever is needed. :)
> 
> Thanks for looking at this again!

This work isn't going to fly as is.  I appreciate the effort and the
goal (without understanding _why_) but: you're open-coding, duplicating
and/or reinventing way too much in do_mounts_dm.c

1) You first need to answer: _why_ is using a proper initramfs not
viable?  A very simple initramfs that issues dmsetup commands, etc,
isn't so daunting is it?  Why is it so important for the kernel to
natively provide a dmsetup interface?  Chrome, Android, etc cannot use
initramfs?

2) If you are able to adequately justify the need for dm=:
I'd much rather the dm= kernel commandline be a simple series of
comma-delimited dmsetup-like commands.

You'd handle each command with extremely basic parsing:
   [,  ]
(inventing a special token to denote , to support tables with
multiple entries, rather than relying on commas and counts, etc)

and you'd then have do_mounts_dm.c open /dev/mapper/control directly and
issue proper DM ioctls rather than adding all your shim code.  This last
bit of opening /dev/mapper/control from init needs more research -- not
sure if doing such a thing from kernel is viable/safe/acceptable.

Mike

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel


Re: [dm-devel] blkio cgroups controller doesn't work with LVM?

2016-02-26 Thread Vivek Goyal
On Thu, Feb 25, 2016 at 09:53:14AM -0500, Mike Snitzer wrote:
> On Thu, Feb 25 2016 at  2:48am -0500,
> Nikolay Borisov  wrote:
> 
> > 
> > 
> > On 02/24/2016 08:12 PM, Chris Friesen wrote:
> > > 
> > > Hi,
> > > 
> > > Are there known limitations with the blkio cgroup controller when used
> > > with LVM?
> > > 
> > > I'm using Ubuntu 15.10 with the 4.2 kernel.  I got the same results with
> > > CentOS 7.
> > > 
> > > I set up two groups, /sys/fs/cgroup/blkio/test1 and
> > > /sys/fs/cgroup/blkio/test2.  I set the weight for test1 to 500, and the
> > > weight for test2 to 1000.
> > 
> > The weighed mode of blkio works only with CFQ scheduler. And as far as I
> > have seen you cannot set CFQ to be the scheduler of DM devices. In this
> > case you can use the BLK io throttling mechanism. That's what I've
> > encountered in my practice. Though I'd be happy to be proven wrong by
> > someone. I believe the following sentence in the blkio controller states
> > that:
> > "
> > First one is proportional weight time based division of disk policy. It
> > is implemented in CFQ. Hence this policy takes effect only on leaf nodes
> > when CFQ is being used.
> > "
> 
> Right, LVM created devices are bio-based DM devices in the kernel.
> bio-based block devices do _not_ have an IO scheduler.  Their underlying
> request-based device does.
> 
> I'm not well-versed on the top-level cgroup interface and how it maps to
> associated resources that are established in the kernel.  But it could
> be that the configuration of blkio cgroup against a bio-based LVM device
> needs to be passed through to the underlying request-based device
> (e.g. /dev/sda4 in Chris's case)?
> 
> I'm also wondering whether the latest cgroup work that Tejun has just
> finished (afaik to support buffered IO in the IO controller) will afford
> us a more meaningful reason to work to make cgroups' blkio controller
> actually work with bio-based devices like LVM's DM devices?
> 
> I'm very much open to advice on how to proceed with investigating this
> integration work.  Tejun, Vivek, anyone else: if you have advice on next
> steps for DM on this front _please_ yell, thanks!

Ok, here is my understanding. Tejun, please correct me if that's not the
case anymore. I have not been able to keep pace with all the recent work.

IO throttling policies should be applied on top level dm devices and these
should work for reads and direct writes.

For IO throttling buffered writes, I think it might not work on dm devices
as it because we might not be copying cgroup information when cloning
happens in dm layer.

IIRC, one concern with cloning cgroup info from parent bio was that how
would one take care of any priority inversion issues. For example, we are
waiting for a clone to finish IO which is in severely throttled IO cgroup
and rest of the IO can't proceed till that IO finishes).

IIUC, there might not be a straight forward answer to that question. We
probably will have to look at all the dm code closely and if that
serialization is possible in any of the paths, then reset the cgroup info.

For CFQ's proportional policy, it might not work well when a dm device
is sitting on top. And reason being that for all reads and direct writes
we inherit cgroup from submitter and dm might be submitting IO from an
internal thread, hence losing the cgroup of submitter hence IO gets
misclassified at dm level.

To solve this, we will have to carry submitter's cgroup info in bio and
clones and again think of priority inversion issues.

Thanks
Vivek

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel


[dm-devel] [PATCH] dmsetup: improve message command

2016-02-26 Thread Werner Koch
Hi!

I am playing with a new crypto container format and propose to enhance
"dmsetup message" to accept the actual message from stdin instead of
taking it only from the command line.  This is useful to set a key and
similar to how "dmsetup create" is reading the table from stdin.

Patch attached.


Shalom-Salam,

   Werner

-- 
Die Gedanken sind frei.  Ausnahmen regelt ein Bundesgesetz.
>From ae2b5739007b0829c3a142d0d5e782b3b7fe3028 Mon Sep 17 00:00:00 2001
From: Werner Koch 
Date: Fri, 19 Feb 2016 21:47:45 +0100
Subject: [PATCH] dmsetup: command message may now read the message from stdin.

When "dmsetup messsage" is used to set an encryption key it is better
to read that from stdin so that the key does not show up in ps or the
shell history.  Thus instead of an error message when the third arg is
missing, it is now taken from stdin.  stdin is also switched to
unbuffered mode so that sensitive data should not end up in stdio
buffers.  A new wipememory macro (taken from GnuPG) is used to
securely erase the message from memory.

Signed-off-by: Werner Koch 
---
 man/dmsetup.8.in |  5 +++--
 tools/dmsetup.c  | 68 ++--
 2 files changed, 54 insertions(+), 19 deletions(-)

diff --git a/man/dmsetup.8.in b/man/dmsetup.8.in
index f92dbe5..c9c886a 100644
--- a/man/dmsetup.8.in
+++ b/man/dmsetup.8.in
@@ -127,7 +127,7 @@ dmsetup \(em low level logical volume management
 .  BR message
 .  IR device_name
 .  IR sector
-.  IR message
+.  RI [ message ]
 ..
 .CMD_MESSAGE
 .
@@ -714,7 +714,8 @@ reactivating it with proper mangling mode used (see also \fB\-\-manglename\fP).
 .HP
 .CMD_MESSAGE
 .br
-Send message to target. If sector not needed use 0.
+Send message to target. If sector not needed use 0.  If the message
+argument is not given, the first line read from stdin is used.
 .
 .HP
 .CMD_MKNODES
diff --git a/tools/dmsetup.c b/tools/dmsetup.c
index 4db6004..8e988f0 100644
--- a/tools/dmsetup.c
+++ b/tools/dmsetup.c
@@ -105,6 +105,15 @@ extern char *optarg;
 
 #define err(msg, x...) fprintf(stderr, msg "\n", ##x)
 
+/* To avoid that a compiler optimizes memset calls away, this macro
+ * should be used securely clear memory. */
+#define wipememory(_ptr,_len) do { \
+  volatile char *_vptr=(volatile char *)(_ptr); \
+  size_t _vlen=(_len); \
+  while(_vlen) { *_vptr=0; _vptr++; _vlen--; } \
+  } while(0)
+
+
 /* program_id used for dmstats-managed statistics regions */
 #define DM_STATS_PROGRAM_ID "dmstats"
 
@@ -1234,25 +1243,50 @@ static int _message(CMD_ARGS)
 	argc--;
 	argv++;
 
-	if (argc <= 0)
-		err("No message supplied.\n");
-
-	for (i = 0; i < argc; i++)
-		sz += strlen(argv[i]) + 1;
-
-	if (!(str = dm_zalloc(sz))) {
-		err("message string allocation failed");
-		goto out;
-	}
-
-	for (i = 0; i < argc; i++) {
-		if (i)
-			strcat(str, " ");
-		strcat(str, argv[i]);
-	}
+	if (argc <= 0) {
+		/* Read messsage from stdin (one line).  */
+size_t len = LINE_SIZE;
+
+/* Try avoiding storing potential sensitive data in a
+ * stdio buffer.  */
+if (setvbuf (stdin, NULL, _IONBF, BUFSIZ)) {
+err("Failed to switch stdin to unbuffered mode.");
+goto out;
+}
+
+if (!(str = dm_malloc(len))) {
+err("Failed to malloc line buffer.");
+goto out;
+}
+
+if (!fgets(str, (int) len, stdin)) {
+err("Error reading line from stdin.");
+dm_free(str);
+goto out;
+}
+len = strlen (str);
+if (len && str[len-1]=='\n')
+str[--len] = 0;
+
+} else {
+for (i = 0; i < argc; i++)
+sz += strlen(argv[i]) + 1;
+
+if (!(str = dm_zalloc(sz))) {
+err("message string allocation failed");
+goto out;
+}
+
+for (i = 0; i < argc; i++) {
+if (i)
+strcat(str, " ");
+strcat(str, argv[i]);
+}
+}
 
 	i = dm_task_set_message(dmt, str);
 
+wipememory (str, strlen(str));
 	dm_free(str);
 
 	if (!i)
@@ -5132,7 +5166,7 @@ static struct command _dmsetup_commands[] = {
 	{"reload", " [|]", 0, 2, 0, 0, _load},
 	{"wipe_table", "[-f|--force] [--noflush] [--nolockfs] ", 1, -1, 1, 0, _error_device},
 	{"rename", " [--setuuid] ", 1, 2, 0, 0, _rename},
-	{"message", "  ", 2, -1, 0, 0, _message},
+	{"message", "  []", 2, -1, 0, 0, _message},
 	{"ls", "[--target ] [--exec ] [-o ] [--tree]", 0, 0, 0, 0, _ls},
 	{"info", "[]", 0, -1, 1, 0, _info},
 	{"deps", "[-o ] []", 0, -1, 1, 0, _deps},
-- 
2.1.4

--
dm-devel mailing list