[dm-devel] [PATCH] libmultipath: fix unit to seconds in log message for checker timeout

2017-06-27 Thread Martin Wilck
checker_timeout is in seconds, not milliseconds, since 905281da
"Specify checker_timeout in seconds". Fix the log messages.

Signed-off-by: Martin Wilck 
---
 libmultipath/propsel.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libmultipath/propsel.c b/libmultipath/propsel.c
index 27f39517..4ac7edcd 100644
--- a/libmultipath/propsel.c
+++ b/libmultipath/propsel.c
@@ -333,11 +333,11 @@ out:
pp->dev, c->timeout);
}
else if (sysfs_get_timeout(pp, >timeout) > 0)
-   condlog(3, "%s: checker timeout = %u ms (setting: kernel 
sysfs)",
+   condlog(3, "%s: checker timeout = %u s (setting: kernel sysfs)",
pp->dev, c->timeout);
else {
c->timeout = DEF_TIMEOUT;
-   condlog(3, "%s: checker timeout = %u ms (setting: multipath 
internal)",
+   condlog(3, "%s: checker timeout = %u s (setting: multipath 
internal)",
pp->dev, c->timeout);
}
return 0;
-- 
2.13.1

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel


[dm-devel] [PATCH]prioritizers/path_latency: Fix failure of sg_read to a nvme device

2017-06-27 Thread Yang Feng
Fixed failure of sg_read to a nvme device:
1. Send sg_read command to a nvme device failed,
because the block size in user-mode is 4096 bytes,
not equal to the bolck size in kernel-mode, who is
512 bytes. According to the func nvme_trans_io of
scsi.c in the dir "drivers/nvme/host". The code
fragmentS as follows:
/* If block count and actual data buffer size dont match, error out */
if (xfer_bytes != (cdb_info.xfer_len << ns->lba_shift)) {
res = -EINVAL;
goto out;
}
2. And the SCSI-to-NVMe translations will be removed in
the patch "nvme: Remove SCSI translations" in the linux-nvme,
so using the nvme_read command is a good idea.

Signed-off-by: Yang Feng 
---
 libmultipath/prioritizers/path_latency.c | 78 +++-
 1 file changed, 66 insertions(+), 12 deletions(-)

diff --git a/libmultipath/prioritizers/path_latency.c 
b/libmultipath/prioritizers/path_latency.c
index 34b734b..8f633e0 100644
--- a/libmultipath/prioritizers/path_latency.c
+++ b/libmultipath/prioritizers/path_latency.c
@@ -23,14 +23,32 @@
 #include 
 #include 
 #include 
-
 #include "debug.h"
 #include "prio.h"
 #include "structs.h"
+#include 
+#include 
 #include "../checkers/libsg.h"
 
 #define pp_pl_log(prio, fmt, args...) condlog(prio, "path_latency prio: " fmt, 
##args)
 
+struct nvme_user_io {
+__u8 opcode;
+__u8 flags;
+__u16 control;
+__u16 nblocks;
+__u16 rsvd;
+__u64 metadata;
+__u64 addr;
+__u64 slba;
+__u32 dsmgmt;
+__u32 reftag;
+__u16 apptag;
+__u16 appmask;
+};
+
+#define NVME_IOCTL_SUBMIT_IO _IOW('N', 0x42, struct nvme_user_io)
+
 #define MAX_IO_NUM  200
 #define MIN_IO_NUM  2
 
@@ -51,19 +69,55 @@ static long long path_latency[MAX_IO_NUM];
 
 static inline long long timeval_to_us(const struct timespec *tv)
 {
-   return ((long long) tv->tv_sec * USEC_PER_SEC) + (tv->tv_nsec / 
NSEC_PER_USEC);
+return ((long long) tv->tv_sec * USEC_PER_SEC) + (tv->tv_nsec / 
NSEC_PER_USEC);
+}
+
+int nvme_io(int fd, __u8 opcode, __u64 slba, __u16 nblocks, __u16 control,
+__u32 dsmgmt, __u32 reftag, __u16 apptag, __u16 appmask, void 
*data, void *metadata)
+{
+struct nvme_user_io io = {
+.opcode = opcode,
+.flags = 0,
+.control = control,
+.nblocks = nblocks,
+.rsvd = 0,
+.metadata = (__u64)(uintptr_t) metadata,
+.addr = (__u64)(uintptr_t) data,
+.slba = slba,
+.dsmgmt = dsmgmt,
+.reftag = reftag,
+.appmask = apptag,
+.apptag = appmask,
+};
+
+return ioctl(fd, NVME_IOCTL_SUBMIT_IO, );
+}
+
+int nvme_read(int fd, __u64 slba, __u16 nblocks, __u16 control, __u32 dsmgmt,
+__u32 reftag, __u16 apptag, __u16 appmask, void *data, void 
*metadata)
+{
+return nvme_io(fd, 0x2, slba, nblocks, control, dsmgmt,
+reftag, apptag, appmask, data, metadata);
 }
 
-static int do_readsector0(int fd, unsigned int timeout)
+static int do_readsector0(struct path *pp, unsigned int timeout)
 {
-   unsigned char buf[4096];
-   unsigned char sbuf[SENSE_BUFF_LEN];
-   int ret;
+unsigned char buf[4096];
+unsigned char mbuf[512];
+unsigned char sbuf[SENSE_BUFF_LEN];
 
-   ret = sg_read(fd, [0], 4096, [0],
- SENSE_BUFF_LEN, timeout);
+if (!strncmp(pp->dev, "nvme", 4))
+{
+if (nvme_read(pp->fd, 0, 1, 0, 0, 0, 0, 0, buf, mbuf) < 0)
+return 0;
+}
+else
+{
+if (sg_read(pp->fd, [0], 4096, [0],SENSE_BUFF_LEN, timeout) 
== 2)
+return 0;
+}
 
-   return ret;
+return 1;
 }
 
 int check_args_valid(int io_num, int base_num)
@@ -218,9 +272,9 @@ int getprio (struct path *pp, char *args, unsigned int 
timeout)
 (void)clock_gettime(CLOCK_MONOTONIC, );
 before = timeval_to_us();
 
-if (do_readsector0(pp->fd, timeout) == 2)
+if (do_readsector0(pp, timeout) == 0)
 {
-pp_pl_log(0, "%s: path down", pp->dev);
+pp_pl_log(0, "%s: send read sector0 command fail", pp->dev);
 return -1;
 }
 
@@ -246,7 +300,7 @@ int getprio (struct path *pp, char *args, unsigned int 
timeout)
 Warn the user if latency_interval is smaller than (2 * 
standard_deviation), or equal */
 standard_deviation = calc_standard_deviation(path_latency, index, 
avglatency);
 latency_interval = calc_latency_interval(avglatency, MAX_AVG_LATENCY, 
MIN_AVG_LATENCY, base_num);
-if ((latency_interval!= 0)
+if ((latency_interval != 0)
 && (latency_interval <= (2 * standard_deviation)))
 pp_pl_log(3, "%s: latency interval (%lld) according to average latency 
(%lld us) is smaller than "
 "2 * standard deviation (%lld us), or equal, args base_num (%d) 
needs to be set bigger value",
-- 
2.6.4.windows.1


--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel


Re: [dm-devel] [PATCH 2/2] multipath: attempt at common multipath.rules

2017-06-27 Thread Martin Wilck
Hello Ben,

Thanks again for your effort on this issue.
I finally found time to look into this proposal more deeply, sorry that
it took so long.

Please find my comments below.

Best regards,
Martin

On Wed, 2017-04-12 at 17:15 -0500, Benjamin Marzinski wrote:
> This is a proposal to try and bring the Redhat and SuSE
> multipath.rules
> closer. There are a couple of changes that I'd like some input on.
> 
> The big change is moving the kpartx call into the multipath
> rules.  Half
> of the current kpartx.rules file is about creating symlinks for
> multiple
> types of dm devices. The other half auto-creates kpartx devices on
> top
> of multipath devices. Since it is only creating kpartx devices on top
> of
> multipath devices, I've moved the these rules into multipath.rules,
> or
> rather, I've replaced them with the redhat rules in multipath.rules. 

I don't like the move of the rules too much, for two reasons:

1) Even if multipath is the only use case today, I see no deeper reason
why kpartx would be used only for multipath devices. It could be used
to create partitions on top of other dm targets as well.

2) multipath.rules now consists of two completely unrelated parts, one
for detecting paths that are part of multipath maps, and one for
running kpartx on the maps themselves. That's confusing and sort of
against the "spirit" of udev rules, as far as I understand it.
Logically, if you really want to merge this into another .rules file,
the kpartx part would rather belong into 11-dm-mpath.rules (which deals
with multipath maps) than in 56-multipath.rules (which deals with
paths).

> The
> biggest difference is the kpartx isn't run on every reload.  It works
> with the 11-dm-mpath.rules code to not run kpartx on multipathd
> generated reloads or when there aren't any working paths. It does
> remember if it didn't get to run kpartx when it was supposed to
> (because
> there were no valid paths or the device was suspended) and will make
> sure to run it on the next possible uevent.

I like this part, but i have some suggestions, see below.

> The other change is the redhat multipath rules remove the partition
> device nodes for devices claimed by multipath. The udev rule will
> only
> do this one time (both to keep from running partx on every event, and
> so
> that if users manually reread the partition table, we don't keep
> removing them when clearly they are wanted). Redhat does this because
> we
> had multiple customer issues where they were using the scsi
> partitions
> instead of the kpartx devices. Obviously, with setting the partition
> devices to not ready and clearing their fs_type, this isn't
> essential,
> but it has helped make customers do the right thing.

Isn't this racy? You call "partx -d" on the parent device (e.g. sda).
At this point in time, the kernel will already have detected the
partitions and "add" uevents for them are likely to follow up quickly.
You generate "remove" events that may race with the "add" - or am I
overlooking something?

I'd feel better if we'd call "partx -d" while processing the "add"
uevent for the _partitions_, ideally late in that process. That would
make (almost) sure that "add" was finished when "remove" was generated.
See below.

> 
> Cc: Martin Wilck 
> Cc: Hannes Reinecke 
> Signed-off-by: Benjamin Marzinski 
> ---
>  kpartx/kpartx.rules   |  8 
>  multipath/multipath.rules | 27 ---
>  2 files changed, 24 insertions(+), 11 deletions(-)
> 
> diff --git a/kpartx/kpartx.rules b/kpartx/kpartx.rules
> index 48a4d6c..906e320 100644
> --- a/kpartx/kpartx.rules
> +++ b/kpartx/kpartx.rules
> @@ -34,12 +34,4 @@ ENV{ID_FS_LABEL_ENC}=="?*",
> IMPORT{db}="ID_FS_LABEL_ENC"
>  ENV{ID_FS_USAGE}=="filesystem|other", ENV{ID_FS_LABEL_ENC}=="?*", \
>   SYMLINK+="disk/by-label/$env{ID_FS_LABEL_ENC}"
>  
> -# Create dm tables for partitions
> -ENV{DM_ACTION}=="PATH_FAILED|PATH_REINSTATED", GOTO="kpartx_end"
> -ENV{DM_NR_VALID_PATHS}=="0", GOTO="kpartx_end"
> -ENV{ENV{DM_UDEV_PRIMARY_SOURCE_FLAG}!="1",
> IMPORT{db}="DM_SUBSYSTEM_UDEV_FLAG1"
> -ENV{DM_SUBSYSTEM_UDEV_FLAG1}=="1", GOTO="kpartx_end"
> -ENV{DM_STATE}!="SUSPENDED", ENV{DM_UUID}=="mpath-*", \
> - RUN+="/sbin/kpartx -u -p -part /dev/$name"
> -
>  LABEL="kpartx_end"
> diff --git a/multipath/multipath.rules b/multipath/multipath.rules
> index 86defc0..10b9b2b 100644
> --- a/multipath/multipath.rules
> +++ b/multipath/multipath.rules
> @@ -1,13 +1,13 @@
>  # Set DM_MULTIPATH_DEVICE_PATH if the device should be handled by
> multipath
>  SUBSYSTEM!="block", GOTO="end_mpath"
>  ACTION!="add|change", GOTO="end_mpath"
> -KERNEL!="sd*|dasd*", GOTO="end_mpath"
> -
> +KERNEL!="sd*|dasd*|rbd*|dm-*", GOTO="end_mpath"
>  IMPORT{cmdline}="nompath"
>  ENV{nompath}=="?*", GOTO="end_mpath"
>  IMPORT{cmdline}="multipath"
>  ENV{multipath}=="off", GOTO="end_mpath"
>  
> +KERNEL=="dm-*", GOTO="check_kpartx"
>  ENV{DEVTYPE}!="partition", 

Re: [dm-devel] move bounce limits settings into the drivers

2017-06-27 Thread Jens Axboe
On 06/19/2017 01:26 AM, Christoph Hellwig wrote:
> Currently we still default to a bounce all highmem setting for block
> drivers.  This series defaults to no bouncing and instead adds call
> to blk_queue_bounce_limit to those drivers that need it.   It also
> has a few cleanups in that area.

Looks straight forward and trivial to me, I'll pull it in for
4.13.

-- 
Jens Axboe

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel


Re: [dm-devel] dm-cache coherence issue

2017-06-27 Thread Joe Thornber
On Mon, Jun 26, 2017 at 10:36:23PM +0200, Johannes Bauer wrote:
> On 26.06.2017 21:56, Mike Snitzer wrote:
> 
> >> Interesting, I did *not* change to writethrough. However, there
> >> shouldn't have been any I/O on the device (it was not accessed by
> >> anything after I switched to the cleaner policy).
> [...]
> >> Anyways, I'll try to replicate my scenario again because I'm actually
> >> quite sure that I did everything correctly (I did it a few times).
> > 
> > Except you didn't first switch to writethrough -- which is _not_
> > correct.
> 
> Absolutely, very good to know. So even without any I/O being request,
> dm-cache is allowed to "hold back" pages as long as the dm-cache device
> is in writeback mode? Would this also explain why the "dmsetup wait"
> hung indefinitely?

Some things to try to see if it makes a difference:

- unmount the cache before checksumming it so we know there's no IO from
  the page cache going in.

- deactivate the cache before checksumming the origin.

- Stop using encryption on top of cache and see if that makes a
  difference.

- Use 'dmsetup wait' properly, as Mike mentioned.  See the following code
  from dmtest:


https://github.com/jthornber/device-mapper-test-suite/blob/master/lib/dmtest/cache_utils.rb#L28

https://github.com/jthornber/device-mapper-test-suite/blob/master/lib/dmtest/device-mapper/event_tracker.rb

- use md5sum rather than your checksum program.  Humour me.



- Joe

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel


Re: [dm-devel] [PATCH v8 0/2] dm: boot a mapped device without an initramfs

2017-06-27 Thread Kees Cook
On Fri, May 19, 2017 at 12:11 AM, Enric Balletbo i Serra
 wrote:
> Dear all,
>
> So here is a new version of the patches to be reviewed, this time as
> suggested by Alasdair the patches are reworked to match with the new
> dmsetup bootformat feature [1]. These patches are not reviewed yet but
> the format was discussed in the IRC and was suggested to send the
> kernel patches in parallel.

Just pinging on this thread again. Alasdair, how does this look to you?

Thanks!

-Kees

>
> Changes since v7:
>  - Fix build error due commit
> e516db4f67 (dm ioctl: add a new DM_DEV_ARM_POLL ioctl)
>
> Changes since v6:
>  - Add a new function to issue the equivalent of a DM ioctl programatically.
>  - Use the new ioctl interface to create the devices.
>  - Use a comma-delimited and semi-colon delimited dmsetup-like commands.
>
> Changes since v5:
>  - https://www.redhat.com/archives/dm-devel/2016-February/msg00112.html
>
> [1] https://www.redhat.com/archives/linux-lvm/2017-May/msg00047.html
>
> Wating for your feedback,
>
> Enric Balletbo i Serra (1):
>   dm ioctl: add a device mapper ioctl function.
>
> Will Drewry (1):
>   init: add support to directly boot to a mapped device
>
>  Documentation/admin-guide/kernel-parameters.rst |   1 +
>  Documentation/admin-guide/kernel-parameters.txt |   3 +
>  Documentation/device-mapper/dm-boot.txt |  65 
>  drivers/md/dm-ioctl.c   |  50 +++
>  include/linux/device-mapper.h   |   6 +
>  init/Makefile   |   1 +
>  init/do_mounts.c|   1 +
>  init/do_mounts.h|  10 +
>  init/do_mounts_dm.c | 459 
> 
>  9 files changed, 596 insertions(+)
>  create mode 100644 Documentation/device-mapper/dm-boot.txt
>  create mode 100644 init/do_mounts_dm.c
>
> --
> 2.9.3
>



-- 
Kees Cook
Pixel Security

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel