Re: [PATCH blktests] common/rc: allow the loop driver to be built into the kernel

2018-10-22 Thread Omar Sandoval
On Mon, Oct 22, 2018 at 06:50:04AM -0400, Theodore Ts'o wrote:
> A number of kernel modules used by blktests must be compiled as
> modules, since the module needs to be loaded with specific options, or
> part of the test is to exercise what happens when the kernel
> module is loaded.  This is not true for the loop driver, so add a new
> bash function, _have_kernel_module, which works like _have_module but
> will not fail if the driver is compiled directly into the kernel.

`modprobe loop` works for me if the module is built in, are you using
one from busybox or something? According to strace, it looks at the
depmod information (namely, /lib/modules/$(uname -r)/modules.builtin.bin).
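
For reference, a minimal sketch of that check done by hand, outside of
modprobe (the loop example is illustrative and assumes the standard kmod
layout under /lib/modules):

    # Hypothetical check: is "loop" built into the running kernel?
    # modules.builtin lists every driver compiled directly into the image.
    if grep -q '/loop\.ko$' "/lib/modules/$(uname -r)/modules.builtin"; then
        echo "loop is built in; modprobe loop is a successful no-op"
    elif modprobe -n -q loop; then
        echo "loop is available as a loadable module"
    else
        echo "loop is not available at all"
    fi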


Re: [PATCH 2/2] loop/007: Add test for oops during backing file verification

2018-10-22 Thread Omar Sandoval
On Thu, Oct 18, 2018 at 12:31:47PM +0200, Jan Kara wrote:
> Add regression test for patch "block/loop: Use global lock for ioctl()
> operation." where we can oops while traversing list of loop devices
> backing newly created device.
> 
> Signed-off-by: Jan Kara 

Looks good, sans a missing addition to src/.gitignore. I can fix that
and apply this once I hear back regarding the other test.


Re: [PATCH 1/2] loop/006: Add test for setting partscan flag

2018-10-22 Thread Omar Sandoval
On Thu, Oct 18, 2018 at 12:31:46PM +0200, Jan Kara wrote:
> Add test for setting partscan flag.
> 
> Signed-off-by: Jan Kara 

Sorry I didn't notice this earlier, but loop/001 already does a
partition rescan (via losetup -P). Does that cover this test case?
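
(For context, a rough sketch of the two paths being compared -- loop/001's
rescan at attach time via losetup versus this test's explicit flag through
LOOP_SET_STATUS64; the image variable and invocation below are placeholders:)

    # loop/001 style: ask for a partition scan when attaching the device.
    dev="$(losetup -f --show -P "$image")"

    # loop/006 style (this test): set LO_FLAGS_PARTSCAN on an already
    # attached device through the LOOP_SET_STATUS64 ioctl.
    src/loop_set_status_partscan "$dev"

    losetup -d "$dev"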

> ---
>  src/Makefile   |  3 ++-
>  src/loop_set_status_partscan.c | 45 ++
>  tests/loop/006 | 33 +++
>  tests/loop/006.out |  2 ++
>  4 files changed, 82 insertions(+), 1 deletion(-)
>  create mode 100644 src/loop_set_status_partscan.c
>  create mode 100755 tests/loop/006
>  create mode 100644 tests/loop/006.out
> 
> diff --git a/src/Makefile b/src/Makefile
> index f89f61701179..6dadcbec8beb 100644
> --- a/src/Makefile
> +++ b/src/Makefile
> @@ -4,7 +4,8 @@ C_TARGETS := \
>   openclose \
>   sg/dxfer-from-dev \
>   sg/syzkaller1 \
> - nbdsetsize
> + nbdsetsize \
> + loop_set_status_partscan
>  
>  CXX_TARGETS := \
>   discontiguous-io
> diff --git a/src/loop_set_status_partscan.c b/src/loop_set_status_partscan.c
> new file mode 100644
> index ..8873a12e4334
> --- /dev/null
> +++ b/src/loop_set_status_partscan.c
> @@ -0,0 +1,45 @@
> +#include <errno.h>
> +#include <fcntl.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <sys/ioctl.h>
> +#include <sys/stat.h>
> +#include <sys/types.h>
> +#include <linux/loop.h>
> +
> +void usage(const char *progname)
> +{
> + fprintf(stderr, "usage: %s PATH\n", progname);
> + exit(EXIT_FAILURE);
> +}
> +
> +int main(int argc, char **argv)
> +{
> + int ret;
> + int fd;
> + struct loop_info64 info;
> +
> + if (argc != 2)
> + usage(argv[0]);
> +
> + fd = open(argv[1], O_RDONLY);
> + if (fd == -1) {
> + perror("open");
> + return EXIT_FAILURE;
> + }
> +
> + memset(&info, 0, sizeof(info));
> + info.lo_flags = LO_FLAGS_PARTSCAN;
> + memcpy(info.lo_file_name, "part", 5);

What's the significance of this file name?

> + ret = ioctl(fd, LOOP_SET_STATUS64, &info);
> + if (ret == -1) {
> + perror("ioctl");
> + close(fd);
> + return EXIT_FAILURE;
> + }
> + close(fd);
> + return EXIT_SUCCESS;
> +}

[snip]


[PATCH] null_blk: Add conventional zone configuration for zoned support

2018-10-22 Thread Masato Suzuki
Allow the creation of conventional zones by adding the nr_conv_zones
configuration attribute. This new attribute is used only for zoned devices and
indicates the number of conventional zones to create. The default value is 0.
Since host-managed zoned block devices must always have at least one sequential
zone, if the value of nr_conv_zones is larger than or equal to the device's
total number of zones (nr_zones), nr_conv_zones is automatically reduced to
nr_zones - 1.
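
For reference, a usage sketch (the module parameter and configfs attribute
names come from this patch; the configfs paths follow the existing nullb
layout and the device name and values are only examples):

    # Module parameter form: a zoned nullb device with 4 conventional zones.
    modprobe null_blk zoned=1 zone_size=256 nr_conv_zones=4

    # Configfs form, using the new nr_conv_zones attribute.
    mkdir /sys/kernel/config/nullb/nullb1
    echo 1024 > /sys/kernel/config/nullb/nullb1/size       # MB
    echo 1    > /sys/kernel/config/nullb/nullb1/zoned
    echo 256  > /sys/kernel/config/nullb/nullb1/zone_size
    echo 4    > /sys/kernel/config/nullb/nullb1/nr_conv_zones
    echo 1    > /sys/kernel/config/nullb/nullb1/power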

Signed-off-by: Masato Suzuki 
---
 drivers/block/null_blk.h   |  1 +
 drivers/block/null_blk_main.c  |  7 +++
 drivers/block/null_blk_zoned.c | 27 ++-
 3 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h
index 34e0030f0592..b9d0b57d3642 100644
--- a/drivers/block/null_blk.h
+++ b/drivers/block/null_blk.h
@@ -49,6 +49,7 @@ struct nullb_device {
unsigned long completion_nsec; /* time in ns to complete a request */
unsigned long cache_size; /* disk cache size in MB */
unsigned long zone_size; /* zone size in MB if device is zoned */
+   unsigned int nr_conv_zones; /* number of conventional zones */
unsigned int submit_queues; /* number of submission queues */
unsigned int home_node; /* home node for the device */
unsigned int queue_mode; /* block interface */
diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c
index 093b614d6524..ce6fa0ddc9f9 100644
--- a/drivers/block/null_blk_main.c
+++ b/drivers/block/null_blk_main.c
@@ -188,6 +188,10 @@ static unsigned long g_zone_size = 256;
 module_param_named(zone_size, g_zone_size, ulong, S_IRUGO);
MODULE_PARM_DESC(zone_size, "Zone size in MB when block device is zoned. Must be power-of-two: Default: 256");
 
+static unsigned int g_nr_conv_zones;
+module_param_named(nr_conv_zones, g_nr_conv_zones, uint, S_IRUGO);
+MODULE_PARM_DESC(nr_conv_zones, "Number of conventional zones when block device is zoned. Default: 0");
+
 static struct nullb_device *null_alloc_dev(void);
 static void null_free_dev(struct nullb_device *dev);
 static void null_del_dev(struct nullb *nullb);
@@ -293,6 +297,7 @@ NULLB_DEVICE_ATTR(mbps, uint);
 NULLB_DEVICE_ATTR(cache_size, ulong);
 NULLB_DEVICE_ATTR(zoned, bool);
 NULLB_DEVICE_ATTR(zone_size, ulong);
+NULLB_DEVICE_ATTR(nr_conv_zones, uint);
 
 static ssize_t nullb_device_power_show(struct config_item *item, char *page)
 {
@@ -407,6 +412,7 @@ static struct configfs_attribute *nullb_device_attrs[] = {
&nullb_device_attr_badblocks,
&nullb_device_attr_zoned,
&nullb_device_attr_zone_size,
+   &nullb_device_attr_nr_conv_zones,
NULL,
 };
 
@@ -520,6 +526,7 @@ static struct nullb_device *null_alloc_dev(void)
dev->use_per_node_hctx = g_use_per_node_hctx;
dev->zoned = g_zoned;
dev->zone_size = g_zone_size;
+   dev->nr_conv_zones = g_nr_conv_zones;
return dev;
 }
 
diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c
index 7c6b86d98700..cf0d07491d29 100644
--- a/drivers/block/null_blk_zoned.c
+++ b/drivers/block/null_blk_zoned.c
@@ -29,7 +29,25 @@ int null_zone_init(struct nullb_device *dev)
if (!dev->zones)
return -ENOMEM;
 
-   for (i = 0; i < dev->nr_zones; i++) {
+   if (dev->nr_conv_zones >= dev->nr_zones) {
+   dev->nr_conv_zones = dev->nr_zones - 1;
+   pr_info("null_blk: changed the number of conventional zones to 
%u",
+   dev->nr_conv_zones);
+   }
+
+   for (i = 0; i <  dev->nr_conv_zones; i++ ) {
+   struct blk_zone *zone = >zones[i];
+
+   zone->start = sector;
+   zone->len = dev->zone_size_sects;
+   zone->wp = zone->start + zone->len;
+   zone->type = BLK_ZONE_TYPE_CONVENTIONAL;
+   zone->cond = BLK_ZONE_COND_NOT_WP;
+
+   sector += dev->zone_size_sects;
+   }
+
+   for (i = dev->nr_conv_zones; i < dev->nr_zones; i++) {
struct blk_zone *zone = &dev->zones[i];
 
zone->start = zone->wp = sector;
@@ -125,6 +143,8 @@ void null_zone_write(struct nullb_cmd *cmd, sector_t sector,
if (zone->wp == zone->start + zone->len)
zone->cond = BLK_ZONE_COND_FULL;
break;
+   case BLK_ZONE_COND_NOT_WP:
+   break;
default:
/* Invalid zone condition */
cmd->error = BLK_STS_IOERR;
@@ -138,6 +158,11 @@ void null_zone_reset(struct nullb_cmd *cmd, sector_t sector)
unsigned int zno = null_zone_no(dev, sector);
struct blk_zone *zone = &dev->zones[zno];
 
+   if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) {
+   cmd->error = BLK_STS_IOERR;
+   return;
+   }
+
zone->cond = BLK_ZONE_COND_EMPTY;
zone->wp = zone->start;
 }
-- 
2.14.4



[PATCH blktests] common/rc: allow the loop driver to be built into the kernel

2018-10-22 Thread Theodore Ts'o
A number of kernel modules used by blktests must be compiled as
modules, since the module needs to be loaded with specific options, or
part of the test is to exercise what happens when the kernel
module is loaded.  This is not true for the loop driver, so add a new
bash function, _have_kernel_module, which works like _have_module but
will not fail if the driver is compiled directly into the kernel.

Signed-off-by: Theodore Ts'o 
---
 common/rc  | 10 +-
 tests/nvme/002 |  4 ++--
 tests/nvme/003 |  4 ++--
 tests/nvme/004 |  4 ++--
 tests/nvme/005 |  6 +++---
 tests/nvme/006 |  5 +++--
 tests/nvme/008 |  5 +++--
 tests/nvme/010 |  2 +-
 tests/nvme/012 |  2 +-
 tests/nvme/014 |  5 +++--
 tests/nvme/015 |  4 ++--
 tests/nvme/019 |  5 +++--
 tests/nvme/021 |  5 +++--
 tests/nvme/022 |  5 +++--
 tests/nvme/023 |  5 +++--
 tests/nvme/024 |  5 +++--
 tests/nvme/025 |  5 +++--
 tests/nvme/026 |  5 +++--
 tests/nvme/027 |  5 +++--
 tests/nvme/028 |  5 +++--
 20 files changed, 58 insertions(+), 38 deletions(-)

diff --git a/common/rc b/common/rc
index 8a892bc..be6133e 100644
--- a/common/rc
+++ b/common/rc
@@ -35,6 +35,14 @@ _have_module() {
return 1
 }
 
+_have_kernel_module() {
+   if modprobe -n -q "$1" || test -d "/sys/module/$1" ; then
+   return 0
+   fi
+   SKIP_REASON="$1 is neither available as a module nor built into the kernel"
+   return 1
+}
+
 _have_module_param() {
if ! modinfo -F parm -0 "$1" | grep -q -z "^$2:"; then
SKIP_REASON="$1 module does not have parameter $2"
@@ -60,7 +68,7 @@ _have_src_program() {
 }
 
 _have_loop() {
-   _have_module loop && _have_program losetup
+   _have_kernel_module loop && _have_program losetup
 }
 
 _have_blktrace() {
diff --git a/tests/nvme/002 b/tests/nvme/002
index 2b01534..0624cb1 100755
--- a/tests/nvme/002
+++ b/tests/nvme/002
@@ -10,8 +10,8 @@
 DESCRIPTION="create many subsystems and test discovery"
 
 requires() {
-   _have_program nvme && _have_module nvme-loop && _have_module loop && \
-   _have_configfs
+   _have_program nvme && _have_module nvme-loop && \
+   _have_kernel_module loop && _have_configfs
 }
 
 test() {
diff --git a/tests/nvme/003 b/tests/nvme/003
index 693ad8d..2a2dae2 100755
--- a/tests/nvme/003
+++ b/tests/nvme/003
@@ -11,8 +11,8 @@ DESCRIPTION="test if we're sending keep-alives to a discovery controller"
 QUICK=1
 
 requires() {
-   _have_program nvme && _have_module nvme-loop && _have_module loop \
-   && _have_configfs
+   _have_program nvme && _have_module nvme-loop && \
+   _have_kernel_module loop && _have_configfs
 }
 
 test() {
diff --git a/tests/nvme/004 b/tests/nvme/004
index 55b5c67..14137e1 100755
--- a/tests/nvme/004
+++ b/tests/nvme/004
@@ -12,8 +12,8 @@ DESCRIPTION="test nvme and nvmet UUID NS descriptors"
 QUICK=1
 
 requires() {
-   _have_program nvme && _have_module nvme-loop && _have_module loop && \
-   _have_configfs
+   _have_program nvme && _have_module nvme-loop && \
+   _have_kernel_module loop && _have_configfs
 }
 
 test() {
diff --git a/tests/nvme/005 b/tests/nvme/005
index fa7a75d..4f638dd 100755
--- a/tests/nvme/005
+++ b/tests/nvme/005
@@ -11,9 +11,9 @@ DESCRIPTION="reset local loopback target"
 QUICK=1
 
 requires() {
-   _have_module nvme-loop && _have_module nvmet && _have_module loop && \
-   _have_configfs && _have_module nvme-core && \
-   _have_module_param nvme-core multipath
+   _have_module nvme-loop && _have_module nvmet && \
+   _have_kernel_module loop && _have_configfs && \
+   _have_module nvme-core && _have_module_param nvme-core multipath
 }
 
 test() {
diff --git a/tests/nvme/006 b/tests/nvme/006
index a2693a2..ff04e34 100755
--- a/tests/nvme/006
+++ b/tests/nvme/006
@@ -10,8 +10,9 @@ DESCRIPTION="create an NVMeOF target with a block device-backed ns"
 QUICK=1
 
 requires() {
-   _have_program nvme && _have_module nvme-loop && _have_module loop && \
-   _have_module nvmet && _have_configfs
+   _have_program nvme && _have_module nvme-loop && \
+   _have_kernel_module loop && _have_module nvmet && \
+   _have_configfs
 }
 
 test() {
diff --git a/tests/nvme/008 b/tests/nvme/008
index e5aa50b..4d4c626 100755
--- a/tests/nvme/008
+++ b/tests/nvme/008
@@ -10,8 +10,9 @@ DESCRIPTION="create an NVMeOF host with a block device-backed ns"
 QUICK=1
 
 requires() {
-   _have_program nvme && _have_module nvme-loop && _have_module loop && \
-   _have_module nvmet && _have_configfs
+   _have_program nvme && _have_module nvme-loop && \
+   _have_kernel_module loop && _have_module nvmet && \
+   _have_configfs
 }
 
 test() {
diff --git a/tests/nvme/010 b/tests/nvme/010
index a76b176..11387f0 100755
--- a/tests/nvme/010
+++ b/tests/nvme/010
@@ -10,7 +10,7 @@ DESCRIPTION="run data verification fio job on 

[PATCH v2 5/5] lightnvm: pblk: Support for packed metadata

2018-10-22 Thread Igor Konopko
In the current pblk implementation, the L2P mapping for lines that are
not yet closed is stored only in OOB metadata and recovered from it.

Such a solution does not provide data integrity when the drive does
not have any OOB metadata space.

The goal of this patch is to add support for so-called packed
metadata, which stores the L2P mapping for open lines in the last
sector of every write unit.

Signed-off-by: Igor Konopko 
---
 drivers/lightnvm/pblk-core.c | 53 +---
 drivers/lightnvm/pblk-init.c | 37 ++--
 drivers/lightnvm/pblk-rb.c   |  3 +++
 drivers/lightnvm/pblk-read.c |  6 +
 drivers/lightnvm/pblk-recovery.c |  5 ++--
 drivers/lightnvm/pblk-sysfs.c|  7 ++
 drivers/lightnvm/pblk-write.c| 14 ---
 drivers/lightnvm/pblk.h  | 13 +-
 8 files changed, 125 insertions(+), 13 deletions(-)

diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index b1e104765868..245abf29620f 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -376,7 +376,7 @@ void pblk_write_should_kick(struct pblk *pblk)
 {
unsigned int secs_avail = pblk_rb_read_count(&pblk->rwb);
 
-   if (secs_avail >= pblk->min_write_pgs)
+   if (secs_avail >= pblk->min_write_pgs_data)
pblk_write_kick(pblk);
 }
 
@@ -407,7 +407,9 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
struct pblk_line_meta *lm = &pblk->lm;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct list_head *move_list = NULL;
-   int vsc = le32_to_cpu(*line->vsc);
+   int packed_meta = (le32_to_cpu(*line->vsc) / pblk->min_write_pgs_data)
+   * (pblk->min_write_pgs - pblk->min_write_pgs_data);
+   int vsc = le32_to_cpu(*line->vsc) + packed_meta;
 
lockdep_assert_held(&line->lock);
 
@@ -620,12 +622,15 @@ struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
 }
 
 int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
-  unsigned long secs_to_flush)
+  unsigned long secs_to_flush, bool skip_meta)
 {
int max = pblk->sec_per_write;
int min = pblk->min_write_pgs;
int secs_to_sync = 0;
 
+   if (skip_meta && pblk->min_write_pgs_data != pblk->min_write_pgs)
+   min = max = pblk->min_write_pgs_data;
+
if (secs_avail >= max)
secs_to_sync = max;
else if (secs_avail >= min)
@@ -852,7 +857,7 @@ int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line,
 next_rq:
memset(&rqd, 0, sizeof(struct nvm_rq));
 
-   rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
+   rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false);
rq_len = rq_ppas * geo->csecs;
 
bio = pblk_bio_map_addr(pblk, emeta_buf, rq_ppas, rq_len,
@@ -2161,3 +2166,43 @@ void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas,
}
spin_unlock(&pblk->trans_lock);
 }
+
+void pblk_set_packed_meta(struct pblk *pblk, struct nvm_rq *rqd)
+{
+   void *meta_list = rqd->meta_list;
+   void *page;
+   int i = 0;
+
+   if (pblk_is_oob_meta_supported(pblk))
+   return;
+
+   /* We need to zero out metadata corresponding to packed meta page */
+   pblk_set_meta_lba(pblk, meta_list, rqd->nr_ppas - 1,
+ cpu_to_le64(ADDR_EMPTY));
+
+   page = page_to_virt(rqd->bio->bi_io_vec[rqd->bio->bi_vcnt - 1].bv_page);
+   /* We need to fill last page of request (packed metadata)
+* with data from oob meta buffer.
+*/
+   for (; i < rqd->nr_ppas; i++)
+   memcpy(page + (i * sizeof(struct pblk_sec_meta)),
+   pblk_get_meta(pblk, meta_list, i),
+   sizeof(struct pblk_sec_meta));
+}
+
+void pblk_get_packed_meta(struct pblk *pblk, struct nvm_rq *rqd)
+{
+   void *meta_list = rqd->meta_list;
+   void *page;
+   int i = 0;
+
+   if (pblk_is_oob_meta_supported(pblk))
+   return;
+
+   page = page_to_virt(rqd->bio->bi_io_vec[rqd->bio->bi_vcnt - 1].bv_page);
+   /* We need to fill oob meta buffer with data from packed metadata */
+   for (; i < rqd->nr_ppas; i++)
+   memcpy(pblk_get_meta(pblk, meta_list, i),
+   page + (i * sizeof(struct pblk_sec_meta)),
+   sizeof(struct pblk_sec_meta));
+}
diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
index ded0618f6cda..7e09717a93d4 100644
--- a/drivers/lightnvm/pblk-init.c
+++ b/drivers/lightnvm/pblk-init.c
@@ -406,12 +406,44 @@ static int pblk_core_init(struct pblk *pblk)
pblk->nr_flush_rst = 0;
 
pblk->min_write_pgs = geo->ws_opt;
+   pblk->min_write_pgs_data = pblk->min_write_pgs;
max_write_ppas = pblk->min_write_pgs * geo->all_luns;
pblk->max_write_pgs = min_t(int, max_write_ppas, NVM_MAX_VLBA);
pblk->max_write_pgs = min_t(int, 

[PATCH v2 2/5] lightnvm: pblk: Helpers for OOB metadata

2018-10-22 Thread Igor Konopko
Currently pblk assumes that the size of the OOB metadata on the drive is
always equal to the size of struct pblk_sec_meta. This commit adds helpers
that allow handling different sizes of OOB metadata on the drive.

Signed-off-by: Igor Konopko 
---
 drivers/lightnvm/pblk-core.c |  5 +++--
 drivers/lightnvm/pblk-map.c  | 20 +++---
 drivers/lightnvm/pblk-read.c | 45 +---
 drivers/lightnvm/pblk-recovery.c | 13 ++--
 drivers/lightnvm/pblk.h  | 22 
 5 files changed, 73 insertions(+), 32 deletions(-)

diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index 6944aac43b01..0f33055f40eb 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -796,10 +796,11 @@ static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line,
rqd.is_seq = 1;
 
for (i = 0; i < lm->smeta_sec; i++, paddr++) {
-   struct pblk_sec_meta *meta_list = rqd.meta_list;
+   void *meta_list = rqd.meta_list;
 
rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
-   meta_list[i].lba = lba_list[paddr] = addr_empty;
+   pblk_set_meta_lba(pblk, meta_list, i, addr_empty);
+   lba_list[paddr] = addr_empty;
}
 
ret = pblk_submit_io_sync_sem(pblk, &rqd);
diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c
index 6dcbd44e3acb..4bae30129bc9 100644
--- a/drivers/lightnvm/pblk-map.c
+++ b/drivers/lightnvm/pblk-map.c
@@ -22,7 +22,7 @@
 static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
  struct ppa_addr *ppa_list,
  unsigned long *lun_bitmap,
- struct pblk_sec_meta *meta_list,
+ void *meta_list,
  unsigned int valid_secs)
 {
struct pblk_line *line = pblk_line_get_data(pblk);
@@ -68,14 +68,16 @@ static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
kref_get(&line->ref);
w_ctx = pblk_rb_w_ctx(&pblk->rwb, sentry + i);
w_ctx->ppa = ppa_list[i];
-   meta_list[i].lba = cpu_to_le64(w_ctx->lba);
+   pblk_set_meta_lba(pblk, meta_list, i,
+ cpu_to_le64(w_ctx->lba));
lba_list[paddr] = cpu_to_le64(w_ctx->lba);
if (lba_list[paddr] != addr_empty)
line->nr_valid_lbas++;
else
atomic64_inc(&pblk->pad_wa);
} else {
-   lba_list[paddr] = meta_list[i].lba = addr_empty;
+   lba_list[paddr] = addr_empty;
+   pblk_set_meta_lba(pblk, meta_list, i, addr_empty);
__pblk_map_invalidate(pblk, line, paddr);
}
}
@@ -88,7 +90,8 @@ void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
 unsigned long *lun_bitmap, unsigned int valid_secs,
 unsigned int off)
 {
-   struct pblk_sec_meta *meta_list = rqd->meta_list;
+   void *meta_list = rqd->meta_list;
+   void *meta_buffer;
struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
unsigned int map_secs;
int min = pblk->min_write_pgs;
@@ -96,8 +99,9 @@ void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
 
for (i = off; i < rqd->nr_ppas; i += min) {
map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
+   meta_buffer = pblk_get_meta(pblk, meta_list, i);
if (pblk_map_page_data(pblk, sentry + i, &ppa_list[i],
-   lun_bitmap, &meta_list[i], map_secs)) {
+   lun_bitmap, meta_buffer, map_secs)) {
bio_put(rqd->bio);
pblk_free_rqd(pblk, rqd, PBLK_WRITE);
pblk_pipeline_stop(pblk);
@@ -113,7 +117,8 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct pblk_line_meta *lm = &pblk->lm;
-   struct pblk_sec_meta *meta_list = rqd->meta_list;
+   void *meta_list = rqd->meta_list;
+   void *meta_buffer;
struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
struct pblk_line *e_line, *d_line;
unsigned int map_secs;
@@ -122,8 +127,9 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
 
for (i = 0; i < rqd->nr_ppas; i += min) {
map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
+   meta_buffer = pblk_get_meta(pblk, meta_list, i);
if (pblk_map_page_data(pblk, sentry + i, &ppa_list[i],
-   

[PATCH v2 3/5] lightnvm: Flexible DMA pool entry size

2018-10-22 Thread Igor Konopko
Currently the whole of lightnvm and pblk uses a single DMA pool,
for which the entry size is always equal to PAGE_SIZE. The PPA list
always needs 8 bytes for each of up to 64 sectors (512 bytes of a
4 KB entry), which leaves only 56 bytes per sector for OOB metadata.
Since NVMe OOB metadata can be bigger, such as 128 bytes, this
solution is not robust.

This patch adds the possibility to support OOB metadata above
56 bytes by changing the DMA pool entry size based on the OOB
metadata size.

Signed-off-by: Igor Konopko 
---
 drivers/lightnvm/core.c  | 45 ++--
 drivers/lightnvm/pblk-core.c |  8 +++
 drivers/lightnvm/pblk-recovery.c |  4 ++--
 drivers/lightnvm/pblk.h  | 10 -
 drivers/nvme/host/lightnvm.c |  8 +--
 include/linux/lightnvm.h |  4 +++-
 6 files changed, 63 insertions(+), 16 deletions(-)

diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index efb976a863d2..68f0812077d5 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -1145,11 +1145,9 @@ int nvm_register(struct nvm_dev *dev)
if (!dev->q || !dev->ops)
return -EINVAL;
 
-   dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist");
-   if (!dev->dma_pool) {
-   pr_err("nvm: could not create dma pool\n");
-   return -ENOMEM;
-   }
+   ret = nvm_realloc_dma_pool(dev);
+   if (ret)
+   return ret;
 
ret = nvm_init(dev);
if (ret)
@@ -1162,7 +1160,12 @@ int nvm_register(struct nvm_dev *dev)
 
return 0;
 err_init:
-   dev->ops->destroy_dma_pool(dev->dma_pool);
+   if (dev->dma_pool) {
+   dev->ops->destroy_dma_pool(dev->dma_pool);
+   dev->dma_pool = NULL;
+   dev->dma_pool_size = 0;
+   }
+
return ret;
 }
 EXPORT_SYMBOL(nvm_register);
@@ -1187,6 +1190,36 @@ void nvm_unregister(struct nvm_dev *dev)
 }
 EXPORT_SYMBOL(nvm_unregister);
 
+int nvm_realloc_dma_pool(struct nvm_dev *dev)
+{
+   int exp_pool_size;
+
+   exp_pool_size = max_t(int, PAGE_SIZE,
+ (NVM_MAX_VLBA * (sizeof(u64) + dev->geo.sos)));
+   exp_pool_size = round_up(exp_pool_size, PAGE_SIZE);
+
+   if (dev->dma_pool_size >= exp_pool_size)
+   return 0;
+
+   if (dev->dma_pool) {
+   dev->ops->destroy_dma_pool(dev->dma_pool);
+   dev->dma_pool = NULL;
+   dev->dma_pool_size = 0;
+   }
+
+   dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist",
+ exp_pool_size);
+   if (!dev->dma_pool) {
+   dev->dma_pool_size = 0;
+   pr_err("nvm: could not create dma pool\n");
+   return -ENOMEM;
+   }
+   dev->dma_pool_size = exp_pool_size;
+
+   return 0;
+}
+EXPORT_SYMBOL(nvm_realloc_dma_pool);
+
 static int __nvm_configure_create(struct nvm_ioctl_create *create)
 {
struct nvm_dev *dev;
diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index 0f33055f40eb..b1e104765868 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -250,8 +250,8 @@ int pblk_alloc_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd)
if (rqd->nr_ppas == 1)
return 0;
 
-   rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size;
-   rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size;
+   rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size(pblk);
+   rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size(pblk);
 
return 0;
 }
@@ -846,8 +846,8 @@ int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line,
if (!meta_list)
return -ENOMEM;
 
-   ppa_list = meta_list + pblk_dma_meta_size;
-   dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
+   ppa_list = meta_list + pblk_dma_meta_size(pblk);
+   dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk);
 
 next_rq:
memset(, 0, sizeof(struct nvm_rq));
diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
index 977b2ca5d849..b5c8a0ed9bb1 100644
--- a/drivers/lightnvm/pblk-recovery.c
+++ b/drivers/lightnvm/pblk-recovery.c
@@ -474,8 +474,8 @@ static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line)
if (!meta_list)
return -ENOMEM;
 
-   ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
-   dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
+   ppa_list = (void *)(meta_list) + pblk_dma_meta_size(pblk);
+   dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk);
 
data = kcalloc(pblk->max_write_pgs, geo->csecs, GFP_KERNEL);
if (!data) {
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index d09c1b341e07..c03fa037d037 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -104,7 +104,6 @@ enum {
PBLK_RL_LOW = 4
 };
 
-#define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * NVM_MAX_VLBA)
 #define pblk_dma_ppa_size (sizeof(u64) * 

[PATCH v2 0/5] lightnvm: Flexible metadata

2018-10-22 Thread Igor Konopko
This series of patches extends the way pblk can store L2P sector
metadata. After this set of changes any size of NVMe metadata
(including 0) is supported.

Patches are rebased on top of block/for-next since
there was no ocssd/for-4.21 branch yet.

Changes v1 --> v2:
-Revert sector meta size back to 16b for pblk
-DMA pools for larger OOB meta are handled in core instead of pblk
-Pblk OOB meta helpers use __le64 as input/output instead of u64
-Other minor fixes based on v1 patch review

Igor Konopko (5):
  lightnvm: pblk: Move lba list to partial read context
  lightnvm: pblk: Helpers for OOB metadata
  lightnvm: Flexible DMA pool entry size
  lightnvm: Disable interleaved metadata
  lightnvm: pblk: Support for packed metadata

 drivers/lightnvm/core.c  | 45 +++
 drivers/lightnvm/pblk-core.c | 66 ++--
 drivers/lightnvm/pblk-init.c | 43 --
 drivers/lightnvm/pblk-map.c  | 20 +++-
 drivers/lightnvm/pblk-rb.c   |  3 ++
 drivers/lightnvm/pblk-read.c | 63 +-
 drivers/lightnvm/pblk-recovery.c | 22 --
 drivers/lightnvm/pblk-sysfs.c|  7 +
 drivers/lightnvm/pblk-write.c| 14 ++---
 drivers/lightnvm/pblk.h  | 47 ++--
 drivers/nvme/host/lightnvm.c |  9 --
 include/linux/lightnvm.h |  5 ++-
 12 files changed, 272 insertions(+), 72 deletions(-)

-- 
2.14.4



[PATCH v2 4/5] lightnvm: Disable interleaved metadata

2018-10-22 Thread Igor Konopko
Currently pblk and lightnvm only check the size of the
OOB metadata and do not care whether this metadata is
located in a separate buffer or is interleaved with the
data in a single buffer.

In reality only the first scenario is supported; the
second mode will break pblk functionality during any
IO operation.

The goal of this patch is to block the creation of pblk
devices in the case of interleaved metadata.

Signed-off-by: Igor Konopko 
---
 drivers/lightnvm/pblk-init.c | 6 ++
 drivers/nvme/host/lightnvm.c | 1 +
 include/linux/lightnvm.h | 1 +
 3 files changed, 8 insertions(+)

diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
index 13822594647c..ded0618f6cda 100644
--- a/drivers/lightnvm/pblk-init.c
+++ b/drivers/lightnvm/pblk-init.c
@@ -1154,6 +1154,12 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
return ERR_PTR(-EINVAL);
}
 
+   if (geo->ext) {
+   pblk_err(pblk, "extended metadata not supported\n");
+   kfree(pblk);
+   return ERR_PTR(-EINVAL);
+   }
+
spin_lock_init(&pblk->resubmit_lock);
spin_lock_init(&pblk->trans_lock);
spin_lock_init(&pblk->lock);
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index d1e47a93bcfd..b71c730a6e32 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -983,6 +983,7 @@ void nvme_nvm_update_nvm_info(struct nvme_ns *ns)
 
geo->csecs = 1 << ns->lba_shift;
geo->sos = ns->ms;
+   geo->ext = ns->ext;
 
if (nvm_realloc_dma_pool(ndev))
nvm_unregister(ndev);
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 9d3b7c627cac..4870022ebff1 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -357,6 +357,7 @@ struct nvm_geo {
u32 clba;   /* sectors per chunk */
u16 csecs;  /* sector size */
u16 sos;/* out-of-band area size */
+   boolext;/* metadata in extended data buffer */
 
/* device write constrains */
u32 ws_min; /* minimum write size */
-- 
2.14.4



[PATCH v2 1/5] lightnvm: pblk: Move lba list to partial read context

2018-10-22 Thread Igor Konopko
Currently DMA-allocated memory is reused on partial read
for the lba_list_mem and lba_list_media arrays. In preparation
for dynamic DMA pool sizes we need to move these arrays
into the pblk_pr_ctx structure.

Signed-off-by: Igor Konopko 
---
 drivers/lightnvm/pblk-read.c | 20 +---
 drivers/lightnvm/pblk.h  |  2 ++
 2 files changed, 7 insertions(+), 15 deletions(-)

diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
index 9fba614adeeb..19917d3c19b3 100644
--- a/drivers/lightnvm/pblk-read.c
+++ b/drivers/lightnvm/pblk-read.c
@@ -224,7 +224,6 @@ static void pblk_end_partial_read(struct nvm_rq *rqd)
unsigned long *read_bitmap = pr_ctx->bitmap;
int nr_secs = pr_ctx->orig_nr_secs;
int nr_holes = nr_secs - bitmap_weight(read_bitmap, nr_secs);
-   __le64 *lba_list_mem, *lba_list_media;
void *src_p, *dst_p;
int hole, i;
 
@@ -237,13 +236,9 @@ static void pblk_end_partial_read(struct nvm_rq *rqd)
rqd->ppa_list[0] = ppa;
}
 
-   /* Re-use allocated memory for intermediate lbas */
-   lba_list_mem = (((void *)rqd->ppa_list) + pblk_dma_ppa_size);
-   lba_list_media = (((void *)rqd->ppa_list) + 2 * pblk_dma_ppa_size);
-
for (i = 0; i < nr_secs; i++) {
-   lba_list_media[i] = meta_list[i].lba;
-   meta_list[i].lba = lba_list_mem[i];
+   pr_ctx->lba_list_media[i] = meta_list[i].lba;
+   meta_list[i].lba = pr_ctx->lba_list_mem[i];
}
 
/* Fill the holes in the original bio */
@@ -255,7 +250,7 @@ static void pblk_end_partial_read(struct nvm_rq *rqd)
line = pblk_ppa_to_line(pblk, rqd->ppa_list[i]);
kref_put(&line->ref, pblk_line_put);
 
-   meta_list[hole].lba = lba_list_media[i];
+   meta_list[hole].lba = pr_ctx->lba_list_media[i];
 
src_bv = new_bio->bi_io_vec[i++];
dst_bv = bio->bi_io_vec[bio_init_idx + hole];
@@ -295,13 +290,9 @@ static int pblk_setup_partial_read(struct pblk *pblk, struct nvm_rq *rqd,
struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
struct pblk_pr_ctx *pr_ctx;
struct bio *new_bio, *bio = r_ctx->private;
-   __le64 *lba_list_mem;
int nr_secs = rqd->nr_ppas;
int i;
 
-   /* Re-use allocated memory for intermediate lbas */
-   lba_list_mem = (((void *)rqd->ppa_list) + pblk_dma_ppa_size);
-
new_bio = bio_alloc(GFP_KERNEL, nr_holes);
 
if (pblk_bio_add_pages(pblk, new_bio, GFP_KERNEL, nr_holes))
@@ -312,12 +303,12 @@ static int pblk_setup_partial_read(struct pblk *pblk, struct nvm_rq *rqd,
goto fail_free_pages;
}
 
-   pr_ctx = kmalloc(sizeof(struct pblk_pr_ctx), GFP_KERNEL);
+   pr_ctx = kzalloc(sizeof(struct pblk_pr_ctx), GFP_KERNEL);
if (!pr_ctx)
goto fail_free_pages;
 
for (i = 0; i < nr_secs; i++)
-   lba_list_mem[i] = meta_list[i].lba;
+   pr_ctx->lba_list_mem[i] = meta_list[i].lba;
 
new_bio->bi_iter.bi_sector = 0; /* internal bio */
bio_set_op_attrs(new_bio, REQ_OP_READ, 0);
@@ -325,7 +316,6 @@ static int pblk_setup_partial_read(struct pblk *pblk, struct nvm_rq *rqd,
rqd->bio = new_bio;
rqd->nr_ppas = nr_holes;
 
-   pr_ctx->ppa_ptr = NULL;
pr_ctx->orig_bio = bio;
bitmap_copy(pr_ctx->bitmap, read_bitmap, NVM_MAX_VLBA);
pr_ctx->bio_init_idx = bio_init_idx;
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index 02bb2e98f8a9..2aca840c7838 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -132,6 +132,8 @@ struct pblk_pr_ctx {
unsigned int bio_init_idx;
void *ppa_ptr;
dma_addr_t dma_ppa_list;
+   __le64 lba_list_mem[NVM_MAX_VLBA];
+   __le64 lba_list_media[NVM_MAX_VLBA];
 };
 
 /* Pad context */
-- 
2.14.4



Re: [PATCH v2] block: BFQ default for single queue devices

2018-10-22 Thread Jens Axboe
On 10/19/18 4:59 AM, Paolo Valente wrote:
> 
> 
>> On 15 Oct 2018, at 20:26, Paolo Valente
>>  wrote:
>>
>> ...
>>> This kind of policy does not belong in the kernel, at least
>>> not in the current form. If we had some sort of "enable best
>>> options for a desktop" then it could fall under that umbrella.
>>>
>>
>> I don't think bfq can be considered a scheduler for only desktops any
>> longer.
>>
> 
> Hi Jens,
> this reply of mine went on bugging me, until I understood my mistake.
> 
> The fact that I consider bfq good also for servers *does not* imply
> that having bfq in desktops is to be refused.
> 
> As for the option that you are hinting at, I also acknowledge that it
> would be trivial for an admin/developer to know whether a given kernel
> is meant for a desktop/personal system, while it is more difficult to
> choose explicitly among the various I/O schedulers available.
> 
> So, I apologize for my shortsighted initial reply, and ask you if you can
> elaborate a little more on this.  I'm willing to help, if I can.

I think I've written about this multiple times now, but for me it
really just boils down to sane default, and policy in the kernel.
BFQ is very complicated, about 10K lines of code. I'm not comfortable
making that the default right now - as I've mentioned in other
replies, I think something like that should be driven by the distros
as they will ultimately be the ones that usually get complaints
about behavioral changes that impact performance adversely. This isn't
just about running some benchmarks and calling it a day.

Maybe some day we can make it the default on mq for single queue
devices, but I just don't think we are there yet in terms of
coverage. 

While I don't work for a distro anymore, I do have my hands dirty
with a fairly substantial deployment at work. There we run mq-deadline
on single queue devices, and kyber on multiqueue capable devices.

-- 
Jens Axboe



Re: [PATCH v2] block: BFQ default for single queue devices

2018-10-22 Thread Jens Axboe
On 10/19/18 2:42 AM, Linus Walleij wrote:
> On Wed, Oct 17, 2018 at 4:59 PM Bryan Gurney  wrote:
> 
>> I feel strongly about the prevention of users running into errors
>> because of an incorrect scheduler default, because I encountered that
>> situation three times in my testing with zoned block devices. The
>> switch to SCSI_MQ would resolve that, since mq-deadline is the
>> default, but in my case, I was using Fedora 28, which disables
>> CONFIG_SCSI_MQ_DEFAULT (which is enabled in the 4.18 kernel), so my
>> default scheduler was cfq.
> 
> I think we should make a patch to the kernel that makes it
> impossible (even from sysfs) to choose a non-zone aware
> scheduler for these devices.
> 
> It's another topic than $SUBJECT patch though. I take this
> into account in this version.

Yes I agree, and I'd be happy to take such a patch. The only matching we
do now is mq-sched for mq-device, and vice versa.  And that will be
going away in 4.21, when there are no more !mq devices that use
scheduling.

If your device is zoned, then you should not be able to switch to a
scheduler that doesn't have support for that. The right approach here
would be to add a capability flag to the IO schedulers.
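
(Until such a capability flag exists, the manual check that an admin or a
udev rule effectively has to do looks roughly like this; sdX is a
placeholder device name:)

    # Is the device zoned, and which scheduler is it using right now?
    cat /sys/block/sdX/queue/zoned      # none, host-aware or host-managed
    cat /sys/block/sdX/queue/scheduler  # active scheduler shown in brackets

    # For a zoned device, explicitly pick a zone-aware scheduler.
    echo mq-deadline > /sys/block/sdX/queue/scheduler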

-- 
Jens Axboe



Re: [PATCH v2] block: BFQ default for single queue devices

2018-10-22 Thread Jens Axboe
On 10/19/18 2:22 AM, Pavel Machek wrote:
> Hi!
> 
>>>> Which is also the approach that I've been advocating for here, instead
>>>> of a kernel patch...
>>>
>>> I know you've been advocating the use of udev for IO scheduler selection.
>>> But do you want to force everybody to use udev? And for people who build
>>> their own (usually small) systems, do you want to force them to think about
>>> IO scheduler selection and writing appropriate rules? These are the
>>> problems people were mentioning and I'm not sure what is your opinion on
>>> this.
>>
>> I don't want to force everybody to use udev, use whatever you like on
>> your platform. For most people that is udev, for embedded it's something
>> else. As you said, distros already do this via udev. When I've had to
>> do it on my systems, I've added a udev rule to do it.
> 
> This is not really helpful.
> 
> So you want me and everyone else and everyone on embedded to mess with
> udev? No, thanks.

Did you read what I wrote?

> There are people booting with init=/bin/bash, too, running fsck. Would
> not it be nice to use reasonable schedulers there?

I can pretty much guarantee that fsck will run the same speed,
regardless of scheduler. And users generally don't care about
ultimate fairness on the device while running fsck...

If you (or someone else) doesn't want to use udev, use whatever
you want. You're doing something heavily customized at that
point anyway, surely this isn't a show stopper.

>> My opinion is that the kernel makes various schedulers available.
>> Deciding which one to use is policy that should go into user space.
>> The default should be something that's solid and works, fancier
>> setups and tuning should be left to user space.
> 
> Kernel should do reasonable thing by default, and it seems to be easy
> in this case.

I agree, we just differ on what we consider the reasonable choice to
be.

-- 
Jens Axboe



[GIT PULL] Block changes for 4.20

2018-10-22 Thread Jens Axboe
Hi Linus,

This is the main pull request for block changes for 4.20. This pull
request contains:

- Series enabling runtime PM for blk-mq (Bart).

- Two pull requests from Christoph for NVMe, with items such as;

- Better AEN tracking
- Multipath improvements
- RDMA fixes
- Rework of FC for target removal
- Fixes for issues identified by static checkers
- Fabric cleanups, as prep for TCP transport
- Various cleanups and bug fixes

- Block merging cleanups (Christoph)

- Conversion of drivers to generic DMA mapping API (Christoph)

- Series fixing ref count issues with blkcg (Dennis)

- Series improving BFQ heuristics (Paolo, et al)

- Series improving heuristics for the Kyber IO scheduler (Omar)

- Removal of dangerous bio_rewind_iter() API (Ming)

- Apply single queue IPI redirection logic to blk-mq (Ming)

- Set of fixes and improvements for bcache (Coly et al)

- Series closing a hotplug race with sysfs group attributes (Hannes)

- Set of patches for lightnvm:

- pblk trace support (Hans)
- SPDX license header update (Javier)
- Tons of refactoring patches to cleanly abstract the 1.2 and
  2.0 specs behind a common core interface. (Javier, Matias)
- Enable pblk to use a common interface to retrieve chunk
  metadata (Matias)
- Bug fixes (Various)

- Set of fixes and updates to the blk IO latency target (Josef)

- blk-mq queue number updates fixes (Jianchao)

- Convert a bunch of drivers from the old legacy IO interface to blk-mq.
  This will conclude with the removal of the legacy IO interface itself
  in 4.21, with the rest of the drivers (me, Omar)

- Removal of the DAC960 driver. The SCSI tree will introduce two
  replacement drivers for this. (Hannes)

I merged in v4.19-rc6 to resolve a conflict, but also to pull in a few
fixes that went into the storage area after the 4.20 tree was forked
off.

Please pull!


  git://git.kernel.dk/linux-block.git for-4.20/block-20181021



Bart Van Assche (25):
  blk-mq: Document the functions that iterate over requests
  block: Move power management code into a new source file
  block, scsi: Change the preempt-only flag into a counter
  block: Split blk_pm_add_request() and blk_pm_put_request()
  block: Schedule runtime resume earlier
  percpu-refcount: Introduce percpu_ref_resurrect()
  block: Allow unfreezing of a queue while requests are in progress
  block: Make blk_get_request() block for non-PM requests while suspended
  blk-mq: Enable support for runtime power management
  block: Finish renaming REQ_DISCARD into REQ_OP_DISCARD
  blk-mq-debugfs: Also show requests that have not yet been started
  nvmet: use strcmp() instead of strncmp() for subsystem lookup
  nvmet-rdma: check for timeout in nvme_rdma_wait_for_cm()
  nvme-core: declare local symbols static
  nvme-core: rework a NQN copying operation
  nvme-pci: fix nvme_suspend_queue() kernel-doc header
  nvmet: use strlcpy() instead of strcpy()
  nvmet-rdma: declare local symbols static
  nvmet: avoid integer overflow in the discard code
  nvme-fc: fix kernel-doc headers
  nvme-fc: introduce struct nvme_fcp_op_w_sgl
  nvme-fc: rework the request initialization code
  nvmet-fc: fix kernel-doc headers
  nvme-core: make implicit seed truncation explicit
  nvmet-fcloop: suppress a compiler warning

Bartlomiej Zolnierkiewicz (2):
  block: remove redundant 'default n' from Kconfig-s
  drivers/block: remove redundant 'default n' from Kconfig-s

Ben Peddell (1):
  bcache: Populate writeback_rate_minimum attribute

Chaitanya Kulkarni (4):
  nvmet: remove redundant module prefix
  nvme-core: add async event trace helper
  nvmet: remove unreachable code
  nvme-pci: remove duplicate check

Christoph Hellwig (23):
  block: use bio_add_page in bio_iov_iter_get_pages
  block: move integrity_req_gap_{back,front}_merge to blk.h
  block: move req_gap_{back,front}_merge to blk-merge.c
  block: move req_gap_back_merge to blk.h
  block: simplify BIOVEC_PHYS_MERGEABLE
  block: add a missing BIOVEC_SEG_BOUNDARY check in bio_add_pc_page
  block: merge BIOVEC_SEG_BOUNDARY into biovec_phys_mergeable
  block: remove bvec_to_phys
  block: don't include io.h from bio.h
  block: don't include bug.h from bio.h
  arm: remove the unused BIOVEC_MERGEABLE define
  xen: remove the xen_biovec_phys_mergeable export
  xen: provide a prototype for xen_biovec_phys_mergeable in xen.h
  block: remove ARCH_BIOVEC_PHYS_MERGEABLE
  xen: don't include  from  and 
  nvme: take node locality into account when selecting a path
  ubd: remove use of blk_rq_map_sg
  skd: switch to the generic DMA API
  sx8: remove dead IF_64BIT_DMA_IS_POSSIBLE code
  sx8: switch to the generic DMA