> On 26 Jun 2018, at 20.47, Heiner Litz <[email protected]> wrote:
>
>> On Fri, Jun 22, 2018 at 11:17 AM Matias Bjørling <[email protected]> wrote:
>>
>>> On 06/18/2018 07:56 PM, Heiner Litz wrote:
>>> In the read path, partial reads are currently performed synchronously
>>> which affects performance for workloads that generate many partial
>>> reads. This patch adds an asynchronous partial read path as well as
>>> the required partial read ctx.
>>>
>>> Signed-off-by: Heiner Litz <[email protected]>
>>> ---
>>> drivers/lightnvm/pblk-read.c | 183
>>> ++++++++++++++++++++++++++++---------------
>>> drivers/lightnvm/pblk.h | 10 +++
>>> 2 files changed, 130 insertions(+), 63 deletions(-)
>>>
>>> diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
>>> index 6e93c48..828df98 100644
>>> --- a/drivers/lightnvm/pblk-read.c
>>> +++ b/drivers/lightnvm/pblk-read.c
>>> @@ -231,74 +231,36 @@ static void pblk_end_io_read(struct nvm_rq *rqd)
>>> __pblk_end_io_read(pblk, rqd, true);
>>> }
>>>
>>> -static int pblk_partial_read(struct pblk *pblk, struct nvm_rq *rqd,
>>> - struct bio *orig_bio, unsigned int bio_init_idx,
>>> - unsigned long *read_bitmap)
>>> +static void pblk_end_partial_read(struct nvm_rq *rqd)
>>> {
>>> - struct pblk_sec_meta *meta_list = rqd->meta_list;
>>> - struct bio *new_bio;
>>> + struct pblk *pblk = rqd->private;
>>> + struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
>>> + struct pblk_pr_ctx *pr_ctx = r_ctx->private;
>>> + struct bio *new_bio = rqd->bio;
>>> + struct bio *bio = pr_ctx->orig_bio;
>>> struct bio_vec src_bv, dst_bv;
>>> - void *ppa_ptr = NULL;
>>> - void *src_p, *dst_p;
>>> - dma_addr_t dma_ppa_list = 0;
>>> - __le64 *lba_list_mem, *lba_list_media;
>>> - int nr_secs = rqd->nr_ppas;
>>> + struct pblk_sec_meta *meta_list = rqd->meta_list;
>>> + int bio_init_idx = pr_ctx->bio_init_idx;
>>> + unsigned long *read_bitmap = &pr_ctx->bitmap;
>>> + int nr_secs = pr_ctx->orig_nr_secs;
>>> int nr_holes = nr_secs - bitmap_weight(read_bitmap, nr_secs);
>>> - int i, ret, hole;
>>> -
>>> - /* Re-use allocated memory for intermediate lbas */
>>> - lba_list_mem = (((void *)rqd->ppa_list) + pblk_dma_ppa_size);
>>> - lba_list_media = (((void *)rqd->ppa_list) + 2 * pblk_dma_ppa_size);
>>> -
>>> - new_bio = bio_alloc(GFP_KERNEL, nr_holes);
>>> -
>>> - if (pblk_bio_add_pages(pblk, new_bio, GFP_KERNEL, nr_holes))
>>> - goto fail_add_pages;
>>> -
>>> - if (nr_holes != new_bio->bi_vcnt) {
>>> - pr_err("pblk: malformed bio\n");
>>> - goto fail;
>>> - }
>>> -
>>> - for (i = 0; i < nr_secs; i++)
>>> - lba_list_mem[i] = meta_list[i].lba;
>>> -
>>> - new_bio->bi_iter.bi_sector = 0; /* internal bio */
>>> - bio_set_op_attrs(new_bio, REQ_OP_READ, 0);
>>> -
>>> - rqd->bio = new_bio;
>>> - rqd->nr_ppas = nr_holes;
>>> - rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
>>> -
>>> - if (unlikely(nr_holes == 1)) {
>>> - ppa_ptr = rqd->ppa_list;
>>> - dma_ppa_list = rqd->dma_ppa_list;
>>> - rqd->ppa_addr = rqd->ppa_list[0];
>>> - }
>>> -
>>> - ret = pblk_submit_io_sync(pblk, rqd);
>>> - if (ret) {
>>> - bio_put(rqd->bio);
>>> - pr_err("pblk: sync read IO submission failed\n");
>>> - goto fail;
>>> - }
>>> -
>>> - if (rqd->error) {
>>> - atomic_long_inc(&pblk->read_failed);
>>> -#ifdef CONFIG_NVM_PBLK_DEBUG
>>> - pblk_print_failed_rqd(pblk, rqd, rqd->error);
>>> -#endif
>>> - }
>>> + __le64 *lba_list_mem, *lba_list_media;
>>> + void *src_p, *dst_p;
>>> + int hole, i;
>>>
>>> if (unlikely(nr_holes == 1)) {
>>> struct ppa_addr ppa;
>>>
>>> ppa = rqd->ppa_addr;
>>> - rqd->ppa_list = ppa_ptr;
>>> - rqd->dma_ppa_list = dma_ppa_list;
>>> + rqd->ppa_list = pr_ctx->ppa_ptr;
>>> + rqd->dma_ppa_list = pr_ctx->dma_ppa_list;
>>> rqd->ppa_list[0] = ppa;
>>> }
>>>
>>> + /* Re-use allocated memory for intermediate lbas */
>>> + lba_list_mem = (((void *)rqd->ppa_list) + pblk_dma_ppa_size);
>>> + lba_list_media = (((void *)rqd->ppa_list) + 2 * pblk_dma_ppa_size);
>>> +
>>> for (i = 0; i < nr_secs; i++) {
>>> lba_list_media[i] = meta_list[i].lba;
>>> meta_list[i].lba = lba_list_mem[i];
>>> @@ -316,7 +278,7 @@ static int pblk_partial_read(struct pblk *pblk, struct
>>> nvm_rq *rqd,
>>> meta_list[hole].lba = lba_list_media[i];
>>>
>>> src_bv = new_bio->bi_io_vec[i++];
>>> - dst_bv = orig_bio->bi_io_vec[bio_init_idx + hole];
>>> + dst_bv = bio->bi_io_vec[bio_init_idx + hole];
>>>
>>> src_p = kmap_atomic(src_bv.bv_page);
>>> dst_p = kmap_atomic(dst_bv.bv_page);
>>> @@ -334,19 +296,107 @@ static int pblk_partial_read(struct pblk *pblk,
>>> struct nvm_rq *rqd,
>>> } while (hole < nr_secs);
>>>
>>> bio_put(new_bio);
>>> + kfree(pr_ctx);
>>>
>>> /* restore original request */
>>> rqd->bio = NULL;
>>> rqd->nr_ppas = nr_secs;
>>>
>>> + bio_endio(bio);
>>> __pblk_end_io_read(pblk, rqd, false);
>>> - return NVM_IO_DONE;
>>> +}
>>>
>>> -fail:
>>> - /* Free allocated pages in new bio */
>>> +static int pblk_setup_partial_read(struct pblk *pblk, struct nvm_rq *rqd,
>>> + unsigned int bio_init_idx,
>>> + unsigned long *read_bitmap,
>>> + int nr_holes)
>>> +{
>>> + struct pblk_sec_meta *meta_list = rqd->meta_list;
>>> + struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
>>> + struct pblk_pr_ctx *pr_ctx;
>>> + struct bio *new_bio, *bio = r_ctx->private;
>>> + __le64 *lba_list_mem;
>>> + int nr_secs = rqd->nr_ppas;
>>> + int i;
>>> +
>>> + /* Re-use allocated memory for intermediate lbas */
>>> + lba_list_mem = (((void *)rqd->ppa_list) + pblk_dma_ppa_size);
>>> +
>>> + new_bio = bio_alloc(GFP_KERNEL, nr_holes);
>>
>>
>> new_bio can return NULL.
>
> see Jens's email
Yes, this is the same as in the original partial read path.
>
>>
>>> +
>>> + if (pblk_bio_add_pages(pblk, new_bio, GFP_KERNEL, nr_holes))
>>> + goto fail;
>>
>> goto bio_put?
>
> do you only want the label to be changed? sure
To be consistent with the rest of pblk, the label should be fail_bio_put.
>
>>> +
>>> + if (nr_holes != new_bio->bi_vcnt) {
>>> + pr_err("pblk: malformed bio\n");
>>
>> I don't think there is a need for an error message here. In which case
>> would this happen?
>
> This is taken over from the original partial read path. Maybe Javier
> knows why it was put in there in the first place.
This should not happen; if it does, it’s a pblk internal error. I’m ok with
making it a WARN_ONCE().
>
>>
>>> + goto fail_pages;
>>> + }
>>
>> goto free_pages?
Same as above: fail_free_pages.
>
> sure I can change the label
>
>>
>>> +
>>> + pr_ctx = kmalloc(sizeof(struct pblk_pr_ctx), GFP_KERNEL);
>>> + if (!pr_ctx)
>>> + goto fail_pages;
>>> +
>>> + for (i = 0; i < nr_secs; i++)
>>> + lba_list_mem[i] = meta_list[i].lba;
>>> +
>>> + new_bio->bi_iter.bi_sector = 0; /* internal bio */
>>> + bio_set_op_attrs(new_bio, REQ_OP_READ, 0);
>>> +
>>> + rqd->bio = new_bio;
>>> + rqd->nr_ppas = nr_holes;
>>> + rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
>>> +
>>> + pr_ctx->ppa_ptr = NULL;
>>> + pr_ctx->orig_bio = bio;
>>> + pr_ctx->bitmap = *read_bitmap;
>>> + pr_ctx->bio_init_idx = bio_init_idx;
>>> + pr_ctx->orig_nr_secs = nr_secs;
>>> + r_ctx->private = pr_ctx;
>>> +
>>> + if (unlikely(nr_holes == 1)) {
>>> + pr_ctx->ppa_ptr = rqd->ppa_list;
>>> + pr_ctx->dma_ppa_list = rqd->dma_ppa_list;
>>> + rqd->ppa_addr = rqd->ppa_list[0];
>>> + }
>>> + return 0;
>>> +
>>> +fail_pages:
>>> pblk_bio_free_pages(pblk, new_bio, 0, new_bio->bi_vcnt);
>>> -fail_add_pages:
>>> +fail:
>>> + bio_put(new_bio);
>>> +
>>> + return -ENOMEM;
>>> +}
>>> +
>>> +static int pblk_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
>>> + unsigned int bio_init_idx,
>>> + unsigned long *read_bitmap, int nr_secs)
>>> +{
>>> + int nr_holes;
>>> + int ret;
>>> +
>>> + nr_holes = nr_secs - bitmap_weight(read_bitmap, nr_secs);
>>> +
>>> + if (pblk_setup_partial_read(pblk, rqd, bio_init_idx, read_bitmap,
>>> + nr_holes))
>>> + return NVM_IO_ERR;
>>> +
>>> + rqd->end_io = pblk_end_partial_read;
>>> +
>>> + ret = pblk_submit_io(pblk, rqd);
>>> + if (ret) {
>>> + bio_put(rqd->bio);
>>> + pr_err("pblk: partial read IO submission failed\n");
>>> + goto err;
>>> + }
>>> +
>>> + return NVM_IO_OK;
>>> +
>>> +err:
>>> pr_err("pblk: failed to perform partial read\n");
>>> +
>>> + /* Free allocated pages in new bio */
>>> + pblk_bio_free_pages(pblk, rqd->bio, 0, rqd->bio->bi_vcnt);
>>> __pblk_end_io_read(pblk, rqd, false);
>>> return NVM_IO_ERR;
>>> }
>>> @@ -480,8 +530,15 @@ int pblk_submit_read(struct pblk *pblk, struct bio
>>> *bio)
>>> /* The read bio request could be partially filled by the write buffer,
>>> * but there are some holes that need to be read from the drive.
>>> */
>>> - return pblk_partial_read(pblk, rqd, bio, bio_init_idx, &read_bitmap);
>>> + ret = pblk_partial_read_bio(pblk, rqd, bio_init_idx, &read_bitmap,
>>> + nr_secs);
>>> + if (ret)
>>> + goto fail_meta_free;
>>> +
>>> + return NVM_IO_OK;
>>>
>>> +fail_meta_free:
>>> + nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
>>> fail_rqd_free:
>>> pblk_free_rqd(pblk, rqd, PBLK_READ);
>>> return ret;
>>> diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
>>> index c072955..1c7ac06 100644
>>> --- a/drivers/lightnvm/pblk.h
>>> +++ b/drivers/lightnvm/pblk.h
>>> @@ -119,6 +119,16 @@ struct pblk_g_ctx {
>>> u64 lba;
>>> };
>>>
>>> +/* partial read context */
>>> +struct pblk_pr_ctx {
>>> + struct bio *orig_bio;
>>> + unsigned long bitmap;
>>
>> Guarantees that this is at least 32bit, but this should be at least 64
>> bit right?
>>
>>> + unsigned int orig_nr_secs;
>>> + unsigned int bio_init_idx;
>>> + void *ppa_ptr;
>>> + dma_addr_t dma_ppa_list;
>>> +};
>>> +
>>> /* Pad context */
>>> struct pblk_pad_rq {
>>> struct pblk *pblk;
>>>
>>