Re: [PATCH 2/3] lightnvm: pblk: garbage collect lines with failed writes

2018-04-23 Thread Hans Holmberg
On Fri, Apr 20, 2018 at 9:49 PM, Javier Gonzalez  wrote:
>> On 19 Apr 2018, at 09.39, Hans Holmberg  
>> wrote:
>>
>> From: Hans Holmberg 
>>
>> Write failures should not happen under normal circumstances,
>> so in order to bring the chunk back into a known state as soon
>> as possible, evacuate all the valid data out of the line and let the
>> fw judge if the block can be written to in the next reset cycle.
>>
>> Do this by introducing a new gc list for lines with failed writes,
>> and ensure that the rate limiter allocates a small portion of
>> the write bandwidth to get the job done.
>>
>> The lba list is saved in memory for use during gc as we
>> cannot gurantee that the emeta data is readable if a write
>> error occurred.
>>
>> Signed-off-by: Hans Holmberg 
>> ---
>> drivers/lightnvm/pblk-core.c  | 43 +--
>> drivers/lightnvm/pblk-gc.c| 79 
>> +++
>> drivers/lightnvm/pblk-init.c  | 39 ++---
>> drivers/lightnvm/pblk-rl.c| 29 +---
>> drivers/lightnvm/pblk-sysfs.c | 15 ++--
>> drivers/lightnvm/pblk-write.c |  2 ++
>> drivers/lightnvm/pblk.h   | 25 +++---
>> 7 files changed, 178 insertions(+), 54 deletions(-)
>>
>> diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
>> index 7762e89..f6135e4 100644
>> --- a/drivers/lightnvm/pblk-core.c
>> +++ b/drivers/lightnvm/pblk-core.c
>> @@ -373,7 +373,13 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, 
>> struct pblk_line *line)
>>
>>   lockdep_assert_held(>lock);
>>
>> - if (!vsc) {
>> + if (line->w_err_gc->has_write_err) {
>> + if (line->gc_group != PBLK_LINEGC_WERR) {
>> + line->gc_group = PBLK_LINEGC_WERR;
>> + move_list = _mg->gc_werr_list;
>> + pblk_rl_werr_line_in(>rl);
>> + }
>> + } else if (!vsc) {
>>   if (line->gc_group != PBLK_LINEGC_FULL) {
>>   line->gc_group = PBLK_LINEGC_FULL;
>>   move_list = _mg->gc_full_list;
>> @@ -1603,8 +1609,13 @@ static void __pblk_line_put(struct pblk *pblk, struct 
>> pblk_line *line)
>>   line->state = PBLK_LINESTATE_FREE;
>>   line->gc_group = PBLK_LINEGC_NONE;
>>   pblk_line_free(line);
>> - spin_unlock(>lock);
>>
>> + if (line->w_err_gc->has_write_err) {
>> + pblk_rl_werr_line_out(>rl);
>> + line->w_err_gc->has_write_err = 0;
>> + }
>> +
>> + spin_unlock(>lock);
>>   atomic_dec(>pipeline_gc);
>>
>>   spin_lock(_mg->free_lock);
>> @@ -1767,11 +1778,32 @@ void pblk_line_close_meta(struct pblk *pblk, struct 
>> pblk_line *line)
>>
>>   spin_lock(_mg->close_lock);
>>   spin_lock(>lock);
>> +
>> + /* Update the in-memory start address for emeta, in case it has
>> +  * shifted due to write errors
>> +  */
>> + if (line->emeta_ssec != line->cur_sec)
>> + line->emeta_ssec = line->cur_sec;
>> +
>>   list_add_tail(>list, _mg->emeta_list);
>>   spin_unlock(>lock);
>>   spin_unlock(_mg->close_lock);
>>
>>   pblk_line_should_sync_meta(pblk);
>> +
>> +
>> +}
>> +
>> +static void pblk_save_lba_list(struct pblk *pblk, struct pblk_line *line)
>> +{
>> + struct pblk_line_meta *lm = >lm;
>> + unsigned int lba_list_size = lm->emeta_len[2];
>> + struct pblk_w_err_gc *w_err_gc = line->w_err_gc;
>> + struct pblk_emeta *emeta = line->emeta;
>> +
>> + w_err_gc->lba_list = kmalloc(lba_list_size, GFP_KERNEL);
>> + memcpy(w_err_gc->lba_list, emeta_to_lbas(pblk, emeta->buf),
>> + lba_list_size);
>> }
>>
>> void pblk_line_close_ws(struct work_struct *work)
>> @@ -1780,6 +1812,13 @@ void pblk_line_close_ws(struct work_struct *work)
>>   ws);
>>   struct pblk *pblk = line_ws->pblk;
>>   struct pblk_line *line = line_ws->line;
>> + struct pblk_w_err_gc *w_err_gc = line->w_err_gc;
>> +
>> + /* Write errors makes the emeta start address stored in smeta invalid,
>> +  * so keep a copy of the lba list until we've gc'd the line
>> +  */
>> + if (w_err_gc->has_write_err)
>> + pblk_save_lba_list(pblk, line);
>>
>>   pblk_line_close(pblk, line);
>>   mempool_free(line_ws, pblk->gen_ws_pool);
>> diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c
>> index b0cc277..62f0548 100644
>> --- a/drivers/lightnvm/pblk-gc.c
>> +++ b/drivers/lightnvm/pblk-gc.c
>> @@ -138,10 +138,10 @@ static void pblk_gc_line_prepare_ws(struct work_struct 
>> *work)
>>   struct pblk_line_mgmt *l_mg = >l_mg;
>>   struct pblk_line_meta *lm = >lm;
>>   struct pblk_gc *gc = >gc;
>> - struct line_emeta *emeta_buf;
>> + struct line_emeta *emeta_buf = NULL;
>>   struct pblk_line_ws 

Re: [PATCH 2/3] lightnvm: pblk: garbage collect lines with failed writes

2018-04-20 Thread Javier Gonzalez
> On 19 Apr 2018, at 09.39, Hans Holmberg  
> wrote:
> 
> From: Hans Holmberg 
> 
> Write failures should not happen under normal circumstances,
> so in order to bring the chunk back into a known state as soon
> as possible, evacuate all the valid data out of the line and let the
> fw judge if the block can be written to in the next reset cycle.
> 
> Do this by introducing a new gc list for lines with failed writes,
> and ensure that the rate limiter allocates a small portion of
> the write bandwidth to get the job done.
> 
> The lba list is saved in memory for use during gc as we
> cannot gurantee that the emeta data is readable if a write
> error occurred.
> 
> Signed-off-by: Hans Holmberg 
> ---
> drivers/lightnvm/pblk-core.c  | 43 +--
> drivers/lightnvm/pblk-gc.c| 79 +++
> drivers/lightnvm/pblk-init.c  | 39 ++---
> drivers/lightnvm/pblk-rl.c| 29 +---
> drivers/lightnvm/pblk-sysfs.c | 15 ++--
> drivers/lightnvm/pblk-write.c |  2 ++
> drivers/lightnvm/pblk.h   | 25 +++---
> 7 files changed, 178 insertions(+), 54 deletions(-)
> 
> diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
> index 7762e89..f6135e4 100644
> --- a/drivers/lightnvm/pblk-core.c
> +++ b/drivers/lightnvm/pblk-core.c
> @@ -373,7 +373,13 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, 
> struct pblk_line *line)
> 
>   lockdep_assert_held(>lock);
> 
> - if (!vsc) {
> + if (line->w_err_gc->has_write_err) {
> + if (line->gc_group != PBLK_LINEGC_WERR) {
> + line->gc_group = PBLK_LINEGC_WERR;
> + move_list = _mg->gc_werr_list;
> + pblk_rl_werr_line_in(>rl);
> + }
> + } else if (!vsc) {
>   if (line->gc_group != PBLK_LINEGC_FULL) {
>   line->gc_group = PBLK_LINEGC_FULL;
>   move_list = _mg->gc_full_list;
> @@ -1603,8 +1609,13 @@ static void __pblk_line_put(struct pblk *pblk, struct 
> pblk_line *line)
>   line->state = PBLK_LINESTATE_FREE;
>   line->gc_group = PBLK_LINEGC_NONE;
>   pblk_line_free(line);
> - spin_unlock(>lock);
> 
> + if (line->w_err_gc->has_write_err) {
> + pblk_rl_werr_line_out(>rl);
> + line->w_err_gc->has_write_err = 0;
> + }
> +
> + spin_unlock(>lock);
>   atomic_dec(>pipeline_gc);
> 
>   spin_lock(_mg->free_lock);
> @@ -1767,11 +1778,32 @@ void pblk_line_close_meta(struct pblk *pblk, struct 
> pblk_line *line)
> 
>   spin_lock(_mg->close_lock);
>   spin_lock(>lock);
> +
> + /* Update the in-memory start address for emeta, in case it has
> +  * shifted due to write errors
> +  */
> + if (line->emeta_ssec != line->cur_sec)
> + line->emeta_ssec = line->cur_sec;
> +
>   list_add_tail(>list, _mg->emeta_list);
>   spin_unlock(>lock);
>   spin_unlock(_mg->close_lock);
> 
>   pblk_line_should_sync_meta(pblk);
> +
> +
> +}
> +
> +static void pblk_save_lba_list(struct pblk *pblk, struct pblk_line *line)
> +{
> + struct pblk_line_meta *lm = >lm;
> + unsigned int lba_list_size = lm->emeta_len[2];
> + struct pblk_w_err_gc *w_err_gc = line->w_err_gc;
> + struct pblk_emeta *emeta = line->emeta;
> +
> + w_err_gc->lba_list = kmalloc(lba_list_size, GFP_KERNEL);
> + memcpy(w_err_gc->lba_list, emeta_to_lbas(pblk, emeta->buf),
> + lba_list_size);
> }
> 
> void pblk_line_close_ws(struct work_struct *work)
> @@ -1780,6 +1812,13 @@ void pblk_line_close_ws(struct work_struct *work)
>   ws);
>   struct pblk *pblk = line_ws->pblk;
>   struct pblk_line *line = line_ws->line;
> + struct pblk_w_err_gc *w_err_gc = line->w_err_gc;
> +
> + /* Write errors makes the emeta start address stored in smeta invalid,
> +  * so keep a copy of the lba list until we've gc'd the line
> +  */
> + if (w_err_gc->has_write_err)
> + pblk_save_lba_list(pblk, line);
> 
>   pblk_line_close(pblk, line);
>   mempool_free(line_ws, pblk->gen_ws_pool);
> diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c
> index b0cc277..62f0548 100644
> --- a/drivers/lightnvm/pblk-gc.c
> +++ b/drivers/lightnvm/pblk-gc.c
> @@ -138,10 +138,10 @@ static void pblk_gc_line_prepare_ws(struct work_struct 
> *work)
>   struct pblk_line_mgmt *l_mg = >l_mg;
>   struct pblk_line_meta *lm = >lm;
>   struct pblk_gc *gc = >gc;
> - struct line_emeta *emeta_buf;
> + struct line_emeta *emeta_buf = NULL;
>   struct pblk_line_ws *gc_rq_ws;
>   struct pblk_gc_rq *gc_rq;
> - __le64 *lba_list;
> + __le64 *lba_list = NULL;
>   unsigned long *invalid_bitmap;
>   int sec_left, nr_secs, bit;
>   int ret;