Re: [PATCH 2/3] lightnvm: pblk: garbage collect lines with failed writes

2018-04-23 Thread Hans Holmberg
On Fri, Apr 20, 2018 at 9:49 PM, Javier Gonzalez  wrote:
>> On 19 Apr 2018, at 09.39, Hans Holmberg  
>> wrote:
>>
>> From: Hans Holmberg 
>>
>> Write failures should not happen under normal circumstances,
>> so in order to bring the chunk back into a known state as soon
>> as possible, evacuate all the valid data out of the line and let the
>> fw judge if the block can be written to in the next reset cycle.
>>
>> Do this by introducing a new gc list for lines with failed writes,
>> and ensure that the rate limiter allocates a small portion of
>> the write bandwidth to get the job done.
>>
>> The lba list is saved in memory for use during gc as we
>> cannot gurantee that the emeta data is readable if a write
>> error occurred.
>>
>> Signed-off-by: Hans Holmberg 
>> ---
>> drivers/lightnvm/pblk-core.c  | 43 +--
>> drivers/lightnvm/pblk-gc.c| 79 
>> +++
>> drivers/lightnvm/pblk-init.c  | 39 ++---
>> drivers/lightnvm/pblk-rl.c| 29 +---
>> drivers/lightnvm/pblk-sysfs.c | 15 ++--
>> drivers/lightnvm/pblk-write.c |  2 ++
>> drivers/lightnvm/pblk.h   | 25 +++---
>> 7 files changed, 178 insertions(+), 54 deletions(-)
>>
>> diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
>> index 7762e89..f6135e4 100644
>> --- a/drivers/lightnvm/pblk-core.c
>> +++ b/drivers/lightnvm/pblk-core.c
>> @@ -373,7 +373,13 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, 
>> struct pblk_line *line)
>>
>>   lockdep_assert_held(>lock);
>>
>> - if (!vsc) {
>> + if (line->w_err_gc->has_write_err) {
>> + if (line->gc_group != PBLK_LINEGC_WERR) {
>> + line->gc_group = PBLK_LINEGC_WERR;
>> + move_list = _mg->gc_werr_list;
>> + pblk_rl_werr_line_in(>rl);
>> + }
>> + } else if (!vsc) {
>>   if (line->gc_group != PBLK_LINEGC_FULL) {
>>   line->gc_group = PBLK_LINEGC_FULL;
>>   move_list = _mg->gc_full_list;
>> @@ -1603,8 +1609,13 @@ static void __pblk_line_put(struct pblk *pblk, struct 
>> pblk_line *line)
>>   line->state = PBLK_LINESTATE_FREE;
>>   line->gc_group = PBLK_LINEGC_NONE;
>>   pblk_line_free(line);
>> - spin_unlock(>lock);
>>
>> + if (line->w_err_gc->has_write_err) {
>> + pblk_rl_werr_line_out(>rl);
>> + line->w_err_gc->has_write_err = 0;
>> + }
>> +
>> + spin_unlock(>lock);
>>   atomic_dec(>pipeline_gc);
>>
>>   spin_lock(_mg->free_lock);
>> @@ -1767,11 +1778,32 @@ void pblk_line_close_meta(struct pblk *pblk, struct 
>> pblk_line *line)
>>
>>   spin_lock(_mg->close_lock);
>>   spin_lock(>lock);
>> +
>> + /* Update the in-memory start address for emeta, in case it has
>> +  * shifted due to write errors
>> +  */
>> + if (line->emeta_ssec != line->cur_sec)
>> + line->emeta_ssec = line->cur_sec;
>> +
>>   list_add_tail(>list, _mg->emeta_list);
>>   spin_unlock(>lock);
>>   spin_unlock(_mg->close_lock);
>>
>>   pblk_line_should_sync_meta(pblk);
>> +
>> +
>> +}
>> +
>> +static void pblk_save_lba_list(struct pblk *pblk, struct pblk_line *line)
>> +{
>> + struct pblk_line_meta *lm = >lm;
>> + unsigned int lba_list_size = lm->emeta_len[2];
>> + struct pblk_w_err_gc *w_err_gc = line->w_err_gc;
>> + struct pblk_emeta *emeta = line->emeta;
>> +
>> + w_err_gc->lba_list = kmalloc(lba_list_size, GFP_KERNEL);
>> + memcpy(w_err_gc->lba_list, emeta_to_lbas(pblk, emeta->buf),
>> + lba_list_size);
>> }
>>
>> void pblk_line_close_ws(struct work_struct *work)
>> @@ -1780,6 +1812,13 @@ void pblk_line_close_ws(struct work_struct *work)
>>   ws);
>>   struct pblk *pblk = line_ws->pblk;
>>   struct pblk_line *line = line_ws->line;
>> + struct pblk_w_err_gc *w_err_gc = line->w_err_gc;
>> +
>> + /* Write errors makes the emeta start address stored in smeta invalid,
>> +  * so keep a copy of the lba list until we've gc'd the line
>> +  */
>> + if (w_err_gc->has_write_err)
>> + pblk_save_lba_list(pblk, line);
>>
>>   pblk_line_close(pblk, line);
>>   mempool_free(line_ws, pblk->gen_ws_pool);
>> diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c
>> index b0cc277..62f0548 100644
>> --- a/drivers/lightnvm/pblk-gc.c
>> +++ b/drivers/lightnvm/pblk-gc.c
>> @@ -138,10 +138,10 @@ static void pblk_gc_line_prepare_ws(struct work_struct 
>> *work)
>>   struct pblk_line_mgmt *l_mg = >l_mg;
>>   struct pblk_line_meta *lm = >lm;
>>   struct pblk_gc *gc = >gc;
>> - struct line_emeta *emeta_buf;
>> + struct line_emeta *emeta_buf = NULL;
>>   struct pblk_line_ws 

Re: [PATCH 2/3] lightnvm: pblk: garbage collect lines with failed writes

2018-04-23 Thread Hans Holmberg
On Fri, Apr 20, 2018 at 9:49 PM, Javier Gonzalez  wrote:
>> On 19 Apr 2018, at 09.39, Hans Holmberg  
>> wrote:
>>
>> From: Hans Holmberg 
>>
>> Write failures should not happen under normal circumstances,
>> so in order to bring the chunk back into a known state as soon
>> as possible, evacuate all the valid data out of the line and let the
>> fw judge if the block can be written to in the next reset cycle.
>>
>> Do this by introducing a new gc list for lines with failed writes,
>> and ensure that the rate limiter allocates a small portion of
>> the write bandwidth to get the job done.
>>
>> The lba list is saved in memory for use during gc as we
>> cannot gurantee that the emeta data is readable if a write
>> error occurred.
>>
>> Signed-off-by: Hans Holmberg 
>> ---
>> drivers/lightnvm/pblk-core.c  | 43 +--
>> drivers/lightnvm/pblk-gc.c| 79 
>> +++
>> drivers/lightnvm/pblk-init.c  | 39 ++---
>> drivers/lightnvm/pblk-rl.c| 29 +---
>> drivers/lightnvm/pblk-sysfs.c | 15 ++--
>> drivers/lightnvm/pblk-write.c |  2 ++
>> drivers/lightnvm/pblk.h   | 25 +++---
>> 7 files changed, 178 insertions(+), 54 deletions(-)
>>
>> diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
>> index 7762e89..f6135e4 100644
>> --- a/drivers/lightnvm/pblk-core.c
>> +++ b/drivers/lightnvm/pblk-core.c
>> @@ -373,7 +373,13 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, 
>> struct pblk_line *line)
>>
>>   lockdep_assert_held(>lock);
>>
>> - if (!vsc) {
>> + if (line->w_err_gc->has_write_err) {
>> + if (line->gc_group != PBLK_LINEGC_WERR) {
>> + line->gc_group = PBLK_LINEGC_WERR;
>> + move_list = _mg->gc_werr_list;
>> + pblk_rl_werr_line_in(>rl);
>> + }
>> + } else if (!vsc) {
>>   if (line->gc_group != PBLK_LINEGC_FULL) {
>>   line->gc_group = PBLK_LINEGC_FULL;
>>   move_list = _mg->gc_full_list;
>> @@ -1603,8 +1609,13 @@ static void __pblk_line_put(struct pblk *pblk, struct 
>> pblk_line *line)
>>   line->state = PBLK_LINESTATE_FREE;
>>   line->gc_group = PBLK_LINEGC_NONE;
>>   pblk_line_free(line);
>> - spin_unlock(>lock);
>>
>> + if (line->w_err_gc->has_write_err) {
>> + pblk_rl_werr_line_out(>rl);
>> + line->w_err_gc->has_write_err = 0;
>> + }
>> +
>> + spin_unlock(>lock);
>>   atomic_dec(>pipeline_gc);
>>
>>   spin_lock(_mg->free_lock);
>> @@ -1767,11 +1778,32 @@ void pblk_line_close_meta(struct pblk *pblk, struct 
>> pblk_line *line)
>>
>>   spin_lock(_mg->close_lock);
>>   spin_lock(>lock);
>> +
>> + /* Update the in-memory start address for emeta, in case it has
>> +  * shifted due to write errors
>> +  */
>> + if (line->emeta_ssec != line->cur_sec)
>> + line->emeta_ssec = line->cur_sec;
>> +
>>   list_add_tail(>list, _mg->emeta_list);
>>   spin_unlock(>lock);
>>   spin_unlock(_mg->close_lock);
>>
>>   pblk_line_should_sync_meta(pblk);
>> +
>> +
>> +}
>> +
>> +static void pblk_save_lba_list(struct pblk *pblk, struct pblk_line *line)
>> +{
>> + struct pblk_line_meta *lm = >lm;
>> + unsigned int lba_list_size = lm->emeta_len[2];
>> + struct pblk_w_err_gc *w_err_gc = line->w_err_gc;
>> + struct pblk_emeta *emeta = line->emeta;
>> +
>> + w_err_gc->lba_list = kmalloc(lba_list_size, GFP_KERNEL);
>> + memcpy(w_err_gc->lba_list, emeta_to_lbas(pblk, emeta->buf),
>> + lba_list_size);
>> }
>>
>> void pblk_line_close_ws(struct work_struct *work)
>> @@ -1780,6 +1812,13 @@ void pblk_line_close_ws(struct work_struct *work)
>>   ws);
>>   struct pblk *pblk = line_ws->pblk;
>>   struct pblk_line *line = line_ws->line;
>> + struct pblk_w_err_gc *w_err_gc = line->w_err_gc;
>> +
>> + /* Write errors makes the emeta start address stored in smeta invalid,
>> +  * so keep a copy of the lba list until we've gc'd the line
>> +  */
>> + if (w_err_gc->has_write_err)
>> + pblk_save_lba_list(pblk, line);
>>
>>   pblk_line_close(pblk, line);
>>   mempool_free(line_ws, pblk->gen_ws_pool);
>> diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c
>> index b0cc277..62f0548 100644
>> --- a/drivers/lightnvm/pblk-gc.c
>> +++ b/drivers/lightnvm/pblk-gc.c
>> @@ -138,10 +138,10 @@ static void pblk_gc_line_prepare_ws(struct work_struct 
>> *work)
>>   struct pblk_line_mgmt *l_mg = >l_mg;
>>   struct pblk_line_meta *lm = >lm;
>>   struct pblk_gc *gc = >gc;
>> - struct line_emeta *emeta_buf;
>> + struct line_emeta *emeta_buf = NULL;
>>   struct pblk_line_ws *gc_rq_ws;
>>   struct pblk_gc_rq *gc_rq;
>> - __le64 *lba_list;
>> + __le64 *lba_list = NULL;
>>   

Re: [PATCH 2/3] lightnvm: pblk: garbage collect lines with failed writes

2018-04-20 Thread Javier Gonzalez
> On 19 Apr 2018, at 09.39, Hans Holmberg  
> wrote:
> 
> From: Hans Holmberg 
> 
> Write failures should not happen under normal circumstances,
> so in order to bring the chunk back into a known state as soon
> as possible, evacuate all the valid data out of the line and let the
> fw judge if the block can be written to in the next reset cycle.
> 
> Do this by introducing a new gc list for lines with failed writes,
> and ensure that the rate limiter allocates a small portion of
> the write bandwidth to get the job done.
> 
> The lba list is saved in memory for use during gc as we
> cannot gurantee that the emeta data is readable if a write
> error occurred.
> 
> Signed-off-by: Hans Holmberg 
> ---
> drivers/lightnvm/pblk-core.c  | 43 +--
> drivers/lightnvm/pblk-gc.c| 79 +++
> drivers/lightnvm/pblk-init.c  | 39 ++---
> drivers/lightnvm/pblk-rl.c| 29 +---
> drivers/lightnvm/pblk-sysfs.c | 15 ++--
> drivers/lightnvm/pblk-write.c |  2 ++
> drivers/lightnvm/pblk.h   | 25 +++---
> 7 files changed, 178 insertions(+), 54 deletions(-)
> 
> diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
> index 7762e89..f6135e4 100644
> --- a/drivers/lightnvm/pblk-core.c
> +++ b/drivers/lightnvm/pblk-core.c
> @@ -373,7 +373,13 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, 
> struct pblk_line *line)
> 
>   lockdep_assert_held(>lock);
> 
> - if (!vsc) {
> + if (line->w_err_gc->has_write_err) {
> + if (line->gc_group != PBLK_LINEGC_WERR) {
> + line->gc_group = PBLK_LINEGC_WERR;
> + move_list = _mg->gc_werr_list;
> + pblk_rl_werr_line_in(>rl);
> + }
> + } else if (!vsc) {
>   if (line->gc_group != PBLK_LINEGC_FULL) {
>   line->gc_group = PBLK_LINEGC_FULL;
>   move_list = _mg->gc_full_list;
> @@ -1603,8 +1609,13 @@ static void __pblk_line_put(struct pblk *pblk, struct 
> pblk_line *line)
>   line->state = PBLK_LINESTATE_FREE;
>   line->gc_group = PBLK_LINEGC_NONE;
>   pblk_line_free(line);
> - spin_unlock(>lock);
> 
> + if (line->w_err_gc->has_write_err) {
> + pblk_rl_werr_line_out(>rl);
> + line->w_err_gc->has_write_err = 0;
> + }
> +
> + spin_unlock(>lock);
>   atomic_dec(>pipeline_gc);
> 
>   spin_lock(_mg->free_lock);
> @@ -1767,11 +1778,32 @@ void pblk_line_close_meta(struct pblk *pblk, struct 
> pblk_line *line)
> 
>   spin_lock(_mg->close_lock);
>   spin_lock(>lock);
> +
> + /* Update the in-memory start address for emeta, in case it has
> +  * shifted due to write errors
> +  */
> + if (line->emeta_ssec != line->cur_sec)
> + line->emeta_ssec = line->cur_sec;
> +
>   list_add_tail(>list, _mg->emeta_list);
>   spin_unlock(>lock);
>   spin_unlock(_mg->close_lock);
> 
>   pblk_line_should_sync_meta(pblk);
> +
> +
> +}
> +
> +static void pblk_save_lba_list(struct pblk *pblk, struct pblk_line *line)
> +{
> + struct pblk_line_meta *lm = >lm;
> + unsigned int lba_list_size = lm->emeta_len[2];
> + struct pblk_w_err_gc *w_err_gc = line->w_err_gc;
> + struct pblk_emeta *emeta = line->emeta;
> +
> + w_err_gc->lba_list = kmalloc(lba_list_size, GFP_KERNEL);
> + memcpy(w_err_gc->lba_list, emeta_to_lbas(pblk, emeta->buf),
> + lba_list_size);
> }
> 
> void pblk_line_close_ws(struct work_struct *work)
> @@ -1780,6 +1812,13 @@ void pblk_line_close_ws(struct work_struct *work)
>   ws);
>   struct pblk *pblk = line_ws->pblk;
>   struct pblk_line *line = line_ws->line;
> + struct pblk_w_err_gc *w_err_gc = line->w_err_gc;
> +
> + /* Write errors makes the emeta start address stored in smeta invalid,
> +  * so keep a copy of the lba list until we've gc'd the line
> +  */
> + if (w_err_gc->has_write_err)
> + pblk_save_lba_list(pblk, line);
> 
>   pblk_line_close(pblk, line);
>   mempool_free(line_ws, pblk->gen_ws_pool);
> diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c
> index b0cc277..62f0548 100644
> --- a/drivers/lightnvm/pblk-gc.c
> +++ b/drivers/lightnvm/pblk-gc.c
> @@ -138,10 +138,10 @@ static void pblk_gc_line_prepare_ws(struct work_struct 
> *work)
>   struct pblk_line_mgmt *l_mg = >l_mg;
>   struct pblk_line_meta *lm = >lm;
>   struct pblk_gc *gc = >gc;
> - struct line_emeta *emeta_buf;
> + struct line_emeta *emeta_buf = NULL;
>   struct pblk_line_ws *gc_rq_ws;
>   struct pblk_gc_rq *gc_rq;
> - __le64 *lba_list;
> + __le64 *lba_list = NULL;
>   unsigned long *invalid_bitmap;
>   int sec_left, nr_secs, bit;
>   int ret;

Re: [PATCH 2/3] lightnvm: pblk: garbage collect lines with failed writes

2018-04-20 Thread Javier Gonzalez
> On 19 Apr 2018, at 09.39, Hans Holmberg  
> wrote:
> 
> From: Hans Holmberg 
> 
> Write failures should not happen under normal circumstances,
> so in order to bring the chunk back into a known state as soon
> as possible, evacuate all the valid data out of the line and let the
> fw judge if the block can be written to in the next reset cycle.
> 
> Do this by introducing a new gc list for lines with failed writes,
> and ensure that the rate limiter allocates a small portion of
> the write bandwidth to get the job done.
> 
> The lba list is saved in memory for use during gc as we
> cannot gurantee that the emeta data is readable if a write
> error occurred.
> 
> Signed-off-by: Hans Holmberg 
> ---
> drivers/lightnvm/pblk-core.c  | 43 +--
> drivers/lightnvm/pblk-gc.c| 79 +++
> drivers/lightnvm/pblk-init.c  | 39 ++---
> drivers/lightnvm/pblk-rl.c| 29 +---
> drivers/lightnvm/pblk-sysfs.c | 15 ++--
> drivers/lightnvm/pblk-write.c |  2 ++
> drivers/lightnvm/pblk.h   | 25 +++---
> 7 files changed, 178 insertions(+), 54 deletions(-)
> 
> diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
> index 7762e89..f6135e4 100644
> --- a/drivers/lightnvm/pblk-core.c
> +++ b/drivers/lightnvm/pblk-core.c
> @@ -373,7 +373,13 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, 
> struct pblk_line *line)
> 
>   lockdep_assert_held(>lock);
> 
> - if (!vsc) {
> + if (line->w_err_gc->has_write_err) {
> + if (line->gc_group != PBLK_LINEGC_WERR) {
> + line->gc_group = PBLK_LINEGC_WERR;
> + move_list = _mg->gc_werr_list;
> + pblk_rl_werr_line_in(>rl);
> + }
> + } else if (!vsc) {
>   if (line->gc_group != PBLK_LINEGC_FULL) {
>   line->gc_group = PBLK_LINEGC_FULL;
>   move_list = _mg->gc_full_list;
> @@ -1603,8 +1609,13 @@ static void __pblk_line_put(struct pblk *pblk, struct 
> pblk_line *line)
>   line->state = PBLK_LINESTATE_FREE;
>   line->gc_group = PBLK_LINEGC_NONE;
>   pblk_line_free(line);
> - spin_unlock(>lock);
> 
> + if (line->w_err_gc->has_write_err) {
> + pblk_rl_werr_line_out(>rl);
> + line->w_err_gc->has_write_err = 0;
> + }
> +
> + spin_unlock(>lock);
>   atomic_dec(>pipeline_gc);
> 
>   spin_lock(_mg->free_lock);
> @@ -1767,11 +1778,32 @@ void pblk_line_close_meta(struct pblk *pblk, struct 
> pblk_line *line)
> 
>   spin_lock(_mg->close_lock);
>   spin_lock(>lock);
> +
> + /* Update the in-memory start address for emeta, in case it has
> +  * shifted due to write errors
> +  */
> + if (line->emeta_ssec != line->cur_sec)
> + line->emeta_ssec = line->cur_sec;
> +
>   list_add_tail(>list, _mg->emeta_list);
>   spin_unlock(>lock);
>   spin_unlock(_mg->close_lock);
> 
>   pblk_line_should_sync_meta(pblk);
> +
> +
> +}
> +
> +static void pblk_save_lba_list(struct pblk *pblk, struct pblk_line *line)
> +{
> + struct pblk_line_meta *lm = >lm;
> + unsigned int lba_list_size = lm->emeta_len[2];
> + struct pblk_w_err_gc *w_err_gc = line->w_err_gc;
> + struct pblk_emeta *emeta = line->emeta;
> +
> + w_err_gc->lba_list = kmalloc(lba_list_size, GFP_KERNEL);
> + memcpy(w_err_gc->lba_list, emeta_to_lbas(pblk, emeta->buf),
> + lba_list_size);
> }
> 
> void pblk_line_close_ws(struct work_struct *work)
> @@ -1780,6 +1812,13 @@ void pblk_line_close_ws(struct work_struct *work)
>   ws);
>   struct pblk *pblk = line_ws->pblk;
>   struct pblk_line *line = line_ws->line;
> + struct pblk_w_err_gc *w_err_gc = line->w_err_gc;
> +
> + /* Write errors makes the emeta start address stored in smeta invalid,
> +  * so keep a copy of the lba list until we've gc'd the line
> +  */
> + if (w_err_gc->has_write_err)
> + pblk_save_lba_list(pblk, line);
> 
>   pblk_line_close(pblk, line);
>   mempool_free(line_ws, pblk->gen_ws_pool);
> diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c
> index b0cc277..62f0548 100644
> --- a/drivers/lightnvm/pblk-gc.c
> +++ b/drivers/lightnvm/pblk-gc.c
> @@ -138,10 +138,10 @@ static void pblk_gc_line_prepare_ws(struct work_struct 
> *work)
>   struct pblk_line_mgmt *l_mg = >l_mg;
>   struct pblk_line_meta *lm = >lm;
>   struct pblk_gc *gc = >gc;
> - struct line_emeta *emeta_buf;
> + struct line_emeta *emeta_buf = NULL;
>   struct pblk_line_ws *gc_rq_ws;
>   struct pblk_gc_rq *gc_rq;
> - __le64 *lba_list;
> + __le64 *lba_list = NULL;
>   unsigned long *invalid_bitmap;
>   int sec_left, nr_secs, bit;
>   int ret;
> @@ -150,34 +150,42 @@ static void pblk_gc_line_prepare_ws(struct work_struct 
> 

[PATCH 2/3] lightnvm: pblk: garbage collect lines with failed writes

2018-04-19 Thread Hans Holmberg
From: Hans Holmberg 

Write failures should not happen under normal circumstances,
so in order to bring the chunk back into a known state as soon
as possible, evacuate all the valid data out of the line and let the
fw judge if the block can be written to in the next reset cycle.

Do this by introducing a new gc list for lines with failed writes,
and ensure that the rate limiter allocates a small portion of
the write bandwidth to get the job done.

The lba list is saved in memory for use during gc as we
cannot gurantee that the emeta data is readable if a write
error occurred.

Signed-off-by: Hans Holmberg 
---
 drivers/lightnvm/pblk-core.c  | 43 +--
 drivers/lightnvm/pblk-gc.c| 79 +++
 drivers/lightnvm/pblk-init.c  | 39 ++---
 drivers/lightnvm/pblk-rl.c| 29 +---
 drivers/lightnvm/pblk-sysfs.c | 15 ++--
 drivers/lightnvm/pblk-write.c |  2 ++
 drivers/lightnvm/pblk.h   | 25 +++---
 7 files changed, 178 insertions(+), 54 deletions(-)

diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index 7762e89..f6135e4 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -373,7 +373,13 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, 
struct pblk_line *line)
 
lockdep_assert_held(>lock);
 
-   if (!vsc) {
+   if (line->w_err_gc->has_write_err) {
+   if (line->gc_group != PBLK_LINEGC_WERR) {
+   line->gc_group = PBLK_LINEGC_WERR;
+   move_list = _mg->gc_werr_list;
+   pblk_rl_werr_line_in(>rl);
+   }
+   } else if (!vsc) {
if (line->gc_group != PBLK_LINEGC_FULL) {
line->gc_group = PBLK_LINEGC_FULL;
move_list = _mg->gc_full_list;
@@ -1603,8 +1609,13 @@ static void __pblk_line_put(struct pblk *pblk, struct 
pblk_line *line)
line->state = PBLK_LINESTATE_FREE;
line->gc_group = PBLK_LINEGC_NONE;
pblk_line_free(line);
-   spin_unlock(>lock);
 
+   if (line->w_err_gc->has_write_err) {
+   pblk_rl_werr_line_out(>rl);
+   line->w_err_gc->has_write_err = 0;
+   }
+
+   spin_unlock(>lock);
atomic_dec(>pipeline_gc);
 
spin_lock(_mg->free_lock);
@@ -1767,11 +1778,32 @@ void pblk_line_close_meta(struct pblk *pblk, struct 
pblk_line *line)
 
spin_lock(_mg->close_lock);
spin_lock(>lock);
+
+   /* Update the in-memory start address for emeta, in case it has
+* shifted due to write errors
+*/
+   if (line->emeta_ssec != line->cur_sec)
+   line->emeta_ssec = line->cur_sec;
+
list_add_tail(>list, _mg->emeta_list);
spin_unlock(>lock);
spin_unlock(_mg->close_lock);
 
pblk_line_should_sync_meta(pblk);
+
+
+}
+
+static void pblk_save_lba_list(struct pblk *pblk, struct pblk_line *line)
+{
+   struct pblk_line_meta *lm = >lm;
+   unsigned int lba_list_size = lm->emeta_len[2];
+   struct pblk_w_err_gc *w_err_gc = line->w_err_gc;
+   struct pblk_emeta *emeta = line->emeta;
+
+   w_err_gc->lba_list = kmalloc(lba_list_size, GFP_KERNEL);
+   memcpy(w_err_gc->lba_list, emeta_to_lbas(pblk, emeta->buf),
+   lba_list_size);
 }
 
 void pblk_line_close_ws(struct work_struct *work)
@@ -1780,6 +1812,13 @@ void pblk_line_close_ws(struct work_struct *work)
ws);
struct pblk *pblk = line_ws->pblk;
struct pblk_line *line = line_ws->line;
+   struct pblk_w_err_gc *w_err_gc = line->w_err_gc;
+
+   /* Write errors makes the emeta start address stored in smeta invalid,
+* so keep a copy of the lba list until we've gc'd the line
+*/
+   if (w_err_gc->has_write_err)
+   pblk_save_lba_list(pblk, line);
 
pblk_line_close(pblk, line);
mempool_free(line_ws, pblk->gen_ws_pool);
diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c
index b0cc277..62f0548 100644
--- a/drivers/lightnvm/pblk-gc.c
+++ b/drivers/lightnvm/pblk-gc.c
@@ -138,10 +138,10 @@ static void pblk_gc_line_prepare_ws(struct work_struct 
*work)
struct pblk_line_mgmt *l_mg = >l_mg;
struct pblk_line_meta *lm = >lm;
struct pblk_gc *gc = >gc;
-   struct line_emeta *emeta_buf;
+   struct line_emeta *emeta_buf = NULL;
struct pblk_line_ws *gc_rq_ws;
struct pblk_gc_rq *gc_rq;
-   __le64 *lba_list;
+   __le64 *lba_list = NULL;
unsigned long *invalid_bitmap;
int sec_left, nr_secs, bit;
int ret;
@@ -150,34 +150,42 @@ static void pblk_gc_line_prepare_ws(struct work_struct 
*work)
if (!invalid_bitmap)
goto fail_free_ws;
 
-   emeta_buf = 

[PATCH 2/3] lightnvm: pblk: garbage collect lines with failed writes

2018-04-19 Thread Hans Holmberg
From: Hans Holmberg 

Write failures should not happen under normal circumstances,
so in order to bring the chunk back into a known state as soon
as possible, evacuate all the valid data out of the line and let the
fw judge if the block can be written to in the next reset cycle.

Do this by introducing a new gc list for lines with failed writes,
and ensure that the rate limiter allocates a small portion of
the write bandwidth to get the job done.

The lba list is saved in memory for use during gc as we
cannot gurantee that the emeta data is readable if a write
error occurred.

Signed-off-by: Hans Holmberg 
---
 drivers/lightnvm/pblk-core.c  | 43 +--
 drivers/lightnvm/pblk-gc.c| 79 +++
 drivers/lightnvm/pblk-init.c  | 39 ++---
 drivers/lightnvm/pblk-rl.c| 29 +---
 drivers/lightnvm/pblk-sysfs.c | 15 ++--
 drivers/lightnvm/pblk-write.c |  2 ++
 drivers/lightnvm/pblk.h   | 25 +++---
 7 files changed, 178 insertions(+), 54 deletions(-)

diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index 7762e89..f6135e4 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -373,7 +373,13 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, 
struct pblk_line *line)
 
lockdep_assert_held(>lock);
 
-   if (!vsc) {
+   if (line->w_err_gc->has_write_err) {
+   if (line->gc_group != PBLK_LINEGC_WERR) {
+   line->gc_group = PBLK_LINEGC_WERR;
+   move_list = _mg->gc_werr_list;
+   pblk_rl_werr_line_in(>rl);
+   }
+   } else if (!vsc) {
if (line->gc_group != PBLK_LINEGC_FULL) {
line->gc_group = PBLK_LINEGC_FULL;
move_list = _mg->gc_full_list;
@@ -1603,8 +1609,13 @@ static void __pblk_line_put(struct pblk *pblk, struct 
pblk_line *line)
line->state = PBLK_LINESTATE_FREE;
line->gc_group = PBLK_LINEGC_NONE;
pblk_line_free(line);
-   spin_unlock(>lock);
 
+   if (line->w_err_gc->has_write_err) {
+   pblk_rl_werr_line_out(>rl);
+   line->w_err_gc->has_write_err = 0;
+   }
+
+   spin_unlock(>lock);
atomic_dec(>pipeline_gc);
 
spin_lock(_mg->free_lock);
@@ -1767,11 +1778,32 @@ void pblk_line_close_meta(struct pblk *pblk, struct 
pblk_line *line)
 
spin_lock(_mg->close_lock);
spin_lock(>lock);
+
+   /* Update the in-memory start address for emeta, in case it has
+* shifted due to write errors
+*/
+   if (line->emeta_ssec != line->cur_sec)
+   line->emeta_ssec = line->cur_sec;
+
list_add_tail(>list, _mg->emeta_list);
spin_unlock(>lock);
spin_unlock(_mg->close_lock);
 
pblk_line_should_sync_meta(pblk);
+
+
+}
+
+static void pblk_save_lba_list(struct pblk *pblk, struct pblk_line *line)
+{
+   struct pblk_line_meta *lm = >lm;
+   unsigned int lba_list_size = lm->emeta_len[2];
+   struct pblk_w_err_gc *w_err_gc = line->w_err_gc;
+   struct pblk_emeta *emeta = line->emeta;
+
+   w_err_gc->lba_list = kmalloc(lba_list_size, GFP_KERNEL);
+   memcpy(w_err_gc->lba_list, emeta_to_lbas(pblk, emeta->buf),
+   lba_list_size);
 }
 
 void pblk_line_close_ws(struct work_struct *work)
@@ -1780,6 +1812,13 @@ void pblk_line_close_ws(struct work_struct *work)
ws);
struct pblk *pblk = line_ws->pblk;
struct pblk_line *line = line_ws->line;
+   struct pblk_w_err_gc *w_err_gc = line->w_err_gc;
+
+   /* Write errors makes the emeta start address stored in smeta invalid,
+* so keep a copy of the lba list until we've gc'd the line
+*/
+   if (w_err_gc->has_write_err)
+   pblk_save_lba_list(pblk, line);
 
pblk_line_close(pblk, line);
mempool_free(line_ws, pblk->gen_ws_pool);
diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c
index b0cc277..62f0548 100644
--- a/drivers/lightnvm/pblk-gc.c
+++ b/drivers/lightnvm/pblk-gc.c
@@ -138,10 +138,10 @@ static void pblk_gc_line_prepare_ws(struct work_struct 
*work)
struct pblk_line_mgmt *l_mg = >l_mg;
struct pblk_line_meta *lm = >lm;
struct pblk_gc *gc = >gc;
-   struct line_emeta *emeta_buf;
+   struct line_emeta *emeta_buf = NULL;
struct pblk_line_ws *gc_rq_ws;
struct pblk_gc_rq *gc_rq;
-   __le64 *lba_list;
+   __le64 *lba_list = NULL;
unsigned long *invalid_bitmap;
int sec_left, nr_secs, bit;
int ret;
@@ -150,34 +150,42 @@ static void pblk_gc_line_prepare_ws(struct work_struct 
*work)
if (!invalid_bitmap)
goto fail_free_ws;
 
-   emeta_buf = pblk_malloc(lm->emeta_len[0], l_mg->emeta_alloc_type,
-