On 10/13/2018 09:23 PM, Dan Williams wrote:
> The Address Range Scrub implementation tried to skip running scrubs
> against ranges that were already scrubbed by the BIOS. Unfortunately
> that support also resulted in early scrub completions as evidenced by
> this debug output from nfit_test:
>
> nd_region region9: ARS: range 1 short complete
> nd_region region3: ARS: range 1 short complete
> nd_region region4: ARS: range 2 ARS start (0)
> nd_region region4: ARS: range 2 short complete
>
> ...i.e. completions without any indications that the scrub was started.
>
> This state of affairs was hard to see in the code due to the
> proliferation of state bits and mistakenly trying to track done state
> per-range when the completion is a global property of the bus.
>
> So, kill the four ARS state bits (ARS_REQ, ARS_REQ_REDO, ARS_DONE, and
> ARS_SHORT), and replace them with just 2 request flags ARS_REQ_SHORT and
> ARS_REQ_LONG. The implementation will still complete and reap the
> results of BIOS initiated ARS, but it will not attempt to use that
> information to affect the completion status of scrubbing the ranges from
> a Linux perspective.
>
> Instead, try to synchronously run a short ARS per range at init time and
> schedule a long scrub in the background. If ARS is busy with an ARS
> request schedule both a short and a long scrub for when ARS returns to
> idle. This logic also satisfies the intent of what ARS_REQ_REDO was
> trying to achieve. The new rule is that the REQ flag stays set until the
> next successful ars_start() for that range.
>
> With the new policy that the REQ flags are not cleared until the next
> start, the implementation no longer loses requests as can be seen from
> the following log:
>
> nd_region region3: ARS: range 1 ARS start short (0)
> nd_region region9: ARS: range 1 ARS start short (0)
> nd_region region3: ARS: range 1 complete
> nd_region region4: ARS: range 2 ARS start short (0)
> nd_region region9: ARS: range 1 complete
> nd_region region9: ARS: range 1 ARS start long (0)
> nd_region region4: ARS: range 2 complete
> nd_region region3: ARS: range 1 ARS start long (0)
> nd_region region9: ARS: range 1 complete
> nd_region region3: ARS: range 1 complete
> nd_region region4: ARS: range 2 ARS start long (0)
> nd_region region4: ARS: range 2 complete
>
> ...note that the nfit_test emulated driver provides 2 buses, that is why
> some of the range indices are duplicated. Notice that each range
> now successfully completes a short and long scrub.
>
> Cc: <[email protected]>
> Fixes: 14c73f997a5e ("nfit, address-range-scrub: introduce
> nfit_spa->ars_state")
> Fixes: cc3d3458d46f ("acpi/nfit: queue issuing of ars when an uc error...")
> Reported-by: Jacek Zloch <[email protected]>
> Reported-by: Krzysztof Rusocki <[email protected]>
> Signed-off-by: Dan Williams <[email protected]>
Reviewed-by: Dave Jiang <[email protected]>
> ---
> drivers/acpi/nfit/core.c | 169
> ++++++++++++++++++++++++++--------------------
> drivers/acpi/nfit/nfit.h | 10 +--
> 2 files changed, 101 insertions(+), 78 deletions(-)
>
> diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
> index a0dfbcf8220d..f7efcd9843e0 100644
> --- a/drivers/acpi/nfit/core.c
> +++ b/drivers/acpi/nfit/core.c
> @@ -2572,7 +2572,8 @@ static int ars_get_cap(struct acpi_nfit_desc *acpi_desc,
> return cmd_rc;
> }
>
> -static int ars_start(struct acpi_nfit_desc *acpi_desc, struct nfit_spa
> *nfit_spa)
> +static int ars_start(struct acpi_nfit_desc *acpi_desc,
> + struct nfit_spa *nfit_spa, enum nfit_ars_state req_type)
> {
> int rc;
> int cmd_rc;
> @@ -2583,7 +2584,7 @@ static int ars_start(struct acpi_nfit_desc *acpi_desc,
> struct nfit_spa *nfit_spa
> memset(&ars_start, 0, sizeof(ars_start));
> ars_start.address = spa->address;
> ars_start.length = spa->length;
> - if (test_bit(ARS_SHORT, &nfit_spa->ars_state))
> + if (req_type == ARS_REQ_SHORT)
> ars_start.flags = ND_ARS_RETURN_PREV_DATA;
> if (nfit_spa_type(spa) == NFIT_SPA_PM)
> ars_start.type = ND_ARS_PERSISTENT;
> @@ -2640,6 +2641,15 @@ static void ars_complete(struct acpi_nfit_desc
> *acpi_desc,
> struct nd_region *nd_region = nfit_spa->nd_region;
> struct device *dev;
>
> + lockdep_assert_held(&acpi_desc->init_mutex);
> + /*
> + * Only advance the ARS state for ARS runs initiated by the
> + * kernel, ignore ARS results from BIOS initiated runs for scrub
> + * completion tracking.
> + */
> + if (acpi_desc->scrub_spa != nfit_spa)
> + return;
> +
> if ((ars_status->address >= spa->address && ars_status->address
> < spa->address + spa->length)
> || (ars_status->address < spa->address)) {
> @@ -2659,28 +2669,13 @@ static void ars_complete(struct acpi_nfit_desc
> *acpi_desc,
> } else
> return;
>
> - if (test_bit(ARS_DONE, &nfit_spa->ars_state))
> - return;
> -
> - if (!test_and_clear_bit(ARS_REQ, &nfit_spa->ars_state))
> - return;
> -
> + acpi_desc->scrub_spa = NULL;
> if (nd_region) {
> dev = nd_region_dev(nd_region);
> nvdimm_region_notify(nd_region, NVDIMM_REVALIDATE_POISON);
> } else
> dev = acpi_desc->dev;
> -
> - dev_dbg(dev, "ARS: range %d %s complete\n", spa->range_index,
> - test_bit(ARS_SHORT, &nfit_spa->ars_state)
> - ? "short" : "long");
> - clear_bit(ARS_SHORT, &nfit_spa->ars_state);
> - if (test_and_clear_bit(ARS_REQ_REDO, &nfit_spa->ars_state)) {
> - set_bit(ARS_SHORT, &nfit_spa->ars_state);
> - set_bit(ARS_REQ, &nfit_spa->ars_state);
> - dev_dbg(dev, "ARS: processing scrub request received while in
> progress\n");
> - } else
> - set_bit(ARS_DONE, &nfit_spa->ars_state);
> + dev_dbg(dev, "ARS: range %d complete\n", spa->range_index);
> }
>
> static int ars_status_process_records(struct acpi_nfit_desc *acpi_desc)
> @@ -2961,46 +2956,55 @@ static int acpi_nfit_query_poison(struct
> acpi_nfit_desc *acpi_desc)
> return 0;
> }
>
> -static int ars_register(struct acpi_nfit_desc *acpi_desc, struct nfit_spa
> *nfit_spa,
> - int *query_rc)
> +static int ars_register(struct acpi_nfit_desc *acpi_desc,
> + struct nfit_spa *nfit_spa)
> {
> - int rc = *query_rc;
> + int rc;
>
> - if (no_init_ars)
> + if (no_init_ars || test_bit(ARS_FAILED, &nfit_spa->ars_state))
> return acpi_nfit_register_region(acpi_desc, nfit_spa);
>
> - set_bit(ARS_REQ, &nfit_spa->ars_state);
> - set_bit(ARS_SHORT, &nfit_spa->ars_state);
> + set_bit(ARS_REQ_SHORT, &nfit_spa->ars_state);
> + set_bit(ARS_REQ_LONG, &nfit_spa->ars_state);
>
> - switch (rc) {
> + switch (acpi_nfit_query_poison(acpi_desc)) {
> case 0:
> case -EAGAIN:
> - rc = ars_start(acpi_desc, nfit_spa);
> - if (rc == -EBUSY) {
> - *query_rc = rc;
> + rc = ars_start(acpi_desc, nfit_spa, ARS_REQ_SHORT);
> + /* shouldn't happen, try again later */
> + if (rc == -EBUSY)
> break;
> - } else if (rc == 0) {
> - rc = acpi_nfit_query_poison(acpi_desc);
> - } else {
> + if (rc) {
> set_bit(ARS_FAILED, &nfit_spa->ars_state);
> break;
> }
> - if (rc == -EAGAIN)
> - clear_bit(ARS_SHORT, &nfit_spa->ars_state);
> - else if (rc == 0)
> - ars_complete(acpi_desc, nfit_spa);
> + clear_bit(ARS_REQ_SHORT, &nfit_spa->ars_state);
> + rc = acpi_nfit_query_poison(acpi_desc);
> + if (rc)
> + break;
> + acpi_desc->scrub_spa = nfit_spa;
> + ars_complete(acpi_desc, nfit_spa);
> + /*
> + * If ars_complete() says we didn't complete the
> + * short scrub, we'll try again with a long
> + * request.
> + */
> + acpi_desc->scrub_spa = NULL;
> break;
> case -EBUSY:
> + case -ENOMEM:
> case -ENOSPC:
> + /*
> + * BIOS was using ARS, wait for it to complete (or
> + * resources to become available) and then perform our
> + * own scrubs.
> + */
> break;
> default:
> set_bit(ARS_FAILED, &nfit_spa->ars_state);
> break;
> }
>
> - if (test_and_clear_bit(ARS_DONE, &nfit_spa->ars_state))
> - set_bit(ARS_REQ, &nfit_spa->ars_state);
> -
> return acpi_nfit_register_region(acpi_desc, nfit_spa);
> }
>
> @@ -3022,6 +3026,8 @@ static unsigned int __acpi_nfit_scrub(struct
> acpi_nfit_desc *acpi_desc,
> struct device *dev = acpi_desc->dev;
> struct nfit_spa *nfit_spa;
>
> + lockdep_assert_held(&acpi_desc->init_mutex);
> +
> if (acpi_desc->cancel)
> return 0;
>
> @@ -3045,21 +3051,49 @@ static unsigned int __acpi_nfit_scrub(struct
> acpi_nfit_desc *acpi_desc,
>
> ars_complete_all(acpi_desc);
> list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
> + enum nfit_ars_state req_type;
> + int rc;
> +
> if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
> continue;
> - if (test_bit(ARS_REQ, &nfit_spa->ars_state)) {
> - int rc = ars_start(acpi_desc, nfit_spa);
> -
> - clear_bit(ARS_DONE, &nfit_spa->ars_state);
> - dev = nd_region_dev(nfit_spa->nd_region);
> - dev_dbg(dev, "ARS: range %d ARS start (%d)\n",
> - nfit_spa->spa->range_index, rc);
> - if (rc == 0 || rc == -EBUSY)
> - return 1;
> - dev_err(dev, "ARS: range %d ARS failed (%d)\n",
> - nfit_spa->spa->range_index, rc);
> - set_bit(ARS_FAILED, &nfit_spa->ars_state);
> +
> + /* prefer short ARS requests first */
> + if (test_bit(ARS_REQ_SHORT, &nfit_spa->ars_state))
> + req_type = ARS_REQ_SHORT;
> + else if (test_bit(ARS_REQ_LONG, &nfit_spa->ars_state))
> + req_type = ARS_REQ_LONG;
> + else
> + continue;
> + rc = ars_start(acpi_desc, nfit_spa, req_type);
> +
> + dev = nd_region_dev(nfit_spa->nd_region);
> + dev_dbg(dev, "ARS: range %d ARS start %s (%d)\n",
> + nfit_spa->spa->range_index,
> + req_type == ARS_REQ_SHORT ? "short" : "long",
> + rc);
> + /*
> + * Hmm, we raced someone else starting ARS? Try again in
> + * a bit.
> + */
> + if (rc == -EBUSY)
> + return 1;
> + if (rc == 0) {
> + dev_WARN_ONCE(dev, acpi_desc->scrub_spa,
> + "scrub start while range %d active\n",
> + acpi_desc->scrub_spa->spa->range_index);
> + clear_bit(req_type, &nfit_spa->ars_state);
> + acpi_desc->scrub_spa = nfit_spa;
> + /*
> + * Consider this spa last for future scrub
> + * requests
> + */
> + list_move_tail(&nfit_spa->list, &acpi_desc->spas);
> + return 1;
> }
> +
> + dev_err(dev, "ARS: range %d ARS failed (%d)\n",
> + nfit_spa->spa->range_index, rc);
> + set_bit(ARS_FAILED, &nfit_spa->ars_state);
> }
> return 0;
> }
> @@ -3115,6 +3149,7 @@ static void acpi_nfit_init_ars(struct acpi_nfit_desc
> *acpi_desc,
> struct nd_cmd_ars_cap ars_cap;
> int rc;
>
> + set_bit(ARS_FAILED, &nfit_spa->ars_state);
> memset(&ars_cap, 0, sizeof(ars_cap));
> rc = ars_get_cap(acpi_desc, &ars_cap, nfit_spa);
> if (rc < 0)
> @@ -3131,16 +3166,14 @@ static void acpi_nfit_init_ars(struct acpi_nfit_desc
> *acpi_desc,
> nfit_spa->clear_err_unit = ars_cap.clear_err_unit;
> acpi_desc->max_ars = max(nfit_spa->max_ars, acpi_desc->max_ars);
> clear_bit(ARS_FAILED, &nfit_spa->ars_state);
> - set_bit(ARS_REQ, &nfit_spa->ars_state);
> }
>
> static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
> {
> struct nfit_spa *nfit_spa;
> - int rc, query_rc;
> + int rc;
>
> list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
> - set_bit(ARS_FAILED, &nfit_spa->ars_state);
> switch (nfit_spa_type(nfit_spa->spa)) {
> case NFIT_SPA_VOLATILE:
> case NFIT_SPA_PM:
> @@ -3149,20 +3182,12 @@ static int acpi_nfit_register_regions(struct
> acpi_nfit_desc *acpi_desc)
> }
> }
>
> - /*
> - * Reap any results that might be pending before starting new
> - * short requests.
> - */
> - query_rc = acpi_nfit_query_poison(acpi_desc);
> - if (query_rc == 0)
> - ars_complete_all(acpi_desc);
> -
> list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
> switch (nfit_spa_type(nfit_spa->spa)) {
> case NFIT_SPA_VOLATILE:
> case NFIT_SPA_PM:
> /* register regions and kick off initial ARS run */
> - rc = ars_register(acpi_desc, nfit_spa, &query_rc);
> + rc = ars_register(acpi_desc, nfit_spa);
> if (rc)
> return rc;
> break;
> @@ -3374,7 +3399,8 @@ static int acpi_nfit_clear_to_send(struct
> nvdimm_bus_descriptor *nd_desc,
> return __acpi_nfit_clear_to_send(nd_desc, nvdimm, cmd);
> }
>
> -int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, unsigned long
> flags)
> +int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc,
> + enum nfit_ars_state req_type)
> {
> struct device *dev = acpi_desc->dev;
> int scheduled = 0, busy = 0;
> @@ -3394,14 +3420,10 @@ int acpi_nfit_ars_rescan(struct acpi_nfit_desc
> *acpi_desc, unsigned long flags)
> if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
> continue;
>
> - if (test_and_set_bit(ARS_REQ, &nfit_spa->ars_state)) {
> + if (test_and_set_bit(req_type, &nfit_spa->ars_state))
> busy++;
> - set_bit(ARS_REQ_REDO, &nfit_spa->ars_state);
> - } else {
> - if (test_bit(ARS_SHORT, &flags))
> - set_bit(ARS_SHORT, &nfit_spa->ars_state);
> + else
> scheduled++;
> - }
> }
> if (scheduled) {
> sched_ars(acpi_desc);
> @@ -3587,10 +3609,11 @@ static void acpi_nfit_update_notify(struct device
> *dev, acpi_handle handle)
> static void acpi_nfit_uc_error_notify(struct device *dev, acpi_handle handle)
> {
> struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(dev);
> - unsigned long flags = (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) ?
> - 0 : 1 << ARS_SHORT;
>
> - acpi_nfit_ars_rescan(acpi_desc, flags);
> + if (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON)
> + acpi_nfit_ars_rescan(acpi_desc, ARS_REQ_LONG);
> + else
> + acpi_nfit_ars_rescan(acpi_desc, ARS_REQ_SHORT);
> }
>
> void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event)
> diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h
> index 8c1af38a5dee..a7d95b427efd 100644
> --- a/drivers/acpi/nfit/nfit.h
> +++ b/drivers/acpi/nfit/nfit.h
> @@ -133,10 +133,8 @@ enum nfit_dimm_notifiers {
> };
>
> enum nfit_ars_state {
> - ARS_REQ,
> - ARS_REQ_REDO,
> - ARS_DONE,
> - ARS_SHORT,
> + ARS_REQ_SHORT,
> + ARS_REQ_LONG,
> ARS_FAILED,
> };
>
> @@ -223,6 +221,7 @@ struct acpi_nfit_desc {
> struct device *dev;
> u8 ars_start_flags;
> struct nd_cmd_ars_status *ars_status;
> + struct nfit_spa *scrub_spa;
> struct delayed_work dwork;
> struct list_head list;
> struct kernfs_node *scrub_count_state;
> @@ -277,7 +276,8 @@ struct nfit_blk {
>
> extern struct list_head acpi_descs;
> extern struct mutex acpi_desc_lock;
> -int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, unsigned long
> flags);
> +int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc,
> + enum nfit_ars_state req_type);
>
> #ifdef CONFIG_X86_MCE
> void nfit_mce_register(void);
>
_______________________________________________
Linux-nvdimm mailing list
[email protected]
https://lists.01.org/mailman/listinfo/linux-nvdimm