Gavin Shan <[email protected]> writes:
> Use error_abort in acpi_ghes_memory_errors() so that the caller needn't
> explicitly call abort() on errors. With this change, its return value
> isn't needed any more.
>
> Suggested-by: Igor Mammedov <[email protected]>
> Signed-off-by: Gavin Shan <[email protected]>
> ---
> hw/acpi/ghes-stub.c | 6 +++---
> hw/acpi/ghes.c | 15 ++++-----------
> include/hw/acpi/ghes.h | 5 +++--
> target/arm/kvm.c | 10 +++-------
> 4 files changed, 13 insertions(+), 23 deletions(-)
>
> diff --git a/hw/acpi/ghes-stub.c b/hw/acpi/ghes-stub.c
> index 4faf573aeb..4ef914ffc5 100644
> --- a/hw/acpi/ghes-stub.c
> +++ b/hw/acpi/ghes-stub.c
> @@ -11,10 +11,10 @@
> #include "qemu/osdep.h"
> #include "hw/acpi/ghes.h"
>
> -int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
> - uint64_t *addresses, uint32_t num_of_addresses)
> +void acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
> + uint64_t *addresses, uint32_t num_of_addresses,
> + Error **errp)
> {
> - return -1;
> }
Before the patch, this function always fails: it returns -1.
Afterwards, it always succeeds: it doesn't set @errp.
Which one is correct?
>
> AcpiGhesState *acpi_ghes_get_state(void)
> diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
> index 055e5d719a..aa469c03f2 100644
> --- a/hw/acpi/ghes.c
> +++ b/hw/acpi/ghes.c
> @@ -543,8 +543,9 @@ void ghes_record_cper_errors(AcpiGhesState *ags, const
> void *cper, size_t len,
> notifier_list_notify(&acpi_generic_error_notifiers, &source_id);
> }
>
> -int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
> - uint64_t *addresses, uint32_t num_of_addresses)
> +void acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
> + uint64_t *addresses, uint32_t num_of_addresses,
> + Error **errp)
qapi/error.h:
* - Whenever practical, also return a value that indicates success /
* failure. This can make the error checking more concise, and can
* avoid useless error object creation and destruction. Note that
* we still have many functions returning void. We recommend
* • bool-valued functions return true on success / false on failure,
* • pointer-valued functions return non-null / null pointer, and
* • integer-valued functions return non-negative / negative.
> {
> /* Memory Error Section Type */
> const uint8_t guid[] =
> @@ -555,7 +556,6 @@ int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t
> source_id,
> * Table 17-13 Generic Error Data Entry
> */
> QemuUUID fru_id = {};
> - Error *errp = NULL;
> int data_length;
> GArray *block;
> uint32_t block_status, i;
> @@ -592,16 +592,9 @@ int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t
> source_id,
> }
>
> /* Report the error */
> - ghes_record_cper_errors(ags, block->data, block->len, source_id, &errp);
> + ghes_record_cper_errors(ags, block->data, block->len, source_id, errp);
>
> g_array_free(block, true);
> -
> - if (errp) {
> - error_report_err(errp);
> - return -1;
> - }
> -
> - return 0;
> }
The error reporting moves into the caller.
>
> AcpiGhesState *acpi_ghes_get_state(void)
> diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h
> index f73908985d..35c7bbbb01 100644
> --- a/include/hw/acpi/ghes.h
> +++ b/include/hw/acpi/ghes.h
> @@ -98,8 +98,9 @@ void acpi_build_hest(AcpiGhesState *ags, GArray *table_data,
> const char *oem_id, const char *oem_table_id);
> void acpi_ghes_add_fw_cfg(AcpiGhesState *vms, FWCfgState *s,
> GArray *hardware_errors);
> -int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
> - uint64_t *addresses, uint32_t num_of_addresses);
> +void acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
> + uint64_t *addresses, uint32_t num_of_addresses,
> + Error **errp);
> void ghes_record_cper_errors(AcpiGhesState *ags, const void *cper, size_t
> len,
> uint16_t source_id, Error **errp);
>
> diff --git a/target/arm/kvm.c b/target/arm/kvm.c
> index 459ca4a9b0..a889315606 100644
> --- a/target/arm/kvm.c
> +++ b/target/arm/kvm.c
> @@ -2458,13 +2458,9 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code,
> void *addr)
> addresses[0] = paddr;
> if (code == BUS_MCEERR_AR) {
> kvm_cpu_synchronize_state(c);
> - if (!acpi_ghes_memory_errors(ags, ACPI_HEST_SRC_ID_SYNC,
> - addresses, 1)) {
> - kvm_inject_arm_sea(c);
> - } else {
> - error_report("failed to record the error");
> - abort();
> - }
> + acpi_ghes_memory_errors(ags, ACPI_HEST_SRC_ID_SYNC,
> + addresses, 1, &error_abort);
> + kvm_inject_arm_sea(c);
Before the patch, we get two error reports, like this:
qemu-system-FOO: OSPM does not acknowledge previous error, so can not
record CPER for current error anymore
qemu-system-FOO: failed to record the error
Aborted (core dumped)
Such error cascades should be avoided.
Afterwards, we get one:
Unexpected error at SOURCE-FILE:LINE-NUMBER:
qemu-system-FOO: OSPM does not acknowledge previous error, so can not
record CPER for current error anymore
Aborted (core dumped)
Are all errors reported by acpi_ghes_memory_errors() programming errors,
i.e. when an error is reported, there's a bug for us to fix?
If not, abort() is wrong before the patch, and &error_abort is wrong
afterwards.
You can use &error_fatal for fatal errors that are not programming
errors.
> }
> return;
> }