Re: [PATCH v9 6/7] powerpc/mce: Handle UE event for memcpy_mcsafe

2019-08-14 Thread Santosh Sivaraj
Hi Balbir,

Balbir Singh  writes:

> On 12/8/19 7:22 pm, Santosh Sivaraj wrote:
>> If we take a UE on one of the instructions with a fixup entry, set nip
>> to continue execution at the fixup entry. Do not process the event
>> further or print it.
>> 
>> Co-developed-by: Reza Arbab 
>> Signed-off-by: Reza Arbab 
>> Cc: Mahesh Salgaonkar 
>> Signed-off-by: Santosh Sivaraj 
>> ---
>
> Isn't this based on https://patchwork.ozlabs.org/patch/895294/? If so it
> should still have my author tag and signed-off-by

Originally, when I received the series for posting, it had Reza's authorship and
signed-off-by. Since the patch changed significantly, I added Reza as
co-developed-by. I will update this in the next spin.

https://lore.kernel.org/linuxppc-dev/20190702051932.511-1-sant...@fossix.org/

Santosh
>
> Balbir Singh
>
>>  arch/powerpc/include/asm/mce.h  |  4 +++-
>>  arch/powerpc/kernel/mce.c       | 16 ++++++++++++++++
>>  arch/powerpc/kernel/mce_power.c | 15 +++++++++++++--
>>  3 files changed, 32 insertions(+), 3 deletions(-)
>> 
>> diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
>> index f3a6036b6bc0..e1931c8c2743 100644
>> --- a/arch/powerpc/include/asm/mce.h
>> +++ b/arch/powerpc/include/asm/mce.h
>> @@ -122,7 +122,8 @@ struct machine_check_event {
>>  enum MCE_UeErrorType ue_error_type:8;
>>  u8  effective_address_provided;
>>  u8  physical_address_provided;
>> -u8  reserved_1[5];
>> +u8  ignore_event;
>> +u8  reserved_1[4];
>>  u64 effective_address;
>>  u64 physical_address;
>>  u8  reserved_2[8];
>> @@ -193,6 +194,7 @@ struct mce_error_info {
>>  enum MCE_Initiator  initiator:8;
>>  enum MCE_ErrorClass error_class:8;
>>  bool                sync_error;
>> +bool                ignore_event;
>>  };
>>  
>>  #define MAX_MC_EVT  100
>> diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
>> index a3b122a685a5..ec4b3e1087be 100644
>> --- a/arch/powerpc/kernel/mce.c
>> +++ b/arch/powerpc/kernel/mce.c
>> @@ -149,6 +149,7 @@ void save_mce_event(struct pt_regs *regs, long handled,
>>  if (phys_addr != ULONG_MAX) {
>>  mce->u.ue_error.physical_address_provided = true;
>>  mce->u.ue_error.physical_address = phys_addr;
>> +mce->u.ue_error.ignore_event = mce_err->ignore_event;
>>  machine_check_ue_event(mce);
>>  }
>>  }
>> @@ -266,8 +267,17 @@ static void machine_process_ue_event(struct work_struct 
>> *work)
>>  /*
>>   * This should probably queued elsewhere, but
>>   * oh! well
>> + *
>> + * Don't report this machine check because the caller has
>> + * asked us to ignore the event, it has a fixup handler which
>> + * will do the appropriate error handling and reporting.
>>   */
>>  if (evt->error_type == MCE_ERROR_TYPE_UE) {
>> +if (evt->u.ue_error.ignore_event) {
>> +__this_cpu_dec(mce_ue_count);
>> +continue;
>> +}
>> +
>>  if (evt->u.ue_error.physical_address_provided) {
>>  unsigned long pfn;
>>  
>> @@ -301,6 +311,12 @@ static void machine_check_process_queued_event(struct 
>> irq_work *work)
>>  while (__this_cpu_read(mce_queue_count) > 0) {
>>  index = __this_cpu_read(mce_queue_count) - 1;
>>  evt = this_cpu_ptr(&mce_event_queue[index]);
>> +
>> +if (evt->error_type == MCE_ERROR_TYPE_UE &&
>> +evt->u.ue_error.ignore_event) {
>> +__this_cpu_dec(mce_queue_count);
>> +continue;
>> +}
>>  machine_check_print_event_info(evt, false, false);
>>  __this_cpu_dec(mce_queue_count);
>>  }
>> diff --git a/arch/powerpc/kernel/mce_power.c 
>> b/arch/powerpc/kernel/mce_power.c
>> index e74816f045f8..1dd87f6f5186 100644
>> --- a/arch/powerpc/kernel/mce_power.c
>> +++ b/arch/powerpc/kernel/mce_power.c
>> @@ -11,6 +11,7 @@
>>  
>>  #include 
>>  #include 
>> +#include 
>>  #include 
>>  #include 
>>  #include 
>> @@ -18,6 +19,7 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>>  
>>  /*
>>   * Convert an address related to an mm to a physical address.
>> @@ -559,9 +561,18 @@ static int mce_handle_derror(struct pt_regs *regs,
>>  return 0;
>>  }
>>  
>> -static long mce_handle_ue_error(struct pt_regs *regs)
>> +static long mce_handle_ue_error(struct pt_regs *regs,
>> +struct mce_error_info *mce_err)
>>  {
>>  long handled = 0;
>> +const struct 

Re: [PATCH v9 6/7] powerpc/mce: Handle UE event for memcpy_mcsafe

2019-08-14 Thread Balbir Singh



On 12/8/19 7:22 pm, Santosh Sivaraj wrote:
> If we take a UE on one of the instructions with a fixup entry, set nip
> to continue execution at the fixup entry. Do not process the event
> further or print it.
> 
> Co-developed-by: Reza Arbab 
> Signed-off-by: Reza Arbab 
> Cc: Mahesh Salgaonkar 
> Signed-off-by: Santosh Sivaraj 
> ---

Isn't this based on https://patchwork.ozlabs.org/patch/895294/? If so, it should
still have my author tag and Signed-off-by.

Balbir Singh

>  arch/powerpc/include/asm/mce.h  |  4 +++-
>  arch/powerpc/kernel/mce.c       | 16 ++++++++++++++++
>  arch/powerpc/kernel/mce_power.c | 15 +++++++++++++--
>  3 files changed, 32 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
> index f3a6036b6bc0..e1931c8c2743 100644
> --- a/arch/powerpc/include/asm/mce.h
> +++ b/arch/powerpc/include/asm/mce.h
> @@ -122,7 +122,8 @@ struct machine_check_event {
>   enum MCE_UeErrorType ue_error_type:8;
>   u8  effective_address_provided;
>   u8  physical_address_provided;
> - u8  reserved_1[5];
> + u8  ignore_event;
> + u8  reserved_1[4];
>   u64 effective_address;
>   u64 physical_address;
>   u8  reserved_2[8];
> @@ -193,6 +194,7 @@ struct mce_error_info {
>   enum MCE_Initiator  initiator:8;
>   enum MCE_ErrorClass error_class:8;
>   bool                sync_error;
> + bool                ignore_event;
>  };
>  
>  #define MAX_MC_EVT   100
> diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
> index a3b122a685a5..ec4b3e1087be 100644
> --- a/arch/powerpc/kernel/mce.c
> +++ b/arch/powerpc/kernel/mce.c
> @@ -149,6 +149,7 @@ void save_mce_event(struct pt_regs *regs, long handled,
>   if (phys_addr != ULONG_MAX) {
>   mce->u.ue_error.physical_address_provided = true;
>   mce->u.ue_error.physical_address = phys_addr;
> + mce->u.ue_error.ignore_event = mce_err->ignore_event;
>   machine_check_ue_event(mce);
>   }
>   }
> @@ -266,8 +267,17 @@ static void machine_process_ue_event(struct work_struct 
> *work)
>   /*
>* This should probably queued elsewhere, but
>* oh! well
> +  *
> +  * Don't report this machine check because the caller has
> +  * asked us to ignore the event, it has a fixup handler which
> +  * will do the appropriate error handling and reporting.
>*/
>   if (evt->error_type == MCE_ERROR_TYPE_UE) {
> + if (evt->u.ue_error.ignore_event) {
> + __this_cpu_dec(mce_ue_count);
> + continue;
> + }
> +
>   if (evt->u.ue_error.physical_address_provided) {
>   unsigned long pfn;
>  
> @@ -301,6 +311,12 @@ static void machine_check_process_queued_event(struct 
> irq_work *work)
>   while (__this_cpu_read(mce_queue_count) > 0) {
>   index = __this_cpu_read(mce_queue_count) - 1;
>   evt = this_cpu_ptr(&mce_event_queue[index]);
> +
> + if (evt->error_type == MCE_ERROR_TYPE_UE &&
> + evt->u.ue_error.ignore_event) {
> + __this_cpu_dec(mce_queue_count);
> + continue;
> + }
>   machine_check_print_event_info(evt, false, false);
>   __this_cpu_dec(mce_queue_count);
>   }
> diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
> index e74816f045f8..1dd87f6f5186 100644
> --- a/arch/powerpc/kernel/mce_power.c
> +++ b/arch/powerpc/kernel/mce_power.c
> @@ -11,6 +11,7 @@
>  
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -18,6 +19,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  /*
>   * Convert an address related to an mm to a physical address.
> @@ -559,9 +561,18 @@ static int mce_handle_derror(struct pt_regs *regs,
>   return 0;
>  }
>  
> -static long mce_handle_ue_error(struct pt_regs *regs)
> +static long mce_handle_ue_error(struct pt_regs *regs,
> + struct mce_error_info *mce_err)
>  {
>   long handled = 0;
> + const struct exception_table_entry *entry;
> +
> + entry = search_kernel_exception_table(regs->nip);
> + if (entry) {
> + mce_err->ignore_event = true;
> + regs->nip = extable_fixup(entry);
> + return 1;
> + }
>  
>   /*
>* On specific SCOM read via MMIO we may get a machine check
> @@ -594,7 +605,7 @@ static long mce_handle_error(struct pt_regs 

Re: [PATCH v9 6/7] powerpc/mce: Handle UE event for memcpy_mcsafe

2019-08-14 Thread Mahesh Jagannath Salgaonkar
On 8/12/19 2:52 PM, Santosh Sivaraj wrote:
> If we take a UE on one of the instructions with a fixup entry, set nip
> to continue execution at the fixup entry. Do not process the event
> further or print it.
> 
> Co-developed-by: Reza Arbab 
> Signed-off-by: Reza Arbab 
> Cc: Mahesh Salgaonkar 
> Signed-off-by: Santosh Sivaraj 

Looks good to me.

Reviewed-by: Mahesh Salgaonkar 

Thanks,
-Mahesh.

> ---
>  arch/powerpc/include/asm/mce.h  |  4 +++-
>  arch/powerpc/kernel/mce.c       | 16 ++++++++++++++++
>  arch/powerpc/kernel/mce_power.c | 15 +++++++++++++--
>  3 files changed, 32 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
> index f3a6036b6bc0..e1931c8c2743 100644
> --- a/arch/powerpc/include/asm/mce.h
> +++ b/arch/powerpc/include/asm/mce.h
> @@ -122,7 +122,8 @@ struct machine_check_event {
>   enum MCE_UeErrorType ue_error_type:8;
>   u8  effective_address_provided;
>   u8  physical_address_provided;
> - u8  reserved_1[5];
> + u8  ignore_event;
> + u8  reserved_1[4];
>   u64 effective_address;
>   u64 physical_address;
>   u8  reserved_2[8];
> @@ -193,6 +194,7 @@ struct mce_error_info {
>   enum MCE_Initiator  initiator:8;
>   enum MCE_ErrorClass error_class:8;
>   bool                sync_error;
> + bool                ignore_event;
>  };
>  
>  #define MAX_MC_EVT   100
> diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
> index a3b122a685a5..ec4b3e1087be 100644
> --- a/arch/powerpc/kernel/mce.c
> +++ b/arch/powerpc/kernel/mce.c
> @@ -149,6 +149,7 @@ void save_mce_event(struct pt_regs *regs, long handled,
>   if (phys_addr != ULONG_MAX) {
>   mce->u.ue_error.physical_address_provided = true;
>   mce->u.ue_error.physical_address = phys_addr;
> + mce->u.ue_error.ignore_event = mce_err->ignore_event;
>   machine_check_ue_event(mce);
>   }
>   }
> @@ -266,8 +267,17 @@ static void machine_process_ue_event(struct work_struct 
> *work)
>   /*
>* This should probably queued elsewhere, but
>* oh! well
> +  *
> +  * Don't report this machine check because the caller has
> +  * asked us to ignore the event, it has a fixup handler which
> +  * will do the appropriate error handling and reporting.
>*/
>   if (evt->error_type == MCE_ERROR_TYPE_UE) {
> + if (evt->u.ue_error.ignore_event) {
> + __this_cpu_dec(mce_ue_count);
> + continue;
> + }
> +
>   if (evt->u.ue_error.physical_address_provided) {
>   unsigned long pfn;
>  
> @@ -301,6 +311,12 @@ static void machine_check_process_queued_event(struct 
> irq_work *work)
>   while (__this_cpu_read(mce_queue_count) > 0) {
>   index = __this_cpu_read(mce_queue_count) - 1;
>   evt = this_cpu_ptr(&mce_event_queue[index]);
> +
> + if (evt->error_type == MCE_ERROR_TYPE_UE &&
> + evt->u.ue_error.ignore_event) {
> + __this_cpu_dec(mce_queue_count);
> + continue;
> + }
>   machine_check_print_event_info(evt, false, false);
>   __this_cpu_dec(mce_queue_count);
>   }
> diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
> index e74816f045f8..1dd87f6f5186 100644
> --- a/arch/powerpc/kernel/mce_power.c
> +++ b/arch/powerpc/kernel/mce_power.c
> @@ -11,6 +11,7 @@
>  
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -18,6 +19,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  /*
>   * Convert an address related to an mm to a physical address.
> @@ -559,9 +561,18 @@ static int mce_handle_derror(struct pt_regs *regs,
>   return 0;
>  }
>  
> -static long mce_handle_ue_error(struct pt_regs *regs)
> +static long mce_handle_ue_error(struct pt_regs *regs,
> + struct mce_error_info *mce_err)
>  {
>   long handled = 0;
> + const struct exception_table_entry *entry;
> +
> + entry = search_kernel_exception_table(regs->nip);
> + if (entry) {
> + mce_err->ignore_event = true;
> + regs->nip = extable_fixup(entry);
> + return 1;
> + }
>  
>   /*
>* On specific SCOM read via MMIO we may get a machine check
> @@ -594,7 +605,7 @@ static long mce_handle_error(struct pt_regs *regs,
>   &phys_addr);
>  
>   if (!handled &&