Hi Peter,

After updating my UMDs (mesa/etc) over the weekend I cannot reproduce 
the bug to begin with.  I'll try jumping directly to the intersection 
and see if I can reproduce the fault there otherwise I'll have to 
rollback my umds.

Hopefully I can test this tomorrow.

Tom

On 2018-12-03 10:41 a.m., Peter Zijlstra wrote:
> On Fri, Nov 30, 2018 at 04:19:46PM +0000, StDenis, Tom wrote:
>> NAK I get a failure in TTM on init with your x86/mm branch (see attached
>> dmesg).
> 
> So the good news is that with some additional self-tests I can trivially
> reproduce this. The bad news is that an otherwise straight forward
> cleanup seems to make CPA horribly mad at me.
> 
> And since we're somewhat late in the release cycle, I suppose we should
> do the simple thing first, and then I can try and figure out this CPA
> mess later.
> 
> So how about this relatively simple partial revert to sort the problem.
> 
> ---
> Subject: x86/mm/cpa: Fix cpa_flush_array() TLB invalidation
> 
> In commit:
> 
>    a7295fd53c39 ("x86/mm/cpa: Use flush_tlb_kernel_range()")
> 
> I misread the cpa array code and incorrectly used
> tlb_flush_kernel_range(), resulting in missing TLB flushes and
> consequent failures.
> 
> Instead do a full invalidate in this case -- for now.
> 
> Fixes: a7295fd53c39 ("x86/mm/cpa: Use flush_tlb_kernel_range()")
> Reported-by: "StDenis, Tom" <[email protected]>
> Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> ---
>   arch/x86/mm/pageattr.c | 24 ++++++++++++++++--------
>   1 file changed, 16 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
> index bac35001d896..61bc7d1800d7 100644
> --- a/arch/x86/mm/pageattr.c
> +++ b/arch/x86/mm/pageattr.c
> @@ -285,20 +285,16 @@ static void cpa_flush_all(unsigned long cache)
>       on_each_cpu(__cpa_flush_all, (void *) cache, 1);
>   }
>   
> -static bool __cpa_flush_range(unsigned long start, int numpages, int cache)
> +static bool __inv_flush_all(int cache)
>   {
>       BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
>   
> -     WARN_ON(PAGE_ALIGN(start) != start);
> -
>       if (cache && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
>               cpa_flush_all(cache);
>               return true;
>       }
>   
> -     flush_tlb_kernel_range(start, start + PAGE_SIZE * numpages);
> -
> -     return !cache;
> +     return false;
>   }
>   
>   static void cpa_flush_range(unsigned long start, int numpages, int cache)
> @@ -306,7 +302,14 @@ static void cpa_flush_range(unsigned long start, int 
> numpages, int cache)
>       unsigned int i, level;
>       unsigned long addr;
>   
> -     if (__cpa_flush_range(start, numpages, cache))
> +     WARN_ON(PAGE_ALIGN(start) != start);
> +
> +     if (__inv_flush_all(cache))
> +             return;
> +
> +     flush_tlb_kernel_range(start, start + PAGE_SIZE * numpages);
> +
> +     if (!cache)
>               return;
>   
>       /*
> @@ -332,7 +335,12 @@ static void cpa_flush_array(unsigned long baddr, 
> unsigned long *start,
>   {
>       unsigned int i, level;
>   
> -     if (__cpa_flush_range(baddr, numpages, cache))
> +     if (__inv_flush_all(cache))
> +             return;
> +
> +     flush_tlb_all();
> +
> +     if (!cache)
>               return;
>   
>       /*
> 

Reply via email to