powerpc/kexec: Speedup kexec hash PTE tear down

    Commit: d504bed676caad29a3dba3d3727298c560628f5c upstream

    Currently for kexec the PTE tear down on 1TB segment systems normally
    requires 3 hcalls for each PTE removal. On a machine with 32GB of
    memory it can take around a minute to remove all the PTEs.

    This optimises the path so that we only remove PTEs that are valid.
    It also uses the HCALL that reads 4 PTEs at once.  For the common case
    where a PTE is invalid in a 1TB segment, this turns the 3 HCALLs per PTE
    down to 1 HCALL per 4 PTEs.

    This gives a > 10x speedup in kexec times on PHYP, taking a 32GB
    machine from around 1 minute down to a few seconds.

    Signed-off-by: Michael Neuling <[email protected]>
    Signed-off-by: Benjamin Herrenschmidt <[email protected]>
    Signed-off-by: Kamalesh babulal <[email protected]>
    cc: Anton Blanchard <[email protected]>
---
 arch/powerpc/platforms/pseries/lpar.c |   33 ++++++++++++++++++++-------------
 1 file changed, 20 insertions(+), 13 deletions(-)

Index: b/arch/powerpc/platforms/pseries/lpar.c
===================================================================
--- a/arch/powerpc/platforms/pseries/lpar.c     2011-03-07 00:50:53.695992154 -0800
+++ b/arch/powerpc/platforms/pseries/lpar.c     2011-03-07 02:09:06.133815481 -0800
@@ -366,21 +366,28 @@
 {
        unsigned long size_bytes = 1UL << ppc64_pft_size;
        unsigned long hpte_count = size_bytes >> 4;
-       unsigned long dummy1, dummy2, dword0;
+       struct {
+               unsigned long pteh;
+               unsigned long ptel;
+       } ptes[4];
        long lpar_rc;
-       int i;
+       int i, j;

-       /* TODO: Use bulk call */
-       for (i = 0; i < hpte_count; i++) {
-               /* dont remove HPTEs with VRMA mappings */
-               lpar_rc = plpar_pte_remove_raw(H_ANDCOND, i, HPTE_V_1TB_SEG,
-                                               &dummy1, &dummy2);
-               if (lpar_rc == H_NOT_FOUND) {
-                       lpar_rc = plpar_pte_read_raw(0, i, &dword0, &dummy1);
-                       if (!lpar_rc && ((dword0 & HPTE_V_VRMA_MASK)
-                               != HPTE_V_VRMA_MASK))
-                               /* Can be hpte for 1TB Seg. So remove it */
-                               plpar_pte_remove_raw(0, i, 0, &dummy1, &dummy2);
+       /* Read in batches of 4,
+        * invalidate only valid entries not in the VRMA
+        * hpte_count will be a multiple of 4
+        */
+       for (i = 0; i < hpte_count; i += 4) {
+               lpar_rc = plpar_pte_read_4_raw(0, i, (void *)ptes);
+               if (lpar_rc != H_SUCCESS)
+                       continue;
+               for (j = 0; j < 4; j++) {
+                       if ((ptes[j].pteh & HPTE_V_VRMA_MASK) ==
+                               HPTE_V_VRMA_MASK)
+                               continue;
+                       if (ptes[j].pteh & HPTE_V_VALID)
+                               plpar_pte_remove_raw(0, i + j, 0,
+                                       &(ptes[j].pteh), &(ptes[j].ptel));
                }
        }
 }

_______________________________________________
stable mailing list
[email protected]
http://linux.kernel.org/mailman/listinfo/stable

Reply via email to