The patch below (against 2_4_devel) implements using large-page TLB
entries to map kernel pages on the 40x.  paulus did the basic design,
and I tweaked and debugged it. It's a bit ugly in places (particularly
the handling of iopa()) and will need cleaning up, but it does seem to
work.

It works as follows: 40x now uses an explicit _PMD_PRESENT bit, rather
than just checking if the high bits are non-zero.  If this bit is set
in a PMD entry it means that it contains a valid pointer to a page of
PTEs.

If _PMD_PRESENT is not set, but any of bits 24-26 are non-zero, then
it is a large-page PTE.  Bits 24-26 give the size (and are shifted
into place by the TLB miss handler).  The remaining bits have the same
meaning as in a normal PTE.

Theoretically the entry can represent any of the 40x's allowed page
sizes, except size 0 (1KB), but in practice only 4MB and 16MB pages are
likely to be useful - since each PMD entry corresponds to a 4MB
region, using a smaller page size would lead to that page mapping
being repeated across that 4MB region.  To use 16MB pages, 4 adjacent
PMD entries must all be filled with the same PTE value.

The only large-page PTEs used are created in mapin_ram() for the
kernel mapping of system RAM.

diff -urN /home/dgibson/kernel/linuxppc_2_4_devel/arch/ppc/kernel/head_4xx.S 
linux-grinch-largepage/arch/ppc/kernel/head_4xx.S
--- /home/dgibson/kernel/linuxppc_2_4_devel/arch/ppc/kernel/head_4xx.S  Thu May 
30 18:15:28 2002
+++ linux-grinch-largepage/arch/ppc/kernel/head_4xx.S   Fri May 31 10:54:30 2002
@@ -261,10 +261,10 @@
        tophys(r21, r21)
        rlwimi  r21, r20, 12, 20, 29    /* Create L1 (pgdir/pmd) address */
        lwz     r21, 0(r21)             /* Get L1 entry */
-       rlwinm. r22, r21, 0, 0, 19      /* Extract L2 (pte) base address */
+       andi.   r22, r21, _PMD_PRESENT  /* Check if it points to a PTE page */
        beq     2f                      /* Bail if no table */

-       tophys(r22, r22)
+       tophys(r22, r21)
        rlwimi  r22, r20, 22, 20, 29    /* Compute PTE address */
        lwz     r21, 0(r22)             /* Get Linux PTE */

@@ -495,33 +495,40 @@
        tophys(r21, r21)
        rlwimi  r21, r20, 12, 20, 29    /* Create L1 (pgdir/pmd) address */
        lwz     r21, 0(r21)             /* Get L1 entry */
-       rlwinm. r22, r21, 0, 0, 19      /* Extract L2 (pte) base address */
+       andi.   r22, r21, _PMD_PRESENT  /* check if it points to pte page */
        beq     2f                      /* Bail if no table */

-       tophys(r22, r22)
+       tophys(r22, r21)
        rlwimi  r22, r20, 22, 20, 29    /* Compute PTE address */
        lwz     r21, 0(r22)             /* Get Linux PTE */
        andi.   r23, r21, _PAGE_PRESENT
-       beq     2f
+       beq     5f

        ori     r21, r21, _PAGE_ACCESSED
        stw     r21, 0(r22)

-       /* Most of the Linux PTE is ready to load into the TLB LO.
-        * We set ZSEL, where only the LS-bit determines user access.
-        * We set execute, because we don't have the granularity to
-        * properly set this at the page level (Linux problem).
-        * If shared is set, we cause a zero PID->TID load.
-        * Many of these bits are software only.  Bits we don't set
-        * here we (properly should) assume have the appropriate value.
+       /* Create TLB tag.  This is the faulting address plus a static
+        * set of bits.  These are size, valid, E, U0.
         */
-       li      r22, 0x0ce2
-       andc    r21, r21, r22           /* Make sure 20, 21 are zero */
+       li      r22, 0x00c0
+       rlwimi  r20, r22, 0, 20, 31

        b       finish_tlb_load

-
+       /* Check for possible large-page pmd entry */
 2:
+       rlwinm. r22,r21,2,22,24         /* size != 0 means large-page */
+       beq     5f
+
+       /* Create EPN.  This is the faulting address plus a static
+        * set of bits (valid, E, U0) plus the size from the PMD.
+        */
+       ori     r22,r22,0x40
+       rlwimi  r20, r22, 0, 20, 31
+
+       b       finish_tlb_load
+
+5:
        /* The bailout.  Restore registers to pre-exception conditions
         * and call the heavyweights to help us out.
         */
@@ -588,32 +595,40 @@
        tophys(r21, r21)
        rlwimi  r21, r20, 12, 20, 29    /* Create L1 (pgdir/pmd) address */
        lwz     r21, 0(r21)             /* Get L1 entry */
-       rlwinm. r22, r21, 0, 0, 19      /* Extract L2 (pte) base address */
+       andi.   r22, r21, _PMD_PRESENT  /* check if it points to pte page */
        beq     2f                      /* Bail if no table */

-       tophys(r22, r22)
+       tophys(r22, r21)
        rlwimi  r22, r20, 22, 20, 29    /* Compute PTE address */
        lwz     r21, 0(r22)             /* Get Linux PTE */
        andi.   r23, r21, _PAGE_PRESENT
-       beq     2f
+       beq     5f

        ori     r21, r21, _PAGE_ACCESSED
        stw     r21, 0(r22)

-       /* Most of the Linux PTE is ready to load into the TLB LO.
-        * We set ZSEL, where only the LS-bit determines user access.
-        * We set execute, because we don't have the granularity to
-        * properly set this at the page level (Linux problem).
-        * If shared is set, we cause a zero PID->TID load.
-        * Many of these bits are software only.  Bits we don't set
-        * here we (properly should) assume have the appropriate value.
+       /* Create EPN.  This is the faulting address plus a static
+        * set of bits.  These are size, valid, E, U0.
         */
-       li      r22, 0x0ce2
-       andc    r21, r21, r22           /* Make sure 20, 21 are zero */
+       li      r22, 0x00c0
+       rlwimi  r20, r22, 0, 20, 31

        b       finish_tlb_load

+       /* Check for possible large-page pmd entry */
 2:
+       rlwinm. r22,r21,2,22,24         /* size != 0 means large-page */
+       beq     5f
+
+       /* Create EPN.  This is the faulting address plus a static
+        * set of bits (valid=1, E=0, U0=0) plus the size from the PMD.
+        */
+       ori     r22,r22,0x40
+       rlwimi  r20, r22, 0, 20, 31
+
+       b       finish_tlb_load
+
+5:
        /* The bailout.  Restore registers to pre-exception conditions
         * and call the heavyweights to help us out.
         */
@@ -749,7 +764,14 @@
         * EPN is already in the TLB.
         */
        tlbsx.  r23, 0, r20
-       beq     6f
+       bne     8f
+       lwz     r22,9f at l(0)
+       addi    r22,r22,1
+       stw     r22,9f at l(0)
+       b       6f
+tlb_miss_hit:
+9:     .long   0
+8:

        /* load the next available TLB index.
        */
@@ -766,14 +788,16 @@
        stw     r23, tlb_4xx_index at l(0)

 6:
+       /*
+        * Clear out the software-only bits in the PTE to generate the
+        * TLB_DATA value.  These are the bottom 2 bits of RPN, the
+        * top 3 bits of the zone field, and M.
+        */
+       li      r22, 0x0ce2
+       andc    r21, r21, r22           /* Make sure 20, 21 are zero */
+
        tlbwe   r21, r23, TLB_DATA              /* Load TLB LO */

-       /* Create EPN.  This is the faulting address plus a static
-        * set of bits.  These are size, valid, E, U0, and ensure
-        * bits 20 and 21 are zero.
-        */
-       li      r22, 0x00c0
-       rlwimi  r20, r22, 0, 20, 31
        tlbwe   r20, r23, TLB_TAG               /* Load TLB HI */

        /* Done...restore registers and get out of here.
diff -urN /home/dgibson/kernel/linuxppc_2_4_devel/arch/ppc/mm/pgtable.c 
linux-grinch-largepage/arch/ppc/mm/pgtable.c
--- /home/dgibson/kernel/linuxppc_2_4_devel/arch/ppc/mm/pgtable.c       Mon Apr 
 8 10:29:07 2002
+++ linux-grinch-largepage/arch/ppc/mm/pgtable.c        Fri May 31 13:51:48 2002
@@ -348,7 +348,38 @@

        v = KERNELBASE;
        p = PPC_MEMSTART;
-       for (s = 0; s < total_lowmem; s += PAGE_SIZE) {
+       s = 0;
+#if defined(CONFIG_40x)
+       for (; s <= (total_lowmem - 16*1024*1024); s += 16*1024*1024) {
+               pmd_t *pmdp;
+               unsigned long val = p | _PMD_SIZE_16M | _PAGE_HWEXEC | 
_PAGE_HWWRITE;
+
+               spin_lock(&init_mm.page_table_lock);
+               pmdp = pmd_offset(pgd_offset_k(v), v);
+               pmd_val(*pmdp++) = val;
+               pmd_val(*pmdp++) = val;
+               pmd_val(*pmdp++) = val;
+               pmd_val(*pmdp++) = val;
+               spin_unlock(&init_mm.page_table_lock);
+
+               v += 16*1024*1024;
+               p += 16*1024*1024;
+       }
+
+       for(; s <= (total_lowmem - 4*1024*1024); s += 4*1024*1024) {
+               pmd_t *pmdp;
+               unsigned long val = p | _PMD_SIZE_4M | _PAGE_HWEXEC | 
_PAGE_HWWRITE;
+
+               spin_lock(&init_mm.page_table_lock);
+               pmdp = pmd_offset(pgd_offset_k(v), v);
+               pmd_val(*pmdp) = val;
+               spin_unlock(&init_mm.page_table_lock);
+
+               v += 4*1024*1024;
+               p += 4*1024*1024;
+       }
+#endif
+       for (; s < total_lowmem; s += PAGE_SIZE) {
                /* On the MPC8xx, we want the page shared so we
                 * don't get ASID compares on kernel space.
                 */
@@ -468,8 +499,33 @@
                mm = &init_mm;

        pa = 0;
+#ifdef CONFIG_40x
+       {
+               pgd_t   *pgd;
+               pmd_t   *pmd;
+               const unsigned long large_page_mask[] = {
+                       0xfffff800, 0xffffe000, 0xffff8000, 0xfffe0000,
+                       0xfff80000, 0xffe00000, 0xff800000, 0xfe000000
+               };
+
+               pgd = pgd_offset(mm, addr & PAGE_MASK);
+               if (pgd) {
+                       pmd = pmd_offset(pgd, addr & PAGE_MASK);
+                       if (pmd_present(*pmd)) {
+                               pte = pte_offset(pmd, addr & PAGE_MASK);
+                               pa = (pte_val(*pte) & PAGE_MASK) | (addr & 
~PAGE_MASK);
+                       } else if (pmd_val(*pmd) & _PMD_SIZE) {
+                               unsigned long mask =
+                                       large_page_mask[(pmd_val(*pmd) & 
_PMD_SIZE) >> 5];
+                               pa = (pmd_val(*pmd) & mask) | (addr & ~mask);
+                       }
+               }
+       }
+
+#else
        if (get_pteptr(mm, addr, &pte))
                pa = (pte_val(*pte) & PAGE_MASK) | (addr & ~PAGE_MASK);
+#endif

        return(pa);
 }
diff -urN /home/dgibson/kernel/linuxppc_2_4_devel/include/asm-ppc/pgtable.h 
linux-grinch-largepage/include/asm-ppc/pgtable.h
--- /home/dgibson/kernel/linuxppc_2_4_devel/include/asm-ppc/pgtable.h   Wed Apr 
17 10:26:01 2002
+++ linux-grinch-largepage/include/asm-ppc/pgtable.h    Fri May 31 13:50:13 2002
@@ -285,8 +285,8 @@
      is cleared in the TLB miss handler before the TLB entry is loaded.
    - All other bits of the PTE are loaded into TLBLO without
      modification, leaving us only the bits 20, 21, 24, 25, 26, 30 for
-     software PTE bits.  We actually use use bits 21, 24, 25, 26, and
-     30 respectively for the software bits: ACCESSED, DIRTY, RW, EXEC,
+     software PTE bits.  We actually use use bits 21, 24, 25, and
+     30 respectively for the software bits: ACCESSED, DIRTY, RW, and
      PRESENT.
 */

@@ -301,8 +301,12 @@
 #define _PAGE_HWWRITE  0x100   /* hardware: Dirty & RW, set in exception */
 #define _PAGE_HWEXEC   0x200   /* hardware: EX permission */
 #define _PAGE_ACCESSED 0x400   /* software: R: page referenced */
-#define _PMD_PRESENT   PAGE_MASK

+#define _PMD_PRESENT   0x400   /* PMD points to page of PTEs */
+#define _PMD_SIZE      0x0e0   /* size field, != 0 for large-page PMD entry */
+#define _PMD_SIZE_4M   0x0c0
+#define _PMD_SIZE_16M  0x0e0
+#define _PMD_BAD       0x802
 #elif defined(CONFIG_440)

 /*
@@ -357,9 +361,10 @@
 #define _PAGE_HWWRITE  0x0100  /* h/w write enable: never set in Linux PTE */
 #define _PAGE_USER     0x0800  /* One of the PP bits, the other is USER&~RW */

-#define _PMD_PRESENT   PAGE_MASK
+#define _PMD_PRESENT   0x0001
 #define _PMD_PAGE_MASK 0x000c
 #define _PMD_PAGE_8M   0x000c
+#define _PMD_BAD       0x0ff0

 #else /* CONFIG_6xx */
 /* Definitions for 60x, 740/750, etc. */
@@ -374,7 +379,9 @@
 #define _PAGE_ACCESSED 0x100   /* R: page referenced */
 #define _PAGE_EXEC     0x200   /* software: i-cache coherency required */
 #define _PAGE_RW       0x400   /* software: user write access allowed */
-#define _PMD_PRESENT   PAGE_MASK
+
+#define _PMD_PRESENT   0x800
+#define _PMD_BAD       0x7ff
 #endif

 /* The non-standard PowerPC MMUs, which includes the 4xx and 8xx (and
@@ -474,7 +481,7 @@
 #define pte_clear(ptep)                do { set_pte((ptep), __pte(0)); } while 
(0)

 #define pmd_none(pmd)          (!pmd_val(pmd))
-#define        pmd_bad(pmd)            ((pmd_val(pmd) & _PMD_PRESENT) == 0)
+#define        pmd_bad(pmd)            ((pmd_val(pmd) & _PMD_BAD) != 0)
 #define        pmd_present(pmd)        ((pmd_val(pmd) & _PMD_PRESENT) != 0)
 #define        pmd_clear(pmdp)         do { pmd_val(*(pmdp)) = 0; } while (0)



--
David Gibson                    | For every complex problem there is a
david at gibson.dropbear.id.au  | solution which is simple, neat and
                                | wrong.  -- H.L. Mencken
http://www.ozlabs.org/people/dgibson

** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/



Reply via email to