MLX uses WC to write the data payload into HCA buffers. The performance boost
over UC/UC- is significant. PAT details are documented in the comment above the
pat_init() routine.

Signed-off-by: Kanoj Sarcar <[email protected]>
---
 kern/drivers/net/udrvr/compat.c | 51 +++++++++++++++++++++++++++++++++++++++++
 kern/drivers/net/udrvr/compat.h | 19 ++-------------
 2 files changed, 53 insertions(+), 17 deletions(-)

diff --git a/kern/drivers/net/udrvr/compat.c b/kern/drivers/net/udrvr/compat.c
index 75b286d..7ee0137 100644
--- a/kern/drivers/net/udrvr/compat.c
+++ b/kern/drivers/net/udrvr/compat.c
@@ -18,6 +18,55 @@
 #include "uverbs.h"
 
 /*
+ * This is how PAT is used (it cannot be disabled): the PTE's 3-bit value
+ * made of the bits PAT|PCD|PWT selects a PAT index from 0 to 7. This
+ * indexes into the PAT MSR (0x277), which holds the PAT0 .. PAT7 entries.
+ * Power up config is PAT0/4=WB, PAT1/5=WT, PAT2/6=UC-, PAT3/7=UC. The PAT
+ * bit in the PTE is always left at 0. PTE_NOCACHE and the IOAPIC mappings
+ * set up in vm_init() set PCD and PWT together, ie PAT3/UC is used.
+ *
+ * Linux drivers using pgprot_noncached() use PAT2 (ie UC-) and those using
+ * pgprot_writecombine() use PAT1. PAT1/5 is mapped to WC by pat_init().
+ */
+
+/*
+ * Remap PAT1 and PAT5 to WC; patval is unsigned long long to match %llx.
+ * NOTE(review): the PAT MSR is per-core on x86 - confirm all cores run this.
+ */
+void pat_init(void)
+{
+       unsigned long long patval = read_msr(MSR_IA32_CR_PAT);
+
+       printk("PAT : origvalue = 0x%llx\n", patval);
+       /* Clear the PAT1 (bits 15:8) and PAT5 (bits 47:40) fields */
+       patval &= 0xFFFF00FFFFFF00FFULL;
+       /* Set both fields to 1, ie WC */
+       patval |= (1ULL << 8);
+       patval |= (1ULL << 40);
+       printk("PAT : newvalue = 0x%llx\n", patval);
+       write_msr(MSR_IA32_CR_PAT, patval);
+}
+
+/* Base PTE flags: PTE_P|PTE_U|PTE_A, plus PTE_W|PTE_D for PROT_WRITE. */
+static unsigned long pgprot_val(int vmprot)
+{
+       const unsigned long rdonly = PTE_P | PTE_U | PTE_A;
+       const unsigned long wrbits = PTE_W | PTE_D;
+
+       return (vmprot & PROT_WRITE) ? (rdonly | wrbits) : rdonly;
+}
+
+unsigned long pgprot_noncached(int vmprot)
+{
+       return PTE_PCD | pgprot_val(vmprot);   /* PCD only: PAT2 (UC-) */
+}
+
+unsigned long pgprot_writecombine(int vmprot)
+{
+       return PTE_PWT | pgprot_val(vmprot);   /* PAT1: WC via pat_init() */
+}
+
+/*
  * Our version knocked off from kern/src/mm.c version + uncaching logic from
  * vmap_pmem_nocache(). This routine is expected to be invoked as part of mmap()
  * handler.
@@ -284,6 +333,8 @@ static const struct file_operations cpuinfo = {
 
 void sysfs_init(void)
 {
+       pat_init();
+
        do_mkdir("/dev/infiniband", S_IRWXU | S_IRWXG | S_IRWXO);
        do_mkdir("/sys", S_IRWXU | S_IRWXG | S_IRWXO);
        do_mkdir("/sys/class", S_IRWXU | S_IRWXG | S_IRWXO);
diff --git a/kern/drivers/net/udrvr/compat.h b/kern/drivers/net/udrvr/compat.h
index 9df9f06..796e310 100644
--- a/kern/drivers/net/udrvr/compat.h
+++ b/kern/drivers/net/udrvr/compat.h
@@ -61,23 +61,8 @@ typedef atomic_t                     atomic64_t;
 #define        spin_unlock_bh(E)               spin_unlock(E)
 #define        DEFINE_SPINLOCK(x)              spinlock_t x = SPINLOCK_INITIALIZER
 
-/*
- * Linux pgprot_noncached() adds _PAGE_PCD ie bit 4, which is akaros PTE_PCD.
- * Akaros PTE_NOCACHE also sets bit 3 ie _PAGE_PWT (which is overkill?).
- * Linux pgprot_writecombine() defaults to pgprot_noncached() when PAT is
- * not enabled, otherwise just sets bit 3 ie _PAGE_PWT.
- */
-static unsigned long pgprot_noncached(int vmprot)
-{
-       unsigned long   prot = PTE_P | PTE_U | PTE_A | PTE_PCD;
-
-       if (vmprot & PROT_WRITE)
-               prot |= PTE_W | PTE_D;
-       return prot;
-}
-
-/* TODO: Factor in PAT usage */
-#define        pgprot_writecombine(vmprot)     pgprot_noncached(vmprot)
+unsigned long pgprot_noncached(int vmprot);    /* PTE flags with PTE_PCD */
+unsigned long pgprot_writecombine(int vmprot); /* PTE flags with PTE_PWT */
 
 #define is_vm_hugetlb_page(vma)        0
 

-- 
You received this message because you are subscribed to the Google Groups 
"Akaros" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To post to this group, send email to [email protected].
For more options, visit https://groups.google.com/d/optout.

Reply via email to