The MLX driver uses write-combining (WC) mappings to write data payloads into HCA buffers. The performance boost over UC/UC- mappings is significant. Details of the PAT setup are added as comments above the pat_init() routine.
Signed-off-by: Kanoj Sarcar <[email protected]> --- kern/drivers/net/udrvr/compat.c | 51 +++++++++++++++++++++++++++++++++++++++++ kern/drivers/net/udrvr/compat.h | 19 ++------------- 2 files changed, 53 insertions(+), 17 deletions(-) diff --git a/kern/drivers/net/udrvr/compat.c b/kern/drivers/net/udrvr/compat.c index 75b286d..7ee0137 100644 --- a/kern/drivers/net/udrvr/compat.c +++ b/kern/drivers/net/udrvr/compat.c @@ -18,6 +18,55 @@ #include "uverbs.h" /* + * This is how PAT is used (it can not be disabled): PTE's 3-bit value + * made of the bits PAT|PCD|PWT select a PAT value from 0 to 7. This is + * indexed into PAT_MSR x277 which really is the PAT0 .. PAT7 registers. + * Power up config is PAT0/4=WB, PAT1/5=WT, PAT2/6=UC-, PAT3/7=UC. PAT + * bit in PTE is always left to 0. PTE_NOCACHE and the IOAPIC mappings + * set up in vm_init() sets PCD and PWT together, ie PAT3/UC is used. + * + * Linux drivers using pgprot_noncached() uses PAT2 (ie UC-) and + * pgprot_writecombine() uses PAT1. PAT1/5 is mapped to WC by pat_init(). + */ + +void pat_init(void) +{ + unsigned long patval; + + patval = read_msr(MSR_IA32_CR_PAT); + printk("PAT : origvalue = 0x%llx\n", patval); + + /* Clear PAT1/5 */ + patval &= 0xFFFF00FFFFFF00FFULL; + + /* Set PAT1/5 to WC */ + patval |= (1 << 8); + patval |= (1ULL << 40); + + printk("PAT : newvalue = 0x%llx\n", patval); + write_msr(MSR_IA32_CR_PAT, patval); +} + +static unsigned long pgprot_val(int vmprot) +{ + unsigned long prot = PTE_P | PTE_U | PTE_A; + + if (vmprot & PROT_WRITE) + prot |= PTE_W | PTE_D; + return prot; +} + +unsigned long pgprot_noncached(int vmprot) +{ + return pgprot_val(vmprot) | PTE_PCD; +} + +unsigned long pgprot_writecombine(int vmprot) +{ + return pgprot_val(vmprot) | PTE_PWT; +} + +/* * Our version knocked off from kern/src/mm.c version + uncaching logic from * vmap_pmem_nocache(). This routine is expected to be invoked as part of mmap() * handler. 
@@ -284,6 +333,8 @@ static const struct file_operations cpuinfo = { void sysfs_init(void) { + pat_init(); + do_mkdir("/dev/infiniband", S_IRWXU | S_IRWXG | S_IRWXO); do_mkdir("/sys", S_IRWXU | S_IRWXG | S_IRWXO); do_mkdir("/sys/class", S_IRWXU | S_IRWXG | S_IRWXO); diff --git a/kern/drivers/net/udrvr/compat.h b/kern/drivers/net/udrvr/compat.h index 9df9f06..796e310 100644 --- a/kern/drivers/net/udrvr/compat.h +++ b/kern/drivers/net/udrvr/compat.h @@ -61,23 +61,8 @@ typedef atomic_t atomic64_t; #define spin_unlock_bh(E) spin_unlock(E) #define DEFINE_SPINLOCK(x) spinlock_t x = SPINLOCK_INITIALIZER -/* - * Linux pgprot_noncached() adds _PAGE_PCD ie bit 4, which is akaros PTE_PCD. - * Akaros PTE_NOCACHE also sets bit 3 ie _PAGE_PWT (which is overkill?). - * Linux pgprot_writecombine() defaults to pgprot_noncached() when PAT is - * not enabled, otherwise just sets bit 3 ie _PAGE_PWT. - */ -static unsigned long pgprot_noncached(int vmprot) -{ - unsigned long prot = PTE_P | PTE_U | PTE_A | PTE_PCD; - - if (vmprot & PROT_WRITE) - prot |= PTE_W | PTE_D; - return prot; -} - -/* TODO: Factor in PAT usage */ -#define pgprot_writecombine(vmprot) pgprot_noncached(vmprot) +extern unsigned long pgprot_noncached(int vmprot); +extern unsigned long pgprot_writecombine(int vmprot); #define is_vm_hugetlb_page(vma) 0 -- You received this message because you are subscribed to the Google Groups "Akaros" group. To unsubscribe from this group and stop receiving emails from it, send an email to [email protected]. To post to this group, send email to [email protected]. For more options, visit https://groups.google.com/d/optout.
