Re: [kernel-hardening] [RFC PATCH v3 1/2] Add support for eXclusive Page Frame Ownership (XPFO)
This patch adds support for XPFO which protects against 'ret2dir' kernel attacks. The basic idea is to enforce exclusive ownership of page frames by either the kernel or userspace, unless explicitly requested by the kernel. Whenever a page destined for userspace is allocated, it is unmapped from physmap (the kernel's page table). When such a page is reclaimed from userspace, it is mapped back to physmap. Additional fields in the page_ext struct are used for XPFO housekeeping. Specifically: two flags to distinguish user vs. kernel pages and to tag unmapped pages, a reference counter to balance kmap/kunmap operations, and a lock to serialize access to the XPFO fields. Known issues/limitations: - Only supports x86-64 (for now) - Only supports 4k pages (for now) - There are most likely some legitimate use cases where the kernel needs to access userspace which need to be made XPFO-aware - Performance penalty Reference paper by the original patch authors: http://www.cs.columbia.edu/~vpk/papers/ret2dir.sec14.pdf Suggested-by: Vasileios P. 
Kemerlis Signed-off-by: Juerg Haefliger --- arch/x86/Kconfig | 3 +- arch/x86/mm/init.c | 2 +- drivers/ata/libata-sff.c | 4 +- include/linux/highmem.h | 15 +++- include/linux/page_ext.h | 7 ++ include/linux/xpfo.h | 39 + lib/swiotlb.c| 3 +- mm/Makefile | 1 + mm/page_alloc.c | 2 + mm/page_ext.c| 4 + mm/xpfo.c| 206 +++ security/Kconfig | 19 + 12 files changed, 298 insertions(+), 7 deletions(-) create mode 100644 include/linux/xpfo.h create mode 100644 mm/xpfo.c diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index bada636d1065..38b334f8fde5 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -165,6 +165,7 @@ config X86 select HAVE_STACK_VALIDATIONif X86_64 select ARCH_USES_HIGH_VMA_FLAGS if X86_INTEL_MEMORY_PROTECTION_KEYS select ARCH_HAS_PKEYS if X86_INTEL_MEMORY_PROTECTION_KEYS + select ARCH_SUPPORTS_XPFO if X86_64 config INSTRUCTION_DECODER def_bool y @@ -1361,7 +1362,7 @@ config ARCH_DMA_ADDR_T_64BIT config X86_DIRECT_GBPAGES def_bool y - depends on X86_64 && !DEBUG_PAGEALLOC && !KMEMCHECK + depends on X86_64 && !DEBUG_PAGEALLOC && !KMEMCHECK && !XPFO ---help--- Certain kernel features effectively disable kernel linear 1 GB mappings (even if the CPU otherwise diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 22af912d66d2..a6fafbae02bb 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -161,7 +161,7 @@ static int page_size_mask; static void __init probe_page_size_mask(void) { -#if !defined(CONFIG_KMEMCHECK) +#if !defined(CONFIG_KMEMCHECK) && !defined(CONFIG_XPFO) /* * For CONFIG_KMEMCHECK or pagealloc debugging, identity mapping will * use small pages. diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index 051b6158d1b7..58af734be25d 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -715,7 +715,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc) DPRINTK("data %s\n", qc->tf.flags & ATA_TFLAG_WRITE ? 
"write" : "read"); - if (PageHighMem(page)) { + if (PageHighMem(page) || xpfo_page_is_unmapped(page)) { unsigned long flags; /* FIXME: use a bounce buffer */ @@ -860,7 +860,7 @@ static int __atapi_pio_bytes(struct ata_queued_cmd *qc, unsigned int bytes) DPRINTK("data %s\n", qc->tf.flags & ATA_TFLAG_WRITE ? "write" : "read"); - if (PageHighMem(page)) { + if (PageHighMem(page) || xpfo_page_is_unmapped(page)) { unsigned long flags; /* FIXME: use bounce buffer */ diff --git a/include/linux/highmem.h b/include/linux/highmem.h index bb3f3297062a..7a17c166532f 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -7,6 +7,7 @@ #include #include #include +#include #include @@ -55,24 +56,34 @@ static inline struct page *kmap_to_page(void *addr) #ifndef ARCH_HAS_KMAP static inline void *kmap(struct page *page) { + void *kaddr; + might_sleep(); - return page_address(page); + kaddr = page_address(page); + xpfo_kmap(kaddr, page); + return kaddr; } static inline void kunmap(struct page *page) { + xpfo_kunmap(page_address(page), page); } static inline void *kmap_atomic(struct page *page) { + void *kaddr; + preempt_disable(); pagefault_disable(); - return page_address(page); + kaddr = page_address(page); + xpfo_kmap(kaddr, page); + return kaddr; } #define kmap_atomic_prot(page, prot) kmap_atomic(page) static inline void __kunmap_atomic(void
Re: [kernel-hardening] [RFC PATCH v3 1/2] Add support for eXclusive Page Frame Ownership (XPFO)
This patch adds support for XPFO which protects against 'ret2dir' kernel attacks. The basic idea is to enforce exclusive ownership of page frames by either the kernel or userspace, unless explicitly requested by the kernel. Whenever a page destined for userspace is allocated, it is unmapped from physmap (the kernel's page table). When such a page is reclaimed from userspace, it is mapped back to physmap. Additional fields in the page_ext struct are used for XPFO housekeeping. Specifically: two flags to distinguish user vs. kernel pages and to tag unmapped pages, a reference counter to balance kmap/kunmap operations, and a lock to serialize access to the XPFO fields. Known issues/limitations: - Only supports x86-64 (for now) - Only supports 4k pages (for now) - There are most likely some legitimate use cases where the kernel needs to access userspace which need to be made XPFO-aware - Performance penalty Reference paper by the original patch authors: http://www.cs.columbia.edu/~vpk/papers/ret2dir.sec14.pdf Suggested-by: Vasileios P. 
Kemerlis Signed-off-by: Juerg Haefliger --- arch/x86/Kconfig | 3 +- arch/x86/mm/init.c | 2 +- drivers/ata/libata-sff.c | 4 +- include/linux/highmem.h | 15 +++- include/linux/page_ext.h | 7 ++ include/linux/xpfo.h | 39 + lib/swiotlb.c| 3 +- mm/Makefile | 1 + mm/page_alloc.c | 2 + mm/page_ext.c| 4 + mm/xpfo.c| 206 +++ security/Kconfig | 19 + 12 files changed, 298 insertions(+), 7 deletions(-) create mode 100644 include/linux/xpfo.h create mode 100644 mm/xpfo.c diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index bada636d1065..38b334f8fde5 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -165,6 +165,7 @@ config X86 select HAVE_STACK_VALIDATIONif X86_64 select ARCH_USES_HIGH_VMA_FLAGS if X86_INTEL_MEMORY_PROTECTION_KEYS select ARCH_HAS_PKEYS if X86_INTEL_MEMORY_PROTECTION_KEYS + select ARCH_SUPPORTS_XPFO if X86_64 config INSTRUCTION_DECODER def_bool y @@ -1361,7 +1362,7 @@ config ARCH_DMA_ADDR_T_64BIT config X86_DIRECT_GBPAGES def_bool y - depends on X86_64 && !DEBUG_PAGEALLOC && !KMEMCHECK + depends on X86_64 && !DEBUG_PAGEALLOC && !KMEMCHECK && !XPFO ---help--- Certain kernel features effectively disable kernel linear 1 GB mappings (even if the CPU otherwise diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 22af912d66d2..a6fafbae02bb 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -161,7 +161,7 @@ static int page_size_mask; static void __init probe_page_size_mask(void) { -#if !defined(CONFIG_KMEMCHECK) +#if !defined(CONFIG_KMEMCHECK) && !defined(CONFIG_XPFO) /* * For CONFIG_KMEMCHECK or pagealloc debugging, identity mapping will * use small pages. diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index 051b6158d1b7..58af734be25d 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -715,7 +715,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc) DPRINTK("data %s\n", qc->tf.flags & ATA_TFLAG_WRITE ? 
"write" : "read"); - if (PageHighMem(page)) { + if (PageHighMem(page) || xpfo_page_is_unmapped(page)) { unsigned long flags; /* FIXME: use a bounce buffer */ @@ -860,7 +860,7 @@ static int __atapi_pio_bytes(struct ata_queued_cmd *qc, unsigned int bytes) DPRINTK("data %s\n", qc->tf.flags & ATA_TFLAG_WRITE ? "write" : "read"); - if (PageHighMem(page)) { + if (PageHighMem(page) || xpfo_page_is_unmapped(page)) { unsigned long flags; /* FIXME: use bounce buffer */ diff --git a/include/linux/highmem.h b/include/linux/highmem.h index bb3f3297062a..7a17c166532f 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -7,6 +7,7 @@ #include #include #include +#include #include @@ -55,24 +56,34 @@ static inline struct page *kmap_to_page(void *addr) #ifndef ARCH_HAS_KMAP static inline void *kmap(struct page *page) { + void *kaddr; + might_sleep(); - return page_address(page); + kaddr = page_address(page); + xpfo_kmap(kaddr, page); + return kaddr; } static inline void kunmap(struct page *page) { + xpfo_kunmap(page_address(page), page); } static inline void *kmap_atomic(struct page *page) { + void *kaddr; + preempt_disable(); pagefault_disable(); - return page_address(page); + kaddr = page_address(page); + xpfo_kmap(kaddr, page); + return kaddr; } #define kmap_atomic_prot(page, prot) kmap_atomic(page) static inline void __kunmap_atomic(void *addr) { + xpfo_kunmap(addr,