Re: [kernel-hardening] [RFC PATCH v3 1/2] Add support for eXclusive Page Frame Ownership (XPFO)

2016-11-09 Thread ZhaoJunmin Zhao(Junmin)

This patch adds support for XPFO which protects against 'ret2dir' kernel
attacks. The basic idea is to enforce exclusive ownership of page frames
by either the kernel or userspace, unless explicitly requested by the
kernel. Whenever a page destined for userspace is allocated, it is
unmapped from physmap (the kernel's direct mapping of physical memory).
When such a page is reclaimed from userspace, it is mapped back to
physmap.
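
To illustrate the life cycle (a sketch only, not the patch itself:
set_physmap_present() is a hypothetical stand-in for the PTE manipulation
that mm/xpfo.c performs on the direct mapping):

    #include <linux/mm.h>
    #include <linux/gfp.h>

    /* Hypothetical helper: make a page present/non-present in physmap. */
    static void set_physmap_present(struct page *page, bool present);

    /* Allocation side: pages destined for userspace lose their physmap entry. */
    static void xpfo_on_alloc(struct page *page, gfp_t gfp)
    {
            if ((gfp & GFP_HIGHUSER) == GFP_HIGHUSER)
                    set_physmap_present(page, false);
    }

    /* Free side: the page returns to the kernel, so restore the mapping. */
    static void xpfo_on_free(struct page *page)
    {
            set_physmap_present(page, true);
    }

The kmap()/kunmap() hooks in the diff below then temporarily restore the
mapping for legitimate kernel accesses to such pages.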

Additional fields in the page_ext struct are used for XPFO housekeeping.
Specifically: two flags to distinguish user vs. kernel pages and to tag
unmapped pages, a reference counter to balance kmap/kunmap operations,
and a lock to serialize access to the XPFO fields.
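
Roughly, the per-page bookkeeping looks like this (a sketch with assumed
field and flag names; the real fields are added to struct page_ext in the
diff below):

    #include <linux/spinlock.h>
    #include <linux/types.h>

    /* Assumed flag bits: ownership of the page and its physmap state. */
    enum xpfo_page_flags {
            XPFO_PAGE_USER,         /* page is allocated to userspace */
            XPFO_PAGE_UNMAPPED,     /* page is currently absent from physmap */
    };

    /* Assumed layout of the XPFO portion of struct page_ext. */
    struct xpfo_page_ext {
            unsigned long flags;    /* XPFO_PAGE_USER / XPFO_PAGE_UNMAPPED */
            atomic_t mapcount;      /* balances kmap() against kunmap() */
            spinlock_t maplock;     /* serializes access to the fields above */
    };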

Known issues/limitations:
   - Only supports x86-64 (for now)
   - Only supports 4k pages (for now)
   - There are most likely some legitimate use cases where the kernel needs
 to access userspace pages; those paths need to be made XPFO-aware
   - Performance penalty

Reference paper by the original patch authors:
   http://www.cs.columbia.edu/~vpk/papers/ret2dir.sec14.pdf

Suggested-by: Vasileios P. Kemerlis 
Signed-off-by: Juerg Haefliger 
---
  arch/x86/Kconfig         |   3 +-
  arch/x86/mm/init.c       |   2 +-
  drivers/ata/libata-sff.c |   4 +-
  include/linux/highmem.h  |  15 +++-
  include/linux/page_ext.h |   7 ++
  include/linux/xpfo.h     |  39 +
  lib/swiotlb.c            |   3 +-
  mm/Makefile              |   1 +
  mm/page_alloc.c          |   2 +
  mm/page_ext.c            |   4 +
  mm/xpfo.c                | 206 +++
  security/Kconfig         |  19 +
  12 files changed, 298 insertions(+), 7 deletions(-)
  create mode 100644 include/linux/xpfo.h
  create mode 100644 mm/xpfo.c

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index bada636d1065..38b334f8fde5 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -165,6 +165,7 @@ config X86
	select HAVE_STACK_VALIDATION		if X86_64
	select ARCH_USES_HIGH_VMA_FLAGS		if X86_INTEL_MEMORY_PROTECTION_KEYS
	select ARCH_HAS_PKEYS			if X86_INTEL_MEMORY_PROTECTION_KEYS
+	select ARCH_SUPPORTS_XPFO		if X86_64

  config INSTRUCTION_DECODER
def_bool y
@@ -1361,7 +1362,7 @@ config ARCH_DMA_ADDR_T_64BIT

  config X86_DIRECT_GBPAGES
def_bool y
-   depends on X86_64 && !DEBUG_PAGEALLOC && !KMEMCHECK
+   depends on X86_64 && !DEBUG_PAGEALLOC && !KMEMCHECK && !XPFO
---help---
  Certain kernel features effectively disable kernel
  linear 1 GB mappings (even if the CPU otherwise
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 22af912d66d2..a6fafbae02bb 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -161,7 +161,7 @@ static int page_size_mask;

  static void __init probe_page_size_mask(void)
  {
-#if !defined(CONFIG_KMEMCHECK)
+#if !defined(CONFIG_KMEMCHECK) && !defined(CONFIG_XPFO)
/*
 * For CONFIG_KMEMCHECK or pagealloc debugging, identity mapping will
 * use small pages.
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index 051b6158d1b7..58af734be25d 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -715,7 +715,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc)

DPRINTK("data %s\n", qc->tf.flags & ATA_TFLAG_WRITE ? "write" : "read");

-   if (PageHighMem(page)) {
+   if (PageHighMem(page) || xpfo_page_is_unmapped(page)) {
unsigned long flags;

/* FIXME: use a bounce buffer */
@@ -860,7 +860,7 @@ static int __atapi_pio_bytes(struct ata_queued_cmd *qc, unsigned int bytes)

DPRINTK("data %s\n", qc->tf.flags & ATA_TFLAG_WRITE ? "write" : "read");

-   if (PageHighMem(page)) {
+   if (PageHighMem(page) || xpfo_page_is_unmapped(page)) {
unsigned long flags;

/* FIXME: use bounce buffer */
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index bb3f3297062a..7a17c166532f 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -7,6 +7,7 @@
  #include <linux/mm.h>
  #include <linux/uaccess.h>
  #include <linux/hardirq.h>
+#include <linux/xpfo.h>

  #include <asm/cacheflush.h>

@@ -55,24 +56,34 @@ static inline struct page *kmap_to_page(void *addr)
  #ifndef ARCH_HAS_KMAP
  static inline void *kmap(struct page *page)
  {
+   void *kaddr;
+
might_sleep();
-   return page_address(page);
+   kaddr = page_address(page);
+   xpfo_kmap(kaddr, page);
+   return kaddr;
  }

  static inline void kunmap(struct page *page)
  {
+   xpfo_kunmap(page_address(page), page);
  }

  static inline void *kmap_atomic(struct page *page)
  {
+   void *kaddr;
+
preempt_disable();
pagefault_disable();
-   return page_address(page);
+   kaddr = page_address(page);
+   xpfo_kmap(kaddr, page);
+   return kaddr;
  }
  #define kmap_atomic_prot(page, prot)  kmap_atomic(page)

  static inline void __kunmap_atomic(void *addr)
  {
+   xpfo_kunmap(addr, virt_to_page(addr));
	pagefault_enable();
	preempt_enable();
  }