If an interrupt/exception/NMI is triggered while using ASI then
ASI is interrupted and the system switches back to the (kernel)
page-table used before entering ASI.

When the interrupt/exception/NMI handler returns then ASI is
resumed by switching back to the ASI page-table.

Signed-off-by: Alexandre Chartre <alexandre.char...@oracle.com>
---
 arch/x86/entry/calling.h           |  26 +++++-
 arch/x86/entry/entry_64.S          |  22 ++++++
 arch/x86/include/asm/asi.h         | 122 +++++++++++++++++++++++++++++
 arch/x86/include/asm/asi_session.h |   7 ++
 arch/x86/include/asm/mmu_context.h |   3 +-
 arch/x86/kernel/asm-offsets.c      |   5 ++
 arch/x86/mm/asi.c                  |  67 ++++++++++++++--
 7 files changed, 242 insertions(+), 10 deletions(-)

diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 0789e13ece90..ca23b79adecf 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -6,6 +6,7 @@
 #include <asm/percpu.h>
 #include <asm/asm-offsets.h>
 #include <asm/processor-flags.h>
+#include <asm/asi.h>
 
 /*
 
@@ -172,7 +173,30 @@ For 32-bit we have the following conventions - kernel is built with
        .endif
 .endm
 
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#if defined(CONFIG_ADDRESS_SPACE_ISOLATION)
+
+/*
+ * For now, ASI is not compatible with PTI.
+ */
+
+.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
+.endm
+
+.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
+.endm
+
+.macro SWITCH_TO_USER_CR3_STACK        scratch_reg:req
+.endm
+
+.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
+       ASI_INTERRUPT_AND_SAVE_CR3 \scratch_reg \save_reg
+.endm
+
+.macro RESTORE_CR3 scratch_reg:req save_reg:req
+       ASI_RESUME_AND_RESTORE_CR3 \save_reg
+.endm
+
+#elif defined(CONFIG_PAGE_TABLE_ISOLATION)
 
 /*
  * PAGE_TABLE_ISOLATION PGDs are 8k.  Flip bit 12 to switch between the two
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 0e9504fabe52..ac47da63a29f 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -573,7 +573,15 @@ SYM_CODE_START(interrupt_entry)
 
        CALL_enter_from_user_mode
 
+#ifdef CONFIG_ADDRESS_SPACE_ISOLATION
+       jmp     2f
+#endif
 1:
+#ifdef CONFIG_ADDRESS_SPACE_ISOLATION
+       /* Interrupt address space isolation if it is active */
+       ASI_INTERRUPT scratch_reg=%rdi
+2:
+#endif
        ENTER_IRQ_STACK old_rsp=%rdi save_ret=1
        /* We entered an interrupt context - irqs are off: */
        TRACE_IRQS_OFF
@@ -673,6 +681,10 @@ retint_kernel:
        jnz     1f
        call    preempt_schedule_irq
 1:
+#endif
+#ifdef CONFIG_ADDRESS_SPACE_ISOLATION
+       ASI_PREPARE_RESUME
+       ASI_RESUME scratch_reg=%rdi
 #endif
        /*
         * The iretq could re-enable interrupts:
@@ -1238,6 +1250,9 @@ SYM_CODE_START_LOCAL(paranoid_entry)
         * This is also why CS (stashed in the "iret frame" by the
         * hardware at entry) can not be used: this may be a return
         * to kernel code, but with a user CR3 value.
+        *
+        * If ASI is enabled, this also handles the case where we are
+        * using an ASI CR3 value.
         */
        SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
 
@@ -1313,6 +1328,13 @@ SYM_CODE_START_LOCAL(error_entry)
 
 .Lerror_entry_done_lfence:
        FENCE_SWAPGS_KERNEL_ENTRY
+#ifdef CONFIG_ADDRESS_SPACE_ISOLATION
+       /*
+        * Interrupt address space isolation if it is active. This will restore
+        * the original kernel CR3.
+        */
+       ASI_INTERRUPT scratch_reg=%rdi
+#endif
 .Lerror_entry_done:
        ret
 
diff --git a/arch/x86/include/asm/asi.h b/arch/x86/include/asm/asi.h
index bcfb68e8e392..d240954b2f85 100644
--- a/arch/x86/include/asm/asi.h
+++ b/arch/x86/include/asm/asi.h
@@ -108,6 +108,128 @@ extern void asi_set_pagetable(struct asi *asi, pgd_t *pagetable);
 extern int asi_enter(struct asi *asi);
 extern void asi_exit(struct asi *asi);
 
+#else  /* __ASSEMBLY__ */
+
+#include <asm/alternative-asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/cpufeatures.h>
+#include <asm/percpu.h>
+#include <asm/processor-flags.h>
+
+#define THIS_ASI_SESSION_asi           \
+       PER_CPU_VAR(cpu_tlbstate + TLB_STATE_asi)
+#define THIS_ASI_SESSION_isolation_cr3 \
+       PER_CPU_VAR(cpu_tlbstate + TLB_STATE_asi_isolation_cr3)
+#define THIS_ASI_SESSION_original_cr3  \
+       PER_CPU_VAR(cpu_tlbstate + TLB_STATE_asi_original_cr3)
+#define THIS_ASI_SESSION_idepth        \
+       PER_CPU_VAR(cpu_tlbstate + TLB_STATE_asi_idepth)
+
+.macro SET_NOFLUSH_BIT reg:req
+       bts     $X86_CR3_PCID_NOFLUSH_BIT, \reg
+.endm
+
+/*
+ * Switch CR3 to the original kernel CR3 value. This is used when
+ * interrupting ASI.
+ */
+.macro ASI_SWITCH_TO_KERNEL_CR3 scratch_reg:req
+       /*
+        * KERNEL pages can always resume with NOFLUSH as we do
+        * explicit flushes.
+        */
+       movq    THIS_ASI_SESSION_original_cr3, \scratch_reg
+       ALTERNATIVE "", "SET_NOFLUSH_BIT \scratch_reg", X86_FEATURE_PCID
+       movq    \scratch_reg, %cr3
+.endm
+
+/*
+ * Interrupt ASI: if a session is active, bump the interrupt depth and,
+ * on the first (outermost) interrupt only, switch to the kernel CR3.
+ */
+.macro ASI_INTERRUPT scratch_reg:req
+       movq    THIS_ASI_SESSION_asi, \scratch_reg
+       testq   \scratch_reg, \scratch_reg
+       jz      .Lasi_interrupt_done_\@
+       incl    THIS_ASI_SESSION_idepth
+       cmpl    $1, THIS_ASI_SESSION_idepth
+       jne     .Lasi_interrupt_done_\@
+       ASI_SWITCH_TO_KERNEL_CR3 \scratch_reg
+.Lasi_interrupt_done_\@:
+.endm
+
+.macro ASI_PREPARE_RESUME
+       call    asi_prepare_resume
+.endm
+
+/*
+ * Resume ASI, after it was interrupted by an interrupt or an exception.
+ */
+.macro ASI_RESUME scratch_reg:req
+       movq    THIS_ASI_SESSION_asi, \scratch_reg
+       testq   \scratch_reg, \scratch_reg
+       jz      .Lasi_resume_done_\@
+       decl    THIS_ASI_SESSION_idepth
+       jnz     .Lasi_resume_done_\@
+       movq    THIS_ASI_SESSION_isolation_cr3, \scratch_reg
+       mov     \scratch_reg, %cr3
+.Lasi_resume_done_\@:
+.endm
+
+/*
+ * Interrupt ASI, special processing when ASI is interrupted by a NMI
+ * or a paranoid interrupt/exception.
+ */
+.macro ASI_INTERRUPT_AND_SAVE_CR3 scratch_reg:req save_reg:req
+       movq    %cr3, \save_reg
+       /*
+        * Test the ASI PCID bits. If set, then an ASI page table
+        * is active. If clear, CR3 already has the kernel page table
+        * active.
+        */
+       bt      $ASI_PGTABLE_BIT, \save_reg
+       jnc     .Ldone_\@
+       incl    THIS_ASI_SESSION_idepth
+       ASI_SWITCH_TO_KERNEL_CR3 \scratch_reg
+.Ldone_\@:
+.endm
+
+/*
+ * Resume ASI, special processing when ASI is resumed from a NMI
+ * or a paranoid interrupt/exception.
+ */
+.macro ASI_RESUME_AND_RESTORE_CR3 save_reg:req
+
+       ALTERNATIVE "jmp .Lwrite_cr3_\@", "", X86_FEATURE_PCID
+
+       bt      $ASI_PGTABLE_BIT, \save_reg
+       jnc     .Lrestore_kernel_cr3_\@
+
+       /*
+        * Restore ASI CR3. We need to update TLB flushing
+        * information.
+        */
+       movq    THIS_ASI_SESSION_asi, %rdi
+       movq    \save_reg, %rsi
+       call    asi_update_flush
+       movq    %rax, THIS_ASI_SESSION_isolation_cr3
+       decl    THIS_ASI_SESSION_idepth
+       movq    %rax, %cr3
+       jmp     .Ldone_\@
+
+.Lrestore_kernel_cr3_\@:
+       /*
+        * Restore kernel CR3. KERNEL pages can always resume
+        * with NOFLUSH as we do explicit flushes.
+        */
+       SET_NOFLUSH_BIT \save_reg
+
+.Lwrite_cr3_\@:
+       movq    \save_reg, %cr3
+
+.Ldone_\@:
+.endm
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* CONFIG_ADDRESS_SPACE_ISOLATION */
diff --git a/arch/x86/include/asm/asi_session.h b/arch/x86/include/asm/asi_session.h
index 9d39c936a4ee..85968f7e8f32 100644
--- a/arch/x86/include/asm/asi_session.h
+++ b/arch/x86/include/asm/asi_session.h
@@ -10,6 +10,13 @@ struct asi_session {
        struct asi              *asi;           /* ASI for this session */
        unsigned long           isolation_cr3;  /* cr3 when ASI is active */
        unsigned long           original_cr3;   /* cr3 before entering ASI */
+       /*
+        * The interrupt depth (idepth) tracks interrupt (actually
+        * interrupt/exception/NMI) nesting. ASI is interrupted on
+        * the first interrupt, and it is resumed when that interrupt
+        * handler returns.
+        */
+       unsigned int            idepth;         /* interrupt depth */
 };
 
 #endif /* CONFIG_ADDRESS_SPACE_ISOLATION */
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 9b03bad00b81..b8c81e7b197a 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -243,7 +243,8 @@ static inline unsigned long __get_current_cr3_fast(void)
         * field of the ASI session.
         */
        if (IS_ENABLED(CONFIG_ADDRESS_SPACE_ISOLATION) &&
-           this_cpu_read(cpu_asi_session.asi)) {
+           this_cpu_read(cpu_asi_session.asi) &&
+           !this_cpu_read(cpu_asi_session.idepth)) {
                cr3 = this_cpu_read(cpu_asi_session.isolation_cr3);
                /* CR3 read never returns with the NOFLUSH bit */
                cr3 &= ~X86_CR3_PCID_NOFLUSH;
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 3ca07ad552ae..4c08a688b4b9 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -94,6 +94,11 @@ static void __used common(void)
 
        /* TLB state for the entry code */
        OFFSET(TLB_STATE_user_pcid_flush_mask, tlb_state, user_pcid_flush_mask);
+       OFFSET(TLB_STATE_asi, tlb_state, asi_session.asi);
+       OFFSET(TLB_STATE_asi_isolation_cr3, tlb_state,
+              asi_session.isolation_cr3);
+       OFFSET(TLB_STATE_asi_original_cr3, tlb_state, asi_session.original_cr3);
+       OFFSET(TLB_STATE_asi_idepth, tlb_state, asi_session.idepth);
 
        /* Layout info for cpu_entry_area */
        OFFSET(CPU_ENTRY_AREA_entry_stack, cpu_entry_area, entry_stack_page);
diff --git a/arch/x86/mm/asi.c b/arch/x86/mm/asi.c
index cf0d122a3c72..c91ba82a095b 100644
--- a/arch/x86/mm/asi.c
+++ b/arch/x86/mm/asi.c
@@ -68,7 +68,7 @@ EXPORT_SYMBOL(asi_set_pagetable);
  * Return an updated ASI CR3 value which specified if TLB needs to
  * be flushed or not.
  */
-static unsigned long asi_update_flush(struct asi *asi, unsigned long asi_cr3)
+unsigned long asi_update_flush(struct asi *asi, unsigned long asi_cr3)
 {
        struct asi_tlb_pgtable *tlb_pgtable;
        struct asi_tlb_state *tlb_state;
@@ -90,7 +90,24 @@ static unsigned long asi_update_flush(struct asi *asi, unsigned long asi_cr3)
        return asi_cr3;
 }
 
-static void asi_switch_to_asi_cr3(struct asi *asi)
+
+/*
+ * Switch to the ASI pagetable.
+ *
+ * If schedule is ASI_SWITCH_NOW, then immediately switch to the ASI
+ * pagetable by updating the CR3 register with the ASI CR3 value.
+ * Otherwise, if schedule is ASI_SWITCH_ON_RESUME, prepare everything
+ * for switching to ASI pagetable but do not update the CR3 register
+ * yet. This will be done by the next ASI_RESUME call.
+ */
+
+enum asi_switch_schedule {
+       ASI_SWITCH_NOW,
+       ASI_SWITCH_ON_RESUME,
+};
+
+static void asi_switch_to_asi_cr3(struct asi *asi,
+                                 enum asi_switch_schedule schedule)
 {
        unsigned long original_cr3, asi_cr3;
        struct asi_session *asi_session;
@@ -114,8 +131,16 @@ static void asi_switch_to_asi_cr3(struct asi *asi)
        asi_session->original_cr3 = original_cr3;
        asi_session->isolation_cr3 = asi_cr3;
 
-       /* Update CR3 to immediately enter ASI */
-       native_write_cr3(asi_cr3);
+       if (schedule == ASI_SWITCH_ON_RESUME) {
+               /*
+                * Defer the CR3 update to the next ASI resume by setting
+                * the interrupt depth to 1.
+                */
+               asi_session->idepth = 1;
+       } else {
+               /* Update CR3 to immediately enter ASI */
+               native_write_cr3(asi_cr3);
+       }
 }
 
 static void asi_switch_to_kernel_cr3(struct asi *asi)
@@ -132,6 +157,7 @@ static void asi_switch_to_kernel_cr3(struct asi *asi)
 
        asi_session = &get_cpu_var(cpu_asi_session);
        asi_session->asi = NULL;
+       asi_session->idepth = 0;
 }
 
 int asi_enter(struct asi *asi)
@@ -153,7 +179,7 @@ int asi_enter(struct asi *asi)
        }
 
        local_irq_save(flags);
-       asi_switch_to_asi_cr3(asi);
+       asi_switch_to_asi_cr3(asi, ASI_SWITCH_NOW);
        local_irq_restore(flags);
 
        return 0;
@@ -162,8 +188,10 @@ EXPORT_SYMBOL(asi_enter);
 
 void asi_exit(struct asi *asi)
 {
+       struct asi_session *asi_session;
        struct asi *current_asi;
        unsigned long flags;
+       int idepth;
 
        current_asi = this_cpu_read(cpu_asi_session.asi);
        if (!current_asi) {
@@ -173,8 +201,31 @@ void asi_exit(struct asi *asi)
 
        WARN_ON(current_asi != asi);
 
-       local_irq_save(flags);
-       asi_switch_to_kernel_cr3(asi);
-       local_irq_restore(flags);
+       idepth = this_cpu_read(cpu_asi_session.idepth);
+       if (!idepth) {
+               local_irq_save(flags);
+               asi_switch_to_kernel_cr3(asi);
+               local_irq_restore(flags);
+       } else {
+               /*
+                * ASI was interrupted, so we have already switched
+                * back to the kernel page table and we just need to
+                * clear the ASI session.
+                */
+               asi_session = &get_cpu_var(cpu_asi_session);
+               asi_session->asi = NULL;
+               asi_session->idepth = 0;
+       }
 }
 EXPORT_SYMBOL(asi_exit);
+
+/* Re-arm an interrupted ASI session so the next ASI_RESUME re-enters it. */
+void asi_prepare_resume(void)
+{
+       /* Not get_cpu_var(): it disables preemption until put_cpu_var(). */
+       struct asi_session *asi_session = this_cpu_ptr(&cpu_asi_session);
+
+       if (!asi_session->asi || asi_session->idepth > 1)
+               return;
+       asi_switch_to_asi_cr3(asi_session->asi, ASI_SWITCH_ON_RESUME);
+}
-- 
2.18.2

Reply via email to