The branch main has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=ad794e6d7d02a11b01e721859e096efeb258a4d4

commit ad794e6d7d02a11b01e721859e096efeb258a4d4
Author:     Konstantin Belousov <[email protected]>
AuthorDate: 2024-06-06 01:16:36 +0000
Commit:     Konstantin Belousov <[email protected]>
CommitDate: 2024-09-04 21:50:18 +0000

    x86 iommu: move DMAR-independent parts of the qi code into common
    
    Sponsored by:   Advanced Micro Devices (AMD)
    Sponsored by:   The FreeBSD Foundation
    MFC after:      1 week
---
 sys/x86/iommu/intel_ctx.c   |   7 +-
 sys/x86/iommu/intel_dmar.h  |  42 +-----
 sys/x86/iommu/intel_drv.c   |  26 ++--
 sys/x86/iommu/intel_qi.c    | 330 ++++++++++++--------------------------------
 sys/x86/iommu/iommu_utils.c | 234 ++++++++++++++++++++++++++++++-
 sys/x86/iommu/x86_iommu.h   |  72 ++++++++++
 6 files changed, 413 insertions(+), 298 deletions(-)

diff --git a/sys/x86/iommu/intel_ctx.c b/sys/x86/iommu/intel_ctx.c
index a3ff35dc527e..03ef196c4cb0 100644
--- a/sys/x86/iommu/intel_ctx.c
+++ b/sys/x86/iommu/intel_ctx.c
@@ -887,10 +887,11 @@ dmar_domain_unload_entry(struct iommu_map_entry *entry, bool free,
        if (unit->qi_enabled) {
                if (free) {
                        DMAR_LOCK(unit);
-                       dmar_qi_invalidate_locked(domain, entry, true);
+                       iommu_qi_invalidate_locked(&domain->iodom, entry,
+                           true);
                        DMAR_UNLOCK(unit);
                } else {
-                       dmar_qi_invalidate_sync(domain, entry->start,
+                       iommu_qi_invalidate_sync(&domain->iodom, entry->start,
                            entry->end - entry->start, cansleep);
                        dmar_domain_free_entry(entry, false);
                }
@@ -943,7 +944,7 @@ dmar_domain_unload(struct iommu_domain *iodom,
        DMAR_LOCK(unit);
        while ((entry = TAILQ_FIRST(entries)) != NULL) {
                TAILQ_REMOVE(entries, entry, dmamap_link);
-               dmar_qi_invalidate_locked(domain, entry,
+               iommu_qi_invalidate_locked(&domain->iodom, entry,
                    dmar_domain_unload_emit_wait(domain, entry));
        }
        DMAR_UNLOCK(unit);
diff --git a/sys/x86/iommu/intel_dmar.h b/sys/x86/iommu/intel_dmar.h
index 0ede955e12b9..8a815d5cfca6 100644
--- a/sys/x86/iommu/intel_dmar.h
+++ b/sys/x86/iommu/intel_dmar.h
@@ -123,6 +123,7 @@ struct dmar_msi_data {
 
 struct dmar_unit {
        struct iommu_unit iommu;
+       struct x86_unit_common x86c;
        uint16_t segment;
        uint64_t base;
 
@@ -155,17 +156,6 @@ struct dmar_unit {
 
        /* QI */
        int qi_enabled;
-       char *inv_queue;
-       vm_size_t inv_queue_size;
-       uint32_t inv_queue_avail;
-       uint32_t inv_queue_tail;
-       volatile uint32_t inv_waitd_seq_hw; /* hw writes there on wait
-                                              descr completion */
-       uint64_t inv_waitd_seq_hw_phys;
-       uint32_t inv_waitd_seq; /* next sequence number to use for wait descr */
-       u_int inv_waitd_gen;    /* seq number generation AKA seq overflows */
-       u_int inv_seq_waiters;  /* count of waiters for seq */
-       u_int inv_queue_full;   /* informational counter */
 
        /* IR */
        int ir_enabled;
@@ -173,36 +163,6 @@ struct dmar_unit {
        dmar_irte_t *irt;
        u_int irte_cnt;
        vmem_t *irtids;
-
-       /*
-        * Delayed freeing of map entries queue processing:
-        *
-        * tlb_flush_head and tlb_flush_tail are used to implement a FIFO
-        * queue that supports concurrent dequeues and enqueues.  However,
-        * there can only be a single dequeuer (accessing tlb_flush_head) and
-        * a single enqueuer (accessing tlb_flush_tail) at a time.  Since the
-        * unit's qi_task is the only dequeuer, it can access tlb_flush_head
-        * without any locking.  In contrast, there may be multiple enqueuers,
-        * so the enqueuers acquire the iommu unit lock to serialize their
-        * accesses to tlb_flush_tail.
-        *
-        * In this FIFO queue implementation, the key to enabling concurrent
-        * dequeues and enqueues is that the dequeuer never needs to access
-        * tlb_flush_tail and the enqueuer never needs to access
-        * tlb_flush_head.  In particular, tlb_flush_head and tlb_flush_tail
-        * are never NULL, so neither a dequeuer nor an enqueuer ever needs to
-        * update both.  Instead, tlb_flush_head always points to a "zombie"
-        * struct, which previously held the last dequeued item.  Thus, the
-        * zombie's next field actually points to the struct holding the first
-        * item in the queue.  When an item is dequeued, the current zombie is
-        * finally freed, and the struct that held the just dequeued item
-        * becomes the new zombie.  When the queue is empty, tlb_flush_tail
-        * also points to the zombie.
-        */
-       struct iommu_map_entry *tlb_flush_head;
-       struct iommu_map_entry *tlb_flush_tail;
-       struct task qi_task;
-       struct taskqueue *qi_taskqueue;
 };
 
 #define        DMAR_LOCK(dmar)         mtx_lock(&DMAR2IOMMU(dmar)->lock)
diff --git a/sys/x86/iommu/intel_drv.c b/sys/x86/iommu/intel_drv.c
index 79350358cced..9fa1b3f98dc6 100644
--- a/sys/x86/iommu/intel_drv.c
+++ b/sys/x86/iommu/intel_drv.c
@@ -1303,19 +1303,19 @@ dmar_print_one(int idx, bool show_domains, bool show_mappings)
                            "size 0x%jx\n"
                    "  head 0x%x tail 0x%x avail 0x%x status 0x%x ctrl 0x%x\n"
                    "  hw compl 0x%x@%p/phys@%jx next seq 0x%x gen 0x%x\n",
-                           (uintmax_t)unit->inv_queue,
+                           (uintmax_t)unit->x86c.inv_queue,
                            (uintmax_t)dmar_read8(unit, DMAR_IQA_REG),
-                           (uintmax_t)unit->inv_queue_size,
+                           (uintmax_t)unit->x86c.inv_queue_size,
                            dmar_read4(unit, DMAR_IQH_REG),
                            dmar_read4(unit, DMAR_IQT_REG),
-                           unit->inv_queue_avail,
+                           unit->x86c.inv_queue_avail,
                            dmar_read4(unit, DMAR_ICS_REG),
                            dmar_read4(unit, DMAR_IECTL_REG),
-                           unit->inv_waitd_seq_hw,
-                           &unit->inv_waitd_seq_hw,
-                           (uintmax_t)unit->inv_waitd_seq_hw_phys,
-                           unit->inv_waitd_seq,
-                           unit->inv_waitd_gen);
+                           unit->x86c.inv_waitd_seq_hw,
+                           &unit->x86c.inv_waitd_seq_hw,
+                           (uintmax_t)unit->x86c.inv_waitd_seq_hw_phys,
+                           unit->x86c.inv_waitd_seq,
+                           unit->x86c.inv_waitd_gen);
                } else {
                        db_printf("qi is disabled\n");
                }
@@ -1368,7 +1368,17 @@ dmar_find_method(device_t dev, bool verbose)
        return (&dmar->iommu);
 }
 
+static struct x86_unit_common *
+dmar_get_x86_common(struct iommu_unit *unit)
+{
+       struct dmar_unit *dmar;
+
+       dmar = IOMMU2DMAR(unit);
+       return (&dmar->x86c);
+}
+
 static struct x86_iommu dmar_x86_iommu = {
+       .get_x86_common = dmar_get_x86_common,
        .domain_unload_entry = dmar_domain_unload_entry,
        .domain_unload = dmar_domain_unload,
        .get_ctx = dmar_get_ctx,
diff --git a/sys/x86/iommu/intel_qi.c b/sys/x86/iommu/intel_qi.c
index 590cbac9bcbd..a94fbb54e7f7 100644
--- a/sys/x86/iommu/intel_qi.c
+++ b/sys/x86/iommu/intel_qi.c
@@ -58,17 +58,6 @@
 #include <x86/iommu/x86_iommu.h>
 #include <x86/iommu/intel_dmar.h>
 
-static bool
-dmar_qi_seq_processed(const struct dmar_unit *unit,
-    const struct iommu_qi_genseq *pseq)
-{
-       u_int gen;
-
-       gen = unit->inv_waitd_gen;
-       return (pseq->gen < gen ||
-           (pseq->gen == gen && pseq->seq <= unit->inv_waitd_seq_hw));
-}
-
 static int
 dmar_enable_qi(struct dmar_unit *unit)
 {
@@ -96,32 +85,36 @@ dmar_disable_qi(struct dmar_unit *unit)
 }
 
 static void
-dmar_qi_advance_tail(struct dmar_unit *unit)
+dmar_qi_advance_tail(struct iommu_unit *iommu)
 {
+       struct dmar_unit *unit;
 
+       unit = IOMMU2DMAR(iommu);
        DMAR_ASSERT_LOCKED(unit);
-       dmar_write4(unit, DMAR_IQT_REG, unit->inv_queue_tail);
+       dmar_write4(unit, DMAR_IQT_REG, unit->x86c.inv_queue_tail);
 }
 
 static void
-dmar_qi_ensure(struct dmar_unit *unit, int descr_count)
+dmar_qi_ensure(struct iommu_unit *iommu, int descr_count)
 {
+       struct dmar_unit *unit;
        uint32_t head;
        int bytes;
 
+       unit = IOMMU2DMAR(iommu);
        DMAR_ASSERT_LOCKED(unit);
        bytes = descr_count << DMAR_IQ_DESCR_SZ_SHIFT;
        for (;;) {
-               if (bytes <= unit->inv_queue_avail)
+               if (bytes <= unit->x86c.inv_queue_avail)
                        break;
                /* refill */
                head = dmar_read4(unit, DMAR_IQH_REG);
                head &= DMAR_IQH_MASK;
-               unit->inv_queue_avail = head - unit->inv_queue_tail -
+               unit->x86c.inv_queue_avail = head - unit->x86c.inv_queue_tail -
                    DMAR_IQ_DESCR_SZ;
-               if (head <= unit->inv_queue_tail)
-                       unit->inv_queue_avail += unit->inv_queue_size;
-               if (bytes <= unit->inv_queue_avail)
+               if (head <= unit->x86c.inv_queue_tail)
+                       unit->x86c.inv_queue_avail += unit->x86c.inv_queue_size;
+               if (bytes <= unit->x86c.inv_queue_avail)
                        break;
 
                /*
@@ -134,11 +127,11 @@ dmar_qi_ensure(struct dmar_unit *unit, int descr_count)
                 * See dmar_qi_invalidate_locked() for a discussion
                 * about data race prevention.
                 */
-               dmar_qi_advance_tail(unit);
-               unit->inv_queue_full++;
+               dmar_qi_advance_tail(DMAR2IOMMU(unit));
+               unit->x86c.inv_queue_full++;
                cpu_spinwait();
        }
-       unit->inv_queue_avail -= bytes;
+       unit->x86c.inv_queue_avail -= bytes;
 }
 
 static void
@@ -146,162 +139,60 @@ dmar_qi_emit(struct dmar_unit *unit, uint64_t data1, uint64_t data2)
 {
 
        DMAR_ASSERT_LOCKED(unit);
-       *(volatile uint64_t *)(unit->inv_queue + unit->inv_queue_tail) = data1;
-       unit->inv_queue_tail += DMAR_IQ_DESCR_SZ / 2;
-       KASSERT(unit->inv_queue_tail <= unit->inv_queue_size,
-           ("tail overflow 0x%x 0x%jx", unit->inv_queue_tail,
-           (uintmax_t)unit->inv_queue_size));
-       unit->inv_queue_tail &= unit->inv_queue_size - 1;
-       *(volatile uint64_t *)(unit->inv_queue + unit->inv_queue_tail) = data2;
-       unit->inv_queue_tail += DMAR_IQ_DESCR_SZ / 2;
-       KASSERT(unit->inv_queue_tail <= unit->inv_queue_size,
-           ("tail overflow 0x%x 0x%jx", unit->inv_queue_tail,
-           (uintmax_t)unit->inv_queue_size));
-       unit->inv_queue_tail &= unit->inv_queue_size - 1;
+       *(volatile uint64_t *)(unit->x86c.inv_queue +
+           unit->x86c.inv_queue_tail) = data1;
+       unit->x86c.inv_queue_tail += DMAR_IQ_DESCR_SZ / 2;
+       KASSERT(unit->x86c.inv_queue_tail <= unit->x86c.inv_queue_size,
+           ("tail overflow 0x%x 0x%jx", unit->x86c.inv_queue_tail,
+           (uintmax_t)unit->x86c.inv_queue_size));
+       unit->x86c.inv_queue_tail &= unit->x86c.inv_queue_size - 1;
+       *(volatile uint64_t *)(unit->x86c.inv_queue +
+           unit->x86c.inv_queue_tail) = data2;
+       unit->x86c.inv_queue_tail += DMAR_IQ_DESCR_SZ / 2;
+       KASSERT(unit->x86c.inv_queue_tail <= unit->x86c.inv_queue_size,
+           ("tail overflow 0x%x 0x%jx", unit->x86c.inv_queue_tail,
+           (uintmax_t)unit->x86c.inv_queue_size));
+       unit->x86c.inv_queue_tail &= unit->x86c.inv_queue_size - 1;
 }
 
 static void
-dmar_qi_emit_wait_descr(struct dmar_unit *unit, uint32_t seq, bool intr,
+dmar_qi_emit_wait_descr(struct iommu_unit *iommu, uint32_t seq, bool intr,
     bool memw, bool fence)
 {
+       struct dmar_unit *unit;
 
+       unit = IOMMU2DMAR(iommu);
        DMAR_ASSERT_LOCKED(unit);
        dmar_qi_emit(unit, DMAR_IQ_DESCR_WAIT_ID |
            (intr ? DMAR_IQ_DESCR_WAIT_IF : 0) |
            (memw ? DMAR_IQ_DESCR_WAIT_SW : 0) |
            (fence ? DMAR_IQ_DESCR_WAIT_FN : 0) |
            (memw ? DMAR_IQ_DESCR_WAIT_SD(seq) : 0),
-           memw ? unit->inv_waitd_seq_hw_phys : 0);
-}
-
-static void
-dmar_qi_emit_wait_seq(struct dmar_unit *unit, struct iommu_qi_genseq *pseq,
-    bool emit_wait)
-{
-       struct iommu_qi_genseq gsec;
-       uint32_t seq;
-
-       KASSERT(pseq != NULL, ("wait descriptor with no place for seq"));
-       DMAR_ASSERT_LOCKED(unit);
-       if (unit->inv_waitd_seq == 0xffffffff) {
-               gsec.gen = unit->inv_waitd_gen;
-               gsec.seq = unit->inv_waitd_seq;
-               dmar_qi_ensure(unit, 1);
-               dmar_qi_emit_wait_descr(unit, gsec.seq, false, true, false);
-               dmar_qi_advance_tail(unit);
-               while (!dmar_qi_seq_processed(unit, &gsec))
-                       cpu_spinwait();
-               unit->inv_waitd_gen++;
-               unit->inv_waitd_seq = 1;
-       }
-       seq = unit->inv_waitd_seq++;
-       pseq->gen = unit->inv_waitd_gen;
-       pseq->seq = seq;
-       if (emit_wait) {
-               dmar_qi_ensure(unit, 1);
-               dmar_qi_emit_wait_descr(unit, seq, true, true, false);
-       }
+           memw ? unit->x86c.inv_waitd_seq_hw_phys : 0);
 }
 
-/*
- * To avoid missed wakeups, callers must increment the unit's waiters count
- * before advancing the tail past the wait descriptor.
- */
 static void
-dmar_qi_wait_for_seq(struct dmar_unit *unit, const struct iommu_qi_genseq *gseq,
-    bool nowait)
-{
-
-       DMAR_ASSERT_LOCKED(unit);
-       KASSERT(unit->inv_seq_waiters > 0, ("%s: no waiters", __func__));
-       while (!dmar_qi_seq_processed(unit, gseq)) {
-               if (cold || nowait) {
-                       cpu_spinwait();
-               } else {
-                       msleep(&unit->inv_seq_waiters, &unit->iommu.lock, 0,
-                           "dmarse", hz);
-               }
-       }
-       unit->inv_seq_waiters--;
-}
-
-static void
-dmar_qi_invalidate_emit(struct dmar_domain *domain, iommu_gaddr_t base,
+dmar_qi_invalidate_emit(struct iommu_domain *idomain, iommu_gaddr_t base,
     iommu_gaddr_t size, struct iommu_qi_genseq *pseq, bool emit_wait)
 {
        struct dmar_unit *unit;
+       struct dmar_domain *domain;
        iommu_gaddr_t isize;
        int am;
 
+       domain = __containerof(idomain, struct dmar_domain, iodom);
        unit = domain->dmar;
        DMAR_ASSERT_LOCKED(unit);
        for (; size > 0; base += isize, size -= isize) {
                am = calc_am(unit, base, size, &isize);
-               dmar_qi_ensure(unit, 1);
+               dmar_qi_ensure(DMAR2IOMMU(unit), 1);
                dmar_qi_emit(unit, DMAR_IQ_DESCR_IOTLB_INV |
                    DMAR_IQ_DESCR_IOTLB_PAGE | DMAR_IQ_DESCR_IOTLB_DW |
                    DMAR_IQ_DESCR_IOTLB_DR |
                    DMAR_IQ_DESCR_IOTLB_DID(domain->domain),
                    base | am);
        }
-       dmar_qi_emit_wait_seq(unit, pseq, emit_wait);
-}
-
-/*
- * The caller must not be using the entry's dmamap_link field.
- */
-void
-dmar_qi_invalidate_locked(struct dmar_domain *domain,
-    struct iommu_map_entry *entry, bool emit_wait)
-{
-       struct dmar_unit *unit;
-
-       unit = domain->dmar;
-       DMAR_ASSERT_LOCKED(unit);
-       dmar_qi_invalidate_emit(domain, entry->start, entry->end -
-           entry->start, &entry->gseq, emit_wait);
-
-       /*
-        * To avoid a data race in dmar_qi_task(), the entry's gseq must be
-        * initialized before the entry is added to the TLB flush list, and the
-        * entry must be added to that list before the tail is advanced.  More
-        * precisely, the tail must not be advanced past the wait descriptor
-        * that will generate the interrupt that schedules dmar_qi_task() for
-        * execution before the entry is added to the list.  While an earlier
-        * call to dmar_qi_ensure() might have advanced the tail, it will not
-        * advance it past the wait descriptor.
-        *
-        * See the definition of struct dmar_unit for more information on
-        * synchronization.
-        */
-       entry->tlb_flush_next = NULL;
-       atomic_store_rel_ptr((uintptr_t *)&unit->tlb_flush_tail->tlb_flush_next,
-           (uintptr_t)entry);
-       unit->tlb_flush_tail = entry;
-
-       dmar_qi_advance_tail(unit);
-}
-
-void
-dmar_qi_invalidate_sync(struct dmar_domain *domain, iommu_gaddr_t base,
-    iommu_gaddr_t size, bool cansleep)
-{
-       struct dmar_unit *unit;
-       struct iommu_qi_genseq gseq;
-
-       unit = domain->dmar;
-       DMAR_LOCK(unit);
-       dmar_qi_invalidate_emit(domain, base, size, &gseq, true);
-
-       /*
-        * To avoid a missed wakeup in dmar_qi_task(), the unit's waiters count
-        * must be incremented before the tail is advanced.
-        */
-       unit->inv_seq_waiters++;
-
-       dmar_qi_advance_tail(unit);
-       dmar_qi_wait_for_seq(unit, &gseq, !cansleep);
-       DMAR_UNLOCK(unit);
+       iommu_qi_emit_wait_seq(DMAR2IOMMU(unit), pseq, emit_wait);
 }
 
 void
@@ -310,13 +201,13 @@ dmar_qi_invalidate_ctx_glob_locked(struct dmar_unit *unit)
        struct iommu_qi_genseq gseq;
 
        DMAR_ASSERT_LOCKED(unit);
-       dmar_qi_ensure(unit, 2);
+       dmar_qi_ensure(DMAR2IOMMU(unit), 2);
        dmar_qi_emit(unit, DMAR_IQ_DESCR_CTX_INV | DMAR_IQ_DESCR_CTX_GLOB, 0);
-       dmar_qi_emit_wait_seq(unit, &gseq, true);
+       iommu_qi_emit_wait_seq(DMAR2IOMMU(unit), &gseq, true);
        /* See dmar_qi_invalidate_sync(). */
-       unit->inv_seq_waiters++;
-       dmar_qi_advance_tail(unit);
-       dmar_qi_wait_for_seq(unit, &gseq, false);
+       unit->x86c.inv_seq_waiters++;
+       dmar_qi_advance_tail(DMAR2IOMMU(unit));
+       iommu_qi_wait_for_seq(DMAR2IOMMU(unit), &gseq, false);
 }
 
 void
@@ -325,14 +216,14 @@ dmar_qi_invalidate_iotlb_glob_locked(struct dmar_unit *unit)
        struct iommu_qi_genseq gseq;
 
        DMAR_ASSERT_LOCKED(unit);
-       dmar_qi_ensure(unit, 2);
+       dmar_qi_ensure(DMAR2IOMMU(unit), 2);
        dmar_qi_emit(unit, DMAR_IQ_DESCR_IOTLB_INV | DMAR_IQ_DESCR_IOTLB_GLOB |
            DMAR_IQ_DESCR_IOTLB_DW | DMAR_IQ_DESCR_IOTLB_DR, 0);
-       dmar_qi_emit_wait_seq(unit, &gseq, true);
+       iommu_qi_emit_wait_seq(DMAR2IOMMU(unit), &gseq, true);
        /* See dmar_qi_invalidate_sync(). */
-       unit->inv_seq_waiters++;
-       dmar_qi_advance_tail(unit);
-       dmar_qi_wait_for_seq(unit, &gseq, false);
+       unit->x86c.inv_seq_waiters++;
+       dmar_qi_advance_tail(DMAR2IOMMU(unit));
+       iommu_qi_wait_for_seq(DMAR2IOMMU(unit), &gseq, false);
 }
 
 void
@@ -341,13 +232,13 @@ dmar_qi_invalidate_iec_glob(struct dmar_unit *unit)
        struct iommu_qi_genseq gseq;
 
        DMAR_ASSERT_LOCKED(unit);
-       dmar_qi_ensure(unit, 2);
+       dmar_qi_ensure(DMAR2IOMMU(unit), 2);
        dmar_qi_emit(unit, DMAR_IQ_DESCR_IEC_INV, 0);
-       dmar_qi_emit_wait_seq(unit, &gseq, true);
+       iommu_qi_emit_wait_seq(DMAR2IOMMU(unit), &gseq, true);
        /* See dmar_qi_invalidate_sync(). */
-       unit->inv_seq_waiters++;
-       dmar_qi_advance_tail(unit);
-       dmar_qi_wait_for_seq(unit, &gseq, false);
+       unit->x86c.inv_seq_waiters++;
+       dmar_qi_advance_tail(DMAR2IOMMU(unit));
+       iommu_qi_wait_for_seq(DMAR2IOMMU(unit), &gseq, false);
 }
 
 void
@@ -363,21 +254,21 @@ dmar_qi_invalidate_iec(struct dmar_unit *unit, u_int start, u_int cnt)
        for (; cnt > 0; cnt -= c, start += c) {
                l = ffs(start | cnt) - 1;
                c = 1 << l;
-               dmar_qi_ensure(unit, 1);
+               dmar_qi_ensure(DMAR2IOMMU(unit), 1);
                dmar_qi_emit(unit, DMAR_IQ_DESCR_IEC_INV |
                    DMAR_IQ_DESCR_IEC_IDX | DMAR_IQ_DESCR_IEC_IIDX(start) |
                    DMAR_IQ_DESCR_IEC_IM(l), 0);
        }
-       dmar_qi_ensure(unit, 1);
-       dmar_qi_emit_wait_seq(unit, &gseq, true);
+       dmar_qi_ensure(DMAR2IOMMU(unit), 1);
+       iommu_qi_emit_wait_seq(DMAR2IOMMU(unit), &gseq, true);
 
        /*
-        * Since dmar_qi_wait_for_seq() will not sleep, this increment's
+        * Since iommu_qi_wait_for_seq() will not sleep, this increment's
         * placement relative to advancing the tail doesn't matter.
         */
-       unit->inv_seq_waiters++;
+       unit->x86c.inv_seq_waiters++;
 
-       dmar_qi_advance_tail(unit);
+       dmar_qi_advance_tail(DMAR2IOMMU(unit));
 
        /*
         * The caller of the function, in particular,
@@ -394,7 +285,7 @@ dmar_qi_invalidate_iec(struct dmar_unit *unit, u_int start, u_int cnt)
         * queue is processed, which includes requests possibly issued
         * before our request.
         */
-       dmar_qi_wait_for_seq(unit, &gseq, true);
+       iommu_qi_wait_for_seq(DMAR2IOMMU(unit), &gseq, true);
 }
 
 int
@@ -405,38 +296,18 @@ dmar_qi_intr(void *arg)
        unit = arg;
        KASSERT(unit->qi_enabled, ("dmar%d: QI is not enabled",
            unit->iommu.unit));
-       taskqueue_enqueue(unit->qi_taskqueue, &unit->qi_task);
+       taskqueue_enqueue(unit->x86c.qi_taskqueue, &unit->x86c.qi_task);
        return (FILTER_HANDLED);
 }
 
-static void
-dmar_qi_drain_tlb_flush(struct dmar_unit *unit)
-{
-       struct iommu_map_entry *entry, *head;
-
-       for (head = unit->tlb_flush_head;; head = entry) {
-               entry = (struct iommu_map_entry *)
-                   atomic_load_acq_ptr((uintptr_t *)&head->tlb_flush_next);
-               if (entry == NULL ||
-                   !dmar_qi_seq_processed(unit, &entry->gseq))
-                       break;
-               unit->tlb_flush_head = entry;
-               iommu_gas_free_entry(head);
-               if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
-                       iommu_gas_free_region(entry);
-               else
-                       iommu_gas_free_space(entry);
-       }
-}
-
 static void
 dmar_qi_task(void *arg, int pending __unused)
 {
        struct dmar_unit *unit;
        uint32_t ics;
 
-       unit = arg;
-       dmar_qi_drain_tlb_flush(unit);
+       unit = IOMMU2DMAR(arg);
+       iommu_qi_drain_tlb_flush(DMAR2IOMMU(unit));
 
        /*
         * Request an interrupt on the completion of the next invalidation
@@ -453,16 +324,16 @@ dmar_qi_task(void *arg, int pending __unused)
                 * Otherwise, such entries will linger until a later entry
                 * that requests an interrupt is processed.
                 */
-               dmar_qi_drain_tlb_flush(unit);
+               iommu_qi_drain_tlb_flush(DMAR2IOMMU(unit));
        }
 
-       if (unit->inv_seq_waiters > 0) {
+       if (unit->x86c.inv_seq_waiters > 0) {
                /*
                 * Acquire the DMAR lock so that wakeup() is called only after
                 * the waiter is sleeping.
                 */
                DMAR_LOCK(unit);
-               wakeup(&unit->inv_seq_waiters);
+               wakeup(&unit->x86c.inv_seq_waiters);
                DMAR_UNLOCK(unit);
        }
 }
@@ -472,7 +343,7 @@ dmar_init_qi(struct dmar_unit *unit)
 {
        uint64_t iqa;
        uint32_t ics;
-       int qi_sz;
+       u_int qi_sz;
 
        if (!DMAR_HAS_QI(unit) || (unit->hw_cap & DMAR_CAP_CM) != 0)
                return (0);
@@ -481,34 +352,19 @@ dmar_init_qi(struct dmar_unit *unit)
        if (!unit->qi_enabled)
                return (0);
 
-       unit->tlb_flush_head = unit->tlb_flush_tail =
-            iommu_gas_alloc_entry(NULL, 0);
-       TASK_INIT(&unit->qi_task, 0, dmar_qi_task, unit);
-       unit->qi_taskqueue = taskqueue_create_fast("dmarqf", M_WAITOK,
-           taskqueue_thread_enqueue, &unit->qi_taskqueue);
-       taskqueue_start_threads(&unit->qi_taskqueue, 1, PI_AV,
-           "dmar%d qi taskq", unit->iommu.unit);
-
-       unit->inv_waitd_gen = 0;
-       unit->inv_waitd_seq = 1;
-
-       qi_sz = DMAR_IQA_QS_DEF;
-       TUNABLE_INT_FETCH("hw.dmar.qi_size", &qi_sz);
-       if (qi_sz > DMAR_IQA_QS_MAX)
-               qi_sz = DMAR_IQA_QS_MAX;
-       unit->inv_queue_size = (1ULL << qi_sz) * PAGE_SIZE;
-       /* Reserve one descriptor to prevent wraparound. */
-       unit->inv_queue_avail = unit->inv_queue_size - DMAR_IQ_DESCR_SZ;
-
-       /* The invalidation queue reads by DMARs are always coherent. */
-       unit->inv_queue = kmem_alloc_contig(unit->inv_queue_size, M_WAITOK |
-           M_ZERO, 0, iommu_high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
-       unit->inv_waitd_seq_hw_phys = pmap_kextract(
-           (vm_offset_t)&unit->inv_waitd_seq_hw);
+       unit->x86c.qi_buf_maxsz = DMAR_IQA_QS_MAX;
+       unit->x86c.qi_cmd_sz = DMAR_IQ_DESCR_SZ;
+       iommu_qi_common_init(DMAR2IOMMU(unit), dmar_qi_task);
+       get_x86_iommu()->qi_ensure = dmar_qi_ensure;
+       get_x86_iommu()->qi_emit_wait_descr = dmar_qi_emit_wait_descr;
+       get_x86_iommu()->qi_advance_tail = dmar_qi_advance_tail;
+       get_x86_iommu()->qi_invalidate_emit = dmar_qi_invalidate_emit;
+
+       qi_sz = ilog2(unit->x86c.inv_queue_size / PAGE_SIZE);
 
        DMAR_LOCK(unit);
        dmar_write8(unit, DMAR_IQT_REG, 0);
-       iqa = pmap_kextract((uintptr_t)unit->inv_queue);
+       iqa = pmap_kextract((uintptr_t)unit->x86c.inv_queue);
        iqa |= qi_sz;
        dmar_write8(unit, DMAR_IQA_REG, iqa);
        dmar_enable_qi(unit);
@@ -523,35 +379,19 @@ dmar_init_qi(struct dmar_unit *unit)
        return (0);
 }
 
+static void
+dmar_fini_qi_helper(struct iommu_unit *iommu)
+{
+       dmar_disable_qi_intr(IOMMU2DMAR(iommu));
+       dmar_disable_qi(IOMMU2DMAR(iommu));
+}
+
 void
 dmar_fini_qi(struct dmar_unit *unit)
 {
-       struct iommu_qi_genseq gseq;
-
        if (!unit->qi_enabled)
                return;
-       taskqueue_drain(unit->qi_taskqueue, &unit->qi_task);
-       taskqueue_free(unit->qi_taskqueue);
-       unit->qi_taskqueue = NULL;
-
-       DMAR_LOCK(unit);
-       /* quisce */
-       dmar_qi_ensure(unit, 1);
-       dmar_qi_emit_wait_seq(unit, &gseq, true);
-       /* See dmar_qi_invalidate_sync_locked(). */
-       unit->inv_seq_waiters++;
-       dmar_qi_advance_tail(unit);
-       dmar_qi_wait_for_seq(unit, &gseq, false);
-       /* only after the quisce, disable queue */
-       dmar_disable_qi_intr(unit);
-       dmar_disable_qi(unit);
-       KASSERT(unit->inv_seq_waiters == 0,
-           ("dmar%d: waiters on disabled queue", unit->iommu.unit));
-       DMAR_UNLOCK(unit);
-
-       kmem_free(unit->inv_queue, unit->inv_queue_size);
-       unit->inv_queue = NULL;
-       unit->inv_queue_size = 0;
+       iommu_qi_common_fini(DMAR2IOMMU(unit), dmar_fini_qi_helper);
        unit->qi_enabled = 0;
 }
 
diff --git a/sys/x86/iommu/iommu_utils.c b/sys/x86/iommu/iommu_utils.c
index ea2c0358e072..571e5a2e65cd 100644
--- a/sys/x86/iommu/iommu_utils.c
+++ b/sys/x86/iommu/iommu_utils.c
@@ -29,7 +29,9 @@
  */
 
 #include <sys/systm.h>
+#include <sys/kernel.h>
 #include <sys/lock.h>
+#include <sys/malloc.h>
 #include <sys/memdesc.h>
 #include <sys/mutex.h>
 #include <sys/sf_buf.h>
@@ -40,8 +42,11 @@
 #include <sys/taskqueue.h>
 #include <sys/tree.h>
 #include <vm/vm.h>
-#include <vm/vm_page.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_map.h>
 #include <vm/vm_object.h>
+#include <vm/vm_page.h>
 #include <dev/pci/pcireg.h>
 #include <machine/atomic.h>
 #include <machine/bus.h>
@@ -251,3 +256,230 @@ iommu_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie)
 {
        return (x86_iommu->unmap_ioapic_intr(ioapic_id, cookie));
 }
+
+#define        IOMMU2X86C(iommu)       (x86_iommu->get_x86_common(iommu))
+
+static bool
+iommu_qi_seq_processed(struct iommu_unit *unit,
+    const struct iommu_qi_genseq *pseq)
+{
+       struct x86_unit_common *x86c;
+       u_int gen;
+
+       x86c = IOMMU2X86C(unit);
+       gen = x86c->inv_waitd_gen;
+       return (pseq->gen < gen ||
+           (pseq->gen == gen && pseq->seq <= x86c->inv_waitd_seq_hw));
+}
+
+void
+iommu_qi_emit_wait_seq(struct iommu_unit *unit, struct iommu_qi_genseq *pseq,
+    bool emit_wait)
+{
+       struct x86_unit_common *x86c;
+       struct iommu_qi_genseq gsec;
+       uint32_t seq;
+
+       KASSERT(pseq != NULL, ("wait descriptor with no place for seq"));
+       IOMMU_ASSERT_LOCKED(unit);
+       x86c = IOMMU2X86C(unit);
+
+       if (x86c->inv_waitd_seq == 0xffffffff) {
+               gsec.gen = x86c->inv_waitd_gen;
+               gsec.seq = x86c->inv_waitd_seq;
+               x86_iommu->qi_ensure(unit, 1);
+               x86_iommu->qi_emit_wait_descr(unit, gsec.seq, false,
+                   true, false);
+               x86_iommu->qi_advance_tail(unit);
+               while (!iommu_qi_seq_processed(unit, &gsec))
+                       cpu_spinwait();
+               x86c->inv_waitd_gen++;
+               x86c->inv_waitd_seq = 1;
+       }
+       seq = x86c->inv_waitd_seq++;
+       pseq->gen = x86c->inv_waitd_gen;
+       pseq->seq = seq;
+       if (emit_wait) {
+               x86_iommu->qi_ensure(unit, 1);
+               x86_iommu->qi_emit_wait_descr(unit, seq, true, true, false);
+       }
+}
+
+/*
+ * To avoid missed wakeups, callers must increment the unit's waiters count
+ * before advancing the tail past the wait descriptor.
+ */
+void
+iommu_qi_wait_for_seq(struct iommu_unit *unit, const struct iommu_qi_genseq *
+    gseq, bool nowait)
+{
+       struct x86_unit_common *x86c;
+
+       IOMMU_ASSERT_LOCKED(unit);
+       x86c = IOMMU2X86C(unit);
+
+       KASSERT(x86c->inv_seq_waiters > 0, ("%s: no waiters", __func__));
+       while (!iommu_qi_seq_processed(unit, gseq)) {
+               if (cold || nowait) {
+                       cpu_spinwait();
+               } else {
+                       msleep(&x86c->inv_seq_waiters, &unit->lock, 0,
+                           "dmarse", hz);
+               }
+       }
+       x86c->inv_seq_waiters--;
+}
+
+/*
+ * The caller must not be using the entry's dmamap_link field.
+ */
+void
+iommu_qi_invalidate_locked(struct iommu_domain *domain,
+    struct iommu_map_entry *entry, bool emit_wait)
+{
+       struct iommu_unit *unit;
+       struct x86_unit_common *x86c;
+
+       unit = domain->iommu;
+       x86c = IOMMU2X86C(unit);
+       IOMMU_ASSERT_LOCKED(unit);
+
+       x86_iommu->qi_invalidate_emit(domain, entry->start, entry->end -
+           entry->start, &entry->gseq, emit_wait);
+
+       /*
+        * To avoid a data race in dmar_qi_task(), the entry's gseq must be
+        * initialized before the entry is added to the TLB flush list, and the
+        * entry must be added to that list before the tail is advanced.  More
+        * precisely, the tail must not be advanced past the wait descriptor
+        * that will generate the interrupt that schedules dmar_qi_task() for
+        * execution before the entry is added to the list.  While an earlier
+        * call to dmar_qi_ensure() might have advanced the tail, it will not
+        * advance it past the wait descriptor.
+        *
+        * See the definition of struct dmar_unit for more information on
+        * synchronization.
+        */
+       entry->tlb_flush_next = NULL;
+       atomic_store_rel_ptr((uintptr_t *)&x86c->tlb_flush_tail->
+           tlb_flush_next, (uintptr_t)entry);
+       x86c->tlb_flush_tail = entry;
+
+       x86_iommu->qi_advance_tail(unit);
+}
+
+void
+iommu_qi_invalidate_sync(struct iommu_domain *domain, iommu_gaddr_t base,
+    iommu_gaddr_t size, bool cansleep)
+{
+       struct iommu_unit *unit;
+       struct iommu_qi_genseq gseq;
+
+       unit = domain->iommu;
+       IOMMU_LOCK(unit);
+       x86_iommu->qi_invalidate_emit(domain, base, size, &gseq, true);
+
+       /*
+        * To avoid a missed wakeup in iommu_qi_task(), the unit's
+        * waiters count must be incremented before the tail is
+        * advanced.
+        */
+       IOMMU2X86C(unit)->inv_seq_waiters++;
+
+       x86_iommu->qi_advance_tail(unit);
+       iommu_qi_wait_for_seq(unit, &gseq, !cansleep);
+       IOMMU_UNLOCK(unit);
+}
+
+void
+iommu_qi_drain_tlb_flush(struct iommu_unit *unit)
+{
+       struct x86_unit_common *x86c;
+       struct iommu_map_entry *entry, *head;
+
+       x86c = IOMMU2X86C(unit);
+       for (head = x86c->tlb_flush_head;; head = entry) {
+               entry = (struct iommu_map_entry *)
+                   atomic_load_acq_ptr((uintptr_t *)&head->tlb_flush_next);
+               if (entry == NULL ||
+                   !iommu_qi_seq_processed(unit, &entry->gseq))
+                       break;
+               x86c->tlb_flush_head = entry;
+               iommu_gas_free_entry(head);
+               if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
+                       iommu_gas_free_region(entry);
+               else
+                       iommu_gas_free_space(entry);
+       }
+}
+
+void
+iommu_qi_common_init(struct iommu_unit *unit, task_fn_t qi_task)
+{
+       struct x86_unit_common *x86c;
+       u_int qi_sz;
+
+       x86c = IOMMU2X86C(unit);
+
+       x86c->tlb_flush_head = x86c->tlb_flush_tail =
+            iommu_gas_alloc_entry(NULL, 0);
+       TASK_INIT(&x86c->qi_task, 0, qi_task, unit);
+       x86c->qi_taskqueue = taskqueue_create_fast("iommuqf", M_WAITOK,
+           taskqueue_thread_enqueue, &x86c->qi_taskqueue);
+       taskqueue_start_threads(&x86c->qi_taskqueue, 1, PI_AV,
+           "iommu%d qi taskq", unit->unit);
+
+       x86c->inv_waitd_gen = 0;
+       x86c->inv_waitd_seq = 1;
+
+       qi_sz = 3;
+       TUNABLE_INT_FETCH("hw.iommu.qi_size", &qi_sz);
+       if (qi_sz > x86c->qi_buf_maxsz)
+               qi_sz = x86c->qi_buf_maxsz;
+       x86c->inv_queue_size = (1ULL << qi_sz) * PAGE_SIZE;
+       /* Reserve one descriptor to prevent wraparound. */
+       x86c->inv_queue_avail = x86c->inv_queue_size -
+           x86c->qi_cmd_sz;
+
+       /*
+        * The invalidation queue reads by DMARs/AMDIOMMUs are always
+        * coherent.
+        */
+       x86c->inv_queue = kmem_alloc_contig(x86c->inv_queue_size,
+           M_WAITOK | M_ZERO, 0, iommu_high, PAGE_SIZE, 0,
+           VM_MEMATTR_DEFAULT);
+       x86c->inv_waitd_seq_hw_phys = pmap_kextract(
+           (vm_offset_t)&x86c->inv_waitd_seq_hw);
+}
+
+void
+iommu_qi_common_fini(struct iommu_unit *unit, void (*disable_qi)(
+    struct iommu_unit *))
+{
+       struct x86_unit_common *x86c;
+       struct iommu_qi_genseq gseq;
+
+       x86c = IOMMU2X86C(unit);
+
+       taskqueue_drain(x86c->qi_taskqueue, &x86c->qi_task);
+       taskqueue_free(x86c->qi_taskqueue);
+       x86c->qi_taskqueue = NULL;
+
+       IOMMU_LOCK(unit);
+       /* quisce */
+       x86_iommu->qi_ensure(unit, 1);
+       iommu_qi_emit_wait_seq(unit, &gseq, true);
+       /* See iommu_qi_invalidate_locked(). */
+       x86c->inv_seq_waiters++;
+       x86_iommu->qi_advance_tail(unit);
+       iommu_qi_wait_for_seq(unit, &gseq, false);
+       /* only after the quisce, disable queue */
+       disable_qi(unit);
+       KASSERT(x86c->inv_seq_waiters == 0,
+           ("iommu%d: waiters on disabled queue", unit->unit));
+       IOMMU_UNLOCK(unit);
+
+       kmem_free(x86c->inv_queue, x86c->inv_queue_size);
+       x86c->inv_queue = NULL;
+       x86c->inv_queue_size = 0;
+}
diff --git a/sys/x86/iommu/x86_iommu.h b/sys/x86/iommu/x86_iommu.h
index 8c908964acd0..eb1bbafbeb77 100644
--- a/sys/x86/iommu/x86_iommu.h
+++ b/sys/x86/iommu/x86_iommu.h
@@ -59,7 +59,18 @@ extern int iommu_tbl_pagecnt;
 SYSCTL_DECL(_hw_iommu);
 SYSCTL_DECL(_hw_iommu_dmar);
 
+struct x86_unit_common;
+
 struct x86_iommu {
+       struct x86_unit_common *(*get_x86_common)(struct
+           iommu_unit *iommu);
+       void (*qi_ensure)(struct iommu_unit *unit, int descr_count);
+       void (*qi_emit_wait_descr)(struct iommu_unit *unit, uint32_t seq,
+           bool, bool, bool);
+       void (*qi_advance_tail)(struct iommu_unit *unit);
+       void (*qi_invalidate_emit)(struct iommu_domain *idomain,
+           iommu_gaddr_t base, iommu_gaddr_t size, struct iommu_qi_genseq *
+           pseq, bool emit_wait);
        void (*domain_unload_entry)(struct iommu_map_entry *entry, bool free,
            bool cansleep);
        void (*domain_unload)(struct iommu_domain *iodom,
@@ -82,4 +93,65 @@ struct x86_iommu {
 void set_x86_iommu(struct x86_iommu *);
 struct x86_iommu *get_x86_iommu(void);
 
+struct x86_unit_common {
+       uint32_t qi_buf_maxsz;
+       uint32_t qi_cmd_sz;
+
+       char *inv_queue;
+       vm_size_t inv_queue_size;
+       uint32_t inv_queue_avail;
+       uint32_t inv_queue_tail;
+       volatile uint32_t inv_waitd_seq_hw; /* hw writes there on wait
+                                              descr completion */
*** 52 LINES SKIPPED ***


Reply via email to