Add a description for the TLB cache invalidation algorithm and for
the related kAPI functions.

Signed-off-by: Mauro Carvalho Chehab <mche...@kernel.org>
---

To avoid mailbombing a large number of people, only mailing lists were Cc'd
on the cover.
See [PATCH 0/2] at: 
https://lore.kernel.org/all/cover.1659011328.git.mche...@kernel.org/

 drivers/gpu/drm/i915/gt/intel_tlb.c | 25 +++++++++
 drivers/gpu/drm/i915/gt/intel_tlb.h | 83 +++++++++++++++++++++++++++++
 2 files changed, 108 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_tlb.c b/drivers/gpu/drm/i915/gt/intel_tlb.c
index af8cae979489..d5df2e0f77fb 100644
--- a/drivers/gpu/drm/i915/gt/intel_tlb.c
+++ b/drivers/gpu/drm/i915/gt/intel_tlb.c
@@ -145,6 +145,18 @@ static void mmio_invalidate_full(struct intel_gt *gt)
        intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
 }
 
+/**
+ * intel_gt_invalidate_tlb_full - do full TLB cache invalidation
+ * @gt: GT structure
+ * @seqno: sequence number
+ *
+ * Do a full TLB cache invalidation if @seqno is newer than the seqno of
+ * the last full TLB cache invalidation.
+ *
+ * Note:
+ * The TLB cache invalidation logic depends on GEN-specific registers.
+ * It currently supports GEN8 to GEN12 and GuC-based TLB cache invalidation.
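+ *
+ * A minimal usage sketch (the stamp is assumed to have been stored at
+ * unbind/evict time via intel_gt_next_invalidate_tlb_full(); obj->mm.tlb
+ * is the object field this series uses for it)::
+ *
+ *   // flush only if no full invalidation has covered the stamp yet
+ *   intel_gt_invalidate_tlb_full(gt, obj->mm.tlb);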
+ */
 void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno)
 {
        intel_wakeref_t wakeref;
@@ -171,12 +183,25 @@ void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno)
        }
 }
 
+/**
+ * intel_gt_init_tlb - initialize TLB-specific vars
+ * @gt: GT structure
+ *
+ * The TLB cache invalidation logic internally uses resources that require
+ * initialization. This function should be called before any TLB cache
+ * invalidation takes place.
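+ *
+ * A sketch of the expected pairing with intel_gt_fini_tlb(); the call
+ * sites are illustrative, in this series they sit in the GT init and
+ * teardown paths::
+ *
+ *   intel_gt_init_tlb(gt);                    // once, at GT init time
+ *   ...
+ *   intel_gt_invalidate_tlb_full(gt, seqno);  // normal operation
+ *   ...
+ *   intel_gt_fini_tlb(gt);                    // once, at GT teardown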
+ */
 void intel_gt_init_tlb(struct intel_gt *gt)
 {
        mutex_init(&gt->tlb.invalidate_lock);
        seqcount_mutex_init(&gt->tlb.seqno, &gt->tlb.invalidate_lock);
 }
 
+/**
+ * intel_gt_fini_tlb - free TLB-specific vars
+ * @gt: GT structure
+ *
+ * Frees the resources used by the TLB cache invalidation logic.
+ */
 void intel_gt_fini_tlb(struct intel_gt *gt)
 {
        mutex_destroy(&gt->tlb.invalidate_lock);
diff --git a/drivers/gpu/drm/i915/gt/intel_tlb.h b/drivers/gpu/drm/i915/gt/intel_tlb.h
index 46ce25bf5afe..d186f5d5901f 100644
--- a/drivers/gpu/drm/i915/gt/intel_tlb.h
+++ b/drivers/gpu/drm/i915/gt/intel_tlb.h
@@ -11,16 +11,99 @@
 
 #include "intel_gt_types.h"
 
+/**
+ * DOC: TLB cache invalidation logic
+ *
+ * The way the current algorithm works is that a struct drm_i915_gem_object
+ * can be created in any order. At unbind/evict time, there is a guarantee
+ * that the object won't be used anymore. So, the object stores a sequence
+ * number provided by intel_gt_next_invalidate_tlb_full(). This can happen
+ * either at __vma_put_pages(), for sync VMA unbind, or at
+ * ppgtt_unbind_vma(), for async VMA unbind.
+ *
+ * At __i915_gem_object_unset_pages(), intel_gt_invalidate_tlb_full() is
+ * called, where it checks if the sequence number of the object was already
+ * covered by a full TLB flush. If not, it flushes and increments the
+ * sequence number::
+ *
+ *   void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno)
+ *   {
+ *     ...
+ *     with_intel_gt_pm_if_awake(gt, wakeref) {
+ *             mutex_lock(&gt->tlb.invalidate_lock);
+ *             if (tlb_seqno_passed(gt, seqno))
+ *                     goto unlock;
+ *
+ *             mmio_invalidate_full(gt);
+ *
+ *             write_seqcount_invalidate(&gt->tlb.seqno); // increment seqno
+ *     ...
+ *
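+ * The "already invalidated" check is a wrap-safe sequence number
+ * comparison. A sketch of the idea behind tlb_seqno_passed(), which is
+ * internal to intel_tlb.c (shown here for illustration only)::
+ *
+ *   static bool tlb_seqno_passed(const struct intel_gt *gt, u32 seqno)
+ *   {
+ *           u32 cur = intel_gt_tlb_seqno(gt);
+ *
+ *           // round the odd stamp up to the even flush counter and
+ *           // compare using wrap-safe signed arithmetic
+ *           return (s32)(cur - ALIGN(seqno, 2)) > 0;
+ *   }
+ *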
+ * So, let's say the current seqno is 2 and 3 new objects were created,
+ * in this order:
+ *
+ *     obj1
+ *     obj2
+ *     obj3
+ *
+ * They can be unbound/evicted in a different order. At unbind/evict time,
+ * mm.tlb will be stamped with the sequence number from the last TLB
+ * flush, plus 1.
+ *
+ * Different threads may be used for unbind/evict and/or unset pages.
+ *
+ * As the logic inside intel_gt_invalidate_tlb_full() is protected by a
+ * mutex, for simplicity, let's consider just two threads::
+ *
+ *   sequence number   Thread 0                Thread 1
+ *
+ *   seqno=2
+ *                     unbind/evict event
+ *                     obj3.mm.tlb = seqno | 1
+ *
+ *                     unbind/evict event
+ *                     obj1.mm.tlb = seqno | 1
+ *                                             __i915_gem_object_unset_pages()
+ *                                             called for obj3 => TLB flush,
+ *                                             which also covers obj1, since
+ *                                             both were stamped with seqno 3.
+ *                                             seqno += 2
+ *   seqno=4
+ *                     unbind/evict event
+ *                     obj2.mm.tlb = seqno | 1
+ *                                             __i915_gem_object_unset_pages()
+ *                                             called for obj1: no flush, as
+ *                                             a past flush covered obj1
+ *
+ *                                             __i915_gem_object_unset_pages()
+ *                                             called for obj2 => TLB flush
+ *                                             seqno += 2
+ *   seqno=6
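+ *
+ * The stamping step on the unbind/evict side reduces to a single store of
+ * the next invalidation seqno (sketch; mm.tlb is the object field used in
+ * the example above)::
+ *
+ *   obj->mm.tlb = intel_gt_next_invalidate_tlb_full(gt);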
+ */
+
 void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno);
 
 void intel_gt_init_tlb(struct intel_gt *gt);
 void intel_gt_fini_tlb(struct intel_gt *gt);
 
+/**
+ * intel_gt_tlb_seqno - Returns the current TLB invalidation sequence number
+ * @gt: GT structure
+ *
+ * There's no need to lock while calling it, as seqprop_sequence is thread-safe.
+ */
 static inline u32 intel_gt_tlb_seqno(const struct intel_gt *gt)
 {
        return seqprop_sequence(&gt->tlb.seqno);
 }
 
+/**
+ * intel_gt_next_invalidate_tlb_full - Returns the next TLB full invalidation
+ *     sequence number
+ * @gt: GT structure
+ *
+ * There's no need to lock while calling it, as seqprop_sequence is thread-safe.
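+ *
+ * As full-flush seqnos are even, the returned stamp is always odd: for
+ * example, with a current seqno of 4 this returns 4 | 1 = 5, and an object
+ * stamped with 5 only counts as invalidated once a later full flush moves
+ * the counter past it.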
+ */
 static inline u32 intel_gt_next_invalidate_tlb_full(const struct intel_gt *gt)
 {
        return intel_gt_tlb_seqno(gt) | 1;
}
-- 
2.36.1
