Am 10.03.23 um 23:16 schrieb Felix Kuehling:
This will make it possible for amdgpu GEM ioctls to flush TLBs on compute
VMs.

This removes VMID-based TLB flushing and always uses PASID-based
flushing. This still works because it scans the VMID-PASID mapping
registers to find the right VMID. It's only slightly less efficient. This
is not a production use case.

On the one hand it looks nice that we can now flush based on the pasid without having the NO_HWS dependency (I hope that this was intentional).

On the other hand I really don't like to have any variables in the amdgpu_vm structure which are not worked with by the VM code itself.

That already backfired with the pd_phys_addr before.

Regards,
Christian.


Signed-off-by: Felix Kuehling <felix.kuehl...@amd.com>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 30 +++++++++-------------
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  6 ++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h     |  1 +
  drivers/gpu/drm/amd/amdkfd/kfd_priv.h      |  9 +++++--
  drivers/gpu/drm/amd/amdkfd/kfd_process.c   | 28 --------------------
  5 files changed, 22 insertions(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 8816853e50c0..dcbd28e99b5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -726,31 +726,25 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
        return false;
  }
-int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev,
-                                    uint16_t vmid)
-{
-       if (adev->family == AMDGPU_FAMILY_AI) {
-               int i;
-
-               for (i = 0; i < adev->num_vmhubs; i++)
-                       amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
-       } else {
-               amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
-       }
-
-       return 0;
-}
-
-int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
-                                     uint16_t pasid, enum TLB_FLUSH_TYPE flush_type)
+int amdgpu_amdkfd_flush_tlb(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+                           enum TLB_FLUSH_TYPE type)
  {
+       uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm);
        bool all_hub = false;
+ /*
+        * It can be that we race and lose here, but that is extremely unlikely
+        * and the worst thing which could happen is that we flush the changes
+        * into the TLB once more which is harmless.
+        */
+       if (atomic64_xchg(&vm->kfd_last_flushed_seq, tlb_seq) == tlb_seq)
+               return 0;
+
        if (adev->family == AMDGPU_FAMILY_AI ||
            adev->family == AMDGPU_FAMILY_RV)
                all_hub = true;
- return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub);
+       return amdgpu_gmc_flush_gpu_tlb_pasid(adev, vm->pasid, type, all_hub);
  }
bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 01ba3589b60a..dcaf69fd833c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -157,10 +157,8 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
                                uint32_t *ib_cmd, uint32_t ib_len);
  void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle);
  bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev);
-int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev,
-                               uint16_t vmid);
-int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
-                               uint16_t pasid, enum TLB_FLUSH_TYPE flush_type);
+int amdgpu_amdkfd_flush_tlb(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+                           enum TLB_FLUSH_TYPE type);
  bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 03a3314e5b43..171de7da2959 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -290,6 +290,7 @@ struct amdgpu_vm {
        /* Last finished delayed update */
        atomic64_t              tlb_seq;
        struct dma_fence        *last_tlb_flush;
+       atomic64_t              kfd_last_flushed_seq;
/* Last unlocked submission to the scheduler entities */
        struct dma_fence        *last_unlocked;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index bfa30d12406b..e029129308e7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -715,7 +715,6 @@ struct kfd_process_device {
        /* VM context for GPUVM allocations */
        struct file *drm_file;
        void *drm_priv;
-       atomic64_t tlb_seq;
/* GPUVM allocations storage */
        struct idr alloc_idr;
@@ -1344,7 +1343,13 @@ void kfd_signal_reset_event(struct kfd_dev *dev);
  void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid);
-void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type);
+static inline void kfd_flush_tlb(struct kfd_process_device *pdd,
+                                 enum TLB_FLUSH_TYPE type)
+{
+       struct amdgpu_device *adev = pdd->dev->adev;
+       struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
+
+       amdgpu_amdkfd_flush_tlb(adev, vm, type);
+}
static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev)
  {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index ebabe92f7edb..48d7d30eeb24 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1591,7 +1591,6 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
                return ret;
        }
        pdd->drm_priv = drm_file->private_data;
-       atomic64_set(&pdd->tlb_seq, 0);
ret = kfd_process_device_reserve_ib_mem(pdd);
        if (ret)
@@ -1994,33 +1993,6 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
                               KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
  }
-void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type)
-{
-       struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
-       uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm);
-       struct kfd_dev *dev = pdd->dev;
-
-       /*
-        * It can be that we race and lose here, but that is extremely unlikely
-        * and the worst thing which could happen is that we flush the changes
-        * into the TLB once more which is harmless.
-        */
-       if (atomic64_xchg(&pdd->tlb_seq, tlb_seq) == tlb_seq)
-               return;
-
-       if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
-               /* Nothing to flush until a VMID is assigned, which
-                * only happens when the first queue is created.
-                */
-               if (pdd->qpd.vmid)
-                       amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->adev,
-                                                       pdd->qpd.vmid);
-       } else {
-               amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->adev,
-                                       pdd->process->pasid, type);
-       }
-}
-
  struct kfd_process_device *kfd_process_device_data_by_id(struct kfd_process *p, uint32_t gpu_id)
  {
        int i;

Reply via email to