Re: [PATCH] drm/amd/powerplay: check whether SMU IP is enabled before access

2020-05-25 Thread Zhao, Yong
[AMD Official Use Only - Internal Distribution Only]

Have an improvement suggestion inline. Whether it is accepted or not, the patch 
is

Reviewed-by: Yong Zhao 
Tested-by: Yong Zhao 


From: Quan, Evan 
Sent: Monday, May 25, 2020 2:37 AM
To: amd-gfx@lists.freedesktop.org 
Cc: Deucher, Alexander ; Zhao, Yong 
; Quan, Evan 
Subject: [PATCH] drm/amd/powerplay: check whether SMU IP is enabled before 
access

In the early phase of bringup, the SMU IP may not be enabled or
supported. Without this check, we may hit a NULL pointer dereference
when accessing smu->adev.

Change-Id: I644175e926cd4fef8259f89002d6f8eda04fe42c
Signed-off-by: Evan Quan 
---
 drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 266 +
 1 file changed, 113 insertions(+), 153 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c 
b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
index a78a1f542ea9..f7428996cc74 100644
--- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
@@ -61,7 +61,6 @@ const char *smu_get_feature_name(struct smu_context *smu, 
enum smu_feature_mask

 size_t smu_sys_get_pp_feature_mask(struct smu_context *smu, char *buf)
 {
-   struct amdgpu_device *adev = smu->adev;
 size_t size = 0;
 int ret = 0, i = 0;
 uint32_t feature_mask[2] = { 0 };
@@ -70,8 +69,8 @@ size_t smu_sys_get_pp_feature_mask(struct smu_context *smu, 
char *buf)
 uint32_t sort_feature[SMU_FEATURE_COUNT];
 uint64_t hw_feature_count = 0;

-   if (!adev->pm.dpm_enabled)
-   return -EINVAL;
+   if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled)
+   return -EOPNOTSUPP;

 mutex_lock(&smu->mutex);

@@ -155,10 +154,9 @@ int smu_sys_set_pp_feature_mask(struct smu_context *smu, 
uint64_t new_mask)
 uint64_t feature_2_enabled = 0;
 uint64_t feature_2_disabled = 0;
 uint64_t feature_enables = 0;
-   struct amdgpu_device *adev = smu->adev;

-   if (!adev->pm.dpm_enabled)
-   return -EINVAL;
+   if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled)
+   return -EOPNOTSUPP;

 mutex_lock(&smu->mutex);

@@ -436,11 +434,10 @@ bool smu_clk_dpm_is_enabled(struct smu_context *smu, enum 
smu_clk_type clk_type)
 int smu_dpm_set_power_gate(struct smu_context *smu, uint32_t block_type,
bool gate)
 {
-   struct amdgpu_device *adev = smu->adev;
 int ret = 0;

-   if (!adev->pm.dpm_enabled)
-   return -EINVAL;
+   if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled)
+   return -EOPNOTSUPP;

 switch (block_type) {
 case AMD_IP_BLOCK_TYPE_UVD:
@@ -577,11 +574,10 @@ bool is_support_sw_smu(struct amdgpu_device *adev)
 int smu_sys_get_pp_table(struct smu_context *smu, void **table)
 {
 struct smu_table_context *smu_table = &smu->smu_table;
-   struct amdgpu_device *adev = smu->adev;
 uint32_t powerplay_table_size;

-   if (!adev->pm.dpm_enabled)
-   return -EINVAL;
+   if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled)
+   return -EOPNOTSUPP;

 if (!smu_table->power_play_table && !smu_table->hardcode_pptable)
 return -EINVAL;
@@ -603,12 +599,11 @@ int smu_sys_get_pp_table(struct smu_context *smu, void 
**table)
 int smu_sys_set_pp_table(struct smu_context *smu,  void *buf, size_t size)
 {
 struct smu_table_context *smu_table = &smu->smu_table;
-   struct amdgpu_device *adev = smu->adev;
 ATOM_COMMON_TABLE_HEADER *header = (ATOM_COMMON_TABLE_HEADER *)buf;
 int ret = 0;

-   if (!adev->pm.dpm_enabled)
-   return -EINVAL;
+   if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled)
+   return -EOPNOTSUPP;

 if (header->usStructureSize != size) {
 pr_err("pp table size not matched !\n");
@@ -1622,15 +1617,11 @@ static int smu_resume(void *handle)
 int smu_display_configuration_change(struct smu_context *smu,
  const struct amd_pp_display_configuration 
*display_config)
 {
-   struct amdgpu_device *adev = smu->adev;
 int index = 0;
 int num_of_active_display = 0;

-   if (!adev->pm.dpm_enabled)
-   return -EINVAL;
-
-   if (!is_support_sw_smu(smu->adev))
-   return -EINVAL;
+   if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled)
+   return -EOPNOTSUPP;

 if (!display_config)
 return -EINVAL;
@@ -1691,15 +1682,11 @@ int smu_get_current_clocks(struct smu_context *smu,
struct amd_pp_clock_info *clocks)
 {
 struct amd_pp_simple_clock_info simple_clocks = {0};
-   struct amdgpu_device *adev = smu->adev

Re: drm/amdkfd: Change pasid's type to unsigned int

2020-05-22 Thread Zhao, Yong
[AMD Official Use Only - Internal Distribution Only]

Hi Fenghua,

I am okay with the idea.

Regards,
Yong

From: Fenghua Yu 
Sent: Friday, May 22, 2020 5:21 PM
To: Kuehling, Felix 
Cc: Zhao, Yong ; amd-gfx@lists.freedesktop.org 

Subject: Re: drm/amdkfd: Change pasid's type to unsigned int

Hi, Felix,

On Fri, May 22, 2020 at 03:40:06PM -0400, Felix Kuehling wrote:
> Hi Fenghua,
>
> The PASID width in KFD is currently limited to 16 bits. I believe this
> reflects what our hardware can handle. KFD will never allocate a PASID
> bigger than 16 bits. That said, I'm OK with changing this field in the
> kfd_process structure to unsigned int. Generally, I find uint16_t in
> structures not very useful except in tightly packed structures such as
> packet formats or ioctl arguments.

Thank you very much for your insight!

I'm writing the patch set to define pasid as "unsigned int" consistently
in iommu. I'll put the amdkfd changes (only a few changes including this
pasid change in struct kfd_process) in one patch and send it to you for
review.

-Fenghua
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/2] drm/amdgpu: drop navi pcie bw callback

2020-05-21 Thread Zhao, Yong
[AMD Official Use Only - Internal Distribution Only]

The series is

Reviewed-by: Yong Zhao 

From: amd-gfx  on behalf of Alex Deucher 

Sent: Thursday, May 21, 2020 12:52 PM
To: amd-gfx list 
Cc: Deucher, Alexander 
Subject: Re: [PATCH 2/2] drm/amdgpu: drop navi pcie bw callback

Ping on this series?

On Tue, May 19, 2020 at 5:10 PM Alex Deucher  wrote:
>
> It's not implemented yet so just drop it so the sysfs
> pcie bw file returns an appropriate error instead of
> garbage.
>
> Signed-off-by: Alex Deucher 
> ---
>  drivers/gpu/drm/amd/amdgpu/nv.c | 8 
>  1 file changed, 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
> index 9c42316c47c0..6655dd2009b6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/nv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/nv.c
> @@ -548,13 +548,6 @@ static bool nv_need_full_reset(struct amdgpu_device 
> *adev)
> return true;
>  }
>
> -static void nv_get_pcie_usage(struct amdgpu_device *adev,
> - uint64_t *count0,
> - uint64_t *count1)
> -{
> -   /*TODO*/
> -}
> -
>  static bool nv_need_reset_on_init(struct amdgpu_device *adev)
>  {
>  #if 0
> @@ -629,7 +622,6 @@ static const struct amdgpu_asic_funcs nv_asic_funcs =
> .invalidate_hdp = &nv_invalidate_hdp,
> .init_doorbell_index = &nv_init_doorbell_index,
> .need_full_reset = &nv_need_full_reset,
> -   .get_pcie_usage = &nv_get_pcie_usage,
> .need_reset_on_init = &nv_need_reset_on_init,
> .get_pcie_replay_count = &nv_get_pcie_replay_count,
> .supports_baco = &nv_asic_supports_baco,
> --
> 2.25.4
>
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&data=02%7C01%7Cyong.zhao%40amd.com%7C1a6de00e60b447bad54a08d7fda76316%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637256767681371342&sdata=EWwfgYQtOR1TGw6%2BL6w8Cw7Y9e7ixVI1xDJoWTUsWtk%3D&reserved=0
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdkfd: Consolidate duplicated bo alloc flags

2020-03-05 Thread Zhao, Yong
[AMD Official Use Only - Internal Distribution Only]

Okay, I will change back to its original format.

Yong


From: Kuehling, Felix 
Sent: Thursday, March 5, 2020 3:49 PM
To: amd-gfx@lists.freedesktop.org ; Zhao, Yong 

Subject: Re: [PATCH] drm/amdkfd: Consolidate duplicated bo alloc flags

On 2020-03-04 3:21 p.m., Yong Zhao wrote:
> ALLOC_MEM_FLAGS_* used are the same as the KFD_IOC_ALLOC_MEM_FLAGS_*,
> but they are interweavedly used in kernel driver, resulting in bad
> readability. For example, KFD_IOC_ALLOC_MEM_FLAGS_COHERENT is totally
> not referenced in kernel, and it functions in the kernel through
> ALLOC_MEM_FLAGS_COHERENT, causing unnecessary confusion.
>
> Replace all occurrences of ALLOC_MEM_FLAGS_* by
> KFD_IOC_ALLOC_MEM_FLAGS_* to solve the problem.
>
> Change-Id: Iced6ed3698167296c97b14e7e4569883859d619c
> Signed-off-by: Yong Zhao 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c|  9 +++--
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 38 +++
>   drivers/gpu/drm/amd/amdkfd/kfd_process.c  | 13 ---
>   .../gpu/drm/amd/include/kgd_kfd_interface.h   | 21 --
>   4 files changed, 36 insertions(+), 45 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index 726c91ab6761..affaa0d4b636 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -29,6 +29,7 @@
>   #include 
>   #include 
>   #include "amdgpu_xgmi.h"
> +#include 
>
>   static const unsigned int compute_vmid_bitmap = 0xFF00;
>
> @@ -500,11 +501,13 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, 
> int dma_buf_fd,
>r = amdgpu_bo_get_metadata(bo, metadata_buffer, buffer_size,
>   metadata_size, &metadata_flags);
>if (flags) {
> - *flags = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
> - ALLOC_MEM_FLAGS_VRAM : ALLOC_MEM_FLAGS_GTT;
> + if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM)
> + *flags = KFD_IOC_ALLOC_MEM_FLAGS_VRAM;
> + else
> + *flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT;

You're sneaking in some personal preference (changing the ternary (cond
? a : b) operator to if-else) with the renaming change. Personally I
find the ternary operator just as readable and more concise.


>
>if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
> - *flags |= ALLOC_MEM_FLAGS_PUBLIC;
> + *flags |= KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC;
>}
>
>   out_put:
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index e4481caed648..c81fe7011e88 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -29,6 +29,7 @@
>   #include "amdgpu_vm.h"
>   #include "amdgpu_amdkfd.h"
>   #include "amdgpu_dma_buf.h"
> +#include 
>
>   /* BO flag to indicate a KFD userptr BO */
>   #define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63)
> @@ -400,18 +401,18 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct 
> amdgpu_sync *sync)
>   static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem 
> *mem)
>   {
>struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
> - bool coherent = mem->alloc_flags & ALLOC_MEM_FLAGS_COHERENT;
> + bool coherent = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT;
>uint32_t mapping_flags;
>
>mapping_flags = AMDGPU_VM_PAGE_READABLE;
> - if (mem->alloc_flags & ALLOC_MEM_FLAGS_WRITABLE)
> + if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE)
>mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
> - if (mem->alloc_flags & ALLOC_MEM_FLAGS_EXECUTABLE)
> + if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE)
>mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
>
>switch (adev->asic_type) {
>case CHIP_ARCTURUS:
> - if (mem->alloc_flags & ALLOC_MEM_FLAGS_VRAM) {
> + if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
>if (bo_adev == adev)
>mapping_flags |= coherent ?
>AMDGPU_VM_MTYPE_CC : 
> AMDGPU_VM_MTYPE_RW;
> @@ -1160,24 +1161,24 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>/*
> * Chec

Re: [PATCH 1/2] drm/amdgpu: remove unused variable

2020-02-27 Thread Zhao, Yong
[AMD Official Use Only - Internal Distribution Only]

Reviewed-by: Yong Zhao 

From: amd-gfx  on behalf of Alex Deucher 

Sent: Thursday, February 27, 2020 3:14 PM
To: amd-gfx@lists.freedesktop.org 
Cc: Deucher, Alexander 
Subject: [PATCH 1/2] drm/amdgpu: remove unused variable

Fixes a warning.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index bec4337156a7..b3e32969eb7f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1131,7 +1131,6 @@ static void
 amdgpu_pci_remove(struct pci_dev *pdev)
 {
 struct drm_device *dev = pci_get_drvdata(pdev);
-   struct amdgpu_device *adev = dev->dev_private;

 #ifdef MODULE
 if (THIS_MODULE->state != MODULE_STATE_GOING)
--
2.24.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&data=02%7C01%7Cyong.zhao%40amd.com%7Ca4b7a490c31647472ca608d7bbc1bc24%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637184313087705310&sdata=LNUqZiHj4Ey7KHjtp1o2c%2BpYO87JeL61VAYfNwNiE0Q%3D&reserved=0
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdkfd: change SDMA MQD memory type

2020-02-26 Thread Zhao, Yong
[AMD Official Use Only - Internal Distribution Only]

It looks good to me. I was thinking maybe we should go one step further and add 
more explanatory comments around the MQD control stack workaround, so that 
people have a clearer idea of what that MQD control stack workaround is 
about. We can do that in a following commit.

Acked-by: Yong Zhao 

From: amd-gfx  on behalf of Eric Huang 

Sent: Wednesday, February 26, 2020 2:37 PM
To: amd-gfx@lists.freedesktop.org 
Cc: Huang, JinHuiEric 
Subject: [PATCH] drm/amdkfd: change SDMA MQD memory type

SDMA MQD memory type is NC, which causes MQD data to be overwritten
accidentally by an old stale cache line. Changing it to the UC
default for GART will fix the issue.

Change-Id: If609f47c78cb97e2c8dc930df2ab5c10c29dfe56
Signed-off-by: Eric Huang 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 692abfd..77ea0f0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1734,7 +1734,7 @@ static int allocate_hiq_sdma_mqd(struct 
device_queue_manager *dqm)

 retval = amdgpu_amdkfd_alloc_gtt_mem(dev->kgd, size,
 &(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
-   (void *)&(mem_obj->cpu_ptr), true);
+   (void *)&(mem_obj->cpu_ptr), false);

 return retval;
 }
--
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&data=02%7C01%7Cyong.zhao%40amd.com%7C2cbbb3322952475590f508d7baf34ce6%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637183426468344453&sdata=2zCqxZJFXSbBlezU7UIZxc%2FQC0PBTum90MmIjCioHGw%3D&reserved=0
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/1] drm/amdgpu: Fix 32-bit build

2020-02-26 Thread Zhao, Yong
[AMD Official Use Only - Internal Distribution Only]

Reviewed-by: Yong Zhao 

From: amd-gfx  on behalf of Felix 
Kuehling 
Sent: Wednesday, February 26, 2020 12:12 PM
To: amd-gfx@lists.freedesktop.org ; Deucher, 
Alexander 
Cc: Pan, Xinhui 
Subject: [PATCH 1/1] drm/amdgpu: Fix 32-bit build

Add a dummy implementation of amdgpu_amdkfd_remove_fence_on_pt_pd_bos
for kernel configs without KFD.

Fixes: be8e48e08499 ("drm/amdgpu: Remove kfd eviction fence before release bo")
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index f30375c1b40a..bc2e72a66db9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -681,6 +681,11 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo 
*bo)
 {
 }

+int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
+{
+   return 0;
+}
+
 void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
 struct amdgpu_vm *vm)
 {
--
2.25.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&data=02%7C01%7Cyong.zhao%40amd.com%7Cdd32602c073d476434c408d7badf1ef7%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637183339778815696&sdata=Xa3G3wzAYp47cG0fEVW7G9pzgB4qfodkpcAG9dnNBOo%3D&reserved=0
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 6/6] drm/amdkfd: Delete unnecessary unmap queue package submissions

2020-02-25 Thread Zhao, Yong
[AMD Official Use Only - Internal Distribution Only]

Thanks! I will update the commit message before pushing. It should say that the 
way the SDMA queue counts were used to unmap SDMA engines, based on the previous 
understanding, was wrong.

Regards,
Yong

From: Kuehling, Felix 
Sent: Tuesday, February 25, 2020 12:06 PM
To: Zhao, Yong ; amd-gfx@lists.freedesktop.org 

Subject: Re: [PATCH 6/6] drm/amdkfd: Delete unnecessary unmap queue package 
submissions

As I understand it, the SDMA queue counting wasn't incorrect. The main
change here is that you no longer send separate unmap packets for SDMA
queues, and that makes SDMA queue counting unnecessary.

That said, this patch series is a nice cleanup and improvement. The
series is

Reviewed-by: Felix Kuehling 

On 2020-02-24 17:18, Yong Zhao wrote:
> The previous SDMA queue counting was wrong. In addition, after confirming
> with the MEC firmware team, we understand that only one unmap queue package,
> instead of one unmap queue package for CP and each SDMA engine, is needed,
> which results in much simpler driver code.
>
> Change-Id: I84fd2f7e63d6b7f664580b425a78d3e995ce9abc
> Signed-off-by: Yong Zhao 
> ---
>   .../drm/amd/amdkfd/kfd_device_queue_manager.c | 79 ++-
>   .../drm/amd/amdkfd/kfd_device_queue_manager.h |  2 -
>   .../amd/amdkfd/kfd_process_queue_manager.c| 16 ++--
>   3 files changed, 29 insertions(+), 68 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 958275db3f55..692abfd2088a 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -109,6 +109,11 @@ static unsigned int get_num_xgmi_sdma_engines(struct 
> device_queue_manager *dqm)
>return dqm->dev->device_info->num_xgmi_sdma_engines;
>   }
>
> +static unsigned int get_num_all_sdma_engines(struct device_queue_manager 
> *dqm)
> +{
> + return get_num_sdma_engines(dqm) + get_num_xgmi_sdma_engines(dqm);
> +}
> +
>   unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
>   {
>return dqm->dev->device_info->num_sdma_engines
> @@ -375,11 +380,6 @@ static int create_queue_nocpsch(struct 
> device_queue_manager *dqm,
>if (q->properties.is_active)
>increment_queue_count(dqm, q->properties.type);
>
> - if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
> - dqm->sdma_queue_count++;
> - else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
> - dqm->xgmi_sdma_queue_count++;
> -
>/*
> * Unconditionally increment this counter, regardless of the queue's
> * type or whether the queue is active.
> @@ -460,15 +460,13 @@ static int destroy_queue_nocpsch_locked(struct 
> device_queue_manager *dqm,
>mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
>q->properties.type)];
>
> - if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
> + if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
>deallocate_hqd(dqm, q);
> - } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
> - dqm->sdma_queue_count--;
> + else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
>deallocate_sdma_queue(dqm, q);
> - } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
> - dqm->xgmi_sdma_queue_count--;
> + else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
>deallocate_sdma_queue(dqm, q);
> - } else {
> + else {
>pr_debug("q->properties.type %d is invalid\n",
>q->properties.type);
>return -EINVAL;
> @@ -915,8 +913,6 @@ static int initialize_nocpsch(struct device_queue_manager 
> *dqm)
>INIT_LIST_HEAD(&dqm->queues);
>dqm->active_queue_count = dqm->next_pipe_to_allocate = 0;
>dqm->active_cp_queue_count = 0;
> - dqm->sdma_queue_count = 0;
> - dqm->xgmi_sdma_queue_count = 0;
>
>for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
>int pipe_offset = pipe * get_queues_per_pipe(dqm);
> @@ -981,8 +977,11 @@ static int allocate_sdma_queue(struct 
> device_queue_manager *dqm,
>int bit;
>
>if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
> - if (dqm->sdma_bitmap == 0)
> + if (dqm->sdma_bitmap == 0) {
> + pr_err("No more SDMA queue to allocate\n");
>return -ENOMEM;
> +

Re: [PATCH 3/5] drm/amdgpu: GPU TLB flush API moved to amdgpu_amdkfd

2019-12-23 Thread Zhao, Yong
[AMD Official Use Only - Internal Distribution Only]

True. There indeed are two vmhubs on Navi. So my two comments are not useful 
here.

Yong

From: Kuehling, Felix 
Sent: Monday, December 23, 2019 2:34 PM
To: Zhao, Yong ; Sierra Guiza, Alejandro (Alex) 
; amd-gfx@lists.freedesktop.org 

Subject: Re: [PATCH 3/5] drm/amdgpu: GPU TLB flush API moved to amdgpu_amdkfd


On 2019-12-20 7:01 p.m., Yong Zhao wrote:
>
> On 2019-12-20 6:50 p.m., Yong Zhao wrote:
>> Inline.
>>
>> On 2019-12-20 4:35 p.m., Felix Kuehling wrote:
>>> On 2019-12-20 1:24, Alex Sierra wrote:
>>>> [Why]
>>>> TLB flush method has been deprecated using kfd2kgd interface.
>>>> This implementation is now on the amdgpu_amdkfd API.
>>>>
>>>> [How]
>>>> TLB flush functions now implemented in amdgpu_amdkfd.
>>>>
>>>> Change-Id: Ic51cccdfe6e71288d78da772b6e1b6ced72f8ef7
>>>> Signed-off-by: Alex Sierra 
>>>
>>> Looks good to me. See my comment about the TODO inline.
>>>
>>>
>>>> ---
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 32
>>>> ++
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  2 ++
>>>>   drivers/gpu/drm/amd/amdkfd/kfd_process.c   |  8 --
>>>>   3 files changed, 39 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>>> index d3da9dde4ee1..b7f6e70c5762 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>>> @@ -634,6 +634,38 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct
>>>> amdgpu_device *adev, u32 vmid)
>>>>   return false;
>>>>   }
>>>>   +int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd,
>>>> uint16_t vmid)
>>>> +{
>>>> +struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
>>>> +/* TODO: condition missing for FAMILY above NV */
>>>
>>> I'm not sure what's missing here. NV and above don't need any
>>> special treatment. Since SDMA is connected to GFXHUB on NV, only the
>>> GFXHUB needs to be flushed.
>>>
>>> Regards,
>>>   Felix
>>>
>>>
>>>> +if (adev->family == AMDGPU_FAMILY_AI) {
>>>> +int i;
>>>> +
>>>> +for (i = 0; i < adev->num_vmhubs; i++)
>>>> +amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
>>>> +} else {
>>>> +amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
>>>> +}
>>
>> This if else can be unified by
>>
>> for (i = 0; i < adev->num_vmhubs; i++)
>>
>> amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
>>
>>>> +
>>>> +return 0;
>>>> +}
>>>> +
>>>> +int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd,
>>>> uint16_t pasid)
>>>> +{
>>>> +struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
>>>> +uint32_t flush_type = 0;
>>>> +bool all_hub = false;
>>>> +
>>>> +if (adev->gmc.xgmi.num_physical_nodes &&
>>>> +adev->asic_type == CHIP_VEGA20)
>>>> +flush_type = 2;
>>>> +
>>>> +if (adev->family == AMDGPU_FAMILY_AI)
>>>> +all_hub = true;
>>>> +
>>>> +return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type,
>>>> all_hub);
> The all_hub parameter can be inferred from num_vmhubs in
> flush_gpu_tlb_pasid(), so it can be optimized out here.

Hi Yong,

This is incorrect. NV has two VM hubs: GFXHUB and MMHUB. But KFD doesn't
care about MMHUB on Navi because SDMA is connected to the GFXHUB.
Therefore the all_hub parameter should not be based on the num_vmhubs.
We need a special case for NV.

Or rather the special case could be AI, where SDMA is not connected to
GFXHUB. So only on AI we need to flush all hubs for KFD VMs.

Regards,
   Felix

>>>> +}
>>>> +
>>>>   bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd)
>>>>   {
>>>>   struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>> index 069d5d230810..47b0f2957d1f 100644
>>

Re: [PATCH] drm/amdkfd: Improve function get_sdma_rlc_reg_offset()

2019-12-16 Thread Zhao, Yong
[AMD Official Use Only - Internal Distribution Only]

The problem happens when we want to reuse the same function for ASICs which 
have fewer SDMA engines. Some pointers on which SOC15_REG_OFFSET depends for 
some higher-index SDMA engines are 0, causing a NULL pointer dereference.

I will fix the default case in switch.

Yong


From: Kuehling, Felix 
Sent: Monday, December 16, 2019 2:39 PM
To: Zhao, Yong ; amd-gfx@lists.freedesktop.org 

Subject: Re: [PATCH] drm/amdkfd: Improve function get_sdma_rlc_reg_offset()

On 2019-12-13 8:38, Yong Zhao wrote:
> This prevents the NULL pointer access when there are fewer than 8 sdma
> engines.

I don't see where you got a NULL pointer in the old code. Also this
change is in an Arcturus-specific source file. AFAIK Arcturus always has
8 SDMA engines.

The new code is much longer than the old code. I don't see how that's an
improvement. See one more comment inline.


>
> Change-Id: Iabae9bff7546b344720905d5d4a5cfc066a79d25
> Signed-off-by: Yong Zhao 
> ---
>   .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   | 64 ---
>   1 file changed, 42 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> index 3c119407dc34..2ad088f10493 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> @@ -71,32 +71,52 @@ static uint32_t get_sdma_rlc_reg_offset(struct 
> amdgpu_device *adev,
>unsigned int engine_id,
>unsigned int queue_id)
>   {
> - uint32_t sdma_engine_reg_base[8] = {
> - SOC15_REG_OFFSET(SDMA0, 0,
> -  mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL,
> - SOC15_REG_OFFSET(SDMA1, 0,
> -  mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL,
> - SOC15_REG_OFFSET(SDMA2, 0,
> -  mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL,
> - SOC15_REG_OFFSET(SDMA3, 0,
> -  mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL,
> - SOC15_REG_OFFSET(SDMA4, 0,
> -  mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL,
> - SOC15_REG_OFFSET(SDMA5, 0,
> -  mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL,
> - SOC15_REG_OFFSET(SDMA6, 0,
> -  mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL,
> - SOC15_REG_OFFSET(SDMA7, 0,
> -  mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL
> - };
> -
> - uint32_t retval = sdma_engine_reg_base[engine_id]

I'm not sure where you were getting a NULL pointer, but I guess this
could have used a range check to make sure engine_id is < 8 before
indexing into the array. The equivalent in the switch statement would be
a default case. See below.


> + uint32_t sdma_engine_reg_base;
> + uint32_t sdma_rlc_reg_offset;
> +
> + switch (engine_id) {
> + case 0:
> + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
> + mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
> + break;
> + case 1:
> + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0,
> + mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL;
> + break;
> + case 2:
> + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA2, 0,
> + mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL;
> + break;
> + case 3:
> + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA3, 0,
> + mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL;
> + break;
> + case 4:
> + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA4, 0,
> + mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL;
> + break;
> + case 5:
> + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA5, 0,
> + mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL;
> + break;
> + case 6:
> + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA6, 0,
> + mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL;
> + break;
> + case 7:
> + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA7, 0,
> + mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL;
> + break;
> +

Do you need a default case for the switch statement? I think you get a
compiler warning without one.

Regards,
   Felix


> + }
> +
> + sdma_rlc_reg_o

Re: [PATCH 1/1] drm/amdgpu: Raise KFD unpinned system memory limit

2019-11-25 Thread Zhao, Yong
[AMD Official Use Only - Internal Distribution Only]

Reviewed-by: Yong Zhao 

From: amd-gfx  on behalf of Felix 
Kuehling 
Sent: Monday, November 25, 2019 4:28 PM
To: amd-gfx@lists.freedesktop.org 
Subject: [PATCH 1/1] drm/amdgpu: Raise KFD unpinned system memory limit

Allow KFD applications to use more unpinned system memory through
HMM.

Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index e43a95514b41..b6d1958d514f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -85,7 +85,7 @@ static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm,
 }

 /* Set memory usage limits. Current, limits are
- *  System (TTM + userptr) memory - 3/4th System RAM
+ *  System (TTM + userptr) memory - 15/16th System RAM
  *  TTM memory - 3/8th System RAM
  */
 void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
@@ -98,7 +98,7 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
 mem *= si.mem_unit;

 spin_lock_init(&kfd_mem_limit.mem_limit_lock);
-   kfd_mem_limit.max_system_mem_limit = (mem >> 1) + (mem >> 2);
+   kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4);
 kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3);
 pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
 (kfd_mem_limit.max_system_mem_limit >> 20),
--
2.24.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&data=02%7C01%7Cyong.zhao%40amd.com%7C808d3732d1d74a7cd05208d771ee6f9b%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637103141200385929&sdata=yywimvE%2FuidOPQb9IYLZi95MbfnFrW0Swmp11iYi4%2BI%3D&reserved=0
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 2/2] drm/amdkfd: Move pm_create_runlist_ib() out of pm_send_runlist()

2019-11-22 Thread Zhao, Yong
[AMD Official Use Only - Internal Distribution Only]

Hi Felix,

There is no big picture unfortunately, just some improvements that I came up 
with while navigating the code.

Regarding your suggestion, I have a concern. With the original code in 
unmap_queues_cpsch(), if amdkfd_fence_wait_timeout() fails, we won't release 
the runlist ib. I am not sure whether it is by design or just a small bug. If it 
is by design (probably for debugging when the HWS hangs), merging 
pm_send_unmap_queue and pm_release_ib together will break the design.

If we agree to move in that direction, I agree with the part about the name 
changes, because the original names are prone to cause confusion.

Regards,
Yong

From: Kuehling, Felix 
Sent: Friday, November 22, 2019 4:21 PM
To: Zhao, Yong ; amd-gfx@lists.freedesktop.org 

Subject: Re: [PATCH 2/2] drm/amdkfd: Move pm_create_runlist_ib() out of 
pm_send_runlist()

I'm not sure about this one. Looks like the interface is getting
needlessly more complicated. Now the caller has to keep track of the
runlist IB address and size just to pass those to another function. I
could understand this if there was a use case that needs to separate the
allocation of the runlist and sending it to the HW. But I don't see that.

Some background for why I think the interface is the way it is: The
runlist IB is continuously executed by the HWS firmware. If the runlist
is oversubscribed, the HWS firmware will loop through it. So the IB must
remain allocated until pm_send_unmap_queue is called. Currently
pm_send_runlist creates the runlist IB and sends it to the HWS. You're
separating that into creation and sending. Do you see a case where you
need to send the same runlist multiple times? Or do something else
between creating the runlist and sending it to the HWS?

pm_release_ib releases the runlist IB, assuming that the HWS is no longer
using it. Maybe this could be combined with pm_send_unmap_queue. I'm not
100% sure because there are some filter parameters that may leave some
queues mapped. If the two can be combined, I'd suggest the following
name and interface changes to reflect how I think this is being used today:

  * pm_send_runlist -> pm_create_and_send_runlist
  * pm_send_unmap_queue + pm_release_ib -> pm_preempt_and_free_runlist

I see you're doing a lot of cleanup and refactoring in this area of the
code. Is there some bigger picture here, some idea of the end-state
you're trying to get to? Knowing where you're going with this may make
it easier to review the code.

Regards,
   Felix

On 2019-11-21 4:26 p.m., Yong Zhao wrote:
> This is consistent with the calling sequence in unmap_queues_cpsch().
>
> Change-Id: Ieb6714422c812d4f6ebbece34e339871471e4b5e
> Signed-off-by: Yong Zhao 
> ---
>   .../drm/amd/amdkfd/kfd_device_queue_manager.c | 18 +++--
>   .../gpu/drm/amd/amdkfd/kfd_packet_manager.c   | 20 +--
>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  7 ++-
>   3 files changed, 27 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 510f2d1bb8bb..fd7d90136b94 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -1302,6 +1302,8 @@ static int unmap_sdma_queues(struct 
> device_queue_manager *dqm)
>   static int map_queues_cpsch(struct device_queue_manager *dqm)
>   {
>int retval;
> + uint64_t rl_ib_gpu_addr;
> + size_t rl_ib_size;
>
>if (!dqm->sched_running)
>return 0;
> @@ -1310,15 +1312,27 @@ static int map_queues_cpsch(struct 
> device_queue_manager *dqm)
>if (dqm->active_runlist)
>return 0;
>
> - retval = pm_send_runlist(&dqm->packets, &dqm->queues);
> + retval = pm_create_runlist_ib(&dqm->packets, &dqm->queues,
> + &rl_ib_gpu_addr, &rl_ib_size);
> + if (retval)
> + goto fail_create_runlist_ib;
> +
> + pr_debug("runlist IB address: 0x%llX\n", rl_ib_gpu_addr);
> +
> + retval = pm_send_runlist(&dqm->packets, &dqm->queues,
> + rl_ib_gpu_addr, rl_ib_size);
>pr_debug("%s sent runlist\n", __func__);
>if (retval) {
>pr_err("failed to execute runlist\n");
> - return retval;
> + goto fail_create_runlist_ib;
>}
>dqm->active_runlist = true;
>
>return retval;
> +
> +fail_create_runlist_ib:
> + pm_destroy_runlist_ib(&dqm->packets);
> + return retval;
>   }
>
>   /* dqm->lock mutex has to be locked before calling t

Re: [PATCH] drm/amdgpu/soc15: move struct definition around to align with other soc15 asics

2019-11-15 Thread Zhao, Yong
Reviewed-by: Yong Zhao 

From: amd-gfx  on behalf of Alex Deucher 

Sent: Friday, November 15, 2019 11:13 AM
To: amd-gfx list 
Cc: Deucher, Alexander 
Subject: Re: [PATCH] drm/amdgpu/soc15: move struct definition around to align 
with other soc15 asics

Ping?

On Mon, Nov 11, 2019 at 5:21 PM Alex Deucher  wrote:
>
> Move reset_method next to reset callback to match the struct layout and
> the other definition in this file.
>
> Signed-off-by: Alex Deucher 
> ---
>  drivers/gpu/drm/amd/amdgpu/soc15.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c 
> b/drivers/gpu/drm/amd/amdgpu/soc15.c
> index 8e1640bc07af..305ad3eec987 100644
> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
> @@ -1007,6 +1007,7 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs 
> =
> .read_bios_from_rom = &soc15_read_bios_from_rom,
> .read_register = &soc15_read_register,
> .reset = &soc15_asic_reset,
> +   .reset_method = &soc15_asic_reset_method,
> .set_vga_state = &soc15_vga_set_state,
> .get_xclk = &soc15_get_xclk,
> .set_uvd_clocks = &soc15_set_uvd_clocks,
> @@ -1019,7 +1020,6 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs 
> =
> .get_pcie_usage = &vega20_get_pcie_usage,
> .need_reset_on_init = &soc15_need_reset_on_init,
> .get_pcie_replay_count = &soc15_get_pcie_replay_count,
> -   .reset_method = &soc15_asic_reset_method
>  };
>
>  static int soc15_common_early_init(void *handle)
> --
> 2.23.0
>
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 3/3] drm/amdkfd: Fix a bug when calculating save_area_used_size

2019-11-12 Thread Zhao, Yong
+ Laurent

From: Zhao, Yong 
Sent: Monday, November 11, 2019 6:25 PM
To: amd-gfx@lists.freedesktop.org ; Cornwall, 
Jay 
Cc: Zhao, Yong 
Subject: [PATCH 3/3] drm/amdkfd: Fix a bug when calculating save_area_used_size

workgroup context data writes from m->cp_hqd_cntl_stack_size, so we
should deduct it when calculating the used size.

Change-Id: I5252e25662c3b8221f451c39115bf084d1911eae
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index d3380c5bdbde..3a2ee1f01aae 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -302,7 +302,8 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,

 *ctl_stack_used_size = m->cp_hqd_cntl_stack_size -
 m->cp_hqd_cntl_stack_offset;
-   *save_area_used_size = m->cp_hqd_wg_state_offset;
+   *save_area_used_size = m->cp_hqd_wg_state_offset -
+   m->cp_hqd_cntl_stack_size;;

 if (copy_to_user(ctl_stack, mqd_ctl_stack, m->cp_hqd_cntl_stack_size))
 return -EFAULT;
--
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdkfd: Use kernel queue v9 functions for v10 (ver2)

2019-11-07 Thread Zhao, Yong
Okay. I submitted a PSDB. Meanwhile, I got the answer from FW and SQ HW contact 
that nothing bad will happen on GFX9 by writing 1 to TRAP_EN.

Regards,
Yong

From: Kuehling, Felix 
Sent: Thursday, November 7, 2019 4:07 PM
To: Zhao, Yong ; amd-gfx@lists.freedesktop.org 

Subject: Re: [PATCH] drm/amdkfd: Use kernel queue v9 functions for v10 (ver2)

Are you sure that setting the SQ_SHADER_TBA_HI__TRAP_EN bit on GFXv9 is
completely harmless? If the field is not defined, maybe setting the bit
makes the address invalid. It's probably worth running that through a
PSDB, which would cover Vega10, Vega20 and Arcturus.

If it actually works, the patch is

Reviewed-by: Felix Kuehling 

Regards,
   Felix

On 2019-11-07 15:34, Zhao, Yong wrote:
> The kernel queue functions for v9 and v10 are the same except
> pm_map_process_v* which have small difference, so they should be reused.
> This eliminates the need of reapplying several patches which were
> applied on v9 but not on v10, such as bigger GWS and more than 2
> SDMA engine support which were introduced on Arcturus.
>
> Change-Id: I2d385961e3c884db14e30b5afc98d0d9e4cb1802
> Signed-off-by: Yong Zhao 
> ---
>   drivers/gpu/drm/amd/amdkfd/Makefile   |   1 -
>   drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c |   4 +-
>   drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h |   1 -
>   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 317 --
>   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  |  16 +-
>   .../gpu/drm/amd/amdkfd/kfd_packet_manager.c   |   4 +-
>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h |   4 -
>   7 files changed, 14 insertions(+), 333 deletions(-)
>   delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile 
> b/drivers/gpu/drm/amd/amdkfd/Makefile
> index 48155060a57c..017a8b7156da 100644
> --- a/drivers/gpu/drm/amd/amdkfd/Makefile
> +++ b/drivers/gpu/drm/amd/amdkfd/Makefile
> @@ -41,7 +41,6 @@ AMDKFD_FILES:= $(AMDKFD_PATH)/kfd_module.o \
>$(AMDKFD_PATH)/kfd_kernel_queue_cik.o \
>$(AMDKFD_PATH)/kfd_kernel_queue_vi.o \
>$(AMDKFD_PATH)/kfd_kernel_queue_v9.o \
> - $(AMDKFD_PATH)/kfd_kernel_queue_v10.o \
>$(AMDKFD_PATH)/kfd_packet_manager.o \
>$(AMDKFD_PATH)/kfd_process_queue_manager.o \
>$(AMDKFD_PATH)/kfd_device_queue_manager.o \
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
> index 11d244891393..0d966408ea87 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
> @@ -332,12 +332,10 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev 
> *dev,
>case CHIP_RAVEN:
>case CHIP_RENOIR:
>case CHIP_ARCTURUS:
> - kernel_queue_init_v9(&kq->ops_asic_specific);
> - break;
>case CHIP_NAVI10:
>case CHIP_NAVI12:
>case CHIP_NAVI14:
> - kernel_queue_init_v10(&kq->ops_asic_specific);
> + kernel_queue_init_v9(&kq->ops_asic_specific);
>break;
>default:
>WARN(1, "Unexpected ASIC family %u",
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h 
> b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
> index 365fc674fea4..a7116a939029 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
> @@ -102,6 +102,5 @@ struct kernel_queue {
>   void kernel_queue_init_cik(struct kernel_queue_ops *ops);
>   void kernel_queue_init_vi(struct kernel_queue_ops *ops);
>   void kernel_queue_init_v9(struct kernel_queue_ops *ops);
> -void kernel_queue_init_v10(struct kernel_queue_ops *ops);
>
>   #endif /* KFD_KERNEL_QUEUE_H_ */
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> deleted file mode 100644
> index bfd6221acae9..
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> +++ /dev/null
> @@ -1,317 +0,0 @@
> -/*
> - * Copyright 2018 Advanced Micro Devices, Inc.
> - *
> - * Permission is hereby granted, free of charge, to any person obtaining a
> - * copy of this software and associated documentation files (the "Software"),
> - * to deal in the Software without restriction, including without limitation
> - * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> - * and/or sell copies of the Software, and to permit persons to whom the
> - * Software is furnished to do so, subject to the following conditions:
> - *
> - * The above copyright notice

Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

2019-11-07 Thread Zhao, Yong
After considering Kent's concern, I actually took the consolidation to the next 
level where v9 and v10 have no divergence. With that, I think the "mustness" is 
stronger. Please check out the new patch.


Regards,

Yong


On 2019-11-07 3:31 p.m., Kuehling, Felix wrote:
On 2019-11-07 14:40, Zhao, Yong wrote:
Hi Felix,

It is true that the code works fine, except that all features added after this 
duplication are broken. If I want to make GFX10 feature complete, I have to 
either manually adapt several duplications to the GFX10 file or do this 
consolidation. From this perspective and for ease of my work, it is a must.

"A must" means there is no alternative. You already listed two alternatives 
yourself: "either manually adapt several duplications to the GFX10 file or do 
this consolidation."


In _your_ opinion, the consolidation means less work for _you_. That's _your_ 
point of view. The discussion in this code review pointed out other points of 
view. When you take all of them into account, you may reconsider what is less 
work overall, and what is easier to maintain.


I'm not opposing your change per-se. But I'd like you to consider the whole 
picture, including the consequences of any design decisions you're making and 
imposing on anyone working on this code in the future. In this case I think 
it's a relatively minor issue and it may just come down to a matter of opinion 
that I don't feel terribly strongly about.


With that said, the change is

Reviewed-by: Felix Kuehling 
<mailto:felix.kuehl...@amd.com>


Regards,

  Felix


Regards,
Yong


From: Kuehling, Felix <mailto:felix.kuehl...@amd.com>
Sent: Thursday, November 7, 2019 2:12 PM
To: Zhao, Yong <mailto:yong.z...@amd.com>; Alex Deucher 
<mailto:alexdeuc...@gmail.com>
Cc: Russell, Kent <mailto:kent.russ...@amd.com>; 
amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> 
<mailto:amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

On 2019-11-07 13:54, Zhao, Yong wrote:
Hi Kent,

This consolidation is a must, because we should not have duplicated it in the 
first place.

The code is working fine with the duplication. You disagree with duplicating 
the code in the first place. But that's just your opinion. It's not a must in 
any objective sense.



The kernel queue functions by design are generic. The reason why GFX8 and GFX9 
are different is that GFX9 is SOC15, where packet formats and doorbell size 
changed.  On the other hand, the kfd_kernel_queue_v7.c file is pretty much empty by 
reusing v8 functions, even though it is there. Furthermore, in my opinion 
kfd_kernel_queue_v7.c should be merged into its v8 counterpart. From GFX9 onwards, 
packet formats should stay the same. For kernel queues, we should be able to 
differentiate them by pre-SOC15 or not, and I have an impression that MEC 
firmware agrees to maintain the kernel queue interface stable across 
generations most of the time.

OK, you're making assumptions about PM4 packets on future ASIC generations. 
It's true that the transition to SOC15 with 64-bit doorbells and 
read/write-pointers was particularly disruptive. Your assumption will hold 
until it gets broken by some other disruptive change.


For now, if you want clear naming, we could call the GFXv7/8 packet manager 
functions "pre-SOC15" or "legacy" and the GFXv9/10 and future functions 
"SOC15". This may work for a while. But I suspect at some point something is 
going to change and we'll need to create a new version for a newer ASIC 
generation. You already have a small taste of that with the different 
TBA-enable bit in the MAP_PROCESS packet in GFXv10.


Regards,

  Felix


Regards,
Yong
____
From: Alex Deucher <mailto:alexdeuc...@gmail.com>
Sent: Thursday, November 7, 2019 1:32 PM
To: Kuehling, Felix <mailto:felix.kuehl...@amd.com>
Cc: Zhao, Yong <mailto:yong.z...@amd.com>; Russell, Kent 
<mailto:kent.russ...@amd.com>; 
amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> 
<mailto:amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

On Thu, Nov 7, 2019 at 12:47 PM Kuehling, Felix 
<mailto:felix.kuehl...@amd.com> wrote:
>
> No, please let's not add a new nomenclature for PM4 packet versions. GFX 
> versions are agreed on between hardware, firmware, and software and it's 
> generally understood what they mean. If we add a new PM4 packet versioning 
> scheme on our own, then this will add a lot of confusion when talking to 
> firmware teams.
>
> You know, this would all be more straight forward if we accepted a little bit 
> of code duplication and had packet writing functions per GFX version. You

[PATCH] drm/amdkfd: Use kernel queue v9 functions for v10 (ver2)

2019-11-07 Thread Zhao, Yong
The kernel queue functions for v9 and v10 are the same except
pm_map_process_v* which have small difference, so they should be reused.
This eliminates the need of reapplying several patches which were
applied on v9 but not on v10, such as bigger GWS and more than 2
SDMA engine support which were introduced on Arcturus.

Change-Id: I2d385961e3c884db14e30b5afc98d0d9e4cb1802
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/Makefile   |   1 -
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c |   4 +-
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h |   1 -
 .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 317 --
 .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  |  16 +-
 .../gpu/drm/amd/amdkfd/kfd_packet_manager.c   |   4 +-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |   4 -
 7 files changed, 14 insertions(+), 333 deletions(-)
 delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c

diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile 
b/drivers/gpu/drm/amd/amdkfd/Makefile
index 48155060a57c..017a8b7156da 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -41,7 +41,6 @@ AMDKFD_FILES  := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_kernel_queue_cik.o \
$(AMDKFD_PATH)/kfd_kernel_queue_vi.o \
$(AMDKFD_PATH)/kfd_kernel_queue_v9.o \
-   $(AMDKFD_PATH)/kfd_kernel_queue_v10.o \
$(AMDKFD_PATH)/kfd_packet_manager.o \
$(AMDKFD_PATH)/kfd_process_queue_manager.o \
$(AMDKFD_PATH)/kfd_device_queue_manager.o \
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 11d244891393..0d966408ea87 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -332,12 +332,10 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev 
*dev,
case CHIP_RAVEN:
case CHIP_RENOIR:
case CHIP_ARCTURUS:
-   kernel_queue_init_v9(&kq->ops_asic_specific);
-   break;
case CHIP_NAVI10:
case CHIP_NAVI12:
case CHIP_NAVI14:
-   kernel_queue_init_v10(&kq->ops_asic_specific);
+   kernel_queue_init_v9(&kq->ops_asic_specific);
break;
default:
WARN(1, "Unexpected ASIC family %u",
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
index 365fc674fea4..a7116a939029 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
@@ -102,6 +102,5 @@ struct kernel_queue {
 void kernel_queue_init_cik(struct kernel_queue_ops *ops);
 void kernel_queue_init_vi(struct kernel_queue_ops *ops);
 void kernel_queue_init_v9(struct kernel_queue_ops *ops);
-void kernel_queue_init_v10(struct kernel_queue_ops *ops);
 
 #endif /* KFD_KERNEL_QUEUE_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
deleted file mode 100644
index bfd6221acae9..
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
+++ /dev/null
@@ -1,317 +0,0 @@
-/*
- * Copyright 2018 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#include "kfd_kernel_queue.h"
-#include "kfd_device_queue_manager.h"
-#include "kfd_pm4_headers_ai.h"
-#include "kfd_pm4_opcodes.h"
-#include "gc/gc_10_1_0_sh_mask.h"
-
-static bool initialize_v10(struct kernel_queue *kq, struct kfd_dev *dev,
-   enum kfd_queue_type type, unsigned int queue_size);
-static void uninitialize_v10(struct kernel_queue *kq);
-static void submit_packet_v10(struct kernel_queue *kq);
-
-void kernel_queue_init_v10(struct kernel_queue_ops *ops)
-{
-   ops->initialize = initialize_v10;
-   ops->uninitialize = uninitialize_v10;
-   ops->submit_packe

Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

2019-11-07 Thread Zhao, Yong
Hi Felix,

It is true that the code works fine, except that all features added after this 
duplication are broken. If I want to make GFX10 feature complete, I have to 
either manually adapt several duplications to the GFX10 file or do this 
consolidation. From this perspective and for ease of my work, it is a must.

Regards,
Yong


From: Kuehling, Felix 
Sent: Thursday, November 7, 2019 2:12 PM
To: Zhao, Yong ; Alex Deucher 
Cc: Russell, Kent ; amd-gfx@lists.freedesktop.org 

Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

On 2019-11-07 13:54, Zhao, Yong wrote:
Hi Kent,

This consolidation is a must, because we should not have duplicated it in the 
first place.

The code is working fine with the duplication. You disagree with duplicating 
the code in the first place. But that's just your opinion. It's not a must in 
any objective sense.



The kernel queue functions by design are generic. The reason why GFX8 and GFX9 
are different is that GFX9 is SOC15, where packet formats and doorbell size 
changed.  On the other hand, the kfd_kernel_queue_v7.c file is pretty much empty by 
reusing v8 functions, even though it is there. Furthermore, in my opinion 
kfd_kernel_queue_v7.c should be merged into its v8 counterpart. From GFX9 onwards, 
packet formats should stay the same. For kernel queues, we should be able to 
differentiate them by pre-SOC15 or not, and I have an impression that MEC 
firmware agrees to maintain the kernel queue interface stable across 
generations most of the time.

OK, you're making assumptions about PM4 packets on future ASIC generations. 
It's true that the transition to SOC15 with 64-bit doorbells and 
read/write-pointers was particularly disruptive. Your assumption will hold 
until it gets broken by some other disruptive change.


For now, if you want clear naming, we could call the GFXv7/8 packet manager 
functions "pre-SOC15" or "legacy" and the GFXv9/10 and future functions 
"SOC15". This may work for a while. But I suspect at some point something is 
going to change and we'll need to create a new version for a newer ASIC 
generation. You already have a small taste of that with the different 
TBA-enable bit in the MAP_PROCESS packet in GFXv10.


Regards,

  Felix


Regards,
Yong

From: Alex Deucher <mailto:alexdeuc...@gmail.com>
Sent: Thursday, November 7, 2019 1:32 PM
To: Kuehling, Felix <mailto:felix.kuehl...@amd.com>
Cc: Zhao, Yong <mailto:yong.z...@amd.com>; Russell, Kent 
<mailto:kent.russ...@amd.com>; 
amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> 
<mailto:amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

On Thu, Nov 7, 2019 at 12:47 PM Kuehling, Felix 
<mailto:felix.kuehl...@amd.com> wrote:
>
> No, please let's not add a new nomenclature for PM4 packet versions. GFX 
> versions are agreed on between hardware, firmware, and software and it's 
> generally understood what they mean. If we add a new PM4 packet versioning 
> scheme on our own, then this will add a lot of confusion when talking to 
> firmware teams.
>
> You know, this would all be more straight forward if we accepted a little bit 
> of code duplication and had packet writing functions per GFX version. You'll 
> see this pattern a lot in the amdgpu driver where each IP version duplicates 
> a bunch of code. In many cases you may be able to save a few lines of code by 
> sharing functions between IP versions. But you'll add some confusion and 
> burden on future maintenance.
>
> This is the price we pay for micro-optimizing minor code duplication.

What we've tried to do in amdgpu is to break out shared code in to
common helpers that are not IP specific and use that in each IP module
(e.g., amdgpu_uvd.c amdgpu_gfx.c, etc.).  Sometimes we can use a
particular chunk of code across multiple generations.  E.g., the uvd
stuff is a good example.  We have shared generic uvd helpers that work
for most UVD IP versions, and then if we need an IP specific version,
we override that in the callbacks with a version specific one.  E.g.,
for the video decode engines we use the generic helpers for a number
of ring functions:

static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
...
.test_ring = uvd_v7_0_ring_test_ring,
.test_ib = amdgpu_uvd_ring_test_ib,
.insert_nop = uvd_v7_0_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.begin_use = amdgpu_uvd_ring_begin_use,
.end_use = amdgpu_uvd_ring_end_use,
...
};

while we override more of them for the video encode engines:

static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
...
.test_ring = uvd_v7_0_enc_ring_test_ring,
.test_ib = uvd_v7_0_enc_ring_test_ib,
.insert_nop = am

Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

2019-11-07 Thread Zhao, Yong
Hi Kent,

This consolidation is a must, because we should not have duplicated it in the 
first place. The kernel queue functions by design are generic. The reason why 
GFX8 and GFX9 are different is that GFX9 is SOC15, where packet formats and 
doorbell size changed.  On the other hand, the kfd_kernel_queue_v7.c file is pretty 
much empty by reusing v8 functions, even though it is there. Furthermore, in my 
opinion kfd_kernel_queue_v7.c should be merged into its v8 counterpart. From GFX9 
onwards, packet formats should stay the same. For kernel queues, we should be 
able to differentiate them by pre-SOC15 or not, and I have an impression that MEC 
firmware agrees to maintain the kernel queue interface stable across 
generations most of the time.

Regards,
Yong

From: Alex Deucher 
Sent: Thursday, November 7, 2019 1:32 PM
To: Kuehling, Felix 
Cc: Zhao, Yong ; Russell, Kent ; 
amd-gfx@lists.freedesktop.org 
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

On Thu, Nov 7, 2019 at 12:47 PM Kuehling, Felix  wrote:
>
> No, please let's not add a new nomenclature for PM4 packet versions. GFX 
> versions are agreed on between hardware, firmware, and software and it's 
> generally understood what they mean. If we add a new PM4 packet versioning 
> scheme on our own, then this will add a lot of confusion when talking to 
> firmware teams.
>
> You know, this would all be more straight forward if we accepted a little bit 
> of code duplication and had packet writing functions per GFX version. You'll 
> see this pattern a lot in the amdgpu driver where each IP version duplicates 
> a bunch of code. In many cases you may be able to save a few lines of code by 
> sharing functions between IP versions. But you'll add some confusion and 
> burden on future maintenance.
>
> This is the price we pay for micro-optimizing minor code duplication.

What we've tried to do in amdgpu is to break out shared code in to
common helpers that are not IP specific and use that in each IP module
(e.g., amdgpu_uvd.c amdgpu_gfx.c, etc.).  Sometimes we can use a
particular chunk of code across multiple generations.  E.g., the uvd
stuff is a good example.  We have shared generic uvd helpers that work
for most UVD IP versions, and then if we need an IP specific version,
we override that in the callbacks with a version specific one.  E.g.,
for the video decode engines we use the generic helpers for a number
of ring functions:

static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
...
.test_ring = uvd_v7_0_ring_test_ring,
.test_ib = amdgpu_uvd_ring_test_ib,
.insert_nop = uvd_v7_0_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.begin_use = amdgpu_uvd_ring_begin_use,
.end_use = amdgpu_uvd_ring_end_use,
...
};

while we override more of them for the video encode engines:

static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
...
.test_ring = uvd_v7_0_enc_ring_test_ring,
.test_ib = uvd_v7_0_enc_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
.insert_end = uvd_v7_0_enc_ring_insert_end,
.pad_ib = amdgpu_ring_generic_pad_ib,
.begin_use = amdgpu_uvd_ring_begin_use,
.end_use = amdgpu_uvd_ring_end_use,
...
};

But still maintain IP specific components.

Alex

>
> Regards,
>   Felix
>
> On 2019-11-07 12:39, Zhao, Yong wrote:
>
> Hi Kent,
>
> I can't agree more on this. Also, the same applies to the file names. 
> Definitely we need to agree on the naming scheme before making it happen.
>
> Yong
>
> On 2019-11-07 12:33 p.m., Russell, Kent wrote:
>
> I think that the versioning is getting a little confusing since we’re using 
> the old GFX versions, but not really sticking to it due to the shareability 
> of certain managers and shaders. Could we look into doing something like gen1 
> or gen2, or some other more ambiguous non-GFX-related versioning? Otherwise 
> we’ll keep having these questions of “why is Hawaii GFX8”, “why is Arcturus 
> GFX9”, etc. Then if things change, we just up the value concretely, instead 
> of maybe doing a v11 if GFX11 changes things, and only GFX11 ASICs use those 
> functions/variables.
>
>
>
> Obviously not high-priority, but maybe something to consider as you continue 
> to consolidate and remove duplicate code.
>
>
>
> Kent
>
>
>
> From: amd-gfx  On Behalf Of Zhao, Yong
> Sent: Thursday, November 7, 2019 11:57 AM
> To: Kuehling, Felix ; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
>
>
>
> Hi Felix,
>
>
>
> That's because v8 and v7 share the same packet_manager_funcs. In this case, 
> it is better to keep it as it is.
>
>
>
> Regards,
>
> Yong
>

[PATCH] drm/amdkfd: Use kernel queue v9 functions for v10

2019-11-07 Thread Zhao, Yong
The kernel queue functions for v9 and v10 are the same except
pm_map_process_v* which have small difference, so they should be reused.
This eliminates the need of reapplying several patches which were
applied on v9 but not on v10, such as bigger GWS and more than 2
SDMA engine support which were introduced on Arcturus.

Change-Id: I2d385961e3c884db14e30b5afc98d0d9e4cb1802
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/Makefile   |   1 -
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c |   4 +-
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h |   1 -
 .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 317 --
 .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  |  48 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |   3 -
 6 files changed, 43 insertions(+), 331 deletions(-)
 delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c

diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile 
b/drivers/gpu/drm/amd/amdkfd/Makefile
index 48155060a57c..017a8b7156da 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -41,7 +41,6 @@ AMDKFD_FILES  := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_kernel_queue_cik.o \
$(AMDKFD_PATH)/kfd_kernel_queue_vi.o \
$(AMDKFD_PATH)/kfd_kernel_queue_v9.o \
-   $(AMDKFD_PATH)/kfd_kernel_queue_v10.o \
$(AMDKFD_PATH)/kfd_packet_manager.o \
$(AMDKFD_PATH)/kfd_process_queue_manager.o \
$(AMDKFD_PATH)/kfd_device_queue_manager.o \
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 11d244891393..0d966408ea87 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -332,12 +332,10 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev 
*dev,
case CHIP_RAVEN:
case CHIP_RENOIR:
case CHIP_ARCTURUS:
-   kernel_queue_init_v9(&kq->ops_asic_specific);
-   break;
case CHIP_NAVI10:
case CHIP_NAVI12:
case CHIP_NAVI14:
-   kernel_queue_init_v10(&kq->ops_asic_specific);
+   kernel_queue_init_v9(&kq->ops_asic_specific);
break;
default:
WARN(1, "Unexpected ASIC family %u",
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
index 365fc674fea4..a7116a939029 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
@@ -102,6 +102,5 @@ struct kernel_queue {
 void kernel_queue_init_cik(struct kernel_queue_ops *ops);
 void kernel_queue_init_vi(struct kernel_queue_ops *ops);
 void kernel_queue_init_v9(struct kernel_queue_ops *ops);
-void kernel_queue_init_v10(struct kernel_queue_ops *ops);
 
 #endif /* KFD_KERNEL_QUEUE_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
deleted file mode 100644
index bfd6221acae9..
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
+++ /dev/null
@@ -1,317 +0,0 @@
-/*
- * Copyright 2018 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#include "kfd_kernel_queue.h"
-#include "kfd_device_queue_manager.h"
-#include "kfd_pm4_headers_ai.h"
-#include "kfd_pm4_opcodes.h"
-#include "gc/gc_10_1_0_sh_mask.h"
-
-static bool initialize_v10(struct kernel_queue *kq, struct kfd_dev *dev,
-   enum kfd_queue_type type, unsigned int queue_size);
-static void uninitialize_v10(struct kernel_queue *kq);
-static void submit_packet_v10(struct kernel_queue *kq);
-
-void kernel_queue_init_v10(struct kernel_queue_ops *ops)
-{
-   ops->initialize = initialize_v10;
-   ops->uninitialize = uninitialize_v10;
-   ops->submit_packet = submit_packet_v10;
-}
-
-static bool initialize_v10

Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

2019-11-07 Thread Zhao, Yong
Hi Kent,

I can't agree more on this. Also, the same applies to the file names. 
Definitely we need to agree on the naming scheme before making it happen.

Yong

On 2019-11-07 12:33 p.m., Russell, Kent wrote:
I think that the versioning is getting a little confusing since we’re using the 
old GFX versions, but not really sticking to it due to the shareability of 
certain managers and shaders. Could we look into doing something like gen1 or 
gen2, or some other more ambiguous non-GFX-related versioning? Otherwise we’ll 
keep having these questions of “why is Hawaii GFX8”, “why is Arcturus GFX9”, 
etc. Then if things change, we just up the value concretely, instead of maybe 
doing a v11 if GFX11 changes things, and only GFX11 ASICs use those 
functions/variables.

Obviously not high-priority, but maybe something to consider as you continue to 
consolidate and remove duplicate code.

Kent

From: amd-gfx 
<mailto:amd-gfx-boun...@lists.freedesktop.org>
 On Behalf Of Zhao, Yong
Sent: Thursday, November 7, 2019 11:57 AM
To: Kuehling, Felix <mailto:felix.kuehl...@amd.com>; 
amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

Hi Felix,

That's because v8 and v7 share the same packet_manager_funcs. In this case, it 
is better to keep it as it is.

Regards,
Yong

From: Kuehling, Felix <mailto:felix.kuehl...@amd.com>
Sent: Wednesday, November 6, 2019 11:45 PM
To: Zhao, Yong <mailto:yong.z...@amd.com>; 
amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> 
<mailto:amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

On 2019-10-30 20:17, Zhao, Yong wrote:
> release_mem won't be used at all on GFX9 and GFX10, so delete it.

Hawaii was GFXv7. So we're not using the release_mem packet on GFXv8
either. Why arbitrarily limit this change to GFXv9 and 10?

Regards,
   Felix

>
> Change-Id: I13787a8a29b83e7516c582a7401f2e14721edf5f
> Signed-off-by: Yong Zhao mailto:yong.z...@amd.com>>
> ---
>   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 35 ++-
>   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  | 33 ++---
>   2 files changed, 4 insertions(+), 64 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> index aed32ab7102e..bfd6221acae9 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> @@ -298,37 +298,6 @@ static int pm_query_status_v10(struct packet_manager 
> *pm, uint32_t *buffer,
>return 0;
>   }
>
> -
> -static int pm_release_mem_v10(uint64_t gpu_addr, uint32_t *buffer)
> -{
> - struct pm4_mec_release_mem *packet;
> -
> - WARN_ON(!buffer);
> -
> - packet = (struct pm4_mec_release_mem *)buffer;
> - memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> -
> - packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> - sizeof(struct pm4_mec_release_mem));
> -
> - packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> - packet->bitfields2.event_index = 
> event_index__mec_release_mem__end_of_pipe;
> - packet->bitfields2.tcl1_action_ena = 1;
> - packet->bitfields2.tc_action_ena = 1;
> - packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> -
> - packet->bitfields3.data_sel = 
> data_sel__mec_release_mem__send_32_bit_low;
> - packet->bitfields3.int_sel =
> - int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> -
> - packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> - packet->address_hi = upper_32_bits(gpu_addr);
> -
> - packet->data_lo = 0;
> -
> - return sizeof(struct pm4_mec_release_mem) / sizeof(unsigned int);
> -}
> -
>   const struct packet_manager_funcs kfd_v10_pm_funcs = {
>.map_process= pm_map_process_v10,
>.runlist= pm_runlist_v10,
> @@ -336,13 +305,13 @@ const struct packet_manager_funcs kfd_v10_pm_funcs = {
>.map_queues = pm_map_queues_v10,
>.unmap_queues   = pm_unmap_queues_v10,
>.query_status   = pm_query_status_v10,
> - .release_mem= pm_release_mem_v10,
> + .release_mem= NULL,
>.map_process_size   = sizeof(struct pm4_mes_map_process),
>.runlist_size   = sizeof(struct pm4_mes_runlist),
>.set_resources_size 

[PATCH] drm/amdkfd: Simplify the mmap offset related bit operations

2019-11-07 Thread Zhao, Yong
The new code uses straightforward bit shifts and thus has better readability.

Change-Id: I0c1f7cca7e24ddb7b4ffe1cb0fa71943828ae373
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 17 +++--
 drivers/gpu/drm/amd/amdkfd/kfd_events.c  |  1 -
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h|  9 +++--
 drivers/gpu/drm/amd/amdkfd/kfd_process.c |  3 +--
 4 files changed, 11 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index b91993753b82..e59c229861e6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -298,7 +298,6 @@ static int kfd_ioctl_create_queue(struct file *filep, 
struct kfd_process *p,
/* Return gpu_id as doorbell offset for mmap usage */
args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
-   args->doorbell_offset <<= PAGE_SHIFT;
if (KFD_IS_SOC15(dev->device_info->asic_family))
/* On SOC15 ASICs, include the doorbell offset within the
 * process doorbell frame, which could be 1 page or 2 pages.
@@ -1312,10 +1311,9 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file 
*filep,
/* MMIO is mapped through kfd device
 * Generate a kfd mmap offset
 */
-   if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
-   args->mmap_offset = KFD_MMAP_TYPE_MMIO | 
KFD_MMAP_GPU_ID(args->gpu_id);
-   args->mmap_offset <<= PAGE_SHIFT;
-   }
+   if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)
+   args->mmap_offset = KFD_MMAP_TYPE_MMIO
+   | KFD_MMAP_GPU_ID(args->gpu_id);
 
return 0;
 
@@ -1938,20 +1936,19 @@ static int kfd_mmap(struct file *filp, struct 
vm_area_struct *vma)
 {
struct kfd_process *process;
struct kfd_dev *dev = NULL;
-   unsigned long vm_pgoff;
+   unsigned long mmap_offset;
unsigned int gpu_id;
 
process = kfd_get_process(current);
if (IS_ERR(process))
return PTR_ERR(process);
 
-   vm_pgoff = vma->vm_pgoff;
-   vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff);
-   gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff);
+   mmap_offset = vma->vm_pgoff << PAGE_SHIFT;
+   gpu_id = KFD_MMAP_GET_GPU_ID(mmap_offset);
if (gpu_id)
dev = kfd_device_by_id(gpu_id);
 
-   switch (vm_pgoff & KFD_MMAP_TYPE_MASK) {
+   switch (mmap_offset & KFD_MMAP_TYPE_MASK) {
case KFD_MMAP_TYPE_DOORBELL:
if (!dev)
return -ENODEV;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 908081c85de1..1f8365575b12 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -346,7 +346,6 @@ int kfd_event_create(struct file *devkfd, struct 
kfd_process *p,
ret = create_signal_event(devkfd, p, ev);
if (!ret) {
*event_page_offset = KFD_MMAP_TYPE_EVENTS;
-   *event_page_offset <<= PAGE_SHIFT;
*event_slot_index = ev->event_id;
}
break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 66bae8f2dad1..8eecd2cd1fd2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -59,24 +59,21 @@
  * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
  *  defines are w.r.t to PAGE_SIZE
  */
-#define KFD_MMAP_TYPE_SHIFT(62 - PAGE_SHIFT)
+#define KFD_MMAP_TYPE_SHIFT(62)
 #define KFD_MMAP_TYPE_MASK (0x3ULL << KFD_MMAP_TYPE_SHIFT)
 #define KFD_MMAP_TYPE_DOORBELL (0x3ULL << KFD_MMAP_TYPE_SHIFT)
 #define KFD_MMAP_TYPE_EVENTS   (0x2ULL << KFD_MMAP_TYPE_SHIFT)
 #define KFD_MMAP_TYPE_RESERVED_MEM (0x1ULL << KFD_MMAP_TYPE_SHIFT)
 #define KFD_MMAP_TYPE_MMIO (0x0ULL << KFD_MMAP_TYPE_SHIFT)
 
-#define KFD_MMAP_GPU_ID_SHIFT (46 - PAGE_SHIFT)
+#define KFD_MMAP_GPU_ID_SHIFT (46)
 #define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \
<< KFD_MMAP_GPU_ID_SHIFT)
 #define KFD_MMAP_GPU_ID(gpu_id) uint64_t)gpu_id) << KFD_MMAP_GPU_ID_SHIFT)\
& KFD_MMAP_GPU_ID_MASK)
-#define KFD_MMAP_GPU_ID_GET(offset)((offset & KFD_MMAP_GPU_ID_MASK) \
+#define KFD_MMAP_GET_GPU_ID(offset)((offset & KFD_MMAP_GPU_ID_MASK) \
>> KFD_MMAP_GPU_ID_SHIFT)
 
-#define KFD_MMAP_OFFSET_VALUE_MASK (0x3FFFFFFFFFFFULL >> PAGE_SHIFT)
-#define KFD_MMAP_OFFSET_VALUE_GET(offset) (offset & KFD_MMAP_OFFSET_VALUE_MASK)
-
 /*
  * When working with cp scheduler we should assign the HIQ manually or via
  * the amdgpu driver to a fixed hqd slot, here are the fixed HIQ hqd slot
diff --git a/drivers/gpu/drm/a

Re: [PATCH] drm/amdkfd: Simplify the mmap offset related bit operations

2019-11-07 Thread Zhao, Yong
True. Thank you for spotting that. MMAP-related code was added after I 
initially drafted this change earlier this year.

Regards,
Yong

From: Kuehling, Felix 
Sent: Thursday, November 7, 2019 12:05 AM
To: Zhao, Yong ; amd-gfx@lists.freedesktop.org 

Subject: Re: [PATCH] drm/amdkfd: Simplify the mmap offset related bit operations

On 2019-11-05 18:18, Zhao, Yong wrote:
> The new code uses straightforward bit shifts and thus has better
> readability.

You're missing the MMAP-related code for mmio remapping. In
kfd_ioctl_alloc_memory_of_gpu:

 /* MMIO is mapped through kfd device
  * Generate a kfd mmap offset
  */
 if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
 args->mmap_offset = KFD_MMAP_TYPE_MMIO |
KFD_MMAP_GPU_ID(args->gpu_id);
 args->mmap_offset <<= PAGE_SHIFT;
 }

Regards,
   Felix

>
> Change-Id: I0c1f7cca7e24ddb7b4ffe1cb0fa71943828ae373
> Signed-off-by: Yong Zhao 
> ---
> drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 10 --
> drivers/gpu/drm/amd/amdkfd/kfd_events.c | 1 -
> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 9 +++--
> drivers/gpu/drm/amd/amdkfd/kfd_process.c | 3 +--
> 4 files changed, 8 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index b91993753b82..34078df36621 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -298,7 +298,6 @@ static int kfd_ioctl_create_queue(struct file
> *filep, struct kfd_process *p,
> /* Return gpu_id as doorbell offset for mmap usage */
> args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
> args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
> - args->doorbell_offset <<= PAGE_SHIFT;
> if (KFD_IS_SOC15(dev->device_info->asic_family))
> /* On SOC15 ASICs, include the doorbell offset within the
> * process doorbell frame, which could be 1 page or 2 pages.
> @@ -1938,20 +1937,19 @@ static int kfd_mmap(struct file *filp, struct
> vm_area_struct *vma)
> {
> struct kfd_process *process;
> struct kfd_dev *dev = NULL;
> - unsigned long vm_pgoff;
> + unsigned long mmap_offset;
> unsigned int gpu_id;
> process = kfd_get_process(current);
> if (IS_ERR(process))
> return PTR_ERR(process);
> - vm_pgoff = vma->vm_pgoff;
> - vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff);
> - gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff);
> + mmap_offset = vma->vm_pgoff << PAGE_SHIFT;
> + gpu_id = KFD_MMAP_GET_GPU_ID(mmap_offset);
> if (gpu_id)
> dev = kfd_device_by_id(gpu_id);
> - switch (vm_pgoff & KFD_MMAP_TYPE_MASK) {
> + switch (mmap_offset & KFD_MMAP_TYPE_MASK) {
> case KFD_MMAP_TYPE_DOORBELL:
> if (!dev)
> return -ENODEV;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> index 908081c85de1..1f8365575b12 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> @@ -346,7 +346,6 @@ int kfd_event_create(struct file *devkfd, struct
> kfd_process *p,
> ret = create_signal_event(devkfd, p, ev);
> if (!ret) {
> *event_page_offset = KFD_MMAP_TYPE_EVENTS;
> - *event_page_offset <<= PAGE_SHIFT;
> *event_slot_index = ev->event_id;
> }
> break;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 66bae8f2dad1..8eecd2cd1fd2 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -59,24 +59,21 @@
> * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
> * defines are w.r.t to PAGE_SIZE
> */
> -#define KFD_MMAP_TYPE_SHIFT (62 - PAGE_SHIFT)
> +#define KFD_MMAP_TYPE_SHIFT (62)
> #define KFD_MMAP_TYPE_MASK (0x3ULL << KFD_MMAP_TYPE_SHIFT)
> #define KFD_MMAP_TYPE_DOORBELL (0x3ULL << KFD_MMAP_TYPE_SHIFT)
> #define KFD_MMAP_TYPE_EVENTS (0x2ULL << KFD_MMAP_TYPE_SHIFT)
> #define KFD_MMAP_TYPE_RESERVED_MEM (0x1ULL << KFD_MMAP_TYPE_SHIFT)
> #define KFD_MMAP_TYPE_MMIO (0x0ULL << KFD_MMAP_TYPE_SHIFT)
> -#define KFD_MMAP_GPU_ID_SHIFT (46 - PAGE_SHIFT)
> +#define KFD_MMAP_GPU_ID_SHIFT (46)
> #define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \
> << KFD_MMAP_GPU_ID_SHIFT)
> #define KFD_MMAP_GPU_ID(gpu_id) uint64_t)gpu_id) <<
> KFD_MMAP_GPU_ID_SHIFT)\
> & KFD_MMAP_GPU_ID_MASK)
> -#define KFD_MMAP_GPU_ID_GET(offset) ((offset & KFD_MMAP_GPU_ID_MASK) \
> +#define KFD_MMAP_GET_GPU_ID(offset) ((offset & KFD_MMAP_GPU_ID_MASK) \
> >> KFD_MMAP_GPU_ID_SHIFT)
> -#define KFD_MMAP_OFFSET_VALUE_MASK (0x3FFFULL >> PAGE_

Re: [PATCH] drm/amdgpu: Add comments to gmc structure

2019-11-07 Thread Zhao, Yong
If this has not been submitted yet, I would like to see some comments 
explaining MC address and logical address, which I would prefer to refer to as 
GPU physical/virtual addresses.

Regards,
Yong



From: amd-gfx  on behalf of Alex Deucher 

Sent: Thursday, November 7, 2019 9:02 AM
To: Zeng, Oak 
Cc: Kuehling, Felix ; Koenig, Christian 
; amd-gfx@lists.freedesktop.org 

Subject: Re: [PATCH] drm/amdgpu: Add comments to gmc structure

On Wed, Nov 6, 2019 at 12:27 PM Zeng, Oak  wrote:
>
> Explain fields like aper_base, agp_start etc. The definitions
> of those fields are confusing as they are from different views
> (CPU or GPU). Add comments for easier understanding.
>
> Change-Id: I02c2a27cd0dbc205498eb86aafa722f2e0c25fe6
> Signed-off-by: Oak Zeng 

Same comments as the previous version.  With those addressed,
Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 26 ++
>  1 file changed, 26 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> index 555d8e5..1356ff9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> @@ -127,18 +127,44 @@ struct amdgpu_xgmi {
>  };
>
>  struct amdgpu_gmc {
> +   /* FB's physical address in MMIO space (for CPU to
> +* map FB). This is different compared to the agp/
> +* gart/vram_start/end field as the latter is from
> +* GPU's view and aper_base is from CPU's view.
> +*/
> resource_size_t aper_size;
> resource_size_t aper_base;
> /* for some chips with <= 32MB we need to lie
>  * about vram size near mc fb location */
> u64 mc_vram_size;
> u64 visible_vram_size;
> +   /* AGP aperture start and end in MC address space
> +* Driver find a hole in the MC address space
> +* to place AGP by setting MC_VM_AGP_BOT/TOP registers
> +* Under VMID0, logical address == MC address
> +* AGP aperture is used to simulate FB in ZFB case
> +*/
> u64 agp_size;
> u64 agp_start;
> u64 agp_end;
> +   /* GART aperture start and end in MC address space
> +* Driver find a hole in the MC address space
> +* to place GART by setting VM_CONTEXT0_PAGE_TABLE_START/END_ADDR
> +* registers
> +* Under VMID0, logical address inside GART aperture will
> +* be translated through gpuvm gart page table to access
> +* paged system memory
> +*/
> u64 gart_size;
> u64 gart_start;
> u64 gart_end;
> +   /* Frame buffer aperture of this GPU device. Different from
> +* fb_start (see below), this only covers the local GPU device.
> +* Driver get fb_start from MC_VM_FB_LOCATION_BASE (set by vbios)
> +* and calculate vram_start of this local device by adding an
> +* offset inside the XGMI hive.
> +* Under VMID0, logical address == MC address
> +*/
> u64 vram_start;
> u64 vram_end;
> /* FB region , it's same as local vram region in single GPU, in XGMI
> --
> 2.7.4
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

2019-11-07 Thread Zhao, Yong
Hi Felix,

That's because v8 and v7 share the same packet_manager_funcs. In this case, it 
is better to keep it as it is.

Regards,
Yong

From: Kuehling, Felix 
Sent: Wednesday, November 6, 2019 11:45 PM
To: Zhao, Yong ; amd-gfx@lists.freedesktop.org 

Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

On 2019-10-30 20:17, Zhao, Yong wrote:
> release_mem won't be used at all on GFX9 and GFX10, so delete it.

Hawaii was GFXv7. So we're not using the release_mem packet on GFXv8
either. Why arbitrarily limit this change to GFXv9 and 10?

Regards,
   Felix

>
> Change-Id: I13787a8a29b83e7516c582a7401f2e14721edf5f
> Signed-off-by: Yong Zhao 
> ---
>   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 35 ++-
>   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  | 33 ++---
>   2 files changed, 4 insertions(+), 64 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> index aed32ab7102e..bfd6221acae9 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> @@ -298,37 +298,6 @@ static int pm_query_status_v10(struct packet_manager 
> *pm, uint32_t *buffer,
>return 0;
>   }
>
> -
> -static int pm_release_mem_v10(uint64_t gpu_addr, uint32_t *buffer)
> -{
> - struct pm4_mec_release_mem *packet;
> -
> - WARN_ON(!buffer);
> -
> - packet = (struct pm4_mec_release_mem *)buffer;
> - memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> -
> - packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> - sizeof(struct pm4_mec_release_mem));
> -
> - packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> - packet->bitfields2.event_index = 
> event_index__mec_release_mem__end_of_pipe;
> - packet->bitfields2.tcl1_action_ena = 1;
> - packet->bitfields2.tc_action_ena = 1;
> - packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> -
> - packet->bitfields3.data_sel = 
> data_sel__mec_release_mem__send_32_bit_low;
> - packet->bitfields3.int_sel =
> - int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> -
> - packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> - packet->address_hi = upper_32_bits(gpu_addr);
> -
> - packet->data_lo = 0;
> -
> - return sizeof(struct pm4_mec_release_mem) / sizeof(unsigned int);
> -}
> -
>   const struct packet_manager_funcs kfd_v10_pm_funcs = {
>.map_process= pm_map_process_v10,
>.runlist= pm_runlist_v10,
> @@ -336,13 +305,13 @@ const struct packet_manager_funcs kfd_v10_pm_funcs = {
>.map_queues = pm_map_queues_v10,
>.unmap_queues   = pm_unmap_queues_v10,
>.query_status   = pm_query_status_v10,
> - .release_mem= pm_release_mem_v10,
> + .release_mem= NULL,
>.map_process_size   = sizeof(struct pm4_mes_map_process),
>.runlist_size   = sizeof(struct pm4_mes_runlist),
>.set_resources_size = sizeof(struct pm4_mes_set_resources),
>.map_queues_size= sizeof(struct pm4_mes_map_queues),
>.unmap_queues_size  = sizeof(struct pm4_mes_unmap_queues),
>.query_status_size  = sizeof(struct pm4_mes_query_status),
> - .release_mem_size   = sizeof(struct pm4_mec_release_mem)
> + .release_mem_size   = 0,
>   };
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> index 3b5ca2b1d7a6..f0e4910a8865 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> @@ -336,35 +336,6 @@ static int pm_query_status_v9(struct packet_manager *pm, 
> uint32_t *buffer,
>return 0;
>   }
>
> -
> -static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer)
> -{
> - struct pm4_mec_release_mem *packet;
> -
> - packet = (struct pm4_mec_release_mem *)buffer;
> - memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> -
> - packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> - sizeof(struct pm4_mec_release_mem));
> -
> - packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> - packet->bitfields2.event_

Re: [PATCH] drm/amdgpu: Improve RAS documentation

2019-11-06 Thread Zhao, Yong
See two wording comments inline. With that

Reviewed-by: Yong Zhao

On 2019-10-30 2:41 p.m., Alex Deucher wrote:
> Clarify some areas, clean up formatting, add section for
> unrecoverable error handling.
>
> Signed-off-by: Alex Deucher 
> ---
>   Documentation/gpu/amdgpu.rst| 35 ++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 40 -
>   2 files changed, 68 insertions(+), 7 deletions(-)
>
> diff --git a/Documentation/gpu/amdgpu.rst b/Documentation/gpu/amdgpu.rst
> index 5b9eaf23558e..1c08d64970ee 100644
> --- a/Documentation/gpu/amdgpu.rst
> +++ b/Documentation/gpu/amdgpu.rst
> @@ -82,12 +82,21 @@ AMDGPU XGMI Support
>   AMDGPU RAS Support
>   ==
>   
> +The AMDGPU RAS interfaces are exposed via sysfs (for informational queries) 
> and
> +debugfs (for error injection).
> +
>   RAS debugfs/sysfs Control and Error Injection Interfaces
>   
>   
>   .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
>  :doc: AMDGPU RAS debugfs control interface
>   
> +RAS Reboot Behavior for Unrecoverable Errors
> +
> +
> +.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +   :doc: AMDGPU RAS Reboot Behavior for Unrecoverable Errors
> +
>   RAS Error Count sysfs Interface
>   ---
>   
> @@ -109,6 +118,32 @@ RAS VRAM Bad Pages sysfs Interface
>   .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
>  :internal:
>   
> +Sample Code
> +---
> +Sample code for testing error injection can be found here:
> +https://cgit.freedesktop.org/mesa/drm/tree/tests/amdgpu/ras_tests.c
> +
> +This is part of the libdrm amdgpu unit tests which cover several areas of 
> the GPU.
> +There are four sets of tests:
> +
> +RAS Basic Test
> +
> +The test verifies the RAS feature enabled status and makes sure the 
> necessary sysfs and debugfs files
> +are present.
> +
> +RAS Query Test
> +
> +This test will check the RAS availability and enablement status for each 
> supported IP block as well as
> +the error counts.

This test checks

> +
> +RAS Inject Test
> +
> +This test injects errors for each IP.
> +
> +RAS Disable Test
> +
> +This tests disabling of RAS features for each IP block.

This test tests disabling

> +
>   
>   GPU Power/Thermal Controls and Monitoring
>   =
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index dab90c280476..404483437bd3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -220,7 +220,7 @@ static struct ras_manager *amdgpu_ras_find_obj(struct 
> amdgpu_device *adev,
>* As their names indicate, inject operation will write the
>* value to the address.
>*
> - * Second member: struct ras_debug_if::op.
> + * The second member: struct ras_debug_if::op.
>* It has three kinds of operations.
>*
>* - 0: disable RAS on the block. Take ::head as its data.
> @@ -228,14 +228,20 @@ static struct ras_manager *amdgpu_ras_find_obj(struct 
> amdgpu_device *adev,
>* - 2: inject errors on the block. Take ::inject as its data.
>*
>* How to use the interface?
> - * programs:
> - * copy the struct ras_debug_if in your codes and initialize it.
> - * write the struct to the control node.
> + *
> + * Programs
> + *
> + * Copy the struct ras_debug_if in your codes and initialize it.
> + * Write the struct to the control node.
> + *
> + * Shells
>*
>* .. code-block:: bash
>*
>*  echo op block [error [sub_block address value]] > .../ras/ras_ctrl
>*
> + * Parameters:
> + *
>* op: disable, enable, inject
>*  disable: only block is needed
>*  enable: block and error are needed
> @@ -265,8 +271,10 @@ static struct ras_manager *amdgpu_ras_find_obj(struct 
> amdgpu_device *adev,
>* /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count
>*
>* .. note::
> - *   Operation is only allowed on blocks which are supported.
> + *   Operations are only allowed on blocks which are supported.
>*  Please check ras mask at /sys/module/amdgpu/parameters/ras_mask
> + *   to see which blocks support RAS on a particular asic.
> + *
>*/
>   static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char 
> __user *buf,
>   size_t size, loff_t *pos)
> @@ -322,7 +330,7 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file 
> *f, const char __user *
>* DOC: AMDGPU RAS debugfs EEPROM table reset interface
>*
>* Some boards contain an EEPROM which is used to persistently store a list 
> of
> - * bad pages containing ECC errors detected in vram.  This interface provides
> + * bad pages which experience ECC errors in vram.  This interface provides
>* a way to reset the EEPROM, e.g., after testing error injection.
>*
>* Usage:
> @@ -362,7 +370,7 @

Re: [PATCH] drm/radeon: fix si_enable_smc_cac() failed issue

2019-11-06 Thread Zhao, Yong
Reviewed-by: Yong Zhao

On 2019-10-30 10:22 a.m., Alex Deucher wrote:
> Need to set the dte flag on this asic.
>
> Port the fix from amdgpu:
> 5cb818b861be114148e8dbeb4259698148019dd1
>
> Signed-off-by: Alex Deucher 
> ---
>   drivers/gpu/drm/radeon/si_dpm.c | 1 +
>   1 file changed, 1 insertion(+)
>
> diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
> index 460fd98e40a7..a0b382a637a6 100644
> --- a/drivers/gpu/drm/radeon/si_dpm.c
> +++ b/drivers/gpu/drm/radeon/si_dpm.c
> @@ -1958,6 +1958,7 @@ static void si_initialize_powertune_defaults(struct 
> radeon_device *rdev)
>   case 0x682C:
>   si_pi->cac_weights = cac_weights_cape_verde_pro;
>   si_pi->dte_data = dte_data_sun_xt;
> + update_dte_from_pl2 = true;
>   break;
>   case 0x6825:
>   case 0x6827:
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdgpu/renoir: move gfxoff handling into gfx9 module

2019-11-06 Thread Zhao, Yong
Reviewed-by: Yong Zhao 

On 2019-11-06 11:34 a.m., Alex Deucher wrote:
> Ping?
>
> On Tue, Oct 29, 2019 at 4:10 PM Alex Deucher  wrote:
>> To properly handle the option parsing ordering.
>>
>> Signed-off-by: Alex Deucher 
>> ---
>>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 6 ++
>>   drivers/gpu/drm/amd/amdgpu/soc15.c| 5 -
>>   2 files changed, 6 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> index 9fe95e7693d5..b2b3eb75c48c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> @@ -1051,6 +1051,12 @@ static void gfx_v9_0_check_if_need_gfxoff(struct 
>> amdgpu_device *adev)
>>  !adev->gfx.rlc.is_rlc_v2_1))
>>  adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
>>
>> +   if (adev->pm.pp_feature & PP_GFXOFF_MASK)
>> +   adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
>> +   AMD_PG_SUPPORT_CP |
>> +   AMD_PG_SUPPORT_RLC_SMU_HS;
>> +   break;
>> +   case CHIP_RENOIR:
>>  if (adev->pm.pp_feature & PP_GFXOFF_MASK)
>>  adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
>>  AMD_PG_SUPPORT_CP |
>> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c 
>> b/drivers/gpu/drm/amd/amdgpu/soc15.c
>> index 16c5bb75889f..25e69ea74a41 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
>> @@ -1263,11 +1263,6 @@ static int soc15_common_early_init(void *handle)
>>   AMD_PG_SUPPORT_VCN |
>>   AMD_PG_SUPPORT_VCN_DPG;
>>  adev->external_rev_id = adev->rev_id + 0x91;
>> -
>> -   if (adev->pm.pp_feature & PP_GFXOFF_MASK)
>> -   adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
>> -   AMD_PG_SUPPORT_CP |
>> -   AMD_PG_SUPPORT_RLC_SMU_HS;
>>  break;
>>  default:
>>  /* FIXME: not supported yet */
>> --
>> 2.23.0
>>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdkfd: Simplify the mmap offset related bit operations

2019-11-05 Thread Zhao, Yong
The new code uses straightforward bit shifts and thus has better readability.

Change-Id: I0c1f7cca7e24ddb7b4ffe1cb0fa71943828ae373
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 10 --
 drivers/gpu/drm/amd/amdkfd/kfd_events.c  |  1 -
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h|  9 +++--
 drivers/gpu/drm/amd/amdkfd/kfd_process.c |  3 +--
 4 files changed, 8 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index b91993753b82..34078df36621 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -298,7 +298,6 @@ static int kfd_ioctl_create_queue(struct file *filep, 
struct kfd_process *p,
/* Return gpu_id as doorbell offset for mmap usage */
args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
-   args->doorbell_offset <<= PAGE_SHIFT;
if (KFD_IS_SOC15(dev->device_info->asic_family))
/* On SOC15 ASICs, include the doorbell offset within the
 * process doorbell frame, which could be 1 page or 2 pages.
@@ -1938,20 +1937,19 @@ static int kfd_mmap(struct file *filp, struct 
vm_area_struct *vma)
 {
struct kfd_process *process;
struct kfd_dev *dev = NULL;
-   unsigned long vm_pgoff;
+   unsigned long mmap_offset;
unsigned int gpu_id;
 
process = kfd_get_process(current);
if (IS_ERR(process))
return PTR_ERR(process);
 
-   vm_pgoff = vma->vm_pgoff;
-   vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff);
-   gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff);
+   mmap_offset = vma->vm_pgoff << PAGE_SHIFT;
+   gpu_id = KFD_MMAP_GET_GPU_ID(mmap_offset);
if (gpu_id)
dev = kfd_device_by_id(gpu_id);
 
-   switch (vm_pgoff & KFD_MMAP_TYPE_MASK) {
+   switch (mmap_offset & KFD_MMAP_TYPE_MASK) {
case KFD_MMAP_TYPE_DOORBELL:
if (!dev)
return -ENODEV;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 908081c85de1..1f8365575b12 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -346,7 +346,6 @@ int kfd_event_create(struct file *devkfd, struct 
kfd_process *p,
ret = create_signal_event(devkfd, p, ev);
if (!ret) {
*event_page_offset = KFD_MMAP_TYPE_EVENTS;
-   *event_page_offset <<= PAGE_SHIFT;
*event_slot_index = ev->event_id;
}
break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 66bae8f2dad1..8eecd2cd1fd2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -59,24 +59,21 @@
  * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
  *  defines are w.r.t to PAGE_SIZE
  */
-#define KFD_MMAP_TYPE_SHIFT(62 - PAGE_SHIFT)
+#define KFD_MMAP_TYPE_SHIFT(62)
 #define KFD_MMAP_TYPE_MASK (0x3ULL << KFD_MMAP_TYPE_SHIFT)
 #define KFD_MMAP_TYPE_DOORBELL (0x3ULL << KFD_MMAP_TYPE_SHIFT)
 #define KFD_MMAP_TYPE_EVENTS   (0x2ULL << KFD_MMAP_TYPE_SHIFT)
 #define KFD_MMAP_TYPE_RESERVED_MEM (0x1ULL << KFD_MMAP_TYPE_SHIFT)
 #define KFD_MMAP_TYPE_MMIO (0x0ULL << KFD_MMAP_TYPE_SHIFT)
 
-#define KFD_MMAP_GPU_ID_SHIFT (46 - PAGE_SHIFT)
+#define KFD_MMAP_GPU_ID_SHIFT (46)
 #define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \
<< KFD_MMAP_GPU_ID_SHIFT)
 #define KFD_MMAP_GPU_ID(gpu_id) ((((uint64_t)gpu_id) << KFD_MMAP_GPU_ID_SHIFT)\
& KFD_MMAP_GPU_ID_MASK)
-#define KFD_MMAP_GPU_ID_GET(offset)((offset & KFD_MMAP_GPU_ID_MASK) \
+#define KFD_MMAP_GET_GPU_ID(offset)((offset & KFD_MMAP_GPU_ID_MASK) \
>> KFD_MMAP_GPU_ID_SHIFT)
 
-#define KFD_MMAP_OFFSET_VALUE_MASK (0x3FFFULL >> PAGE_SHIFT)
-#define KFD_MMAP_OFFSET_VALUE_GET(offset) (offset & KFD_MMAP_OFFSET_VALUE_MASK)
-
 /*
  * When working with cp scheduler we should assign the HIQ manually or via
  * the amdgpu driver to a fixed hqd slot, here are the fixed HIQ hqd slot
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 6abfb77ae540..39dc49b8fd85 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -554,8 +554,7 @@ static int kfd_process_init_cwsr_apu(struct kfd_process *p, 
struct file *filep)
if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
continue;
 
-   offset = (KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id))
-   << PAGE_SHIFT;
+   offset = KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GP

Re: [PATCH] drm/amdkfd: Simplify the mmap offset related bit operations

2019-11-05 Thread Zhao, Yong
Okay. I will delete that line.

Regards,
Yong

From: Kuehling, Felix 
Sent: Tuesday, November 5, 2019 5:34 PM
To: Zhao, Yong ; amd-gfx@lists.freedesktop.org 

Subject: Re: [PATCH] drm/amdkfd: Simplify the mmap offset related bit operations

On 2019-11-01 7:03 p.m., Zhao, Yong wrote:
>
> > + /* only leave the offset segment */
> > + vma->vm_pgoff &= (1ULL << (KFD_MMAP_GPU_ID_SHIFT - PAGE_SHIFT)) - 
> 1;
>
> You're now open-coding what used to be done by the
> KFD_MMAP_OFFSET_VALUE_GET macro. I don't see how this is an
> improvement.
> Maybe better to update the macro to do this.
>
>
> I can definitely do that, but I think we'd better delete this line
> completely as it seems odd to change vm_pgoff. Moreover this vm_pgoff
> is not used at all in the following function calls. What do you think?

I think you're right. Looks like a historical accident. I see that older
versions of kfd_event_mmap used to access vm_pgoff and probably depended
on this. We removed that in this commit:


commit 50cb7dd94cb43a6204813376e1be1d21780b71fb
Author: Felix Kuehling 
Date:   Fri Oct 27 19:35:26 2017 -0400

 drm/amdkfd: Simplify events page allocator

 The first event page is always big enough to handle all events.
 Handling of multiple events pages is not supported by user mode, and
 not necessary.

 Signed-off-by: Yong Zhao 
 Signed-off-by: Felix Kuehling 
 Acked-by: Oded Gabbay 
 Signed-off-by: Oded Gabbay 



Regards,
   Felix


>
> Regards,
> Yong
> ----
> *From:* Kuehling, Felix 
> *Sent:* Friday, November 1, 2019 6:48 PM
> *To:* Zhao, Yong ; amd-gfx@lists.freedesktop.org
> 
> *Subject:* Re: [PATCH] drm/amdkfd: Simplify the mmap offset related
> bit operations
> On 2019-11-01 4:48 p.m., Zhao, Yong wrote:
> > The new code is much cleaner and results in better readability.
> >
> > Change-Id: I0c1f7cca7e24ddb7b4ffe1cb0fa71943828ae373
> > Signed-off-by: Yong Zhao 
> > ---
> >   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 13 +++--
> >   drivers/gpu/drm/amd/amdkfd/kfd_events.c  |  1 -
> >   drivers/gpu/drm/amd/amdkfd/kfd_priv.h|  9 +++--
> >   drivers/gpu/drm/amd/amdkfd/kfd_process.c |  3 +--
> >   4 files changed, 11 insertions(+), 15 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > index b91993753b82..590138727ca9 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > @@ -298,7 +298,6 @@ static int kfd_ioctl_create_queue(struct file
> *filep, struct kfd_process *p,
> >/* Return gpu_id as doorbell offset for mmap usage */
> >args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
> >args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
> > - args->doorbell_offset <<= PAGE_SHIFT;
> >if (KFD_IS_SOC15(dev->device_info->asic_family))
> >/* On SOC15 ASICs, include the doorbell offset within the
> > * process doorbell frame, which could be 1 page or 2
> pages.
> > @@ -1938,20 +1937,22 @@ static int kfd_mmap(struct file *filp,
> struct vm_area_struct *vma)
> >   {
> >struct kfd_process *process;
> >struct kfd_dev *dev = NULL;
> > - unsigned long vm_pgoff;
> > + unsigned long mmap_offset;
> >unsigned int gpu_id;
> >
> >process = kfd_get_process(current);
> >if (IS_ERR(process))
> >return PTR_ERR(process);
> >
> > - vm_pgoff = vma->vm_pgoff;
> > - vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff);
> > - gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff);
> > + mmap_offset = vma->vm_pgoff << PAGE_SHIFT;
> > + gpu_id = KFD_MMAP_GET_GPU_ID(mmap_offset);
> >if (gpu_id)
> >dev = kfd_device_by_id(gpu_id);
> >
> > - switch (vm_pgoff & KFD_MMAP_TYPE_MASK) {
> > + /* only leave the offset segment */
> > + vma->vm_pgoff &= (1ULL << (KFD_MMAP_GPU_ID_SHIFT -
> PAGE_SHIFT)) - 1;
>
> You're now open-coding what used to be done by the
> KFD_MMAP_OFFSET_VALUE_GET macro. I don't see how this is an improvement.
> Maybe better to update the macro to do this.
>
>
> > +
> > + switch (mmap_offset & KFD_MMAP_TYPE_MASK) {
> >case KFD_MMAP_TYPE_DOORBELL:
> >if (!dev)
> >return -ENODEV;
> > diff --gi

Re: [PATCH] drm/amdkfd: Simplify the mmap offset related bit operations

2019-11-01 Thread Zhao, Yong
> + /* only leave the offset segment */
> + vma->vm_pgoff &= (1ULL << (KFD_MMAP_GPU_ID_SHIFT - PAGE_SHIFT)) - 1;

You're now open-coding what used to be done by the
KFD_MMAP_OFFSET_VALUE_GET macro. I don't see how this is an improvement.
Maybe better to update the macro to do this.

I can definitely do that, but I think we'd better delete this line completely 
as it seems odd to change vm_pgoff. Moreover this vm_pgoff is not used at all 
in the following function calls. What do you think?

Regards,
Yong

From: Kuehling, Felix 
Sent: Friday, November 1, 2019 6:48 PM
To: Zhao, Yong ; amd-gfx@lists.freedesktop.org 

Subject: Re: [PATCH] drm/amdkfd: Simplify the mmap offset related bit operations

On 2019-11-01 4:48 p.m., Zhao, Yong wrote:
> The new code is much cleaner and results in better readability.
>
> Change-Id: I0c1f7cca7e24ddb7b4ffe1cb0fa71943828ae373
> Signed-off-by: Yong Zhao 
> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 13 +++--
>   drivers/gpu/drm/amd/amdkfd/kfd_events.c  |  1 -
>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h|  9 +++--
>   drivers/gpu/drm/amd/amdkfd/kfd_process.c |  3 +--
>   4 files changed, 11 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index b91993753b82..590138727ca9 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -298,7 +298,6 @@ static int kfd_ioctl_create_queue(struct file *filep, 
> struct kfd_process *p,
>/* Return gpu_id as doorbell offset for mmap usage */
>args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
>args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
> - args->doorbell_offset <<= PAGE_SHIFT;
>if (KFD_IS_SOC15(dev->device_info->asic_family))
>/* On SOC15 ASICs, include the doorbell offset within the
> * process doorbell frame, which could be 1 page or 2 pages.
> @@ -1938,20 +1937,22 @@ static int kfd_mmap(struct file *filp, struct 
> vm_area_struct *vma)
>   {
>struct kfd_process *process;
>struct kfd_dev *dev = NULL;
> - unsigned long vm_pgoff;
> + unsigned long mmap_offset;
>unsigned int gpu_id;
>
>process = kfd_get_process(current);
>if (IS_ERR(process))
>return PTR_ERR(process);
>
> - vm_pgoff = vma->vm_pgoff;
> - vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff);
> - gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff);
> + mmap_offset = vma->vm_pgoff << PAGE_SHIFT;
> + gpu_id = KFD_MMAP_GET_GPU_ID(mmap_offset);
>if (gpu_id)
>dev = kfd_device_by_id(gpu_id);
>
> - switch (vm_pgoff & KFD_MMAP_TYPE_MASK) {
> + /* only leave the offset segment */
> + vma->vm_pgoff &= (1ULL << (KFD_MMAP_GPU_ID_SHIFT - PAGE_SHIFT)) - 1;

You're now open-coding what used to be done by the
KFD_MMAP_OFFSET_VALUE_GET macro. I don't see how this is an improvement.
Maybe better to update the macro to do this.


> +
> + switch (mmap_offset & KFD_MMAP_TYPE_MASK) {
>case KFD_MMAP_TYPE_DOORBELL:
>if (!dev)
>return -ENODEV;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> index 908081c85de1..1f8365575b12 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> @@ -346,7 +346,6 @@ int kfd_event_create(struct file *devkfd, struct 
> kfd_process *p,
>ret = create_signal_event(devkfd, p, ev);
>if (!ret) {
>*event_page_offset = KFD_MMAP_TYPE_EVENTS;
> - *event_page_offset <<= PAGE_SHIFT;
>*event_slot_index = ev->event_id;
>}
>break;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 66bae8f2dad1..8eecd2cd1fd2 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -59,24 +59,21 @@
>* NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
>*  defines are w.r.t to PAGE_SIZE
>*/
> -#define KFD_MMAP_TYPE_SHIFT  (62 - PAGE_SHIFT)
> +#define KFD_MMAP_TYPE_SHIFT  (62)
>   #define KFD_MMAP_TYPE_MASK  (0x3ULL << KFD_MMAP_TYPE_SHIFT)
>   #define KFD_MMAP_TYPE_DOORBELL  (0x3ULL << KFD_MMAP_TYPE_SHIFT)
>   #define KFD_MMAP_TYPE_EVENTS(0x2ULL << KFD_MMAP_TYPE_SHIFT)
>   #define KFD_MMAP_TYPE_RESERVED_MEM  (0x1ULL << KFD_MMA

Re: [PATCH] drm/amdkfd: Simplify the mmap offset related bit operations

2019-11-01 Thread Zhao, Yong
Hi Felix,

The PAGE_SHIFT was not deleted but merged into the KFD_*_SHIFT in kfd_priv.h. 
Because of that, this change is actually transparent to the thunk, and it only 
straightens up the bit shift operations in most cases.

Regards,
Yong

From: Kuehling, Felix 
Sent: Friday, November 1, 2019 5:13 PM
To: amd-gfx@lists.freedesktop.org ; Zhao, Yong 

Subject: Re: [PATCH] drm/amdkfd: Simplify the mmap offset related bit operations

NAK. This won't work for several reasons.

The mmap_offset is used as offset parameter in the mmap system call. If
you check the man page of mmap, you'll see that "offset must be a
multiple of the page size". Therefore the PAGE_SHIFT is necessary.

In the case of doorbell offsets, the offset is parsed and processed by
the Thunk in user mode. On GFX9 GPUs the lower bits are used for the
offset of the doorbell within the doorbell page. On GFX8 the queue ID
was used, but on GFX9 we had to decouple the doorbell ID from the queue
ID. If you remove the PAGE_SHIFT, you'll need to put those bits
somewhere else. But that change in the encoding would break the ABI with
the Thunk.

Regards,
   Felix

On 2019-11-01 4:48 p.m., Zhao, Yong wrote:
> The new code is much cleaner and results in better readability.
>
> Change-Id: I0c1f7cca7e24ddb7b4ffe1cb0fa71943828ae373
> Signed-off-by: Yong Zhao 
> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 13 +++--
>   drivers/gpu/drm/amd/amdkfd/kfd_events.c  |  1 -
>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h|  9 +++--
>   drivers/gpu/drm/amd/amdkfd/kfd_process.c |  3 +--
>   4 files changed, 11 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index b91993753b82..590138727ca9 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -298,7 +298,6 @@ static int kfd_ioctl_create_queue(struct file *filep, 
> struct kfd_process *p,
>/* Return gpu_id as doorbell offset for mmap usage */
>args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
>args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
> - args->doorbell_offset <<= PAGE_SHIFT;
>if (KFD_IS_SOC15(dev->device_info->asic_family))
>/* On SOC15 ASICs, include the doorbell offset within the
> * process doorbell frame, which could be 1 page or 2 pages.
> @@ -1938,20 +1937,22 @@ static int kfd_mmap(struct file *filp, struct 
> vm_area_struct *vma)
>   {
>struct kfd_process *process;
>struct kfd_dev *dev = NULL;
> - unsigned long vm_pgoff;
> + unsigned long mmap_offset;
>unsigned int gpu_id;
>
>process = kfd_get_process(current);
>if (IS_ERR(process))
>return PTR_ERR(process);
>
> - vm_pgoff = vma->vm_pgoff;
> - vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff);
> - gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff);
> + mmap_offset = vma->vm_pgoff << PAGE_SHIFT;
> + gpu_id = KFD_MMAP_GET_GPU_ID(mmap_offset);
>if (gpu_id)
>dev = kfd_device_by_id(gpu_id);
>
> - switch (vm_pgoff & KFD_MMAP_TYPE_MASK) {
> + /* only leave the offset segment */
> + vma->vm_pgoff &= (1ULL << (KFD_MMAP_GPU_ID_SHIFT - PAGE_SHIFT)) - 1;
> +
> + switch (mmap_offset & KFD_MMAP_TYPE_MASK) {
>case KFD_MMAP_TYPE_DOORBELL:
>if (!dev)
>return -ENODEV;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> index 908081c85de1..1f8365575b12 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> @@ -346,7 +346,6 @@ int kfd_event_create(struct file *devkfd, struct 
> kfd_process *p,
>ret = create_signal_event(devkfd, p, ev);
>if (!ret) {
>*event_page_offset = KFD_MMAP_TYPE_EVENTS;
> - *event_page_offset <<= PAGE_SHIFT;
>*event_slot_index = ev->event_id;
>}
>break;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 66bae8f2dad1..8eecd2cd1fd2 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -59,24 +59,21 @@
>* NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
>*  defines are w.r.t to PAGE_SIZE
>*/
> -#define KFD_MMAP_TYPE_SHIFT  (62 - PAGE_SHIFT)
> +#define KFD_MMAP_TYPE_SHIFT  (62)
>   #define KFD_MMAP_TYPE_MASK  (0x3ULL << KFD_MMAP_TYP

Re: [PATCH] drm/amdkfd: Simplify the mmap offset related bit operations

2019-11-01 Thread Zhao, Yong
Please discard this one and look for an updated version.

Regards,
Yong

From: Zhao, Yong 
Sent: Friday, November 1, 2019 4:11 PM
To: amd-gfx@lists.freedesktop.org 
Cc: Zhao, Yong 
Subject: [PATCH] drm/amdkfd: Simplify the mmap offset related bit operations

The new code is much cleaner and results in better readability.

Change-Id: I0c1f7cca7e24ddb7b4ffe1cb0fa71943828ae373
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 10 --
 drivers/gpu/drm/amd/amdkfd/kfd_events.c  |  1 -
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h|  9 +++--
 drivers/gpu/drm/amd/amdkfd/kfd_process.c |  3 +--
 4 files changed, 8 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index b91993753b82..34078df36621 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -298,7 +298,6 @@ static int kfd_ioctl_create_queue(struct file *filep, 
struct kfd_process *p,
 /* Return gpu_id as doorbell offset for mmap usage */
 args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
 args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
-   args->doorbell_offset <<= PAGE_SHIFT;
 if (KFD_IS_SOC15(dev->device_info->asic_family))
 /* On SOC15 ASICs, include the doorbell offset within the
  * process doorbell frame, which could be 1 page or 2 pages.
@@ -1938,20 +1937,19 @@ static int kfd_mmap(struct file *filp, struct 
vm_area_struct *vma)
 {
 struct kfd_process *process;
 struct kfd_dev *dev = NULL;
-   unsigned long vm_pgoff;
+   unsigned long mmap_offset;
 unsigned int gpu_id;

 process = kfd_get_process(current);
 if (IS_ERR(process))
 return PTR_ERR(process);

-   vm_pgoff = vma->vm_pgoff;
-   vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff);
-   gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff);
+   mmap_offset = vma->vm_pgoff << PAGE_SHIFT;
+   gpu_id = KFD_MMAP_GET_GPU_ID(mmap_offset);
 if (gpu_id)
 dev = kfd_device_by_id(gpu_id);

-   switch (vm_pgoff & KFD_MMAP_TYPE_MASK) {
+   switch (mmap_offset & KFD_MMAP_TYPE_MASK) {
 case KFD_MMAP_TYPE_DOORBELL:
 if (!dev)
 return -ENODEV;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 908081c85de1..1f8365575b12 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -346,7 +346,6 @@ int kfd_event_create(struct file *devkfd, struct 
kfd_process *p,
 ret = create_signal_event(devkfd, p, ev);
 if (!ret) {
 *event_page_offset = KFD_MMAP_TYPE_EVENTS;
-   *event_page_offset <<= PAGE_SHIFT;
 *event_slot_index = ev->event_id;
 }
 break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 66bae8f2dad1..8eecd2cd1fd2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -59,24 +59,21 @@
  * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
  *  defines are w.r.t to PAGE_SIZE
  */
-#define KFD_MMAP_TYPE_SHIFT(62 - PAGE_SHIFT)
+#define KFD_MMAP_TYPE_SHIFT(62)
 #define KFD_MMAP_TYPE_MASK  (0x3ULL << KFD_MMAP_TYPE_SHIFT)
 #define KFD_MMAP_TYPE_DOORBELL  (0x3ULL << KFD_MMAP_TYPE_SHIFT)
 #define KFD_MMAP_TYPE_EVENTS(0x2ULL << KFD_MMAP_TYPE_SHIFT)
 #define KFD_MMAP_TYPE_RESERVED_MEM  (0x1ULL << KFD_MMAP_TYPE_SHIFT)
 #define KFD_MMAP_TYPE_MMIO  (0x0ULL << KFD_MMAP_TYPE_SHIFT)

-#define KFD_MMAP_GPU_ID_SHIFT (46 - PAGE_SHIFT)
+#define KFD_MMAP_GPU_ID_SHIFT (46)
 #define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \
 << KFD_MMAP_GPU_ID_SHIFT)
 #define KFD_MMAP_GPU_ID(gpu_id) ((((uint64_t)gpu_id) << KFD_MMAP_GPU_ID_SHIFT)\
 & KFD_MMAP_GPU_ID_MASK)
-#define KFD_MMAP_GPU_ID_GET(offset)((offset & KFD_MMAP_GPU_ID_MASK) \
+#define KFD_MMAP_GET_GPU_ID(offset)((offset & KFD_MMAP_GPU_ID_MASK) \
 >> KFD_MMAP_GPU_ID_SHIFT)

-#define KFD_MMAP_OFFSET_VALUE_MASK (0x3FFFULL >> PAGE_SHIFT)
-#define KFD_MMAP_OFFSET_VALUE_GET(offset) (offset & KFD_MMAP_OFFSET_VALUE_MASK)
-
 /*
  * When working with cp scheduler we should assign the HIQ manually or via
  * the amdgpu driver to a fixed hqd slot, here are the fixed HIQ hqd slot
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 6abfb77ae540..39dc49b8fd85 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/d

[PATCH] drm/amdkfd: Simplify the mmap offset related bit operations

2019-11-01 Thread Zhao, Yong
The new code is much cleaner and results in better readability.

Change-Id: I0c1f7cca7e24ddb7b4ffe1cb0fa71943828ae373
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 13 +++--
 drivers/gpu/drm/amd/amdkfd/kfd_events.c  |  1 -
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h|  9 +++--
 drivers/gpu/drm/amd/amdkfd/kfd_process.c |  3 +--
 4 files changed, 11 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index b91993753b82..590138727ca9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -298,7 +298,6 @@ static int kfd_ioctl_create_queue(struct file *filep, 
struct kfd_process *p,
/* Return gpu_id as doorbell offset for mmap usage */
args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
-   args->doorbell_offset <<= PAGE_SHIFT;
if (KFD_IS_SOC15(dev->device_info->asic_family))
/* On SOC15 ASICs, include the doorbell offset within the
 * process doorbell frame, which could be 1 page or 2 pages.
@@ -1938,20 +1937,22 @@ static int kfd_mmap(struct file *filp, struct 
vm_area_struct *vma)
 {
struct kfd_process *process;
struct kfd_dev *dev = NULL;
-   unsigned long vm_pgoff;
+   unsigned long mmap_offset;
unsigned int gpu_id;
 
process = kfd_get_process(current);
if (IS_ERR(process))
return PTR_ERR(process);
 
-   vm_pgoff = vma->vm_pgoff;
-   vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff);
-   gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff);
+   mmap_offset = vma->vm_pgoff << PAGE_SHIFT;
+   gpu_id = KFD_MMAP_GET_GPU_ID(mmap_offset);
if (gpu_id)
dev = kfd_device_by_id(gpu_id);
 
-   switch (vm_pgoff & KFD_MMAP_TYPE_MASK) {
+   /* only leave the offset segment */
+   vma->vm_pgoff &= (1ULL << (KFD_MMAP_GPU_ID_SHIFT - PAGE_SHIFT)) - 1;
+
+   switch (mmap_offset & KFD_MMAP_TYPE_MASK) {
case KFD_MMAP_TYPE_DOORBELL:
if (!dev)
return -ENODEV;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 908081c85de1..1f8365575b12 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -346,7 +346,6 @@ int kfd_event_create(struct file *devkfd, struct 
kfd_process *p,
ret = create_signal_event(devkfd, p, ev);
if (!ret) {
*event_page_offset = KFD_MMAP_TYPE_EVENTS;
-   *event_page_offset <<= PAGE_SHIFT;
*event_slot_index = ev->event_id;
}
break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 66bae8f2dad1..8eecd2cd1fd2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -59,24 +59,21 @@
  * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
  *  defines are w.r.t to PAGE_SIZE
  */
-#define KFD_MMAP_TYPE_SHIFT(62 - PAGE_SHIFT)
+#define KFD_MMAP_TYPE_SHIFT(62)
 #define KFD_MMAP_TYPE_MASK (0x3ULL << KFD_MMAP_TYPE_SHIFT)
 #define KFD_MMAP_TYPE_DOORBELL (0x3ULL << KFD_MMAP_TYPE_SHIFT)
 #define KFD_MMAP_TYPE_EVENTS   (0x2ULL << KFD_MMAP_TYPE_SHIFT)
 #define KFD_MMAP_TYPE_RESERVED_MEM (0x1ULL << KFD_MMAP_TYPE_SHIFT)
 #define KFD_MMAP_TYPE_MMIO (0x0ULL << KFD_MMAP_TYPE_SHIFT)
 
-#define KFD_MMAP_GPU_ID_SHIFT (46 - PAGE_SHIFT)
+#define KFD_MMAP_GPU_ID_SHIFT (46)
 #define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \
<< KFD_MMAP_GPU_ID_SHIFT)
 #define KFD_MMAP_GPU_ID(gpu_id) ((((uint64_t)gpu_id) << KFD_MMAP_GPU_ID_SHIFT)\
& KFD_MMAP_GPU_ID_MASK)
-#define KFD_MMAP_GPU_ID_GET(offset)((offset & KFD_MMAP_GPU_ID_MASK) \
+#define KFD_MMAP_GET_GPU_ID(offset)((offset & KFD_MMAP_GPU_ID_MASK) \
>> KFD_MMAP_GPU_ID_SHIFT)
 
-#define KFD_MMAP_OFFSET_VALUE_MASK (0x3FFFULL >> PAGE_SHIFT)
-#define KFD_MMAP_OFFSET_VALUE_GET(offset) (offset & KFD_MMAP_OFFSET_VALUE_MASK)
-
 /*
  * When working with cp scheduler we should assign the HIQ manually or via
  * the amdgpu driver to a fixed hqd slot, here are the fixed HIQ hqd slot
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 6abfb77ae540..39dc49b8fd85 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -554,8 +554,7 @@ static int kfd_process_init_cwsr_apu(struct kfd_process *p, 
struct file *filep)
if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
continue;
 
-   offset = (KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(d

[PATCH] drm/amdkfd: Simplify the mmap offset related bit operations

2019-11-01 Thread Zhao, Yong
The new code is much cleaner and results in better readability.

Change-Id: I0c1f7cca7e24ddb7b4ffe1cb0fa71943828ae373
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 10 --
 drivers/gpu/drm/amd/amdkfd/kfd_events.c  |  1 -
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h|  9 +++--
 drivers/gpu/drm/amd/amdkfd/kfd_process.c |  3 +--
 4 files changed, 8 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index b91993753b82..34078df36621 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -298,7 +298,6 @@ static int kfd_ioctl_create_queue(struct file *filep, 
struct kfd_process *p,
/* Return gpu_id as doorbell offset for mmap usage */
args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
-   args->doorbell_offset <<= PAGE_SHIFT;
if (KFD_IS_SOC15(dev->device_info->asic_family))
/* On SOC15 ASICs, include the doorbell offset within the
 * process doorbell frame, which could be 1 page or 2 pages.
@@ -1938,20 +1937,19 @@ static int kfd_mmap(struct file *filp, struct 
vm_area_struct *vma)
 {
struct kfd_process *process;
struct kfd_dev *dev = NULL;
-   unsigned long vm_pgoff;
+   unsigned long mmap_offset;
unsigned int gpu_id;
 
process = kfd_get_process(current);
if (IS_ERR(process))
return PTR_ERR(process);
 
-   vm_pgoff = vma->vm_pgoff;
-   vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff);
-   gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff);
+   mmap_offset = vma->vm_pgoff << PAGE_SHIFT;
+   gpu_id = KFD_MMAP_GET_GPU_ID(mmap_offset);
if (gpu_id)
dev = kfd_device_by_id(gpu_id);
 
-   switch (vm_pgoff & KFD_MMAP_TYPE_MASK) {
+   switch (mmap_offset & KFD_MMAP_TYPE_MASK) {
case KFD_MMAP_TYPE_DOORBELL:
if (!dev)
return -ENODEV;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 908081c85de1..1f8365575b12 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -346,7 +346,6 @@ int kfd_event_create(struct file *devkfd, struct 
kfd_process *p,
ret = create_signal_event(devkfd, p, ev);
if (!ret) {
*event_page_offset = KFD_MMAP_TYPE_EVENTS;
-   *event_page_offset <<= PAGE_SHIFT;
*event_slot_index = ev->event_id;
}
break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 66bae8f2dad1..8eecd2cd1fd2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -59,24 +59,21 @@
  * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
  *  defines are w.r.t to PAGE_SIZE
  */
-#define KFD_MMAP_TYPE_SHIFT(62 - PAGE_SHIFT)
+#define KFD_MMAP_TYPE_SHIFT(62)
 #define KFD_MMAP_TYPE_MASK (0x3ULL << KFD_MMAP_TYPE_SHIFT)
 #define KFD_MMAP_TYPE_DOORBELL (0x3ULL << KFD_MMAP_TYPE_SHIFT)
 #define KFD_MMAP_TYPE_EVENTS   (0x2ULL << KFD_MMAP_TYPE_SHIFT)
 #define KFD_MMAP_TYPE_RESERVED_MEM (0x1ULL << KFD_MMAP_TYPE_SHIFT)
 #define KFD_MMAP_TYPE_MMIO (0x0ULL << KFD_MMAP_TYPE_SHIFT)
 
-#define KFD_MMAP_GPU_ID_SHIFT (46 - PAGE_SHIFT)
+#define KFD_MMAP_GPU_ID_SHIFT (46)
 #define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \
<< KFD_MMAP_GPU_ID_SHIFT)
 #define KFD_MMAP_GPU_ID(gpu_id) ((((uint64_t)gpu_id) << KFD_MMAP_GPU_ID_SHIFT)\
& KFD_MMAP_GPU_ID_MASK)
-#define KFD_MMAP_GPU_ID_GET(offset)((offset & KFD_MMAP_GPU_ID_MASK) \
+#define KFD_MMAP_GET_GPU_ID(offset)((offset & KFD_MMAP_GPU_ID_MASK) \
>> KFD_MMAP_GPU_ID_SHIFT)
 
-#define KFD_MMAP_OFFSET_VALUE_MASK (0x3FFFULL >> PAGE_SHIFT)
-#define KFD_MMAP_OFFSET_VALUE_GET(offset) (offset & KFD_MMAP_OFFSET_VALUE_MASK)
-
 /*
  * When working with cp scheduler we should assign the HIQ manually or via
  * the amdgpu driver to a fixed hqd slot, here are the fixed HIQ hqd slot
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 6abfb77ae540..39dc49b8fd85 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -554,8 +554,7 @@ static int kfd_process_init_cwsr_apu(struct kfd_process *p, 
struct file *filep)
if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
continue;
 
-   offset = (KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id))
-   << PAGE_SHIFT;
+   offset = KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id);

[PATCH] drm/amdkfd: Rename create_cp_queue() to init_user_queue()

2019-11-01 Thread Zhao, Yong
create_cp_queue() could also work with SDMA queues, so we should rename
it.

Change-Id: I76cbaed8fa95dd9062d786cbc1dd037ff041da9d
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 48185d2957e9..ebb2f69b438c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -162,7 +162,7 @@ void pqm_uninit(struct process_queue_manager *pqm)
pqm->queue_slot_bitmap = NULL;
 }
 
-static int create_cp_queue(struct process_queue_manager *pqm,
+static int init_user_queue(struct process_queue_manager *pqm,
struct kfd_dev *dev, struct queue **q,
struct queue_properties *q_properties,
struct file *f, unsigned int qid)
@@ -251,7 +251,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
goto err_create_queue;
}
 
-   retval = create_cp_queue(pqm, dev, &q, properties, f, *qid);
+   retval = init_user_queue(pqm, dev, &q, properties, f, *qid);
if (retval != 0)
goto err_create_queue;
pqn->q = q;
@@ -272,7 +272,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
goto err_create_queue;
}
 
-   retval = create_cp_queue(pqm, dev, &q, properties, f, *qid);
+   retval = init_user_queue(pqm, dev, &q, properties, f, *qid);
if (retval != 0)
goto err_create_queue;
pqn->q = q;
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 1/2] drm/amdkfd: Use better name to indicate the offset is in dwords

2019-11-01 Thread Zhao, Yong
Change-Id: I75da23bba90231762cf58da3170f5bb77ece45ed
Signed-off-by: Yong Zhao 
---
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c  | 14 +++---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h  |  8 
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 984c2f2b24b6..4503fb26fe5b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -170,7 +170,7 @@ static int allocate_doorbell(struct qcm_process_device 
*qpd, struct queue *q)
}
 
q->properties.doorbell_off =
-   kfd_doorbell_id_to_offset(dev, q->process,
+   kfd_get_doorbell_dw_offset_from_bar(dev, q->process,
  q->doorbell_id);
 
return 0;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
index ebe79bf00145..f904355c44a1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -91,7 +91,7 @@ int kfd_doorbell_init(struct kfd_dev *kfd)
kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address +
doorbell_start_offset;
 
-   kfd->doorbell_id_offset = doorbell_start_offset / sizeof(u32);
+   kfd->doorbell_base_dw_offset = doorbell_start_offset / sizeof(u32);
 
kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base,
   kfd_doorbell_process_slice(kfd));
@@ -103,8 +103,8 @@ int kfd_doorbell_init(struct kfd_dev *kfd)
pr_debug("doorbell base   == 0x%08lX\n",
(uintptr_t)kfd->doorbell_base);
 
-   pr_debug("doorbell_id_offset  == 0x%08lX\n",
-   kfd->doorbell_id_offset);
+   pr_debug("doorbell_base_dw_offset  == 0x%08lX\n",
+   kfd->doorbell_base_dw_offset);
 
pr_debug("doorbell_process_limit  == 0x%08lX\n",
doorbell_process_limit);
@@ -185,7 +185,7 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
 * Calculating the kernel doorbell offset using the first
 * doorbell page.
 */
-   *doorbell_off = kfd->doorbell_id_offset + inx;
+   *doorbell_off = kfd->doorbell_base_dw_offset + inx;
 
pr_debug("Get kernel queue doorbell\n"
" doorbell offset   == 0x%08X\n"
@@ -225,17 +225,17 @@ void write_kernel_doorbell64(void __iomem *db, u64 value)
}
 }
 
-unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd,
+unsigned int kfd_get_doorbell_dw_offset_from_bar(struct kfd_dev *kfd,
struct kfd_process *process,
unsigned int doorbell_id)
 {
/*
-* doorbell_id_offset accounts for doorbells taken by KGD.
+* doorbell_base_dw_offset accounts for doorbells taken by KGD.
 * index * kfd_doorbell_process_slice/sizeof(u32) adjusts to
 * the process's doorbells. The offset returned is in dword
 * units regardless of the ASIC-dependent doorbell size.
 */
-   return kfd->doorbell_id_offset +
+   return kfd->doorbell_base_dw_offset +
process->doorbell_index
* kfd_doorbell_process_slice(kfd) / sizeof(u32) +
doorbell_id * kfd->device_info->doorbell_size / sizeof(u32);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 62db4d20ed32..7c561c98f2e2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -238,9 +238,9 @@ struct kfd_dev {
 * KFD. It is aligned for mapping
 * into user mode
 */
-   size_t doorbell_id_offset;  /* Doorbell offset (from KFD doorbell
-* to HW doorbell, GFX reserved some
-* at the start)
+   size_t doorbell_base_dw_offset; /* Doorbell dword offset (from KFD
+* doorbell to PCI doorbell bar,
+* GFX reserved some at the start)
 */
u32 __iomem *doorbell_kernel_ptr; /* This is a pointer for a doorbells
   * page used by kernel queue
@@ -821,7 +821,7 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 
__iomem *db_addr);
 u32 read_kernel_doorbell(u32 __iomem *db);
 void write_kernel_doorbell(void __iomem *db, u32 value);
 void write_kernel_doorbell64(void __iomem *db, u64 value);
-unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd,
+uns

[PATCH 2/2] drm/amdkfd: Avoid using doorbell_off as offset in process doorbell pages

2019-11-01 Thread Zhao, Yong
doorbell_off in the queue properties is mainly used for the doorbell dword
offset in the PCI BAR. We should not set it to the doorbell byte offset in
the process doorbell pages. This makes the code much easier to read.

Change-Id: I553045ff9fcb3676900c92d10426f2ceb3660005
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 12 ++--
 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c  |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h|  3 ++-
 .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c   |  8 ++--
 4 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index d9e36dbf13d5..b91993753b82 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -258,6 +258,7 @@ static int kfd_ioctl_create_queue(struct file *filep, 
struct kfd_process *p,
unsigned int queue_id;
struct kfd_process_device *pdd;
struct queue_properties q_properties;
+   uint32_t doorbell_offset_in_process = 0;
 
memset(&q_properties, 0, sizeof(struct queue_properties));
 
@@ -286,7 +287,8 @@ static int kfd_ioctl_create_queue(struct file *filep, 
struct kfd_process *p,
p->pasid,
dev->id);
 
-   err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id);
+   err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id,
+   &doorbell_offset_in_process);
if (err != 0)
goto err_create_queue;
 
@@ -298,12 +300,10 @@ static int kfd_ioctl_create_queue(struct file *filep, 
struct kfd_process *p,
args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
args->doorbell_offset <<= PAGE_SHIFT;
if (KFD_IS_SOC15(dev->device_info->asic_family))
-   /* On SOC15 ASICs, doorbell allocation must be
-* per-device, and independent from the per-process
-* queue_id. Return the doorbell offset within the
-* doorbell aperture to user mode.
+   /* On SOC15 ASICs, include the doorbell offset within the
+* process doorbell frame, which could be 1 page or 2 pages.
 */
-   args->doorbell_offset |= q_properties.doorbell_off;
+   args->doorbell_offset |= doorbell_offset_in_process;
 
mutex_unlock(&p->mutex);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
index d59f2cd056c6..1d33c4f25263 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
@@ -185,7 +185,7 @@ static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
properties.type = KFD_QUEUE_TYPE_DIQ;
 
status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
-   &properties, &qid);
+   &properties, &qid, NULL);
 
if (status) {
pr_err("Failed to create DIQ\n");
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 7c561c98f2e2..66bae8f2dad1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -907,7 +907,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
struct kfd_dev *dev,
struct file *f,
struct queue_properties *properties,
-   unsigned int *qid);
+   unsigned int *qid,
+   uint32_t *p_doorbell_offset_in_process);
 int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid);
 int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
struct queue_properties *p);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 8509814a6ff0..48185d2957e9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -192,7 +192,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
struct kfd_dev *dev,
struct file *f,
struct queue_properties *properties,
-   unsigned int *qid)
+   unsigned int *qid,
+   uint32_t *p_doorbell_offset_in_process)
 {
int retval;
struct kfd_process_device *pdd;
@@ -307,8 +308,11 @@ int pqm_create_queue(struct process_queue_manager *pqm,
/* Return the doorbell offset within the doorbell page
 * to the caller so it can be passed up to user mode
 * (in bytes).
+* There are always 1024 doorbells per process, so in case
+* of 8

[PATCH 3/3] drm/amdkfd: Use kernel queue v9 functions for v10

2019-10-30 Thread Zhao, Yong
The kernel queue functions for v9 and v10 are the same, except for
pm_map_process_v*, which differ slightly, so they should be reused.
This eliminates the need to reapply several patches that were
applied on v9 but not on v10, such as the bigger GWS and the support for
more than 2 SDMA engines, which were introduced on Arcturus.

Change-Id: I2d385961e3c884db14e30b5afc98d0d9e4cb1802
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/Makefile   |   1 -
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c |   4 +-
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h |   1 -
 .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 317 --
 .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  |  49 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |   3 -
 6 files changed, 44 insertions(+), 331 deletions(-)
 delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c

diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile 
b/drivers/gpu/drm/amd/amdkfd/Makefile
index 48155060a57c..017a8b7156da 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -41,7 +41,6 @@ AMDKFD_FILES  := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_kernel_queue_cik.o \
$(AMDKFD_PATH)/kfd_kernel_queue_vi.o \
$(AMDKFD_PATH)/kfd_kernel_queue_v9.o \
-   $(AMDKFD_PATH)/kfd_kernel_queue_v10.o \
$(AMDKFD_PATH)/kfd_packet_manager.o \
$(AMDKFD_PATH)/kfd_process_queue_manager.o \
$(AMDKFD_PATH)/kfd_device_queue_manager.o \
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 11d244891393..0d966408ea87 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -332,12 +332,10 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev 
*dev,
case CHIP_RAVEN:
case CHIP_RENOIR:
case CHIP_ARCTURUS:
-   kernel_queue_init_v9(&kq->ops_asic_specific);
-   break;
case CHIP_NAVI10:
case CHIP_NAVI12:
case CHIP_NAVI14:
-   kernel_queue_init_v10(&kq->ops_asic_specific);
+   kernel_queue_init_v9(&kq->ops_asic_specific);
break;
default:
WARN(1, "Unexpected ASIC family %u",
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
index 365fc674fea4..a7116a939029 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
@@ -102,6 +102,5 @@ struct kernel_queue {
 void kernel_queue_init_cik(struct kernel_queue_ops *ops);
 void kernel_queue_init_vi(struct kernel_queue_ops *ops);
 void kernel_queue_init_v9(struct kernel_queue_ops *ops);
-void kernel_queue_init_v10(struct kernel_queue_ops *ops);
 
 #endif /* KFD_KERNEL_QUEUE_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
deleted file mode 100644
index bfd6221acae9..
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
+++ /dev/null
@@ -1,317 +0,0 @@
-/*
- * Copyright 2018 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#include "kfd_kernel_queue.h"
-#include "kfd_device_queue_manager.h"
-#include "kfd_pm4_headers_ai.h"
-#include "kfd_pm4_opcodes.h"
-#include "gc/gc_10_1_0_sh_mask.h"
-
-static bool initialize_v10(struct kernel_queue *kq, struct kfd_dev *dev,
-   enum kfd_queue_type type, unsigned int queue_size);
-static void uninitialize_v10(struct kernel_queue *kq);
-static void submit_packet_v10(struct kernel_queue *kq);
-
-void kernel_queue_init_v10(struct kernel_queue_ops *ops)
-{
-   ops->initialize = initialize_v10;
-   ops->uninitialize = uninitialize_v10;
-   ops->submit_packet = submit_packet_v10;
-}
-
-static bool initialize_v10

[PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

2019-10-30 Thread Zhao, Yong
release_mem won't be used at all on GFX9 and GFX10, so delete it.

Change-Id: I13787a8a29b83e7516c582a7401f2e14721edf5f
Signed-off-by: Yong Zhao 
---
 .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 35 ++-
 .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  | 33 ++---
 2 files changed, 4 insertions(+), 64 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
index aed32ab7102e..bfd6221acae9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
@@ -298,37 +298,6 @@ static int pm_query_status_v10(struct packet_manager *pm, 
uint32_t *buffer,
return 0;
 }
 
-
-static int pm_release_mem_v10(uint64_t gpu_addr, uint32_t *buffer)
-{
-   struct pm4_mec_release_mem *packet;
-
-   WARN_ON(!buffer);
-
-   packet = (struct pm4_mec_release_mem *)buffer;
-   memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
-
-   packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
-   sizeof(struct pm4_mec_release_mem));
-
-   packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
-   packet->bitfields2.event_index = 
event_index__mec_release_mem__end_of_pipe;
-   packet->bitfields2.tcl1_action_ena = 1;
-   packet->bitfields2.tc_action_ena = 1;
-   packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
-
-   packet->bitfields3.data_sel = 
data_sel__mec_release_mem__send_32_bit_low;
-   packet->bitfields3.int_sel =
-   int_sel__mec_release_mem__send_interrupt_after_write_confirm;
-
-   packet->bitfields4.address_lo_32b = (gpu_addr & 0x) >> 2;
-   packet->address_hi = upper_32_bits(gpu_addr);
-
-   packet->data_lo = 0;
-
-   return sizeof(struct pm4_mec_release_mem) / sizeof(unsigned int);
-}
-
 const struct packet_manager_funcs kfd_v10_pm_funcs = {
.map_process= pm_map_process_v10,
.runlist= pm_runlist_v10,
@@ -336,13 +305,13 @@ const struct packet_manager_funcs kfd_v10_pm_funcs = {
.map_queues = pm_map_queues_v10,
.unmap_queues   = pm_unmap_queues_v10,
.query_status   = pm_query_status_v10,
-   .release_mem= pm_release_mem_v10,
+   .release_mem= NULL,
.map_process_size   = sizeof(struct pm4_mes_map_process),
.runlist_size   = sizeof(struct pm4_mes_runlist),
.set_resources_size = sizeof(struct pm4_mes_set_resources),
.map_queues_size= sizeof(struct pm4_mes_map_queues),
.unmap_queues_size  = sizeof(struct pm4_mes_unmap_queues),
.query_status_size  = sizeof(struct pm4_mes_query_status),
-   .release_mem_size   = sizeof(struct pm4_mec_release_mem)
+   .release_mem_size   = 0,
 };
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
index 3b5ca2b1d7a6..f0e4910a8865 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
@@ -336,35 +336,6 @@ static int pm_query_status_v9(struct packet_manager *pm, 
uint32_t *buffer,
return 0;
 }
 
-
-static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer)
-{
-   struct pm4_mec_release_mem *packet;
-
-   packet = (struct pm4_mec_release_mem *)buffer;
-   memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
-
-   packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
-   sizeof(struct pm4_mec_release_mem));
-
-   packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
-   packet->bitfields2.event_index = 
event_index__mec_release_mem__end_of_pipe;
-   packet->bitfields2.tcl1_action_ena = 1;
-   packet->bitfields2.tc_action_ena = 1;
-   packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
-
-   packet->bitfields3.data_sel = 
data_sel__mec_release_mem__send_32_bit_low;
-   packet->bitfields3.int_sel =
-   int_sel__mec_release_mem__send_interrupt_after_write_confirm;
-
-   packet->bitfields4.address_lo_32b = (gpu_addr & 0x) >> 2;
-   packet->address_hi = upper_32_bits(gpu_addr);
-
-   packet->data_lo = 0;
-
-   return 0;
-}
-
 const struct packet_manager_funcs kfd_v9_pm_funcs = {
.map_process= pm_map_process_v9,
.runlist= pm_runlist_v9,
@@ -372,12 +343,12 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
.map_queues = pm_map_queues_v9,
.unmap_queues   = pm_unmap_queues_v9,
.query_status   = pm_query_status_v9,
-   .release_mem= pm_release_mem_v9,
+   .r

[PATCH 1/3] drm/amdkfd: Adjust function sequences to avoid unnecessary declarations

2019-10-30 Thread Zhao, Yong
This is cleaner.

Change-Id: I8cdecad387d8c547a088c6050f77385ee1135be1
Signed-off-by: Yong Zhao 
---
 .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  | 19 +++
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
index 9a4bafb2e175..3b5ca2b1d7a6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
@@ -26,18 +26,6 @@
 #include "kfd_pm4_headers_ai.h"
 #include "kfd_pm4_opcodes.h"
 
-static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev,
-   enum kfd_queue_type type, unsigned int queue_size);
-static void uninitialize_v9(struct kernel_queue *kq);
-static void submit_packet_v9(struct kernel_queue *kq);
-
-void kernel_queue_init_v9(struct kernel_queue_ops *ops)
-{
-   ops->initialize = initialize_v9;
-   ops->uninitialize = uninitialize_v9;
-   ops->submit_packet = submit_packet_v9;
-}
-
 static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev,
enum kfd_queue_type type, unsigned int queue_size)
 {
@@ -67,6 +55,13 @@ static void submit_packet_v9(struct kernel_queue *kq)
kq->pending_wptr64);
 }
 
+void kernel_queue_init_v9(struct kernel_queue_ops *ops)
+{
+   ops->initialize = initialize_v9;
+   ops->uninitialize = uninitialize_v9;
+   ops->submit_packet = submit_packet_v9;
+}
+
 static int pm_map_process_v9(struct packet_manager *pm,
uint32_t *buffer, struct qcm_process_device *qpd)
 {
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdkfd: Delete unnecessary pr_fmt switch

2019-10-28 Thread Zhao, Yong
Modified according to the comment and pushed. Thanks!

Regards,
Yong

From: Alex Deucher 
Sent: Monday, October 28, 2019 8:39 AM
To: Zhao, Yong 
Cc: amd-gfx@lists.freedesktop.org 
Subject: Re: [PATCH] drm/amdkfd: Delete unnecessary pr_fmt switch

On Fri, Oct 25, 2019 at 5:21 PM Zhao, Yong  wrote:
>
> Given amdkfd.ko has been merged into amdgpu.ko, this switch is no
> longer useful.
>
> Change-Id: If56b80e086f4ea26f347c70b620b3892afc24ddf
> Signed-off-by: Yong Zhao 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c  | 1 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 4 
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c  | 3 ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c   | 1 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c   | 1 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c   | 3 ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c| 3 ---
>  7 files changed, 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index d3da9dde4ee1..fa5471c12c34 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -19,7 +19,6 @@
>   * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>   * OTHER DEALINGS IN THE SOFTWARE.
>   */
> -

Unrelated whitespace change.

>  #include "amdgpu_amdkfd.h"
>  #include "amd_shared.h"
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> index e1fbbebce4fd..b6713e0ed1b2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> @@ -19,10 +19,6 @@
>   * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>   * OTHER DEALINGS IN THE SOFTWARE.
>   */
> -
> -#undef pr_fmt
> -#define pr_fmt(fmt) "kfd2kgd: " fmt
> -
>  #include 
>  #include 
>  #include 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> index 0878f59ec340..61cd707158e4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> @@ -19,9 +19,6 @@
>   * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>   * OTHER DEALINGS IN THE SOFTWARE.
>   */
> -#undef pr_fmt
> -#define pr_fmt(fmt) "kfd2kgd: " fmt
> -
>  #include 
>  #include "amdgpu.h"
>  #include "amdgpu_amdkfd.h"
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> index 6e6f0a99ec06..30897b2d9175 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> @@ -19,7 +19,6 @@
>   * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>   * OTHER DEALINGS IN THE SOFTWARE.
>   */
> -

Unrelated whitespace change.

>  #include 
>
>  #include "amdgpu.h"
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> index bfbddedb2380..ede6ab0cbe4b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> @@ -19,7 +19,6 @@
>   * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>   * OTHER DEALINGS IN THE SOFTWARE.
>   */
> -

Same here.

With those dropped, the patch is:
Reviewed-by: Alex Deucher 

>  #include 
>
>  #include "amdgpu.h"
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> index c72246f2c08a..47c853ef1051 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> @@ -19,9 +19,6 @@
>   * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>   * OTHER DEALINGS IN THE SOFTWARE.
>   */
> -
> -#define pr_fmt(fmt) "kfd2kgd: " fmt
> -
>  #include 
>
>  #include "amdgpu.h"
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 1fbe81094b5f..97114e18c022 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -19,9 +19,6 @@
>   * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>   * OTHER DEALINGS IN THE SOFTWARE.
>   */
> -
> -#define pr_fmt(fmt) "kfd2kgd: " fmt
> -
>  #include 
>  #include 
>  #include 
> --
> 2.17.1
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdkfd: Delete unnecessary pr_fmt switch

2019-10-25 Thread Zhao, Yong
Given amdkfd.ko has been merged into amdgpu.ko, this switch is no
longer useful.

Change-Id: If56b80e086f4ea26f347c70b620b3892afc24ddf
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c  | 1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 4 
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c  | 3 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c   | 1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c   | 1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c   | 3 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c| 3 ---
 7 files changed, 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index d3da9dde4ee1..fa5471c12c34 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -19,7 +19,6 @@
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  */
-
 #include "amdgpu_amdkfd.h"
 #include "amd_shared.h"
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index e1fbbebce4fd..b6713e0ed1b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -19,10 +19,6 @@
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  */
-
-#undef pr_fmt
-#define pr_fmt(fmt) "kfd2kgd: " fmt
-
 #include 
 #include 
 #include 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 0878f59ec340..61cd707158e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -19,9 +19,6 @@
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  */
-#undef pr_fmt
-#define pr_fmt(fmt) "kfd2kgd: " fmt
-
 #include 
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 6e6f0a99ec06..30897b2d9175 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -19,7 +19,6 @@
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  */
-
 #include 
 
 #include "amdgpu.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index bfbddedb2380..ede6ab0cbe4b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -19,7 +19,6 @@
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  */
-
 #include 
 
 #include "amdgpu.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index c72246f2c08a..47c853ef1051 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -19,9 +19,6 @@
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  */
-
-#define pr_fmt(fmt) "kfd2kgd: " fmt
-
 #include 
 
 #include "amdgpu.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 1fbe81094b5f..97114e18c022 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -19,9 +19,6 @@
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  */
-
-#define pr_fmt(fmt) "kfd2kgd: " fmt
-
 #include 
 #include 
 #include 
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdkfd: Delete duplicated queue bit map reservation

2019-10-24 Thread Zhao, Yong
The KIQ is on the second MEC and its reservation is covered by the
later logic, so there is no need to reserve its bit twice.

Change-Id: Ieee390953a60c7d43de5a9aec38803f1f583a4a9
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 8 
 1 file changed, 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 8c531793fe17..d3da9dde4ee1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -130,14 +130,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
  adev->gfx.mec.queue_bitmap,
  KGD_MAX_QUEUES);
 
-   /* remove the KIQ bit as well */
-   if (adev->gfx.kiq.ring.sched.ready)
-   clear_bit(amdgpu_gfx_mec_queue_to_bit(adev,
- adev->gfx.kiq.ring.me 
- 1,
- 
adev->gfx.kiq.ring.pipe,
- 
adev->gfx.kiq.ring.queue),
- gpu_resources.queue_bitmap);
-
/* According to linux/bitmap.h we shouldn't use bitmap_clear if
 * nbits is not compile time constant
 */
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 1/6] drm/amdkfd: Update parameter type of pasid to uint16_t

2019-09-30 Thread Zhao, Yong
I will drop this one, because I found many other functions use 32 bits 
as well and it seems to be convenient.

Regards,

Yong

On 2019-09-30 11:54 a.m., Kuehling, Felix wrote:
> If you want to make this interface consistent, you should make the vmid
> parameter uint8_t at the same time. That said, you don't really save any
> resources, because 8-bit and 16-bit ints still consume 32-bits on the
> call stack.
>
> Regards,
>     Felix
>
> On 2019-09-27 11:41 p.m., Zhao, Yong wrote:
>> This is consistent with other code and registers in the code.
>>
>> Change-Id: I04dd12bdb465a43cfcd8936ed0f227a6546830e8
>> Signed-off-by: Yong Zhao 
>> ---
>>drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 4 ++--
>>drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 4 ++--
>>drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 4 ++--
>>drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 2 +-
>>drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h | 2 +-
>>drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 4 ++--
>>drivers/gpu/drm/amd/include/kgd_kfd_interface.h   | 2 +-
>>7 files changed, 11 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
>> index 122698f8dd1e..33cbf1d073d3 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
>> @@ -59,7 +59,7 @@ static void kgd_program_sh_mem_settings(struct kgd_dev 
>> *kgd, uint32_t vmid,
>>  uint32_t sh_mem_config,
>>  uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
>>  uint32_t sh_mem_bases);
>> -static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int 
>> pasid,
>> +static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, uint16_t pasid,
>>  unsigned int vmid);
>>static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
>>static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
>> @@ -232,7 +232,7 @@ static void kgd_program_sh_mem_settings(struct kgd_dev 
>> *kgd, uint32_t vmid,
>>  unlock_srbm(kgd);
>>}
>>
>> -static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int 
>> pasid,
>> +static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, uint16_t pasid,
>>  unsigned int vmid)
>>{
>>  struct amdgpu_device *adev = get_amdgpu_device(kgd);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
>> index f77ddf7dba2b..0210d791dea1 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
>> @@ -94,7 +94,7 @@ static void kgd_program_sh_mem_settings(struct kgd_dev 
>> *kgd, uint32_t vmid,
>>  uint32_t sh_mem_config, uint32_t sh_mem_ape1_base,
>>  uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases);
>>
>> -static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int 
>> pasid,
>> +static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, uint16_t pasid,
>>  unsigned int vmid);
>>
>>static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
>> @@ -256,7 +256,7 @@ static void kgd_program_sh_mem_settings(struct kgd_dev 
>> *kgd, uint32_t vmid,
>>  unlock_srbm(kgd);
>>}
>>
>> -static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int 
>> pasid,
>> +static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, uint16_t pasid,
>>  unsigned int vmid)
>>{
>>  struct amdgpu_device *adev = get_amdgpu_device(kgd);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
>> index 7478caf096ad..7a4c762e1209 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
>> @@ -52,7 +52,7 @@ static void kgd_program_sh_mem_settings(struct kgd_dev 
>> *kgd, uint32_t vmid,
>>  uint32_t sh_mem_config,
>>  uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
>>  uint32_t sh_mem_bases);
>> -static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int 
>> pasid,
>> +static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd,

[PATCH] drm/amdkfd: Improve KFD IOCTL printing

2019-09-30 Thread Zhao, Yong
The code uses hex defines, so the printing should use hex as well.

Change-Id: Ia7cc7690553bb043915b3d8c0157216c64421a60
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index c28ba0c1d7ac..d9e36dbf13d5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1840,7 +1840,7 @@ static long kfd_ioctl(struct file *filep, unsigned int 
cmd, unsigned long arg)
} else
goto err_i1;
 
-   dev_dbg(kfd_device, "ioctl cmd 0x%x (#%d), arg 0x%lx\n", cmd, nr, arg);
+   dev_dbg(kfd_device, "ioctl cmd 0x%x (#0x%x), arg 0x%lx\n", cmd, nr, 
arg);
 
process = kfd_get_process(current);
if (IS_ERR(process)) {
@@ -1895,7 +1895,8 @@ static long kfd_ioctl(struct file *filep, unsigned int 
cmd, unsigned long arg)
kfree(kdata);
 
if (retcode)
-   dev_dbg(kfd_device, "ret = %d\n", retcode);
+   dev_dbg(kfd_device, "ioctl cmd (#0x%x), arg 0x%lx, ret = %d\n",
+   nr, arg, retcode);
 
return retcode;
 }
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 5/6] drm/amdgpu: Add the HDP flush support for Navi

2019-09-30 Thread Zhao, Yong
Not much relationship between them, except that this functional change is to 
fix an IOCTL error printing.

Yong

From: Kuehling, Felix 
Sent: Monday, September 30, 2019 11:57 AM
To: Zhao, Yong ; amd-gfx@lists.freedesktop.org 

Subject: Re: [PATCH 5/6] drm/amdgpu: Add the HDP flush support for Navi

As far as I can tell, this is the only patch with functional changes in
the patch series. The rest are purely clean-up. Any relation I'm missing?

Anyway, patches 2,3,5 are

Reviewed-by: Felix Kuehling 

On 2019-09-27 11:41 p.m., Zhao, Yong wrote:
> The HDP flush support code was missing in the nbio and nv files.
>
> Change-Id: I046ff52567676b56bf16dc1728b02481233acb61
> Signed-off-by: Yong Zhao 
> ---
>   drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c | 16 +---
>   drivers/gpu/drm/amd/amdgpu/nv.c|  9 +
>   2 files changed, 22 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c 
> b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
> index e7e36fb6113d..c699cbfe015a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
> @@ -27,11 +27,21 @@
>   #include "nbio/nbio_2_3_default.h"
>   #include "nbio/nbio_2_3_offset.h"
>   #include "nbio/nbio_2_3_sh_mask.h"
> +#include 
>
>   #define smnPCIE_CONFIG_CNTL 0x11180044
>   #define smnCPM_CONTROL  0x11180460
>   #define smnPCIE_CNTL2   0x11180070
>
> +
> +static void nbio_v2_3_remap_hdp_registers(struct amdgpu_device *adev)
> +{
> + WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
> + adev->rmmio_remap.reg_offset + 
> KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
> + WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL,
> + adev->rmmio_remap.reg_offset + 
> KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
> +}
> +
>   static u32 nbio_v2_3_get_rev_id(struct amdgpu_device *adev)
>   {
>u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
> @@ -56,10 +66,9 @@ static void nbio_v2_3_hdp_flush(struct amdgpu_device *adev,
>struct amdgpu_ring *ring)
>   {
>if (!ring || !ring->funcs->emit_wreg)
> - WREG32_SOC15_NO_KIQ(NBIO, 0, 
> mmBIF_BX_PF_HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
> + WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + 
> KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
>else
> - amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
> - NBIO, 0, mmBIF_BX_PF_HDP_MEM_COHERENCY_FLUSH_CNTL), 0);
> + amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + 
> KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
>   }
>
>   static u32 nbio_v2_3_get_memsize(struct amdgpu_device *adev)
> @@ -330,4 +339,5 @@ const struct amdgpu_nbio_funcs nbio_v2_3_funcs = {
>.ih_control = nbio_v2_3_ih_control,
>.init_registers = nbio_v2_3_init_registers,
>.detect_hw_virt = nbio_v2_3_detect_hw_virt,
> + .remap_hdp_registers = nbio_v7_4_remap_hdp_registers,
>   };
> diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
> index b3e7756fcc4b..6699a45b88ec 100644
> --- a/drivers/gpu/drm/amd/amdgpu/nv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/nv.c
> @@ -587,8 +587,11 @@ static const struct amdgpu_asic_funcs nv_asic_funcs =
>
>   static int nv_common_early_init(void *handle)
>   {
> +#define MMIO_REG_HOLE_OFFSET (0x8 - PAGE_SIZE)
>struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>
> + adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
> + adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
>adev->smc_rreg = NULL;
>adev->smc_wreg = NULL;
>adev->pcie_rreg = &nv_pcie_rreg;
> @@ -714,6 +717,12 @@ static int nv_common_hw_init(void *handle)
>nv_program_aspm(adev);
>/* setup nbio registers */
>adev->nbio.funcs->init_registers(adev);
> + /* remap HDP registers to a hole in mmio space,
> +  * for the purpose of expose those registers
> +  * to process space
> +  */
> + if (adev->nbio.funcs->remap_hdp_registers)
> + adev->nbio.funcs->remap_hdp_registers(adev);
>/* enable the doorbell aperture */
>nv_enable_doorbell_aperture(adev, true);
>
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 6/6] drm/amdkfd: Improve KFD IOCTL printing

2019-09-30 Thread Zhao, Yong
Okay, I will change dev_err back to dev_dbg. The hex printing is still very 
useful.

Yong

From: Kuehling, Felix 
Sent: Monday, September 30, 2019 11:47 AM
To: Zhao, Yong ; amd-gfx@lists.freedesktop.org 

Subject: Re: [PATCH 6/6] drm/amdkfd: Improve KFD IOCTL printing

On 2019-09-27 11:41 p.m., Zhao, Yong wrote:
> The code uses hex defines, so the printing should use hex as well. Also,
> print a message if there is a failure.
>
> Change-Id: Ia7cc7690553bb043915b3d8c0157216c64421a60
> Signed-off-by: Yong Zhao 
> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 5 +++--
>   1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index c28ba0c1d7ac..d1ab09c0f522 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -1840,7 +1840,7 @@ static long kfd_ioctl(struct file *filep, unsigned int 
> cmd, unsigned long arg)
>} else
>goto err_i1;
>
> - dev_dbg(kfd_device, "ioctl cmd 0x%x (#%d), arg 0x%lx\n", cmd, nr, arg);
> + dev_dbg(kfd_device, "ioctl cmd 0x%x (#0x%x), arg 0x%lx\n", cmd, nr, 
> arg);
>
>process = kfd_get_process(current);
>if (IS_ERR(process)) {
> @@ -1895,7 +1895,8 @@ static long kfd_ioctl(struct file *filep, unsigned int 
> cmd, unsigned long arg)
>kfree(kdata);
>
>if (retcode)
> - dev_dbg(kfd_device, "ret = %d\n", retcode);
> + dev_err(kfd_device, "ioctl cmd (#0x%x), arg 0x%lx, ret = %d\n",
> + nr, arg, retcode);

NAK. We don't want to spam the kernel log with cryptic error messages
every time ioctl functions fail. Please leave this as a dev_dbg message.
Failing ioctl functions could be perfectly normal for a number of
reasons (system call interrupted by signal, running out of event slots,
timeouts on event waiting, etc). But every bug report will incorrectly
blame any unrelated problem on those messages if they happen to appear
in the kernel log.

Regards,
   Felix


>
>return retcode;
>   }
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 4/6] drm/amdkfd: Use array to probe kfd2kgd_calls

2019-09-30 Thread Zhao, Yong
Thanks. Will check that and fix the missing const.

Yong

From: Kuehling, Felix 
Sent: Monday, September 30, 2019 11:42 AM
To: Zhao, Yong ; amd-gfx@lists.freedesktop.org 

Subject: Re: [PATCH 4/6] drm/amdkfd: Use array to probe kfd2kgd_calls

On 2019-09-27 11:41 p.m., Zhao, Yong wrote:
> This is the same idea as the kfd device info probe and moves all the
> probe control together for easy maintenance.
>
> Change-Id: I85c98bb08eb2a4a1a80c3b913c32691cc74602d1
> Signed-off-by: Yong Zhao 

Nice clean-up. See one comment inline.

Also, please check that this doesn't break the build if CONFIG_HSA_AMD
is undefined.

With that fixed and checked, this patch is

Reviewed-by: Felix Kuehling 


> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c| 65 +--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  7 --
>   .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   |  8 +--
>   .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c|  7 +-
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c |  7 +-
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c |  7 +-
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  7 +-
>   drivers/gpu/drm/amd/amdkfd/kfd_device.c   | 39 +--
>   8 files changed, 41 insertions(+), 106 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index 92666b197f6c..8c531793fe17 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -63,47 +63,10 @@ void amdgpu_amdkfd_fini(void)
>
>   void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
>   {
> - const struct kfd2kgd_calls *kfd2kgd;
>bool vf = amdgpu_sriov_vf(adev);
>
> - switch (adev->asic_type) {
> -#ifdef CONFIG_DRM_AMDGPU_CIK
> - case CHIP_KAVERI:
> - case CHIP_HAWAII:
> - kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions();
> - break;
> -#endif
> - case CHIP_CARRIZO:
> - case CHIP_TONGA:
> - case CHIP_FIJI:
> - case CHIP_POLARIS10:
> - case CHIP_POLARIS11:
> - case CHIP_POLARIS12:
> - case CHIP_VEGAM:
> - kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
> - break;
> - case CHIP_VEGA10:
> - case CHIP_VEGA12:
> - case CHIP_VEGA20:
> - case CHIP_RAVEN:
> - case CHIP_RENOIR:
> - kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions();
> - break;
> - case CHIP_ARCTURUS:
> - kfd2kgd = amdgpu_amdkfd_arcturus_get_functions();
> - break;
> - case CHIP_NAVI10:
> - case CHIP_NAVI14:
> - case CHIP_NAVI12:
> - kfd2kgd = amdgpu_amdkfd_gfx_10_0_get_functions();
> - break;
> - default:
> - dev_info(adev->dev, "kfd not supported on this ASIC\n");
> - return;
> - }
> -
>adev->kfd.dev = kgd2kfd_probe((struct kgd_dev *)adev,
> -   adev->pdev, kfd2kgd, adev->asic_type, vf);
> +   adev->pdev, adev->asic_type, vf);
>
>if (adev->kfd.dev)
>amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
> @@ -711,33 +674,7 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, 
> struct mm_struct *mm)
>return 0;
>   }
>
> -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
> -{
> - return NULL;
> -}
> -
> -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
> -{
> - return NULL;
> -}
> -
> -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)
> -{
> - return NULL;
> -}
> -
> -struct kfd2kgd_calls *amdgpu_amdkfd_arcturus_get_functions(void)
> -{
> - return NULL;
> -}
> -
> -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions(void)
> -{
> - return NULL;
> -}
> -
>   struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev,
> -   const struct kfd2kgd_calls *f2g,
>  unsigned int asic_type, bool vf)
>   {
>return NULL;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 4eb2fb85de26..069d5d230810 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -137,12 +137,6 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum 
> kgd_engine_type engine,
>   void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle);
>   bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd);
>
> -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(vo

[PATCH 4/6] drm/amdkfd: Use array to probe kfd2kgd_calls

2019-09-27 Thread Zhao, Yong
This is the same idea as the kfd device info probe and moves all the
probe control together for easy maintenance.

Change-Id: I85c98bb08eb2a4a1a80c3b913c32691cc74602d1
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c| 65 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  7 --
 .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   |  8 +--
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c|  7 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c |  7 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c |  7 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  7 +-
 drivers/gpu/drm/amd/amdkfd/kfd_device.c   | 39 +--
 8 files changed, 41 insertions(+), 106 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 92666b197f6c..8c531793fe17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -63,47 +63,10 @@ void amdgpu_amdkfd_fini(void)
 
 void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
 {
-   const struct kfd2kgd_calls *kfd2kgd;
bool vf = amdgpu_sriov_vf(adev);
 
-   switch (adev->asic_type) {
-#ifdef CONFIG_DRM_AMDGPU_CIK
-   case CHIP_KAVERI:
-   case CHIP_HAWAII:
-   kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions();
-   break;
-#endif
-   case CHIP_CARRIZO:
-   case CHIP_TONGA:
-   case CHIP_FIJI:
-   case CHIP_POLARIS10:
-   case CHIP_POLARIS11:
-   case CHIP_POLARIS12:
-   case CHIP_VEGAM:
-   kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
-   break;
-   case CHIP_VEGA10:
-   case CHIP_VEGA12:
-   case CHIP_VEGA20:
-   case CHIP_RAVEN:
-   case CHIP_RENOIR:
-   kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions();
-   break;
-   case CHIP_ARCTURUS:
-   kfd2kgd = amdgpu_amdkfd_arcturus_get_functions();
-   break;
-   case CHIP_NAVI10:
-   case CHIP_NAVI14:
-   case CHIP_NAVI12:
-   kfd2kgd = amdgpu_amdkfd_gfx_10_0_get_functions();
-   break;
-   default:
-   dev_info(adev->dev, "kfd not supported on this ASIC\n");
-   return;
-   }
-
adev->kfd.dev = kgd2kfd_probe((struct kgd_dev *)adev,
- adev->pdev, kfd2kgd, adev->asic_type, vf);
+ adev->pdev, adev->asic_type, vf);
 
if (adev->kfd.dev)
amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
@@ -711,33 +674,7 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, 
struct mm_struct *mm)
return 0;
 }
 
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
-{
-   return NULL;
-}
-
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
-{
-   return NULL;
-}
-
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)
-{
-   return NULL;
-}
-
-struct kfd2kgd_calls *amdgpu_amdkfd_arcturus_get_functions(void)
-{
-   return NULL;
-}
-
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions(void)
-{
-   return NULL;
-}
-
 struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev,
- const struct kfd2kgd_calls *f2g,
  unsigned int asic_type, bool vf)
 {
return NULL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 4eb2fb85de26..069d5d230810 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -137,12 +137,6 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum 
kgd_engine_type engine,
 void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle);
 bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd);
 
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void);
-struct kfd2kgd_calls *amdgpu_amdkfd_arcturus_get_functions(void);
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions(void);
-
 bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
 
 int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev);
@@ -248,7 +242,6 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo 
*bo);
 int kgd2kfd_init(void);
 void kgd2kfd_exit(void);
 struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev,
- const struct kfd2kgd_calls *f2g,
  unsigned int asic_type, bool vf);
 bool kgd2kfd_device_init(struct kfd_dev *kfd,
 struct drm_device *ddev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index 4a49dbee26a1..e1fbbebce4fd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c

[PATCH 5/6] drm/amdgpu: Add the HDP flush support for Navi

2019-09-27 Thread Zhao, Yong
The HDP flush support code was missing in the nbio and nv files.

Change-Id: I046ff52567676b56bf16dc1728b02481233acb61
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c | 16 +---
 drivers/gpu/drm/amd/amdgpu/nv.c|  9 +
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c 
b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
index e7e36fb6113d..c699cbfe015a 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
@@ -27,11 +27,21 @@
 #include "nbio/nbio_2_3_default.h"
 #include "nbio/nbio_2_3_offset.h"
 #include "nbio/nbio_2_3_sh_mask.h"
+#include 
 
 #define smnPCIE_CONFIG_CNTL0x11180044
 #define smnCPM_CONTROL 0x11180460
 #define smnPCIE_CNTL2  0x11180070
 
+
+static void nbio_v2_3_remap_hdp_registers(struct amdgpu_device *adev)
+{
+   WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
+   adev->rmmio_remap.reg_offset + 
KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
+   WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL,
+   adev->rmmio_remap.reg_offset + 
KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
+}
+
 static u32 nbio_v2_3_get_rev_id(struct amdgpu_device *adev)
 {
u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
@@ -56,10 +66,9 @@ static void nbio_v2_3_hdp_flush(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
 {
if (!ring || !ring->funcs->emit_wreg)
-   WREG32_SOC15_NO_KIQ(NBIO, 0, 
mmBIF_BX_PF_HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
+   WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + 
KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
else
-   amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
-   NBIO, 0, mmBIF_BX_PF_HDP_MEM_COHERENCY_FLUSH_CNTL), 0);
+   amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + 
KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
 }
 
 static u32 nbio_v2_3_get_memsize(struct amdgpu_device *adev)
@@ -330,4 +339,5 @@ const struct amdgpu_nbio_funcs nbio_v2_3_funcs = {
.ih_control = nbio_v2_3_ih_control,
.init_registers = nbio_v2_3_init_registers,
.detect_hw_virt = nbio_v2_3_detect_hw_virt,
+   .remap_hdp_registers = nbio_v7_4_remap_hdp_registers,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index b3e7756fcc4b..6699a45b88ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -587,8 +587,11 @@ static const struct amdgpu_asic_funcs nv_asic_funcs =
 
 static int nv_common_early_init(void *handle)
 {
+#define MMIO_REG_HOLE_OFFSET (0x8 - PAGE_SIZE)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+   adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+   adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
adev->smc_rreg = NULL;
adev->smc_wreg = NULL;
adev->pcie_rreg = &nv_pcie_rreg;
@@ -714,6 +717,12 @@ static int nv_common_hw_init(void *handle)
nv_program_aspm(adev);
/* setup nbio registers */
adev->nbio.funcs->init_registers(adev);
+   /* remap HDP registers to a hole in mmio space,
+* for the purpose of expose those registers
+* to process space
+*/
+   if (adev->nbio.funcs->remap_hdp_registers)
+   adev->nbio.funcs->remap_hdp_registers(adev);
/* enable the doorbell aperture */
nv_enable_doorbell_aperture(adev, true);
 
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 6/6] drm/amdkfd: Improve KFD IOCTL printing

2019-09-27 Thread Zhao, Yong
The code uses hex defines, so the printing should use hex as well. Also,
print a message if there is a failure.

Change-Id: Ia7cc7690553bb043915b3d8c0157216c64421a60
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index c28ba0c1d7ac..d1ab09c0f522 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1840,7 +1840,7 @@ static long kfd_ioctl(struct file *filep, unsigned int 
cmd, unsigned long arg)
} else
goto err_i1;
 
-   dev_dbg(kfd_device, "ioctl cmd 0x%x (#%d), arg 0x%lx\n", cmd, nr, arg);
+   dev_dbg(kfd_device, "ioctl cmd 0x%x (#0x%x), arg 0x%lx\n", cmd, nr, 
arg);
 
process = kfd_get_process(current);
if (IS_ERR(process)) {
@@ -1895,7 +1895,8 @@ static long kfd_ioctl(struct file *filep, unsigned int 
cmd, unsigned long arg)
kfree(kdata);
 
if (retcode)
-   dev_dbg(kfd_device, "ret = %d\n", retcode);
+   dev_err(kfd_device, "ioctl cmd (#0x%x), arg 0x%lx, ret = %d\n",
+   nr, arg, retcode);
 
return retcode;
 }
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 3/6] drm/amdkfd: Delete unnecessary function declarations

2019-09-27 Thread Zhao, Yong
Adjust the function sequences so that those function declarations are not
needed any more.

Change-Id: I3a270ade7ac380cd083e90611177d7a45249823f
Signed-off-by: Yong Zhao 
---
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 115 +
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 118 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 114 +
 3 files changed, 90 insertions(+), 257 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 2e7330fd1376..be9b873ec683 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -45,61 +45,6 @@ enum hqd_dequeue_request_type {
SAVE_WAVES
 };
 
-/*
- * Register access functions
- */
-
-static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
-   uint32_t sh_mem_config,
-   uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
-   uint32_t sh_mem_bases);
-static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, uint16_t pasid,
-   unsigned int vmid);
-static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
-static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
-   uint32_t queue_id, uint32_t __user *wptr,
-   uint32_t wptr_shift, uint32_t wptr_mask,
-   struct mm_struct *mm);
-static int kgd_hqd_dump(struct kgd_dev *kgd,
-   uint32_t pipe_id, uint32_t queue_id,
-   uint32_t (**dump)[2], uint32_t *n_regs);
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
-uint32_t __user *wptr, struct mm_struct *mm);
-static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
-uint32_t engine_id, uint32_t queue_id,
-uint32_t (**dump)[2], uint32_t *n_regs);
-static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
-   uint32_t pipe_id, uint32_t queue_id);
-static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
-static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
-   enum kfd_preempt_type reset_type,
-   unsigned int utimeout, uint32_t pipe_id,
-   uint32_t queue_id);
-static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
-   unsigned int utimeout);
-#if 0
-static uint32_t get_watch_base_addr(struct amdgpu_device *adev);
-#endif
-static int kgd_address_watch_disable(struct kgd_dev *kgd);
-static int kgd_address_watch_execute(struct kgd_dev *kgd,
-   unsigned int watch_point_id,
-   uint32_t cntl_val,
-   uint32_t addr_hi,
-   uint32_t addr_lo);
-static int kgd_wave_control_execute(struct kgd_dev *kgd,
-   uint32_t gfx_index_val,
-   uint32_t sq_cmd);
-static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
-   unsigned int watch_point_id,
-   unsigned int reg_offset);
-
-static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
-   uint8_t vmid, uint16_t *p_pasid);
-static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
-   uint64_t page_table_base);
-static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
-static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
-
 /* Because of REG_GET_FIELD() being used, we put this function in the
  * asic specific file.
  */
@@ -132,36 +77,6 @@ static int amdgpu_amdkfd_get_tile_config(struct kgd_dev 
*kgd,
return 0;
 }
 
-static const struct kfd2kgd_calls kfd2kgd = {
-   .program_sh_mem_settings = kgd_program_sh_mem_settings,
-   .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
-   .init_interrupts = kgd_init_interrupts,
-   .hqd_load = kgd_hqd_load,
-   .hqd_sdma_load = kgd_hqd_sdma_load,
-   .hqd_dump = kgd_hqd_dump,
-   .hqd_sdma_dump = kgd_hqd_sdma_dump,
-   .hqd_is_occupied = kgd_hqd_is_occupied,
-   .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
-   .hqd_destroy = kgd_hqd_destroy,
-   .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
-   .address_watch_disable = kgd_address_watch_disable,
-   .address_watch_execute = kgd_address_watch_execute,
-   .wave_control_execute = kgd_wave_control_execute,
-   .address_watch_get_offset = kgd_address_watch_get_offset,
-   .get_atc_vmid_pasid_mapping_info =
-   get_atc_vmid_pasid_mapping_info,
-   .get_tile_config = amdgpu_amdkfd_get_tile_config,
-   .set_vm_con

[PATCH 1/6] drm/amdkfd: Update parameter type of pasid to uint16_t

2019-09-27 Thread Zhao, Yong
This is consistent with other code and registers in the code.

Change-Id: I04dd12bdb465a43cfcd8936ed0f227a6546830e8
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h | 2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 4 ++--
 drivers/gpu/drm/amd/include/kgd_kfd_interface.h   | 2 +-
 7 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 122698f8dd1e..33cbf1d073d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -59,7 +59,7 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, 
uint32_t vmid,
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
uint32_t sh_mem_bases);
-static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
+static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, uint16_t pasid,
unsigned int vmid);
 static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
@@ -232,7 +232,7 @@ static void kgd_program_sh_mem_settings(struct kgd_dev 
*kgd, uint32_t vmid,
unlock_srbm(kgd);
 }
 
-static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
+static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, uint16_t pasid,
unsigned int vmid)
 {
struct amdgpu_device *adev = get_amdgpu_device(kgd);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index f77ddf7dba2b..0210d791dea1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -94,7 +94,7 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, 
uint32_t vmid,
uint32_t sh_mem_config, uint32_t sh_mem_ape1_base,
uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases);
 
-static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
+static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, uint16_t pasid,
unsigned int vmid);
 
 static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
@@ -256,7 +256,7 @@ static void kgd_program_sh_mem_settings(struct kgd_dev 
*kgd, uint32_t vmid,
unlock_srbm(kgd);
 }
 
-static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
+static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, uint16_t pasid,
unsigned int vmid)
 {
struct amdgpu_device *adev = get_amdgpu_device(kgd);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 7478caf096ad..7a4c762e1209 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -52,7 +52,7 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, 
uint32_t vmid,
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
uint32_t sh_mem_bases);
-static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
+static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, uint16_t pasid,
unsigned int vmid);
 static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
@@ -210,7 +210,7 @@ static void kgd_program_sh_mem_settings(struct kgd_dev 
*kgd, uint32_t vmid,
unlock_srbm(kgd);
 }
 
-static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
+static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, uint16_t pasid,
unsigned int vmid)
 {
struct amdgpu_device *adev = get_amdgpu_device(kgd);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 50f885576bbe..6be6061c5554 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -142,7 +142,7 @@ void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev 
*kgd, uint32_t vmid,
unlock_srbm(kgd);
 }
 
-int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
+int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, uint16_t pasid,
unsigned int vmid)
 {
struct amdgpu_d

[PATCH 2/6] drm/amdgpu: Delete useless header file reference

2019-09-27 Thread Zhao, Yong
Those header file includes are not needed.

Change-Id: I44aa7e4d0391f9b2c2be757765c1437b603688ae
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c | 6 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c  | 2 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c  | 3 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c  | 4 
 drivers/gpu/drm/amd/amdgpu/arct_reg_init.c | 1 -
 drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c   | 1 -
 drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c   | 1 -
 drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c   | 1 -
 drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c   | 1 -
 drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c   | 1 -
 10 files changed, 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 33cbf1d073d3..2e7330fd1376 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -22,15 +22,9 @@
 #undef pr_fmt
 #define pr_fmt(fmt) "kfd2kgd: " fmt
 
-#include 
-#include 
-#include 
-#include 
 #include 
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
-#include "amdgpu_ucode.h"
-#include "soc15_hw_ip.h"
 #include "gc/gc_10_1_0_offset.h"
 #include "gc/gc_10_1_0_sh_mask.h"
 #include "navi10_enum.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 0210d791dea1..4705e4d93aac 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -20,8 +20,6 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-#include 
-#include 
 #include 
 
 #include "amdgpu.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 7a4c762e1209..376c8b1c81ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -20,9 +20,6 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-#include 
-#include 
-#include 
 #include 
 
 #include "amdgpu.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 6be6061c5554..12f30df2174c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -22,14 +22,10 @@
 
 #define pr_fmt(fmt) "kfd2kgd: " fmt
 
-#include 
-#include 
-#include 
 #include 
 
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
-#include "soc15_hw_ip.h"
 #include "gc/gc_9_0_offset.h"
 #include "gc/gc_9_0_sh_mask.h"
 #include "vega10_enum.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c 
b/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c
index e62609d5126b..fda99c958c3b 100644
--- a/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c
@@ -24,7 +24,6 @@
 #include "soc15.h"
 
 #include "soc15_common.h"
-#include "soc15_hw_ip.h"
 #include "arct_ip_offset.h"
 
 int arct_reg_base_init(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c 
b/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c
index a56c93620e78..88efaecf9f70 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c
@@ -24,7 +24,6 @@
 #include "nv.h"
 
 #include "soc15_common.h"
-#include "soc15_hw_ip.h"
 #include "navi10_ip_offset.h"
 
 int navi10_reg_base_init(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c 
b/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c
index cadc7603ca41..a786d159e5e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c
@@ -24,7 +24,6 @@
 #include "nv.h"
 
 #include "soc15_common.h"
-#include "soc15_hw_ip.h"
 #include "navi12_ip_offset.h"
 
 int navi12_reg_base_init(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c 
b/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c
index 3b5f0f65e096..4ea1e8fbb601 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c
@@ -24,7 +24,6 @@
 #include "nv.h"
 
 #include "soc15_common.h"
-#include "soc15_hw_ip.h"
 #include "navi14_ip_offset.h"
 
 int navi14_reg_base_init(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c 
b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
index bd0580334f83..6b52a539d51b 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
@@ -24,7 +24,6 @@
 #include "soc15.h"
 
 #include "soc15_common.h"
-#include "soc15_hw_ip.h"
 #include "vega10_ip_offset.h"
 
 int vega10_reg_base_init(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c 
b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
index 587e33f5dcce..556f854e3551 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
+++ b/driver

[PATCH] drm/amdgpu: Export setup_vm_pt_regs() logic for mmhub 2.0

2019-09-27 Thread Zhao, Yong
The KFD code will call this function later.

Change-Id: I5993323603799963e9eb473852b6c72de2172ed6
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c | 19 ---
 drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h |  2 ++
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c 
b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
index 86ed8cb915a8..2eea702de8ee 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
@@ -31,20 +31,25 @@
 
 #include "soc15_common.h"
 
-static void mmhub_v2_0_init_gart_pt_regs(struct amdgpu_device *adev)
+void mmhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+   uint64_t page_table_base)
 {
-   uint64_t value = amdgpu_gmc_pd_addr(adev->gart.bo);
+   /* two registers distance between mmMMVM_CONTEXT0_* to 
mmMMVM_CONTEXT1_* */
+   int offset = mmMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32
+   - mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
 
-   WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
-lower_32_bits(value));
+   WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+   offset * vmid, lower_32_bits(page_table_base));
 
-   WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
-upper_32_bits(value));
+   WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+   offset * vmid, upper_32_bits(page_table_base));
 }
 
 static void mmhub_v2_0_init_gart_aperture_regs(struct amdgpu_device *adev)
 {
-   mmhub_v2_0_init_gart_pt_regs(adev);
+   uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+   mmhub_v2_0_setup_vm_pt_regs(adev, 0, pt_base);
 
WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
 (u32)(adev->gmc.gart_start >> 12));
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h 
b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h
index db16f3ece218..3ea4344f0315 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h
@@ -31,5 +31,7 @@ void mmhub_v2_0_init(struct amdgpu_device *adev);
 int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev,
   enum amd_clockgating_state state);
 void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags);
+void mmhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+   uint64_t page_table_base);
 
 #endif
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 1/2] drm/amdkfd: Record vmid pasid mapping in the driver for non HWS mode

2019-09-26 Thread Zhao, Yong

On 2019-09-26 6:16 p.m., Kuehling, Felix wrote:
> On 2019-09-26 5:59 p.m., Zhao, Yong wrote:
>> On 2019-09-26 5:36 p.m., Kuehling, Felix wrote:
>>> Minor nit-pick inline. Otherwise this patch is
>>>
>>> Reviewed-by: Felix Kuehling 
>>>
>>> On 2019-09-26 5:27 p.m., Zhao, Yong wrote:
>>>> This makes possible the vmid pasid mapping query through software.
>>>>
>>>> Change-Id: Ib539aae277a227cc39f6469ae23c46c4d289b87b
>>>> Signed-off-by: Yong Zhao 
>>>> ---
>>>>  .../drm/amd/amdkfd/kfd_device_queue_manager.c | 33 ---
>>>>  .../drm/amd/amdkfd/kfd_device_queue_manager.h |  3 +-
>>>>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  2 ++
>>>>  3 files changed, 25 insertions(+), 13 deletions(-)
>>>>
> [snip]
>>>>  
>>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h 
>>>> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
>>>> index eed8f950b663..99c8b36301ef 100644
>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
>>>> @@ -188,7 +188,8 @@ struct device_queue_manager {
>>>>unsigned int*allocated_queues;
>>>>uint64_tsdma_bitmap;
>>>>uint64_txgmi_sdma_bitmap;
>>>> -  unsigned intvmid_bitmap;
>>>> +  /* the pasid mapping for each kfd vmid */
>>>> +  uint16_tvmid_pasid[VMID_NUM];
>>>>uint64_tpipelines_addr;
>>>>struct kfd_mem_obj  *pipeline_mem;
>>>>uint64_tfence_gpu_addr;
>>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
>>>> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>>> index 0d2c7fa1fa46..a08015720841 100644
>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>>> @@ -43,6 +43,8 @@
>>>>  
>>>>  #include "amd_shared.h"
>>>>  
>>>> +#define VMID_NUM 16
>>>> +
>>> Any good reason why this is not defined in kfd_device_queue_manager.h?
>>> It's only used there.
>> [yz] It could be used by other places in the future, as they use 16
>> directly now.
> Can you point out those places? A quick grep for hard-coded 16 in kfd
> doesn't show up anything VMID-related on a first glance.
>
> Regards,
>     Felix
>
Oh, they are in the amdgpu_amdkfd_gfx* files. I have put the define in 
kfd_device_queue_manager.h. With that, I will push directly later.
>>>>  #define KFD_MAX_RING_ENTRY_SIZE   8
>>>>  
>>>>  #define KFD_SYSFS_FILE_MODE 0444
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 2/2] drm/amdkfd: Query vmid pasid mapping through stored info for non HWS

2019-09-26 Thread Zhao, Yong
Okay, I will delete that message.

Yong

On 2019-09-26 5:31 p.m., Kuehling, Felix wrote:
> On 2019-09-26 5:27 p.m., Zhao, Yong wrote:
>> Because we record the mapping under non HWS mode in the software,
>> we can query pasid through vmid using the stored mapping instead of
>> reading from ATC registers.
>>
>> This also prepares for the defeatured ATC block in future ASICs.
>>
>> Change-Id: I781cb9d30dc0cc93379908ff1cf8da798bb26f13
>> Signed-off-by: Yong Zhao 
>> ---
>>drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 4 ++--
>>1 file changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
>> index ab8a695c4a3c..9fff01c0fb9e 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
>> @@ -58,8 +58,8 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev,
>>  memcpy(patched_ihre, ih_ring_entry,
>>  dev->device_info->ih_ring_entry_size);
>>
>> -pasid = dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid(
>> -dev->kgd, vmid);
>> +pasid = dev->dqm->vmid_pasid[vmid];
>> +WARN_ONCE(pasid == 0, "No PASID assigned for VMID %d\n", vmid);
> When this happens, you'll now get two WARN_ONCE messages. One here and
> then the one a few lines lower: WARN_ONCE(pasid == 0, "Bug: No PASID in
> KFD interrupt"). My point was, your message is redundant. The original
> WARN_ONCE already covers both the HWS and non-HWS cases.
>
> Regards,
>     Felix
>
>>
>>  /* Patch the pasid field */
>>  patched_ihre[3] = cpu_to_le32((le32_to_cpu(patched_ihre[3])
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 1/2] drm/amdkfd: Record vmid pasid mapping in the driver for non HWS mode

2019-09-26 Thread Zhao, Yong

On 2019-09-26 5:36 p.m., Kuehling, Felix wrote:
> Minor nit-pick inline. Otherwise this patch is
>
> Reviewed-by: Felix Kuehling 
>
> On 2019-09-26 5:27 p.m., Zhao, Yong wrote:
>> This makes possible the vmid pasid mapping query through software.
>>
>> Change-Id: Ib539aae277a227cc39f6469ae23c46c4d289b87b
>> Signed-off-by: Yong Zhao 
>> ---
>>.../drm/amd/amdkfd/kfd_device_queue_manager.c | 33 ---
>>.../drm/amd/amdkfd/kfd_device_queue_manager.h |  3 +-
>>drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  2 ++
>>3 files changed, 25 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> index e7f0a32e0e44..455f49a25ccb 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> @@ -224,20 +224,30 @@ static int allocate_vmid(struct device_queue_manager 
>> *dqm,
>>  struct qcm_process_device *qpd,
>>  struct queue *q)
>>{
>> -int bit, allocated_vmid;
>> +int allocated_vmid = -1, i;
>>
>> -if (dqm->vmid_bitmap == 0)
>> -return -ENOMEM;
>> +for (i = dqm->dev->vm_info.first_vmid_kfd;
>> +i <= dqm->dev->vm_info.last_vmid_kfd; i++) {
>> +if (!dqm->vmid_pasid[i]) {
>> +allocated_vmid = i;
>> +break;
>> +}
>> +}
>> +
>> +if (allocated_vmid < 0) {
>> +pr_err("no more vmid to allocate\n");
>> +return -ENOSPC;
>> +}
>> +
>> +pr_debug("vmid allocated: %d\n", allocated_vmid);
>> +
>> +dqm->vmid_pasid[allocated_vmid] = q->process->pasid;
>>
>> -bit = ffs(dqm->vmid_bitmap) - 1;
>> -dqm->vmid_bitmap &= ~(1 << bit);
>> +set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid);
>>
>> -allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd;
>> -pr_debug("vmid allocation %d\n", allocated_vmid);
>>  qpd->vmid = allocated_vmid;
>>  q->properties.vmid = allocated_vmid;
>>
>> -set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
>>  program_sh_mem_settings(dqm, qpd);
>>
>>  /* qpd->page_table_base is set earlier when register_process()
>> @@ -278,8 +288,6 @@ static void deallocate_vmid(struct device_queue_manager 
>> *dqm,
>>  struct qcm_process_device *qpd,
>>  struct queue *q)
>>{
>> -int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;
>> -
>>  /* On GFX v7, CP doesn't flush TC at dequeue */
>>  if (q->device->device_info->asic_family == CHIP_HAWAII)
>>  if (flush_texture_cache_nocpsch(q->device, qpd))
>> @@ -289,8 +297,8 @@ static void deallocate_vmid(struct device_queue_manager 
>> *dqm,
>>
>>  /* Release the vmid mapping */
>>  set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
>> +dqm->vmid_pasid[qpd->vmid] = 0;
>>
>> -dqm->vmid_bitmap |= (1 << bit);
>>  qpd->vmid = 0;
>>  q->properties.vmid = 0;
>>}
>> @@ -1017,7 +1025,8 @@ static int initialize_nocpsch(struct 
>> device_queue_manager *dqm)
>>  dqm->allocated_queues[pipe] |= 1 << queue;
>>  }
>>
>> -dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
>> +memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid));
>> +
>>  dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
>>  dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
>> index eed8f950b663..99c8b36301ef 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
>> @@ -188,7 +188,8 @@ struct device_queue_manager {
>>  unsigned int*allocated_queues;
>>  uint64_tsdma_bitmap;
>>  uint64_txgmi_sdma_bitmap;
>> -unsigned intvmid_bitmap;
>> +/* t

[PATCH 1/2] drm/amdkfd: Record vmid pasid mapping in the driver for non HWS mode

2019-09-26 Thread Zhao, Yong
This makes possible the vmid pasid mapping query through software.

Change-Id: Ib539aae277a227cc39f6469ae23c46c4d289b87b
Signed-off-by: Yong Zhao 
---
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 33 ---
 .../drm/amd/amdkfd/kfd_device_queue_manager.h |  3 +-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  2 ++
 3 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index e7f0a32e0e44..455f49a25ccb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -224,20 +224,30 @@ static int allocate_vmid(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
struct queue *q)
 {
-   int bit, allocated_vmid;
+   int allocated_vmid = -1, i;
 
-   if (dqm->vmid_bitmap == 0)
-   return -ENOMEM;
+   for (i = dqm->dev->vm_info.first_vmid_kfd;
+   i <= dqm->dev->vm_info.last_vmid_kfd; i++) {
+   if (!dqm->vmid_pasid[i]) {
+   allocated_vmid = i;
+   break;
+   }
+   }
+
+   if (allocated_vmid < 0) {
+   pr_err("no more vmid to allocate\n");
+   return -ENOSPC;
+   }
+
+   pr_debug("vmid allocated: %d\n", allocated_vmid);
+
+   dqm->vmid_pasid[allocated_vmid] = q->process->pasid;
 
-   bit = ffs(dqm->vmid_bitmap) - 1;
-   dqm->vmid_bitmap &= ~(1 << bit);
+   set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid);
 
-   allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd;
-   pr_debug("vmid allocation %d\n", allocated_vmid);
qpd->vmid = allocated_vmid;
q->properties.vmid = allocated_vmid;
 
-   set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
program_sh_mem_settings(dqm, qpd);
 
/* qpd->page_table_base is set earlier when register_process()
@@ -278,8 +288,6 @@ static void deallocate_vmid(struct device_queue_manager 
*dqm,
struct qcm_process_device *qpd,
struct queue *q)
 {
-   int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;
-
/* On GFX v7, CP doesn't flush TC at dequeue */
if (q->device->device_info->asic_family == CHIP_HAWAII)
if (flush_texture_cache_nocpsch(q->device, qpd))
@@ -289,8 +297,8 @@ static void deallocate_vmid(struct device_queue_manager 
*dqm,
 
/* Release the vmid mapping */
set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
+   dqm->vmid_pasid[qpd->vmid] = 0;
 
-   dqm->vmid_bitmap |= (1 << bit);
qpd->vmid = 0;
q->properties.vmid = 0;
 }
@@ -1017,7 +1025,8 @@ static int initialize_nocpsch(struct device_queue_manager 
*dqm)
dqm->allocated_queues[pipe] |= 1 << queue;
}
 
-   dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
+   memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid));
+
dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index eed8f950b663..99c8b36301ef 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -188,7 +188,8 @@ struct device_queue_manager {
unsigned int*allocated_queues;
uint64_tsdma_bitmap;
uint64_txgmi_sdma_bitmap;
-   unsigned intvmid_bitmap;
+   /* the pasid mapping for each kfd vmid */
+   uint16_tvmid_pasid[VMID_NUM];
uint64_tpipelines_addr;
struct kfd_mem_obj  *pipeline_mem;
uint64_tfence_gpu_addr;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 0d2c7fa1fa46..a08015720841 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -43,6 +43,8 @@
 
 #include "amd_shared.h"
 
+#define VMID_NUM 16
+
 #define KFD_MAX_RING_ENTRY_SIZE8
 
 #define KFD_SYSFS_FILE_MODE 0444
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 2/2] drm/amdkfd: Query vmid pasid mapping through stored info for non HWS

2019-09-26 Thread Zhao, Yong
Because we record the mapping under non HWS mode in the software,
we can query pasid through vmid using the stored mapping instead of
reading from ATC registers.

This also prepares for the defeatured ATC block in future ASICs.

Change-Id: I781cb9d30dc0cc93379908ff1cf8da798bb26f13
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index ab8a695c4a3c..9fff01c0fb9e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -58,8 +58,8 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev,
memcpy(patched_ihre, ih_ring_entry,
dev->device_info->ih_ring_entry_size);
 
-   pasid = dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid(
-   dev->kgd, vmid);
+   pasid = dev->dqm->vmid_pasid[vmid];
+   WARN_ONCE(pasid == 0, "No PASID assigned for VMID %d\n", vmid);
 
/* Patch the pasid field */
patched_ihre[3] = cpu_to_le32((le32_to_cpu(patched_ihre[3])
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 2/2] drm/amdkfd: Query vmid pasid mapping through stored info

2019-09-26 Thread Zhao, Yong
Because we record the mapping in the software, we can query pasid
through vmid using the stored mapping instead of reading from ATC
registers.

This also prepares for the defeatured ATC block in future ASICs.

Change-Id: I781cb9d30dc0cc93379908ff1cf8da798bb26f13
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index ab8a695c4a3c..754c052b7d72 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -58,8 +58,9 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev,
memcpy(patched_ihre, ih_ring_entry,
dev->device_info->ih_ring_entry_size);
 
-   pasid = dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid(
-   dev->kgd, vmid);
+   pasid = dev->dqm->vmid_pasid[vmid];
+   if (!pasid)
+   pr_err("pasid is not queried correctly\n");
 
/* Patch the pasid field */
patched_ihre[3] = cpu_to_le32((le32_to_cpu(patched_ihre[3])
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 1/2] drm/amdkfd: Record vmid pasid mapping in the driver

2019-09-26 Thread Zhao, Yong
This makes possible the vmid pasid mapping query through software.

Change-Id: Ib539aae277a227cc39f6469ae23c46c4d289b87b
Signed-off-by: Yong Zhao 
---
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 33 ---
 .../drm/amd/amdkfd/kfd_device_queue_manager.h |  3 +-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  2 ++
 3 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index e7f0a32e0e44..92fede18bf1d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -224,20 +224,30 @@ static int allocate_vmid(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
struct queue *q)
 {
-   int bit, allocated_vmid;
+   int allocated_vmid = -1, i;
 
-   if (dqm->vmid_bitmap == 0)
-   return -ENOMEM;
+   for (i = dqm->dev->vm_info.first_vmid_kfd;
+   i <= dqm->dev->vm_info.last_vmid_kfd; i++) {
+   if (!dqm->vmid_pasid[i]) {
+   allocated_vmid = i;
+   break;
+   }
+   }
+
+   if (allocated_vmid < 0) {
+   pr_err("no more vmid to allocate\n");
+   return -ENOSPC;
+   }
+
+   pr_debug("vmid allocated: %d\n", allocated_vmid);
+
+   dqm->vmid_pasid[allocated_vmid] = q->process->pasid;
 
-   bit = ffs(dqm->vmid_bitmap) - 1;
-   dqm->vmid_bitmap &= ~(1 << bit);
+   set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid);
 
-   allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd;
-   pr_debug("vmid allocation %d\n", allocated_vmid);
qpd->vmid = allocated_vmid;
q->properties.vmid = allocated_vmid;
 
-   set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
program_sh_mem_settings(dqm, qpd);
 
/* qpd->page_table_base is set earlier when register_process()
@@ -278,8 +288,6 @@ static void deallocate_vmid(struct device_queue_manager 
*dqm,
struct qcm_process_device *qpd,
struct queue *q)
 {
-   int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;
-
/* On GFX v7, CP doesn't flush TC at dequeue */
if (q->device->device_info->asic_family == CHIP_HAWAII)
if (flush_texture_cache_nocpsch(q->device, qpd))
@@ -289,8 +297,8 @@ static void deallocate_vmid(struct device_queue_manager 
*dqm,
 
/* Release the vmid mapping */
set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
+   dqm->vmid_pasid[qpd->vmid] = 0;
 
-   dqm->vmid_bitmap |= (1 << bit);
qpd->vmid = 0;
q->properties.vmid = 0;
 }
@@ -1017,7 +1025,8 @@ static int initialize_nocpsch(struct device_queue_manager 
*dqm)
dqm->allocated_queues[pipe] |= 1 << queue;
}
 
-   dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
+   memset(dqm->vmid_pasid, 0, VMID_NUM * sizeof(uint16_t));
+
dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index eed8f950b663..99c8b36301ef 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -188,7 +188,8 @@ struct device_queue_manager {
unsigned int*allocated_queues;
uint64_tsdma_bitmap;
uint64_txgmi_sdma_bitmap;
-   unsigned intvmid_bitmap;
+   /* the pasid mapping for each kfd vmid */
+   uint16_tvmid_pasid[VMID_NUM];
uint64_tpipelines_addr;
struct kfd_mem_obj  *pipeline_mem;
uint64_tfence_gpu_addr;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 0d2c7fa1fa46..a08015720841 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -43,6 +43,8 @@
 
 #include "amd_shared.h"
 
+#define VMID_NUM 16
+
 #define KFD_MAX_RING_ENTRY_SIZE8
 
 #define KFD_SYSFS_FILE_MODE 0444
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 4/6] drm/amdkfd: Record vmid pasid mapping in the driver

2019-09-26 Thread Zhao, Yong
I agree.

Yong

On 2019-09-26 2:54 p.m., Kuehling, Felix wrote:
> On 2019-09-26 2:38 p.m., Zhao, Yong wrote:
>> This makes possible the vmid pasid mapping query through software.
>>
>> Change-Id: Ib539aae277a227cc39f6469ae23c46c4d289b87b
>> Signed-off-by: Yong Zhao 
>> ---
>>.../drm/amd/amdkfd/kfd_device_queue_manager.c | 34 +--
>>.../drm/amd/amdkfd/kfd_device_queue_manager.h |  3 +-
>>2 files changed, 26 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> index e7f0a32e0e44..d006adefef55 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> @@ -224,20 +224,30 @@ static int allocate_vmid(struct device_queue_manager 
>> *dqm,
>>  struct qcm_process_device *qpd,
>>  struct queue *q)
>>{
>> -int bit, allocated_vmid;
>> +int idx = -1, allocated_vmid, i;
>>
>> -if (dqm->vmid_bitmap == 0)
>> +for (i = 0; i < dqm->dev->vm_info.vmid_num_kfd; i++) {
>> +if (!dqm->vmid_pasid[i]) {
>> +idx = i;
>> +break;
>> +}
>> +}
>> +
>> +if (idx < 0) {
>> +pr_err("no more vmid to allocate\n");
>>  return -ENOMEM;
>> +}
>> +
>> +dqm->vmid_pasid[idx] = q->process->pasid;
>>
>> -bit = ffs(dqm->vmid_bitmap) - 1;
>> -dqm->vmid_bitmap &= ~(1 << bit);
>> +allocated_vmid = idx + dqm->dev->vm_info.first_vmid_kfd;
>> +pr_debug("vmid allocated: %d\n", allocated_vmid);
>> +
>> +set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid);
>>
>> -allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd;
>> -pr_debug("vmid allocation %d\n", allocated_vmid);
>>  qpd->vmid = allocated_vmid;
>>  q->properties.vmid = allocated_vmid;
>>
>> -set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
>>  program_sh_mem_settings(dqm, qpd);
>>
>>  /* qpd->page_table_base is set earlier when register_process()
>> @@ -278,7 +288,7 @@ static void deallocate_vmid(struct device_queue_manager 
>> *dqm,
>>  struct qcm_process_device *qpd,
>>  struct queue *q)
>>{
>> -int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;
>> +int idx;
>>
>>  /* On GFX v7, CP doesn't flush TC at dequeue */
>>  if (q->device->device_info->asic_family == CHIP_HAWAII)
>> @@ -290,7 +300,9 @@ static void deallocate_vmid(struct device_queue_manager 
>> *dqm,
>>  /* Release the vmid mapping */
>>  set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
>>
>> -dqm->vmid_bitmap |= (1 << bit);
>> +idx = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;
>> +dqm->vmid_pasid[idx] = 0;
>> +
>>  qpd->vmid = 0;
>>  q->properties.vmid = 0;
>>}
>> @@ -1017,7 +1029,8 @@ static int initialize_nocpsch(struct 
>> device_queue_manager *dqm)
>>  dqm->allocated_queues[pipe] |= 1 << queue;
>>  }
>>
>> -dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
>> +dqm->vmid_pasid = kcalloc(dqm->dev->vm_info.vmid_num_kfd,
>> +sizeof(uint16_t), GFP_KERNEL);
> If you allocate this dynamically, you need to check the return value.
> But see below ...
>
>
>>  dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
>>  dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
>>
>> @@ -1030,6 +1043,7 @@ static void uninitialize(struct device_queue_manager 
>> *dqm)
>>
>>  WARN_ON(dqm->queue_count > 0 || dqm->processes_count > 0);
>>
>> +kfree(dqm->vmid_pasid);
>>  kfree(dqm->allocated_queues);
>>  for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
>>  kfree(dqm->mqd_mgrs[i]);
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
>> index eed8f950b663..67b5e5fadd95 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queu

Re: [PATCH 3/3] drm/amdkfd: Remove the control stack workaround for GFX10

2019-09-26 Thread Zhao, Yong
Hi Felix,

I reworded this patch in the next series. Please review the first two patches 
in this series.

Regards,
Yong

From: amd-gfx  on behalf of Zhao, Yong 

Sent: Wednesday, September 25, 2019 2:34 PM
To: Kuehling, Felix ; amd-gfx@lists.freedesktop.org 

Subject: Re: [PATCH 3/3] drm/amdkfd: Remove the control stack workaround for 
GFX10

Yes. I confirmed with the CP guys and they said the behavior on GFX10 is the
same as GFX8 now. I remember that the workaround on GFX9 was to help
with a HW bug, but I am not too sure.

Regards,

Yong

On 2019-09-25 2:25 p.m., Kuehling, Felix wrote:
> On 2019-09-25 2:15 p.m., Zhao, Yong wrote:
>> The GFX10 does not have this hardware bug any more, so remove it.
> I wouldn't call this a bug and a workaround. More like a change in the
> HW or FW behaviour and a corresponding driver change. I.e. in GFXv8 the
> control stack was in the user mode CWSR allocation. In GFXv9 it moved
> into a kernel mode buffer next to the MQD. So in GFXv10 the control
> stack moved back into the user mode CWSR buffer?
>
> Regards,
> Felix
>
>> Change-Id: I446c9685549a09ac8846a42ee22d86cfb93fd98c
>> Signed-off-by: Yong Zhao 
>> ---
>>.../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c  | 37 ++-
>>1 file changed, 4 insertions(+), 33 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
>> index 9cd3eb2d90bd..4a236b2c2354 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
>> @@ -69,35 +69,13 @@ static void update_cu_mask(struct mqd_manager *mm, void 
>> *mqd,
>>static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
>>   struct queue_properties *q)
>>{
>> -int retval;
>> -struct kfd_mem_obj *mqd_mem_obj = NULL;
>> +struct kfd_mem_obj *mqd_mem_obj;
>>
>> -/* From V9,  for CWSR, the control stack is located on the next page
>> - * boundary after the mqd, we will use the gtt allocation function
>> - * instead of sub-allocation function.
>> - */
>> -if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
>> -mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
>> -if (!mqd_mem_obj)
>> -return NULL;
>> -retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd,
>> -ALIGN(q->ctl_stack_size, PAGE_SIZE) +
>> -ALIGN(sizeof(struct v10_compute_mqd), 
>> PAGE_SIZE),
>> -&(mqd_mem_obj->gtt_mem),
>> -&(mqd_mem_obj->gpu_addr),
>> -(void *)&(mqd_mem_obj->cpu_ptr), true);
>> -} else {
>> -retval = kfd_gtt_sa_allocate(kfd, sizeof(struct 
>> v10_compute_mqd),
>> -&mqd_mem_obj);
>> -}
>> -
>> -if (retval) {
>> -kfree(mqd_mem_obj);
>> +if (kfd_gtt_sa_allocate(kfd, sizeof(struct v10_compute_mqd),
>> +&mqd_mem_obj))
>>   return NULL;
>> -}
>>
>>   return mqd_mem_obj;
>> -
>>}
>>
>>static void init_mqd(struct mqd_manager *mm, void **mqd,
>> @@ -250,14 +228,7 @@ static int destroy_mqd(struct mqd_manager *mm, void 
>> *mqd,
>>static void free_mqd(struct mqd_manager *mm, void *mqd,
>>   struct kfd_mem_obj *mqd_mem_obj)
>>{
>> -struct kfd_dev *kfd = mm->dev;
>> -
>> -if (mqd_mem_obj->gtt_mem) {
>> -amdgpu_amdkfd_free_gtt_mem(kfd->kgd, mqd_mem_obj->gtt_mem);
>> -kfree(mqd_mem_obj);
>> -} else {
>> -kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
>> -}
>> +kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
>>}
>>
>>static bool is_occupied(struct mqd_manager *mm, void *mqd,
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 5/6] drm/amdkfd: Query vmid pasid mapping through stored info

2019-09-26 Thread Zhao, Yong
Because we record the mapping in the software, we can query pasid
through vmid using the stored mapping instead of reading from ATC
registers.

This also prepares for the defeatured ATC block in future ASICs.

Change-Id: I781cb9d30dc0cc93379908ff1cf8da798bb26f13
Signed-off-by: Yong Zhao 
---
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c| 12 
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h|  3 +++
 drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c  |  3 +--
 3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index d006adefef55..57d33e887f43 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -220,6 +220,18 @@ static void deallocate_doorbell(struct qcm_process_device 
*qpd,
WARN_ON(!old);
 }
 
+uint16_t get_pasid_from_vmid_nocpsch(struct device_queue_manager *dqm,
+   uint16_t vmid)
+{
+   int idx = vmid - dqm->dev->vm_info.first_vmid_kfd;
+
+   uint16_t pasid = dqm->dev->dqm->vmid_pasid[idx];
+   if (!pasid)
+   pr_err("pasid is not queried correctly\n");
+
+   return pasid;
+}
+
 static int allocate_vmid(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
struct queue *q)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 67b5e5fadd95..9e8f6cde397e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -244,6 +244,9 @@ void set_queue_snapshot_entry(struct device_queue_manager 
*dqm,
  int flags,
  struct kfd_queue_snapshot_entry *qss_entry);
 
+uint16_t get_pasid_from_vmid_nocpsch(struct device_queue_manager *dqm,
+   uint16_t vmid);
+
 static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
 {
return (pdd->lds_base >> 16) & 0xFF;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index ab8a695c4a3c..adb5bbab7160 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -58,8 +58,7 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev,
memcpy(patched_ihre, ih_ring_entry,
dev->device_info->ih_ring_entry_size);
 
-   pasid = dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid(
-   dev->kgd, vmid);
+   pasid = get_pasid_from_vmid_nocpsch(dev->dqm, vmid);
 
/* Patch the pasid field */
patched_ihre[3] = cpu_to_le32((le32_to_cpu(patched_ihre[3])
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 2/6] drm/amdkfd: Delete unused defines

2019-09-26 Thread Zhao, Yong
They are not used anywhere.

Change-Id: Ieba4f57760f0c45f24e54629245cae419b8ff157
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 49cd946bf8b4..f8f8d6fe8b52 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -52,9 +52,6 @@
 #include "gmc_v9_0.h"
 
 
-#define V9_PIPE_PER_MEC(4)
-#define V9_QUEUES_PER_PIPE_MEC (8)
-
 enum hqd_dequeue_request_type {
NO_ACTION = 0,
DRAIN_PIPE,
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 6/6] drm/amdkfd: Eliminate get_atc_vmid_pasid_mapping_valid

2019-09-26 Thread Zhao, Yong
get_atc_vmid_pasid_mapping_valid() is very similar to
get_atc_vmid_pasid_mapping_pasid(), so they can be merged into a new
function get_atc_vmid_pasid_mapping_info() to reduce the number of
register accesses.

Change-Id: I255ebf2629012400b07fe6a69c3d075cfd46612e
Signed-off-by: Yong Zhao 
---
 .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   |  6 +--
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 49 +++
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 28 ---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 32 
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 45 +++--
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h |  6 +--
 .../gpu/drm/amd/amdkfd/cik_event_interrupt.c  |  8 +--
 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c   | 16 +++---
 .../gpu/drm/amd/include/kgd_kfd_interface.h   |  8 ++-
 9 files changed, 76 insertions(+), 122 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index eb6e8b232729..5e1bd6500fe2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -279,10 +279,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
.address_watch_execute = kgd_gfx_v9_address_watch_execute,
.wave_control_execute = kgd_gfx_v9_wave_control_execute,
.address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset,
-   .get_atc_vmid_pasid_mapping_pasid =
-   kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid,
-   .get_atc_vmid_pasid_mapping_valid =
-   kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid,
+   .get_atc_vmid_pasid_mapping_info =
+   kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
.get_tile_config = kgd_gfx_v9_get_tile_config,
.set_vm_context_page_table_base = 
kgd_gfx_v9_set_vm_context_page_table_base,
.invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 09d50949c5b9..57ff698f51bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -100,10 +100,8 @@ static uint32_t kgd_address_watch_get_offset(struct 
kgd_dev *kgd,
unsigned int watch_point_id,
unsigned int reg_offset);
 
-static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
-   uint8_t vmid);
-static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
-   uint8_t vmid);
+static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
+   uint8_t vmid, uint16_t *p_pasid);
 static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
uint64_t page_table_base);
 static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
@@ -157,10 +155,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
.address_watch_execute = kgd_address_watch_execute,
.wave_control_execute = kgd_wave_control_execute,
.address_watch_get_offset = kgd_address_watch_get_offset,
-   .get_atc_vmid_pasid_mapping_pasid =
-   get_atc_vmid_pasid_mapping_pasid,
-   .get_atc_vmid_pasid_mapping_valid =
-   get_atc_vmid_pasid_mapping_valid,
+   .get_atc_vmid_pasid_mapping_info =
+   get_atc_vmid_pasid_mapping_info,
.get_tile_config = amdgpu_amdkfd_get_tile_config,
.set_vm_context_page_table_base = set_vm_context_page_table_base,
.invalidate_tlbs = invalidate_tlbs,
@@ -772,26 +768,17 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void 
*mqd,
return 0;
 }
 
-static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
-   uint8_t vmid)
+static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
+   uint8_t vmid, uint16_t *p_pasid)
 {
-   uint32_t reg;
+   uint32_t value;
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
 
-   reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+   value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
 + vmid);
-   return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
-}
-
-static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
-   uint8_t vmid)
-{
-   uint32_t reg;
-   struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+   *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
 
-   reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
-+ vmid);
-   return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+   return !!(value & ATC_VMID0_PASID_MAPPING__

[PATCH 1/6] drm/amdkfd: Move the control stack on GFX10 to userspace buffer

2019-09-26 Thread Zhao, Yong
GFX10 no longer requires the control stack to be right after the mqd
buffer, so move it back to the userspace-allocated CWSR buffer.

Change-Id: I446c9685549a09ac8846a42ee22d86cfb93fd98c
Signed-off-by: Yong Zhao 
---
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c  | 37 ++-
 1 file changed, 4 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
index 29d50d6af9d7..e2fb76247f47 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
@@ -69,35 +69,13 @@ static void update_cu_mask(struct mqd_manager *mm, void 
*mqd,
 static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
struct queue_properties *q)
 {
-   int retval;
-   struct kfd_mem_obj *mqd_mem_obj = NULL;
+   struct kfd_mem_obj *mqd_mem_obj;
 
-   /* From V9,  for CWSR, the control stack is located on the next page
-* boundary after the mqd, we will use the gtt allocation function
-* instead of sub-allocation function.
-*/
-   if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
-   mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
-   if (!mqd_mem_obj)
-   return NULL;
-   retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd,
-   ALIGN(q->ctl_stack_size, PAGE_SIZE) +
-   ALIGN(sizeof(struct v10_compute_mqd), 
PAGE_SIZE),
-   &(mqd_mem_obj->gtt_mem),
-   &(mqd_mem_obj->gpu_addr),
-   (void *)&(mqd_mem_obj->cpu_ptr), true);
-   } else {
-   retval = kfd_gtt_sa_allocate(kfd, sizeof(struct 
v10_compute_mqd),
-   &mqd_mem_obj);
-   }
-
-   if (retval) {
-   kfree(mqd_mem_obj);
+   if (kfd_gtt_sa_allocate(kfd, sizeof(struct v10_compute_mqd),
+   &mqd_mem_obj))
return NULL;
-   }
 
return mqd_mem_obj;
-
 }
 
 static void init_mqd(struct mqd_manager *mm, void **mqd,
@@ -250,14 +228,7 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd,
 static void free_mqd(struct mqd_manager *mm, void *mqd,
struct kfd_mem_obj *mqd_mem_obj)
 {
-   struct kfd_dev *kfd = mm->dev;
-
-   if (mqd_mem_obj->gtt_mem) {
-   amdgpu_amdkfd_free_gtt_mem(kfd->kgd, mqd_mem_obj->gtt_mem);
-   kfree(mqd_mem_obj);
-   } else {
-   kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
-   }
+   kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
 }
 
 static bool is_occupied(struct mqd_manager *mm, void *mqd,
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 3/6] drm/amdkfd: Use hex print format for pasid

2019-09-26 Thread Zhao, Yong
Since KFD pasid starts from 0x8000 (32768 in decimal), it is better
perceived as a hex number. Meanwhile, change the pasid type from
unsigned int to uint16_t to be consistent throughout the code.

Change-Id: I565fe39f69e782749a697f18545775354c7a89f8
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 12 +--
 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c   |  4 ++--
 drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c   |  8 
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 12 +--
 drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c |  8 
 drivers/gpu/drm/amd/amdkfd/kfd_events.c   | 12 +--
 drivers/gpu/drm/amd/amdkfd/kfd_iommu.c|  6 +++---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_process.c  | 20 +--
 .../amd/amdkfd/kfd_process_queue_manager.c|  6 +++---
 10 files changed, 45 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index e5ff772862cd..106d45ae7c9b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -301,7 +301,7 @@ static int kfd_ioctl_create_queue(struct file *filep, 
struct kfd_process *p,
goto err_bind_process;
}
 
-   pr_debug("Creating queue for PASID %d on gpu 0x%x\n",
+   pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n",
p->pasid,
dev->id);
 
@@ -351,7 +351,7 @@ static int kfd_ioctl_destroy_queue(struct file *filp, 
struct kfd_process *p,
int retval;
struct kfd_ioctl_destroy_queue_args *args = data;
 
-   pr_debug("Destroying queue id %d for pasid %d\n",
+   pr_debug("Destroying queue id %d for pasid 0x%x\n",
args->queue_id,
p->pasid);
 
@@ -397,7 +397,7 @@ static int kfd_ioctl_update_queue(struct file *filp, struct 
kfd_process *p,
properties.queue_percent = args->queue_percentage;
properties.priority = args->queue_priority;
 
-   pr_debug("Updating queue id %d for pasid %d\n",
+   pr_debug("Updating queue id %d for pasid 0x%x\n",
args->queue_id, p->pasid);
 
mutex_lock(&p->mutex);
@@ -854,7 +854,7 @@ static int kfd_ioctl_get_process_apertures(struct file 
*filp,
struct kfd_process_device_apertures *pAperture;
struct kfd_process_device *pdd;
 
-   dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);
+   dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
 
args->num_of_nodes = 0;
 
@@ -912,7 +912,7 @@ static int kfd_ioctl_get_process_apertures_new(struct file 
*filp,
uint32_t nodes = 0;
int ret;
 
-   dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);
+   dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
 
if (args->num_of_nodes == 0) {
/* Return number of nodes, so that user space can alloacate
@@ -3063,7 +3063,7 @@ static int kfd_mmio_mmap(struct kfd_dev *dev, struct 
kfd_process *process,
 
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 
-   pr_debug("Process %d mapping mmio page\n"
+   pr_debug("pasid 0x%x mapping mmio page\n"
 " target user address == 0x%08llX\n"
 " physical address== 0x%08llX\n"
 " vm_flags== 0x%04lX\n"
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
index 3635e0b4b3b7..492951cad143 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
@@ -800,7 +800,7 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, 
struct kfd_process *p)
(dev->kgd, vmid)) {
if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid
(dev->kgd, vmid) == p->pasid) {
-   pr_debug("Killing wave fronts of vmid %d and 
pasid %d\n",
+   pr_debug("Killing wave fronts of vmid %d and 
pasid 0x%x\n",
vmid, p->pasid);
break;
}
@@ -808,7 +808,7 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, 
struct kfd_process *p)
}
 
if (vmid > last_vmid_to_scan) {
-   pr_err("Didn't find vmid for pasid %d\n", p->pasid);
+   pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid);
return -EFAULT;
}
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
index 9d4af961c5d1..9bfa50633654 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
@@ -96,7 +96,7 @@ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct 
kfd_dev *p

[PATCH 4/6] drm/amdkfd: Record vmid pasid mapping in the driver

2019-09-26 Thread Zhao, Yong
This makes it possible to query the vmid pasid mapping through software.

Change-Id: Ib539aae277a227cc39f6469ae23c46c4d289b87b
Signed-off-by: Yong Zhao 
---
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 34 +--
 .../drm/amd/amdkfd/kfd_device_queue_manager.h |  3 +-
 2 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index e7f0a32e0e44..d006adefef55 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -224,20 +224,30 @@ static int allocate_vmid(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
struct queue *q)
 {
-   int bit, allocated_vmid;
+   int idx = -1, allocated_vmid, i;
 
-   if (dqm->vmid_bitmap == 0)
+   for (i = 0; i < dqm->dev->vm_info.vmid_num_kfd; i++) {
+   if (!dqm->vmid_pasid[i]) {
+   idx = i;
+   break;
+   }
+   }
+
+   if (idx < 0) {
+   pr_err("no more vmid to allocate\n");
return -ENOMEM;
+   }
+
+   dqm->vmid_pasid[idx] = q->process->pasid;
 
-   bit = ffs(dqm->vmid_bitmap) - 1;
-   dqm->vmid_bitmap &= ~(1 << bit);
+   allocated_vmid = idx + dqm->dev->vm_info.first_vmid_kfd;
+   pr_debug("vmid allocated: %d\n", allocated_vmid);
+
+   set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid);
 
-   allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd;
-   pr_debug("vmid allocation %d\n", allocated_vmid);
qpd->vmid = allocated_vmid;
q->properties.vmid = allocated_vmid;
 
-   set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
program_sh_mem_settings(dqm, qpd);
 
/* qpd->page_table_base is set earlier when register_process()
@@ -278,7 +288,7 @@ static void deallocate_vmid(struct device_queue_manager 
*dqm,
struct qcm_process_device *qpd,
struct queue *q)
 {
-   int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;
+   int idx;
 
/* On GFX v7, CP doesn't flush TC at dequeue */
if (q->device->device_info->asic_family == CHIP_HAWAII)
@@ -290,7 +300,9 @@ static void deallocate_vmid(struct device_queue_manager 
*dqm,
/* Release the vmid mapping */
set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
 
-   dqm->vmid_bitmap |= (1 << bit);
+   idx = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;
+   dqm->vmid_pasid[idx] = 0;
+
qpd->vmid = 0;
q->properties.vmid = 0;
 }
@@ -1017,7 +1029,8 @@ static int initialize_nocpsch(struct device_queue_manager 
*dqm)
dqm->allocated_queues[pipe] |= 1 << queue;
}
 
-   dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
+   dqm->vmid_pasid = kcalloc(dqm->dev->vm_info.vmid_num_kfd,
+   sizeof(uint16_t), GFP_KERNEL);
dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
 
@@ -1030,6 +1043,7 @@ static void uninitialize(struct device_queue_manager *dqm)
 
WARN_ON(dqm->queue_count > 0 || dqm->processes_count > 0);
 
+   kfree(dqm->vmid_pasid);
kfree(dqm->allocated_queues);
for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
kfree(dqm->mqd_mgrs[i]);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index eed8f950b663..67b5e5fadd95 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -188,7 +188,8 @@ struct device_queue_manager {
unsigned int*allocated_queues;
uint64_tsdma_bitmap;
uint64_txgmi_sdma_bitmap;
-   unsigned intvmid_bitmap;
+   /* the pasid mapping for each kfd vmid */
+   uint16_t*vmid_pasid;
uint64_tpipelines_addr;
struct kfd_mem_obj  *pipeline_mem;
uint64_tfence_gpu_addr;
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdkfd: Fix race in gfx10 context restore handler

2019-09-25 Thread Zhao, Yong
Reviewed-by: Yong Zhao 

From: amd-gfx  on behalf of Cornwall, 
Jay 
Sent: Wednesday, September 25, 2019 6:06 PM
To: amd-gfx@lists.freedesktop.org 
Cc: Cornwall, Jay 
Subject: [PATCH] drm/amdkfd: Fix race in gfx10 context restore handler

Missing synchronization with VGPR restore leads to intermittent
VGPR trashing in the user shader.

Signed-off-by: Jay Cornwall 
---
 drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h | 139 +++--
 .../gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm |   1 +
 2 files changed, 71 insertions(+), 69 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h 
b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index 901fe35..d3400da 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -905,7 +905,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
 0x7a5d, 0x807c817c,
 0x807aff7a, 0x0080,
 0xbf0a717c, 0xbf85fff8,
-   0xbf820141, 0xbef4037e,
+   0xbf820142, 0xbef4037e,
 0x8775ff7f, 0x,
 0x8875ff75, 0x0004,
 0xbef60380, 0xbef703ff,
@@ -967,7 +967,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
 0x725d, 0xe0304080,
 0x725d0100, 0xe0304100,
 0x725d0200, 0xe0304180,
-   0x725d0300, 0xbf820031,
+   0x725d0300, 0xbf820032,
 0xbef603ff, 0x0100,
 0xbef20378, 0x8078ff78,
 0x0400, 0xbefc0384,
@@ -992,83 +992,84 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
 0x725d, 0xe0304100,
 0x725d0100, 0xe0304200,
 0x725d0200, 0xe0304300,
-   0x725d0300, 0xb9782a05,
-   0x80788178, 0x907c9973,
-   0x877c817c, 0xbf06817c,
-   0xbf850002, 0x8f788978,
-   0xbf820001, 0x8f788a78,
-   0xb9721e06, 0x8f728a72,
-   0x80787278, 0x8078ff78,
-   0x0200, 0x80f8ff78,
-   0x0050, 0xbef603ff,
-   0x0100, 0xbefc03ff,
-   0x006c, 0x80f89078,
-   0xf429003a, 0xf000,
-   0xbf8cc07f, 0x80fc847c,
-   0xbf80, 0xbe803100,
-   0xbe823102, 0x80f8a078,
-   0xf42d003a, 0xf000,
-   0xbf8cc07f, 0x80fc887c,
-   0xbf80, 0xbe803100,
-   0xbe823102, 0xbe843104,
-   0xbe863106, 0x80f8c078,
-   0xf431003a, 0xf000,
-   0xbf8cc07f, 0x80fc907c,
-   0xbf80, 0xbe803100,
-   0xbe823102, 0xbe843104,
-   0xbe863106, 0xbe883108,
-   0xbe8a310a, 0xbe8c310c,
-   0xbe8e310e, 0xbf06807c,
-   0xbf84fff0, 0xb9782a05,
-   0x80788178, 0x907c9973,
-   0x877c817c, 0xbf06817c,
-   0xbf850002, 0x8f788978,
-   0xbf820001, 0x8f788a78,
-   0xb9721e06, 0x8f728a72,
-   0x80787278, 0x8078ff78,
-   0x0200, 0xbef603ff,
-   0x0100, 0xf4211bfa,
+   0x725d0300, 0xbf8c3f70,
+   0xb9782a05, 0x80788178,
+   0x907c9973, 0x877c817c,
+   0xbf06817c, 0xbf850002,
+   0x8f788978, 0xbf820001,
+   0x8f788a78, 0xb9721e06,
+   0x8f728a72, 0x80787278,
+   0x8078ff78, 0x0200,
+   0x80f8ff78, 0x0050,
+   0xbef603ff, 0x0100,
+   0xbefc03ff, 0x006c,
+   0x80f89078, 0xf429003a,
+   0xf000, 0xbf8cc07f,
+   0x80fc847c, 0xbf80,
+   0xbe803100, 0xbe823102,
+   0x80f8a078, 0xf42d003a,
+   0xf000, 0xbf8cc07f,
+   0x80fc887c, 0xbf80,
+   0xbe803100, 0xbe823102,
+   0xbe843104, 0xbe863106,
+   0x80f8c078, 0xf431003a,
+   0xf000, 0xbf8cc07f,
+   0x80fc907c, 0xbf80,
+   0xbe803100, 0xbe823102,
+   0xbe843104, 0xbe863106,
+   0xbe883108, 0xbe8a310a,
+   0xbe8c310c, 0xbe8e310e,
+   0xbf06807c, 0xbf84fff0,
+   0xb9782a05, 0x80788178,
+   0x907c9973, 0x877c817c,
+   0xbf06817c, 0xbf850002,
+   0x8f788978, 0xbf820001,
+   0x8f788a78, 0xb9721e06,
+   0x8f728a72, 0x80787278,
+   0x8078ff78, 0x0200,
+   0xbef603ff, 0x0100,
+   0xf4211bfa, 0xf000,
+   0x80788478, 0xf4211b3a,
 0xf000, 0x80788478,
-   0xf4211b3a, 0xf000,
-   0x80788478, 0xf4211b7a,
+   0xf4211b7a, 0xf000,
+   0x80788478, 0xf4211eba,
 0xf000, 0x80788478,
-   0xf4211eba, 0xf000,
-   0x80788478, 0xf4211efa,
+   0xf4211efa, 0xf000,
+   0x80788478, 0xf4211c3a,
 0xf000, 0x80788478,
-   0xf4211c3a, 0xf000,
-   0x80788478, 0xf4211c7a,
+   0xf4211c7a, 0xf000,
+   0x80788478, 0xf4211e7a,
 0xf000, 0x80788478,
-   0xf4211e7a, 0xf000,
-   0x80788478, 0xf4211cfa,
+   0xf4211cfa, 0xf000,
+   0x80788478, 0xf4211bba,
 0xf000, 0x80788478,
+   0xbf8cc07f, 0xb9eef814,
 0xf4211bba, 0xf000,
 0x80788478, 0xbf8cc07f,
-   0xb9eef814, 0xf4211bba,
-   0xf000, 0x80788478,
-   0xbf8cc07f, 0xb9eef815,
-   0xbef2036d, 0x876dff72,
-   0x, 0xbefc036f,
-   0xbefe037a, 0xbef

[PATCH] drm/amdkfd: Use hex print format for pasid

2019-09-25 Thread Zhao, Yong
Since KFD pasid starts from 0x8000 (32768 in decimal), it is better
perceived as a hex number.

Change-Id: I565fe39f69e782749a697f18545775354c7a89f8
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 12 +--
 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c   |  4 ++--
 drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c   |  8 
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 12 +--
 drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c |  8 
 drivers/gpu/drm/amd/amdkfd/kfd_events.c   | 12 +--
 drivers/gpu/drm/amd/amdkfd/kfd_iommu.c|  6 +++---
 drivers/gpu/drm/amd/amdkfd/kfd_process.c  | 20 +--
 .../amd/amdkfd/kfd_process_queue_manager.c|  6 +++---
 9 files changed, 44 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index e5ff772862cd..106d45ae7c9b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -301,7 +301,7 @@ static int kfd_ioctl_create_queue(struct file *filep, 
struct kfd_process *p,
goto err_bind_process;
}
 
-   pr_debug("Creating queue for PASID %d on gpu 0x%x\n",
+   pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n",
p->pasid,
dev->id);
 
@@ -351,7 +351,7 @@ static int kfd_ioctl_destroy_queue(struct file *filp, 
struct kfd_process *p,
int retval;
struct kfd_ioctl_destroy_queue_args *args = data;
 
-   pr_debug("Destroying queue id %d for pasid %d\n",
+   pr_debug("Destroying queue id %d for pasid 0x%x\n",
args->queue_id,
p->pasid);
 
@@ -397,7 +397,7 @@ static int kfd_ioctl_update_queue(struct file *filp, struct 
kfd_process *p,
properties.queue_percent = args->queue_percentage;
properties.priority = args->queue_priority;
 
-   pr_debug("Updating queue id %d for pasid %d\n",
+   pr_debug("Updating queue id %d for pasid 0x%x\n",
args->queue_id, p->pasid);
 
mutex_lock(&p->mutex);
@@ -854,7 +854,7 @@ static int kfd_ioctl_get_process_apertures(struct file 
*filp,
struct kfd_process_device_apertures *pAperture;
struct kfd_process_device *pdd;
 
-   dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);
+   dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
 
args->num_of_nodes = 0;
 
@@ -912,7 +912,7 @@ static int kfd_ioctl_get_process_apertures_new(struct file 
*filp,
uint32_t nodes = 0;
int ret;
 
-   dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);
+   dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
 
if (args->num_of_nodes == 0) {
/* Return number of nodes, so that user space can alloacate
@@ -3063,7 +3063,7 @@ static int kfd_mmio_mmap(struct kfd_dev *dev, struct 
kfd_process *process,
 
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 
-   pr_debug("Process %d mapping mmio page\n"
+   pr_debug("pasid 0x%x mapping mmio page\n"
 " target user address == 0x%08llX\n"
 " physical address== 0x%08llX\n"
 " vm_flags== 0x%04lX\n"
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
index 3635e0b4b3b7..492951cad143 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
@@ -800,7 +800,7 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, 
struct kfd_process *p)
(dev->kgd, vmid)) {
if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid
(dev->kgd, vmid) == p->pasid) {
-   pr_debug("Killing wave fronts of vmid %d and 
pasid %d\n",
+   pr_debug("Killing wave fronts of vmid %d and 
pasid 0x%x\n",
vmid, p->pasid);
break;
}
@@ -808,7 +808,7 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, 
struct kfd_process *p)
}
 
if (vmid > last_vmid_to_scan) {
-   pr_err("Didn't find vmid for pasid %d\n", p->pasid);
+   pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid);
return -EFAULT;
}
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
index 9d4af961c5d1..9bfa50633654 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
@@ -96,7 +96,7 @@ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct 
kfd_dev *pdev)
 long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p)
 {
if (pmgr->pasid != 0) {
-   pr_debug("H/W debugger is a

Re: [PATCH 3/3] drm/amdkfd: Remove the control stack workaround for GFX10

2019-09-25 Thread Zhao, Yong
Yes. I confirmed with the CP guys and they said the behavior on GFX10 is now
the same as on GFX8. I remember that the workaround on GFX9 was to help
with a HW bug, but I am not too sure.

Regards,

Yong

On 2019-09-25 2:25 p.m., Kuehling, Felix wrote:
> On 2019-09-25 2:15 p.m., Zhao, Yong wrote:
>> The GFX10 does not have this hardware bug any more, so remove it.
> I wouldn't call this a bug and a workaround. More like a change in the
> HW or FW behaviour and a corresponding driver change. I.e. in GFXv8 the
> control stack was in the user mode CWSR allocation. In GFXv9 it moved
> into a kernel mode buffer next to the MQD. So in GFXv10 the control
> stack moved back into the user mode CWSR buffer?
>
> Regards,
>     Felix
>
>> Change-Id: I446c9685549a09ac8846a42ee22d86cfb93fd98c
>> Signed-off-by: Yong Zhao 
>> ---
>>.../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c  | 37 ++-
>>1 file changed, 4 insertions(+), 33 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
>> index 9cd3eb2d90bd..4a236b2c2354 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
>> @@ -69,35 +69,13 @@ static void update_cu_mask(struct mqd_manager *mm, void 
>> *mqd,
>>static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
>>  struct queue_properties *q)
>>{
>> -int retval;
>> -struct kfd_mem_obj *mqd_mem_obj = NULL;
>> +struct kfd_mem_obj *mqd_mem_obj;
>>
>> -/* From V9,  for CWSR, the control stack is located on the next page
>> - * boundary after the mqd, we will use the gtt allocation function
>> - * instead of sub-allocation function.
>> - */
>> -if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
>> -mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
>> -if (!mqd_mem_obj)
>> -return NULL;
>> -retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd,
>> -ALIGN(q->ctl_stack_size, PAGE_SIZE) +
>> -ALIGN(sizeof(struct v10_compute_mqd), 
>> PAGE_SIZE),
>> -&(mqd_mem_obj->gtt_mem),
>> -&(mqd_mem_obj->gpu_addr),
>> -(void *)&(mqd_mem_obj->cpu_ptr), true);
>> -} else {
>> -retval = kfd_gtt_sa_allocate(kfd, sizeof(struct 
>> v10_compute_mqd),
>> -&mqd_mem_obj);
>> -}
>> -
>> -if (retval) {
>> -kfree(mqd_mem_obj);
>> +if (kfd_gtt_sa_allocate(kfd, sizeof(struct v10_compute_mqd),
>> +&mqd_mem_obj))
>>  return NULL;
>> -}
>>
>>  return mqd_mem_obj;
>> -
>>}
>>
>>static void init_mqd(struct mqd_manager *mm, void **mqd,
>> @@ -250,14 +228,7 @@ static int destroy_mqd(struct mqd_manager *mm, void 
>> *mqd,
>>static void free_mqd(struct mqd_manager *mm, void *mqd,
>>  struct kfd_mem_obj *mqd_mem_obj)
>>{
>> -struct kfd_dev *kfd = mm->dev;
>> -
>> -if (mqd_mem_obj->gtt_mem) {
>> -amdgpu_amdkfd_free_gtt_mem(kfd->kgd, mqd_mem_obj->gtt_mem);
>> -kfree(mqd_mem_obj);
>> -} else {
>> -kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
>> -}
>> +kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
>>}
>>
>>static bool is_occupied(struct mqd_manager *mm, void *mqd,
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 3/3] drm/amdkfd: Remove the control stack workaround for GFX10

2019-09-25 Thread Zhao, Yong
The GFX10 does not have this hardware bug any more, so remove it.

Change-Id: I446c9685549a09ac8846a42ee22d86cfb93fd98c
Signed-off-by: Yong Zhao 
---
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c  | 37 ++-
 1 file changed, 4 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
index 9cd3eb2d90bd..4a236b2c2354 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
@@ -69,35 +69,13 @@ static void update_cu_mask(struct mqd_manager *mm, void 
*mqd,
 static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
struct queue_properties *q)
 {
-   int retval;
-   struct kfd_mem_obj *mqd_mem_obj = NULL;
+   struct kfd_mem_obj *mqd_mem_obj;
 
-   /* From V9,  for CWSR, the control stack is located on the next page
-* boundary after the mqd, we will use the gtt allocation function
-* instead of sub-allocation function.
-*/
-   if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
-   mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
-   if (!mqd_mem_obj)
-   return NULL;
-   retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd,
-   ALIGN(q->ctl_stack_size, PAGE_SIZE) +
-   ALIGN(sizeof(struct v10_compute_mqd), 
PAGE_SIZE),
-   &(mqd_mem_obj->gtt_mem),
-   &(mqd_mem_obj->gpu_addr),
-   (void *)&(mqd_mem_obj->cpu_ptr), true);
-   } else {
-   retval = kfd_gtt_sa_allocate(kfd, sizeof(struct 
v10_compute_mqd),
-   &mqd_mem_obj);
-   }
-
-   if (retval) {
-   kfree(mqd_mem_obj);
+   if (kfd_gtt_sa_allocate(kfd, sizeof(struct v10_compute_mqd),
+   &mqd_mem_obj))
return NULL;
-   }
 
return mqd_mem_obj;
-
 }
 
 static void init_mqd(struct mqd_manager *mm, void **mqd,
@@ -250,14 +228,7 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd,
 static void free_mqd(struct mqd_manager *mm, void *mqd,
struct kfd_mem_obj *mqd_mem_obj)
 {
-   struct kfd_dev *kfd = mm->dev;
-
-   if (mqd_mem_obj->gtt_mem) {
-   amdgpu_amdkfd_free_gtt_mem(kfd->kgd, mqd_mem_obj->gtt_mem);
-   kfree(mqd_mem_obj);
-   } else {
-   kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
-   }
+   kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
 }
 
 static bool is_occupied(struct mqd_manager *mm, void *mqd,
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 2/3] drm/amdkfd: Use setup_vm_pt_regs function from base driver in KFD

2019-09-25 Thread Zhao, Yong
This was done on GFX9 previously; now do it for GFX10.

Change-Id: I4442e60534c59bc9526a673559f018ba8058deac
Signed-off-by: Yong Zhao 
---
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 23 +++
 1 file changed, 3 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index fe5b702c75ce..64568ed32793 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -42,6 +42,7 @@
 #include "v10_structs.h"
 #include "nv.h"
 #include "nvd.h"
+#include "gfxhub_v2_0.h"
 
 enum hqd_dequeue_request_type {
NO_ACTION = 0,
@@ -251,11 +252,6 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, 
unsigned int pasid,
ATC_VMID0_PASID_MAPPING__VALID_MASK;
 
pr_debug("pasid 0x%x vmid %d, reg value %x\n", pasid, vmid, 
pasid_mapping);
-   /*
-* need to do this twice, once for gfx and once for mmhub
-* for ATC add 16 to VMID for mmhub, for IH different registers.
-* ATC_VMID0..15 registers are separate from ATC_VMID16..31.
-*/
 
pr_debug("ATHUB, reg %x\n", SOC15_REG_OFFSET(ATHUB, 0, 
mmATC_VMID0_PASID_MAPPING) + vmid);
WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid,
@@ -910,7 +906,6 @@ static void set_vm_context_page_table_base(struct kgd_dev 
*kgd, uint32_t vmid,
uint64_t page_table_base)
 {
struct amdgpu_device *adev = get_amdgpu_device(kgd);
-   uint64_t base = page_table_base | AMDGPU_PTE_VALID;
 
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
pr_err("trying to set page table base for wrong VMID %u\n",
@@ -918,18 +913,6 @@ static void set_vm_context_page_table_base(struct kgd_dev 
*kgd, uint32_t vmid,
return;
}
 
-   /* TODO: take advantage of per-process address space size. For
-* now, all processes share the same address space size, like
-* on GFX8 and older.
-*/
-   WREG32(SOC15_REG_OFFSET(GC, 0, 
mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0);
-   WREG32(SOC15_REG_OFFSET(GC, 0, 
mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0);
-
-   WREG32(SOC15_REG_OFFSET(GC, 0, 
mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2),
-   lower_32_bits(adev->vm_manager.max_pfn - 1));
-   WREG32(SOC15_REG_OFFSET(GC, 0, 
mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2),
-   upper_32_bits(adev->vm_manager.max_pfn - 1));
-
-   WREG32(SOC15_REG_OFFSET(GC, 0, 
mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base));
-   WREG32(SOC15_REG_OFFSET(GC, 0, 
mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base));
+   /* SDMA is on gfxhub as well on Navi1* series */
+   gfxhub_v2_0_setup_vm_pt_regs(adev, vmid, page_table_base);
 }
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 1/3] drm/amdgpu: Export setup_vm_pt_regs() logic for gfxhub 2.0

2019-09-25 Thread Zhao, Yong
The KFD code will call this function later.

Change-Id: I88a53368cdee719b2c75393e5cdbd8290584548e
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c | 20 
 drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h |  2 ++
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
index a9238735d361..b601c6740ef5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
@@ -46,21 +46,25 @@ u64 gfxhub_v2_0_get_mc_fb_offset(struct amdgpu_device *adev)
return (u64)RREG32_SOC15(GC, 0, mmGCMC_VM_FB_OFFSET) << 24;
 }
 
-static void gfxhub_v2_0_init_gart_pt_regs(struct amdgpu_device *adev)
+void gfxhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+   uint64_t page_table_base)
 {
-   uint64_t value = amdgpu_gmc_pd_addr(adev->gart.bo);
+   /* two registers distance between mmGCVM_CONTEXT0_* to 
mmGCVM_CONTEXT1_* */
+   int offset = mmGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32
+   - mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
 
+   WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+   offset * vmid, lower_32_bits(page_table_base));
 
-   WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
-lower_32_bits(value));
-
-   WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
-upper_32_bits(value));
+   WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+   offset * vmid, upper_32_bits(page_table_base));
 }
 
 static void gfxhub_v2_0_init_gart_aperture_regs(struct amdgpu_device *adev)
 {
-   gfxhub_v2_0_init_gart_pt_regs(adev);
+   uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+   gfxhub_v2_0_setup_vm_pt_regs(adev, 0, pt_base);
 
WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
 (u32)(adev->gmc.gart_start >> 12));
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h 
b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h
index 06807940748b..392b8cd94fc0 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h
@@ -31,5 +31,7 @@ void gfxhub_v2_0_set_fault_enable_default(struct 
amdgpu_device *adev,
  bool value);
 void gfxhub_v2_0_init(struct amdgpu_device *adev);
 u64 gfxhub_v2_0_get_mc_fb_offset(struct amdgpu_device *adev);
+void gfxhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+   uint64_t page_table_base);
 
 #endif
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdkfd: Add NAVI12 support from kfd side

2019-09-24 Thread Zhao, Yong
If Navi12 is the same as Navi10, then you can easily add the KFD support first, 
as it only involves a couple of lines because of recent simplification. Then 
have this patch next.

Regards,
Yong

From: Liu, Shaoyun 
Sent: Tuesday, September 24, 2019 6:28 PM
To: Zhao, Yong ; amd-gfx@lists.freedesktop.org 

Subject: Re: [PATCH] drm/amdkfd: Add NAVI12 support from kfd side


I will push to the drm-next branch. After checking the code again, this change 
will cause issues in the kfd, since CHIP_NAVI12 is not added in the other places 
in the kfd code that check device_info->asic_family. I think it's better to just 
set the family ID to CHIP_NAVI10, since there is no difference from the kfd side 
for NAVI10, NAVI12 and NAVI14. I will send another review.


Regards

shaoyun.liu


On 2019-09-24 6:17 p.m., Zhao, Yong wrote:
Reviewed-by: Yong Zhao <mailto:yong.z...@amd.com>

Make sure to push to the new 5.3 branch.

Yong


From: amd-gfx 
<mailto:amd-gfx-boun...@lists.freedesktop.org>
 on behalf of Liu, Shaoyun <mailto:shaoyun@amd.com>
Sent: Tuesday, September 24, 2019 6:16 PM
To: amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> 
<mailto:amd-gfx@lists.freedesktop.org>
Cc: Liu, Shaoyun <mailto:shaoyun@amd.com>
Subject: [PATCH] drm/amdkfd: Add NAVI12 support from kfd side

Add device info for both navi12 PF and VF

Change-Id: Ifb4035e65c12d153fc30e593fe109f9c7e0541f4
Signed-off-by: shaoyunl <mailto:shaoyun@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index f329b82..edfbae5c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -387,6 +387,24 @@ static const struct kfd_device_info navi10_device_info = {
 .num_sdma_queues_per_engine = 8,
 };

+static const struct kfd_device_info navi12_device_info = {
+   .asic_family = CHIP_NAVI12,
+   .asic_name = "navi12",
+   .max_pasid_bits = 16,
+   .max_no_of_hqd  = 24,
+   .doorbell_size  = 8,
+   .ih_ring_entry_size = 8 * sizeof(uint32_t),
+   .event_interrupt_class = &event_interrupt_class_v9,
+   .num_of_watch_points = 4,
+   .mqd_size_aligned = MQD_SIZE_ALIGNED,
+   .needs_iommu_device = false,
+   .supports_cwsr = true,
+   .needs_pci_atomics = false,
+   .num_sdma_engines = 2,
+   .num_xgmi_sdma_engines = 0,
+   .num_sdma_queues_per_engine = 8,
+};
+
 static const struct kfd_device_info navi14_device_info = {
 .asic_family = CHIP_NAVI14,
 .asic_name = "navi14",
@@ -425,6 +443,7 @@ static const struct kfd_device_info 
*kfd_supported_devices[][2] = {
 [CHIP_RENOIR] = {&renoir_device_info, NULL},
 [CHIP_ARCTURUS] = {&arcturus_device_info, &arcturus_device_info},
 [CHIP_NAVI10] = {&navi10_device_info, NULL},
+   [CHIP_NAVI12] = {&navi12_device_info, &navi12_device_info},
 [CHIP_NAVI14] = {&navi14_device_info, NULL},
 };

--
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdkfd: Add NAVI12 support from kfd side

2019-09-24 Thread Zhao, Yong
Reviewed-by: Yong Zhao 

Make sure to push to the new 5.3 branch.

Yong


From: amd-gfx  on behalf of Liu, Shaoyun 

Sent: Tuesday, September 24, 2019 6:16 PM
To: amd-gfx@lists.freedesktop.org 
Cc: Liu, Shaoyun 
Subject: [PATCH] drm/amdkfd: Add NAVI12 support from kfd side

Add device info for both navi12 PF and VF

Change-Id: Ifb4035e65c12d153fc30e593fe109f9c7e0541f4
Signed-off-by: shaoyunl 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index f329b82..edfbae5c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -387,6 +387,24 @@ static const struct kfd_device_info navi10_device_info = {
 .num_sdma_queues_per_engine = 8,
 };

+static const struct kfd_device_info navi12_device_info = {
+   .asic_family = CHIP_NAVI12,
+   .asic_name = "navi12",
+   .max_pasid_bits = 16,
+   .max_no_of_hqd  = 24,
+   .doorbell_size  = 8,
+   .ih_ring_entry_size = 8 * sizeof(uint32_t),
+   .event_interrupt_class = &event_interrupt_class_v9,
+   .num_of_watch_points = 4,
+   .mqd_size_aligned = MQD_SIZE_ALIGNED,
+   .needs_iommu_device = false,
+   .supports_cwsr = true,
+   .needs_pci_atomics = false,
+   .num_sdma_engines = 2,
+   .num_xgmi_sdma_engines = 0,
+   .num_sdma_queues_per_engine = 8,
+};
+
 static const struct kfd_device_info navi14_device_info = {
 .asic_family = CHIP_NAVI14,
 .asic_name = "navi14",
@@ -425,6 +443,7 @@ static const struct kfd_device_info 
*kfd_supported_devices[][2] = {
 [CHIP_RENOIR] = {&renoir_device_info, NULL},
 [CHIP_ARCTURUS] = {&arcturus_device_info, &arcturus_device_info},
 [CHIP_NAVI10] = {&navi10_device_info, NULL},
+   [CHIP_NAVI12] = {&navi12_device_info, &navi12_device_info},
 [CHIP_NAVI14] = {&navi14_device_info, NULL},
 };

--
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 1/2] drm/amdkfd: Fix NULL pointer dereference for set_scratch_backing_va()

2019-09-23 Thread Zhao, Yong
Currently this function pointer is missing for GFX10. Considering it has
been an empty (no-op) function since GFX9, fix it by checking the function
pointer before dereferencing it.

Change-Id: I1dc8e5163f259251357bfaa42a91ff991fba6dd5
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   |  1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 10 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h |  2 --
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c |  5 +++--
 drivers/gpu/drm/amd/include/kgd_kfd_interface.h   |  5 +
 6 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index 39768d6cb07c..3abaf92caf5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -283,7 +283,6 @@ static const struct kfd2kgd_calls kfd2kgd = {
kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid,
.get_atc_vmid_pasid_mapping_valid =
kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid,
-   .set_scratch_backing_va = kgd_gfx_v9_set_scratch_backing_va,
.get_tile_config = kgd_gfx_v9_get_tile_config,
.set_vm_context_page_table_base = 
kgd_gfx_v9_set_vm_context_page_table_base,
.invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 4a126df93885..745cae5b6282 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -775,15 +775,6 @@ uint32_t kgd_gfx_v9_address_watch_get_offset(struct 
kgd_dev *kgd,
return 0;
 }
 
-void kgd_gfx_v9_set_scratch_backing_va(struct kgd_dev *kgd,
-   uint64_t va, uint32_t vmid)
-{
-   /* No longer needed on GFXv9. The scratch base address is
-* passed to the shader by the CP. It's the user mode driver's
-* responsibility.
-*/
-}
-
 void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t 
vmid,
uint64_t page_table_base)
 {
@@ -829,7 +820,6 @@ static const struct kfd2kgd_calls kfd2kgd = {
kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid,
.get_atc_vmid_pasid_mapping_valid =
kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid,
-   .set_scratch_backing_va = kgd_gfx_v9_set_scratch_backing_va,
.get_tile_config = kgd_gfx_v9_get_tile_config,
.set_vm_context_page_table_base = 
kgd_gfx_v9_set_vm_context_page_table_base,
.invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
index 26d8879bff9d..225bf64001e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
@@ -61,8 +61,6 @@ uint16_t kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(struct 
kgd_dev *kgd,
uint8_t vmid);
 void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t 
vmid,
uint64_t page_table_base);
-void kgd_gfx_v9_set_scratch_backing_va(struct kgd_dev *kgd,
-   uint64_t va, uint32_t vmid);
 int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
 int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
 int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index a53864bb70bb..d14aed983663 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1128,7 +1128,7 @@ static int kfd_ioctl_set_scratch_backing_va(struct file 
*filep,
mutex_unlock(&p->mutex);
 
if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
-   pdd->qpd.vmid != 0)
+   pdd->qpd.vmid != 0 && dev->kfd2kgd->set_scratch_backing_va)
dev->kfd2kgd->set_scratch_backing_va(
dev->kgd, args->va_addr, pdd->qpd.vmid);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index c826634938d2..9a53bc79fb42 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -220,8 +220,9 @@ static int allocate_vmid(struct device_queue_manager *dqm,
/* invalidate the VM context after pasid and vmid mapping is set up */
kfd_flush_tlb(qpd_to_pdd(qpd));
 
-   dqm->dev->kfd2kgd->set_scratch_backing_va(
-   dqm->dev->kgd, qpd->sh_hidden_private_base, qpd->vmid);
+   if (dqm->dev->kfd2kgd->set_scratch_backing_va)
+   dqm->dev->kfd2

[PATCH 2/2] drm/amdkfd: Sync gfx10 kfd2kgd_calls function pointers

2019-09-23 Thread Zhao, Yong
get_hive_id was not set. Also, adjust the function setting sequence.

Change-Id: I51962954cd0707ebe9aa6c85c71110dee98d6200
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index d3713e41ad8b..3ccaa088cafe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -160,10 +160,11 @@ static const struct kfd2kgd_calls kfd2kgd = {
get_atc_vmid_pasid_mapping_pasid,
.get_atc_vmid_pasid_mapping_valid =
get_atc_vmid_pasid_mapping_valid,
+   .get_tile_config = amdgpu_amdkfd_get_tile_config,
+   .set_vm_context_page_table_base = set_vm_context_page_table_base,
.invalidate_tlbs = invalidate_tlbs,
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
-   .set_vm_context_page_table_base = set_vm_context_page_table_base,
-   .get_tile_config = amdgpu_amdkfd_get_tile_config,
+   .get_hive_id = amdgpu_amdkfd_get_hive_id,
 };
 
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions()
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 2/2] drm/amdkfd: Use better name for sdma queue non HWS path

2019-09-23 Thread Zhao, Yong
Okay, I will incorporate Shaoyun's input.

Yong

From: Liu, Shaoyun 
Sent: Monday, September 23, 2019 10:27 AM
To: Zhao, Yong ; amd-gfx@lists.freedesktop.org 

Subject: Re: [PATCH 2/2] drm/amdkfd: Use better name for sdma queue non HWS path

Probably rename it to sdma_rlc to avoid confusion with the rlc used
elsewhere in the amdgpu driver.

Regards

shaoyun.liu

On 2019-09-22 11:56 p.m., Zhao, Yong wrote:
> The old name is prone to confusion. The register offset is for an RLC queue
> rather than an SDMA engine. The value is not a base address, but a
> register offset.
>
> Change-Id: I55fb835f2105392344b1c17323bb55c03f927836
> Signed-off-by: Yong Zhao 
> ---
>   .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   | 85 +-
>   .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 90 +--
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 10 +--
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 10 +--
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 86 +-
>   5 files changed, 137 insertions(+), 144 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> index c9ce1516956e..d2c0666c2798 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> @@ -70,11 +70,11 @@ static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
>return (struct v9_sdma_mqd *)mqd;
>   }
>
> -static uint32_t get_sdma_base_addr(struct amdgpu_device *adev,
> +static uint32_t get_rlc_reg_offset(struct amdgpu_device *adev,
>unsigned int engine_id,
>unsigned int queue_id)
>   {
> - uint32_t base[8] = {
> + uint32_t sdma_engine_reg_base[8] = {
>SOC15_REG_OFFSET(SDMA0, 0,
> mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL,
>SOC15_REG_OFFSET(SDMA1, 0,
> @@ -92,12 +92,11 @@ static uint32_t get_sdma_base_addr(struct amdgpu_device 
> *adev,
>SOC15_REG_OFFSET(SDMA7, 0,
> mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL
>};
> - uint32_t retval;
>
> - retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL -
> -mmSDMA0_RLC0_RB_CNTL);
> + uint32_t retval = sdma_engine_reg_base[engine_id]
> + + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);
>
> - pr_debug("sdma base address: 0x%x\n", retval);
> + pr_debug("RLC register offset: 0x%x\n", retval);
>
>return retval;
>   }
> @@ -107,22 +106,22 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void 
> *mqd,
>   {
>struct amdgpu_device *adev = get_amdgpu_device(kgd);
>struct v9_sdma_mqd *m;
> - uint32_t sdma_base_addr;
> + uint32_t rlc_reg_offset;
>unsigned long end_jiffies;
>uint32_t data;
>uint64_t data64;
>uint64_t __user *wptr64 = (uint64_t __user *)wptr;
>
>m = get_sdma_mqd(mqd);
> - sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
> + rlc_reg_offset = get_rlc_reg_offset(adev, m->sdma_engine_id,
>m->sdma_queue_id);
>
> - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
> + WREG32(rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
>m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
>
>end_jiffies = msecs_to_jiffies(2000) + jiffies;
>while (true) {
> - data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
> + data = RREG32(rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
>if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
>break;
>if (time_after(jiffies, end_jiffies))
> @@ -130,41 +129,41 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void 
> *mqd,
>usleep_range(500, 1000);
>}
>
> - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET,
> + WREG32(rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET,
>   m->sdmax_rlcx_doorbell_offset);
>
>data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
> ENABLE, 1);
> - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
> - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);
> - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI,
> + WREG32(rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
> + WREG32(rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);
&g

[PATCH 3/3] drm/amdkfd: Remove unnecessary pm_init() for non HWS mode

2019-09-22 Thread Zhao, Yong
The packet manager is not needed for non HWS mode except on Hawaii, so only
initialize it for Hawaii under non HWS mode. This will simplify debugging
under non HWS mode for all new ASICs, because it eliminates one variable
from the equation in non HWS mode.

Change-Id: Ie2b61b546299a50366b9ab97900f4bb13de33d5b
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index c47b88987c87..c826634938d2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -902,12 +902,18 @@ static void uninitialize(struct device_queue_manager *dqm)
 static int start_nocpsch(struct device_queue_manager *dqm)
 {
init_interrupts(dqm);
-   return pm_init(&dqm->packets, dqm);
+   
+   if (dqm->dev->device_info->asic_family == CHIP_HAWAII)
+   return pm_init(&dqm->packets, dqm);
+   
+   return 0;
 }
 
 static int stop_nocpsch(struct device_queue_manager *dqm)
 {
-   pm_uninit(&dqm->packets);
+   if (dqm->dev->device_info->asic_family == CHIP_HAWAII)
+   pm_uninit(&dqm->packets);
+   
return 0;
 }
 
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 2/3] drm/amdkfd: Remove excessive print when reserving doorbells

2019-09-22 Thread Zhao, Yong
The dozens of debug print messages are compressed into 2 lines.

Change-Id: I339b3eee06509973f76577091c4c4e9c70ed8248
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_process.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 0c6ac043ae3c..48a38847e839 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -693,6 +693,8 @@ static int init_doorbell_bitmap(struct qcm_process_device 
*qpd,
struct kfd_dev *dev)
 {
unsigned int i;
+   int range_start = dev->shared_resources.non_cp_doorbells_start;
+   int range_end = dev->shared_resources.non_cp_doorbells_end;
 
if (!KFD_IS_SOC15(dev->device_info->asic_family))
return 0;
@@ -704,14 +706,16 @@ static int init_doorbell_bitmap(struct qcm_process_device 
*qpd,
return -ENOMEM;
 
/* Mask out doorbells reserved for SDMA, IH, and VCN on SOC15. */
+   pr_debug("reserved doorbell 0x%03x - 0x%03x\n", range_start, range_end);
+   pr_debug("reserved doorbell 0x%03x - 0x%03x\n",
+   range_start + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
+   range_end + KFD_QUEUE_DOORBELL_MIRROR_OFFSET);
+
for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) {
-   if (i >= dev->shared_resources.non_cp_doorbells_start
-   && i <= dev->shared_resources.non_cp_doorbells_end) {
+   if (i >= range_start && i <= range_end) {
set_bit(i, qpd->doorbell_bitmap);
set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
qpd->doorbell_bitmap);
-   pr_debug("reserved doorbell 0x%03x and 0x%03x\n", i,
-   i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET);
}
}
 
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 1/3] drm/amdkfd: Add an error print if SDMA RLC is not idle

2019-09-22 Thread Zhao, Yong
The message will be useful when troubleshooting the issues.

Change-Id: Id82bbe80810dccff67c5b1275e9779f6a945dc7a
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 8 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c  | 8 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c   | 8 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c   | 8 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c   | 8 ++--
 5 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index d2c0666c2798..0c28e838162c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -124,8 +124,10 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void 
*mqd,
data = RREG32(rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
break;
-   if (time_after(jiffies, end_jiffies))
+   if (time_after(jiffies, end_jiffies)) {
+   pr_err("SDMA RLC not idle in %s\n", __func__);
return -ETIME;
+   }
usleep_range(500, 1000);
}
 
@@ -239,8 +241,10 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void 
*mqd,
temp = RREG32(rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
break;
-   if (time_after(jiffies, end_jiffies))
+   if (time_after(jiffies, end_jiffies)) {
+   pr_err("SDMA RLC not idle in %s\n", __func__);
return -ETIME;
+   }
usleep_range(500, 1000);
}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index ee520ad90717..31372d8e4425 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -507,8 +507,10 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void 
*mqd,
data = RREG32(rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
break;
-   if (time_after(jiffies, end_jiffies))
+   if (time_after(jiffies, end_jiffies)) {
+   pr_err("SDMA RLC not idle in %s\n", __func__);
return -ETIME;
+   }
usleep_range(500, 1000);
}
 
@@ -752,8 +754,10 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void 
*mqd,
temp = RREG32(rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
break;
-   if (time_after(jiffies, end_jiffies))
+   if (time_after(jiffies, end_jiffies)) {
+   pr_err("SDMA RLC not idle in %s\n", __func__);
return -ETIME;
+   }
usleep_range(500, 1000);
}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index d0517b7ae089..6288de4c943d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -427,8 +427,10 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void 
*mqd,
data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
break;
-   if (time_after(jiffies, end_jiffies))
+   if (time_after(jiffies, end_jiffies)) {
+   pr_err("SDMA RLC not idle in %s\n", __func__);
return -ETIME;
+   }
usleep_range(500, 1000);
}
if (m->sdma_engine_id) {
@@ -660,8 +662,10 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void 
*mqd,
temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
break;
-   if (time_after(jiffies, end_jiffies))
+   if (time_after(jiffies, end_jiffies)) {
+   pr_err("SDMA RLC not idle in %s\n", __func__);
return -ETIME;
+   }
usleep_range(500, 1000);
}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 373501abdb6b..a7e5464df85e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -411,8 +411,10 @@ static int kgd_hqd_sdma_load(struct kgd_dev *

[PATCH 1/2] drm/amdkfd: Delete useless SDMA register setting on non HWS path

2019-09-22 Thread Zhao, Yong
It turns out when loading hqd for SDMA queues, RESUME_CTX of
SDMA*_GFX_CONTEXT_CNTL is already 0, so there is no need to set it
to 0 again. In addition, it does not seem right that we should touch
SDMA GFX queues when manipulating RLC queues.

Change-Id: I2c142d024e94f92194b1cb9feb7f44396b8f3ecc
Signed-off-by: Yong Zhao 
---
 .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   | 34 +--
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c|  9 +
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  9 +
 3 files changed, 3 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index 4d9101834ba7..c9ce1516956e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -102,38 +102,12 @@ static uint32_t get_sdma_base_addr(struct amdgpu_device 
*adev,
return retval;
 }
 
-static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev,
-   u32 instance, u32 offset)
-{
-   switch (instance) {
-   case 0:
-   return (adev->reg_offset[SDMA0_HWIP][0][0] + offset);
-   case 1:
-   return (adev->reg_offset[SDMA1_HWIP][0][1] + offset);
-   case 2:
-   return (adev->reg_offset[SDMA2_HWIP][0][1] + offset);
-   case 3:
-   return (adev->reg_offset[SDMA3_HWIP][0][1] + offset);
-   case 4:
-   return (adev->reg_offset[SDMA4_HWIP][0][1] + offset);
-   case 5:
-   return (adev->reg_offset[SDMA5_HWIP][0][1] + offset);
-   case 6:
-   return (adev->reg_offset[SDMA6_HWIP][0][1] + offset);
-   case 7:
-   return (adev->reg_offset[SDMA7_HWIP][0][1] + offset);
-   default:
-   break;
-   }
-   return 0;
-}
-
 static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
 uint32_t __user *wptr, struct mm_struct *mm)
 {
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v9_sdma_mqd *m;
-   uint32_t sdma_base_addr, sdmax_gfx_context_cntl;
+   uint32_t sdma_base_addr;
unsigned long end_jiffies;
uint32_t data;
uint64_t data64;
@@ -142,8 +116,6 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
m = get_sdma_mqd(mqd);
sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
m->sdma_queue_id);
-   sdmax_gfx_context_cntl = sdma_v4_0_get_reg_offset(adev,
-   m->sdma_engine_id, mmSDMA0_GFX_CONTEXT_CNTL);
 
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
@@ -157,10 +129,6 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void 
*mqd,
return -ETIME;
usleep_range(500, 1000);
}
-   data = RREG32(sdmax_gfx_context_cntl);
-   data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
-RESUME_CTX, 0);
-   WREG32(sdmax_gfx_context_cntl, data);
 
WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET,
   m->sdmax_rlcx_doorbell_offset);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 3aff2b5758e0..a4325db8d093 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -489,7 +489,7 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
 {
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v10_sdma_mqd *m;
-   uint32_t sdma_base_addr, sdmax_gfx_context_cntl;
+   uint32_t sdma_base_addr;
unsigned long end_jiffies;
uint32_t data;
uint64_t data64;
@@ -499,9 +499,6 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
m->sdma_queue_id);
pr_debug("sdma load base addr %x for engine %d, queue %d\n", 
sdma_base_addr, m->sdma_engine_id, m->sdma_queue_id);
-   sdmax_gfx_context_cntl = m->sdma_engine_id ?
-   SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_CONTEXT_CNTL) :
-   SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_CONTEXT_CNTL);
 
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
@@ -515,10 +512,6 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void 
*mqd,
return -ETIME;
usleep_range(500, 1000);
}
-   data = RREG32(sdmax_gfx_context_cntl);
-   data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
-RESUME_CTX, 0);
-   WREG32(sdmax_gfx_context_cntl, data);
 
WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET,
   m->sdm

[PATCH 2/2] drm/amdkfd: Use better name for sdma queue non HWS path

2019-09-22 Thread Zhao, Yong
The old name is prone to confusion. The register offset is for an RLC queue
rather than an SDMA engine. The value is not a base address, but a
register offset.

Change-Id: I55fb835f2105392344b1c17323bb55c03f927836
Signed-off-by: Yong Zhao 
---
 .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   | 85 +-
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 90 +--
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 10 +--
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 10 +--
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 86 +-
 5 files changed, 137 insertions(+), 144 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index c9ce1516956e..d2c0666c2798 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -70,11 +70,11 @@ static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
return (struct v9_sdma_mqd *)mqd;
 }
 
-static uint32_t get_sdma_base_addr(struct amdgpu_device *adev,
+static uint32_t get_rlc_reg_offset(struct amdgpu_device *adev,
unsigned int engine_id,
unsigned int queue_id)
 {
-   uint32_t base[8] = {
+   uint32_t sdma_engine_reg_base[8] = {
SOC15_REG_OFFSET(SDMA0, 0,
 mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL,
SOC15_REG_OFFSET(SDMA1, 0,
@@ -92,12 +92,11 @@ static uint32_t get_sdma_base_addr(struct amdgpu_device 
*adev,
SOC15_REG_OFFSET(SDMA7, 0,
 mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL
};
-   uint32_t retval;
 
-   retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL -
-  mmSDMA0_RLC0_RB_CNTL);
+   uint32_t retval = sdma_engine_reg_base[engine_id]
+   + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);
 
-   pr_debug("sdma base address: 0x%x\n", retval);
+   pr_debug("RLC register offset: 0x%x\n", retval);
 
return retval;
 }
@@ -107,22 +106,22 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void 
*mqd,
 {
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v9_sdma_mqd *m;
-   uint32_t sdma_base_addr;
+   uint32_t rlc_reg_offset;
unsigned long end_jiffies;
uint32_t data;
uint64_t data64;
uint64_t __user *wptr64 = (uint64_t __user *)wptr;
 
m = get_sdma_mqd(mqd);
-   sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+   rlc_reg_offset = get_rlc_reg_offset(adev, m->sdma_engine_id,
m->sdma_queue_id);
 
-   WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+   WREG32(rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
 
end_jiffies = msecs_to_jiffies(2000) + jiffies;
while (true) {
-   data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+   data = RREG32(rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
break;
if (time_after(jiffies, end_jiffies))
@@ -130,41 +129,41 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void 
*mqd,
usleep_range(500, 1000);
}
 
-   WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET,
+   WREG32(rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET,
   m->sdmax_rlcx_doorbell_offset);
 
data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
 ENABLE, 1);
-   WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
-   WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);
-   WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI,
+   WREG32(rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
+   WREG32(rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);
+   WREG32(rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI,
m->sdmax_rlcx_rb_rptr_hi);
 
-   WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
+   WREG32(rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
if (read_user_wptr(mm, wptr64, data64)) {
-   WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+   WREG32(rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
   lower_32_bits(data64));
-   WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
+   WREG32(rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
   upper_32_bits(data64));
} else {
-   WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+   WREG32(rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
   m->sdmax_rlcx_rb_rptr);
-   WREG32(sdma_base_addr +

[PATCH] drm/amdkfd: Delete unused KFD_IS_* macro

2019-09-16 Thread Zhao, Yong
These were deleted before, but somehow showed up again. Delete them again.

Change-Id: I19b3063932380cb74a01d505e8e92f897a2c2cb7
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 4 
 1 file changed, 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 06bb2d7a9b39..0773dc4df4ff 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -179,10 +179,6 @@ enum cache_policy {
cache_policy_noncoherent
 };
 
-#define KFD_IS_VI(chip) ((chip) >= CHIP_CARRIZO && (chip) <= CHIP_POLARIS11)
-#define KFD_IS_DGPU(chip) (((chip) >= CHIP_TONGA && \
-  (chip) <= CHIP_NAVI10) || \
-  (chip) == CHIP_HAWAII)
 #define KFD_IS_SOC15(chip) ((chip) >= CHIP_VEGA10)
 
 struct kfd_event_interrupt_class {
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdkfd: Delete unused KFD_IS_DGPU macro

2019-09-16 Thread Zhao, Yong
This was deleted before, but somehow showed up again. Delete it again.

Change-Id: I19b3063932380cb74a01d505e8e92f897a2c2cb7
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 06bb2d7a9b39..6ed31a76dfda 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -180,9 +180,6 @@ enum cache_policy {
 };
 
 #define KFD_IS_VI(chip) ((chip) >= CHIP_CARRIZO && (chip) <= CHIP_POLARIS11)
-#define KFD_IS_DGPU(chip) (((chip) >= CHIP_TONGA && \
-  (chip) <= CHIP_NAVI10) || \
-  (chip) == CHIP_HAWAII)
 #define KFD_IS_SOC15(chip) ((chip) >= CHIP_VEGA10)
 
 struct kfd_event_interrupt_class {
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdkfd: Support Navi14 in KFD

2019-09-05 Thread Zhao, Yong
Fixed and pushed. Thanks.

Regards,

Yong

On 2019-09-05 3:56 p.m., Kuehling, Felix wrote:
> On 2019-09-05 3:22 p.m., Zhao, Yong wrote:
>> Change-Id: Ie2c6226022ff4d389eaa05b1c84afa7ae4cea0aa
>> Signed-off-by: Yong Zhao 
> Please add a change description. With that fixed, this patch is
>
> Reviewed-by: Felix Kuehling 
>
>
>> ---
>>drivers/gpu/drm/amd/amdkfd/kfd_crat.c |  1 +
>>drivers/gpu/drm/amd/amdkfd/kfd_device.c   | 19 +++
>>.../drm/amd/amdkfd/kfd_device_queue_manager.c |  1 +
>>drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c  |  1 +
>>drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c |  1 +
>>.../gpu/drm/amd/amdkfd/kfd_packet_manager.c   |  1 +
>>drivers/gpu/drm/amd/amdkfd/kfd_topology.c |  1 +
>>7 files changed, 25 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>> index 3d7d5eb9ed7a..333b44eb72e6 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>> @@ -671,6 +671,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
>>  num_of_cache_types = ARRAY_SIZE(raven_cache_info);
>>  break;
>>  case CHIP_NAVI10:
>> +case CHIP_NAVI14:
>>  pcache_info = navi10_cache_info;
>>  num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
>>  break;
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> index 444396a2fb0a..e71018b57784 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> @@ -369,6 +369,24 @@ static const struct kfd_device_info navi10_device_info 
>> = {
>>  .num_sdma_queues_per_engine = 8,
>>};
>>
>> +static const struct kfd_device_info navi14_device_info = {
>> +.asic_family = CHIP_NAVI14,
>> +.asic_name = "navi14",
>> +.max_pasid_bits = 16,
>> +.max_no_of_hqd  = 24,
>> +.doorbell_size  = 8,
>> +.ih_ring_entry_size = 8 * sizeof(uint32_t),
>> +.event_interrupt_class = &event_interrupt_class_v9,
>> +.num_of_watch_points = 4,
>> +.mqd_size_aligned = MQD_SIZE_ALIGNED,
>> +.needs_iommu_device = false,
>> +.supports_cwsr = true,
>> +.needs_pci_atomics = false,
>> +.num_sdma_engines = 2,
>> +.num_xgmi_sdma_engines = 0,
>> +.num_sdma_queues_per_engine = 8,
>> +};
>> +
>>/* For each entry, [0] is regular and [1] is virtualisation device. */
>>static const struct kfd_device_info *kfd_supported_devices[][2] = {
>>#ifdef KFD_SUPPORT_IOMMU_V2
>> @@ -388,6 +406,7 @@ static const struct kfd_device_info 
>> *kfd_supported_devices[][2] = {
>>  [CHIP_VEGA20] = {&vega20_device_info, NULL},
>>  [CHIP_ARCTURUS] = {&arcturus_device_info, &arcturus_device_info},
>>  [CHIP_NAVI10] = {&navi10_device_info, NULL},
>> +[CHIP_NAVI14] = {&navi14_device_info, NULL},
>>};
>>
>>static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> index 56639ee78608..9a7b512049d6 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> @@ -1908,6 +1908,7 @@ struct device_queue_manager 
>> *device_queue_manager_init(struct kfd_dev *dev)
>>  device_queue_manager_init_v9(&dqm->asic_ops);
>>  break;
>>  case CHIP_NAVI10:
>> +case CHIP_NAVI14:
>>  device_queue_manager_init_v10_navi10(&dqm->asic_ops);
>>  break;
>>  default:
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
>> index ee7ff6b0541b..ed4efab0a190 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
>> @@ -412,6 +412,7 @@ int kfd_init_apertures(struct kfd_process *process)
>>  case CHIP_RAVEN:
>>  case CHIP_ARCTURUS:
>>  case CHIP_NAVI10:
>> +case CHIP_NAVI14:
>>  kfd_init_apertures_v9(pdd, id);
>>  break;
>>  default:
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_

[PATCH] drm/amdkfd: Support Navi14 in KFD

2019-09-05 Thread Zhao, Yong
Change-Id: Ie2c6226022ff4d389eaa05b1c84afa7ae4cea0aa
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c |  1 +
 drivers/gpu/drm/amd/amdkfd/kfd_device.c   | 19 +++
 .../drm/amd/amdkfd/kfd_device_queue_manager.c |  1 +
 drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c  |  1 +
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c |  1 +
 .../gpu/drm/amd/amdkfd/kfd_packet_manager.c   |  1 +
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c |  1 +
 7 files changed, 25 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 3d7d5eb9ed7a..333b44eb72e6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -671,6 +671,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
num_of_cache_types = ARRAY_SIZE(raven_cache_info);
break;
case CHIP_NAVI10:
+   case CHIP_NAVI14:
pcache_info = navi10_cache_info;
num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 444396a2fb0a..e71018b57784 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -369,6 +369,24 @@ static const struct kfd_device_info navi10_device_info = {
.num_sdma_queues_per_engine = 8,
 };
 
+static const struct kfd_device_info navi14_device_info = {
+   .asic_family = CHIP_NAVI14,
+   .asic_name = "navi14",
+   .max_pasid_bits = 16,
+   .max_no_of_hqd  = 24,
+   .doorbell_size  = 8,
+   .ih_ring_entry_size = 8 * sizeof(uint32_t),
+   .event_interrupt_class = &event_interrupt_class_v9,
+   .num_of_watch_points = 4,
+   .mqd_size_aligned = MQD_SIZE_ALIGNED,
+   .needs_iommu_device = false,
+   .supports_cwsr = true,
+   .needs_pci_atomics = false,
+   .num_sdma_engines = 2,
+   .num_xgmi_sdma_engines = 0,
+   .num_sdma_queues_per_engine = 8,
+};
+
 /* For each entry, [0] is regular and [1] is virtualisation device. */
 static const struct kfd_device_info *kfd_supported_devices[][2] = {
 #ifdef KFD_SUPPORT_IOMMU_V2
@@ -388,6 +406,7 @@ static const struct kfd_device_info 
*kfd_supported_devices[][2] = {
[CHIP_VEGA20] = {&vega20_device_info, NULL},
[CHIP_ARCTURUS] = {&arcturus_device_info, &arcturus_device_info},
[CHIP_NAVI10] = {&navi10_device_info, NULL},
+   [CHIP_NAVI14] = {&navi14_device_info, NULL},
 };
 
 static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 56639ee78608..9a7b512049d6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1908,6 +1908,7 @@ struct device_queue_manager 
*device_queue_manager_init(struct kfd_dev *dev)
device_queue_manager_init_v9(&dqm->asic_ops);
break;
case CHIP_NAVI10:
+   case CHIP_NAVI14:
device_queue_manager_init_v10_navi10(&dqm->asic_ops);
break;
default:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index ee7ff6b0541b..ed4efab0a190 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -412,6 +412,7 @@ int kfd_init_apertures(struct kfd_process *process)
case CHIP_RAVEN:
case CHIP_ARCTURUS:
case CHIP_NAVI10:
+   case CHIP_NAVI14:
kfd_init_apertures_v9(pdd, id);
break;
default:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 7a3b0482ab1a..1097e047b4bb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -368,6 +368,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
kernel_queue_init_v9(&kq->ops_asic_specific);
break;
case CHIP_NAVI10:
+   case CHIP_NAVI14:
kernel_queue_init_v10(&kq->ops_asic_specific);
break;
default:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 6cf12422a7d8..b7828a241981 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -243,6 +243,7 @@ int pm_init(struct packet_manager *pm, struct 
device_queue_manager *dqm)
pm->pmf = &kfd_v9_pm_funcs;
break;
case CHIP_NAVI10:
+   case CHIP_NAVI14:
pm->pmf = &kfd_v10_pm_funcs;
 

[PATCH] drm/amdkfd: Fix a building error when KFD_SUPPORT_IOMMU_V2 is turned off

2019-09-05 Thread Zhao, Yong
The issue was accidentally introduced recently.

Change-Id: I3b21caa1596d4f7de1866bed1cb5d8fe1b51504c
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 267eb2e01bec..21f5c597e699 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -371,11 +371,14 @@ static const struct kfd_device_info navi10_device_info = {
 
 /* For each entry, [0] is regular and [1] is virtualisation device. */
 static const struct kfd_device_info *kfd_supported_devices[][2] = {
+#ifdef KFD_SUPPORT_IOMMU_V2
[CHIP_KAVERI] = {&kaveri_device_info, NULL},
+   [CHIP_CARRIZO] = {&carrizo_device_info, NULL},
+   [CHIP_RAVEN] = {&raven_device_info, NULL},
+#endif
[CHIP_HAWAII] = {&hawaii_device_info, NULL},
[CHIP_TONGA] = {&tonga_device_info, NULL},
[CHIP_FIJI] = {&fiji_device_info, &fiji_vf_device_info},
-   [CHIP_CARRIZO] = {&carrizo_device_info, NULL},
[CHIP_POLARIS10] = {&polaris10_device_info, &polaris10_vf_device_info},
[CHIP_POLARIS11] = {&polaris11_device_info, NULL},
[CHIP_POLARIS12] = {&polaris12_device_info, NULL},
@@ -383,7 +386,6 @@ static const struct kfd_device_info 
*kfd_supported_devices[][2] = {
[CHIP_VEGA10] = {&vega10_device_info, &vega10_vf_device_info},
[CHIP_VEGA12] = {&vega12_device_info, NULL},
[CHIP_VEGA20] = {&vega20_device_info, NULL},
-   [CHIP_RAVEN] = {&raven_device_info, NULL},
[CHIP_ARCTURUS] = {&arcturus_device_info, &arcturus_device_info},
[CHIP_NAVI10] = {&navi10_device_info, NULL},
 };
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdkfd: Fill the name field in node topology with asic name v2

2019-08-16 Thread Zhao, Yong
Pushed. Will work on the market name later.

Yong

From: Kuehling, Felix 
Sent: Thursday, August 15, 2019 8:45 PM
To: Zhao, Yong ; amd-gfx@lists.freedesktop.org 

Subject: Re: [PATCH] drm/amdkfd: Fill the name field in node topology with asic 
name v2

On 2019-08-13 17:21, Zhao, Yong wrote:
> The name field in node topology has not been used. We re-purpose it to
> hold the asic name, which can be queried by user space applications
> through sysfs.
>
> Change-Id: I74f4f5487db169004a9d27ea15abe99261c86220
> Signed-off-by: Yong Zhao 

Reviewed-by: Felix Kuehling 

As a follow-up, I think you could also remove the marketing name field
from struct kfd_node_properties. As far as I can tell this is never
populated and now it's also no longer reported in sysfs.

Regards,
   Felix

> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_device.c   | 18 ++
>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  1 +
>   drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 16 ++--
>   drivers/gpu/drm/amd/amdkfd/kfd_topology.h |  4 ++--
>   4 files changed, 27 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index 3b9fe629a126..24bfdf583820 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -42,6 +42,7 @@ static atomic_t kfd_locked = ATOMIC_INIT(0);
>   #ifdef KFD_SUPPORT_IOMMU_V2
>   static const struct kfd_device_info kaveri_device_info = {
>.asic_family = CHIP_KAVERI,
> + .asic_name = "kaveri",
>.max_pasid_bits = 16,
>/* max num of queues for KV.TODO should be a dynamic value */
>.max_no_of_hqd  = 24,
> @@ -60,6 +61,7 @@ static const struct kfd_device_info kaveri_device_info = {
>
>   static const struct kfd_device_info carrizo_device_info = {
>.asic_family = CHIP_CARRIZO,
> + .asic_name = "carrizo",
>.max_pasid_bits = 16,
>/* max num of queues for CZ.TODO should be a dynamic value */
>.max_no_of_hqd  = 24,
> @@ -78,6 +80,7 @@ static const struct kfd_device_info carrizo_device_info = {
>
>   static const struct kfd_device_info raven_device_info = {
>.asic_family = CHIP_RAVEN,
> + .asic_name = "raven",
>.max_pasid_bits = 16,
>.max_no_of_hqd  = 24,
>.doorbell_size  = 8,
> @@ -96,6 +99,7 @@ static const struct kfd_device_info raven_device_info = {
>
>   static const struct kfd_device_info hawaii_device_info = {
>.asic_family = CHIP_HAWAII,
> + .asic_name = "hawaii",
>.max_pasid_bits = 16,
>/* max num of queues for KV.TODO should be a dynamic value */
>.max_no_of_hqd  = 24,
> @@ -114,6 +118,7 @@ static const struct kfd_device_info hawaii_device_info = {
>
>   static const struct kfd_device_info tonga_device_info = {
>.asic_family = CHIP_TONGA,
> + .asic_name = "tonga",
>.max_pasid_bits = 16,
>.max_no_of_hqd  = 24,
>.doorbell_size  = 4,
> @@ -131,6 +136,7 @@ static const struct kfd_device_info tonga_device_info = {
>
>   static const struct kfd_device_info fiji_device_info = {
>.asic_family = CHIP_FIJI,
> + .asic_name = "fiji",
>.max_pasid_bits = 16,
>.max_no_of_hqd  = 24,
>.doorbell_size  = 4,
> @@ -148,6 +154,7 @@ static const struct kfd_device_info fiji_device_info = {
>
>   static const struct kfd_device_info fiji_vf_device_info = {
>.asic_family = CHIP_FIJI,
> + .asic_name = "fiji",
>.max_pasid_bits = 16,
>.max_no_of_hqd  = 24,
>.doorbell_size  = 4,
> @@ -166,6 +173,7 @@ static const struct kfd_device_info fiji_vf_device_info = 
> {
>
>   static const struct kfd_device_info polaris10_device_info = {
>.asic_family = CHIP_POLARIS10,
> + .asic_name = "polaris10",
>.max_pasid_bits = 16,
>.max_no_of_hqd  = 24,
>.doorbell_size  = 4,
> @@ -183,6 +191,7 @@ static const struct kfd_device_info polaris10_device_info 
> = {
>
>   static const struct kfd_device_info polaris10_vf_device_info = {
>.asic_family = CHIP_POLARIS10,
> + .asic_name = "polaris10",
>.max_pasid_bits = 16,
>.max_no_of_hqd  = 24,
>.doorbell_size  = 4,
> @@ -200,6 +209,7 @@ static const struct kfd_device_info 
> polaris10_vf_device_info = {
>
>   static const struct kfd_device_info polaris11_device_info = {
>.asic_family = CHIP_POLARIS11,
> + .asic_name = "polaris11",
>.max_pasid_bits = 16,
>.max_no_of_hqd  = 24,
>.doorbell_siz

Re: [PATCH 1/2] drm/amdgpu: Add printing for RW extracted from VM_L2_PROTECTION_FAULT_STATUS

2019-08-14 Thread Zhao, Yong
Pushed! Thanks.

Yong

On 2019-08-14 3:27 a.m., Christian König wrote:
> Am 13.08.19 um 20:08 schrieb Zhao, Yong:
>> RW is also useful in most cases.
>>
>> Change-Id: Icf4bd65ea168e5965a6a8ebe32ce9327a2de2851
>> Signed-off-by: Yong Zhao 
>
> Reviewed-by: Christian König  for the series.
>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 3 +++
>>   1 file changed, 3 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> index 296e2d982578..ec5e858926ad 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> @@ -387,6 +387,9 @@ static int gmc_v9_0_process_interrupt(struct 
>> amdgpu_device *adev,
>>   dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
>>   REG_GET_FIELD(status,
>>   VM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
>> +    dev_err(adev->dev, "\t RW: 0x%lx\n",
>> +    REG_GET_FIELD(status,
>> +    VM_L2_PROTECTION_FAULT_STATUS, RW));
>>     }
>>   }
>
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdkfd: Fill the name field in node topology with asic name v2

2019-08-13 Thread Zhao, Yong
The name field in node topology has not been used. We re-purpose it to
hold the asic name, which can be queried by user space applications
through sysfs.

Change-Id: I74f4f5487db169004a9d27ea15abe99261c86220
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c   | 18 ++
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  1 +
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 16 ++--
 drivers/gpu/drm/amd/amdkfd/kfd_topology.h |  4 ++--
 4 files changed, 27 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 3b9fe629a126..24bfdf583820 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -42,6 +42,7 @@ static atomic_t kfd_locked = ATOMIC_INIT(0);
 #ifdef KFD_SUPPORT_IOMMU_V2
 static const struct kfd_device_info kaveri_device_info = {
.asic_family = CHIP_KAVERI,
+   .asic_name = "kaveri",
.max_pasid_bits = 16,
/* max num of queues for KV.TODO should be a dynamic value */
.max_no_of_hqd  = 24,
@@ -60,6 +61,7 @@ static const struct kfd_device_info kaveri_device_info = {
 
 static const struct kfd_device_info carrizo_device_info = {
.asic_family = CHIP_CARRIZO,
+   .asic_name = "carrizo",
.max_pasid_bits = 16,
/* max num of queues for CZ.TODO should be a dynamic value */
.max_no_of_hqd  = 24,
@@ -78,6 +80,7 @@ static const struct kfd_device_info carrizo_device_info = {
 
 static const struct kfd_device_info raven_device_info = {
.asic_family = CHIP_RAVEN,
+   .asic_name = "raven",
.max_pasid_bits = 16,
.max_no_of_hqd  = 24,
.doorbell_size  = 8,
@@ -96,6 +99,7 @@ static const struct kfd_device_info raven_device_info = {
 
 static const struct kfd_device_info hawaii_device_info = {
.asic_family = CHIP_HAWAII,
+   .asic_name = "hawaii",
.max_pasid_bits = 16,
/* max num of queues for KV.TODO should be a dynamic value */
.max_no_of_hqd  = 24,
@@ -114,6 +118,7 @@ static const struct kfd_device_info hawaii_device_info = {
 
 static const struct kfd_device_info tonga_device_info = {
.asic_family = CHIP_TONGA,
+   .asic_name = "tonga",
.max_pasid_bits = 16,
.max_no_of_hqd  = 24,
.doorbell_size  = 4,
@@ -131,6 +136,7 @@ static const struct kfd_device_info tonga_device_info = {
 
 static const struct kfd_device_info fiji_device_info = {
.asic_family = CHIP_FIJI,
+   .asic_name = "fiji",
.max_pasid_bits = 16,
.max_no_of_hqd  = 24,
.doorbell_size  = 4,
@@ -148,6 +154,7 @@ static const struct kfd_device_info fiji_device_info = {
 
 static const struct kfd_device_info fiji_vf_device_info = {
.asic_family = CHIP_FIJI,
+   .asic_name = "fiji",
.max_pasid_bits = 16,
.max_no_of_hqd  = 24,
.doorbell_size  = 4,
@@ -166,6 +173,7 @@ static const struct kfd_device_info fiji_vf_device_info = {
 
 static const struct kfd_device_info polaris10_device_info = {
.asic_family = CHIP_POLARIS10,
+   .asic_name = "polaris10",
.max_pasid_bits = 16,
.max_no_of_hqd  = 24,
.doorbell_size  = 4,
@@ -183,6 +191,7 @@ static const struct kfd_device_info polaris10_device_info = 
{
 
 static const struct kfd_device_info polaris10_vf_device_info = {
.asic_family = CHIP_POLARIS10,
+   .asic_name = "polaris10",
.max_pasid_bits = 16,
.max_no_of_hqd  = 24,
.doorbell_size  = 4,
@@ -200,6 +209,7 @@ static const struct kfd_device_info 
polaris10_vf_device_info = {
 
 static const struct kfd_device_info polaris11_device_info = {
.asic_family = CHIP_POLARIS11,
+   .asic_name = "polaris11",
.max_pasid_bits = 16,
.max_no_of_hqd  = 24,
.doorbell_size  = 4,
@@ -217,6 +227,7 @@ static const struct kfd_device_info polaris11_device_info = 
{
 
 static const struct kfd_device_info polaris12_device_info = {
.asic_family = CHIP_POLARIS12,
+   .asic_name = "polaris12",
.max_pasid_bits = 16,
.max_no_of_hqd  = 24,
.doorbell_size  = 4,
@@ -234,6 +245,7 @@ static const struct kfd_device_info polaris12_device_info = 
{
 
 static const struct kfd_device_info vegam_device_info = {
.asic_family = CHIP_VEGAM,
+   .asic_name = "vegam",
.max_pasid_bits = 16,
.max_no_of_hqd  = 24,
.doorbell_size  = 4,
@@ -251,6 +263,7 @@ static const struct kfd_device_info vegam_device_info = {
 
 static const struct kfd_device_info vega10_device_info = {
.asic_family = CHIP_VEGA10,
+   .asic_name = "vega10",
.max_pasid_bits = 16,
.max_no_of_hqd  = 24,
.doorbell_size  = 8,
@@ -268,6 +281,7 @@ static const struct kfd_device_info vega10_device_info = {
 
 static const struct kfd_device_info vega10_vf_device_info = {
.asic_family = CHIP_VEGA10,
+   .asic_name = "vega10",
 

[PATCH] drm/amdkfd: Fill amdgpu_task_info for KFD VMs

2019-08-13 Thread Zhao, Yong
The amdgpu_task_info will be used when printing VM page faults for KFD
processes.

Change-Id: Ifd983db5dceb1d477e7287e4893f80565a7a6b06
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_process.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 8f1076c0c88a..2ec42749f971 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -801,6 +801,8 @@ int kfd_process_device_init_vm(struct kfd_process_device 
*pdd,
return ret;
}
 
+   amdgpu_vm_set_task_info(pdd->vm);
+
ret = kfd_process_device_reserve_ib_mem(pdd);
if (ret)
goto err_reserve_ib_mem;
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdgpu: Set VM_L2_CNTL.PDE_FAULT_CLASSIFICATION to 0 for GFX10

2019-08-13 Thread Zhao, Yong
We have done this for pre-GFX10 asics, but GFX10 did not pick up the
new change. Below is the commit message for that change.

This is recommended by HW designers. Previously, when it was set to 1,
a PDE walk error in a VM fault would be treated as
PERMISSION_OR_INVALID_PAGE_FAULT rather than the usually expected OTHER_FAULT.
As a result, the retry control in VM_CONTEXT*_CNTL would change accordingly.

The above behavior is kind of abnormal. Furthermore, the
PDE_FAULT_CLASSIFICATION == 1 feature was targeted for very old ASICs
and it never made its way to production. Therefore, we should set it to 0.

Change-Id: If1beedb631d16b85d072aa96657a7a75fa378480
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
index 8ce5bf5feb45..8b789f750b72 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
@@ -140,7 +140,7 @@ static void gfxhub_v2_0_init_cache_regs(struct 
amdgpu_device *adev)
/* XXX for emulation, Refer to closed source code.*/
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
L2_PDE0_CACHE_TAG_GENERATION_MODE, 0);
-   tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1);
+   tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 
1);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL, tmp);
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c 
b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
index 8ee1225d1a18..3542c203c3c8 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
@@ -126,7 +126,7 @@ static void mmhub_v2_0_init_cache_regs(struct amdgpu_device 
*adev)
/* XXX for emulation, Refer to closed source code.*/
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, 
L2_PDE0_CACHE_TAG_GENERATION_MODE,
0);
-   tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1);
+   tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 
1);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL, tmp);
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 2/2] drm/amdgpu: Add more page fault info printing for GFX10

2019-08-13 Thread Zhao, Yong
The printing we did for GFX9 was not propagated to GFX10 somehow, so fix
it now.

Change-Id: Ic0b8381134340b83cd69c3fe186ac7a8a97b1bca
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 33 ++
 1 file changed, 28 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 4e3ac1084a94..ead2d3bf8a8d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -140,17 +140,40 @@ static int gmc_v10_0_process_interrupt(struct 
amdgpu_device *adev,
}
 
if (printk_ratelimit()) {
+   struct amdgpu_task_info task_info;
+
+   memset(&task_info, 0, sizeof(struct amdgpu_task_info));
+   amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
+
dev_err(adev->dev,
-   "[%s] VMC page fault (src_id:%u ring:%u vmid:%u 
pasid:%u)\n",
+   "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u, "
+   "for process %s pid %d thread %s pid %d)\n",
entry->vmid_src ? "mmhub" : "gfxhub",
entry->src_id, entry->ring_id, entry->vmid,
-   entry->pasid);
-   dev_err(adev->dev, "  at page 0x%016llx from %d\n",
+   entry->pasid, task_info.process_name, task_info.tgid,
+   task_info.task_name, task_info.pid);
+   dev_err(adev->dev, "  in page starting at address 0x%016llx 
from client %d\n",
addr, entry->client_id);
-   if (!amdgpu_sriov_vf(adev))
+   if (!amdgpu_sriov_vf(adev)) {
dev_err(adev->dev,
-   "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
+   "GCVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
status);
+   dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
+   REG_GET_FIELD(status,
+   GCVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
+   dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
+   REG_GET_FIELD(status,
+   GCVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
+   dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
+   REG_GET_FIELD(status,
+   GCVM_L2_PROTECTION_FAULT_STATUS, 
PERMISSION_FAULTS));
+   dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
+   REG_GET_FIELD(status,
+   GCVM_L2_PROTECTION_FAULT_STATUS, 
MAPPING_ERROR));
+   dev_err(adev->dev, "\t RW: 0x%lx\n",
+   REG_GET_FIELD(status,
+   GCVM_L2_PROTECTION_FAULT_STATUS, RW));
+   }
}
 
return 0;
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 1/2] drm/amdgpu: Add printing for RW extracted from VM_L2_PROTECTION_FAULT_STATUS

2019-08-13 Thread Zhao, Yong
RW is also useful in most cases.

Change-Id: Icf4bd65ea168e5965a6a8ebe32ce9327a2de2851
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 296e2d982578..ec5e858926ad 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -387,6 +387,9 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device 
*adev,
dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
REG_GET_FIELD(status,
VM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
+   dev_err(adev->dev, "\t RW: 0x%lx\n",
+   REG_GET_FIELD(status,
+   VM_L2_PROTECTION_FAULT_STATUS, RW));
 
}
}
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdgpu: Add more page fault info printing for GFX10

2019-08-12 Thread Zhao, Yong
Hi Christian,

I feel it is better with ":", because without it I found the printed 
message is not easy to interpret. Moreover, it continues the format of 
the former part of the message.

It looks like this:

[  190.686668] amdgpu 0000:03:00.0: [gfxhub0] retry page fault (src_id:0 
ring:0 vmid:8 pasid:32771, for process:kfdtest pid:3273 thread:kfdtest 
pid:3273)

vs without ":"

[  190.686668] amdgpu 0000:03:00.0: [gfxhub0] retry page fault (src_id:0 
ring:0 vmid:8 pasid:32771, for process kfdtest pid 3273 thread kfdtest 
pid 3273)


If you are not convinced, I can change it to the version without ":".


Regards,

Yong

On 2019-08-12 3:12 p.m., Christian König wrote:
> Am 12.08.19 um 21:05 schrieb Zhao, Yong:
> >> The printing we did for GFX9 was not propagated to GFX10 somehow, so fix
>> it now.
>>
>> Change-Id: Ic0b8381134340b83cd69c3fe186ac7a8a97b1bca
>> Signed-off-by: Yong Zhao 
>> ---
>>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 33 ++
>>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  |  5 +++-
>>   2 files changed, 32 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> index 4e3ac1084a94..f23be98e9897 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> @@ -140,17 +140,40 @@ static int gmc_v10_0_process_interrupt(struct 
>> amdgpu_device *adev,
>>   }
>>     if (printk_ratelimit()) {
>> +    struct amdgpu_task_info task_info;
>> +
>> +    memset(&task_info, 0, sizeof(struct amdgpu_task_info));
>> +    amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
>> +
>>   dev_err(adev->dev,
>> -    "[%s] VMC page fault (src_id:%u ring:%u vmid:%u 
>> pasid:%u)\n",
>> +    "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u, "
>> +    "for process:%s pid:%d thread:%s pid:%d)\n",
>>   entry->vmid_src ? "mmhub" : "gfxhub",
>>   entry->src_id, entry->ring_id, entry->vmid,
>> -    entry->pasid);
>> -    dev_err(adev->dev, "  at page 0x%016llx from %d\n",
>> +    entry->pasid, task_info.process_name, task_info.tgid,
>> +    task_info.task_name, task_info.pid);
>> +    dev_err(adev->dev, "  in page starting at address 0x%016llx 
>> from client %d\n",
>>   addr, entry->client_id);
>> -    if (!amdgpu_sriov_vf(adev))
>> +    if (!amdgpu_sriov_vf(adev)) {
>>   dev_err(adev->dev,
>> -    "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
>> +    "GCVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
>>   status);
>> +    dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
>> +    REG_GET_FIELD(status,
>> +    GCVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
>> +    dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
>> +    REG_GET_FIELD(status,
>> +    GCVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
>> +    dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
>> +    REG_GET_FIELD(status,
>> +    GCVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
>> +    dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
>> +    REG_GET_FIELD(status,
>> +    GCVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
>> +    dev_err(adev->dev, "\t RW: 0x%lx\n",
>> +    REG_GET_FIELD(status,
>> +    GCVM_L2_PROTECTION_FAULT_STATUS, RW));
>> +    }
>>   }
>>     return 0;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> index 296e2d982578..34c4c2d08550 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> @@ -364,7 +364,7 @@ static int gmc_v9_0_process_interrupt(struct 
>> amdgpu_device *adev,
>>     dev_err(adev->dev,
>>   "[%s] %s page fault (src_id:%u ring:%u vmid:%u "
>> -    "pasid:%u, for process %s pid %d thread %s pid %d)\n",
>> +    "pasid:%u, for process:%s pid:%d thread:%s pid:%d)\n",
>
> I think the text actually looks better without the ":".
>
>>   hub_name, retry_fault ? "retry" : "no-retry",
>>    

  1   2   >