[PATCH] drm/amdkfd: ratelimited override pte flags messages

2023-09-28 Thread Philip Yang
Use the ratelimited version of dev_dbg to avoid flooding the dmesg log. No
functional change.

Signed-off-by: Philip Yang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 16 ++++++++--------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index 96d601e209b8..6d2878684287 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -848,7 +848,7 @@ static void amdgpu_vm_pte_update_flags(struct 
amdgpu_vm_update_params *params,
amdgpu_gmc_override_vm_pte_flags(adev, params->vm,
 addr, &flags);
else
-   dev_dbg(adev->dev,
+   dev_dbg_ratelimited(adev->dev,
"override_vm_pte_flags skipped: 
non-contiguous\n");
}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 6b15677c0314..ad0d63472e17 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1244,7 +1244,7 @@ static void gmc_v9_0_override_vm_pte_flags(struct 
amdgpu_device *adev,
 * the DMA address.
 */
if (!adev->ram_is_direct_mapped) {
-   dev_dbg(adev->dev, "RAM is not direct mapped\n");
+   dev_dbg_ratelimited(adev->dev, "RAM is not direct mapped\n");
return;
}
 
@@ -1253,7 +1253,7 @@ static void gmc_v9_0_override_vm_pte_flags(struct 
amdgpu_device *adev,
 */
if ((*flags & AMDGPU_PTE_MTYPE_VG10_MASK) !=
AMDGPU_PTE_MTYPE_VG10(MTYPE_NC)) {
-   dev_dbg(adev->dev, "MTYPE is not NC\n");
+   dev_dbg_ratelimited(adev->dev, "MTYPE is not NC\n");
return;
}
 
@@ -1264,7 +1264,7 @@ static void gmc_v9_0_override_vm_pte_flags(struct 
amdgpu_device *adev,
if (adev->gmc.is_app_apu && vm->mem_id >= 0) {
local_node = adev->gmc.mem_partitions[vm->mem_id].numa.node;
} else {
-   dev_dbg(adev->dev, "Only native mode APU is supported.\n");
+   dev_dbg_ratelimited(adev->dev, "Only native mode APU is 
supported.\n");
return;
}
 
@@ -1272,12 +1272,12 @@ static void gmc_v9_0_override_vm_pte_flags(struct 
amdgpu_device *adev,
 * page or NUMA nodes.
 */
if (!page_is_ram(addr >> PAGE_SHIFT)) {
-   dev_dbg(adev->dev, "Page is not RAM.\n");
+   dev_dbg_ratelimited(adev->dev, "Page is not RAM.\n");
return;
}
nid = pfn_to_nid(addr >> PAGE_SHIFT);
-   dev_dbg(adev->dev, "vm->mem_id=%d, local_node=%d, nid=%d\n",
-   vm->mem_id, local_node, nid);
+   dev_dbg_ratelimited(adev->dev, "vm->mem_id=%d, local_node=%d, nid=%d\n",
+   vm->mem_id, local_node, nid);
if (nid == local_node) {
uint64_t old_flags = *flags;
unsigned int mtype_local = MTYPE_RW;
@@ -1289,8 +1289,8 @@ static void gmc_v9_0_override_vm_pte_flags(struct 
amdgpu_device *adev,
 
*flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) |
 AMDGPU_PTE_MTYPE_VG10(mtype_local);
-   dev_dbg(adev->dev, "flags updated from %llx to %llx\n",
-   old_flags, *flags);
+   dev_dbg_ratelimited(adev->dev, "flags updated from %llx to 
%llx\n",
+   old_flags, *flags);
}
 }
 
-- 
2.35.1
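
For context, dev_dbg_ratelimited() keeps the message and loglevel but gates
it behind a per-callsite ratelimit state. A minimal sketch of the pattern it
expands to (simplified from include/linux/ratelimit.h; the real macro also
integrates with dynamic debug):

  #include <linux/device.h>
  #include <linux/ratelimit.h>

  static void example_dbg(struct device *dev, u64 addr)
  {
          /* allow a burst of messages (default 10) per interval
           * (default 5 * HZ), then silently drop the rest
           */
          static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
                                        DEFAULT_RATELIMIT_BURST);

          if (__ratelimit(&rs))
                  dev_dbg(dev, "override_vm_pte_flags skipped at %llx\n", addr);
  }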



[PATCH] drm/amd: Fix logic error in sienna_cichlid_update_pcie_parameters()

2023-09-28 Thread Mario Limonciello
While aligning the SMU11 implementation with SMU13, an assumption was made
that `dpm_context->dpm_tables.pcie_table` was populated during dpm table
initialization as it is in SMU13, but it isn't.

So restore some of the original logic and instead just check
amdgpu_device_pcie_dynamic_switching_supported() to decide whether to
hardcode values, erring on the side of performance.

Cc: sta...@vger.kernel.org # 6.1+
Reported-and-tested-by: Umio Yasuno 
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/1447#note_2101382
Fixes: e701156ccc6c ("drm/amd: Align SMU11 SMU_MSG_OverridePcieParameters 
implementation with SMU13")
Signed-off-by: Mario Limonciello 
---
 .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   | 41 +++
 1 file changed, 23 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index f0800c0c5168..9119b0df2419 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -2081,36 +2081,41 @@ static int 
sienna_cichlid_display_disable_memory_clock_switch(struct smu_context
return ret;
 }
 
+#define MAX(a, b)  ((a) > (b) ? (a) : (b))
+
 static int sienna_cichlid_update_pcie_parameters(struct smu_context *smu,
 uint32_t pcie_gen_cap,
 uint32_t pcie_width_cap)
 {
struct smu_11_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context;
struct smu_11_0_pcie_table *pcie_table = 
&dpm_context->dpm_tables.pcie_table;
-   u32 smu_pcie_arg;
+   uint8_t *table_member1, *table_member2;
+   uint32_t min_gen_speed, max_gen_speed;
+   uint32_t min_lane_width, max_lane_width;
+   uint32_t smu_pcie_arg;
int ret, i;
 
-   /* PCIE gen speed and lane width override */
-   if (!amdgpu_device_pcie_dynamic_switching_supported()) {
-   if (pcie_table->pcie_gen[NUM_LINK_LEVELS - 1] < pcie_gen_cap)
-   pcie_gen_cap = pcie_table->pcie_gen[NUM_LINK_LEVELS - 1];
+   GET_PPTABLE_MEMBER(PcieGenSpeed, &table_member1);
+   GET_PPTABLE_MEMBER(PcieLaneCount, &table_member2);
 
-   if (pcie_table->pcie_lane[NUM_LINK_LEVELS - 1] < pcie_width_cap)
-   pcie_width_cap = pcie_table->pcie_lane[NUM_LINK_LEVELS - 1];
+   min_gen_speed = MAX(0, table_member1[0]);
+   max_gen_speed = MIN(pcie_gen_cap, table_member1[1]);
+   min_gen_speed = min_gen_speed > max_gen_speed ?
+   max_gen_speed : min_gen_speed;
+   min_lane_width = MAX(1, table_member2[0]);
+   max_lane_width = MIN(pcie_width_cap, table_member2[1]);
+   min_lane_width = min_lane_width > max_lane_width ?
+max_lane_width : min_lane_width;
 
-   /* Force all levels to use the same settings */
-   for (i = 0; i < NUM_LINK_LEVELS; i++) {
-   pcie_table->pcie_gen[i] = pcie_gen_cap;
-   pcie_table->pcie_lane[i] = pcie_width_cap;
-   }
+   if (!amdgpu_device_pcie_dynamic_switching_supported()) {
+   pcie_table->pcie_gen[0] = max_gen_speed;
+   pcie_table->pcie_lane[0] = max_lane_width;
} else {
-   for (i = 0; i < NUM_LINK_LEVELS; i++) {
-   if (pcie_table->pcie_gen[i] > pcie_gen_cap)
-   pcie_table->pcie_gen[i] = pcie_gen_cap;
-   if (pcie_table->pcie_lane[i] > pcie_width_cap)
-   pcie_table->pcie_lane[i] = pcie_width_cap;
-   }
+   pcie_table->pcie_gen[0] = min_gen_speed;
+   pcie_table->pcie_lane[0] = min_lane_width;
}
+   pcie_table->pcie_gen[1] = max_gen_speed;
+   pcie_table->pcie_lane[1] = max_lane_width;
 
for (i = 0; i < NUM_LINK_LEVELS; i++) {
smu_pcie_arg = (i << 16 |
-- 
2.34.1
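
The reworked logic clamps both DPM levels against the pptable and the
platform caps. A minimal sketch of the clamp, assuming entry 0 of
PcieGenSpeed/PcieLaneCount is the table minimum and entry 1 the maximum
(the helper name and the MIN macro here are illustrative):

  #define MIN(a, b)  ((a) < (b) ? (a) : (b))
  #define MAX(a, b)  ((a) > (b) ? (a) : (b))

  static void clamp_pcie_level(uint32_t tbl_min, uint32_t tbl_max,
                               uint32_t cap, uint32_t floor,
                               uint32_t *lvl_min, uint32_t *lvl_max)
  {
          *lvl_min = MAX(floor, tbl_min);  /* gen floor 0, lane floor 1 */
          *lvl_max = MIN(cap, tbl_max);    /* never exceed the platform cap */
          if (*lvl_min > *lvl_max)         /* capping the max can undercut the min */
                  *lvl_min = *lvl_max;
  }

Level 0 then gets the minimum (or the maximum when dynamic switching is
unsupported) and level 1 the maximum, matching the hunk above.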



Re: [PATCH 0/5] Add the pci_get_base_class() helper and use it

2023-09-28 Thread Bjorn Helgaas
On Fri, Aug 25, 2023 at 02:27:09PM +0800, Sui Jingfeng wrote:
> From: Sui Jingfeng 
> 
> There is no function that can be used to get all PCI(e) devices in a
> system by matching against the PCI base class code only, while keeping
> the sub-class code and the programming interface ignored. Therefore, add
> the pci_get_base_class() function to suit the need.
> 
> For example, if an application wants to process all PCI(e) display devices
> in a system, it can achieve such a goal by writing code as follows:
> 
> pdev = NULL;
> do {
> pdev = pci_get_base_class(PCI_BASE_CLASS_DISPLAY, pdev);
> if (!pdev)
> break;
> 
> do_something_for_pci_display_device(pdev);
> } while (1);
> 
> Sui Jingfeng (5):
>   PCI: Add the pci_get_base_class() helper
>   ALSA: hda/intel: Use pci_get_base_class() to reduce duplicated code
>   drm/nouveau: Use pci_get_base_class() to reduce duplicated code
>   drm/amdgpu: Use pci_get_base_class() to reduce duplicated code
>   drm/radeon: Use pci_get_base_class() to reduce duplicated code
> 
>  drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 11 +++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c | 20 ---
>  drivers/gpu/drm/nouveau/nouveau_acpi.c   | 11 +++--
>  drivers/gpu/drm/radeon/radeon_bios.c | 20 ---
>  drivers/pci/search.c | 31 
>  include/linux/pci.h  |  5 
>  sound/pci/hda/hda_intel.c| 16 
>  7 files changed, 59 insertions(+), 55 deletions(-)

Applied to pci/enumeration for v6.7, thanks.
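
For what it's worth, the example loop in the cover letter can be written
more compactly. Assuming the new helper follows the reference-counting
convention of the existing pci_get_device()/pci_get_class() iterators
(drop the reference on the device passed in, return the next match with an
elevated refcount), a caller that exits the loop early must drop the last
reference itself (the int return on the worker here is illustrative):

  struct pci_dev *pdev = NULL;

  while ((pdev = pci_get_base_class(PCI_BASE_CLASS_DISPLAY, pdev))) {
          if (do_something_for_pci_display_device(pdev)) {
                  pci_dev_put(pdev);      /* early exit keeps a reference */
                  break;
          }
  }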


Re: [PATCH] drm/amdkfd: Wait vm update fence after retry fault recovered

2023-09-28 Thread Felix Kuehling

On 2023-09-22 17:37, Philip Yang wrote:

Otherwise KFD's TLB flush does nothing if the VM update fence callback
doesn't update vm->tlb_seq, and the hardware will generate the retry fault
again.

This works now only because retry faults keep coming: recovery will update
the page table again after the AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING timeout
and flush the TLB.


I think I'm OK with this change. But as I understand it, this is really 
part of another patch series that depends on this fix. It's not needed 
with the way we currently handle retry faults. Am I misunderstanding it?


This is not an optimal solution, but I think it's only meant to be 
temporary. I think we want to get to a solution that allows us to 
schedule TLB flushes asynchronously using the fences. For now, the 
impact is limited to small-BAR GPUs that use SDMA for page table 
updates, so I'm OK with that.


Regards,
  Felix



Remove the wait parameter from svm_range_validate_and_map because it is
always called with true.

Signed-off-by: Philip Yang 
---
  drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 15 +++
  1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 70aa882636ab..61f4de1633a8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1447,7 +1447,7 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, 
struct svm_range *prange,
  static int
  svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
  unsigned long npages, bool readonly,
- unsigned long *bitmap, bool wait, bool flush_tlb)
+ unsigned long *bitmap, bool flush_tlb)
  {
struct kfd_process_device *pdd;
struct amdgpu_device *bo_adev = NULL;
@@ -1480,8 +1480,7 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned 
long offset,
  
  		r = svm_range_map_to_gpu(pdd, prange, offset, npages, readonly,

 prange->dma_addr[gpuidx],
-				 bo_adev, wait ? &fence : NULL,
-				 flush_tlb);
+				 bo_adev, &fence, flush_tlb);
if (r)
break;
  
@@ -1605,7 +1604,7 @@ static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)

   */
  static int svm_range_validate_and_map(struct mm_struct *mm,
  struct svm_range *prange, int32_t gpuidx,
- bool intr, bool wait, bool flush_tlb)
+ bool intr, bool flush_tlb)
  {
struct svm_validate_context *ctx;
unsigned long start, end, addr;
@@ -1729,7 +1728,7 @@ static int svm_range_validate_and_map(struct mm_struct 
*mm,
  
  		if (!r)

		r = svm_range_map_to_gpus(prange, offset, npages, readonly,
- ctx->bitmap, wait, flush_tlb);
+ ctx->bitmap, flush_tlb);
  
  		if (!r && next == end)

prange->mapped_to_gpu = true;
@@ -1823,7 +1822,7 @@ static void svm_range_restore_work(struct work_struct 
*work)
	mutex_lock(&prange->migrate_mutex);
  
  		r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,

-  false, true, false);
+  false, false);
if (r)
pr_debug("failed %d to map 0x%lx to gpus\n", r,
 prange->start);
@@ -3064,7 +3063,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
unsigned int pasid,
}
}
  
-	r = svm_range_validate_and_map(mm, prange, gpuidx, false, false, false);

+   r = svm_range_validate_and_map(mm, prange, gpuidx, false, false);
if (r)
pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
 r, svms, prange->start, prange->last);
@@ -3603,7 +3602,7 @@ svm_range_set_attr(struct kfd_process *p, struct 
mm_struct *mm,
		flush_tlb = !migrated && update_mapping && prange->mapped_to_gpu;
  
  		r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,

-  true, true, flush_tlb);
+  true, flush_tlb);
if (r)
pr_debug("failed %d to map svm range\n", r);
  


Re: [PATCH v3] drm/amdkfd: Use partial migrations in GPU page faults

2023-09-28 Thread Felix Kuehling

On 2023-09-20 13:32, Xiaogang.Chen wrote:

From: Xiaogang Chen 

This patch implements partial migration in GPU page fault handling according
to the migration granularity (default 2MB) and does not split the svm range
in CPU page fault handling. An svm range may now include pages from both
system RAM and VRAM of one GPU. These changes are expected to improve
migration performance and reduce MMU callback and TLB flush workloads.

Signed-off-by: xiaogang chen


Some more nit-picks inline.



---
  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 156 +--
  drivers/gpu/drm/amd/amdkfd/kfd_migrate.h |   6 +-
  drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 104 +++
  drivers/gpu/drm/amd/amdkfd/kfd_svm.h |   6 +-
  4 files changed, 178 insertions(+), 94 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 6c25dab051d5..e886f9ce40ac 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -442,10 +442,10 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
goto out_free;
}
if (cpages != npages)
-   pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
+   pr_debug("partial migration, 0x%lx/0x%llx pages collected\n",
 cpages, npages);
else
-   pr_debug("0x%lx pages migrated\n", cpages);
+   pr_debug("0x%lx pages collected\n", cpages);
  
	r = svm_migrate_copy_to_vram(node, prange, &migrate, &mfence, scratch, ttm_res_offset);

	migrate_vma_pages(&migrate);
@@ -479,6 +479,8 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
   * svm_migrate_ram_to_vram - migrate svm range from system to device
   * @prange: range structure
   * @best_loc: the device to migrate to
+ * @start_mgr: start page to migrate
+ * @last_mgr: last page to migrate
   * @mm: the process mm structure
   * @trigger: reason of migration
   *
@@ -489,6 +491,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
   */
  static int
  svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
+   unsigned long start_mgr, unsigned long last_mgr,
struct mm_struct *mm, uint32_t trigger)
  {
unsigned long addr, start, end;
@@ -498,23 +501,30 @@ svm_migrate_ram_to_vram(struct svm_range *prange, 
uint32_t best_loc,
unsigned long cpages = 0;
long r = 0;
  
-	if (prange->actual_loc == best_loc) {

-   pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
-prange->svms, prange->start, prange->last, best_loc);
+   if (!best_loc) {
+   pr_debug("svms 0x%p [0x%lx 0x%lx] migrate to sys ram\n",
+   prange->svms, start_mgr, last_mgr);
return 0;
}
  
+	if (start_mgr < prange->start || last_mgr > prange->last) {

+   pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n",
+			 start_mgr, last_mgr, prange->start, prange->last);
+   return -EFAULT;
+   }
+
node = svm_range_get_node_by_id(prange, best_loc);
if (!node) {
pr_debug("failed to get kfd node by id 0x%x\n", best_loc);
return -ENODEV;
}
  
-	pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms,

-prange->start, prange->last, best_loc);
+   pr_debug("svms 0x%p [0x%lx 0x%lx] in [0x%lx 0x%lx] to gpu 0x%x\n",
+   prange->svms, start_mgr, last_mgr, prange->start, prange->last,
+   best_loc);
  
-	start = prange->start << PAGE_SHIFT;

-   end = (prange->last + 1) << PAGE_SHIFT;
+   start = start_mgr << PAGE_SHIFT;
+   end = (last_mgr + 1) << PAGE_SHIFT;
  
  	r = svm_range_vram_node_new(node, prange, true);

if (r) {
@@ -544,8 +554,11 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t 
best_loc,
  
  	if (cpages) {

prange->actual_loc = best_loc;
-   svm_range_dma_unmap(prange);
-   } else {
+   prange->vram_pages = prange->vram_pages + cpages;
+   } else if (!prange->actual_loc) {
+   /* if no pages were migrated and all pages of prange are in
+    * sys ram, drop the svm_bo obtained from svm_range_vram_node_new
+    */
svm_range_vram_node_free(prange);
}
  
@@ -663,19 +676,19 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,

   * Context: Process context, caller hold mmap read lock, prange->migrate_mutex
   *
   * Return:
- *   0 - success with all pages migrated
   *   negative values - indicate error
- *   positive values - partial migration, number of pages not migrated
+ *   positive values or zero - number of pages migrated
   */
  static long
  svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
-

Re: [PATCH v3 07/32] drm/amd/display: document AMDGPU pre-defined transfer functions

2023-09-28 Thread Harry Wentland



On 2023-09-25 15:49, Melissa Wen wrote:
> Brief documentation about pre-defined transfer function usage on AMD
> display driver and standardized EOTFs and inverse EOTFs.
> 
> v3:
> - Document BT709 OETF (Pekka)
> - Fix description of sRGB and pure power funcs (Pekka)
> 
> Co-developed-by: Harry Wentland 
> Signed-off-by: Harry Wentland 
> Signed-off-by: Melissa Wen 
> ---
>  .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 39 +++
>  1 file changed, 39 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
> index d03bdb010e8b..14f9c02539c6 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
> @@ -85,6 +85,45 @@ void amdgpu_dm_init_color_mod(void)
>  }
>  
>  #ifdef AMD_PRIVATE_COLOR
> +/* Pre-defined Transfer Functions (TF)
> + *
> + * AMD driver supports pre-defined mathematical functions for transferring
> + * between encoded values and optical/linear space. Depending on HW color 
> caps,
> + * ROMs and curves built by the AMD color module support these transforms.
> + *
> + * The driver-specific color implementation exposes properties for 
> pre-blending
> + * degamma TF, shaper TF (before 3D LUT), and blend(dpp.ogam) TF and
> + * post-blending regamma (mpc.ogam) TF. However, only pre-blending degamma
> + * supports ROM curves. AMD color module uses pre-defined coefficients to 
> build
> + * curves for the other blocks. What can be done by each color block is
> + * described by struct dpp_color_caps and struct mpc_color_caps.
> + *
> + * AMD driver-specific color API exposes the following pre-defined transfer
> + * functions:
> + *
> + * - Linear/Unity: linear/identity relationship between pixel value and
> + *   luminance value;
> + * - Gamma 2.2, Gamma 2.4, Gamma 2.6: pure power functions;
> + * - sRGB: 2.4: The piece-wise transfer function from IEC 61966-2-1:1999;
> + * - BT.709: has a linear segment in the bottom part and then a power 
> function
> + *   with a 0.45 (~1/2.22) gamma for the rest of the range; standardized by
> + *   ITU-R BT.709-6;
> + * - PQ (Perceptual Quantizer): used for HDR display, allows luminance range
> + *   capability of 0 to 10,000 nits; standardized by SMPTE ST 2084.
> + *

I think it's important to highlight that the AMD color model is
designed with an assumption that SDR (sRGB, BT.709, G2.2, etc.)
peak white maps (normalized to 1.0 FP) to 80 nits in the PQ system.
This has the implication that PQ EOTF (NL-to-L) maps to [0.0..125.0].
125.0 = 10,000 nits / 80 nits

I think we'll want a table or some other way of describing this:

(Using L to mean linear and NL to mean non-linear.)

== sRGB, BT709, Gamma 2.x ==
NL form is either UNORM or [0.0, 1.0]
L form is [0.0, 1.0]

Note that the HDR multiplier can range well beyond [0.0, 1.0].
In practice this means that PQ TF is needed for any subsequent
L-to-NL transforms.

== PQ ==
NL form is either UNORM or FP16 CCCS (Windows canonical composition color 
space, see [1])
L form is [0.0, 125.0]

== Unity, Default ==
NL form is either UNORM or FP16 CCCS
L form is either [0.0, 1.0] (mapping from UNORM) or CCCS (mapping from CCCS 
FP16)

Harry

> + * In the driver-specific API, color block names attached to TF properties
> + * suggest the intention regarding non-linear encoding pixel's luminance
> + * values. As some newer encodings don't use gamma curve, we make encoding 
> and
> + * decoding explicit by defining an enum list of transfer functions supported
> + * in terms of EOTF and inverse EOTF, where:
> + *
> + * - EOTF (electro-optical transfer function): is the transfer function to go
> + *   from the encoded value to an optical (linear) value. De-gamma functions
> + *   traditionally do this.
> + * - Inverse EOTF (simply the inverse of the EOTF): is usually intended to go
> + *   from an optical/linear space (which might have been used for blending)
> + *   back to the encoded values. Gamma functions traditionally do this.
> + */
>  static const char * const
>  amdgpu_transfer_function_names[] = {
>   [AMDGPU_TRANSFER_FUNCTION_DEFAULT]  = "Default",
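
Harry's numbers can be made concrete with the ST 2084 (PQ) EOTF. A plain-C
sketch of the decode plus the 80-nit normalization described above (the
constants are the standard SMPTE ST 2084 values; illustrative, not driver
code):

  #include <math.h>

  /* PQ EOTF: non-linear [0.0, 1.0] -> linear [0.0, 125.0],
   * in units where 1.0 == 80 nits, so 10,000 nits == 125.0.
   */
  static double pq_eotf_80nit_units(double nl)
  {
          const double m1 = 2610.0 / 16384.0;            /* 0.1593017578125 */
          const double m2 = 2523.0 / 4096.0 * 128.0;     /* 78.84375 */
          const double c1 = 3424.0 / 4096.0;             /* 0.8359375 */
          const double c2 = 2413.0 / 4096.0 * 32.0;      /* 18.8515625 */
          const double c3 = 2392.0 / 4096.0 * 32.0;      /* 18.6875 */
          double p = pow(nl, 1.0 / m2);
          double nits = 10000.0 *
                  pow(fmax(p - c1, 0.0) / (c2 - c3 * p), 1.0 / m1);

          return nits / 80.0;
  }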




[PATCH 2/4] drm/amdgpu: cache gpuvm fault information for gmc7+

2023-09-28 Thread Alex Deucher
Cache the current fault info in the vm struct.  This can be queried
by userspace later to help debug UMDs.

Cc: samuel.pitoi...@gmail.com
Acked-by: Guchun Chen 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c |  3 +++
 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c |  3 +++
 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c  |  3 +++
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c  |  3 +++
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 11 +++++++----
 5 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 70370b412d24..79fac90f423d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -149,6 +149,9 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device 
*adev,
 
status = RREG32(hub->vm_l2_pro_fault_status);
WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
+
+   amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status,
+				     entry->vmid_src ? AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0));
}
 
if (!printk_ratelimit())
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index d0a9ee2f12d3..a8d513f56148 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -119,6 +119,9 @@ static int gmc_v11_0_process_interrupt(struct amdgpu_device 
*adev,
 
status = RREG32(hub->vm_l2_pro_fault_status);
WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
+
+   amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status,
+				     entry->vmid_src ? AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0));
}
 
if (printk_ratelimit()) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index a72dc21cf6fc..516505161f51 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -1268,6 +1268,9 @@ static int gmc_v7_0_process_interrupt(struct 
amdgpu_device *adev,
if (!addr && !status)
return 0;
 
+   amdgpu_vm_update_fault_cache(adev, entry->pasid,
+				     ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status, AMDGPU_GFXHUB(0));
+
if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_FIRST)
gmc_v7_0_set_fault_enable_default(adev, false);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 8ce77d074d17..39ad04119f31 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -1436,6 +1436,9 @@ static int gmc_v8_0_process_interrupt(struct 
amdgpu_device *adev,
if (!addr && !status)
return 0;
 
+   amdgpu_vm_update_fault_cache(adev, entry->pasid,
+				     ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status, AMDGPU_GFXHUB(0));
+
if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_FIRST)
gmc_v8_0_set_fault_enable_default(adev, false);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 6b15677c0314..35fef255c1a1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -553,6 +553,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device 
*adev,
struct amdgpu_vmhub *hub;
const char *mmhub_cid;
const char *hub_name;
+   unsigned int vmhub;
u64 addr;
uint32_t cam_index = 0;
int ret, xcc_id = 0;
@@ -565,10 +566,10 @@ static int gmc_v9_0_process_interrupt(struct 
amdgpu_device *adev,
 
if (entry->client_id == SOC15_IH_CLIENTID_VMC) {
hub_name = "mmhub0";
-		hub = &adev->vmhub[AMDGPU_MMHUB0(node_id / 4)];
+   vmhub = AMDGPU_MMHUB0(node_id / 4);
} else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) {
hub_name = "mmhub1";
-		hub = &adev->vmhub[AMDGPU_MMHUB1(0)];
+   vmhub = AMDGPU_MMHUB1(0);
} else {
hub_name = "gfxhub0";
if (adev->gfx.funcs->ih_node_to_logical_xcc) {
@@ -577,8 +578,9 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device 
*adev,
if (xcc_id < 0)
xcc_id = 0;
}
-		hub = &adev->vmhub[xcc_id];
+   vmhub = xcc_id;
}
+	hub = &adev->vmhub[vmhub];
 
if (retry_fault) {
if (adev->irq.retry_cam_enabled) {
@@ -624,7 +626,6 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device 
*adev,
if (!printk_ratelimit())
return 0;
 
-
	memset(&task_info, 0, sizeof(struct amdgpu_task_info));
	amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
 
@@ -660,6 +661,8 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device 
*adev,
   

[PATCH 3/4] drm/amdgpu: add new INFO ioctl query for the last GPU page fault

2023-09-28 Thread Alex Deucher
Add an interface to query the last GPU page fault for the process.
Useful for debugging context lost errors.

v2: split vmhub representation between kernel and userspace
v3: add locking when fetching fault info in INFO IOCTL

Mesa MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23238
libdrm MR: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/298

Cc: samuel.pitoi...@gmail.com
Acked-by: Guchun Chen 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 20 
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 16 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  | 13 ++---
 include/uapi/drm/amdgpu_drm.h   | 16 
 5 files changed, 63 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index d9052475f2fc..d2cf1c39563b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -113,9 +113,10 @@
  *gl1c_cache_size, gl2c_cache_size, mall_size, 
enabled_rb_pipes_mask_hi
  *   3.53.0 - Support for GFX11 CP GFX shadowing
  *   3.54.0 - Add AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS support
+ * - 3.55.0 - Add AMDGPU_INFO_GPUVM_FAULT query
  */
 #define KMS_DRIVER_MAJOR   3
-#define KMS_DRIVER_MINOR   54
+#define KMS_DRIVER_MINOR   55
 #define KMS_DRIVER_PATCHLEVEL  0
 
 /*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 081bd28e2443..6cd77c21048b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1224,6 +1224,26 @@ int amdgpu_info_ioctl(struct drm_device *dev, void 
*data, struct drm_file *filp)
		return copy_to_user(out, &max_ibs,
				    min((size_t)size, sizeof(max_ibs))) ? -EFAULT : 0;
}
+   case AMDGPU_INFO_GPUVM_FAULT: {
+   struct amdgpu_fpriv *fpriv = filp->driver_priv;
+   struct amdgpu_vm *vm = >vm;
+   struct drm_amdgpu_info_gpuvm_fault gpuvm_fault;
+   unsigned long flags;
+
+   if (!vm)
+   return -EINVAL;
+
+		memset(&gpuvm_fault, 0, sizeof(gpuvm_fault));
+
+		xa_lock_irqsave(&adev->vm_manager.pasids, flags);
+   gpuvm_fault.addr = vm->fault_info.addr;
+   gpuvm_fault.status = vm->fault_info.status;
+   gpuvm_fault.vmhub = vm->fault_info.vmhub;
+		xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
+
+		return copy_to_user(out, &gpuvm_fault,
+				    min((size_t)size, sizeof(gpuvm_fault))) ? -EFAULT : 0;
+   }
default:
DRM_DEBUG_KMS("Invalid request %d\n", info->query);
return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 91e36b0ad062..4058ed49e5a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2756,7 +2756,21 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device 
*adev,
if (vm) {
vm->fault_info.addr = addr;
vm->fault_info.status = status;
-   vm->fault_info.vmhub = vmhub;
+   if (AMDGPU_IS_GFXHUB(vmhub)) {
+   vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_GFX;
+   vm->fault_info.vmhub |=
+				(vmhub - AMDGPU_GFXHUB_START) << AMDGPU_VMHUB_IDX_SHIFT;
+   } else if (AMDGPU_IS_MMHUB0(vmhub)) {
+   vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_MM0;
+   vm->fault_info.vmhub |=
+				(vmhub - AMDGPU_MMHUB0_START) << AMDGPU_VMHUB_IDX_SHIFT;
+   } else if (AMDGPU_IS_MMHUB1(vmhub)) {
+   vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_MM1;
+   vm->fault_info.vmhub |=
+				(vmhub - AMDGPU_MMHUB1_START) << AMDGPU_VMHUB_IDX_SHIFT;
+   } else {
+   WARN_ONCE(1, "Invalid vmhub %u\n", vmhub);
+   }
}
xa_unlock_irqrestore(>vm_manager.pasids, flags);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 2bf328d9e04b..411d42fecfb6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -124,9 +124,16 @@ struct amdgpu_mem_stats;
  * layout: max 8 GFXHUB + 4 MMHUB0 + 1 MMHUB1
  */
 #define AMDGPU_MAX_VMHUBS  13
-#define AMDGPU_GFXHUB(x)   (x)
-#define AMDGPU_MMHUB0(x)   (8 + x)
-#define AMDGPU_MMHUB1(x)   (8 + 4 + x)
+#define AMDGPU_GFXHUB_START	0
+#define AMDGPU_MMHUB0_START	8
+#define AMDGPU_MMHUB1_START	12
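
For reference, a userspace query against this interface might look like the
hedged sketch below. The struct and query names come from this patch's uapi
additions; the mask names in the decode comment are assumptions based on the
type/index split described above, and real consumers would normally go
through libdrm rather than raw ioctl():

  #include <stdint.h>
  #include <string.h>
  #include <sys/ioctl.h>
  #include <libdrm/amdgpu_drm.h>

  static int query_last_gpuvm_fault(int fd,
                                    struct drm_amdgpu_info_gpuvm_fault *out)
  {
          struct drm_amdgpu_info req;

          memset(&req, 0, sizeof(req));
          req.return_pointer = (uintptr_t)out;  /* kernel copies result here */
          req.return_size = sizeof(*out);
          req.query = AMDGPU_INFO_GPUVM_FAULT;

          if (ioctl(fd, DRM_IOCTL_AMDGPU_INFO, &req))
                  return -1;

          /* out->vmhub packs hub type and instance index, e.g. (assumed
           * mask names): type = vmhub & TYPE_MASK;
           * idx = (vmhub & IDX_MASK) >> AMDGPU_VMHUB_IDX_SHIFT;
           */
          return 0;
  }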

[PATCH 4/4] drm/amdgpu: refine fault cache updates

2023-09-28 Thread Alex Deucher
Don't update the fault cache if status is 0.  In the multiple
fault case, subsequent faults will return a 0 status which is
useless for userspace and replaces the useful fault status, so
only update if status is non-0.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 4058ed49e5a6..afc19341334f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2753,7 +2753,12 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device 
*adev,
	xa_lock_irqsave(&adev->vm_manager.pasids, flags);
 
	vm = xa_load(&adev->vm_manager.pasids, pasid);
-   if (vm) {
+   /* Don't update the fault cache if status is 0.  In the multiple
+* fault case, subsequent faults will return a 0 status which is
+* useless for userspace and replaces the useful fault status, so
+* only update if status is non-0.
+*/
+   if (vm && status) {
vm->fault_info.addr = addr;
vm->fault_info.status = status;
if (AMDGPU_IS_GFXHUB(vmhub)) {
-- 
2.41.0



[PATCH 1/4] drm/amdgpu: add cached GPU fault structure to vm struct

2023-09-28 Thread Alex Deucher
When we get a GPU page fault, cache the fault for later
analysis.

Cc: samuel.pitoi...@gmail.com
Acked-by: Guchun Chen 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 31 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 18 +++
 2 files changed, 49 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 8ce91f69bbeb..91e36b0ad062 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2730,3 +2730,34 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, 
struct seq_file *m)
   total_done_objs);
 }
 #endif
+
+/**
+ * amdgpu_vm_update_fault_cache - update cached fault info.
+ * @adev: amdgpu device pointer
+ * @pasid: PASID of the VM
+ * @addr: Address of the fault
+ * @status: GPUVM fault status register
+ * @vmhub: which vmhub got the fault
+ *
+ * Cache the fault info for later use by userspace in debugging.
+ */
+void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
+ unsigned int pasid,
+ uint64_t addr,
+ uint32_t status,
+ unsigned int vmhub)
+{
+   struct amdgpu_vm *vm;
+   unsigned long flags;
+
+	xa_lock_irqsave(&adev->vm_manager.pasids, flags);
+
+	vm = xa_load(&adev->vm_manager.pasids, pasid);
+   if (vm) {
+   vm->fault_info.addr = addr;
+   vm->fault_info.status = status;
+   vm->fault_info.vmhub = vmhub;
+   }
+	xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
+}
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 204ab13184ed..2bf328d9e04b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -252,6 +252,15 @@ struct amdgpu_vm_update_funcs {
  struct dma_fence **fence);
 };
 
+struct amdgpu_vm_fault_info {
+   /* fault address */
+   uint64_taddr;
+   /* fault status register */
+   uint32_tstatus;
+   /* which vmhub? gfxhub, mmhub, etc. */
+   unsigned intvmhub;
+};
+
 struct amdgpu_vm {
/* tree of virtual addresses mapped */
struct rb_root_cached   va;
@@ -343,6 +352,9 @@ struct amdgpu_vm {
 
/* Memory partition number, -1 means any partition */
int8_t  mem_id;
+
+   /* cached fault info */
+   struct amdgpu_vm_fault_info fault_info;
 };
 
 struct amdgpu_vm_manager {
@@ -554,4 +566,10 @@ static inline void amdgpu_vm_eviction_unlock(struct 
amdgpu_vm *vm)
	mutex_unlock(&vm->eviction_lock);
 }
 
+void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
+ unsigned int pasid,
+ uint64_t addr,
+ uint32_t status,
+ unsigned int vmhub);
+
 #endif
-- 
2.41.0



[PATCH v4 0/4] Add GPU page fault query interface

2023-09-28 Thread Alex Deucher
This patch set adds support for an application to query GPU
page faults.  It's useful for debugging and there are
Vulkan extensions that could make use of this.  Preliminary
user space code which uses this can be found here:
https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23238
https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/298

Note that I made a small change to the vmhub definition to
decouple it from how the kernel tracks vmhubs so that we have
a consistent user view even if we decide to add more vmhubs
like we recently did for gfx 9.4.3.

I've also pushed the changes to:
https://gitlab.freedesktop.org/agd5f/linux/-/commits/gpu_fault_info_ioctl

Open question: currently we just expose the raw GPU fault status
register value for each GPU, so UMDs need GPU-specific knowledge to decode
it, although it's largely the same across generations.  One option would be
to translate it to a generic, GPU-independent fault status.  Opinions?

v2:
- Fix spelling typos noted by Guchun
v3:
- Add locking in IOCTL query
- Only update cache if fault status is valid
v4:
- Rebase and resend

Alex Deucher (4):
  drm/amdgpu: add cached GPU fault structure to vm struct
  drm/amdgpu: cache gpuvm fault information for gmc7+
  drm/amdgpu: add new INFO ioctl query for the last GPU page fault
  drm/amdgpu: refine fault cache updates

 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 20 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 50 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  | 31 +--
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  |  3 ++
 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c  |  3 ++
 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c   |  3 ++
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c   |  3 ++
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 11 --
 include/uapi/drm/amdgpu_drm.h   | 16 
 10 files changed, 135 insertions(+), 8 deletions(-)

-- 
2.41.0



Re: [PATCH 0/3] Fix Navi3x boot and hotplug problems

2023-09-28 Thread Mario Limonciello

On 9/28/2023 13:00, Alex Deucher wrote:

On Thu, Sep 28, 2023 at 12:41 PM Mario Limonciello
 wrote:


On some OEM systems multiple Navi3x dGPUs are triggering RAS errors
and BACO errors.

These errors come from elements of the OEM system that weren't part of
the original test environment.  This series addresses those problems.

NOTE: Although this series touches two subsystems, I would prefer to
take this all through DRM because there is a workaround in linux-next
that I would like to be reverted at the same time as picking up the first
two patches.


FWIW, the workaround is not in linux-next yet.  At the time I thought
it was already fixed by the fixes in ucsi and power supply when we
first encountered this.


I looked yesterday and I did see it there, but I think it was 
specifically because it had merged the amd-staging-drm-next tree.

It's not there today.

If Sebastian is OK, I'd still rather keep it all together so that people 
testing amd-staging-drm-next get the fixes.




Alex



Mario Limonciello (3):
   drm/amd: Fix detection of _PR3 on the PCIe root port
   power: supply: Don't count 'unknown' scope power supplies
   Revert "drm/amd/pm: workaround for the wrong ac power detection on smu
 13.0.0"

  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c   | 2 +-
  drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c   | 3 ++-
  drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 1 +
  drivers/power/supply/power_supply_core.c | 2 +-
  4 files changed, 5 insertions(+), 3 deletions(-)

--
2.34.1





Re: [PATCH v2 1/1] drm/amdkfd: Fix unaligned doorbell absolute offset for gfx8

2023-09-28 Thread Felix Kuehling

On 2023-09-28 11:38, Shashank Sharma wrote:

Hello Felix, Mukul,

On 28/09/2023 17:30, Felix Kuehling wrote:

On 2023-09-28 10:30, Joshi, Mukul wrote:

[AMD Official Use Only - General]


-Original Message-
From: Yadav, Arvind 
Sent: Thursday, September 28, 2023 5:54 AM
To: Koenig, Christian ; Deucher, Alexander
; Sharma, Shashank
; Kuehling, Felix ;
Joshi, Mukul ; Pan, Xinhui ;
airl...@gmail.com; dan...@ffwll.ch
Cc: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org; 
linux-

ker...@vger.kernel.org; Yadav, Arvind ; Koenig,
Christian 
Subject: [PATCH v2 1/1] drm/amdkfd: Fix unaligned doorbell absolute 
offset

for gfx8

This patch is to adjust the absolute doorbell offset against the 
doorbell id

considering the doorbell size of 32/64 bit.

v2:
- Addressed the review comment from Felix.

Cc: Christian Koenig 
Cc: Alex Deucher 
Signed-off-by: Shashank Sharma 
Signed-off-by: Arvind Yadav 
---
  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 9 -
  1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 0d3d538b64eb..c54c4392d26e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -407,7 +407,14 @@ static int allocate_doorbell(struct qcm_process_device *qpd,

 	q->properties.doorbell_off = amdgpu_doorbell_index_on_bar(dev->adev,
 								   qpd->proc_doorbells,
-								   q->doorbell_id);
+								   0);
+
It looks like amdgpu_doorbell_index_on_bar() works only for 64-bit 
doorbells.
Shouldn't it work for both 32-bit and 64-bit doorbells considering 
this is common

doorbell manager code?



Yes, you are right that the calculation to find a particular doorbell
in the doorbell page assumes a doorbell width of 64 bits.




I could see this argument going either way. KFD is the only one that 
cares about managing doorbells for user mode queues on GFXv8 GPUs. 
This is not a use case that amdgpu cares about. So I'm OK with KFD 
doing its own address calculations to make sure doorbells continue to 
work on GFXv8.


It may not be worth adding complexity to the common doorbell manager 
code to support legacy GPUs with 32-bit doorbells.



I was thinking about adding an additional input parameter which will
indicate whether the doorbell width is 32-bit or 64-bit (like
is_doorbell_64_bit), and the doorbell manager can alter the multiplier
while calculating the final offset. Please let me know if that will
work for both cases.


Yes, that would work for KFD because we already have the doorbell size 
in our device-info structure. Instead of making it a boolean flag, you 
could make it a doorbell_size parameter, in byte or dword units to 
simplify the pointer math.


Regards,
  Felix




- Shashank




Regards,
  Felix




Thanks,
Mukul


+	/* Adjust the absolute doorbell offset against the doorbell id
+	 * considering the doorbell size of 32/64 bit.
+	 */
+	q->properties.doorbell_off += q->doorbell_id *
+			dev->kfd->device_info.doorbell_size / 4;
+
   return 0;
  }

--
2.34.1
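
The offset math under discussion is straightforward once stated in dword
units, which is how doorbell_off is kept. A small sketch (parameter names
are illustrative):

  /* doorbell_off is in dword (4-byte) units: a 64-bit doorbell advances
   * 2 dwords per doorbell id, a 32-bit (gfx8) doorbell advances 1.
   */
  static uint32_t doorbell_off_for_id(uint32_t first_db_dword_off,
                                      uint32_t doorbell_id,
                                      uint32_t doorbell_size /* bytes: 4 or 8 */)
  {
          return first_db_dword_off + doorbell_id * doorbell_size / 4;
  }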


Re: [PATCH 0/3] Fix Navi3x boot and hotplug problems

2023-09-28 Thread Alex Deucher
On Thu, Sep 28, 2023 at 12:41 PM Mario Limonciello
 wrote:
>
> On some OEM systems multiple Navi3x dGPUs are triggering RAS errors
> and BACO errors.
>
> These errors come from elements of the OEM system that weren't part of
> the original test environment.  This series addresses those problems.
>
> NOTE: Although this series touches two subsystems, I would prefer to
> take this all through DRM because there is a workaround in linux-next
> that I would like to be reverted at the same time as picking up the first
> two patches.

FWIW, the workaround is not in linux-next yet.  At the time I thought
it was already fixed by the fixes in ucsi and power supply when we
first encountered this.

Alex

>
> Mario Limonciello (3):
>   drm/amd: Fix detection of _PR3 on the PCIe root port
>   power: supply: Don't count 'unknown' scope power supplies
>   Revert "drm/amd/pm: workaround for the wrong ac power detection on smu
> 13.0.0"
>
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c   | 2 +-
>  drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c   | 3 ++-
>  drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 1 +
>  drivers/power/supply/power_supply_core.c | 2 +-
>  4 files changed, 5 insertions(+), 3 deletions(-)
>
> --
> 2.34.1
>


RE: [PATCH 1/3] drm/amd: Fix detection of _PR3 on the PCIe root port

2023-09-28 Thread Deucher, Alexander
[Public]

> -Original Message-
> From: Limonciello, Mario 
> Sent: Tuesday, September 26, 2023 7:00 PM
> To: amd-gfx@lists.freedesktop.org; Sebastian Reichel ;
> Deucher, Alexander 
> Cc: linux...@vger.kernel.org; linux-ker...@vger.kernel.org; Ma, Jun
> ; Limonciello, Mario 
> Subject: [PATCH 1/3] drm/amd: Fix detection of _PR3 on the PCIe root port
>
> On some systems a Navi3x dGPU will attempt to use BACO for runtime PM
> but fails to resume properly.  This is because on these systems the root port
> goes into D3cold, which is incompatible with BACO.
> 
> This happens because in this case the dGPU is connected to a bridge between
> it and the root port, which causes the BOCO detection logic to fail.  Fix the
> intent of the logic by looking at the root port, not the immediate upstream
> bridge, for _PR3.
>
> Cc: sta...@vger.kernel.org
> Suggested-by: Jun Ma 
> Tested-by: David Perry 
> Fixes: b10c1c5b3a4e ("drm/amdgpu: add check for ACPI power resources")
> Signed-off-by: Mario Limonciello 

Series is:
Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index e4627d92e1d0..bad2b5577e96 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2238,7 +2238,7 @@ static int amdgpu_device_ip_early_init(struct
> amdgpu_device *adev)
>   adev->flags |= AMD_IS_PX;
>
>   if (!(adev->flags & AMD_IS_APU)) {
> - parent = pci_upstream_bridge(adev->pdev);
> + parent = pcie_find_root_port(adev->pdev);
>   adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
>   }
>
> --
> 2.34.1



[PATCH 3/3] Revert "drm/amd/pm: workaround for the wrong ac power detection on smu 13.0.0"

2023-09-28 Thread Mario Limonciello
This workaround is not necessary with the power supply core fixed.

This reverts commit 0e5e1a84f0b8c814d502a135824244127fed8f23.

Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c   | 3 ++-
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index d86499ac8931..8d1e39589057 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -1021,7 +1021,8 @@ static int smu_v13_0_process_pending_interrupt(struct 
smu_context *smu)
 {
int ret = 0;
 
-   if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_ACDC_BIT))
+   if (smu->dc_controlled_by_gpio &&
+   smu_cmn_feature_is_enabled(smu, SMU_FEATURE_ACDC_BIT))
ret = smu_v13_0_allow_ih_interrupt(smu);
 
return ret;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index 1c15fa911176..684b4e01fac2 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -3001,6 +3001,7 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = 
{
.enable_mgpu_fan_boost = smu_v13_0_0_enable_mgpu_fan_boost,
.get_power_limit = smu_v13_0_0_get_power_limit,
.set_power_limit = smu_v13_0_set_power_limit,
+   .set_power_source = smu_v13_0_set_power_source,
.get_power_profile_mode = smu_v13_0_0_get_power_profile_mode,
.set_power_profile_mode = smu_v13_0_0_set_power_profile_mode,
.run_btc = smu_v13_0_run_btc,
-- 
2.34.1



[PATCH 0/3] Fix Navi3x boot and hotplug problems

2023-09-28 Thread Mario Limonciello
On some OEM systems multiple Navi3x dGPUs are triggering RAS errors
and BACO errors.

These errors come from elements of the OEM system that weren't part of
the original test environment.  This series addresses those problems.

NOTE: Although this series touches two subsystems, I would prefer to
take this all through DRM because there is a workaround in linux-next
that I would like to be reverted at the same time as picking up the first
two patches.

Mario Limonciello (3):
  drm/amd: Fix detection of _PR3 on the PCIe root port
  power: supply: Don't count 'unknown' scope power supplies
  Revert "drm/amd/pm: workaround for the wrong ac power detection on smu
13.0.0"

 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c   | 2 +-
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c   | 3 ++-
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 1 +
 drivers/power/supply/power_supply_core.c | 2 +-
 4 files changed, 5 insertions(+), 3 deletions(-)

-- 
2.34.1



[PATCH 2/3] power: supply: Don't count 'unknown' scope power supplies

2023-09-28 Thread Mario Limonciello
On some systems an AMD Navi3x dGPU triggers RAS errors on startup, but
only if the amdgpu kernel module is not part of the initramfs.
This is because the hardware is not properly programmed for the
AC/DC state of the system when the module is loaded later in boot.

The AC/DC state of the system is incorrect specifically when UCSI power
supplies have been initialized.  These power supplies are marked with
POWER_SUPPLY_SCOPE_UNKNOWN scope.  As they're 'offline', the power
supply count is increased, but the resultant return value of
power_supply_is_system_supplied() is 0.

To fix this, look explicitly for `POWER_SUPPLY_SCOPE_SYSTEM` power
supplies before incrementing the count.  If no system power supply
is found, then the system is assumed to be on AC.

Cc: sta...@vger.kernel.org
Tested-by: David Perry 
Fixes: 95339f40a8b6 ("power: supply: Fix logic checking if system is running 
from battery")
Signed-off-by: Mario Limonciello 
---
 drivers/power/supply/power_supply_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/power/supply/power_supply_core.c 
b/drivers/power/supply/power_supply_core.c
index d325e6dbc770..3de6e6d00815 100644
--- a/drivers/power/supply/power_supply_core.c
+++ b/drivers/power/supply/power_supply_core.c
@@ -349,7 +349,7 @@ static int __power_supply_is_system_supplied(struct device 
*dev, void *data)
unsigned int *count = data;
 
	if (!psy->desc->get_property(psy, POWER_SUPPLY_PROP_SCOPE, &ret))
-   if (ret.intval == POWER_SUPPLY_SCOPE_DEVICE)
+   if (ret.intval != POWER_SUPPLY_SCOPE_SYSTEM)
return 0;
 
(*count)++;
-- 
2.34.1
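
For context, the callback above runs once per registered supply via
class_for_each_device(). A simplified sketch of the surrounding logic in
drivers/power/supply/power_supply_core.c (details trimmed):

  int power_supply_is_system_supplied(void)
  {
          unsigned int count = 0;
          int online;

          online = class_for_each_device(power_supply_class, NULL, &count,
                                         __power_supply_is_system_supplied);

          /* with the fix, only POWER_SUPPLY_SCOPE_SYSTEM supplies are
           * counted; if none were found, assume a desktop on mains (AC)
           */
          if (count == 0)
                  return 1;

          return online;
  }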



[PATCH 1/3] drm/amd: Fix detection of _PR3 on the PCIe root port

2023-09-28 Thread Mario Limonciello
On some systems a Navi3x dGPU will attempt to use BACO for runtime
PM but fails to resume properly.  This is because on these systems
the root port goes into D3cold, which is incompatible with BACO.

This happens because in this case the dGPU is connected to a bridge
between it and the root port, which causes the BOCO detection logic to
fail.  Fix the intent of the logic by looking at the root port, not the
immediate upstream bridge, for _PR3.

Cc: sta...@vger.kernel.org
Suggested-by: Jun Ma 
Tested-by: David Perry 
Fixes: b10c1c5b3a4e ("drm/amdgpu: add check for ACPI power resources")
Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index e4627d92e1d0..bad2b5577e96 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2238,7 +2238,7 @@ static int amdgpu_device_ip_early_init(struct 
amdgpu_device *adev)
adev->flags |= AMD_IS_PX;
 
if (!(adev->flags & AMD_IS_APU)) {
-   parent = pci_upstream_bridge(adev->pdev);
+   parent = pcie_find_root_port(adev->pdev);
adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
}
 
-- 
2.34.1
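
The distinction matters because pci_upstream_bridge() returns only the
immediate parent, which on the affected systems is an intermediate bridge
rather than the root port. A hedged sketch of what pcie_find_root_port()
does instead (the in-tree inline helper in include/linux/pci.h is
equivalent in spirit):

  static struct pci_dev *find_root_port(struct pci_dev *dev)
  {
          /* walk the upstream chain until the PCIe root port, the
           * device whose _PR3 actually governs D3cold for the hierarchy
           */
          while (dev) {
                  if (pci_is_pcie(dev) &&
                      pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT)
                          return dev;
                  dev = pci_upstream_bridge(dev);
          }
          return NULL;
  }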



[PATCH] drm/amd/pm: Remove set df cstate for SMUv13.0.6

2023-09-28 Thread Asad Kamal
Remove set df cstate, as disallowing DF C-states is
not required for SMU v13.0.6.

Signed-off-by: Asad Kamal 
Reviewed-by: Lijo Lazar 
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 11a6cd96c601..652688316d68 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -1889,13 +1889,6 @@ static bool smu_v13_0_6_is_baco_supported(struct 
smu_context *smu)
return false;
 }
 
-static int smu_v13_0_6_set_df_cstate(struct smu_context *smu,
-enum pp_df_cstate state)
-{
-   return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_DFCstateControl,
-  state, NULL);
-}
-
 static int smu_v13_0_6_allow_xgmi_power_down(struct smu_context *smu, bool en)
 {
return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GmiPwrDnControl,
@@ -2754,7 +2747,6 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = 
{
.get_dpm_ultimate_freq = smu_v13_0_6_get_dpm_ultimate_freq,
.set_soft_freq_limited_range = smu_v13_0_6_set_soft_freq_limited_range,
.od_edit_dpm_table = smu_v13_0_6_usr_edit_dpm_table,
-   .set_df_cstate = smu_v13_0_6_set_df_cstate,
.allow_xgmi_power_down = smu_v13_0_6_allow_xgmi_power_down,
.log_thermal_throttling_event = 
smu_v13_0_6_log_thermal_throttling_event,
.get_pp_feature_mask = smu_cmn_get_pp_feature_mask,
-- 
2.34.1



Re: [PATCH] drm/radeon/kms/atom: Remove redundant code

2023-09-28 Thread Alex Deucher
On Thu, Sep 28, 2023 at 5:46 AM Jiapeng Chong
 wrote:
>
> drivers/gpu/drm/radeon/atom.c:396 atom_skip_src_int() warn: ignoring 
> unreachable code.
>
> Reported-by: Abaci Robot 
> Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=6713
> Signed-off-by: Jiapeng Chong 
> ---
>  drivers/gpu/drm/radeon/atom.c | 1 -
>  1 file changed, 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/radeon/atom.c b/drivers/gpu/drm/radeon/atom.c
> index ceb6d772ef94..3082d08a06c2 100644
> --- a/drivers/gpu/drm/radeon/atom.c
> +++ b/drivers/gpu/drm/radeon/atom.c
> @@ -393,7 +393,6 @@ static void atom_skip_src_int(atom_exec_context *ctx, 
> uint8_t attr, int *ptr)
> (*ptr)++;
> return;
> }
> -   return;

I think this should be a break.

Alex

> }
>  }
>
> --
> 2.20.1.7.g153144c
>
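
A generic illustration of Alex's point (not the actual atom.c code): when a
case ends in a statement the checker reports as unreachable, simply
deleting it can introduce fall-through into the following case; replacing
it with "break;" keeps the control flow explicit:

  static void skip_src(int arg, int has_imm, int *ptr)
  {
          switch (arg) {
          case 0:
                  if (has_imm) {
                          (*ptr)++;
                          return;
                  }
                  break;  /* was "return;" -- break, don't just delete it */
          case 1:
                  *ptr += 2;
                  break;
          }
  }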


Re: [PATCH v2 1/1] drm/amdkfd: Fix unaligned doorbell absolute offset for gfx8

2023-09-28 Thread Shashank Sharma

Hello Felix, Mukul,

On 28/09/2023 17:30, Felix Kuehling wrote:

On 2023-09-28 10:30, Joshi, Mukul wrote:

[AMD Official Use Only - General]


-Original Message-
From: Yadav, Arvind 
Sent: Thursday, September 28, 2023 5:54 AM
To: Koenig, Christian ; Deucher, Alexander
; Sharma, Shashank
; Kuehling, Felix ;
Joshi, Mukul ; Pan, Xinhui ;
airl...@gmail.com; dan...@ffwll.ch
Cc: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org; 
linux-

ker...@vger.kernel.org; Yadav, Arvind ; Koenig,
Christian 
Subject: [PATCH v2 1/1] drm/amdkfd: Fix unaligned doorbell absolute 
offset

for gfx8

This patch is to adjust the absolute doorbell offset against the 
doorbell id

considering the doorbell size of 32/64 bit.

v2:
- Addressed the review comment from Felix.

Cc: Christian Koenig 
Cc: Alex Deucher 
Signed-off-by: Shashank Sharma 
Signed-off-by: Arvind Yadav 
---
  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 9 -
  1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 0d3d538b64eb..c54c4392d26e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -407,7 +407,14 @@ static int allocate_doorbell(struct qcm_process_device *qpd,

 	q->properties.doorbell_off = amdgpu_doorbell_index_on_bar(dev->adev,
 								   qpd->proc_doorbells,
-								   q->doorbell_id);
+								   0);
+
It looks like amdgpu_doorbell_index_on_bar() works only for 64-bit 
doorbells.
Shouldn't it work for both 32-bit and 64-bit doorbells considering 
this is common

doorbell manager code?



Yes, you are right that the calculation to find a particular doorbell
in the doorbell page assumes a doorbell width of 64 bits.




I could see this argument going either way. KFD is the only one that 
cares about managing doorbells for user mode queues on GFXv8 GPUs. 
This is not a use case that amdgpu cares about. So I'm OK with KFD 
doing its own address calculations to make sure doorbells continue to 
work on GFXv8.


It may not be worth adding complexity to the common doorbell manager 
code to support legacy GPUs with 32-bit doorbells.



I was thinking about adding an additional input parameter which will
indicate whether the doorbell width is 32-bit or 64-bit (like
is_doorbell_64_bit), and the doorbell manager can alter the multiplier while
calculating the final offset. Please let me know if that will work for
both cases.


- Shashank




Regards,
  Felix




Thanks,
Mukul


+	/* Adjust the absolute doorbell offset against the doorbell id
+	 * considering the doorbell size of 32/64 bit.
+	 */
+	q->properties.doorbell_off += q->doorbell_id *
+			dev->kfd->device_info.doorbell_size / 4;
+
   return 0;
  }

--
2.34.1


Re: Requests For Proposals for hosting XDC 2024 are now open

2023-09-28 Thread Ricardo Garcia
The period to submit XDC 2024 hosting proposals has been extended and
the new deadline is **November 1, 2023**.

Please, submit your proposals as soon as possible, thank you!

On Mon, 2023-06-19 at 15:43 +0200, Ricardo Garcia wrote:
> Hello everyone!
> 
> The X.org board is soliciting proposals to host XDC in 2024. Since XDC
> 2023 is being held in Europe this year, we've decided to host in North
> America. However, the board is open to other locations, especially if
> there's an interesting co-location with another conference.
> 
> If you're considering hosting XDC, we've assembled a wiki page with
> what's generally expected and needed:
> 
> https://www.x.org/wiki/Events/RFP/
> 
> When submitting your proposal, please make sure to include at least the
> key information about the potential location in question, possible
> dates along with estimated costs. Proposals can be submitted to board
> at foundation.x.org until the deadline of *September 17th, 2023*. 
> 
> Additionally, a quick early heads-up to the board if you're
> considering hosting would be appreciated, in case we need to adjust the
> schedule a bit. Also, earlier is better since there generally will be a
> bit of Q&A with organizers.
> 
> And if you just have some questions about what organizing XDC entails,
> please feel free to chat with previous organizers, or someone from the
> board.
> 
> Thanks,
> Ricardo Garcia, on behalf of X.Org
> 



Re: [PATCH v2 1/1] drm/amdkfd: Fix unaligned doorbell absolute offset for gfx8

2023-09-28 Thread Felix Kuehling

On 2023-09-28 10:30, Joshi, Mukul wrote:

[AMD Official Use Only - General]


-Original Message-
From: Yadav, Arvind 
Sent: Thursday, September 28, 2023 5:54 AM
To: Koenig, Christian ; Deucher, Alexander
; Sharma, Shashank
; Kuehling, Felix ;
Joshi, Mukul ; Pan, Xinhui ;
airl...@gmail.com; dan...@ffwll.ch
Cc: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org; linux-
ker...@vger.kernel.org; Yadav, Arvind ; Koenig,
Christian 
Subject: [PATCH v2 1/1] drm/amdkfd: Fix unaligned doorbell absolute offset
for gfx8

This patch is to adjust the absolute doorbell offset against the doorbell id
considering the doorbell size of 32/64 bit.

v2:
- Addressed the review comment from Felix.

Cc: Christian Koenig 
Cc: Alex Deucher 
Signed-off-by: Shashank Sharma 
Signed-off-by: Arvind Yadav 
---
  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 9 -
  1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 0d3d538b64eb..c54c4392d26e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -407,7 +407,14 @@ static int allocate_doorbell(struct
qcm_process_device *qpd,

   q->properties.doorbell_off = amdgpu_doorbell_index_on_bar(dev->adev,
							      qpd->proc_doorbells,
-							      q->doorbell_id);
+							      0);
+

It looks like amdgpu_doorbell_index_on_bar() works only for 64-bit doorbells.
Shouldn't it work for both 32-bit and 64-bit doorbells considering this is 
common
doorbell manager code?


I could see this argument going either way. KFD is the only one that 
cares about managing doorbells for user mode queues on GFXv8 GPUs. This 
is not a use case that amdgpu cares about. So I'm OK with KFD doing its 
own address calculations to make sure doorbells continue to work on GFXv8.


It may not be worth adding complexity to the common doorbell manager 
code to support legacy GPUs with 32-bit doorbells.


Regards,
  Felix




Thanks,
Mukul


+	/* Adjust the absolute doorbell offset against the doorbell id
+	 * considering the doorbell size of 32/64 bit.
+	 */
+	q->properties.doorbell_off += q->doorbell_id *
+				      dev->kfd->device_info.doorbell_size / 4;
+
   return 0;
  }

--
2.34.1


Re: [PATCH 9/9] drm/v3d: Annotate struct v3d_perfmon with __counted_by

2023-09-28 Thread Maira Canal

Hi Kees,

On 9/22/23 14:32, Kees Cook wrote:

Prepare for the coming implementation by GCC and Clang of the __counted_by
attribute. Flexible array members annotated with __counted_by can have
their accesses bounds-checked at run time via CONFIG_UBSAN_BOUNDS
(for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family
functions).
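
A minimal illustration of the annotation (struct and member names here are
generic, not v3d's; in-kernel code gets __counted_by via
linux/compiler_attributes.h):

struct counters {
	unsigned int ncounters;	/* must be set before the array is accessed */
	u64 values[] __counted_by(ncounters);
};

With the annotation in place, a fortified memcpy() into values or a
UBSAN-instrumented values[i] access can be checked against ncounters at
run time.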

As found with Coccinelle[1], add __counted_by for struct v3d_perfmon.

[1] 
https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci

Cc: Emma Anholt 
Cc: Melissa Wen 
Cc: David Airlie 
Cc: Daniel Vetter 
Cc: dri-de...@lists.freedesktop.org
Signed-off-by: Kees Cook 


Reviewed-by: Maíra Canal 

Best Regards,
- Maíra


---
  drivers/gpu/drm/v3d/v3d_drv.h | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index 7f664a4b2a75..106454f28956 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -59,7 +59,7 @@ struct v3d_perfmon {
 * values can't be reset, but you can fake a reset by
 * destroying the perfmon and creating a new one.
 */
-   u64 values[];
+   u64 values[] __counted_by(ncounters);
  };
  
  struct v3d_dev {


RE: [PATCH v2 1/1] drm/amdkfd: Fix unaligned doorbell absolute offset for gfx8

2023-09-28 Thread Joshi, Mukul

> -Original Message-
> From: Yadav, Arvind 
> Sent: Thursday, September 28, 2023 5:54 AM
> To: Koenig, Christian ; Deucher, Alexander
> ; Sharma, Shashank
> ; Kuehling, Felix ;
> Joshi, Mukul ; Pan, Xinhui ;
> airl...@gmail.com; dan...@ffwll.ch
> Cc: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org; linux-
> ker...@vger.kernel.org; Yadav, Arvind ; Koenig,
> Christian 
> Subject: [PATCH v2 1/1] drm/amdkfd: Fix unaligned doorbell absolute offset
> for gfx8
>
> This patch is to adjust the absolute doorbell offset against the doorbell id
> considering the doorbell size of 32/64 bit.
>
> v2:
> - Addressed the review comment from Felix.
>
> Cc: Christian Koenig 
> Cc: Alex Deucher 
> Signed-off-by: Shashank Sharma 
> Signed-off-by: Arvind Yadav 
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 9 -
>  1 file changed, 8 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 0d3d538b64eb..c54c4392d26e 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -407,7 +407,14 @@ static int allocate_doorbell(struct
> qcm_process_device *qpd,
>
>   q->properties.doorbell_off = amdgpu_doorbell_index_on_bar(dev->adev,
>  qpd->proc_doorbells,
> -q->doorbell_id);
> +0);
> +

It looks like amdgpu_doorbell_index_on_bar() works only for 64-bit doorbells.
Shouldn't it work for both 32-bit and 64-bit doorbells considering this is 
common
doorbell manager code?

Thanks,
Mukul

> +	/* Adjust the absolute doorbell offset against the doorbell id
> +	 * considering the doorbell size of 32/64 bit.
> +	 */
> +	q->properties.doorbell_off += q->doorbell_id *
> +				      dev->kfd->device_info.doorbell_size / 4;
> +
>   return 0;
>  }
>
> --
> 2.34.1



Re: [PATCH v6 4/9] drm/amdgpu: create GFX-gen11 usermode queue

2023-09-28 Thread Shashank Sharma



On 28/09/2023 15:52, Alex Deucher wrote:

On Thu, Sep 28, 2023 at 9:40 AM Shashank Sharma  wrote:


On 28/09/2023 15:27, Alex Deucher wrote:

On Thu, Sep 28, 2023 at 9:22 AM Shashank Sharma  wrote:

On 14/09/2023 10:24, Shashank Sharma wrote:

On 14/09/2023 09:45, Christian König wrote:

Am 08.09.23 um 18:04 schrieb Shashank Sharma:

A Memory queue descriptor (MQD) of a userqueue defines it in
the hw's context. As MQD format can vary between different
graphics IPs, we need gfx GEN specific handlers to create MQDs.

This patch:
- Introduces MQD handler functions for the usermode queues.
- Adds new functions to create and destroy userqueue MQD for
 GFX-GEN-11 IP

V1: Worked on review comments from Alex:
   - Make MQD functions GEN and IP specific

V2: Worked on review comments from Alex:
   - Reuse the existing adev->mqd[ip] for MQD creation
   - Formatting and arrangement of code

V3:
   - Integration with doorbell manager

V4: Review comments addressed:
   - Do not create a new file for userq, reuse gfx_v11_0.c (Alex)
   - Align name of structure members (Luben)
   - Don't break up the Cc tag list and the Sob tag list in commit
 message (Luben)
V5:
  - No need to reserve the bo for MQD (Christian).
  - Some more changes to support IP specific MQD creation.

V6:
  - Add a comment reminding us to replace the
amdgpu_bo_create_kernel()
calls while creating MQD object to amdgpu_bo_create() once
eviction
fences are ready (Christian).

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Shashank Sharma 
Signed-off-by: Arvind Yadav 
---
drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 16 
drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c| 77
+++
.../gpu/drm/amd/include/amdgpu_userqueue.h|  7 ++
3 files changed, 100 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
index 44769423ba30..03fc8e89eafb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -140,12 +140,28 @@ int amdgpu_userq_ioctl(struct drm_device *dev,
void *data,
return r;
}
+extern const struct amdgpu_userq_funcs userq_gfx_v11_funcs;
+
+static void
+amdgpu_userqueue_setup_gfx(struct amdgpu_userq_mgr *uq_mgr)
+{
+int maj;
+struct amdgpu_device *adev = uq_mgr->adev;
+uint32_t version = adev->ip_versions[GC_HWIP][0];
+
+/* We support usermode queue only for GFX V11 as of now */
+maj = IP_VERSION_MAJ(version);
+if (maj == 11)
+uq_mgr->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_gfx_v11_funcs;
+}

That belongs into gfx_v11.c and not here.

Agree,

On a second thought, we can't move it to gfx_v11.c, as this is the place
where we are setting up the gfx_userqueue functions in fpriv->uq_mgr()
for the first time, and we do not have another option but to check the
IP and setup the functions here. The only other option to do this will
be to move  uq_mgr->userq_funcs to adev->gfx.userq_funcs and setup them
with the IP init (as Alex once suggested). Please let me know your
thoughts on this.

That seems cleaner to me.  They should be global anyway and could be
set as part of the individual IP init sequences.  Then the presence of
a pointer could be used to determine whether or not a particular IP
type supports user queues.

Alex


So if I understand this correctly, this is how we are looking to arrange
the userqueue IP functions:

- Presence of adev->gfx.funcs.userqueue_funcs() will decide if this IP
supports userqueue or not.

- sw_init function of the IP will setup these fptrs like:

in gfx_v11_0_sw_init :

  if (GFX_MAJ == 11)

  adev->gfx.funcs.userqueue_funcs = gfx_v11_0_userqueue_funcs

I was thinking something more like:

adev->userq_funcs[AMD_IP_BLOCK_TYPE_GFX] = gfx_v11_0_userqueue_funcs;

That way there would be one place for all of the all of the fptrs and
you could use the IP type to query the right one.

And then in the IOCTLs, you could just check if the pointer is valid.  E.g.,

if (!adev->userq_funcs[ip_block_type])
return -EINVAL;

etc.

You could store any metadata relevant to each userq in the per fd user
queue manager and then just pass the state to the global userq
functions for each IP.
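
A compact standalone sketch of the arrangement being proposed (all names
are illustrative and mirror the shape of the suggestion, not actual amdgpu
code):

#include <errno.h>
#include <stdio.h>

enum ip_block_type { IP_BLOCK_GFX, IP_BLOCK_NUM };

struct userq_funcs {
	int (*mqd_create)(void *queue_state);
	void (*mqd_destroy)(void *queue_state);
};

static int gfx_mqd_create(void *queue_state) { (void)queue_state; return 0; }
static void gfx_mqd_destroy(void *queue_state) { (void)queue_state; }

static const struct userq_funcs gfx_userq_funcs = {
	.mqd_create = gfx_mqd_create,
	.mqd_destroy = gfx_mqd_destroy,
};

/* one global table per device; each IP's init fills in its own slot */
static const struct userq_funcs *userq_funcs[IP_BLOCK_NUM];

static int userq_create(enum ip_block_type ip, void *queue_state)
{
	if (!userq_funcs[ip])	/* NULL slot == IP has no userqueue support */
		return -EINVAL;
	return userq_funcs[ip]->mqd_create(queue_state);
}

int main(void)
{
	userq_funcs[IP_BLOCK_GFX] = &gfx_userq_funcs;	/* the "sw_init" step */
	printf("create: %d\n", userq_create(IP_BLOCK_GFX, NULL));
	return 0;
}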


Makes sense, we can do that, hope this works for Christian as well, 
@Christian ?


- Shashank



Alex



In amdgpu_userqueue_ioctl:

  |

CASE: create:

  amdgpu_userqueue_create()

  if (adev->gfx.funcs.userqueue_funcs) {

  adev->gfx.funcs.userqueue_funcs.create_mqd();

  }


CASE: destroy:

  amdgpu_userqueue_destroy()

  if (adev->gfx.funcs.userqueue_funcs) {

  adev->gfx.funcs.userqueue_funcs.destroy_mqd();

  }

and so on ...

Am I getting this right ?

- Shashank


- Shashank


+
int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr,
struct amdgpu_device *adev)
{

Re: [PATCH v6 4/9] drm/amdgpu: create GFX-gen11 usermode queue

2023-09-28 Thread Alex Deucher
On Thu, Sep 28, 2023 at 9:40 AM Shashank Sharma  wrote:
>
>
> On 28/09/2023 15:27, Alex Deucher wrote:
> > On Thu, Sep 28, 2023 at 9:22 AM Shashank Sharma  
> > wrote:
> >>
> >> On 14/09/2023 10:24, Shashank Sharma wrote:
> >>> On 14/09/2023 09:45, Christian König wrote:
>  Am 08.09.23 um 18:04 schrieb Shashank Sharma:
> > A Memory queue descriptor (MQD) of a userqueue defines it in
> > the hw's context. As MQD format can vary between different
> > graphics IPs, we need gfx GEN specific handlers to create MQDs.
> >
> > This patch:
> > - Introduces MQD handler functions for the usermode queues.
> > - Adds new functions to create and destroy userqueue MQD for
> > GFX-GEN-11 IP
> >
> > V1: Worked on review comments from Alex:
> >   - Make MQD functions GEN and IP specific
> >
> > V2: Worked on review comments from Alex:
> >   - Reuse the existing adev->mqd[ip] for MQD creation
> >   - Formatting and arrangement of code
> >
> > V3:
> >   - Integration with doorbell manager
> >
> > V4: Review comments addressed:
> >   - Do not create a new file for userq, reuse gfx_v11_0.c (Alex)
> >   - Align name of structure members (Luben)
> >   - Don't break up the Cc tag list and the Sob tag list in commit
> > message (Luben)
> > V5:
> >  - No need to reserve the bo for MQD (Christian).
> >  - Some more changes to support IP specific MQD creation.
> >
> > V6:
> >  - Add a comment reminding us to replace the
> > amdgpu_bo_create_kernel()
> >calls while creating MQD object to amdgpu_bo_create() once
> > eviction
> >fences are ready (Christian).
> >
> > Cc: Alex Deucher 
> > Cc: Christian Koenig 
> > Signed-off-by: Shashank Sharma 
> > Signed-off-by: Arvind Yadav 
> > ---
> >drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 16 
> >drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c| 77
> > +++
> >.../gpu/drm/amd/include/amdgpu_userqueue.h|  7 ++
> >3 files changed, 100 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> > index 44769423ba30..03fc8e89eafb 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> > @@ -140,12 +140,28 @@ int amdgpu_userq_ioctl(struct drm_device *dev,
> > void *data,
> >return r;
> >}
> >+extern const struct amdgpu_userq_funcs userq_gfx_v11_funcs;
> > +
> > +static void
> > +amdgpu_userqueue_setup_gfx(struct amdgpu_userq_mgr *uq_mgr)
> > +{
> > +int maj;
> > +struct amdgpu_device *adev = uq_mgr->adev;
> > +uint32_t version = adev->ip_versions[GC_HWIP][0];
> > +
> > +/* We support usermode queue only for GFX V11 as of now */
> > +maj = IP_VERSION_MAJ(version);
> > +if (maj == 11)
> > > +uq_mgr->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_gfx_v11_funcs;
> > +}
>  That belongs into gfx_v11.c and not here.
> >>>
> >>> Agree,
> >> On a second thought, we can't move it to gfx_v11.c, as this is the place
> >> where we are setting up the gfx_userqueue functions in fpriv->uq_mgr()
> >> for the first time, and we do not have another option but to check the
> >> IP and setup the functions here. The only other option to do this will
> >> be to move  uq_mgr->userq_funcs to adev->gfx.userq_funcs and setup them
> >> with the IP init (as Alex once suggested). Please let me know your
> >> thoughts on this.
> > That seems cleaner to me.  They should be global anyway and could be
> > set as part of the individual IP init sequences.  Then the presence of
> > a pointer could be used to determine whether or not a particular IP
> > type supports user queues.
> >
> > Alex
> >
> So if I understand this correctly, this is how we are looking to arrange
> the userqueue IP functions:
>
> - Presence of adev->gfx.funcs.userqueue_funcs() will decide if this IP
> supports userqueue or not.
>
> - sw_init function of the IP will setup these fptrs like:
>
>in gfx_v11_0_sw_init :
>
>  if (GFX_MAJ == 11)
>
>  adev->gfx.funcs.userqueue_funcs = gfx_v11_0_userqueue_funcs

I was thinking something more like:

adev->userq_funcs[AMD_IP_BLOCK_TYPE_GFX] = gfx_v11_0_userqueue_funcs;

That way there would be one place for all of the all of the fptrs and
you could use the IP type to query the right one.

And then in the IOCTLs, you could just check if the pointer is valid.  E.g.,

if (!adev->userq_funcs[ip_block_type])
   return -EINVAL;

etc.

You could store any metadata relevant to each userq in the per fd user
queue manager and then just pass the state to the global userq
functions for each IP.

Alex

>
>
> In amdgpu_userqueue_ioctl:
>
>  |
>
> CASE: create:
>
>   

Re: [PATCH v2] drm/amd/display: enable S/G display for recent APUs by default

2023-09-28 Thread Alex Deucher
On Thu, Sep 28, 2023 at 5:11 AM Yifan Zhang  wrote:
>
> With S/G display becoming stable, enable S/G display for recent APUs
> by default rather than via a white list.
>
> v2: explicitly disable sg on pre-CZ chips (Alex)
>
> Co-authored-by: Alex Deucher 
> Signed-off-by: Yifan Zhang 
> ---
>  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 47 ++-
>  1 file changed, 14 insertions(+), 33 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index 8e5dfdee22e6..8963d9c251f3 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -1627,41 +1627,22 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
> break;
> }
>
> -   switch (adev->asic_type) {
> -   case CHIP_CARRIZO:
> -   case CHIP_STONEY:
> -   init_data.flags.gpu_vm_support = true;
> -   break;
> -   default:
> -   switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
> -   case IP_VERSION(1, 0, 0):
> -   case IP_VERSION(1, 0, 1):
> -   /* enable S/G on PCO and RV2 */
> -   if ((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
> -   (adev->apu_flags & AMD_APU_IS_PICASSO))
> -   init_data.flags.gpu_vm_support = true;
> -   break;
> -   case IP_VERSION(2, 1, 0):
> -   case IP_VERSION(3, 0, 1):
> -   case IP_VERSION(3, 1, 2):
> -   case IP_VERSION(3, 1, 3):
> -   case IP_VERSION(3, 1, 4):
> -   case IP_VERSION(3, 1, 5):
> -   case IP_VERSION(3, 1, 6):
> -   case IP_VERSION(3, 5, 0):
> -   init_data.flags.gpu_vm_support = true;
> -   break;
> -   default:
> -   break;
> -   }
> -   break;
> -   }
> -   if (init_data.flags.gpu_vm_support &&
> -   (amdgpu_sg_display == 0))
> +   /* APUs support S/G display by default except:
> +* ASICs before Carrizo,
> +* RAVEN1 (users reported stability issues)
> +*/
> +
> +   if (adev->asic_type < CHIP_CARRIZO)
> init_data.flags.gpu_vm_support = false;
> +   else if (adev->asic_type == CHIP_RAVEN) {
> +   if (adev->apu_flags & AMD_APU_IS_RAVEN)
> +   init_data.flags.gpu_vm_support = false;
> +   else
> +   init_data.flags.gpu_vm_support = (amdgpu_sg_display 
> != 0);
> +   } else
> +   init_data.flags.gpu_vm_support = (amdgpu_sg_display != 0) && 
> (adev->flags & AMD_IS_APU);
>

per kernel coding style, if any clause has parens, every clause should
have parens.  With that fixed,
Reviewed-by: Alex Deucher 
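
For reference, the clause with kernel brace style applied would look
roughly like this (a sketch of the requested fixup, not the final
committed hunk):

	if (adev->asic_type < CHIP_CARRIZO) {
		init_data.flags.gpu_vm_support = false;
	} else if (adev->asic_type == CHIP_RAVEN) {
		if (adev->apu_flags & AMD_APU_IS_RAVEN)
			init_data.flags.gpu_vm_support = false;
		else
			init_data.flags.gpu_vm_support = (amdgpu_sg_display != 0);
	} else {
		init_data.flags.gpu_vm_support = (amdgpu_sg_display != 0) &&
						 (adev->flags & AMD_IS_APU);
	}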

> -   if (init_data.flags.gpu_vm_support)
> -   adev->mode_info.gpu_vm_support = true;
> +   init_data.flags.gpu_vm_support = adev->mode_info.gpu_vm_support;
>
> if (amdgpu_dc_feature_mask & DC_FBC_MASK)
> init_data.flags.fbc_support = true;
> --
> 2.37.3
>


Re: [PATCH v6 4/9] drm/amdgpu: create GFX-gen11 usermode queue

2023-09-28 Thread Shashank Sharma



On 28/09/2023 15:27, Alex Deucher wrote:

On Thu, Sep 28, 2023 at 9:22 AM Shashank Sharma  wrote:


On 14/09/2023 10:24, Shashank Sharma wrote:

On 14/09/2023 09:45, Christian König wrote:

Am 08.09.23 um 18:04 schrieb Shashank Sharma:

A Memory queue descriptor (MQD) of a userqueue defines it in
the hw's context. As MQD format can vary between different
graphics IPs, we need gfx GEN specific handlers to create MQDs.

This patch:
- Introduces MQD handler functions for the usermode queues.
- Adds new functions to create and destroy userqueue MQD for
GFX-GEN-11 IP

V1: Worked on review comments from Alex:
  - Make MQD functions GEN and IP specific

V2: Worked on review comments from Alex:
  - Reuse the existing adev->mqd[ip] for MQD creation
  - Formatting and arrangement of code

V3:
  - Integration with doorbell manager

V4: Review comments addressed:
  - Do not create a new file for userq, reuse gfx_v11_0.c (Alex)
  - Align name of structure members (Luben)
  - Don't break up the Cc tag list and the Sob tag list in commit
message (Luben)
V5:
 - No need to reserve the bo for MQD (Christian).
 - Some more changes to support IP specific MQD creation.

V6:
 - Add a comment reminding us to replace the
amdgpu_bo_create_kernel()
   calls while creating MQD object to amdgpu_bo_create() once
eviction
   fences are ready (Christian).

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Shashank Sharma 
Signed-off-by: Arvind Yadav 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 16 
   drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c| 77
+++
   .../gpu/drm/amd/include/amdgpu_userqueue.h|  7 ++
   3 files changed, 100 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
index 44769423ba30..03fc8e89eafb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -140,12 +140,28 @@ int amdgpu_userq_ioctl(struct drm_device *dev,
void *data,
   return r;
   }
   +extern const struct amdgpu_userq_funcs userq_gfx_v11_funcs;
+
+static void
+amdgpu_userqueue_setup_gfx(struct amdgpu_userq_mgr *uq_mgr)
+{
+int maj;
+struct amdgpu_device *adev = uq_mgr->adev;
+uint32_t version = adev->ip_versions[GC_HWIP][0];
+
+/* We support usermode queue only for GFX V11 as of now */
+maj = IP_VERSION_MAJ(version);
+if (maj == 11)
+uq_mgr->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_gfx_v11_funcs;
+}

That belongs into gfx_v11.c and not here.


Agree,

On a second thought, we can't move it to gfx_v11.c, as this is the place
where we are setting up the gfx_userqueue functions in fpriv->uq_mgr()
for the first time, and we do not have another option but to check the
IP and setup the functions here. The only other option to do this will
be to move  uq_mgr->userq_funcs to adev->gfx.userq_funcs and setup them
with the IP init (as Alex once suggested). Please let me know your
thoughts on this.

That seems cleaner to me.  They should be global anyway and could be
set as part of the individual IP init sequences.  Then the presence of
a pointer could be used to determine whether or not a particular IP
type supports user queues.

Alex

So if I understand this correctly, this is how we are looking to arrange 
the userqueue IP functions:


- Presence of adev->gfx.funcs.userqueue_funcs() will decide if this IP 
supports userqueue or not.


- sw_init function of the IP will setup these fptrs like:

  in gfx_v11_0_sw_init :

    if (GFX_MAJ == 11)

        adev->gfx.funcs.userqueue_funcs = gfx_v11_0_userqueue_funcs


In amdgpu_userqueue_ioctl:

    |

CASE: create:

    amdgpu_userqueue_create()

    if (adev->gfx.funcs.userqueue_funcs) {

        adev->gfx.funcs.userqueue_funcs.create_mqd();

    }


CASE: destroy:

    amdgpu_userqueue_destroy()

    if (adev->gfx.funcs.userqueue_funcs) {

        adev->gfx.funcs.userqueue_funcs.destroy_mqd();

    }

and so on ...

Am I getting this right ?

- Shashank


- Shashank


+
   int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr,
struct amdgpu_device *adev)
   {
   mutex_init(&userq_mgr->userq_mutex);
   idr_init_base(&userq_mgr->userq_idr, 1);
   userq_mgr->adev = adev;
   +amdgpu_userqueue_setup_gfx(userq_mgr);
   return 0;
   }
   diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 0451533ddde4..6760abda08df 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -30,6 +30,7 @@
   #include "amdgpu_psp.h"
   #include "amdgpu_smu.h"
   #include "amdgpu_atomfirmware.h"
+#include "amdgpu_userqueue.h"
   #include "imu_v11_0.h"
   #include "soc21.h"
   #include "nvd.h"
@@ -6422,3 +6423,79 @@ const struct amdgpu_ip_block_version
gfx_v11_0_ip_block =
   .rev = 0,
   .funcs = &gfx_v11_0_ip_funcs,
   };
+
+static int gfx_v11_0_userq_mqd_create(struct amdgpu_userq_mgr 

Re: [PATCH v6 4/9] drm/amdgpu: create GFX-gen11 usermode queue

2023-09-28 Thread Alex Deucher
On Thu, Sep 28, 2023 at 9:22 AM Shashank Sharma  wrote:
>
>
> On 14/09/2023 10:24, Shashank Sharma wrote:
> >
> > On 14/09/2023 09:45, Christian König wrote:
> >> Am 08.09.23 um 18:04 schrieb Shashank Sharma:
> >>> A Memory queue descriptor (MQD) of a userqueue defines it in
> >>> the hw's context. As MQD format can vary between different
> >>> graphics IPs, we need gfx GEN specific handlers to create MQDs.
> >>>
> >>> This patch:
> >>> - Introduces MQD handler functions for the usermode queues.
> >>> - Adds new functions to create and destroy userqueue MQD for
> >>>GFX-GEN-11 IP
> >>>
> >>> V1: Worked on review comments from Alex:
> >>>  - Make MQD functions GEN and IP specific
> >>>
> >>> V2: Worked on review comments from Alex:
> >>>  - Reuse the existing adev->mqd[ip] for MQD creation
> >>>  - Formatting and arrangement of code
> >>>
> >>> V3:
> >>>  - Integration with doorbell manager
> >>>
> >>> V4: Review comments addressed:
> >>>  - Do not create a new file for userq, reuse gfx_v11_0.c (Alex)
> >>>  - Align name of structure members (Luben)
> >>>  - Don't break up the Cc tag list and the Sob tag list in commit
> >>>message (Luben)
> >>> V5:
> >>> - No need to reserve the bo for MQD (Christian).
> >>> - Some more changes to support IP specific MQD creation.
> >>>
> >>> V6:
> >>> - Add a comment reminding us to replace the
> >>> amdgpu_bo_create_kernel()
> >>>   calls while creating MQD object to amdgpu_bo_create() once
> >>> eviction
> >>>   fences are ready (Christian).
> >>>
> >>> Cc: Alex Deucher 
> >>> Cc: Christian Koenig 
> >>> Signed-off-by: Shashank Sharma 
> >>> Signed-off-by: Arvind Yadav 
> >>> ---
> >>>   drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 16 
> >>>   drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c| 77
> >>> +++
> >>>   .../gpu/drm/amd/include/amdgpu_userqueue.h|  7 ++
> >>>   3 files changed, 100 insertions(+)
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> >>> index 44769423ba30..03fc8e89eafb 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> >>> @@ -140,12 +140,28 @@ int amdgpu_userq_ioctl(struct drm_device *dev,
> >>> void *data,
> >>>   return r;
> >>>   }
> >>>   +extern const struct amdgpu_userq_funcs userq_gfx_v11_funcs;
> >>> +
> >>> +static void
> >>> +amdgpu_userqueue_setup_gfx(struct amdgpu_userq_mgr *uq_mgr)
> >>> +{
> >>> +int maj;
> >>> +struct amdgpu_device *adev = uq_mgr->adev;
> >>> +uint32_t version = adev->ip_versions[GC_HWIP][0];
> >>> +
> >>> +/* We support usermode queue only for GFX V11 as of now */
> >>> +maj = IP_VERSION_MAJ(version);
> >>> +if (maj == 11)
> >>> +uq_mgr->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_gfx_v11_funcs;
> >>> +}
> >>
> >> That belongs into gfx_v11.c and not here.
> >
> >
> > Agree,
>
> On a second thought, we can't move it to gfx_v11.c, as this is the place
> where we are setting up the gfx_userqueue functions in fpriv->uq_mgr()
> for the first time, and we do not have another option but to check the
> IP and setup the functions here. The only other option to do this will
> be to move  uq_mgr->userq_funcs to adev->gfx.userq_funcs and setup them
> with the IP init (as Alex once suggested). Please let me know your
> thoughts on this.

That seems cleaner to me.  They should be global anyway and could be
set as part of the individual IP init sequences.  Then the presence of
a pointer could be used to determine whether or not a particular IP
type supports user queues.

Alex


>
> - Shashank
>
> >
> >>
> >>> +
> >>>   int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr,
> >>> struct amdgpu_device *adev)
> >>>   {
> >>>   mutex_init(&userq_mgr->userq_mutex);
> >>>   idr_init_base(&userq_mgr->userq_idr, 1);
> >>>   userq_mgr->adev = adev;
> >>>   +amdgpu_userqueue_setup_gfx(userq_mgr);
> >>>   return 0;
> >>>   }
> >>>   diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> >>> b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> >>> index 0451533ddde4..6760abda08df 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> >>> @@ -30,6 +30,7 @@
> >>>   #include "amdgpu_psp.h"
> >>>   #include "amdgpu_smu.h"
> >>>   #include "amdgpu_atomfirmware.h"
> >>> +#include "amdgpu_userqueue.h"
> >>>   #include "imu_v11_0.h"
> >>>   #include "soc21.h"
> >>>   #include "nvd.h"
> >>> @@ -6422,3 +6423,79 @@ const struct amdgpu_ip_block_version
> >>> gfx_v11_0_ip_block =
> >>>   .rev = 0,
> >>>   .funcs = &gfx_v11_0_ip_funcs,
> >>>   };
> >>> +
> >>> +static int gfx_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
> >>> +  struct drm_amdgpu_userq_in *args_in,
> >>> +  struct amdgpu_usermode_queue *queue)
> >>> +{
> >>> +struct amdgpu_device *adev = uq_mgr->adev;
> >>> +

Re: [PATCH 1/2] drm/amdgpu/gmc: add a way to force a particular placement for GART

2023-09-28 Thread Alex Deucher
Ping on this series?  Fixes an issue for SR-IOV in stress tests.

Alex

On Wed, Sep 27, 2023 at 2:31 PM Alex Deucher  wrote:
>
> We normally place GART based on the location of VRAM and the
> available address space around that, but provide an option
> to force a particular location for hardware that needs it.
>
> v2: Switch to passing the placement via parameter
>
> Signed-off-by: Alex Deucher 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 22 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h |  9 -
>  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  |  2 +-
>  drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c  |  2 +-
>  drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c   |  2 +-
>  drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c   |  2 +-
>  drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c   |  2 +-
>  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   |  2 +-
>  8 files changed, 31 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> index 2bfeaacd050c..60c81c3d29d5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> @@ -269,7 +269,8 @@ void amdgpu_gmc_sysvm_location(struct amdgpu_device 
> *adev, struct amdgpu_gmc *mc
>   * If GART size is bigger than space left then we adjust GART size.
>   * Thus function will never fail.
>   */
> -void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc 
> *mc)
> +void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc 
> *mc,
> + enum amdgpu_gart_placement gart_placement)
>  {
> const uint64_t four_gb = 0x1ULL;
> u64 size_af, size_bf;
> @@ -287,11 +288,22 @@ void amdgpu_gmc_gart_location(struct amdgpu_device 
> *adev, struct amdgpu_gmc *mc)
> mc->gart_size = max(size_bf, size_af);
> }
>
> -   if ((size_bf >= mc->gart_size && size_bf < size_af) ||
> -   (size_af < mc->gart_size))
> -   mc->gart_start = 0;
> -   else
> +   switch (gart_placement) {
> +   case AMDGPU_GART_PLACEMENT_HIGH:
> mc->gart_start = max_mc_address - mc->gart_size + 1;
> +   break;
> +   case AMDGPU_GART_PLACEMENT_LOW:
> +   mc->gart_start = 0;
> +   break;
> +   case AMDGPU_GART_PLACEMENT_BEST_FIT:
> +   default:
> +   if ((size_bf >= mc->gart_size && size_bf < size_af) ||
> +   (size_af < mc->gart_size))
> +   mc->gart_start = 0;
> +   else
> +   mc->gart_start = max_mc_address - mc->gart_size + 1;
> +   break;
> +   }
>
> mc->gart_start &= ~(four_gb - 1);
> mc->gart_end = mc->gart_start + mc->gart_size - 1;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> index f593259a66c3..e699d1ca8deb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> @@ -199,6 +199,12 @@ struct amdgpu_mem_partition_info {
>
>  #define INVALID_PFN	-1
>
> +enum amdgpu_gart_placement {
> +   AMDGPU_GART_PLACEMENT_BEST_FIT = 0,
> +   AMDGPU_GART_PLACEMENT_HIGH,
> +   AMDGPU_GART_PLACEMENT_LOW,
> +};
> +
>  struct amdgpu_gmc {
> /* FB's physical address in MMIO space (for CPU to
>  * map FB). This is different compared to the agp/
> @@ -391,7 +397,8 @@ void amdgpu_gmc_sysvm_location(struct amdgpu_device 
> *adev, struct amdgpu_gmc *mc
>  void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc 
> *mc,
>   u64 base);
>  void amdgpu_gmc_gart_location(struct amdgpu_device *adev,
> - struct amdgpu_gmc *mc);
> + struct amdgpu_gmc *mc,
> + enum amdgpu_gart_placement gart_placement);
>  void amdgpu_gmc_agp_location(struct amdgpu_device *adev,
>  struct amdgpu_gmc *mc);
>  void amdgpu_gmc_set_agp_default(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 70370b412d24..8e6e36279389 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -670,7 +670,7 @@ static void gmc_v10_0_vram_gtt_location(struct 
> amdgpu_device *adev,
> base += adev->gmc.xgmi.physical_node_id * 
> adev->gmc.xgmi.node_segment_size;
>
> amdgpu_gmc_vram_location(adev, &adev->gmc, base);
> -   amdgpu_gmc_gart_location(adev, mc);
> +   amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT);
> if (!amdgpu_sriov_vf(adev))
> amdgpu_gmc_agp_location(adev, mc);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> index d0a9ee2f12d3..d611d2efce3b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> +++ 
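
The policy in the hunk above, restated as a small standalone function
(simplified types; size_bf/size_af are the free MC address space below and
above VRAM, as in the kernel code):

#include <stdint.h>

enum gart_placement { PLACE_BEST_FIT, PLACE_HIGH, PLACE_LOW };

static uint64_t pick_gart_start(enum gart_placement p, uint64_t gart_size,
				uint64_t size_bf, uint64_t size_af,
				uint64_t max_mc_address)
{
	switch (p) {
	case PLACE_HIGH:	/* forced to the top of the address space */
		return max_mc_address - gart_size + 1;
	case PLACE_LOW:		/* forced to address 0 */
		return 0;
	case PLACE_BEST_FIT:	/* original heuristic: prefer the larger hole */
	default:
		if ((size_bf >= gart_size && size_bf < size_af) ||
		    (size_af < gart_size))
			return 0;
		return max_mc_address - gart_size + 1;
	}
}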

[bug report] drm/amd/display: add DMUB registers to crash dump diagnostic data.

2023-09-28 Thread Dan Carpenter
Hello Ashley Thomas,

The patch 2631ac1ac328: "drm/amd/display: add DMUB registers to crash
dump diagnostic data." from May 17, 2021 (linux-next), leads to the
following Smatch static checker warning:

drivers/gpu/drm/amd/amdgpu/../display/dc/dc_dmub_srv.c:800 
dc_dmub_srv_log_diagnostic_data()
error: we previously assumed 'dc_dmub_srv' could be null (see line 799)

drivers/gpu/drm/amd/amdgpu/../display/dc/dc_dmub_srv.c
795 void dc_dmub_srv_log_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv)
796 {
797 struct dmub_diagnostic_data diag_data = {0};
798 
799 if (!dc_dmub_srv || !dc_dmub_srv->dmub) {
 ^^^
Check for NULL.

--> 800 DC_LOG_ERROR("%s: invalid parameters.", __func__);

The logging will dereference dc_dmub_srv.

801 return;
802 }
803 
804 if (!dc_dmub_srv_get_diagnostic_data(dc_dmub_srv, &diag_data)) {

regards,
dan carpenter
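
One possible shape of a fix (a sketch, not a submitted patch): log through
something that does not reach through the NULL pointer, since per the
report DC_LOG_ERROR in this file resolves its logger via dc_dmub_srv:

	if (!dc_dmub_srv || !dc_dmub_srv->dmub) {
		pr_err("%s: invalid parameters\n", __func__);
		return;
	}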


[PATCH] drm/radeon/kms/atom: Remove redundant code

2023-09-28 Thread Jiapeng Chong
drivers/gpu/drm/radeon/atom.c:396 atom_skip_src_int() warn: ignoring 
unreachable code.

Reported-by: Abaci Robot 
Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=6713
Signed-off-by: Jiapeng Chong 
---
 drivers/gpu/drm/radeon/atom.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/atom.c b/drivers/gpu/drm/radeon/atom.c
index ceb6d772ef94..3082d08a06c2 100644
--- a/drivers/gpu/drm/radeon/atom.c
+++ b/drivers/gpu/drm/radeon/atom.c
@@ -393,7 +393,6 @@ static void atom_skip_src_int(atom_exec_context *ctx, 
uint8_t attr, int *ptr)
(*ptr)++;
return;
}
-   return;
}
 }
 
-- 
2.20.1.7.g153144c



[bug report] drm/amd/display: switch DC over to the new DRM logging macros

2023-09-28 Thread Dan Carpenter
Hello Hamza Mahfooz,

The patch 5d72e247e58c: "drm/amd/display: switch DC over to the new
DRM logging macros" from Sep 20, 2023 (linux-next), leads to the
following Smatch static checker warning:

drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_helpers.c:542 
dm_helpers_dp_read_dpcd()
error: we previously assumed 'aconnector' could be null (see line 541)

drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_helpers.c
531 bool dm_helpers_dp_read_dpcd(
532 struct dc_context *ctx,
533 const struct dc_link *link,
534 uint32_t address,
535 uint8_t *data,
536 uint32_t size)
537 {
538 
539 struct amdgpu_dm_connector *aconnector = link->priv;
540 
541 if (!aconnector) {
 ^^
Check for NULL

--> 542 drm_dbg_dp(aconnector->base.dev,
   
NULL dereference.

543"Failed to find connector for link!\n");
544 return false;
545 }
546 
547 return drm_dp_dpcd_read(&aconnector->dm_dp_aux.aux, address, data,
548 size) == size;
549 }

regards,
dan carpenter
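
One possible shape of a fix (again a sketch, not a submitted patch): the
drm_dbg_* wrappers in drm_print.h guard against a NULL drm_device
themselves, so passing NULL instead of dereferencing aconnector avoids the
crash:

	if (!aconnector) {
		drm_dbg_dp(NULL, "Failed to find connector for link!\n");
		return false;
	}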


Re: [PATCH v6 4/9] drm/amdgpu: create GFX-gen11 usermode queue

2023-09-28 Thread Shashank Sharma



On 14/09/2023 10:24, Shashank Sharma wrote:


On 14/09/2023 09:45, Christian König wrote:

Am 08.09.23 um 18:04 schrieb Shashank Sharma:

A Memory queue descriptor (MQD) of a userqueue defines it in
the hw's context. As MQD format can vary between different
graphics IPs, we need gfx GEN specific handlers to create MQDs.

This patch:
- Introduces MQD handler functions for the usermode queues.
- Adds new functions to create and destroy userqueue MQD for
   GFX-GEN-11 IP

V1: Worked on review comments from Alex:
 - Make MQD functions GEN and IP specific

V2: Worked on review comments from Alex:
 - Reuse the existing adev->mqd[ip] for MQD creation
 - Formatting and arrangement of code

V3:
 - Integration with doorbell manager

V4: Review comments addressed:
 - Do not create a new file for userq, reuse gfx_v11_0.c (Alex)
 - Align name of structure members (Luben)
 - Don't break up the Cc tag list and the Sob tag list in commit
   message (Luben)
V5:
    - No need to reserve the bo for MQD (Christian).
    - Some more changes to support IP specific MQD creation.

V6:
    - Add a comment reminding us to replace the 
amdgpu_bo_create_kernel()
  calls while creating MQD object to amdgpu_bo_create() once 
eviction

  fences are ready (Christian).

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Shashank Sharma 
Signed-off-by: Arvind Yadav 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 16 
  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c    | 77 
+++

  .../gpu/drm/amd/include/amdgpu_userqueue.h    |  7 ++
  3 files changed, 100 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c

index 44769423ba30..03fc8e89eafb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -140,12 +140,28 @@ int amdgpu_userq_ioctl(struct drm_device *dev, 
void *data,

  return r;
  }
  +extern const struct amdgpu_userq_funcs userq_gfx_v11_funcs;
+
+static void
+amdgpu_userqueue_setup_gfx(struct amdgpu_userq_mgr *uq_mgr)
+{
+    int maj;
+    struct amdgpu_device *adev = uq_mgr->adev;
+    uint32_t version = adev->ip_versions[GC_HWIP][0];
+
+    /* We support usermode queue only for GFX V11 as of now */
+    maj = IP_VERSION_MAJ(version);
+    if (maj == 11)
+    uq_mgr->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_gfx_v11_funcs;
+}


That belongs into gfx_v11.c and not here.



Agree,


On a second thought, we can't move it to gfx_v11.c, as this is the place 
where we are setting up the gfx_userqueue functions in fpriv->uq_mgr() 
for the first time, and we do not have another option but to check the 
IP and setup the functions here. The only other option to do this will 
be to move  uq_mgr->userq_funcs to adev->gfx.userq_funcs and setup them 
with the IP init (as Alex once suggested). Please let me know your 
thoughts on this.


- Shashank






+
  int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, 
struct amdgpu_device *adev)

  {
  mutex_init(&userq_mgr->userq_mutex);
  idr_init_base(&userq_mgr->userq_idr, 1);
  userq_mgr->adev = adev;
  +    amdgpu_userqueue_setup_gfx(userq_mgr);
  return 0;
  }
  diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c

index 0451533ddde4..6760abda08df 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -30,6 +30,7 @@
  #include "amdgpu_psp.h"
  #include "amdgpu_smu.h"
  #include "amdgpu_atomfirmware.h"
+#include "amdgpu_userqueue.h"
  #include "imu_v11_0.h"
  #include "soc21.h"
  #include "nvd.h"
@@ -6422,3 +6423,79 @@ const struct amdgpu_ip_block_version 
gfx_v11_0_ip_block =

  .rev = 0,
  .funcs = &gfx_v11_0_ip_funcs,
  };
+
+static int gfx_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
+  struct drm_amdgpu_userq_in *args_in,
+  struct amdgpu_usermode_queue *queue)
+{
+    struct amdgpu_device *adev = uq_mgr->adev;
+    struct amdgpu_mqd *mqd_gfx_generic = &adev->mqds[AMDGPU_HW_IP_GFX];

+    struct drm_amdgpu_userq_mqd_gfx_v11_0 mqd_user;
+    struct amdgpu_mqd_prop userq_props;
+    int r;
+
+    /* Incoming MQD parameters from userspace to be saved here */
+    memset(&mqd_user, 0, sizeof(mqd_user));
+
+    /* Structure to initialize MQD for userqueue using generic MQD 
init function */

+    memset(&userq_props, 0, sizeof(userq_props));
+
+    if (args_in->mqd_size != sizeof(struct 
drm_amdgpu_userq_mqd_gfx_v11_0)) {

+    DRM_ERROR("MQD size mismatch\n");
+    return -EINVAL;
+    }
+
+    if (copy_from_user(&mqd_user, u64_to_user_ptr(args_in->mqd), 
args_in->mqd_size)) {

+    DRM_ERROR("Failed to get user MQD\n");
+    return -EFAULT;
+    }
+
+    /*
+ * Create BO for actual Userqueue MQD now
+ * Todo: replace the calls to bo_create_kernel() with 
bo_create() and use

+ * implicit pinning for the MQD buffers.


Well not implicit 

[PATCH v2 1/1] drm/amdkfd: Fix unaligned doorbell absolute offset for gfx8

2023-09-28 Thread Arvind Yadav
This patch is to adjust the absolute doorbell offset
against the doorbell id considering the doorbell
size of 32/64 bit.

v2:
- Addressed the review comment from Felix.

Cc: Christian Koenig 
Cc: Alex Deucher 
Signed-off-by: Shashank Sharma 
Signed-off-by: Arvind Yadav 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 0d3d538b64eb..c54c4392d26e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -407,7 +407,14 @@ static int allocate_doorbell(struct qcm_process_device 
*qpd,
 
	q->properties.doorbell_off = amdgpu_doorbell_index_on_bar(dev->adev,
								   qpd->proc_doorbells,
-								   q->doorbell_id);
+								   0);
+
+	/* Adjust the absolute doorbell offset against the doorbell id
+	 * considering the doorbell size of 32/64 bit.
+	 */
+	q->properties.doorbell_off += q->doorbell_id *
+				      dev->kfd->device_info.doorbell_size / 4;
+
return 0;
 }
 
-- 
2.34.1



[PATCH v2 0/1] drm/amdkfd: Fix unaligned doorbell absolute offset for gfx8

2023-09-28 Thread Arvind Yadav
On older chips, the absolute doorbell offset within
the doorbell page is based on the queue ID.
KFD is using queue ID and doorbell size to get an
absolute doorbell offset in userspace.

This patch is to adjust the absolute doorbell offset
against the doorbell id considering the doorbell
size of 32/64 bit.
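
The arithmetic involved, as a standalone illustration (doorbell_off is
expressed in dwords; doorbell_size is 4 bytes on gfx8-class chips and 8
bytes on newer ASICs, so each queue ID advances the offset by
doorbell_size / 4 dwords):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint32_t sizes[] = { 4, 8 };	/* 32-bit vs 64-bit doorbells */

	for (int i = 0; i < 2; i++)
		for (uint32_t qid = 0; qid < 3; qid++)
			printf("doorbell_size=%u qid=%u -> offset += %u dwords\n",
			       sizes[i], qid, qid * sizes[i] / 4);
	return 0;
}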

v2:
- Addressed the review comment from Felix.

Arvind Yadav (1):
  drm/amdkfd: Fix unaligned doorbell absolute offset for gfx8

 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

-- 
2.34.1



Re: [PATCH v2] MAINTAINERS: Update drm-misc entry to match all drivers

2023-09-28 Thread Maxime Ripard
Hi,

On Thu, Sep 21, 2023 at 12:57:43PM +0200, Maxime Ripard wrote:
> We've had a number of times when a patch slipped through and we couldn't
> pick them up either because our MAINTAINERS entry only covers the
> framework and thus we weren't Cc'd.
> 
> Let's take another approach where we match everything, and remove all
> the drivers that are not maintained through drm-misc.
> 
> Acked-by: Jani Nikula 
> Signed-off-by: Maxime Ripard 

Applied with Dave's Acked-by given on IRC.

This was conflicting with
https://lore.kernel.org/r/20230925154929.1.I3287e895ce8e68d41b458494a49a1b5ec5c71013@changeid

So I removed the imx exclusion from that list while applying.

Maxime




Re: [PATCH] drm/amdgpu: add hub->ctx_distance in setup_vmid_config

2023-09-28 Thread Christian König

Am 28.09.23 um 10:31 schrieb Yifan Zhang:

add hub->ctx_distance when reading CONTEXT1_CNTL, to align with the
write-back operation.


Good catch, but please double-check the coding style with checkpatch.pl.

With that done the patch is Acked-by: Christian König 
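
For readers skimming the patch below: the VM context registers sit at a
fixed per-context stride, so context i lives at CONTEXT1_CNTL +
i * ctx_distance. The write side already applies the stride; reading at
plain offset i touches the wrong register for every i > 0 whenever
ctx_distance != 1. A one-line restatement (illustrative, simplified to
plain integers):

static uint32_t ctx1_cntl_offset(uint32_t base, uint32_t i, uint32_t ctx_distance)
{
	return base + i * ctx_distance;	/* must match the WREG32 side */
}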





Signed-off-by: Yifan Zhang 
---
  drivers/gpu/drm/amd/amdgpu/gfxhub_v11_5_0.c | 2 +-
  drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c| 2 +-
  drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c| 2 +-
  drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c| 2 +-
  drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c| 2 +-
  drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c| 2 +-
  drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c  | 2 +-
  drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 2 +-
  drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c | 2 +-
  drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c | 2 +-
  drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c | 2 +-
  drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c | 2 +-
  drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c | 2 +-
  drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c   | 2 +-
  drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c   | 2 +-
  drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c | 2 +-
  drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c | 2 +-
  17 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v11_5_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfxhub_v11_5_0.c
index bcb6ba03cead..f9949fedfbb9 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v11_5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v11_5_0.c
@@ -297,7 +297,7 @@ static void gfxhub_v11_5_0_setup_vmid_config(struct 
amdgpu_device *adev)
uint32_t tmp;
  
  	for (i = 0; i <= 14; i++) {

-   tmp = RREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL, i);
+   tmp = RREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL, i * 
hub->ctx_distance);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
adev->vm_manager.num_level);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index cdc290a474a9..53a2ba5fcf4b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -260,7 +260,7 @@ static void gfxhub_v1_0_setup_vmid_config(struct 
amdgpu_device *adev)
block_size -= 9;
  
  	for (i = 0; i <= 14; i++) {

-   tmp = RREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i);
+   tmp = RREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i * 
hub->ctx_distance);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
num_level);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c 
b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
index ff60670b8464..5f949caa68b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
@@ -329,7 +329,7 @@ static void gfxhub_v1_2_xcc_setup_vmid_config(struct 
amdgpu_device *adev,
for_each_inst(j, xcc_mask) {
hub = &adev->vmhub[AMDGPU_GFXHUB(j)];
for (i = 0; i <= 14; i++) {
-   tmp = RREG32_SOC15_OFFSET(GC, GET_INST(GC, j), 
regVM_CONTEXT1_CNTL, i);
+   tmp = RREG32_SOC15_OFFSET(GC, GET_INST(GC, j), 
regVM_CONTEXT1_CNTL, i * hub->ctx_distance);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, 
ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, 
PAGE_TABLE_DEPTH,
num_level);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
index 8521c45e8f38..793faf62cb07 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
@@ -287,7 +287,7 @@ static void gfxhub_v2_0_setup_vmid_config(struct 
amdgpu_device *adev)
uint32_t tmp;
  
  	for (i = 0; i <= 14; i++) {

-   tmp = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i);
+   tmp = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i * 
hub->ctx_distance);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
adev->vm_manager.num_level);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c 
b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
index f829c441640a..cd0e8a321e46 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
@@ -296,7 +296,7 @@ static void gfxhub_v2_1_setup_vmid_config(struct 
amdgpu_device *adev)
uint32_t tmp;
  
  	for (i = 0; i <= 14; i++) {

-   tmp = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i);
+   tmp = RREG32_SOC15_OFFSET(GC, 0, 

[PATCH v2] drm/amd/display: enable S/G display for recent APUs by default

2023-09-28 Thread Yifan Zhang
With S/G display becoming stable, enable S/G display for recent APUs
by default rather than via a white list.

v2: explicitly disable sg on pre-CZ chips (Alex)

Co-authored-by: Alex Deucher 
Signed-off-by: Yifan Zhang 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 47 ++-
 1 file changed, 14 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 8e5dfdee22e6..8963d9c251f3 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1627,41 +1627,22 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
break;
}
 
-   switch (adev->asic_type) {
-   case CHIP_CARRIZO:
-   case CHIP_STONEY:
-   init_data.flags.gpu_vm_support = true;
-   break;
-   default:
-   switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
-   case IP_VERSION(1, 0, 0):
-   case IP_VERSION(1, 0, 1):
-   /* enable S/G on PCO and RV2 */
-   if ((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
-   (adev->apu_flags & AMD_APU_IS_PICASSO))
-   init_data.flags.gpu_vm_support = true;
-   break;
-   case IP_VERSION(2, 1, 0):
-   case IP_VERSION(3, 0, 1):
-   case IP_VERSION(3, 1, 2):
-   case IP_VERSION(3, 1, 3):
-   case IP_VERSION(3, 1, 4):
-   case IP_VERSION(3, 1, 5):
-   case IP_VERSION(3, 1, 6):
-   case IP_VERSION(3, 5, 0):
-   init_data.flags.gpu_vm_support = true;
-   break;
-   default:
-   break;
-   }
-   break;
-   }
-   if (init_data.flags.gpu_vm_support &&
-   (amdgpu_sg_display == 0))
+   /* APUs support S/G display by default except:
+* ASICs before Carrizo,
+* RAVEN1 (users reported stability issues)
+*/
+
+   if (adev->asic_type < CHIP_CARRIZO)
init_data.flags.gpu_vm_support = false;
+   else if (adev->asic_type == CHIP_RAVEN) {
+   if (adev->apu_flags & AMD_APU_IS_RAVEN)
+   init_data.flags.gpu_vm_support = false;
+   else
+   init_data.flags.gpu_vm_support = (amdgpu_sg_display != 
0);
+   } else
+   init_data.flags.gpu_vm_support = (amdgpu_sg_display != 0) && 
(adev->flags & AMD_IS_APU);
 
-   if (init_data.flags.gpu_vm_support)
-   adev->mode_info.gpu_vm_support = true;
+   init_data.flags.gpu_vm_support = adev->mode_info.gpu_vm_support;
 
if (amdgpu_dc_feature_mask & DC_FBC_MASK)
init_data.flags.fbc_support = true;
-- 
2.37.3



[PATCH] drm/amdgpu: add hub->ctx_distance in setup_vmid_config

2023-09-28 Thread Yifan Zhang
add hub->ctx_distance when reading CONTEXT1_CNTL, to align with the
write-back operation.

Signed-off-by: Yifan Zhang 
---
 drivers/gpu/drm/amd/amdgpu/gfxhub_v11_5_0.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c| 2 +-
 drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c| 2 +-
 drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c| 2 +-
 drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c| 2 +-
 drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c| 2 +-
 drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c  | 2 +-
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c   | 2 +-
 drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c   | 2 +-
 drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c | 2 +-
 17 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v11_5_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfxhub_v11_5_0.c
index bcb6ba03cead..f9949fedfbb9 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v11_5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v11_5_0.c
@@ -297,7 +297,7 @@ static void gfxhub_v11_5_0_setup_vmid_config(struct 
amdgpu_device *adev)
uint32_t tmp;
 
for (i = 0; i <= 14; i++) {
-   tmp = RREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL, i);
+   tmp = RREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL, i * 
hub->ctx_distance);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
adev->vm_manager.num_level);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index cdc290a474a9..53a2ba5fcf4b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -260,7 +260,7 @@ static void gfxhub_v1_0_setup_vmid_config(struct 
amdgpu_device *adev)
block_size -= 9;
 
for (i = 0; i <= 14; i++) {
-   tmp = RREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i);
+   tmp = RREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i * 
hub->ctx_distance);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
num_level);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c 
b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
index ff60670b8464..5f949caa68b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
@@ -329,7 +329,7 @@ static void gfxhub_v1_2_xcc_setup_vmid_config(struct 
amdgpu_device *adev,
for_each_inst(j, xcc_mask) {
hub = &adev->vmhub[AMDGPU_GFXHUB(j)];
for (i = 0; i <= 14; i++) {
-   tmp = RREG32_SOC15_OFFSET(GC, GET_INST(GC, j), 
regVM_CONTEXT1_CNTL, i);
+   tmp = RREG32_SOC15_OFFSET(GC, GET_INST(GC, j), 
regVM_CONTEXT1_CNTL, i * hub->ctx_distance);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, 
ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, 
PAGE_TABLE_DEPTH,
num_level);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
index 8521c45e8f38..793faf62cb07 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
@@ -287,7 +287,7 @@ static void gfxhub_v2_0_setup_vmid_config(struct 
amdgpu_device *adev)
uint32_t tmp;
 
for (i = 0; i <= 14; i++) {
-   tmp = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i);
+   tmp = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i * 
hub->ctx_distance);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
adev->vm_manager.num_level);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c 
b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
index f829c441640a..cd0e8a321e46 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
@@ -296,7 +296,7 @@ static void gfxhub_v2_1_setup_vmid_config(struct 
amdgpu_device *adev)
uint32_t tmp;
 
for (i = 0; i <= 14; i++) {
-   tmp = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i);
+   tmp = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i * 
hub->ctx_distance);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,