Re: [PATCH v4 2/2] drm/amdgpu: sync page table freeing with tlb flush

2024-03-01 Thread Philip Yang

  


On 2024-03-01 06:07, Shashank Sharma
  wrote:


  The idea behind this patch is to delay the freeing of PT entry objects
until the TLB flush is done.

This patch:
- Adds a tlb_flush_waitlist which will keep the objects that need to be
  freed after tlb_flush
- Adds PT entries in this list in amdgpu_vm_pt_free_dfs, instead of freeing
  them immediately.
- Exports function amdgpu_vm_pt_free to be called directly.
- Adds a 'force' input bool to amdgpu_vm_pt_free_dfs to differentiate
  between immediate freeing of the BOs (like from
  amdgpu_vm_pt_free_root) vs delayed freeing.

V2: rebase
V4: (Christian)
- add only locked PTEs entries in TLB flush waitlist.
- do not create a separate function for list flush.
- do not create a new lock for TLB flush.
- there is no need to wait on tlb_flush_fence exclusively.

Cc: Christian König 
Cc: Alex Deucher 
Cc: Felix Kuehling 
Cc: Rajneesh Bhardwaj 
Signed-off-by: Shashank Sharma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 10 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|  4 
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 21 ++---
 3 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 310aae6fb49b..94581a1fe34f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -990,11 +990,20 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
 	/* Prepare a TLB flush fence to be attached to PTs */
 	if (!unlocked && params.needs_flush && vm->is_compute_context) {
+		struct amdgpu_vm_bo_base *entry, *next;
+
 		amdgpu_vm_tlb_fence_create(adev, vm, fence);
 
 		/* Makes sure no PD/PT is freed before the flush */
 		dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence,
    DMA_RESV_USAGE_BOOKKEEP);
+
+		if (list_empty(&vm->tlb_flush_waitlist))
+			goto error_unlock;
+
+		/* Now actually free the waitlist */
+		list_for_each_entry_safe(entry, next, &vm->tlb_flush_waitlist, vm_status)
+			amdgpu_vm_pt_free(entry);
 	}
 
 error_unlock:
@@ -2214,6 +2223,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	INIT_LIST_HEAD(&vm->pt_freed);
 	INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work);
 	INIT_KFIFO(vm->faults);
+	INIT_LIST_HEAD(&vm->tlb_flush_waitlist);
 
 	r = amdgpu_seq64_alloc(adev, &vm->tlb_seq_va, &vm->tlb_seq_cpu_addr);
 	if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 298f604b8e5f..ba374c2c61bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -343,6 +343,9 @@ struct amdgpu_vm {
 	uint64_t		*tlb_seq_cpu_addr;
 	uint64_t		tlb_fence_context;
 
+	/* temporary storage of PT BOs until the TLB flush */
+	struct list_head	tlb_flush_waitlist;
+
 	atomic64_t		kfd_last_flushed_seq;
 
 	/* How many times we had to re-generate the page tables */
@@ -545,6 +548,7 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
 			  uint64_t start, uint64_t end,
 			  uint64_t dst, uint64_t flags);
 void amdgpu_vm_pt_free_work(struct work_struct *work);
+void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry);
 
 #if defined(CONFIG_DEBUG_FS)
 void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index 95dc0afdaffb..cb14e5686c0f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -636,7 +636,7 @@ static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev,
  *
  * @entry: PDE to free
  */
-static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
+void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
 {
 	struct amdgpu_bo *shadow;
 
@@ -685,13 +685,15 @@ void amdgpu_vm_pt_free_work(struct work_struct *work)
  * @vm: amdgpu vm structure
  * @start: optional cursor where to start freeing PDs/PTs
  * @unlocked: vm resv unlock status
+ * @force: force free all PDs/PTs without waiting for TLB flush
  *
  * Free the page directory or page table level and all sub levels.
  */
 static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
   struct amdgpu_vm *vm,
   struct amdgpu_vm_pt_cursor *start,
-  bool unlocked)
+  bool unlocked,
+  bool force)
 {
 	struct amdgpu_vm_pt_cursor cursor;
 	struct amdgpu_vm_bo_base *entry;
@@ -708,11 +710,15 @@ static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
 		return;
 	}
 
-	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
-		amdgpu_vm_pt_free(entry);

I feel like if we attach tlb flush fence before free pt bo, then
  don't need tlb_flush_waitlist.
Regards,
Philip


  
+	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) {
+		if (!force)
+			list_move(&entry->vm_status, &vm->tlb_flush_waitlist);
+		else
+			amdgpu_vm_pt_free(entry);
+	}
 
 	if 

Re: [PATCH v4 1/2] drm/amdgpu: implement TLB flush fence

2024-03-01 Thread Philip Yang

  


On 2024-03-01 06:07, Shashank Sharma
  wrote:


  From: Christian König 

The problem is that when (for example) 4k pages are replaced
with a single 2M page, we need to wait for the change to be flushed
out by invalidating the TLB before the PT can be freed.

Solve this by moving the TLB flush into a DMA-fence object which
can be used to delay the freeing of the PT BOs until it is signaled.

V2: (Shashank)
- rebase
- set dma_fence_error only in case of error
- add tlb_flush fence only when PT/PD BO is locked (Felix)
- use vm->pasid when f is NULL (Mukul)

V4: - add a wait for (f->dependency) in tlb_fence_work (Christian)
- move the misplaced fence_create call to the end (Philip)

Cc: Christian Koenig 
Cc: Felix Kuehling 
Cc: Rajneesh Bhardwaj 
Cc: Alex Deucher 
Signed-off-by: Christian König 
Signed-off-by: Shashank Sharma 
---
 drivers/gpu/drm/amd/amdgpu/Makefile   |   3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c|  10 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|   4 +
 .../gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c  | 111 ++
 4 files changed, 127 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index fa26a4e3a99d..91ab4cf29b5b 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -70,7 +70,8 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \
 	amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o \
 	atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
 	atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
-	amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_ib.o amdgpu_pll.o \
+	amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_vm_tlb_fence.o \
+	amdgpu_ib.o amdgpu_pll.o \
 	amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
 	amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o \
 	amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 0960e0a665d3..310aae6fb49b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -988,6 +988,15 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
 	r = vm->update_funcs->commit(&params, fence);
 
+	/* Prepare a TLB flush fence to be attached to PTs */
+	if (!unlocked && params.needs_flush && vm->is_compute_context) {
+		amdgpu_vm_tlb_fence_create(adev, vm, fence);
+
+		/* Makes sure no PD/PT is freed before the flush */
+		dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence,
+   DMA_RESV_USAGE_BOOKKEEP);
+	}
+

Adding fence here seems too late, the fence has to add before
calling amdgpu_vm_pt_free_dfs inside amdgpu_vm_ptes_update.

  
 error_unlock:
 	amdgpu_vm_eviction_unlock(vm);
 	drm_dev_exit(idx);
@@ -2237,6 +2246,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
 	mutex_init(&vm->eviction_lock);
 	vm->evicting = false;
+	vm->tlb_fence_context = dma_fence_context_alloc(1);
 
 	r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level,
 				false, &root, xcp_id);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 64b3f69efa57..298f604b8e5f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -341,6 +341,7 @@ struct amdgpu_vm {
 	atomic64_t		tlb_seq;
 	uint64_t		tlb_seq_va;
 	uint64_t		*tlb_seq_cpu_addr;
+	uint64_t		tlb_fence_context;
 
 	atomic64_t		kfd_last_flushed_seq;
 
@@ -594,5 +595,8 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
   uint64_t addr,
   uint32_t status,
   unsigned int vmhub);
+void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct dma_fence **fence);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c
new file mode 100644
index ..54c33c24fa46
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ 

Re: [PATCH V3] Revert "drm/amdgpu: remove vm sanity check from amdgpu_vm_make_compute" for Raven

2024-03-01 Thread Felix Kuehling

On 2024-02-29 01:04, Jesse.Zhang wrote:

fix the issue:
"amdgpu: Failed to create process VM object".

[Why] When amdgpu is initialized, seq64 does a mapping and updates the bo
mapping in the vm page table.
But when clinfo runs, it also initializes a vm for the process device through
the function kfd_process_device_init_vm and ensures the root PD is clean
through the function amdgpu_vm_pt_is_root_clean.
So the two conflict, and clinfo always fails.

v1:
   - remove all the pte_supports_ats stuff from the amdgpu_vm code (Felix)

Signed-off-by: Jesse Zhang 

The headline should be updated. This is no longer a revert of the quoted 
patch. Other than that, this patch looks reasonable to me. One more 
comment inline.



---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 23 --
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|  3 --
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 56 +--
  3 files changed, 1 insertion(+), 81 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index ed4a8c5d26d7..d004ace79536 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1385,10 +1385,6 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
struct amdgpu_bo_va_mapping, list);
		list_del(&mapping->list);
  
-		if (vm->pte_support_ats &&

-   mapping->start < AMDGPU_GMC_HOLE_START)
-   init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
-
r = amdgpu_vm_update_range(adev, vm, false, false, true, false,
   resv, mapping->start, mapping->last,
   init_pte_value, 0, 0, NULL, NULL,
@@ -2264,7 +2260,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
if (r)
return r;
  
-	vm->pte_support_ats = false;

vm->is_compute_context = false;
  
  	vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &

@@ -2350,30 +2345,12 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
   */
  int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
  {
-   bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
int r;
  
  	r = amdgpu_bo_reserve(vm->root.bo, true);

if (r)
return r;
  
-	/* Check if PD needs to be reinitialized and do it before

-* changing any other state, in case it fails.
-*/
-   if (pte_support_ats != vm->pte_support_ats) {
-   /* Sanity checks */
-   if (!amdgpu_vm_pt_is_root_clean(adev, vm)) {
-   r = -EINVAL;
-   goto unreserve_bo;
-   }
-
-   vm->pte_support_ats = pte_support_ats;
-   r = amdgpu_vm_pt_clear(adev, vm, to_amdgpu_bo_vm(vm->root.bo),
-  false);
-   if (r)
-   goto unreserve_bo;
-   }
-
/* Update VM state */
vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
AMDGPU_VM_USE_CPU_FOR_COMPUTE);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 42f6ddec50c1..9f6b5e1ccf34 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -357,9 +357,6 @@ struct amdgpu_vm {
/* Functions to use for VM table updates */
const struct amdgpu_vm_update_funcs *update_funcs;
  
-	/* Flag to indicate ATS support from PTE for GFX9 */

-   boolpte_support_ats;
-
/* Up to 128 pending retry page faults */
DECLARE_KFIFO(faults, u64, 128);
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c

index a160265ddc07..2835cb3f76eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -89,22 +89,6 @@ static unsigned int amdgpu_vm_pt_num_entries(struct 
amdgpu_device *adev,
return AMDGPU_VM_PTE_COUNT(adev);
  }
  
-/**

- * amdgpu_vm_pt_num_ats_entries - return the number of ATS entries in the root 
PD
- *
- * @adev: amdgpu_device pointer
- *
- * Returns:
- * The number of entries in the root page directory which needs the ATS 
setting.
- */
-static unsigned int amdgpu_vm_pt_num_ats_entries(struct amdgpu_device *adev)
-{
-   unsigned int shift;
-
-   shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level);
-   return AMDGPU_GMC_HOLE_START >> (shift + AMDGPU_GPU_PAGE_SHIFT);
-}
-
  /**
   * amdgpu_vm_pt_entries_mask - the mask to get the entry number of a PD/PT
   *
@@ -379,7 +363,7 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
struct ttm_operation_ctx ctx = { true, false };
struct amdgpu_vm_update_params params;
	struct amdgpu_bo *ancestor = &vmbo->bo;
-   unsigned int entries, 

[pull] amdgpu, amdkfd, radeon drm-next-6.9

2024-03-01 Thread Alex Deucher
Hi Dave, Sima,

A few more updates for 6.9.

The following changes since commit 31e0a586f3385134bcad00d8194eb0728cb1a17d:

  drm/amdgpu: add MMHUB 3.3.1 support (2024-02-19 14:50:46 -0500)

are available in the Git repository at:

  https://gitlab.freedesktop.org/agd5f/linux.git 
tags/amd-drm-next-6.9-2024-03-01

for you to fetch changes up to b07395d5d5e74e3a7e2e436fc0eced2b0f332074:

  drm/amdgpu: remove misleading amdgpu_pmops_runtime_idle() comment (2024-02-29 
20:35:39 -0500)


amd-drm-next-6.9-2024-03-01:

amdgpu:
- GC 11.5.1 updates
- Misc display cleanups
- NBIO 7.9 updates
- Backlight fixes
- DMUB fixes
- MPO fixes
- atomfirmware table updates
- SR-IOV fixes
- VCN 4.x updates
- use RMW accessors for pci config registers
- PSR fixes
- Suspend/resume fixes
- RAS fixes
- ABM fixes
- Misc code cleanups
- SI DPM fix
- Revert freesync video

amdkfd:
- Misc cleanups
- Error handling fixes

radeon:
- use RMW accessors for pci config registers


Alex Deucher (3):
  Revert "drm/amd/pm: resolve reboot exception for si oland"
  Revert "drm/amd: Remove freesync video mode amdgpu parameter"
  Reapply "Revert drm/amd/display: Enable Freesync Video Mode by default"

Alvin Lee (2):
  drm/amd/display: Generalize new minimal transition path
  drm/amd/display: Remove pixle rate limit for subvp

Aric Cyr (2):
  drm/amd/display: Fix nanosec stat overflow
  drm/amd/display: 3.2.273

Armin Wolf (1):
  drm/amd/display: Fix memory leak in dm_sw_fini()

Asad Kamal (5):
  Revert "drm/amdgpu: Add pci usage to nbio v7.9"
  Revert "drm/amdgpu: Add pcie usage callback to nbio"
  drm/amdgpu: Remove pcie bw sys entry
  drm/amd/pm: Skip reporting pcie width/speed on vfs
  drm/amd/pm: Fix esm reg mask use to get pcie speed

Aurabindo Pillai (1):
  drm/amd: Update atomfirmware.h for DCN401

Bjorn Helgaas (1):
  drm/amdgpu: remove misleading amdgpu_pmops_runtime_idle() comment

Eric Huang (1):
  amd/amdkfd: remove unused parameter

Ethan Bitnun (1):
  drm/amd/display: Only log during optimize_bandwidth call

George Shen (1):
  drm/amd/display: Check DP Alt mode DPCS state via DMUB

Hawking Zhang (1):
  drm/amdgpu: Do not toggle bif ras irq from guest

Ilpo Järvinen (2):
  drm/radeon: Use RMW accessors for changing LNKCTL2
  drm/amdgpu: Use RMW accessors for changing LNKCTL2

Jonathan Kim (1):
  drm/amdkfd: fix process reference drop on debug ioctl

Kunwu Chan (3):
  drm/amdgpu: Simplify the allocation of fence slab caches
  drm/amdgpu: Simplify the allocation of mux_chunk slab caches
  drm/amdgpu: Simplify the allocation of sync slab caches

Lenko Donchev (1):
  drm/amd/display: Use kcalloc() instead of kzalloc()

Lewis Huang (1):
  drm/amd/display: Only allow dig mapping to pwrseq in new asic

Li Ma (1):
  drm/amd/swsmu: modify the gfx activity scaling

Lijo Lazar (4):
  drm/amdgpu: Add fatal error detected flag
  drm/amdkfd: Skip packet submission on fatal error
  drm/amdkfd: Add partition id field to location_id
  drm/amd/pm: Increase SMUv13.0.6 mode-2 reset time

Ma Jun (3):
  drm/amdgpu: Drop redundant parameter in amdgpu_gfx_kiq_init_ring
  drm/amdgpu: Fix the runtime resume failure issue
  drm/amdgpu/pm: Fix the power1_min_cap value

Mario Limonciello (1):
  drm/amd: Drop abm_level property

Melissa Wen (2):
  drm/amd/display: fix null-pointer dereference on edid reading
  drm/amd/display: check dc_link before dereferencing

Nicholas Kazlauskas (1):
  drm/amd/display: Fix S4 hang polling on HW power up done for VBIOS DMCUB

Prike Liang (1):
  drm/amdgpu: Enable gpu reset for S3 abort cases on Raven series

Rodrigo Siqueira (5):
  drm/amd/display: Initialize variable with default value
  drm/amd/display: Remove unused file
  drm/amd/display: Add SMU timeout check and retry
  drm/amd/display: Remove redundant FPU guard
  drm/amd/display: Drop unnecessary header

Saleemkhan Jamadar (1):
  drm/amdgpu/jpeg: add support for jpeg multi instance

Srinivasan Shanmugam (5):
  drm/amd/display: Fix potential null pointer dereference in dc_dmub_srv
  drm/amdgpu/display: Address kdoc for 'is_psr_su' in 'fill_dc_dirty_rects'
  drm/amd/display: Prevent potential buffer overflow in map_hw_resources
  drm/amdgpu: Fix missing break in ATOM_ARG_IMM Case of atom_get_src_int()
  drm/amd/amdgpu: Fix potential ioremap() memory leaks in 
amdgpu_device_init()

Stanley.Yang (1):
  drm/amdgpu: Fix ineffective ras_mask settings

Swapnil Patel (1):
  drm/amd/display: fix input states translation error for dcn35 & dcn351

Tao Zhou (1):
  drm/amdgpu: add deferred error check for UMC v12 address query

Tim Huang (2):
  drm/amdgpu: enable CGPG for GFX ip v11.5.1
  drm/amdgpu: reserve more memory for MES runtime 

Re: [PATCH v4 2/2] drm/amdgpu: sync page table freeing with tlb flush

2024-03-01 Thread Sharma, Shashank



On 01/03/2024 14:29, Christian König wrote:



Am 01.03.24 um 12:07 schrieb Shashank Sharma:

The idea behind this patch is to delay the freeing of PT entry objects
until the TLB flush is done.

This patch:
- Adds a tlb_flush_waitlist which will keep the objects that need to be
   freed after tlb_flush
- Adds PT entries in this list in amdgpu_vm_pt_free_dfs, instead of 
freeing

   them immediately.
- Exports function amdgpu_vm_pt_free to be called directly.
- Adds a 'force' input bool to amdgpu_vm_pt_free_dfs to differentiate
   between immediate freeing of the BOs (like from
   amdgpu_vm_pt_free_root) vs delayed freeing.

V2: rebase
V4: (Christian)
 - add only locked PTEs entries in TLB flush waitlist.
 - do not create a separate function for list flush.
 - do not create a new lock for TLB flush.
 - there is no need to wait on tlb_flush_fence exclusively.

Cc: Christian König 
Cc: Alex Deucher 
Cc: Felix Kuehling 
Cc: Rajneesh Bhardwaj 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    | 10 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h    |  4 
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 21 ++---
  3 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index 310aae6fb49b..94581a1fe34f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -990,11 +990,20 @@ int amdgpu_vm_update_range(struct amdgpu_device 
*adev, struct amdgpu_vm *vm,

    /* Prepare a TLB flush fence to be attached to PTs */
  if (!unlocked && params.needs_flush && vm->is_compute_context) {
+    struct amdgpu_vm_bo_base *entry, *next;
+
  amdgpu_vm_tlb_fence_create(adev, vm, fence);
    /* Makes sure no PD/PT is freed before the flush */
  dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence,
 DMA_RESV_USAGE_BOOKKEEP);
+
+    if (list_empty(&vm->tlb_flush_waitlist))
+    goto error_unlock;
+
+    /* Now actually free the waitlist */
+    list_for_each_entry_safe(entry, next, &vm->tlb_flush_waitlist, vm_status)

+    amdgpu_vm_pt_free(entry);


This needs to be outside of the "if". We also need to free the PDs/PTs 
in non compute VMs.



Noted,
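
Something along these lines, perhaps (untested sketch only, just reshuffling 
the hunk quoted above: the waitlist free moves out of the compute-only branch 
so non-compute VMs release their PDs/PTs as well, and the entry/next 
declarations move up with it):

	struct amdgpu_vm_bo_base *entry, *next;
	...
	/* Prepare a TLB flush fence to be attached to PTs */
	if (!unlocked && params.needs_flush && vm->is_compute_context) {
		amdgpu_vm_tlb_fence_create(adev, vm, fence);

		/* Makes sure no PD/PT is freed before the flush */
		dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence,
				   DMA_RESV_USAGE_BOOKKEEP);
	}

	/* Free the waitlist for compute and non-compute VMs alike */
	list_for_each_entry_safe(entry, next, &vm->tlb_flush_waitlist, vm_status)
		amdgpu_vm_pt_free(entry);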




  }
    error_unlock:
@@ -2214,6 +2223,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,

  INIT_LIST_HEAD(&vm->pt_freed);
  INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work);
  INIT_KFIFO(vm->faults);
+    INIT_LIST_HEAD(&vm->tlb_flush_waitlist);
    r = amdgpu_seq64_alloc(adev, &vm->tlb_seq_va, &vm->tlb_seq_cpu_addr);

  if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

index 298f604b8e5f..ba374c2c61bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -343,6 +343,9 @@ struct amdgpu_vm {
  uint64_t    *tlb_seq_cpu_addr;
  uint64_t    tlb_fence_context;
  +    /* temporary storage of PT BOs until the TLB flush */
+    struct list_head    tlb_flush_waitlist;
+
  atomic64_t    kfd_last_flushed_seq;
    /* How many times we had to re-generate the page tables */
@@ -545,6 +548,7 @@ int amdgpu_vm_ptes_update(struct 
amdgpu_vm_update_params *params,

    uint64_t start, uint64_t end,
    uint64_t dst, uint64_t flags);
  void amdgpu_vm_pt_free_work(struct work_struct *work);
+void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry);
    #if defined(CONFIG_DEBUG_FS)
  void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct 
seq_file *m);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c

index 95dc0afdaffb..cb14e5686c0f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -636,7 +636,7 @@ static int amdgpu_vm_pt_alloc(struct 
amdgpu_device *adev,

   *
   * @entry: PDE to free
   */
-static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
+void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
  {
  struct amdgpu_bo *shadow;
  @@ -685,13 +685,15 @@ void amdgpu_vm_pt_free_work(struct 
work_struct *work)

   * @vm: amdgpu vm structure
   * @start: optional cursor where to start freeing PDs/PTs
   * @unlocked: vm resv unlock status
+ * @force: force free all PDs/PTs without waiting for TLB flush
   *
   * Free the page directory or page table level and all sub levels.
   */
  static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
    struct amdgpu_vm *vm,
    struct amdgpu_vm_pt_cursor *start,
-  bool unlocked)
+  bool unlocked,
+  bool force)
  {
  struct amdgpu_vm_pt_cursor cursor;
  struct amdgpu_vm_bo_base *entry;
@@ -708,11 +710,15 @@ static void amdgpu_vm_pt_free_dfs(struct 
amdgpu_device *adev,

  return;
  

Re: [PATCH v3] drm/amdgpu: change vm->task_info handling

2024-03-01 Thread Sharma, Shashank


On 01/03/2024 18:07, Felix Kuehling wrote:

On 2024-02-05 12:05, Shashank Sharma wrote:

This patch changes the handling and lifecycle of vm->task_info object.
The major changes are:
- vm->task_info is a dynamically allocated ptr now, and its usage is
   reference counted.
- introducing two new helper funcs for task_info lifecycle management
 - amdgpu_vm_get_task_info: reference counts up task_info before
   returning this info
 - amdgpu_vm_put_task_info: reference counts down task_info
- last put to task_info() frees task_info from the vm.

This patch also does logistical changes required for existing usage
of vm->task_info.

V2: Do not block all the prints when task_info not found (Felix)
V3: (Felix)
- Fix wrong indentation
- No debug message for -ENOMEM
- Add NULL check for task_info
- Do not duplicate the debug messages (ti vs no ti)
- Get first reference of task_info in vm_init(), put last
  in vm_fini()

Cc: Christian Koenig
Cc: Alex Deucher
Cc: Felix Kuehling
Signed-off-by: Shashank Sharma
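
For reference, the get/put lifecycle described above boils down to roughly the 
following sketch (illustrative only: it assumes a kref-based refcount member 
in struct amdgpu_task_info; the helper names follow the commit message, the 
exact implementation in the patch may differ):

	struct amdgpu_task_info *amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm)
	{
		struct amdgpu_task_info *ti = NULL;

		if (vm && vm->task_info) {
			ti = vm->task_info;
			/* reference counts up before returning this info */
			kref_get(&ti->refcount);
		}
		return ti;
	}

	static void amdgpu_vm_destroy_task_info(struct kref *kref)
	{
		struct amdgpu_task_info *ti =
			container_of(kref, struct amdgpu_task_info, refcount);

		/* last put frees task_info */
		kfree(ti);
	}

	void amdgpu_vm_put_task_info(struct amdgpu_task_info *task_info)
	{
		kref_put(&task_info->refcount, amdgpu_vm_destroy_task_info);
	}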


One nit-pick and one bug inline. With those fixed, the patch

Reviewed-by: Felix Kuehling 



---
  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c |   9 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c |  18 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c   |  12 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 158 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  21 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c   |   2 +-
  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  |  24 +--
  drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c  |  23 +--
  drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c   |  20 ++-
  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   |  23 +--
  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c  |  23 +--
  drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c|  22 +--
  drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c |  20 +--
  13 files changed, 251 insertions(+), 124 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 0e61ebdb3f3e..f9eb12697b95 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1775,9 +1775,14 @@ static int amdgpu_debugfs_vm_info_show(struct seq_file 
*m, void *unused)
	list_for_each_entry(file, &dev->filelist, lhead) {
struct amdgpu_fpriv *fpriv = file->driver_priv;
struct amdgpu_vm *vm = >vm;
+   struct amdgpu_task_info *ti;
+
+   ti = amdgpu_vm_get_task_info_vm(vm);
+   if (ti) {
+   seq_printf(m, "pid:%d\tProcess:%s --\n", ti->pid, 
ti->process_name);
+   amdgpu_vm_put_task_info(ti);
+   }
  
-		seq_printf(m, "pid:%d\tProcess:%s --\n",

-   vm->task_info.pid, vm->task_info.process_name);
r = amdgpu_bo_reserve(vm->root.bo, true);
if (r)
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 1f357198533f..e6e6d56398f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -35,7 +35,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct 
drm_sched_job *s_job)
  {
struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
struct amdgpu_job *job = to_amdgpu_job(s_job);
-   struct amdgpu_task_info ti;
+   struct amdgpu_task_info *ti;
struct amdgpu_device *adev = ring->adev;
int idx;
int r;
@@ -48,7 +48,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct 
drm_sched_job *s_job)
return DRM_GPU_SCHED_STAT_ENODEV;
}
  
-	memset(&ti, 0, sizeof(struct amdgpu_task_info));

+
adev->job_hang = true;
  
  	if (amdgpu_gpu_recovery &&

@@ -58,12 +58,16 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct 
drm_sched_job *s_job)
goto exit;
}
  
-	amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti);

DRM_ERROR("ring %s timeout, signaled seq=%u, emitted seq=%u\n",
- job->base.sched->name, atomic_read(&ring->fence_drv.last_seq),
- ring->fence_drv.sync_seq);
-   DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n",
- ti.process_name, ti.tgid, ti.task_name, ti.pid);
+  job->base.sched->name, 
atomic_read(&ring->fence_drv.last_seq),
+  ring->fence_drv.sync_seq);
+
+   ti = amdgpu_vm_get_task_info_pasid(ring->adev, job->pasid);
+   if (ti) {
+   DRM_ERROR("Process information: process %s pid %d thread %s pid 
%d\n",
+ ti->process_name, ti->tgid, ti->task_name, ti->pid);
+   amdgpu_vm_put_task_info(ti);
+   }
  
 	dma_fence_set_error(&s_job->s_fence->finished, -ETIME);
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c 

Re: [PATCH v3] drm/amdgpu: change vm->task_info handling

2024-03-01 Thread Felix Kuehling

On 2024-02-05 12:05, Shashank Sharma wrote:

This patch changes the handling and lifecycle of vm->task_info object.
The major changes are:
- vm->task_info is a dynamically allocated ptr now, and its usage is
   reference counted.
- introducing two new helper funcs for task_info lifecycle management
 - amdgpu_vm_get_task_info: reference counts up task_info before
   returning this info
 - amdgpu_vm_put_task_info: reference counts down task_info
- last put to task_info() frees task_info from the vm.

This patch also does logistical changes required for existing usage
of vm->task_info.

V2: Do not block all the prints when task_info not found (Felix)
V3: (Felix)
- Fix wrong indentation
- No debug message for -ENOMEM
- Add NULL check for task_info
- Do not duplicate the debug messages (ti vs no ti)
- Get first reference of task_info in vm_init(), put last
  in vm_fini()

Cc: Christian Koenig
Cc: Alex Deucher
Cc: Felix Kuehling
Signed-off-by: Shashank Sharma


One nit-pick and one bug inline. With those fixed, the patch

Reviewed-by: Felix Kuehling 



---
  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c |   9 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c |  18 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c   |  12 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 158 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  21 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c   |   2 +-
  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  |  24 +--
  drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c  |  23 +--
  drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c   |  20 ++-
  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   |  23 +--
  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c  |  23 +--
  drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c|  22 +--
  drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c |  20 +--
  13 files changed, 251 insertions(+), 124 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 0e61ebdb3f3e..f9eb12697b95 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1775,9 +1775,14 @@ static int amdgpu_debugfs_vm_info_show(struct seq_file 
*m, void *unused)
	list_for_each_entry(file, &dev->filelist, lhead) {
struct amdgpu_fpriv *fpriv = file->driver_priv;
struct amdgpu_vm *vm = >vm;
+   struct amdgpu_task_info *ti;
+
+   ti = amdgpu_vm_get_task_info_vm(vm);
+   if (ti) {
+   seq_printf(m, "pid:%d\tProcess:%s --\n", ti->pid, 
ti->process_name);
+   amdgpu_vm_put_task_info(ti);
+   }
  
-		seq_printf(m, "pid:%d\tProcess:%s --\n",

-   vm->task_info.pid, vm->task_info.process_name);
r = amdgpu_bo_reserve(vm->root.bo, true);
if (r)
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 1f357198533f..e6e6d56398f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -35,7 +35,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct 
drm_sched_job *s_job)
  {
struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
struct amdgpu_job *job = to_amdgpu_job(s_job);
-   struct amdgpu_task_info ti;
+   struct amdgpu_task_info *ti;
struct amdgpu_device *adev = ring->adev;
int idx;
int r;
@@ -48,7 +48,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct 
drm_sched_job *s_job)
return DRM_GPU_SCHED_STAT_ENODEV;
}
  
-	memset(&ti, 0, sizeof(struct amdgpu_task_info));

+
adev->job_hang = true;
  
  	if (amdgpu_gpu_recovery &&

@@ -58,12 +58,16 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct 
drm_sched_job *s_job)
goto exit;
}
  
-	amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti);

DRM_ERROR("ring %s timeout, signaled seq=%u, emitted seq=%u\n",
- job->base.sched->name, atomic_read(&ring->fence_drv.last_seq),
- ring->fence_drv.sync_seq);
-   DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n",
- ti.process_name, ti.tgid, ti.task_name, ti.pid);
+  job->base.sched->name, 
atomic_read(&ring->fence_drv.last_seq),
+  ring->fence_drv.sync_seq);
+
+   ti = amdgpu_vm_get_task_info_pasid(ring->adev, job->pasid);
+   if (ti) {
+   DRM_ERROR("Process information: process %s pid %d thread %s pid 
%d\n",
+ ti->process_name, ti->tgid, ti->task_name, ti->pid);
+   amdgpu_vm_put_task_info(ti);
+   }
  
 	dma_fence_set_error(&s_job->s_fence->finished, -ETIME);
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c

index 4baa300121d8..a59364e9b6ed 

Re: [PATCH] drm/amd/display: handle range offsets in VRR ranges

2024-03-01 Thread Harry Wentland



On 2024-03-01 08:55, Alex Deucher wrote:
> Ping?
> 
> On Wed, Feb 28, 2024 at 4:12 PM Alex Deucher  
> wrote:
>>
>> Need to check the offset bits for values greater than 255.
>>
>> v2: also update amdgpu_dm_connector values.
>>
>> Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3203
>> Signed-off-by: Alex Deucher 

Reviewed-by: Harry Wentland 

Harry

>> ---
>>  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 19 ++-
>>  1 file changed, 14 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
>> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>> index 32efce81a5a74..4e4cbf2e33dd2 100644
>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>> @@ -11292,14 +11292,23 @@ void amdgpu_dm_update_freesync_caps(struct 
>> drm_connector *connector,
>> if (range->flags != 1)
>> continue;
>>
>> -   amdgpu_dm_connector->min_vfreq = 
>> range->min_vfreq;
>> -   amdgpu_dm_connector->max_vfreq = 
>> range->max_vfreq;
>> -   amdgpu_dm_connector->pixel_clock_mhz =
>> -   range->pixel_clock_mhz * 10;
>> -
>> 
>> connector->display_info.monitor_range.min_vfreq = range->min_vfreq;
>> 
>> connector->display_info.monitor_range.max_vfreq = range->max_vfreq;
>>
>> +   if (edid->revision >= 4) {
>> +   if (data->pad2 & 
>> DRM_EDID_RANGE_OFFSET_MIN_VFREQ)
>> +   
>> connector->display_info.monitor_range.min_vfreq += 255;
>> +   if (data->pad2 & 
>> DRM_EDID_RANGE_OFFSET_MAX_VFREQ)
>> +   
>> connector->display_info.monitor_range.max_vfreq += 255;
>> +   }
>> +
>> +   amdgpu_dm_connector->min_vfreq =
>> +   
>> connector->display_info.monitor_range.min_vfreq;
>> +   amdgpu_dm_connector->max_vfreq =
>> +   
>> connector->display_info.monitor_range.max_vfreq;
>> +   amdgpu_dm_connector->pixel_clock_mhz =
>> +   range->pixel_clock_mhz * 10;
>> +
>> break;
>> }
>>
>> --
>> 2.44.0
>>



Re: [PATCH] drm/amdkfd: fix shift out of bounds about gpu debug

2024-03-01 Thread Jay Cornwall
On 3/1/2024 00:35, Kim, Jonathan wrote:

> The range check should probably flag any exception prefixed as 
> EC_QUEUE_PACKET_* as valid, as defined in kfd_dbg_trap_exception_code:
> https://github.com/torvalds/linux/blob/master/include/uapi/linux/kfd_ioctl.h#L857
> + Jay to confirm this is the correct exception range for CP_BAD_OPCODE

Yes, that covers the full range of possible values.


Re: [PATCH] drm/amdgpu: Fix potential Spectre vulnerability in amdgpu_gfx_parse_disable_cu()

2024-03-01 Thread Lazar, Lijo



On 3/1/2024 7:52 PM, Christian König wrote:
> Am 01.03.24 um 15:01 schrieb Lazar, Lijo:
>> On 3/1/2024 6:15 PM, Srinivasan Shanmugam wrote:
>>> The 'mask' array could be used in a way that would make the code
>>> vulnerable to a Spectre attack. The issue is likely related to the fact
>>> that the 'mask' array is being indexed using values that are derived
>>> from user input (the 'se' and 'sh' variables), which could potentially
>>> be manipulated by an attacker.
>>>
>>> The array_index_nospec() function is typically used in these situations
>>> where an array index is derived from user input or other untrusted data.
>>> By sanitizing the index, it helps to ensure that even if an attacker can
>>> influence the index, they cannot use this to read sensitive data from
>>> other parts of the array or memory.
>>>
>>> The array indices are now sanitized using the array_index_nospec()
>>> function, which ensures that the index cannot be greater than the size
>>> of the array, helping to mitigate Spectre attacks.
>>>
>>> The array_index_nospec() function, takes two parameters: the array index
>>> and the maximum size of the array. It ensures that the array index is
>>> within the bounds of the array, i.e., it is less than the maximum size
>>> of the array.
>>>
>>> If the array index is within bounds, the function returns the index. If
>>> the index is out of bounds, the function returns a safe index (usually
>>> 0) instead. This prevents out-of-bounds reads that could potentially be
>>> exploited in a speculative execution attack.
>>>
>>> Reported by smatch:
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c:136
>>> amdgpu_gfx_parse_disable_cu() warn: potential spectre issue 'mask' [w]
>>>
>>> Fixes: 6f8941a23088 ("drm/amdgpu: add disable_cu parameter")
>>> Cc: Nicolai Hähnle 
>>> Cc: Christian König 
>>> Cc: Alex Deucher 
>>> Signed-off-by: Srinivasan Shanmugam 
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 4 +++-
>>>   1 file changed, 3 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>>> index 4835d6d899e7..2ef31dbdbc3d 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>>> @@ -24,6 +24,7 @@
>>>    */
>>>     #include <linux/firmware.h>
>>> +#include <linux/nospec.h>
>>>   #include "amdgpu.h"
>>>   #include "amdgpu_gfx.h"
>>>   #include "amdgpu_rlc.h"
>>> @@ -132,8 +133,9 @@ void amdgpu_gfx_parse_disable_cu(unsigned int
>>> *mask, unsigned int max_se, unsign
>>>   }
>>>     if (se < max_se && sh < max_sh && cu < 16) {
>> This check is already there which is equivalent to a case like -
>>
>> int arr[A][B], then check if (i < A && j < B) before accessing arr[i][j].
>>
>> I think smatch is missing this.
> 
> The problem with spectre is that those checks can run speculative and
> you can still get parts of the data by looking at cache behavior and
> timings after it returned from kernel space with an error message.
> 
> That's why you need the explicit call to array_index_nospec() to avoid
> security holes from that.
> 

Thanks, found the doc -
https://www.kernel.org/doc/Documentation/speculation.txt

Thanks,
Lijo

> Regards,
> Christian.
> 
>>
>> Thanks,
>> Lijo
>>> +    unsigned int index = array_index_nospec(se * max_sh +
>>> sh, max_se * max_sh);
>>>   DRM_INFO("amdgpu: disabling CU %u.%u.%u\n", se, sh, cu);
>>> -    mask[se * max_sh + sh] |= 1u << cu;
>>> +    mask[index] |= 1u << cu;
>>>   } else {
>>>   DRM_ERROR("amdgpu: disable_cu %u.%u.%u is out of range\n",
>>>     se, sh, cu);
> 


Re: [PATCH] drm/amdgpu: Fix potential Spectre vulnerability in amdgpu_gfx_parse_disable_cu()

2024-03-01 Thread Christian König

Am 01.03.24 um 15:01 schrieb Lazar, Lijo:

On 3/1/2024 6:15 PM, Srinivasan Shanmugam wrote:

The 'mask' array could be used in a way that would make the code
vulnerable to a Spectre attack. The issue is likely related to the fact
that the 'mask' array is being indexed using values that are derived
from user input (the 'se' and 'sh' variables), which could potentially
be manipulated by an attacker.

The array_index_nospec() function is typically used in these situations
where an array index is derived from user input or other untrusted data.
By sanitizing the index, it helps to ensure that even if an attacker can
influence the index, they cannot use this to read sensitive data from
other parts of the array or memory.

The array indices are now sanitized using the array_index_nospec()
function, which ensures that the index cannot be greater than the size
of the array, helping to mitigate Spectre attacks.

The array_index_nospec() function, takes two parameters: the array index
and the maximum size of the array. It ensures that the array index is
within the bounds of the array, i.e., it is less than the maximum size
of the array.

If the array index is within bounds, the function returns the index. If
the index is out of bounds, the function returns a safe index (usually
0) instead. This prevents out-of-bounds reads that could potentially be
exploited in a speculative execution attack.

Reported by smatch:
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c:136 amdgpu_gfx_parse_disable_cu() warn: 
potential spectre issue 'mask' [w]

Fixes: 6f8941a23088 ("drm/amdgpu: add disable_cu parameter")
Cc: Nicolai Hähnle 
Cc: Christian König 
Cc: Alex Deucher 
Signed-off-by: Srinivasan Shanmugam 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 4835d6d899e7..2ef31dbdbc3d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -24,6 +24,7 @@
   */
  
  #include <linux/firmware.h>

+#include <linux/nospec.h>
  #include "amdgpu.h"
  #include "amdgpu_gfx.h"
  #include "amdgpu_rlc.h"
@@ -132,8 +133,9 @@ void amdgpu_gfx_parse_disable_cu(unsigned int *mask, 
unsigned int max_se, unsign
}
  
  		if (se < max_se && sh < max_sh && cu < 16) {

This check is already there which is equivalent to a case like -

int arr[A][B], then check if (i < A && j < B) before accessing arr[i][j].

I think smatch is missing this.


The problem with spectre is that those checks can run speculative and 
you can still get parts of the data by looking at cache behavior and 
timings after it returned from kernel space with an error message.


That's why you need the explicit call to array_index_nospec() to avoid 
security holes from that.
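
For illustration, the general pattern being discussed (not the amdgpu code 
itself; array_index_nospec() comes from linux/nospec.h): the bounds check 
stays, and the index is additionally clamped so that even a speculatively 
executed load cannot reach out of bounds.

	if (idx < size) {
		/* clamps idx to [0, size) even under speculative execution */
		idx = array_index_nospec(idx, size);
		val = arr[idx];
	}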


Regards,
Christian.



Thanks,
Lijo

+   unsigned int index = array_index_nospec(se * max_sh + 
sh, max_se * max_sh);
DRM_INFO("amdgpu: disabling CU %u.%u.%u\n", se, sh, cu);
-   mask[se * max_sh + sh] |= 1u << cu;
+   mask[index] |= 1u << cu;
} else {
DRM_ERROR("amdgpu: disable_cu %u.%u.%u is out of 
range\n",
  se, sh, cu);




2024 X.Org Board of Directors Elections timeline extended, request for nominations

2024-03-01 Thread Christopher Michael
We are seeking nominations for candidates for election to the X.org 
Foundation Board of Directors. However, as we presently do not have 
enough nominations to start the election - the decision has been made to 
extend the timeline by 2 weeks. Note this is a fairly regular part of 
the elections process.



The new deadline for nominations to the X.org Board of Directors is 
23:59 UTC on 11 March 2024



The Board consists of directors elected from the membership. Each year, 
an election is held to bring the total number of directors to eight. The 
four members receiving the highest vote totals will serve as directors 
for two year terms.


The directors who received two year terms starting in 2023 were 
Arkadiusz Hiler, Christopher Michael, Lyude Paul, and Daniel Vetter. 
They will continue to serve until their term ends in 2024. Current 
directors whose term expires in 2024 are Emma Anholt, Mark Filion, 
Ricardo Garcia, and Alyssa Rosenzweig.



A director is expected to participate in the fortnightly IRC meeting to 
discuss current business and to attend the annual meeting of the X.Org 
Foundation, which will be held at a location determined in advance by 
the Board of Directors.


A member may nominate themselves or any other member they feel is 
qualified. Nominations should be sent to the Election Committee at 
electi...@x.org.


Nominees shall be required to be current members of the X.Org 
Foundation, and submit a personal statement of up to 200 words that will 
be provided to prospective voters. The collected statements, along with 
the statement of contribution to the X.Org Foundation in the member's 
account page on http://members.x.org, will be made available to all 
voters to help them make their voting decisions.


Nominations and completed personal statements must be received no later 
than 23:59 UTC on 11 March 2024.


The slate of candidates will be published 18 March 2024 and candidate 
Q will begin then. The deadline for Xorg membership applications and 
renewals has also been extended 2 weeks and is now 25 March 2024.



Cheers,

Christopher Michael, on behalf of the X.Org BoD



Re: [PATCH] drm/amdgpu: Fix potential Spectre vulnerability in amdgpu_gfx_parse_disable_cu()

2024-03-01 Thread Lazar, Lijo



On 3/1/2024 6:15 PM, Srinivasan Shanmugam wrote:
> The 'mask' array could be used in a way that would make the code
> vulnerable to a Spectre attack. The issue is likely related to the fact
> that the 'mask' array is being indexed using values that are derived
> from user input (the 'se' and 'sh' variables), which could potentially
> be manipulated by an attacker.
> 
> The array_index_nospec() function is typically used in these situations
> where an array index is derived from user input or other untrusted data.
> By sanitizing the index, it helps to ensure that even if an attacker can
> influence the index, they cannot use this to read sensitive data from
> other parts of the array or memory.
> 
> The array indices are now sanitized using the array_index_nospec()
> function, which ensures that the index cannot be greater than the size
> of the array, helping to mitigate Spectre attacks.
> 
> The array_index_nospec() function, takes two parameters: the array index
> and the maximum size of the array. It ensures that the array index is
> within the bounds of the array, i.e., it is less than the maximum size
> of the array.
> 
> If the array index is within bounds, the function returns the index. If
> the index is out of bounds, the function returns a safe index (usually
> 0) instead. This prevents out-of-bounds reads that could potentially be
> exploited in a speculative execution attack.
> 
> Reported by smatch:
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c:136 amdgpu_gfx_parse_disable_cu() 
> warn: potential spectre issue 'mask' [w]
> 
> Fixes: 6f8941a23088 ("drm/amdgpu: add disable_cu parameter")
> Cc: Nicolai Hähnle 
> Cc: Christian König 
> Cc: Alex Deucher 
> Signed-off-by: Srinivasan Shanmugam 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index 4835d6d899e7..2ef31dbdbc3d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -24,6 +24,7 @@
>   */
>  
>  #include <linux/firmware.h>
> +#include <linux/nospec.h>
>  #include "amdgpu.h"
>  #include "amdgpu_gfx.h"
>  #include "amdgpu_rlc.h"
> @@ -132,8 +133,9 @@ void amdgpu_gfx_parse_disable_cu(unsigned int *mask, 
> unsigned int max_se, unsign
>   }
>  
>   if (se < max_se && sh < max_sh && cu < 16) {

This check is already there which is equivalent to a case like -

int arr[A][B], then check if (i < A && j < B) before accessing arr[i][j].

I think smatch is missing this.

Thanks,
Lijo
> + unsigned int index = array_index_nospec(se * max_sh + 
> sh, max_se * max_sh);
>   DRM_INFO("amdgpu: disabling CU %u.%u.%u\n", se, sh, cu);
> - mask[se * max_sh + sh] |= 1u << cu;
> + mask[index] |= 1u << cu;
>   } else {
>   DRM_ERROR("amdgpu: disable_cu %u.%u.%u is out of 
> range\n",
> se, sh, cu);


Re: [PATCH] drm/amd/display: handle range offsets in VRR ranges

2024-03-01 Thread Alex Deucher
Ping?

On Wed, Feb 28, 2024 at 4:12 PM Alex Deucher  wrote:
>
> Need to check the offset bits for values greater than 255.
>
> v2: also update amdgpu_dm_connector values.
>
> Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3203
> Signed-off-by: Alex Deucher 
> ---
>  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 19 ++-
>  1 file changed, 14 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index 32efce81a5a74..4e4cbf2e33dd2 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -11292,14 +11292,23 @@ void amdgpu_dm_update_freesync_caps(struct 
> drm_connector *connector,
> if (range->flags != 1)
> continue;
>
> -   amdgpu_dm_connector->min_vfreq = 
> range->min_vfreq;
> -   amdgpu_dm_connector->max_vfreq = 
> range->max_vfreq;
> -   amdgpu_dm_connector->pixel_clock_mhz =
> -   range->pixel_clock_mhz * 10;
> -
> 
> connector->display_info.monitor_range.min_vfreq = range->min_vfreq;
> 
> connector->display_info.monitor_range.max_vfreq = range->max_vfreq;
>
> +   if (edid->revision >= 4) {
> +   if (data->pad2 & 
> DRM_EDID_RANGE_OFFSET_MIN_VFREQ)
> +   
> connector->display_info.monitor_range.min_vfreq += 255;
> +   if (data->pad2 & 
> DRM_EDID_RANGE_OFFSET_MAX_VFREQ)
> +   
> connector->display_info.monitor_range.max_vfreq += 255;
> +   }
> +
> +   amdgpu_dm_connector->min_vfreq =
> +   
> connector->display_info.monitor_range.min_vfreq;
> +   amdgpu_dm_connector->max_vfreq =
> +   
> connector->display_info.monitor_range.max_vfreq;
> +   amdgpu_dm_connector->pixel_clock_mhz =
> +   range->pixel_clock_mhz * 10;
> +
> break;
> }
>
> --
> 2.44.0
>


Re: [PATCH v4 2/2] drm/amdgpu: sync page table freeing with tlb flush

2024-03-01 Thread Christian König




Am 01.03.24 um 12:07 schrieb Shashank Sharma:

The idea behind this patch is to delay the freeing of PT entry objects
until the TLB flush is done.

This patch:
- Adds a tlb_flush_waitlist which will keep the objects that need to be
   freed after tlb_flush
- Adds PT entries in this list in amdgpu_vm_pt_free_dfs, instead of freeing
   them immediately.
- Exports function amdgpu_vm_pt_free to be called directly.
- Adds a 'force' input bool to amdgpu_vm_pt_free_dfs to differentiate
   between immediate freeing of the BOs (like from
   amdgpu_vm_pt_free_root) vs delayed freeing.

V2: rebase
V4: (Christian)
 - add only locked PTEs entries in TLB flush waitlist.
 - do not create a separate function for list flush.
 - do not create a new lock for TLB flush.
 - there is no need to wait on tlb_flush_fence exclusively.

Cc: Christian König 
Cc: Alex Deucher 
Cc: Felix Kuehling 
Cc: Rajneesh Bhardwaj 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 10 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|  4 
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 21 ++---
  3 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 310aae6fb49b..94581a1fe34f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -990,11 +990,20 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
  
  	/* Prepare a TLB flush fence to be attached to PTs */

if (!unlocked && params.needs_flush && vm->is_compute_context) {
+   struct amdgpu_vm_bo_base *entry, *next;
+
amdgpu_vm_tlb_fence_create(adev, vm, fence);
  
  		/* Makes sure no PD/PT is freed before the flush */

dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence,
   DMA_RESV_USAGE_BOOKKEEP);
+
+   if (list_empty(&vm->tlb_flush_waitlist))
+   goto error_unlock;
+
+   /* Now actually free the waitlist */
+   list_for_each_entry_safe(entry, next, &vm->tlb_flush_waitlist, vm_status)
+   amdgpu_vm_pt_free(entry);


This needs to be outside of the "if". We also need to free the PDs/PTs 
in non compute VMs.



}
  
  error_unlock:

@@ -2214,6 +2223,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
	INIT_LIST_HEAD(&vm->pt_freed);
	INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work);
	INIT_KFIFO(vm->faults);
+   INIT_LIST_HEAD(&vm->tlb_flush_waitlist);
  
  	r = amdgpu_seq64_alloc(adev, &vm->tlb_seq_va, &vm->tlb_seq_cpu_addr);

if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 298f604b8e5f..ba374c2c61bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -343,6 +343,9 @@ struct amdgpu_vm {
	uint64_t		*tlb_seq_cpu_addr;
	uint64_t		tlb_fence_context;
  
+	/* temporary storage of PT BOs until the TLB flush */

+   struct list_head	tlb_flush_waitlist;
+
atomic64_t  kfd_last_flushed_seq;
  
  	/* How many times we had to re-generate the page tables */

@@ -545,6 +548,7 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params 
*params,
  uint64_t start, uint64_t end,
  uint64_t dst, uint64_t flags);
  void amdgpu_vm_pt_free_work(struct work_struct *work);
+void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry);
  
  #if defined(CONFIG_DEBUG_FS)

  void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index 95dc0afdaffb..cb14e5686c0f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -636,7 +636,7 @@ static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev,
   *
   * @entry: PDE to free
   */
-static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
+void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
  {
struct amdgpu_bo *shadow;
  
@@ -685,13 +685,15 @@ void amdgpu_vm_pt_free_work(struct work_struct *work)

   * @vm: amdgpu vm structure
   * @start: optional cursor where to start freeing PDs/PTs
   * @unlocked: vm resv unlock status
+ * @force: force free all PDs/PTs without waiting for TLB flush
   *
   * Free the page directory or page table level and all sub levels.
   */
  static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
  struct amdgpu_vm *vm,
  struct amdgpu_vm_pt_cursor *start,
- bool unlocked)
+ bool unlocked,
+ bool force)
  {
struct amdgpu_vm_pt_cursor cursor;
  

Re: [PATCH v4 1/2] drm/amdgpu: implement TLB flush fence

2024-03-01 Thread Sharma, Shashank



On 01/03/2024 13:59, Christian König wrote:

Am 01.03.24 um 12:07 schrieb Shashank Sharma:

From: Christian König 

The problem is that when (for example) 4k pages are replaced
with a single 2M page, we need to wait for the change to be flushed
out by invalidating the TLB before the PT can be freed.

Solve this by moving the TLB flush into a DMA-fence object which
can be used to delay the freeing of the PT BOs until it is signaled.

V2: (Shashank)
 - rebase
 - set dma_fence_error only in case of error
 - add tlb_flush fence only when PT/PD BO is locked (Felix)
 - use vm->pasid when f is NULL (Mukul)

V4: - add a wait for (f->dependency) in tlb_fence_work (Christian)
 - move the misplaced fence_create call to the end (Philip)

Cc: Christian Koenig 
Cc: Felix Kuehling 
Cc: Rajneesh Bhardwaj 
Cc: Alex Deucher 
Signed-off-by: Christian König 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/Makefile   |   3 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    |  10 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h    |   4 +
  .../gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c  | 111 ++
  4 files changed, 127 insertions(+), 1 deletion(-)
  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile

index fa26a4e3a99d..91ab4cf29b5b 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -70,7 +70,8 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o 
amdgpu_kms.o \

  amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o \
  atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
  atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
-    amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_ib.o 
amdgpu_pll.o \

+    amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_vm_tlb_fence.o \
+    amdgpu_ib.o amdgpu_pll.o \
  amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
  amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o 
amdgpu_virt.o \

  amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index 0960e0a665d3..310aae6fb49b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -988,6 +988,15 @@ int amdgpu_vm_update_range(struct amdgpu_device 
*adev, struct amdgpu_vm *vm,

    r = vm->update_funcs->commit(&params, fence);
  +    /* Prepare a TLB flush fence to be attached to PTs */
+    if (!unlocked && params.needs_flush && vm->is_compute_context) {
+    amdgpu_vm_tlb_fence_create(adev, vm, fence);
+
+    /* Makes sure no PD/PT is freed before the flush */
+    dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence,
+   DMA_RESV_USAGE_BOOKKEEP);
+    }
+
  error_unlock:
  amdgpu_vm_eviction_unlock(vm);
  drm_dev_exit(idx);
@@ -2237,6 +2246,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,

    mutex_init(>eviction_lock);
  vm->evicting = false;
+    vm->tlb_fence_context = dma_fence_context_alloc(1);
    r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level,
  false, , xcp_id);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

index 64b3f69efa57..298f604b8e5f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -341,6 +341,7 @@ struct amdgpu_vm {
  atomic64_t    tlb_seq;
  uint64_t    tlb_seq_va;
  uint64_t    *tlb_seq_cpu_addr;
+    uint64_t    tlb_fence_context;
    atomic64_t    kfd_last_flushed_seq;
  @@ -594,5 +595,8 @@ void amdgpu_vm_update_fault_cache(struct 
amdgpu_device *adev,

    uint64_t addr,
    uint32_t status,
    unsigned int vmhub);
+void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct dma_fence **fence);
    #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c

new file mode 100644
index ..54c33c24fa46
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person 
obtaining a
+ * copy of this software and associated documentation files (the 
"Software"),
+ * to deal in the Software without restriction, including without 
limitation
+ * the rights to use, copy, modify, merge, publish, distribute, 
sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom 
the

+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
included in

+ * all 

Re: [PATCH v4 1/2] drm/amdgpu: implement TLB flush fence

2024-03-01 Thread Christian König

Am 01.03.24 um 12:07 schrieb Shashank Sharma:

From: Christian König 

The problem is that when (for example) 4k pages are replaced
with a single 2M page, we need to wait for the change to be flushed
out by invalidating the TLB before the PT can be freed.

Solve this by moving the TLB flush into a DMA-fence object which
can be used to delay the freeing of the PT BOs until it is signaled.

V2: (Shashank)
 - rebase
 - set dma_fence_error only in case of error
 - add tlb_flush fence only when PT/PD BO is locked (Felix)
 - use vm->pasid when f is NULL (Mukul)

V4: - add a wait for (f->dependency) in tlb_fence_work (Christian)
 - move the misplaced fence_create call to the end (Philip)

Cc: Christian Koenig 
Cc: Felix Kuehling 
Cc: Rajneesh Bhardwaj 
Cc: Alex Deucher 
Signed-off-by: Christian König 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/Makefile   |   3 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c|  10 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|   4 +
  .../gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c  | 111 ++
  4 files changed, 127 insertions(+), 1 deletion(-)
  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index fa26a4e3a99d..91ab4cf29b5b 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -70,7 +70,8 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \
amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o \
atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
-   amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_ib.o amdgpu_pll.o \
+   amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_vm_tlb_fence.o \
+   amdgpu_ib.o amdgpu_pll.o \
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o \
amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 0960e0a665d3..310aae6fb49b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -988,6 +988,15 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  
	r = vm->update_funcs->commit(&params, fence);
  
+	/* Prepare a TLB flush fence to be attached to PTs */

+   if (!unlocked && params.needs_flush && vm->is_compute_context) {
+   amdgpu_vm_tlb_fence_create(adev, vm, fence);
+
+   /* Makes sure no PD/PT is freed before the flush */
+   dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence,
+  DMA_RESV_USAGE_BOOKKEEP);
+   }
+
  error_unlock:
amdgpu_vm_eviction_unlock(vm);
drm_dev_exit(idx);
@@ -2237,6 +2246,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  
	mutex_init(&vm->eviction_lock);

vm->evicting = false;
+   vm->tlb_fence_context = dma_fence_context_alloc(1);
  
	r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level,
				false, &root, xcp_id);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 64b3f69efa57..298f604b8e5f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -341,6 +341,7 @@ struct amdgpu_vm {
atomic64_t  tlb_seq;
uint64_ttlb_seq_va;
uint64_t*tlb_seq_cpu_addr;
+   uint64_ttlb_fence_context;
  
  	atomic64_t		kfd_last_flushed_seq;
  
@@ -594,5 +595,8 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,

  uint64_t addr,
  uint32_t status,
  unsigned int vmhub);
+void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev,
+struct amdgpu_vm *vm,
+struct dma_fence **fence);
  
  #endif

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c
new file mode 100644
index ..54c33c24fa46
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to 

[PATCH] drm/amdgpu: add ring timeout information in devcoredump

2024-03-01 Thread Sunil Khatri
Add ring timeout related information to the amdgpu
devcoredump file for debugging purposes.

During the GPU recovery process the registered callback
is triggered and adds the debug information to the data
file created by the devcoredump framework under the
directory /sys/class/devcoredump/devcdX/.

Signed-off-by: Sunil Khatri 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   | 15 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c   | 11 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 12 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h |  1 +
 4 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9246bca0a008..9f57c7795c47 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -816,6 +816,17 @@ struct amdgpu_reset_info {
 #endif
 };
 
+/*
+ *  IP and Queue information during timeout
+ */
+struct amdgpu_ring_timeout_info {
+   bool timeout;
+   char ring_name[32];
+   enum amdgpu_ring_type ip_type;
+   bool soft_recovery;
+};
+
+
 /*
  * Non-zero (true) if the GPU has VRAM. Zero (false) otherwise.
  */
@@ -1150,6 +1161,10 @@ struct amdgpu_device {
booldebug_largebar;
booldebug_disable_soft_recovery;
booldebug_use_vram_fw_buf;
+
+#ifdef CONFIG_DEV_COREDUMP
+   struct amdgpu_ring_timeout_info ring_timeout_info;
+#endif
 };
 
 static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 71a5cf37b472..e36b7352b2de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -51,8 +51,19 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
	memset(&ti, 0, sizeof(struct amdgpu_task_info));
adev->job_hang = true;
 
+#ifdef CONFIG_DEV_COREDUMP
+   /* Update the ring timeout info for the coredump */
+   adev->ring_timeout_info.timeout = true;
+   strscpy(adev->ring_timeout_info.ring_name, s_job->sched->name, sizeof(adev->ring_timeout_info.ring_name));
+   adev->ring_timeout_info.ip_type = ring->funcs->type;
+   adev->ring_timeout_info.soft_recovery = false;
+#endif
+
if (amdgpu_gpu_recovery &&
	    amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {
+#ifdef CONFIG_DEV_COREDUMP
+   adev->ring_timeout_info.soft_recovery = true;
+#endif
DRM_ERROR("ring %s timeout, but soft recovered\n",
  s_job->sched->name);
goto exit;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
index 4baa300121d8..d4f892ed105f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -196,8 +196,16 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
   coredump->reset_task_info.process_name,
   coredump->reset_task_info.pid);
 
+   if (coredump->timeout_info.timeout) {
+   drm_printf(&p, "\nRing timed out details\n");
+   drm_printf(&p, "IP Type: %d Ring Name: %s Soft Recovery: %s\n",
+   coredump->timeout_info.ip_type,
+   coredump->timeout_info.ring_name,
+   coredump->timeout_info.soft_recovery ? "Successful" : "Failed");
+   }
+
if (coredump->reset_vram_lost)
-   drm_printf(&p, "VRAM is lost due to GPU reset!\n");
+   drm_printf(&p, "\nVRAM is lost due to GPU reset!\n");
if (coredump->adev->reset_info.num_regs) {
	drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n");
 
@@ -228,6 +236,7 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost,
return;
}
 
+   coredump->timeout_info = adev->ring_timeout_info;
coredump->reset_vram_lost = vram_lost;
 
if (reset_context->job && reset_context->job->vm)
@@ -236,6 +245,7 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost,
coredump->adev = adev;
 
	ktime_get_ts64(&coredump->reset_time);
+   memset(&adev->ring_timeout_info, 0, sizeof(adev->ring_timeout_info));
 
dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT,
  amdgpu_devcoredump_read, amdgpu_devcoredump_free);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
index 19899f6b9b2b..37172d6e1f94 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -97,6 +97,7 @@ struct amdgpu_coredump_info {
struct amdgpu_task_info reset_task_info;
struct timespec64   reset_time;
boolreset_vram_lost;
+   struct 

[PATCH] drm/amdgpu: Fix potential Spectre vulnerability in amdgpu_gfx_parse_disable_cu()

2024-03-01 Thread Srinivasan Shanmugam
The 'mask' array is indexed with values derived from user input (the
'se' and 'sh' variables), which an attacker could potentially
manipulate. Under speculative execution this allows an out-of-bounds
read of the array, i.e. a Spectre-style information leak.

The array_index_nospec() function is typically used in these situations
where an array index is derived from user input or other untrusted data.
By sanitizing the index, it helps to ensure that even if an attacker can
influence the index, they cannot use this to read sensitive data from
other parts of the array or memory.

The array indices are now sanitized using the array_index_nospec()
function, which ensures that the index cannot be greater than the size
of the array, helping to mitigate Spectre attacks.

The array_index_nospec() function, takes two parameters: the array index
and the maximum size of the array. It ensures that the array index is
within the bounds of the array, i.e., it is less than the maximum size
of the array.

If the array index is within bounds, the function returns the index. If
the index is out of bounds, the function returns a safe index (usually
0) instead. This prevents out-of-bounds reads that could potentially be
exploited in a speculative execution attack.
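
As a minimal illustration of the pattern (hypothetical table and index,
not the amdgpu code):

    #include <linux/nospec.h>

    /* 'idx' comes from untrusted input; 'table' has 'size' entries. */
    static u32 read_entry(const u32 *table, unsigned long idx,
                          unsigned long size)
    {
            if (idx >= size)
                    return 0;

            /* Clamp the index under speculation before using it. */
            idx = array_index_nospec(idx, size);
            return table[idx];
    }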

Reported by smatch:
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c:136 amdgpu_gfx_parse_disable_cu() warn: potential spectre issue 'mask' [w]

Fixes: 6f8941a23088 ("drm/amdgpu: add disable_cu parameter")
Cc: Nicolai Hähnle 
Cc: Christian König 
Cc: Alex Deucher 
Signed-off-by: Srinivasan Shanmugam 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 4835d6d899e7..2ef31dbdbc3d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -24,6 +24,7 @@
  */
 
 #include 
+#include 
 #include "amdgpu.h"
 #include "amdgpu_gfx.h"
 #include "amdgpu_rlc.h"
@@ -132,8 +133,9 @@ void amdgpu_gfx_parse_disable_cu(unsigned int *mask, unsigned int max_se, unsign
}
 
if (se < max_se && sh < max_sh && cu < 16) {
+   unsigned int index = array_index_nospec(se * max_sh + sh, max_se * max_sh);
DRM_INFO("amdgpu: disabling CU %u.%u.%u\n", se, sh, cu);
-   mask[se * max_sh + sh] |= 1u << cu;
+   mask[index] |= 1u << cu;
} else {
	DRM_ERROR("amdgpu: disable_cu %u.%u.%u is out of range\n",
  se, sh, cu);
-- 
2.34.1



[PATCH v4 1/2] drm/amdgpu: implement TLB flush fence

2024-03-01 Thread Shashank Sharma
From: Christian König 

The problem is that when (for example) 4k pages are replaced
with a single 2M page, we need to wait for the change to be flushed
out by invalidating the TLB before the PT can be freed.

Solve this by moving the TLB flush into a DMA-fence object which
can be used to delay the freeing of the PT BOs until it is signaled.
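
For readers not familiar with the mechanism, the rough shape of such a
fence is sketched below: a dma_fence whose worker performs the
invalidation and only then signals, so anything ordered behind the fence
(here: freeing the PT BOs) waits for the flush. Names and structure are
illustrative only and are not the amdgpu_vm_tlb_fence.c implementation
added by this patch:

#include <linux/dma-fence.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>

struct flush_fence {
	struct dma_fence base;
	spinlock_t lock;
	struct work_struct work;
};

static const char *flush_fence_driver_name(struct dma_fence *f)
{
	return "example";
}

static const char *flush_fence_timeline_name(struct dma_fence *f)
{
	return "tlb-flush";
}

static const struct dma_fence_ops flush_fence_ops = {
	.get_driver_name = flush_fence_driver_name,
	.get_timeline_name = flush_fence_timeline_name,
};

static void flush_fence_work(struct work_struct *work)
{
	struct flush_fence *f = container_of(work, struct flush_fence, work);

	/* ... issue the TLB invalidation and wait for it to complete ... */

	dma_fence_signal(&f->base);
	dma_fence_put(&f->base);	/* drop the worker's reference */
}

static struct dma_fence *flush_fence_create(u64 context)
{
	struct flush_fence *f = kzalloc(sizeof(*f), GFP_KERNEL);

	if (!f)
		return NULL;

	spin_lock_init(&f->lock);
	dma_fence_init(&f->base, &flush_fence_ops, &f->lock, context, 1);
	INIT_WORK(&f->work, flush_fence_work);

	dma_fence_get(&f->base);	/* reference held by the worker */
	schedule_work(&f->work);
	return &f->base;
}

In this patch the real fence is additionally added to the root PD's
reservation object with DMA_RESV_USAGE_BOOKKEEP, which is what keeps the
PD/PT BOs from being released before the flush has signaled.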

V2: (Shashank)
- rebase
- set dma_fence_error only in case of error
- add tlb_flush fence only when PT/PD BO is locked (Felix)
- use vm->pasid when f is NULL (Mukul)

V4: - add a wait for (f->dependency) in tlb_fence_work (Christian)
- move the misplaced fence_create call to the end (Philip)

Cc: Christian Koenig 
Cc: Felix Kuehling 
Cc: Rajneesh Bhardwaj 
Cc: Alex Deucher 
Signed-off-by: Christian König 
Signed-off-by: Shashank Sharma 
---
 drivers/gpu/drm/amd/amdgpu/Makefile   |   3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c|  10 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|   4 +
 .../gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c  | 111 ++
 4 files changed, 127 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index fa26a4e3a99d..91ab4cf29b5b 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -70,7 +70,8 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \
amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o \
atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
-   amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_ib.o amdgpu_pll.o \
+   amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_vm_tlb_fence.o \
+   amdgpu_ib.o amdgpu_pll.o \
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o \
amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 0960e0a665d3..310aae6fb49b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -988,6 +988,15 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
	r = vm->update_funcs->commit(&params, fence);
 
+   /* Prepare a TLB flush fence to be attached to PTs */
+   if (!unlocked && params.needs_flush && vm->is_compute_context) {
+   amdgpu_vm_tlb_fence_create(adev, vm, fence);
+
+   /* Makes sure no PD/PT is freed before the flush */
+   dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence,
+  DMA_RESV_USAGE_BOOKKEEP);
+   }
+
 error_unlock:
amdgpu_vm_eviction_unlock(vm);
drm_dev_exit(idx);
@@ -2237,6 +2246,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
	mutex_init(&vm->eviction_lock);
vm->evicting = false;
+   vm->tlb_fence_context = dma_fence_context_alloc(1);
 
r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level,
				false, &root, xcp_id);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 64b3f69efa57..298f604b8e5f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -341,6 +341,7 @@ struct amdgpu_vm {
atomic64_t  tlb_seq;
uint64_ttlb_seq_va;
uint64_t*tlb_seq_cpu_addr;
+   uint64_ttlb_fence_context;
 
atomic64_t  kfd_last_flushed_seq;
 
@@ -594,5 +595,8 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
  uint64_t addr,
  uint32_t status,
  unsigned int vmhub);
+void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev,
+struct amdgpu_vm *vm,
+struct dma_fence **fence);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c
new file mode 100644
index ..54c33c24fa46
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * 

[PATCH v4 2/2] drm/amdgpu: sync page table freeing with tlb flush

2024-03-01 Thread Shashank Sharma
The idea behind this patch is to delay the freeing of PT entry objects
until the TLB flush is done.

This patch:
- Adds a tlb_flush_waitlist which will keep the objects that need to be
  freed after tlb_flush
- Adds PT entries to this list in amdgpu_vm_pt_free_dfs, instead of freeing
  them immediately.
- Exports the function amdgpu_vm_pt_free so it can be called directly.
- Adds a 'force' input bool to amdgpu_vm_pt_free_dfs to differentiate
  between immediate freeing of the BOs (like from
  amdgpu_vm_pt_free_root) vs delayed freeing.

V2: rebase
V4: (Christian)
- add only locked PTEs entries in TLB flush waitlist.
- do not create a separate function for list flush.
- do not create a new lock for TLB flush.
- there is no need to wait on tlb_flush_fence exclusively.

Cc: Christian König 
Cc: Alex Deucher 
Cc: Felix Kuehling 
Cc: Rajneesh Bhardwaj 
Signed-off-by: Shashank Sharma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 10 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|  4 
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 21 ++---
 3 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 310aae6fb49b..94581a1fe34f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -990,11 +990,20 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
/* Prepare a TLB flush fence to be attached to PTs */
if (!unlocked && params.needs_flush && vm->is_compute_context) {
+   struct amdgpu_vm_bo_base *entry, *next;
+
amdgpu_vm_tlb_fence_create(adev, vm, fence);
 
/* Makes sure no PD/PT is freed before the flush */
dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence,
   DMA_RESV_USAGE_BOOKKEEP);
+
+   if (list_empty(&vm->tlb_flush_waitlist))
+   goto error_unlock;
+
+   /* Now actually free the waitlist */
+   list_for_each_entry_safe(entry, next, &vm->tlb_flush_waitlist, vm_status)
+   amdgpu_vm_pt_free(entry);
}
 
 error_unlock:
@@ -2214,6 +2223,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
	INIT_LIST_HEAD(&vm->pt_freed);
	INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work);
	INIT_KFIFO(vm->faults);
+   INIT_LIST_HEAD(&vm->tlb_flush_waitlist);
 
	r = amdgpu_seq64_alloc(adev, &vm->tlb_seq_va, &vm->tlb_seq_cpu_addr);
if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 298f604b8e5f..ba374c2c61bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -343,6 +343,9 @@ struct amdgpu_vm {
uint64_t*tlb_seq_cpu_addr;
uint64_ttlb_fence_context;
 
+   /* temporary storage of PT BOs until the TLB flush */
+   struct list_headtlb_flush_waitlist;
+
atomic64_t  kfd_last_flushed_seq;
 
/* How many times we had to re-generate the page tables */
@@ -545,6 +548,7 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
  uint64_t start, uint64_t end,
  uint64_t dst, uint64_t flags);
 void amdgpu_vm_pt_free_work(struct work_struct *work);
+void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry);
 
 #if defined(CONFIG_DEBUG_FS)
 void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index 95dc0afdaffb..cb14e5686c0f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -636,7 +636,7 @@ static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev,
  *
  * @entry: PDE to free
  */
-static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
+void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
 {
struct amdgpu_bo *shadow;
 
@@ -685,13 +685,15 @@ void amdgpu_vm_pt_free_work(struct work_struct *work)
  * @vm: amdgpu vm structure
  * @start: optional cursor where to start freeing PDs/PTs
  * @unlocked: vm resv unlock status
+ * @force: force free all PDs/PTs without waiting for TLB flush
  *
  * Free the page directory or page table level and all sub levels.
  */
 static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
  struct amdgpu_vm *vm,
  struct amdgpu_vm_pt_cursor *start,
- bool unlocked)
+ bool unlocked,
+ bool force)
 {
struct amdgpu_vm_pt_cursor cursor;
struct amdgpu_vm_bo_base *entry;
@@ -708,11 +710,15 @@ static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
return;
}
 

Re: [PATCH] drm/amdgpu/pm: Fix the error of pwm1_enable setting

2024-03-01 Thread Lazar, Lijo



On 3/1/2024 1:15 PM, Ma Jun wrote:
> Fix the incorrect pwm_mode value used for the
> pwm1_enable setting
> 
> Signed-off-by: Ma Jun 
> ---
>  drivers/gpu/drm/amd/pm/amdgpu_pm.c | 12 +++-
>  1 file changed, 11 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> index 9e70c41ad98f..7cc5cd7616b1 100644
> --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> @@ -2582,6 +2582,7 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
>   struct amdgpu_device *adev = dev_get_drvdata(dev);
>   int err, ret;
>   int value;
> + u32 pwm_mode;
>  

You may move this declaration up to follow reverse Christmas tree order.
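
I.e., keep the longest declaration line first and the shortest last,
roughly like this (illustrative ordering only, not part of the patch):

    struct amdgpu_device *adev = dev_get_drvdata(dev);
    u32 pwm_mode;
    int err, ret;
    int value;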

Reviewed-by: Lijo Lazar 

Thanks,
Lijo
>   if (amdgpu_in_reset(adev))
>   return -EPERM;
> @@ -2592,13 +2593,22 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
>   if (err)
>   return err;
>  
> + if (value == 0)
> + pwm_mode = AMD_FAN_CTRL_NONE;
> + else if (value == 1)
> + pwm_mode = AMD_FAN_CTRL_MANUAL;
> + else if (value == 2)
> + pwm_mode = AMD_FAN_CTRL_AUTO;
> + else
> + return -EINVAL;
> +
>   ret = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>   if (ret < 0) {
>   pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>   return ret;
>   }
>  
> - ret = amdgpu_dpm_set_fan_control_mode(adev, value);
> + ret = amdgpu_dpm_set_fan_control_mode(adev, pwm_mode);
>  
>   pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
>   pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);