[PATCH v2] drm/amdgpu/vcn: fix uninitialized variable warnings

2024-04-19 Thread Pierre-Eric Pelloux-Prayer
Avoid returning an uninitialized value if we never enter the loop.
This case should never be hit in practice, but returning 0 doesn't
hurt.

The same fix is applied to the 4 places using the same pattern.

v2: - fixed typos in commit message (Alex)
- use "return 0;" before the done label instead of initializing
  r to 0
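
For reference, the pattern and the fix look roughly like this (a
simplified sketch, not the actual driver code; the helper name is
made up):

	static int vcn_hw_init(void *handle)
	{
		int i, r;

		for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
			if (adev->vcn.harvest_config & (1 << i))
				continue;	/* every iteration may be skipped... */
			r = init_one_instance(i);	/* ...so r may never be written */
			if (r)
				goto done;
		}

		return 0;	/* v2: return directly instead of initializing r */
	done:
		/* without the early return above, r could be read
		 * uninitialized here when the loop body never ran */
		if (!r)
			DRM_INFO("VCN initialized successfully.\n");
		return r;
	}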

Signed-off-by: Pierre-Eric Pelloux-Prayer 
Reviewed-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c   | 1 +
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c   | 1 +
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c | 1 +
 drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c | 1 +
 4 files changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
index 8f82fb887e9c..26e63f01250a 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
@@ -359,6 +359,7 @@ static int vcn_v3_0_hw_init(void *handle)
}
}
 
+   return 0;
 done:
if (!r)
DRM_INFO("VCN decode and encode initialized successfully(under 
%s).\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
index 832d15f7b5f6..aff1a4d8d393 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
@@ -288,6 +288,7 @@ static int vcn_v4_0_hw_init(void *handle)
}
}
 
+   return 0;
 done:
if (!r)
DRM_INFO("VCN decode and encode initialized successfully(under 
%s).\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
index 501e53e69f2a..8f2bcce13339 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
@@ -237,6 +237,7 @@ static int vcn_v4_0_5_hw_init(void *handle)
goto done;
}
 
+   return 0;
 done:
if (!r)
DRM_INFO("VCN decode and encode initialized successfully(under 
%s).\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
index bc60c554eb32..b226306164bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
@@ -203,6 +203,7 @@ static int vcn_v5_0_0_hw_init(void *handle)
goto done;
}
 
+   return 0;
 done:
if (!r)
DRM_INFO("VCN decode and encode initialized successfully(under 
%s).\n",
-- 
2.41.0



[PATCH] drm/amdgpu/vcn: fix uninitialized variable warnings

2024-04-18 Thread Pierre-Eric Pelloux-Prayer
Init r to 0 to avoid returning an uninitialized value if we never
enter the loop. This case should never be hit in practice, but
returning 0 doesn't hurt.

The same fix is applied to the 4 places using the same pattern.

Signed-off-by: Pierre-Eric Pelloux-Prayer 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c   | 2 +-
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c   | 2 +-
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
index 8f82fb887e9c..724445545563 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
@@ -298,7 +298,7 @@ static int vcn_v3_0_hw_init(void *handle)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct amdgpu_ring *ring;
-   int i, j, r;
+   int i, j, r = 0;
 
if (amdgpu_sriov_vf(adev)) {
r = vcn_v3_0_start_sriov(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
index 832d15f7b5f6..9be7ae7af4b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
@@ -253,7 +253,7 @@ static int vcn_v4_0_hw_init(void *handle)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct amdgpu_ring *ring;
-   int i, r;
+   int i, r = 0;
 
if (amdgpu_sriov_vf(adev)) {
r = vcn_v4_0_start_sriov(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
index 501e53e69f2a..593c64e4b8ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
@@ -221,7 +221,7 @@ static int vcn_v4_0_5_hw_init(void *handle)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct amdgpu_ring *ring;
-   int i, r;
+   int i, r = 0;
 
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
if (adev->vcn.harvest_config & (1 << i))
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
index bc60c554eb32..246f967e2e7d 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
@@ -187,7 +187,7 @@ static int vcn_v5_0_0_hw_init(void *handle)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct amdgpu_ring *ring;
-   int i, r;
+   int i, r = 0;
 
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
if (adev->vcn.harvest_config & (1 << i))
-- 
2.41.0



Re: [PATCH] drm/amdgpu: remove invalid resource->start check

2024-03-27 Thread Pierre-Eric Pelloux-Prayer

On my kernel branch this produces an "unused variable" warning for
'bus_size', so it should be dropped as well.

Pierre-Eric

On 21/03/2024 at 13:25, Pierre-Eric Pelloux-Prayer wrote:


On 20/03/2024 at 13:49, Christian König wrote:

The majority of those were removed in the patch aed01a68047b
drm/amdgpu: Remove TTM resource->start visible VRAM condition v2

But this one was missed because it's working on the resource and not the
BO. Since we also no longer use a fake start address for visible BOs
this will now trigger invalid mapping errors.

Fixes: aed01a68047b ("drm/amdgpu: Remove TTM resource->start visible VRAM condition v2")
Signed-off-by: Christian König 
CC: sta...@vger.kernel.org



Acked-by: Pierre-Eric Pelloux-Prayer 

Thanks!


---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index b0ed10f4de60..6bd7e71c5ff6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -573,9 +573,6 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device 
*bdev,
 		break;
 	case TTM_PL_VRAM:
 		mem->bus.offset = mem->start << PAGE_SHIFT;
-		/* check if it's visible */
-		if ((mem->bus.offset + bus_size) > adev->gmc.visible_vram_size)
-			return -EINVAL;
 		if (adev->mman.aper_base_kaddr &&
 		    mem->placement & TTM_PL_FLAG_CONTIGUOUS)


Re: [PATCH] drm/amdgpu: remove invalid resource->start check

2024-03-21 Thread Pierre-Eric Pelloux-Prayer



On 20/03/2024 at 13:49, Christian König wrote:

The majority of those were removed in the patch aed01a68047b
drm/amdgpu: Remove TTM resource->start visible VRAM condition v2

But this one was missed because it's working on the resource and not the
BO. Since we also no longer use a fake start address for visible BOs
this will now trigger invalid mapping errors.

Fixes: aed01a68047b ("drm/amdgpu: Remove TTM resource->start visible VRAM condition v2")
Signed-off-by: Christian König 
CC: sta...@vger.kernel.org



Acked-by: Pierre-Eric Pelloux-Prayer 

Thanks!


---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index b0ed10f4de60..6bd7e71c5ff6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -573,9 +573,6 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device 
*bdev,
 		break;
 	case TTM_PL_VRAM:
 		mem->bus.offset = mem->start << PAGE_SHIFT;
-		/* check if it's visible */
-		if ((mem->bus.offset + bus_size) > adev->gmc.visible_vram_size)
-			return -EINVAL;

 		if (adev->mman.aper_base_kaddr &&
 		    mem->placement & TTM_PL_FLAG_CONTIGUOUS)


[PATCH] drm/amdgpu: disable ring_muxer if mcbp is off

2024-02-23 Thread Pierre-Eric Pelloux-Prayer
Using the ring_muxer without preemption adds overhead for no
reason since mcbp cannot be triggered.

Moving back to a single queue in this case also helps when
high priority apps are used: in this case the gpu_scheduler
priority handling will work as expected - much better than
ring_muxer with its 2 independent schedulers competing for
the same hardware queue.

This change requires moving amdgpu_device_set_mcbp above
amdgpu_device_ip_early_init because we use adev->gfx.mcbp.

Signed-off-by: Pierre-Eric Pelloux-Prayer 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  4 ++--
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 21 -
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index d534e192e260..40516d24026c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4054,13 +4054,13 @@ int amdgpu_device_init(struct amdgpu_device *adev,
return r;
}
 
+   amdgpu_device_set_mcbp(adev);
+
/* early init functions */
r = amdgpu_device_ip_early_init(adev);
if (r)
return r;
 
-   amdgpu_device_set_mcbp(adev);
-
/* Get rid of things like offb */
	r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev,
							     &amdgpu_kms_driver);
if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 169d45268ef6..f682f830f7f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -2080,7 +2080,7 @@ static int gfx_v9_0_sw_init(void *handle)
ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
 
/* disable scheduler on the real ring */
-   ring->no_scheduler = true;
+   ring->no_scheduler = adev->gfx.mcbp;
ring->vm_hub = AMDGPU_GFXHUB(0);
	r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
 AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
@@ -2090,7 +2090,7 @@ static int gfx_v9_0_sw_init(void *handle)
}
 
/* set up the software rings */
-   if (adev->gfx.num_gfx_rings) {
+   if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
ring = >gfx.sw_gfx_ring[i];
ring->ring_obj = NULL;
@@ -2181,7 +2181,7 @@ static int gfx_v9_0_sw_fini(void *handle)
int i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-   if (adev->gfx.num_gfx_rings) {
+   if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
amdgpu_ring_fini(>gfx.sw_gfx_ring[i]);
amdgpu_ring_mux_fini(>gfx.muxer);
@@ -5910,11 +5910,14 @@ static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
 
switch (me_id) {
case 0:
-	if (adev->gfx.num_gfx_rings &&
-	    !amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) {
-		/* Fence signals are handled on the software rings*/
-		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
-			amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]);
+	if (adev->gfx.num_gfx_rings) {
+		if (!adev->gfx.mcbp) {
+			amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
+		} else if (!amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) {
+			/* Fence signals are handled on the software rings*/
+			for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
+				amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]);
+		}
}
break;
case 1:
@@ -7051,7 +7054,7 @@ static void gfx_v9_0_set_ring_funcs(struct amdgpu_device 
*adev)
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
 
-   if (adev->gfx.num_gfx_rings) {
+   if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
			adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx;
}
-- 
2.40.1



Re: [PATCH v3 6/8] drm: add drm_mode_atomic_commit event

2024-02-22 Thread Pierre-Eric Pelloux-Prayer




On 16/02/2024 at 17:24, Ville Syrjälä wrote:

On Fri, Feb 16, 2024 at 04:09:55PM +0100, Pierre-Eric Pelloux-Prayer wrote:

With this and the dma_fence_used_as_dependency event, a tool can draw the
relationship between the compositing draw, the atomic commit, and vblank.

An example on a system with 2 monitors looks like this:

gnome-shell-1638[018] .  2571.905124: drm_mode_atomic_commit: 
file=245c3f0c, pid=1165, flags=0201, crtcs={0x1}
gnome-shell-1638[018] .  2571.905147: dma_fence_used_as_dependency: 
driver=drm_sched timeline=gfx_0.0.0 context=270 seqno=73240 
reason=dma_fence_chain_init
gnome-shell-1638[018] .  2571.913226: drm_mode_atomic_commit: 
file=245c3f0c, pid=1165, flags=0201, crtcs={0x0}
gnome-shell-1638[018] .  2571.913250: dma_fence_used_as_dependency: 
driver=drm_sched timeline=gfx_0.0.0 context=270 seqno=73241 
reason=dma_fence_chain_init
 -0   [018] d.h3.  2571.915687: drm_vblank_event: crtc=1, 
seq=155747, time=2571916093743, high-prec=true
 -0   [018] d.h3.  2571.915968: drm_vblank_event: crtc=0, 
seq=153862, time=2571916377180, high-prec=true

v2: fix unchecked memory allocation

Signed-off-by: Pierre-Eric Pelloux-Prayer 
---
  drivers/gpu/drm/drm_atomic_uapi.c | 21 +
  drivers/gpu/drm/drm_trace.h   | 23 +++
  2 files changed, 44 insertions(+)

diff --git a/drivers/gpu/drm/drm_atomic_uapi.c 
b/drivers/gpu/drm/drm_atomic_uapi.c
index 29d4940188d4..f31b5c6f870b 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -41,6 +41,7 @@
  #include 
  
  #include "drm_crtc_internal.h"

+#include "drm_trace.h"
  
  /**

   * DOC: overview
@@ -1503,6 +1504,26 @@ int drm_mode_atomic_ioctl(struct drm_device *dev,
drm_mode_object_put(obj);
}
  
+	if (trace_drm_mode_atomic_commit_enabled()) {

+   struct drm_crtc_state *crtc_state;
+   struct drm_crtc *crtc;
+   int *crtcs;
+   int i, num_crtcs;
+
+   crtcs = kcalloc(dev->mode_config.num_crtc, sizeof(int),
+   GFP_KERNEL);
+
+   if (crtcs) {
+   num_crtcs = 0;
+   for_each_new_crtc_in_state(state, crtc, crtc_state, i)
+   crtcs[num_crtcs++] = drm_crtc_index(crtc);
+
+			trace_drm_mode_atomic_commit(file_priv, crtcs, num_crtcs, arg->flags);
+
+   kfree(crtcs);
+   }
+   }


I think the current drm trace events are sort of semi-useless.
The problems are:
- no device id in the events so good luck with multi gpu systems
- vblank trace events are only emitted from some vblank
   codepaths but not others

I'm also not sure putting an event straight into the atomic ioctl is
particularly useful.

First of all it means that any commit not initiated by the atomic
ioctl will not be traced.

It would also seem more useful to me if the driver can emit the
trace just before it commits the frame to the hardware, so that
we can also observe the latency between userspace submitting
the frame vs. when the hardware will actually see it.

Also if we want tools to use these I think we're going to have to
make some kind of abi promises about the events, so we should make
sure they are as future proof as we can make them (eg. regarding
   multi-gpu systems/etc.).


Thanks for your feedback.

This series was also discussed on IRC with Sima [1], and the conclusion was
that it would be good to rework the series with the following goals in
mind:
* make sure the events are useful for any drivers using the core drm code,
not just amdgpu
* add new events or extend existing ones so that all the required information is
there (= no guessing needed)
* document the updated tracepoints (as UAPI?): how they should be interpreted
by tools (eg: how to reconstruct fence dependencies? how to measure latency? 
etc)


Pierre-Eric


[1]: 
https://dri.freedesktop.org/~cbrill/dri-log/?channel=dri-devel&date=2024-02-16







+
	ret = prepare_signaling(dev, state, arg, file_priv, &fence_state,
				&num_fences);
if (ret)
diff --git a/drivers/gpu/drm/drm_trace.h b/drivers/gpu/drm/drm_trace.h
index 11c6dd577e8e..63489923c289 100644
--- a/drivers/gpu/drm/drm_trace.h
+++ b/drivers/gpu/drm/drm_trace.h
@@ -66,6 +66,29 @@ TRACE_EVENT(drm_vblank_event_delivered,
  __entry->seq)
  );
  
+TRACE_EVENT(drm_mode_atomic_commit,

+   TP_PROTO(struct drm_file *file, int *crtcs, int ncrtcs, uint32_t 
flags),
+   TP_ARGS(file, crtcs, ncrtcs, flags),
+   TP_STRUCT__entry(
+   __field(struct drm_file *, file)
+   __dynamic_array(u32, crtcs, ncrtcs)
+   __field(uint32_t, ncrtcs)
+   __field(uint32_t, flags)
+   ),
+   TP_fast_assign(

[PATCH v3 8/8] drm/amdgpu: add devname to trace_amdgpu_sched_run_job

2024-02-16 Thread Pierre-Eric Pelloux-Prayer
With the move to work queues for the drm scheduler it becomes
impossible for a tool to match the events to the GPU.

Before this move, the event source was fixed (eg: gfx_0.0.0-598),
so even if the system had multiple GPUs with identical queue names
it was possible to map the events using the PID.

With work queues, the source is now something like: "kworker/u64:0-15248"
(and the PID isn't stable), so the "timeline=gfx_0.0.0" attribute
isn't enough in multi-GPU setups.

This commit adds a dev=devname attribute to resolve this issue.
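
With the new attribute the event output looks like this (made-up
values; the format follows the TP_printk below):

  sched_job=73240, timeline=gfx_0.0.0, context=270, seqno=73240, ring_name=gfx_0.0.0, num_ibs=1, dev=0000:0b:00.0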

Signed-off-by: Pierre-Eric Pelloux-Prayer 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 12 
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 71a5cf37b472..657866a498f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -292,7 +292,7 @@ static struct dma_fence *amdgpu_job_run(struct 
drm_sched_job *sched_job)
job = to_amdgpu_job(sched_job);
finished = >base.s_fence->finished;
 
-   trace_amdgpu_sched_run_job(job);
+   trace_amdgpu_sched_run_job(job, adev);
 
/* Skip job if VRAM is lost and never resubmit gangs */
if (job->generation != amdgpu_vm_generation(adev, job->vm) ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 3f18f570e5ac..1aea1b78747d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -202,8 +202,8 @@ TRACE_EVENT(amdgpu_cs_start,
 );
 
 TRACE_EVENT(amdgpu_sched_run_job,
-   TP_PROTO(struct amdgpu_job *job),
-   TP_ARGS(job),
+   TP_PROTO(struct amdgpu_job *job, struct amdgpu_device *adev),
+   TP_ARGS(job, adev),
TP_STRUCT__entry(
 			     __field(uint64_t, sched_job_id)
 			     __string(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))
@@ -211,6 +211,7 @@ TRACE_EVENT(amdgpu_sched_run_job,
 			     __field(unsigned int, seqno)
 			     __string(ring, to_amdgpu_ring(job->base.sched)->name)
 			     __field(u32, num_ibs)
+			     __string(dname, dev_name(adev->dev))
 ),
 
TP_fast_assign(
@@ -220,10 +221,13 @@ TRACE_EVENT(amdgpu_sched_run_job,
 			   __entry->seqno = job->base.s_fence->finished.seqno;
 			   __assign_str(ring, to_amdgpu_ring(job->base.sched)->name);
 			   __entry->num_ibs = job->num_ibs;
+			   __assign_str(dname, dev_name(adev->dev));
 		   ),
-   TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, 
ring_name=%s, num_ibs=%u",
+   TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, "
+ "ring_name=%s, num_ibs=%u, dev=%s",
  __entry->sched_job_id, __get_str(timeline), 
__entry->context,
- __entry->seqno, __get_str(ring), __entry->num_ibs)
+ __entry->seqno, __get_str(ring), __entry->num_ibs, 
__get_str(dname))
+
 );
 
 
-- 
2.40.1



[PATCH v3 7/8] drm/sched: use trace_dma_fence_used_as_dependency

2024-02-16 Thread Pierre-Eric Pelloux-Prayer
drm_sched_job_add_dependency adds dependencies so use the new
trace event.

Signed-off-by: Pierre-Eric Pelloux-Prayer 
---
 drivers/gpu/drm/scheduler/sched_main.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index 7e90c9f95611..6ee49f70d319 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -84,6 +84,8 @@
 #include 
 #include 
 
+#include 
+
 #define CREATE_TRACE_POINTS
 #include "gpu_scheduler_trace.h"
 
@@ -879,6 +881,8 @@ int drm_sched_job_add_dependency(struct drm_sched_job *job,
if (entry->context != fence->context)
continue;
 
+   trace_dma_fence_used_as_dependency(fence, __func__);
+
if (dma_fence_is_later(fence, entry)) {
dma_fence_put(entry);
xa_store(>dependencies, index, fence, GFP_KERNEL);
-- 
2.40.1



[PATCH v3 6/8] drm: add drm_mode_atomic_commit event

2024-02-16 Thread Pierre-Eric Pelloux-Prayer
With this and the dma_fence_used_as_dependency event, a tool can draw the
relationship between the compositing draw, the atomic commit, and vblank.

An example on a system with 2 monitors looks like this:

gnome-shell-1638[018] .  2571.905124: drm_mode_atomic_commit: 
file=245c3f0c, pid=1165, flags=0201, crtcs={0x1}
gnome-shell-1638[018] .  2571.905147: dma_fence_used_as_dependency: 
driver=drm_sched timeline=gfx_0.0.0 context=270 seqno=73240 
reason=dma_fence_chain_init
gnome-shell-1638[018] .  2571.913226: drm_mode_atomic_commit: 
file=245c3f0c, pid=1165, flags=0201, crtcs={0x0}
gnome-shell-1638[018] .  2571.913250: dma_fence_used_as_dependency: 
driver=drm_sched timeline=gfx_0.0.0 context=270 seqno=73241 
reason=dma_fence_chain_init
-0   [018] d.h3.  2571.915687: drm_vblank_event: crtc=1, 
seq=155747, time=2571916093743, high-prec=true
-0   [018] d.h3.  2571.915968: drm_vblank_event: crtc=0, 
seq=153862, time=2571916377180, high-prec=true

v2: fix unchecked memory allocation

Signed-off-by: Pierre-Eric Pelloux-Prayer 
---
 drivers/gpu/drm/drm_atomic_uapi.c | 21 +
 drivers/gpu/drm/drm_trace.h   | 23 +++
 2 files changed, 44 insertions(+)

diff --git a/drivers/gpu/drm/drm_atomic_uapi.c 
b/drivers/gpu/drm/drm_atomic_uapi.c
index 29d4940188d4..f31b5c6f870b 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -41,6 +41,7 @@
 #include 
 
 #include "drm_crtc_internal.h"
+#include "drm_trace.h"
 
 /**
  * DOC: overview
@@ -1503,6 +1504,26 @@ int drm_mode_atomic_ioctl(struct drm_device *dev,
drm_mode_object_put(obj);
}
 
+   if (trace_drm_mode_atomic_commit_enabled()) {
+   struct drm_crtc_state *crtc_state;
+   struct drm_crtc *crtc;
+   int *crtcs;
+   int i, num_crtcs;
+
+   crtcs = kcalloc(dev->mode_config.num_crtc, sizeof(int),
+   GFP_KERNEL);
+
+   if (crtcs) {
+   num_crtcs = 0;
+   for_each_new_crtc_in_state(state, crtc, crtc_state, i)
+   crtcs[num_crtcs++] = drm_crtc_index(crtc);
+
+			trace_drm_mode_atomic_commit(file_priv, crtcs, num_crtcs, arg->flags);
+
+   kfree(crtcs);
+   }
+   }
+
	ret = prepare_signaling(dev, state, arg, file_priv, &fence_state,
				&num_fences);
if (ret)
diff --git a/drivers/gpu/drm/drm_trace.h b/drivers/gpu/drm/drm_trace.h
index 11c6dd577e8e..63489923c289 100644
--- a/drivers/gpu/drm/drm_trace.h
+++ b/drivers/gpu/drm/drm_trace.h
@@ -66,6 +66,29 @@ TRACE_EVENT(drm_vblank_event_delivered,
  __entry->seq)
 );
 
+TRACE_EVENT(drm_mode_atomic_commit,
+   TP_PROTO(struct drm_file *file, int *crtcs, int ncrtcs, uint32_t 
flags),
+   TP_ARGS(file, crtcs, ncrtcs, flags),
+   TP_STRUCT__entry(
+   __field(struct drm_file *, file)
+   __dynamic_array(u32, crtcs, ncrtcs)
+   __field(uint32_t, ncrtcs)
+   __field(uint32_t, flags)
+   ),
+   TP_fast_assign(
+   unsigned int i;
+
+   __entry->file = file;
+   for (i = 0; i < ncrtcs; i++)
+   ((u32 *)__get_dynamic_array(crtcs))[i] = crtcs[i];
+   __entry->ncrtcs = ncrtcs;
+   __entry->flags = flags;
+   ),
+   TP_printk("file=%p, pid=%8d, flags=%08x, crtcs=%s", __entry->file,
+ pid_nr(__entry->file->pid), __entry->flags,
+		  __print_array(__get_dynamic_array(crtcs), __entry->ncrtcs, 4))
+);
+
 #endif /* _DRM_TRACE_H_ */
 
 /* This part must be outside protection */
-- 
2.40.1



[PATCH v3 5/8] drm/amdgpu: add a amdgpu_cs_start trace event

2024-02-16 Thread Pierre-Eric Pelloux-Prayer
amdgpu_cs_ioctl already exists but serves a different
purpose.

amdgpu_cs_start marks the beginning of the kernel processing of
the ioctl which is useful for tools to map which events belong to
the same submission (without this, the first event would be the
amdgpu_bo_set_list ones).

v2: renamed to amdgpu_cs_start
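
With it, a tool sees a per-submission sequence roughly like this
(simplified illustration based on the events named above):

  amdgpu_cs_start -> amdgpu_bo_set_list -> ... -> amdgpu_cs_ioctl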

Signed-off-by: Pierre-Eric Pelloux-Prayer 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c|  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 12 
 2 files changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 0a4b09709cfb..f3369cd0d9a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1402,6 +1402,8 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
return r;
}
 
+   trace_amdgpu_cs_start(data);
+
	r = amdgpu_cs_pass1(&parser, data);
if (r)
goto error_fini;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 0e47cbe7e0a9..3f18f570e5ac 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -189,6 +189,18 @@ TRACE_EVENT(amdgpu_cs_ioctl,
  __entry->seqno, __get_str(ring), __entry->num_ibs)
 );
 
+TRACE_EVENT(amdgpu_cs_start,
+   TP_PROTO(union drm_amdgpu_cs *cs),
+   TP_ARGS(cs),
+   TP_STRUCT__entry(
+__field(uint32_t, ctx_id)
+   ),
+   TP_fast_assign(
+  __entry->ctx_id = cs->in.ctx_id;
+   ),
+   TP_printk("context=%u", __entry->ctx_id)
+);
+
 TRACE_EVENT(amdgpu_sched_run_job,
TP_PROTO(struct amdgpu_job *job),
TP_ARGS(job),
-- 
2.40.1



[PATCH v3 4/8] drm/amdgpu: add a amdgpu_bo_fill trace event

2024-02-16 Thread Pierre-Eric Pelloux-Prayer
Useful to identify why sdma jobs are submitted.

v2: moved from amdgpu_bo_create to amdgpu_bo_fill

Signed-off-by: Pierre-Eric Pelloux-Prayer 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 18 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   |  2 ++
 2 files changed, 20 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index f539b1d00234..0e47cbe7e0a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -514,6 +514,24 @@ TRACE_EVENT(amdgpu_bo_move,
__entry->new_placement, __entry->bo_size)
 );
 
+TRACE_EVENT(amdgpu_bo_fill,
+   TP_PROTO(struct amdgpu_bo *bo, uint32_t value),
+   TP_ARGS(bo, value),
+   TP_STRUCT__entry(
+   __field(struct amdgpu_bo *, bo)
+   __field(u64, bo_size)
+   __field(u32, value)
+   ),
+
+   TP_fast_assign(
+   __entry->bo  = bo;
+   __entry->bo_size = amdgpu_bo_size(bo);
+   __entry->value   = value;
+   ),
+   TP_printk("bo=%p, size=%lld, value=0x%08x",
+   __entry->bo, __entry->bo_size, __entry->value)
+);
+
 TRACE_EVENT(amdgpu_ib_pipe_sync,
TP_PROTO(struct amdgpu_job *sched_job, struct dma_fence *fence),
TP_ARGS(sched_job, fence),
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 8722beba494e..7d0b2c1191f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -2231,6 +2231,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
return -EINVAL;
}
 
+   trace_amdgpu_bo_fill(bo, src_data);
+
	amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &dst);
 
mutex_lock(>mman.gtt_window_lock);
-- 
2.40.1



[PATCH v3 3/8] amdgpu: use trace_dma_fence_sync_to in amdgpu_fence_sync

2024-02-16 Thread Pierre-Eric Pelloux-Prayer
This makes it possible to understand the dependencies between jobs.
Possible usage of this trace:
* stuttering issues like Mesa !9189
* incorrect synchronization: I don't have a link for this one, but having
  these events was very useful to debug a virtio-gpu / native-context /
  radeonsi sync issue

I have prototype code using this in UMR, as can be seen here:
   https://gitlab.freedesktop.org/tomstdenis/umr/-/merge_requests/37

v2: add a macro since every caller passes __func__ as the reason parameter

Signed-off-by: Pierre-Eric Pelloux-Prayer 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 9 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h | 4 +++-
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index 1b013a44ca99..9a3fdc4be51e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -30,6 +30,7 @@
  */
 
 #include 
+#include 
 
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
@@ -145,14 +146,16 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync 
*sync, struct dma_fence *f)
 }
 
 /**
- * amdgpu_sync_fence - remember to sync to this fence
+ * amdgpu_sync_fence_with_reason - remember to sync to this fence
  *
  * @sync: sync object to add fence to
  * @f: fence to sync to
+ * @reason: why do we sync to this fence
  *
  * Add the fence to the sync object.
  */
-int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f)
+int amdgpu_sync_fence_with_reason(struct amdgpu_sync *sync, struct dma_fence 
*f,
+ const char *reason)
 {
struct amdgpu_sync_entry *e;
 
@@ -166,6 +169,8 @@ int amdgpu_sync_fence(struct amdgpu_sync *sync, struct 
dma_fence *f)
if (!e)
return -ENOMEM;
 
+   trace_dma_fence_used_as_dependency(f, reason);
+
hash_add(sync->fences, >node, f->context);
e->fence = dma_fence_get(f);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
index cf1e9e858efd..52e7306801de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
@@ -47,7 +47,9 @@ struct amdgpu_sync {
 };
 
 void amdgpu_sync_create(struct amdgpu_sync *sync);
-int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f);
+int amdgpu_sync_fence_with_reason(struct amdgpu_sync *sync, struct dma_fence 
*f,
+ const char *reason);
+#define amdgpu_sync_fence(s, f) amdgpu_sync_fence_with_reason(s, f, __func__)
 int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
 struct dma_resv *resv, enum amdgpu_sync_mode mode,
 void *owner);
-- 
2.40.1



[PATCH v3 2/8] dma-buf/fence-chain: use trace_dma_fence_sync_to

2024-02-16 Thread Pierre-Eric Pelloux-Prayer
To inform tools about the relationship between the fences.

Signed-off-by: Pierre-Eric Pelloux-Prayer 
---
 drivers/dma-buf/dma-fence-chain.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/dma-buf/dma-fence-chain.c 
b/drivers/dma-buf/dma-fence-chain.c
index 9663ba1bb6ac..3435078c45b7 100644
--- a/drivers/dma-buf/dma-fence-chain.c
+++ b/drivers/dma-buf/dma-fence-chain.c
@@ -9,6 +9,8 @@
 
 #include 
 
+#include "trace/events/dma_fence.h"
+
 static bool dma_fence_chain_enable_signaling(struct dma_fence *fence);
 
 /**
@@ -251,6 +253,8 @@ void dma_fence_chain_init(struct dma_fence_chain *chain,
chain->fence = fence;
chain->prev_seqno = 0;
 
+   trace_dma_fence_used_as_dependency(fence, __func__);
+
/* Try to reuse the context of the previous chain node. */
if (prev_chain && __dma_fence_is_later(seqno, prev->seqno, prev->ops)) {
context = prev->context;
-- 
2.40.1



[PATCH v3 1/8] tracing, dma-buf: add a trace_dma_fence_sync_to event

2024-02-16 Thread Pierre-Eric Pelloux-Prayer
This new event can be used to trace where a given dma_fence is added
as a dependency of some other work.

I plan to use it in amdgpu.

Signed-off-by: Pierre-Eric Pelloux-Prayer 
---
 drivers/dma-buf/dma-fence.c  |  1 +
 include/trace/events/dma_fence.h | 27 +++
 2 files changed, 28 insertions(+)

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 0393a9bba3a8..e7276c043984 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -23,6 +23,7 @@
 EXPORT_TRACEPOINT_SYMBOL(dma_fence_emit);
 EXPORT_TRACEPOINT_SYMBOL(dma_fence_enable_signal);
 EXPORT_TRACEPOINT_SYMBOL(dma_fence_signaled);
+EXPORT_TRACEPOINT_SYMBOL(dma_fence_used_as_dependency);
 
 static DEFINE_SPINLOCK(dma_fence_stub_lock);
 static struct dma_fence dma_fence_stub;
diff --git a/include/trace/events/dma_fence.h b/include/trace/events/dma_fence.h
index 3963e79ca7b4..5a5d272031ce 100644
--- a/include/trace/events/dma_fence.h
+++ b/include/trace/events/dma_fence.h
@@ -83,6 +83,33 @@ DEFINE_EVENT(dma_fence, dma_fence_wait_end,
TP_ARGS(fence)
 );
 
+TRACE_EVENT(dma_fence_used_as_dependency,
+
+   TP_PROTO(struct dma_fence *fence, const char *reason),
+
+   TP_ARGS(fence, reason),
+
+   TP_STRUCT__entry(
+   __string(driver, fence->ops->get_driver_name(fence))
+   __string(timeline, fence->ops->get_timeline_name(fence))
+   __field(unsigned int, context)
+   __field(unsigned int, seqno)
+   __string(reason, reason)
+   ),
+
+   TP_fast_assign(
+   __assign_str(driver, fence->ops->get_driver_name(fence));
+   __assign_str(timeline, fence->ops->get_timeline_name(fence));
+   __entry->context = fence->context;
+   __entry->seqno = fence->seqno;
+   __assign_str(reason, reason);
+   ),
+
+   TP_printk("driver=%s timeline=%s context=%u seqno=%u reason=%s",
+ __get_str(driver), __get_str(timeline), __entry->context,
+ __entry->seqno, __get_str(reason))
+);
+
 #endif /*  _TRACE_DMA_FENCE_H */
 
 /* This part must be outside protection */
-- 
2.40.1



[PATCH v3 0/8] dma-fence, drm, amdgpu new trace events

2024-02-16 Thread Pierre-Eric Pelloux-Prayer
This series adds new events to make it easier for tools
like gpuvis or umr to graph GPU, kernel and application
activity.

UMR patches using these events can be found here:
https://gitlab.freedesktop.org/tomstdenis/umr/-/merge_requests/37

V1:
https://patchwork.kernel.org/project/linux-media/patch/20240117184329.479554-1-pierre-eric.pelloux-pra...@amd.com/

Changes from V1:
* uses trace_dma_fence_sync_to from dma-fence-chain.c
* new amdgpu events
* new drm plane commit event

Changes from V2:
* uses trace_dma_fence_used_as_dependency from drm_sched_job_add_dependency
* add devname attribute to the trace_amdgpu_sched_run_job event
* addressed review comments

Pierre-Eric Pelloux-Prayer (8):
  tracing, dma-buf: add a trace_dma_fence_sync_to event
  dma-buf/fence-chain: use trace_dma_fence_sync_to
  amdgpu: use trace_dma_fence_sync_to in amdgpu_fence_sync
  drm/amdgpu: add a amdgpu_bo_fill trace event
  drm/amdgpu: add a amdgpu_cs_start trace event
  drm: add drm_mode_atomic_commit event
  drm/sched: use trace_dma_fence_used_as_dependency
  drm/amdgpu: add devname to trace_amdgpu_sched_run_job

 drivers/dma-buf/dma-fence-chain.c |  4 +++
 drivers/dma-buf/dma-fence.c   |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c|  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c  |  9 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h  |  4 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 42 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   |  2 ++
 drivers/gpu/drm/drm_atomic_uapi.c | 21 
 drivers/gpu/drm/drm_trace.h   | 23 +
 drivers/gpu/drm/scheduler/sched_main.c|  4 +++
 include/trace/events/dma_fence.h  | 27 +++
 12 files changed, 133 insertions(+), 8 deletions(-)

-- 
2.40.1



Re: [PATCH v2 1/6] tracing, dma-buf: add a trace_dma_fence_sync_to event

2024-02-14 Thread Pierre-Eric Pelloux-Prayer

On 14/02/2024 at 16:10, Steven Rostedt wrote:

On Wed, 14 Feb 2024 13:00:16 +0100
Christian König  wrote:


+DEFINE_EVENT(dma_fence_from, dma_fence_sync_to,


For a single event you should probably use TRACE_EVENT() instead of
declaring a class. A class is only used if you have multiple events with
the same parameters.


FYI, TRACE_EVENT() is actually defined as:

#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \
DECLARE_EVENT_CLASS(name,  \
 PARAMS(proto),\
 PARAMS(args), \
 PARAMS(tstruct),  \
 PARAMS(assign),   \
 PARAMS(print));   \
DEFINE_EVENT(name, name, PARAMS(proto), PARAMS(args));

So basically, you could really just declare one TRACE_EVENT() and add
DEFINE_EVENT()s on top of it ;)
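
For illustration, with a hypothetical second event name, that would
look like this (a sketch, reusing the class implicitly created by
TRACE_EVENT()):

	TRACE_EVENT(dma_fence_sync_to,
		TP_PROTO(struct dma_fence *fence, const char *reason),
		TP_ARGS(fence, reason),
		/* tstruct, assign, print as in the patch */
		...);

	/* reuses the event class named dma_fence_sync_to defined above */
	DEFINE_EVENT(dma_fence_sync_to, dma_fence_sync_to_other,
		TP_PROTO(struct dma_fence *fence, const char *reason),
		TP_ARGS(fence, reason)
	);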

I never recommended that because I thought it would be confusing.



Thanks Steve and Christian for your feedback.

I'm integrating your suggestions in my branch and will re-send the series
after more testing.


Pierre-Eric




-- Steve


Re: [PATCH v2 5/6] drm/amdgpu: add a amdgpu_cs_ioctl2 event

2024-02-14 Thread Pierre-Eric Pelloux-Prayer




On 14/02/2024 at 13:09, Christian König wrote:

On 13.02.24 at 16:50, Pierre-Eric Pelloux-Prayer wrote:

amdgpu_cs_ioctl already exists but serves a different
purpose.

amdgpu_cs_ioctl2 marks the beginning of the kernel processing of
the ioctl which is useful for tools to map which events belong to
the same submission (without this, the first event would be the
amdgpu_bo_set_list ones).


That's not necessarily a good justification for the naming. What exactly was
the original trace_amdgpu_cs_ioctl() doing?



trace_amdgpu_cs_ioctl is used right before drm_sched_entity_push_job is
called, so in a sense it's a duplicate of trace_drm_sched_job.

That being said, it's used by gpuvis so I chose to not modify it.

As for the new event: initially I named it "amdgpu_cs_parser_init", but
since the intent is to mark the beginning of the amdgpu_cs_submit I've
renamed it.

Any suggestion for a better name?

Thanks,
Pierre-Eric




Regards,
Christian.



Signed-off-by: Pierre-Eric Pelloux-Prayer 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c    |  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 12 
 2 files changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 6830892383c3..29e43a66d0d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1402,6 +1402,8 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
 		return r;
 	}
+	trace_amdgpu_cs_ioctl2(data);
+
 	r = amdgpu_cs_pass1(&parser, data);
 	if (r)
 		goto error_fini;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index e8ea1cfe7027..24e95560ede5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -189,6 +189,18 @@ TRACE_EVENT(amdgpu_cs_ioctl,
 		  __entry->seqno, __get_str(ring), __entry->num_ibs)
 );
+TRACE_EVENT(amdgpu_cs_ioctl2,
+	TP_PROTO(union drm_amdgpu_cs *cs),
+	TP_ARGS(cs),
+	TP_STRUCT__entry(
+		__field(uint32_t, ctx_id)
+	),
+	TP_fast_assign(
+		__entry->ctx_id = cs->in.ctx_id;
+	),
+	TP_printk("context=%u", __entry->ctx_id)
+);
+
 TRACE_EVENT(amdgpu_sched_run_job,
 	TP_PROTO(struct amdgpu_job *job),
 	TP_ARGS(job),






[PATCH v2 6/6] drm: add drm_mode_atomic_commit event

2024-02-13 Thread Pierre-Eric Pelloux-Prayer
With this and the dma_fence_sync_to event, a tool can draw the
relationship between the compositing draw, the atomic commit, and vblank.

An example on a system with 2 monitors looks like this:

gnome-shell-1638[018] .  2571.905124: drm_mode_atomic_commit: 
file=245c3f0c, pid=1165, flags=0201, crtcs={0x1}
gnome-shell-1638[018] .  2571.905147: dma_fence_sync_to: 
driver=drm_sched timeline=gfx_0.0.0 context=270 seqno=73240 
reason=dma_fence_chain_init
gnome-shell-1638[018] .  2571.913226: drm_mode_atomic_commit: 
file=245c3f0c, pid=1165, flags=0201, crtcs={0x0}
gnome-shell-1638[018] .  2571.913250: dma_fence_sync_to: 
driver=drm_sched timeline=gfx_0.0.0 context=270 seqno=73241 
reason=dma_fence_chain_init
 -0   [018] d.h3.  2571.915687: drm_vblank_event: crtc=1, 
seq=155747, time=2571916093743, high-prec=true
 -0   [018] d.h3.  2571.915968: drm_vblank_event: crtc=0, 
seq=153862, time=2571916377180, high-prec=true

Signed-off-by: Pierre-Eric Pelloux-Prayer 
---
 drivers/gpu/drm/drm_atomic_uapi.c | 19 +++
 drivers/gpu/drm/drm_trace.h   | 28 ++--
 2 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_atomic_uapi.c 
b/drivers/gpu/drm/drm_atomic_uapi.c
index 29d4940188d4..0d3767cd155a 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -41,6 +41,7 @@
 #include 
 
 #include "drm_crtc_internal.h"
+#include "drm_trace.h"
 
 /**
  * DOC: overview
@@ -1503,6 +1504,24 @@ int drm_mode_atomic_ioctl(struct drm_device *dev,
drm_mode_object_put(obj);
}
 
+   if (trace_drm_mode_atomic_commit_enabled()) {
+   struct drm_crtc_state *crtc_state;
+   struct drm_crtc *crtc;
+   int *crtcs;
+   int i, num_crtcs;
+
+   crtcs = kcalloc(dev->mode_config.num_crtc, sizeof(int),
+   GFP_KERNEL);
+
+   num_crtcs = 0;
+   for_each_new_crtc_in_state(state, crtc, crtc_state, i)
+   crtcs[num_crtcs++] = drm_crtc_index(crtc);
+
+		trace_drm_mode_atomic_commit(file_priv, crtcs, num_crtcs, arg->flags);
+
+   kfree(crtcs);
+   }
+
	ret = prepare_signaling(dev, state, arg, file_priv, &fence_state,
				&num_fences);
if (ret)
diff --git a/drivers/gpu/drm/drm_trace.h b/drivers/gpu/drm/drm_trace.h
index 11c6dd577e8e..b62a44cb1270 100644
--- a/drivers/gpu/drm/drm_trace.h
+++ b/drivers/gpu/drm/drm_trace.h
@@ -62,8 +62,32 @@ TRACE_EVENT(drm_vblank_event_delivered,
__entry->crtc = crtc;
__entry->seq = seq;
),
-   TP_printk("file=%p, crtc=%d, seq=%u", __entry->file, __entry->crtc, 
\
- __entry->seq)
+   TP_printk("file=%p, crtc=%d, seq=%u, pid=%8d", \
+ __entry->file, __entry->crtc, __entry->seq, \
+ pid_nr(__entry->file->pid))
+);
+
+TRACE_EVENT(drm_mode_atomic_commit,
+   TP_PROTO(struct drm_file *file, int *crtcs, int ncrtcs, uint32_t 
flags),
+   TP_ARGS(file, crtcs, ncrtcs, flags),
+   TP_STRUCT__entry(
+   __field(struct drm_file *, file)
+   __dynamic_array(u32, crtcs, ncrtcs)
+   __field(uint32_t, ncrtcs)
+   __field(uint32_t, flags)
+   ),
+   TP_fast_assign(
+   unsigned int i;
+
+   __entry->file = file;
+   for (i = 0; i < ncrtcs; i++)
+   ((u32 *)__get_dynamic_array(crtcs))[i] = crtcs[i];
+   __entry->ncrtcs = ncrtcs;
+   __entry->flags = flags;
+   ),
+   TP_printk("file=%p, pid=%8d, flags=%08x, crtcs=%s", __entry->file, \
+ pid_nr(__entry->file->pid), __entry->flags, \
+		  __print_array(__get_dynamic_array(crtcs), __entry->ncrtcs, 4))
 );
 
 #endif /* _DRM_TRACE_H_ */
-- 
2.40.1



[PATCH v2 5/6] drm/amdgpu: add a amdgpu_cs_ioctl2 event

2024-02-13 Thread Pierre-Eric Pelloux-Prayer
amdgpu_cs_ioctl already exists but serves a different
purpose.

amdgpu_cs_ioctl2 marks the beginning of the kernel processing of
the ioctl which is useful for tools to map which events belong to
the same submission (without this, the first event would be the
amdgpu_bo_set_list ones).

Signed-off-by: Pierre-Eric Pelloux-Prayer 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c|  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 12 
 2 files changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 6830892383c3..29e43a66d0d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1402,6 +1402,8 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
return r;
}
 
+   trace_amdgpu_cs_ioctl2(data);
+
	r = amdgpu_cs_pass1(&parser, data);
if (r)
goto error_fini;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index e8ea1cfe7027..24e95560ede5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -189,6 +189,18 @@ TRACE_EVENT(amdgpu_cs_ioctl,
  __entry->seqno, __get_str(ring), __entry->num_ibs)
 );
 
+TRACE_EVENT(amdgpu_cs_ioctl2,
+   TP_PROTO(union drm_amdgpu_cs *cs),
+   TP_ARGS(cs),
+   TP_STRUCT__entry(
+__field(uint32_t, ctx_id)
+   ),
+   TP_fast_assign(
+  __entry->ctx_id = cs->in.ctx_id;
+   ),
+   TP_printk("context=%u", __entry->ctx_id)
+);
+
 TRACE_EVENT(amdgpu_sched_run_job,
TP_PROTO(struct amdgpu_job *job),
TP_ARGS(job),
-- 
2.40.1



[PATCH v2 4/6] drm/amdgpu: add BO clear event

2024-02-13 Thread Pierre-Eric Pelloux-Prayer
Useful to identify why sdma jobs are submitted.

Signed-off-by: Pierre-Eric Pelloux-Prayer 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h  | 16 
 2 files changed, 18 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 425cebcc5cbf..7219f329d6f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -631,6 +631,8 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
bo->tbo.resource->mem_type == TTM_PL_VRAM) {
struct dma_fence *fence;
 
+   trace_amdgpu_bo_clear(bo);
+
		r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, true);
if (unlikely(r))
goto fail_unreserve;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index f539b1d00234..e8ea1cfe7027 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -514,6 +514,22 @@ TRACE_EVENT(amdgpu_bo_move,
__entry->new_placement, __entry->bo_size)
 );
 
+TRACE_EVENT(amdgpu_bo_clear,
+   TP_PROTO(struct amdgpu_bo *bo),
+   TP_ARGS(bo),
+   TP_STRUCT__entry(
+   __field(struct amdgpu_bo *, bo)
+   __field(u64, bo_size)
+   ),
+
+   TP_fast_assign(
+   __entry->bo  = bo;
+   __entry->bo_size = amdgpu_bo_size(bo);
+   ),
+   TP_printk("bo=%p, size=%lld",
+   __entry->bo, __entry->bo_size)
+);
+
 TRACE_EVENT(amdgpu_ib_pipe_sync,
TP_PROTO(struct amdgpu_job *sched_job, struct dma_fence *fence),
TP_ARGS(sched_job, fence),
-- 
2.40.1



[PATCH v2 3/6] amdgpu: use trace_dma_fence_sync_to in amdgpu_fence_sync

2024-02-13 Thread Pierre-Eric Pelloux-Prayer
This makes it possible to understand the dependencies between jobs.
Possible usage of this trace:
* stuttering issues like Mesa !9189
* incorrect synchronization: I don't have a link for this one, but having
  these events was very useful to debug a virtio-gpu / native-context /
  radeonsi sync issue

I have prototype code using this in UMR, as can be seen here:
   https://gitlab.freedesktop.org/tomstdenis/umr/-/merge_requests/37

The 'reason' param currently uses __func__ but I didn't add a macro for
this because it'd be interesting to use more descriptive names for each
use of amdgpu_fence_sync.
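
For instance, a call site could then pass a descriptive string instead
of __func__ (hypothetical example using the new signature):

	r = amdgpu_sync_fence(&p->sync, fence, "user fence dependency");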

Signed-off-by: Pierre-Eric Pelloux-Prayer 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c |  8 
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   | 14 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c  |  8 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c  |  4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 11 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h |  3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c |  4 ++--
 7 files changed, 28 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index d17b2452cb1f..fde98e48c84b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -491,7 +491,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct 
amdgpu_sync *sync)
if (ret)
return ret;
 
-   return amdgpu_sync_fence(sync, vm->last_update);
+   return amdgpu_sync_fence(sync, vm->last_update, __func__);
 }
 
 static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
@@ -1251,7 +1251,7 @@ static void unmap_bo_from_gpuvm(struct kgd_mem *mem,
 
	amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
 
-   amdgpu_sync_fence(sync, bo_va->last_pt_update);
+   amdgpu_sync_fence(sync, bo_va->last_pt_update, __func__);
 }
 
 static int update_gpuvm_pte(struct kgd_mem *mem,
@@ -1273,7 +1273,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
return ret;
}
 
-   return amdgpu_sync_fence(sync, bo_va->last_pt_update);
+   return amdgpu_sync_fence(sync, bo_va->last_pt_update, __func__);
 }
 
 static int map_bo_to_gpuvm(struct kgd_mem *mem,
@@ -2910,7 +2910,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, 
struct dma_fence **ef)
}
	dma_resv_for_each_fence(&cursor, bo->tbo.base.resv,
				DMA_RESV_USAGE_KERNEL, fence) {
-		ret = amdgpu_sync_fence(&sync_obj, fence);
+		ret = amdgpu_sync_fence(&sync_obj, fence, __func__);
if (ret) {
pr_debug("Memory eviction: Sync BO fence 
failed. Try again\n");
goto validate_map_fail;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 6adeddfb3d56..6830892383c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -423,7 +423,7 @@ static int amdgpu_cs_p2_dependencies(struct 
amdgpu_cs_parser *p,
dma_fence_put(old);
}
 
-   r = amdgpu_sync_fence(>sync, fence);
+   r = amdgpu_sync_fence(>sync, fence, __func__);
dma_fence_put(fence);
if (r)
return r;
@@ -445,7 +445,7 @@ static int amdgpu_syncobj_lookup_and_add(struct 
amdgpu_cs_parser *p,
return r;
}
 
-   r = amdgpu_sync_fence(>sync, fence);
+   r = amdgpu_sync_fence(>sync, fence, __func__);
dma_fence_put(fence);
return r;
 }
@@ -1101,7 +1101,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser 
*p)
if (r)
return r;
 
-   r = amdgpu_sync_fence(>sync, fpriv->prt_va->last_pt_update);
+   r = amdgpu_sync_fence(>sync, fpriv->prt_va->last_pt_update, 
__func__);
if (r)
return r;
 
@@ -1112,7 +1112,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser 
*p)
if (r)
return r;
 
-   r = amdgpu_sync_fence(>sync, bo_va->last_pt_update);
+   r = amdgpu_sync_fence(>sync, bo_va->last_pt_update, 
__func__);
if (r)
return r;
}
@@ -1131,7 +1131,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser 
*p)
if (r)
return r;
 
-   r = amdgpu_sync_fence(>sync, bo_va->last_pt_update);
+   r = amdgpu_sync_fence(>sync, bo_va->last_pt_update, 
__func__);
if (r)
return r;
}
@@ -1144,7 +1144,7 @@ static int amdgpu_cs_vm_ha

[PATCH v2 2/6] dma-buf/fence-chain: use trace_dma_fence_sync_to

2024-02-13 Thread Pierre-Eric Pelloux-Prayer
To inform tools about the relationship between the fences.

Signed-off-by: Pierre-Eric Pelloux-Prayer 
---
 drivers/dma-buf/dma-fence-chain.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/dma-buf/dma-fence-chain.c 
b/drivers/dma-buf/dma-fence-chain.c
index 9663ba1bb6ac..a211b3d4156a 100644
--- a/drivers/dma-buf/dma-fence-chain.c
+++ b/drivers/dma-buf/dma-fence-chain.c
@@ -9,6 +9,8 @@
 
 #include 
 
+#include "trace/events/dma_fence.h"
+
 static bool dma_fence_chain_enable_signaling(struct dma_fence *fence);
 
 /**
@@ -251,6 +253,8 @@ void dma_fence_chain_init(struct dma_fence_chain *chain,
chain->fence = fence;
chain->prev_seqno = 0;
 
+   trace_dma_fence_sync_to(fence, __func__);
+
/* Try to reuse the context of the previous chain node. */
if (prev_chain && __dma_fence_is_later(seqno, prev->seqno, prev->ops)) {
context = prev->context;
-- 
2.40.1



[PATCH v2 1/6] tracing, dma-buf: add a trace_dma_fence_sync_to event

2024-02-13 Thread Pierre-Eric Pelloux-Prayer
This new event can be used to trace where a given dma_fence is added
as a dependency of some other work.

I plan to use it in amdgpu.

Signed-off-by: Pierre-Eric Pelloux-Prayer 
---
 drivers/dma-buf/dma-fence.c  |  1 +
 include/trace/events/dma_fence.h | 34 
 2 files changed, 35 insertions(+)

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index e0fd99e61a2d..671a499a5ccd 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -23,6 +23,7 @@
 EXPORT_TRACEPOINT_SYMBOL(dma_fence_emit);
 EXPORT_TRACEPOINT_SYMBOL(dma_fence_enable_signal);
 EXPORT_TRACEPOINT_SYMBOL(dma_fence_signaled);
+EXPORT_TRACEPOINT_SYMBOL(dma_fence_sync_to);
 
 static DEFINE_SPINLOCK(dma_fence_stub_lock);
 static struct dma_fence dma_fence_stub;
diff --git a/include/trace/events/dma_fence.h b/include/trace/events/dma_fence.h
index 3963e79ca7b4..9b3875f7aa79 100644
--- a/include/trace/events/dma_fence.h
+++ b/include/trace/events/dma_fence.h
@@ -83,6 +83,40 @@ DEFINE_EVENT(dma_fence, dma_fence_wait_end,
TP_ARGS(fence)
 );
 
+DECLARE_EVENT_CLASS(dma_fence_from,
+
+   TP_PROTO(struct dma_fence *fence, const char *reason),
+
+   TP_ARGS(fence, reason),
+
+   TP_STRUCT__entry(
+   __string(driver, fence->ops->get_driver_name(fence))
+   __string(timeline, fence->ops->get_timeline_name(fence))
+   __field(unsigned int, context)
+   __field(unsigned int, seqno)
+   __string(reason, reason)
+   ),
+
+   TP_fast_assign(
+   __assign_str(driver, fence->ops->get_driver_name(fence));
+   __assign_str(timeline, fence->ops->get_timeline_name(fence));
+   __entry->context = fence->context;
+   __entry->seqno = fence->seqno;
+   __assign_str(reason, reason);
+   ),
+
+   TP_printk("driver=%s timeline=%s context=%u seqno=%u reason=%s",
+ __get_str(driver), __get_str(timeline), __entry->context,
+ __entry->seqno, __get_str(reason))
+);
+
+DEFINE_EVENT(dma_fence_from, dma_fence_sync_to,
+
+   TP_PROTO(struct dma_fence *fence, const char *reason),
+
+   TP_ARGS(fence, reason)
+);
+
 #endif /*  _TRACE_DMA_FENCE_H */
 
 /* This part must be outside protection */
-- 
2.40.1



[PATCH v2 0/6] dma-fence, drm, amdgpu new trace events

2024-02-13 Thread Pierre-Eric Pelloux-Prayer
This series adds new events to make it easier for tools
like gpuvis or umr to graph GPU, kernel and application
activity.

UMR patches using these events can be found here:
https://gitlab.freedesktop.org/tomstdenis/umr/-/merge_requests/37

V1:
https://patchwork.kernel.org/project/linux-media/patch/20240117184329.479554-1-pierre-eric.pelloux-pra...@amd.com/

Changes from V1:
* uses trace_dma_fence_sync_to from dma-fence-chain.c
* new amdgpu events
* new drm plane commit event

Pierre-Eric Pelloux-Prayer (6):
  tracing, dma-buf: add a trace_dma_fence_sync_to event
  dma-buf/fence-chain: use trace_dma_fence_sync_to
  amdgpu: use trace_dma_fence_sync_to in amdgpu_fence_sync
  drm/amdgpu: add BO clear event
  drm/amdgpu: add a amdgpu_cs_ioctl2 event
  drm: add drm_mode_atomic_commit event

 drivers/dma-buf/dma-fence-chain.c |  4 +++
 drivers/dma-buf/dma-fence.c   |  1 +
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  8 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c| 16 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c   |  8 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c   |  4 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c|  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c  | 11 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h  |  3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 28 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c  |  4 +--
 drivers/gpu/drm/drm_atomic_uapi.c | 19 +++
 drivers/gpu/drm/drm_trace.h   | 28 +--
 include/trace/events/dma_fence.h  | 34 +++
 14 files changed, 144 insertions(+), 26 deletions(-)

-- 
2.40.1



[PATCH 2/2] amdgpu: use trace_dma_fence_sync_to in amdgpu_fence_sync

2024-01-17 Thread Pierre-Eric Pelloux-Prayer
This makes it possible to understand the dependencies between jobs.
Possible usage of this trace:
* stuttering issues like Mesa !9189
* incorrect synchronization: I don't have a link for this one, but having
  these events was very useful to debug a virtio-gpu / native-context /
  radeonsi sync issue

I have prototype code using this in UMR, as can be seen here:
   https://gitlab.freedesktop.org/tomstdenis/umr/-/merge_requests/37

The 'reason' param currently uses __func__ but I didn't add a macro for
this because it'd be interesting to use more descriptive names for each
use of amdgpu_fence_sync.

Signed-off-by: Pierre-Eric Pelloux-Prayer 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c |  8 
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   | 14 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c  |  8 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c  |  4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 11 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h |  3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c |  4 ++--
 7 files changed, 28 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index d17b2452cb1f..fde98e48c84b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -491,7 +491,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct 
amdgpu_sync *sync)
if (ret)
return ret;
 
-   return amdgpu_sync_fence(sync, vm->last_update);
+   return amdgpu_sync_fence(sync, vm->last_update, __func__);
 }
 
 static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
@@ -1251,7 +1251,7 @@ static void unmap_bo_from_gpuvm(struct kgd_mem *mem,
 
	amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
 
-   amdgpu_sync_fence(sync, bo_va->last_pt_update);
+   amdgpu_sync_fence(sync, bo_va->last_pt_update, __func__);
 }
 
 static int update_gpuvm_pte(struct kgd_mem *mem,
@@ -1273,7 +1273,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
return ret;
}
 
-   return amdgpu_sync_fence(sync, bo_va->last_pt_update);
+   return amdgpu_sync_fence(sync, bo_va->last_pt_update, __func__);
 }
 
 static int map_bo_to_gpuvm(struct kgd_mem *mem,
@@ -2910,7 +2910,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, 
struct dma_fence **ef)
}
	dma_resv_for_each_fence(&cursor, bo->tbo.base.resv,
				DMA_RESV_USAGE_KERNEL, fence) {
-		ret = amdgpu_sync_fence(&sync_obj, fence);
+		ret = amdgpu_sync_fence(&sync_obj, fence, __func__);
if (ret) {
pr_debug("Memory eviction: Sync BO fence 
failed. Try again\n");
goto validate_map_fail;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 6adeddfb3d56..6830892383c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -423,7 +423,7 @@ static int amdgpu_cs_p2_dependencies(struct 
amdgpu_cs_parser *p,
dma_fence_put(old);
}
 
-   r = amdgpu_sync_fence(>sync, fence);
+   r = amdgpu_sync_fence(>sync, fence, __func__);
dma_fence_put(fence);
if (r)
return r;
@@ -445,7 +445,7 @@ static int amdgpu_syncobj_lookup_and_add(struct 
amdgpu_cs_parser *p,
return r;
}
 
-   r = amdgpu_sync_fence(&p->sync, fence);
+   r = amdgpu_sync_fence(&p->sync, fence, __func__);
dma_fence_put(fence);
return r;
 }
@@ -1101,7 +1101,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser 
*p)
if (r)
return r;
 
-   r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update);
+   r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update, 
__func__);
if (r)
return r;
 
@@ -1112,7 +1112,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser 
*p)
if (r)
return r;
 
-   r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update);
+   r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update, 
__func__);
if (r)
return r;
}
@@ -1131,7 +1131,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser 
*p)
if (r)
return r;
 
-   r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update);
+   r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update, 
__func__);
if (r)
return r;
}
@@ -1144,7 +1144,7 @@ static int amdgpu_cs_vm_ha

[PATCH 1/2] tracing, dma-buf: add a trace_dma_fence_sync_to event

2024-01-17 Thread Pierre-Eric Pelloux-Prayer
This new event can be used to trace where a given dma_fence is added
as a dependency of some other work.

I plan to use it in amdgpu.
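
For context, this is roughly how a driver would emit the event when it
records a dependency (minimal sketch; the helper and job struct below are
hypothetical, only the tracepoint comes from this patch):

   #include <trace/events/dma_fence.h>

   static int my_job_add_dependency(struct my_job *job,
                                    struct dma_fence *fence)
   {
           /* record where this fence becomes a dependency of other work */
           trace_dma_fence_sync_to(fence, __func__);
           return drm_sched_job_add_dependency(&job->base, fence);
   }

The resulting trace line then follows the TP_printk format below, e.g.
"driver=drm_sched timeline=gfx context=42 seqno=7
reason=my_job_add_dependency".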

Signed-off-by: Pierre-Eric Pelloux-Prayer 
---
 drivers/dma-buf/dma-fence.c  |  1 +
 include/trace/events/dma_fence.h | 34 
 2 files changed, 35 insertions(+)

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index e0fd99e61a2d..671a499a5ccd 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -23,6 +23,7 @@
 EXPORT_TRACEPOINT_SYMBOL(dma_fence_emit);
 EXPORT_TRACEPOINT_SYMBOL(dma_fence_enable_signal);
 EXPORT_TRACEPOINT_SYMBOL(dma_fence_signaled);
+EXPORT_TRACEPOINT_SYMBOL(dma_fence_sync_to);
 
 static DEFINE_SPINLOCK(dma_fence_stub_lock);
 static struct dma_fence dma_fence_stub;
diff --git a/include/trace/events/dma_fence.h b/include/trace/events/dma_fence.h
index 3963e79ca7b4..9b3875f7aa79 100644
--- a/include/trace/events/dma_fence.h
+++ b/include/trace/events/dma_fence.h
@@ -83,6 +83,40 @@ DEFINE_EVENT(dma_fence, dma_fence_wait_end,
TP_ARGS(fence)
 );
 
+DECLARE_EVENT_CLASS(dma_fence_from,
+
+   TP_PROTO(struct dma_fence *fence, const char *reason),
+
+   TP_ARGS(fence, reason),
+
+   TP_STRUCT__entry(
+   __string(driver, fence->ops->get_driver_name(fence))
+   __string(timeline, fence->ops->get_timeline_name(fence))
+   __field(unsigned int, context)
+   __field(unsigned int, seqno)
+   __string(reason, reason)
+   ),
+
+   TP_fast_assign(
+   __assign_str(driver, fence->ops->get_driver_name(fence));
+   __assign_str(timeline, fence->ops->get_timeline_name(fence));
+   __entry->context = fence->context;
+   __entry->seqno = fence->seqno;
+   __assign_str(reason, reason);
+   ),
+
+   TP_printk("driver=%s timeline=%s context=%u seqno=%u reason=%s",
+ __get_str(driver), __get_str(timeline), __entry->context,
+ __entry->seqno, __get_str(reason))
+);
+
+DEFINE_EVENT(dma_fence_from, dma_fence_sync_to,
+
+   TP_PROTO(struct dma_fence *fence, const char *reason),
+
+   TP_ARGS(fence, reason)
+);
+
 #endif /*  _TRACE_DMA_FENCE_H */
 
 /* This part must be outside protection */
-- 
2.40.1



[PATCH] drm/amdgpu: add VISIBLE info in amdgpu_bo_print_info

2023-06-21 Thread Pierre-Eric Pelloux-Prayer
This allows tools to distinguish between VRAM and visible VRAM.

Take the opportunity to fix the locking before accessing the bo.

Signed-off-by: Pierre-Eric Pelloux-Prayer 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 33 ++
 1 file changed, 21 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index ff73cc11d47e..f12f019d7f99 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1583,18 +1583,27 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, 
struct seq_file *m)
unsigned int pin_count;
u64 size;
 
-   domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type);
-   switch (domain) {
-   case AMDGPU_GEM_DOMAIN_VRAM:
-   placement = "VRAM";
-   break;
-   case AMDGPU_GEM_DOMAIN_GTT:
-   placement = " GTT";
-   break;
-   case AMDGPU_GEM_DOMAIN_CPU:
-   default:
-   placement = " CPU";
-   break;
+   if (dma_resv_trylock(bo->tbo.base.resv)) {
+   unsigned int domain;
+   domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type);
+   switch (domain) {
+   case AMDGPU_GEM_DOMAIN_VRAM:
+   if (amdgpu_bo_in_cpu_visible_vram(bo))
+   placement = "VRAM VISIBLE";
+   else
+   placement = "VRAM";
+   break;
+   case AMDGPU_GEM_DOMAIN_GTT:
+   placement = " GTT";
+   break;
+   case AMDGPU_GEM_DOMAIN_CPU:
+   default:
+   placement = " CPU";
+   break;
+   }
+   dma_resv_unlock(bo->tbo.base.resv);
+   } else {
+   placement = "UNKNOWN";
}
 
size = amdgpu_bo_size(bo);
-- 
2.40.0



[PATCH] drm/amdgpu: add new flag to AMDGPU_CTX_QUERY2

2023-04-13 Thread Pierre-Eric Pelloux-Prayer
The OpenGL EXT_robustness extension expects the driver to stop reporting
GUILTY_CONTEXT_RESET once the reset has completed and the GPU is ready
to accept submissions again.

This commit adds an AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS flag
that lets the UMD know that the reset has not finished yet.

Mesa MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22290
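
For reference, the UMD-side check could look roughly like this (sketch
using the amdgpu uAPI directly; error handling omitted, drmIoctl() from
libdrm would work the same way):

   #include <sys/ioctl.h>
   #include <drm/amdgpu_drm.h>

   union drm_amdgpu_ctx args = {0};

   args.in.op = AMDGPU_CTX_OP_QUERY_STATE2;
   args.in.ctx_id = ctx_id;

   if (ioctl(fd, DRM_IOCTL_AMDGPU_CTX, &args) == 0 &&
       (args.out.state.flags & AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS)) {
           /* keep reporting GUILTY_CONTEXT_RESET: reset not finished yet */
   }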

Signed-off-by: Pierre-Eric Pelloux-Prayer 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 3 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++-
 include/uapi/drm/amdgpu_drm.h   | 2 ++
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index d2139ac12159..e1f642a3dc2f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -576,6 +576,9 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev,
if (atomic_read(&ctx->guilty))
out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;
 
+   if (amdgpu_in_reset(adev))
+   out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS;
+
if (adev->ras_enabled && con) {
/* Return the cached values in O(1),
 * and schedule delayed work to cache
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index e652ffb2c68e..223be5d75b8d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -111,9 +111,10 @@
  *   3.52.0 - Add AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD, add device_info 
fields:
  *tcp_cache_size, num_sqc_per_wgp, sqc_data_cache_size, 
sqc_inst_cache_size,
  *gl1c_cache_size, gl2c_cache_size, mall_size, 
enabled_rb_pipes_mask_hi
+ *   3.53.0 - Add AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS support
  */
 #define KMS_DRIVER_MAJOR   3
-#define KMS_DRIVER_MINOR   52
+#define KMS_DRIVER_MINOR   53
 #define KMS_DRIVER_PATCHLEVEL  0
 
 unsigned int amdgpu_vram_limit = UINT_MAX;
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index b6eb90df5d05..3d820750c1c9 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -245,6 +245,8 @@ union drm_amdgpu_bo_list {
 /* indicate some errors are detected by RAS */
 #define AMDGPU_CTX_QUERY2_FLAGS_RAS_CE   (1<<3)
 #define AMDGPU_CTX_QUERY2_FLAGS_RAS_UE   (1<<4)
+/* indicate that the reset hasn't completed yet */
+#define AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS (1<<5)
 
 /* Context priority level */
 #define AMDGPU_CTX_PRIORITY_UNSET   -2048
-- 
2.40.0



[PATCH] drm/amdgpu: print bo inode number instead of ptr

2023-01-12 Thread Pierre-Eric Pelloux-Prayer
This makes it possible to correlate the info printed by
/sys/kernel/debug/dri/n/amdgpu_gem_info with what is found
in /proc/.../fdinfo and /sys/kernel/debug/dma_buf/bufinfo.

Signed-off-by: Pierre-Eric Pelloux-Prayer 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 90eb07106609..2b076ed46e78 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1572,9 +1572,9 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, 
struct seq_file *m)
attachment = READ_ONCE(bo->tbo.base.import_attach);
 
if (attachment)
-   seq_printf(m, " imported from %p", dma_buf);
+   seq_printf(m, " imported from ino:%lu", 
file_inode(dma_buf->file)->i_ino);
else if (dma_buf)
-   seq_printf(m, " exported as %p", dma_buf);
+   seq_printf(m, " exported as ino:%lu", 
file_inode(dma_buf->file)->i_ino);
 
amdgpu_bo_print_flag(m, bo, CPU_ACCESS_REQUIRED);
amdgpu_bo_print_flag(m, bo, NO_CPU_ACCESS);
-- 
2.39.0



[PATCH 2/2] drm/amdgpu: use real_vram_size in ttm_vram_fops

2022-06-22 Thread Pierre-Eric Pelloux-Prayer
If amdgpu.vramlimit= is used, amdgpu_gmc_vram_location will update
real_vram_size based on this value.
mc_vram_size is the real amount of VRAM, initialized in gmc_..._mc_init.
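
For clarity, the relationship is roughly this (a sketch of the logic in
amdgpu_gmc_vram_location, not a verbatim copy of it):

   u64 limit = (u64)amdgpu_vram_limit << 20;   /* module param, in MiB */

   mc->real_vram_size = mc->mc_vram_size;      /* full VRAM from gmc_..._mc_init */
   if (limit && limit < mc->real_vram_size)
           mc->real_vram_size = limit;         /* capped by amdgpu.vramlimit= */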

Signed-off-by: Pierre-Eric Pelloux-Prayer 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 952e99e6d07e..8f245e9f8f7c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -2252,10 +2252,10 @@ static ssize_t amdgpu_ttm_vram_read(struct file *f, 
char __user *buf,
if (size & 0x3 || *pos & 0x3)
return -EINVAL;
 
-   if (*pos >= adev->gmc.mc_vram_size)
+   if (*pos >= adev->gmc.real_vram_size)
return -ENXIO;
 
-   size = min(size, (size_t)(adev->gmc.mc_vram_size - *pos));
+   size = min(size, (size_t)(adev->gmc.real_vram_size - *pos));
while (size) {
size_t bytes = min(size, AMDGPU_TTM_VRAM_MAX_DW_READ * 4);
uint32_t value[AMDGPU_TTM_VRAM_MAX_DW_READ];
@@ -2288,13 +2288,13 @@ static ssize_t amdgpu_ttm_vram_write(struct file *f, 
const char __user *buf,
if (size & 0x3 || *pos & 0x3)
return -EINVAL;
 
-   if (*pos >= adev->gmc.mc_vram_size)
+   if (*pos >= adev->gmc.real_vram_size)
return -ENXIO;
 
while (size) {
uint32_t value;
 
-   if (*pos >= adev->gmc.mc_vram_size)
+   if (*pos >= adev->gmc.real_vram_size)
return result;
 
r = get_user(value, (uint32_t *)buf);
@@ -2442,7 +2442,7 @@ void amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
struct dentry *root = minor->debugfs_root;
 
debugfs_create_file_size("amdgpu_vram", 0444, root, adev,
-_ttm_vram_fops, adev->gmc.mc_vram_size);
+_ttm_vram_fops, 
adev->gmc.real_vram_size);
debugfs_create_file("amdgpu_iomem", 0444, root, adev,
_ttm_iomem_fops);
debugfs_create_file("amdgpu_vram_mm", 0444, root, adev,
-- 
2.36.1



[PATCH 1/2] drm/amdgpu: fix amdgpu.vramlimit handling

2022-06-22 Thread Pierre-Eric Pelloux-Prayer
Without this change amdgpu_ttm_training_data_block_init tries
to allocate at the end of the real amount of VRAM, which
then fails like this if amdgpu.vramlimit= is used:

   [drm:amdgpu_ttm_init [amdgpu]] *ERROR* alloc c2p_bo failed(-12)!
   [drm:amdgpu_device_init.cold [amdgpu]] *ERROR* sw_init of IP block 
 failed -12
   amdgpu: amdgpu_device_ip_init failed
   amdgpu: Fatal error during GPU init

Signed-off-by: Pierre-Eric Pelloux-Prayer 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index be0efaae79a9..952e99e6d07e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1621,9 +1621,9 @@ static void amdgpu_ttm_training_data_block_init(struct 
amdgpu_device *adev)
memset(ctx, 0, sizeof(*ctx));
 
ctx->c2p_train_data_offset =
-   ALIGN((adev->gmc.mc_vram_size - adev->mman.discovery_tmr_size - 
SZ_1M), SZ_1M);
+   ALIGN((adev->gmc.real_vram_size - adev->mman.discovery_tmr_size 
- SZ_1M), SZ_1M);
ctx->p2c_train_data_offset =
-   (adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET);
+   (adev->gmc.real_vram_size - GDDR6_MEM_TRAINING_OFFSET);
ctx->train_data_size =
GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES;
 

base-commit: a81bcfc756bcaa9e8bb46262f910504fa5290aab
-- 
2.36.1



Re: [PATCH] drm/amdgpu: always flush the TLB on gfx8

2022-06-03 Thread Pierre-Eric Pelloux-Prayer
Hi Christian,

This patch fixes almost all GPU faults on polaris caused by 86fd5edfbdae 
"drm/amdgpu: rework TLB flushing".

I still get occasional faults though, about 1 every 3 runs of a subset of 
piglit tests.

Thanks,
Pierre-Eric



On 03/06/2022 15:05, Christian König wrote:
> The TLB on GFX8 stores each block of 8 PTEs where any of the valid bits
> are set.
> 
> Signed-off-by: Christian König 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 5 +
>  1 file changed, 5 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 9596c22fded6..b747488c28ad 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -847,6 +847,11 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, 
> struct amdgpu_vm *vm,
>   flush_tlb |= adev->gmc.xgmi.num_physical_nodes &&
>adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0);
>  
> + /*
> +  * On GFX8 and older any 8 PTE block with a valid bit set enters the TLB
> +  */
> + flush_tlb |= adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 0, 0);
> +
>   memset(, 0, sizeof(params));
>   params.adev = adev;
>   params.vm = vm;
> 


Re: [PATCH] drm/amdgpu: fix limiting AV1 to the first instance on VCN3

2022-06-03 Thread Pierre-Eric Pelloux-Prayer
Hi Christian,

The patch is: Tested-by: Pierre-Eric Pelloux-Prayer 


Could you add a reference to 
https://gitlab.freedesktop.org/drm/amd/-/issues/2037 in the commit message?

Thanks,
Pierre-Eric

On 03/06/2022 12:21, Christian König wrote:
> The job is not yet initialized here.
> 
> Signed-off-by: Christian König 
> Fixes: 1027d5d791b7 ("drm/amdgpu: use job and ib structures directly in CS 
> parsers")
> ---
>  drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 17 +++--
>  1 file changed, 7 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c 
> b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
> index 3cabceee5f57..39405f0db824 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
> @@ -1761,23 +1761,21 @@ static const struct amdgpu_ring_funcs 
> vcn_v3_0_dec_sw_ring_vm_funcs = {
>   .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
>  };
>  
> -static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p,
> - struct amdgpu_job *job)
> +static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p)
>  {
>   struct drm_gpu_scheduler **scheds;
>  
>   /* The create msg must be in the first IB submitted */
> - if (atomic_read(&job->base.entity->fence_seq))
> + if (atomic_read(&p->entity->fence_seq))
>   return -EINVAL;
>  
>   scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_DEC]
>   [AMDGPU_RING_PRIO_DEFAULT].sched;
> - drm_sched_entity_modify_sched(job->base.entity, scheds, 1);
> + drm_sched_entity_modify_sched(p->entity, scheds, 1);
>   return 0;
>  }
>  
> -static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job 
> *job,
> - uint64_t addr)
> +static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr)
>  {
>   struct ttm_operation_ctx ctx = { false, false };
>   struct amdgpu_bo_va_mapping *map;
> @@ -1848,7 +1846,7 @@ static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, 
> struct amdgpu_job *job,
>   if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11)
>   continue;
>  
> - r = vcn_v3_0_limit_sched(p, job);
> + r = vcn_v3_0_limit_sched(p);
>   if (r)
>   goto out;
>   }
> @@ -1862,7 +1860,7 @@ static int vcn_v3_0_ring_patch_cs_in_place(struct 
> amdgpu_cs_parser *p,
>  struct amdgpu_job *job,
>  struct amdgpu_ib *ib)
>  {
> - struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched);
> + struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
>   uint32_t msg_lo = 0, msg_hi = 0;
>   unsigned i;
>   int r;
> @@ -1881,8 +1879,7 @@ static int vcn_v3_0_ring_patch_cs_in_place(struct 
> amdgpu_cs_parser *p,
>   msg_hi = val;
>   } else if (reg == PACKET0(p->adev->vcn.internal.cmd, 0) &&
>  val == 0) {
> - r = vcn_v3_0_dec_msg(p, job,
> -  ((u64)msg_hi) << 32 | msg_lo);
> + r = vcn_v3_0_dec_msg(p, ((u64)msg_hi) << 32 | msg_lo);
>   if (r)
>   return r;
>   }
> 


Re: [PATCH v2 1/2] drm: Add GPU reset sysfs event

2022-03-09 Thread Pierre-Eric Pelloux-Prayer



On 09/03/2022 11:24, Christian König wrote:
> Am 09.03.22 um 11:10 schrieb Simon Ser:
>> On Wednesday, March 9th, 2022 at 10:56, Pierre-Eric Pelloux-Prayer 
>>  wrote:
>>
>>> Would it be possible to include the app parameters as well?
>> Can all processes read sysfs events?
> 
> No, but application parameters are usually not secret.
> 
>> There might be security implications here. The app parameters might
>> contain sensitive information, like passwords or tokens.
> 
> It's a well known security vulnerably to provide password etc.. as 
> application parameter since everybody can read them through procfs.
> 
>> Can't the uevent receiver query the kernel to get that info from the
>> PID?
> 
> I'm leaning also towards just providing the PID and not the application name. 
> The information should be as short as possible.
> 

Indeed you're both right. The PID + reading procfs should be enough.
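
For completeness, recovering the arguments from the PID is standard
procfs, nothing amdgpu specific (minimal sketch; the process may already
have exited, so this can fail):

   char path[64], cmdline[4096];
   ssize_t n;
   int fd;

   snprintf(path, sizeof(path), "/proc/%u/cmdline", pid);
   fd = open(path, O_RDONLY);
   if (fd >= 0) {
           n = read(fd, cmdline, sizeof(cmdline) - 1); /* args are NUL-separated */
           close(fd);
   }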

Thanks,
Pierre-Eric


Re: [PATCH v2 1/2] drm: Add GPU reset sysfs event

2022-03-09 Thread Pierre-Eric Pelloux-Prayer
Hi Shashank,

On 08/03/2022 19:04, Shashank Sharma wrote:
> From: Shashank Sharma 
> 
> This patch adds a new sysfs event, which will indicate
> the userland about a GPU reset, and can also provide
> some information like:
> - process ID of the process involved with the GPU reset
> - process name of the involved process

Would it be possible to include the app parameters as well?

piglit has a shader_runner test application that can cause hangs,
and it's run like this:

   shader_runner 1.shader_test

Knowing that shader_runner caused the hang is not really useful
without the "1.shader_test" part.

Thanks,
Pierre-Eric 

> - the GPU status info (using flags)
> 
> This patch also introduces the first flag of the flags
> bitmap, which can be appended as and when required.
> 
> V2: Addressed review comments from Christian and Amar
>- move the reset information structure to DRM layer
>- drop _ctx from struct name
>- make pid 32 bit(than 64)
>- set flag when VRAM invalid (than valid)
>- add process name as well (Amar)
> 
> Cc: Alexandar Deucher 
> Cc: Christian Koenig 
> Cc: Amaranath Somalapuram 
> Signed-off-by: Shashank Sharma 
> ---
>  drivers/gpu/drm/drm_sysfs.c | 31 +++
>  include/drm/drm_sysfs.h | 10 ++
>  2 files changed, 41 insertions(+)
> 
> diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c
> index 430e00b16eec..840994810910 100644
> --- a/drivers/gpu/drm/drm_sysfs.c
> +++ b/drivers/gpu/drm/drm_sysfs.c
> @@ -409,6 +409,37 @@ void drm_sysfs_hotplug_event(struct drm_device *dev)
>  }
>  EXPORT_SYMBOL(drm_sysfs_hotplug_event);
>  
> +/**
> + * drm_sysfs_reset_event - generate a DRM uevent to indicate GPU reset
> + * @dev: DRM device
> + * @reset_info: The contextual information about the reset (like PID, flags)
> + *
> + * Send a uevent for the DRM device specified by @dev. This informs
> + * user that a GPU reset has occurred, so that an interested client
> + * can take any recovery or profiling measure.
> + */
> +void drm_sysfs_reset_event(struct drm_device *dev, struct drm_reset_event 
> *reset_info)
> +{
> + unsigned char pid_str[13];
> + unsigned char flags_str[15];
> + unsigned char pname_str[TASK_COMM_LEN + 6];
> + unsigned char reset_str[] = "RESET=1";
> + char *envp[] = { reset_str, pid_str, pname_str, flags_str, NULL };
> +
> + if (!reset_info) {
> + DRM_WARN("No reset info, not sending the event\n");
> + return;
> + }
> +
> + DRM_DEBUG("generating reset event\n");
> +
> + snprintf(pid_str, ARRAY_SIZE(pid_str), "PID=%u", reset_info->pid);
> + snprintf(pname_str, ARRAY_SIZE(pname_str), "NAME=%s", 
> reset_info->pname);
> + snprintf(flags_str, ARRAY_SIZE(flags_str), "FLAGS=%u", 
> reset_info->flags);
> + kobject_uevent_env(&dev->primary->kdev->kobj, KOBJ_CHANGE, envp);
> +}
> +EXPORT_SYMBOL(drm_sysfs_reset_event);
> +
>  /**
>   * drm_sysfs_connector_hotplug_event - generate a DRM uevent for any 
> connector
>   * change
> diff --git a/include/drm/drm_sysfs.h b/include/drm/drm_sysfs.h
> index 6273cac44e47..5ba11c760619 100644
> --- a/include/drm/drm_sysfs.h
> +++ b/include/drm/drm_sysfs.h
> @@ -1,16 +1,26 @@
>  /* SPDX-License-Identifier: GPL-2.0 */
>  #ifndef _DRM_SYSFS_H_
>  #define _DRM_SYSFS_H_
> +#include <linux/sched.h>
> +
> +#define DRM_GPU_RESET_FLAG_VRAM_INVALID (1 << 0)
>  
>  struct drm_device;
>  struct device;
>  struct drm_connector;
>  struct drm_property;
>  
> +struct drm_reset_event {
> + uint32_t pid;
> + uint32_t flags;
> + char pname[TASK_COMM_LEN];
> +};
> +
>  int drm_class_device_register(struct device *dev);
>  void drm_class_device_unregister(struct device *dev);
>  
>  void drm_sysfs_hotplug_event(struct drm_device *dev);
> +void drm_sysfs_reset_event(struct drm_device *dev, struct drm_reset_event 
> *reset_info);
>  void drm_sysfs_connector_hotplug_event(struct drm_connector *connector);
>  void drm_sysfs_connector_status_event(struct drm_connector *connector,
> struct drm_property *property);
> 


Re: [PATCH] drm/amd/pm: fix runpm hang when amdgpu loaded prior to sound driver

2021-09-10 Thread Pierre-Eric Pelloux-Prayer

Tested-by: Pierre-Eric Pelloux-Prayer 

Thanks!

On 10/09/2021 05:17, Evan Quan wrote:

Current RUNPM mechanism relies on PMFW to master the timing for BACO
in/exit. And that needs cooperation from sound driver for dstate
change notification for function 1(audio). Otherwise(on sound driver
missing), BACO cannot be kicked in correctly and hang will be observed
on RUNPM exit.

By switching back to legacy message way on sound driver missing,
we are able to fix the runpm hang observed for the scenario below:
amdgpu driver loaded -> runpm suspend kicked -> sound driver loaded

Change-Id: I0e44fef11349b5e45e6102913eb46c8c7d279c65
Signed-off-by: Evan Quan 
Reported-by: Pierre-Eric Pelloux-Prayer 
---
  .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   | 24 +--
  .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   |  4 ++--
  drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c| 21 
  drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h|  2 ++
  4 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index 7bc90f841a11..bcafccf7f07a 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -2272,7 +2272,27 @@ static int navi10_baco_enter(struct smu_context *smu)
  {
struct amdgpu_device *adev = smu->adev;
  
-	if (adev->in_runpm)

+   /*
+* This aims the case below:
+*   amdgpu driver loaded -> runpm suspend kicked -> sound driver loaded
+*
+* For NAVI10 and later ASICs, we rely on PMFW to handle the runpm. To
+* make that possible, PMFW needs to acknowledge the dstate transition
+* process for both gfx(function 0) and audio(function 1) function of
+* the ASIC.
+*
+* The PCI device's initial runpm status is RUNPM_SUSPENDED. So as the
+* device representing the audio function of the ASIC. And that means
+* even if the sound driver(snd_hda_intel) was not loaded yet, it's 
still
+* possible runpm suspend kicked on the ASIC. However without the dstate
+* transition notification from audio function, pmfw cannot handle the
+* BACO in/exit correctly. And that will cause driver hang on runpm
+* resuming.
+*
+* To address this, we revert to legacy message way(driver masters the
+* timing for BACO in/exit) on sound driver missing.
+*/
+   if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev))
return smu_v11_0_baco_set_armd3_sequence(smu, BACO_SEQ_BACO);
else
return smu_v11_0_baco_enter(smu);
@@ -2282,7 +2302,7 @@ static int navi10_baco_exit(struct smu_context *smu)
  {
struct amdgpu_device *adev = smu->adev;
  
-	if (adev->in_runpm) {

+   if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) {
/* Wait for PMFW handling for the Dstate change */
msleep(10);
return smu_v11_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index 43c7580a4ea6..f9b730c5ba9e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -2361,7 +2361,7 @@ static int sienna_cichlid_baco_enter(struct smu_context 
*smu)
  {
struct amdgpu_device *adev = smu->adev;
  
-	if (adev->in_runpm)

+   if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev))
return smu_v11_0_baco_set_armd3_sequence(smu, BACO_SEQ_BACO);
else
return smu_v11_0_baco_enter(smu);
@@ -2371,7 +2371,7 @@ static int sienna_cichlid_baco_exit(struct smu_context 
*smu)
  {
struct amdgpu_device *adev = smu->adev;
  
-	if (adev->in_runpm) {

+   if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) {
/* Wait for PMFW handling for the Dstate change */
msleep(10);
return smu_v11_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
index 69da9a7b665f..d61403e917df 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
@@ -1055,3 +1055,24 @@ int smu_cmn_set_mp1_state(struct smu_context *smu,
  
  	return ret;

  }
+
+bool smu_cmn_is_audio_func_enabled(struct amdgpu_device *adev)
+{
+   struct pci_dev *p = NULL;
+   bool snd_driver_loaded;
+
+   /*
+* If the ASIC comes with no audio function, we always assume
+* it is "enabled".
+*/
+   p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
+   adev->pdev->bus->number, 1);
+ 

Re: [PATCH] drm: Check actual format for legacy pageflip.

2021-01-04 Thread Pierre-Eric Pelloux-Prayer
Hi Bas,

On 02/01/2021 15:02, Bas Nieuwenhuizen wrote:
> With modifiers one can actually have different format_info structs
> for the same format, which now matters for AMDGPU since we convert
> implicit modifiers to explicit modifiers with multiple planes.
> 
> I checked other drivers and it doesn't look like they end up triggering
> this case so I think this is safe to relax.

This patch fixes https://gitlab.freedesktop.org/drm/amd/-/issues/1379:

   Tested-by: Pierre-Eric Pelloux-Prayer 

Thanks!
P-E

> 
> Signed-off-by: Bas Nieuwenhuizen 
> Fixes: 816853f9dc40 ("drm/amd/display: Set new format info for converted 
> metadata.")
> ---
>  drivers/gpu/drm/drm_plane.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c
> index e6231947f987..f5085990cfac 100644
> --- a/drivers/gpu/drm/drm_plane.c
> +++ b/drivers/gpu/drm/drm_plane.c
> @@ -1163,7 +1163,7 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev,
>   if (ret)
>   goto out;
>  
> - if (old_fb->format != fb->format) {
> + if (old_fb->format->format != fb->format->format) {
>   DRM_DEBUG_KMS("Page flip is not allowed to change frame buffer 
> format.\n");
>   ret = -EINVAL;
>   goto out;
> 


Re: [PATCH v3 00/11] amd/display: Add GFX9+ modifier support.

2020-10-26 Thread Pierre-Eric Pelloux-Prayer
Hi Bas,

I've been using v2 for a while so you can tag the series as:
Tested-by: Pierre-Eric Pelloux-Prayer 



On 22/10/2020 01:31, Bas Nieuwenhuizen wrote:
> This adds modifier support to the amdgpu kernel drivers for GFX9 and
> later GPUs. It has been tested on
> 
> - VEGA10, RAVEN, NAVI14
> - weston, sway, X with xf86-video-amdgpu (i.e. legacy path still works)
> 
> and includes some basic testing of the layout code.
> 
> The main goal is to keep it somewhat simple and regression free, so
> on the display side this series only exposes what the current GPU
> can render to. While we could expose more I think that is more
> suitable for follow-up work as the benefit would be minimal and
> there are some more design discussion there to discuss that are
> orthogonal from the initial implementation.
> 
> Similarly this series only exposes 32-bpp displayable DCC in the cases
> that radeonsi would use it and any extra capabilities here should be
> future work.
> 
> I believe these are by far the most complicated modifiers we've seen
> up till now, mostly related to
> 
> - GPU identification for cases where it matters wrt tiling.
> - Every generation having tiling layout changes
> - Compression settings.
> 
> I believe the complexity is necessary as every layout should be different
> and all layouts should be the best in some situation (though not all
> combinations of GPU parameters will actually have an existing GPU).
> 
> That said, on the render side the number of modifiers actually listed for
> a given GPU is ~10, and in the current implementation that is the same
> for the display side. (we could expose more actually differing layouts
> on the display side for cross-GPU compatibility, but I consider that
> out of scope for this initial work).
> 
> This series can be found on
> https://github.com/BNieuwenhuizen/linux/tree/modifiers
> 
> An userspace implementation in radeonsi can be found on
> https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6176
> 
> which has been reviewed and is ready for submission once these kernel
> patches land.
> 
> v2:
> 
> Per suggestion from Daniel Vetter I added logic to get the tiling_flags at
> addfb2 time and convert them into modifiers for GFX9+.  Furthermore, the DCC
> constant encoding modifiers only get exposed on RAVEN2 and newer.
> 
> v3:
> 
> Fixed some typos, rebased and CCing more people to actually get a review.
> 
> Bas Nieuwenhuizen (11):
>   drm/amd/display: Do not silently accept DCC for multiplane formats.
>   drm/amd: Init modifier field of helper fb.
>   drm/amd/display: Honor the offset for plane 0.
>   drm/fourcc:  Add AMD DRM modifiers.
>   drm/amd/display: Store tiling_flags in the framebuffer.
>   drm/amd/display: Convert tiling_flags to modifiers.
>   drm/amd/display: Refactor surface tiling setup.
>   drm/amd/display: Set DC options from modifiers.
>   drm/amd/display: Add formats for DCC with 2/3 planes.
>   drm/amd/display: Expose modifiers.
>   drm/amd/display: Clean up GFX9 tiling_flags path.
> 
>  drivers/gpu/drm/amd/amdgpu/amdgpu_display.c   | 169 +++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c|   2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h  |   3 +
>  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 754 ++
>  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |   2 -
>  include/uapi/drm/drm_fourcc.h | 115 +++
>  6 files changed, 880 insertions(+), 165 deletions(-)
> 


Re: [PATCH v2 04/11] drm/fourcc: Add AMD DRM modifiers.

2020-09-07 Thread Pierre-Eric Pelloux-Prayer
Hi Bas,

2 small typos you may want to fix:

On 04/09/2020 18:07, Bas Nieuwenhuizen wrote:
> This adds modifiers for GFX9+ AMD GPUs.
> 
> As the modifiers need a lot of parameters I split things out in
> getters and setters.
>   - Advantage: simplifies the code a lot
>   - Disadvantage: Makes it harder to check that you're setting all
>   the required fields.
> 
> The tiling modes seem to change every generatio, but the structure

"generatio" -> "generation"

> of what each tiling mode is good for stays really similar. As such
> the core of the modifier is
>  - the tiling mode
>  - a version. Not explicitly a GPU generation, but splitting out
>a new set of tiling equations.

[...]
> + * with DCC & DCC_RETILE:
> + *   - main surface in plane 0
> + *   - displayable DCC surface in plane 1 (not RB-aligned & not pipe-aligned)
> + *   - pipe-aligned DCC surface in plane 2 (RB-aligned & pipe-aligned)
> + *
> + * For multi-plane formats the above surfaces get merged into one plane for
> + * each for format plane, based on the required alignment only.

"for each for format plane" => "for each format plane"?


Pierre-Eric



Re: [PATCH] drm/amdgpu: new ids flag for tmz (v2)

2020-08-05 Thread Pierre-Eric Pelloux-Prayer
Hi Christian,

On 30/07/2020 16:46, Christian König wrote:
> Am 30.07.20 um 15:54 schrieb Pierre-Eric Pelloux-Prayer:
>> Allows UMD to know if TMZ is supported and enabled.
>>
>> This commit also bumps KMS_DRIVER_MINOR because if we don't
>> UMD can't tell if "ids_flags & AMDGPU_IDS_FLAGS_TMZ == 0" means
>> "tmz is not enabled" or "tmz may be enabled but the kernel doesn't
>> report it".
>>
>> v2: use amdgpu_is_tmz() and reworded commit message.
> 
> Your signed-off-by line is missing, but apart from that the patch is 
> Reviewed-by: Christian König 


Thanks, I'll add my s-o-b tag before pushing the patch.

Pierre-Eric

> 
>> ---
>> Patch for using it in Mesa is at 
>> https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6049
>> (ac/gpu_info: add detection of TMZ support).
>> I've kept the AMDGPU_IDS_FLAGS_TMZ name to match the existing
>> AMDGPU_IDS_FLAGS_PREEMPTION flag.
>>
>> Pierre-Eric
>>
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 ++
>>   include/uapi/drm/amdgpu_drm.h   | 1 +
>>   3 files changed, 5 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> index 6291f5f0d223..6dcab25914cf 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> @@ -88,9 +88,10 @@
>>    * - 3.37.0 - L2 is invalidated before SDMA IBs, needed for correctness
>>    * - 3.38.0 - Add AMDGPU_IB_FLAG_EMIT_MEM_SYNC
>>    * - 3.39.0 - DMABUF implicit sync does a full pipeline sync
>> + * - 3.40.0 - Add AMDGPU_IDS_FLAGS_TMZ
>>    */
>>   #define KMS_DRIVER_MAJOR    3
>> -#define KMS_DRIVER_MINOR    39
>> +#define KMS_DRIVER_MINOR    40
>>   #define KMS_DRIVER_PATCHLEVEL    0
>>     int amdgpu_vram_limit = 0;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> index eebbe2103e32..d353ded353bb 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> @@ -736,6 +736,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, 
>> void *data, struct drm_file
>>   dev_info.ids_flags |= AMDGPU_IDS_FLAGS_FUSION;
>>   if (amdgpu_mcbp || amdgpu_sriov_vf(adev))
>>   dev_info.ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION;
>> +    if (amdgpu_is_tmz(adev))
>> +    dev_info.ids_flags |= AMDGPU_IDS_FLAGS_TMZ;
>>     vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
>>   vm_size -= AMDGPU_VA_RESERVED_SIZE;
>> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
>> index 765a94ec4420..b826f2d6efe1 100644
>> --- a/include/uapi/drm/amdgpu_drm.h
>> +++ b/include/uapi/drm/amdgpu_drm.h
>> @@ -676,6 +676,7 @@ struct drm_amdgpu_cs_chunk_data {
>>    */
>>   #define AMDGPU_IDS_FLAGS_FUSION 0x1
>>   #define AMDGPU_IDS_FLAGS_PREEMPTION 0x2
>> +#define AMDGPU_IDS_FLAGS_TMZ    0x4
>>     /* indicate if acceleration can be working */
>>   #define AMDGPU_INFO_ACCEL_WORKING    0x00
> 


[PATCH] drm/amdgpu: new ids flag for tmz (v2)

2020-07-30 Thread Pierre-Eric Pelloux-Prayer
Allows UMD to know if TMZ is supported and enabled.

This commit also bumps KMS_DRIVER_MINOR because, if we don't, the
UMD can't tell whether "(ids_flags & AMDGPU_IDS_FLAGS_TMZ) == 0" means
"tmz is not enabled" or "tmz may be enabled but the kernel doesn't
report it".

v2: use amdgpu_is_tmz() and reworded commit message.
---
Patch for using it in Mesa is at 
https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6049
(ac/gpu_info: add detection of TMZ support).
I've kept the AMDGPU_IDS_FLAGS_TMZ name to match the existing
AMDGPU_IDS_FLAGS_PREEMPTION flag.

Pierre-Eric

 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 ++
 include/uapi/drm/amdgpu_drm.h   | 1 +
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 6291f5f0d223..6dcab25914cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -88,9 +88,10 @@
  * - 3.37.0 - L2 is invalidated before SDMA IBs, needed for correctness
  * - 3.38.0 - Add AMDGPU_IB_FLAG_EMIT_MEM_SYNC
  * - 3.39.0 - DMABUF implicit sync does a full pipeline sync
+ * - 3.40.0 - Add AMDGPU_IDS_FLAGS_TMZ
  */
 #define KMS_DRIVER_MAJOR   3
-#define KMS_DRIVER_MINOR   39
+#define KMS_DRIVER_MINOR   40
 #define KMS_DRIVER_PATCHLEVEL  0
 
 int amdgpu_vram_limit = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index eebbe2103e32..d353ded353bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -736,6 +736,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void 
*data, struct drm_file
dev_info.ids_flags |= AMDGPU_IDS_FLAGS_FUSION;
if (amdgpu_mcbp || amdgpu_sriov_vf(adev))
dev_info.ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION;
+   if (amdgpu_is_tmz(adev))
+   dev_info.ids_flags |= AMDGPU_IDS_FLAGS_TMZ;
 
vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
vm_size -= AMDGPU_VA_RESERVED_SIZE;
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 765a94ec4420..b826f2d6efe1 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -676,6 +676,7 @@ struct drm_amdgpu_cs_chunk_data {
  */
 #define AMDGPU_IDS_FLAGS_FUSION 0x1
 #define AMDGPU_IDS_FLAGS_PREEMPTION 0x2
+#define AMDGPU_IDS_FLAGS_TMZ0x4
 
 /* indicate if acceleration can be working */
 #define AMDGPU_INFO_ACCEL_WORKING  0x00
-- 
2.26.2



Re: [PATCH] drm/amdgpu: new ids flag for tmz

2020-07-30 Thread Pierre-Eric Pelloux-Prayer
Hi Christian,

On 30/07/2020 12:30, Christian König wrote:
> Am 30.07.20 um 12:25 schrieb Pierre-Eric Pelloux-Prayer:
>> Allows UMD to know if TMZ is supported and enabled.
>> This commit also bumps KMS_DRIVER_MINOR so UMD knows if it can rely on
>> AMDGPU_IDS_FLAGS_TMZ.
>> ---
>> Patch for using it in Mesa is at 
>> https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6049
>> (ac/gpu_info: add detection of TMZ support).
>>
>> Pierre-Eric
>>
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 ++
>>   include/uapi/drm/amdgpu_drm.h   | 1 +
>>   3 files changed, 5 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> index 6291f5f0d223..6dcab25914cf 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> @@ -88,9 +88,10 @@
>>    * - 3.37.0 - L2 is invalidated before SDMA IBs, needed for correctness
>>    * - 3.38.0 - Add AMDGPU_IB_FLAG_EMIT_MEM_SYNC
>>    * - 3.39.0 - DMABUF implicit sync does a full pipeline sync
>> + * - 3.40.0 - Add AMDGPU_IDS_FLAGS_TMZ
>>    */
>>   #define KMS_DRIVER_MAJOR    3
>> -#define KMS_DRIVER_MINOR    39
>> +#define KMS_DRIVER_MINOR    40
> 
> I don't think we need this. Unused ids_flags should be zero on older kernels.

If we don't increase KMS_DRIVER_MINOR then:

   (ids_flags & AMDGPU_IDS_FLAGS_TMZ) == 0

has 2 meanings:
  - TMZ is not enabled
  - or TMZ might be enabled but it's not reported by the kernel

If you prefer not bumping KMS_DRIVER_MINOR that's fine though. Mesa can check 
if TMZ is really
disabled by trying to allocate a TMZ buffer.
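
For illustration, the UMD-side logic would look roughly like this (a
sketch using libdrm; the exact helpers used in Mesa may differ):

   drmVersionPtr ver = drmGetVersion(fd);
   struct amdgpu_gpu_info info = {0};
   bool tmz_enabled = false;

   amdgpu_query_gpu_info(dev, &info);
   if (ver->version_minor >= 40)  /* kernel new enough to report the flag */
           tmz_enabled = !!(info.ids_flags & AMDGPU_IDS_FLAGS_TMZ);
   /* else: unknown, probe by trying to allocate a TMZ buffer */
   drmFreeVersion(ver);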

Thanks,
Pierre-Eric

> 
> That's why we have this in the first place.
> 
> Christian.
> 
>>   #define KMS_DRIVER_PATCHLEVEL    0
>>     int amdgpu_vram_limit = 0;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> index eebbe2103e32..d92ee30bc06c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> @@ -736,6 +736,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, 
>> void *data, struct drm_file
>>   dev_info.ids_flags |= AMDGPU_IDS_FLAGS_FUSION;
>>   if (amdgpu_mcbp || amdgpu_sriov_vf(adev))
>>   dev_info.ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION;
>> +    if (adev->gmc.tmz_enabled)
>> +    dev_info.ids_flags |= AMDGPU_IDS_FLAGS_TMZ;
>>     vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
>>   vm_size -= AMDGPU_VA_RESERVED_SIZE;
>> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
>> index 765a94ec4420..b826f2d6efe1 100644
>> --- a/include/uapi/drm/amdgpu_drm.h
>> +++ b/include/uapi/drm/amdgpu_drm.h
>> @@ -676,6 +676,7 @@ struct drm_amdgpu_cs_chunk_data {
>>    */
>>   #define AMDGPU_IDS_FLAGS_FUSION 0x1
>>   #define AMDGPU_IDS_FLAGS_PREEMPTION 0x2
>> +#define AMDGPU_IDS_FLAGS_TMZ    0x4
>>     /* indicate if acceleration can be working */
>>   #define AMDGPU_INFO_ACCEL_WORKING    0x00
> 


[PATCH] drm/amdgpu: new ids flag for tmz

2020-07-30 Thread Pierre-Eric Pelloux-Prayer
Allows UMD to know if TMZ is supported and enabled.
This commit also bumps KMS_DRIVER_MINOR so UMD knows if it can rely on
AMDGPU_IDS_FLAGS_TMZ.
---
Patch for using it in Mesa is at 
https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6049
(ac/gpu_info: add detection of TMZ support).

Pierre-Eric

 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 ++
 include/uapi/drm/amdgpu_drm.h   | 1 +
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 6291f5f0d223..6dcab25914cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -88,9 +88,10 @@
  * - 3.37.0 - L2 is invalidated before SDMA IBs, needed for correctness
  * - 3.38.0 - Add AMDGPU_IB_FLAG_EMIT_MEM_SYNC
  * - 3.39.0 - DMABUF implicit sync does a full pipeline sync
+ * - 3.40.0 - Add AMDGPU_IDS_FLAGS_TMZ
  */
 #define KMS_DRIVER_MAJOR   3
-#define KMS_DRIVER_MINOR   39
+#define KMS_DRIVER_MINOR   40
 #define KMS_DRIVER_PATCHLEVEL  0
 
 int amdgpu_vram_limit = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index eebbe2103e32..d92ee30bc06c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -736,6 +736,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void 
*data, struct drm_file
dev_info.ids_flags |= AMDGPU_IDS_FLAGS_FUSION;
if (amdgpu_mcbp || amdgpu_sriov_vf(adev))
dev_info.ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION;
+   if (adev->gmc.tmz_enabled)
+   dev_info.ids_flags |= AMDGPU_IDS_FLAGS_TMZ;
 
vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
vm_size -= AMDGPU_VA_RESERVED_SIZE;
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 765a94ec4420..b826f2d6efe1 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -676,6 +676,7 @@ struct drm_amdgpu_cs_chunk_data {
  */
 #define AMDGPU_IDS_FLAGS_FUSION 0x1
 #define AMDGPU_IDS_FLAGS_PREEMPTION 0x2
+#define AMDGPU_IDS_FLAGS_TMZ0x4
 
 /* indicate if acceleration can be working */
 #define AMDGPU_INFO_ACCEL_WORKING  0x00
-- 
2.26.2



Re: [PATCH 13/18] drm/amdgpu/dc: Stop dma_resv_lock inversion in commit_tail

2020-06-05 Thread Pierre-Eric Pelloux-Prayer
Hi Daniel,

On 04/06/2020 10:12, Daniel Vetter wrote:
[...]
> @@ -6910,7 +6910,11 @@ static void amdgpu_dm_commit_planes(struct 
> drm_atomic_state *state,
>* explicitly on fences instead
>* and in general should be called for
>* blocking commit to as per framework helpers
> +  *
> +  * Yes, this deadlocks, since you're calling dma_resv_lock in a
> +  * path that leads to a dma_fence_signal(). Don't do that.
>*/
> +#if 0
>   r = amdgpu_bo_reserve(abo, true);
>   if (unlikely(r != 0))
>   DRM_ERROR("failed to reserve buffer before flip\n");
> @@ -6920,6 +6924,12 @@ static void amdgpu_dm_commit_planes(struct 
> drm_atomic_state *state,
>   tmz_surface = amdgpu_bo_encrypted(abo);
>  
>   amdgpu_bo_unreserve(abo);
> +#endif
> + /*
> +  * this races anyway, so READ_ONCE isn't any better or worse
> +  * than the stuff above. Except the stuff above can deadlock.
> +  */
> + tiling_flags = READ_ONCE(abo->tiling_flags);

With this change "tmz_surface" won't be initialized properly.
Adding the following line should fix it:

  tmz_surface = READ_ONCE(abo->flags) & AMDGPU_GEM_CREATE_ENCRYPTED;


Pierre-Eric


>  
>   fill_dc_plane_info_and_addr(
>   dm->adev, new_plane_state, tiling_flags,
> 


Re: [PATCH] drm/amdgpu: Add autodump debugfs node for gpu reset v4

2020-04-29 Thread Pierre-Eric Pelloux-Prayer
Hi Jiange,

This version seems to work fine.

Tested-by: Pierre-Eric Pelloux-Prayer 


On 29/04/2020 07:08, Zhao, Jiange wrote:
> [AMD Official Use Only - Internal Distribution Only]
> 
> 
> Hi all,
> 
> I worked out the race condition and here is version 5. Please have a look.
> 
> Jiange
> --
> *From:* Zhao, Jiange 
> *Sent:* Wednesday, April 29, 2020 1:06 PM
> *To:* amd-gfx@lists.freedesktop.org 
> *Cc:* Koenig, Christian ; Kuehling, Felix 
> ; Pelloux-prayer, Pierre-eric 
> ; Deucher, Alexander 
> ; Zhang, Hawking ; Liu, 
> Monk ; Zhao, Jiange 
> *Subject:* [PATCH] drm/amdgpu: Add autodump debugfs node for gpu reset v4
>  
> From: Jiange Zhao 
> 
> When GPU got timeout, it would notify an interested part
> of an opportunity to dump info before actual GPU reset.
> 
> A usermode app would open 'autodump' node under debugfs system
> and poll() for readable/writable. When a GPU reset is due,
> amdgpu would notify usermode app through wait_queue_head and give
> it 10 minutes to dump info.
> 
> After usermode app has done its work, this 'autodump' node is closed.
> On node closure, amdgpu gets to know the dump is done through
> the completion that is triggered in release().
> 
> There is no write or read callback because necessary info can be
> obtained through dmesg and umr. Messages back and forth between
> usermode app and amdgpu are unnecessary.
> 
> v2: (1) changed 'registered' to 'app_listening'
>     (2) add a mutex in open() to prevent race condition
> 
> v3 (chk): grab the reset lock to avoid race in autodump_open,
>   rename debugfs file to amdgpu_autodump,
>   provide autodump_read as well,
>   style and code cleanups
> 
> v4: add 'bool app_listening' to differentiate situations, so that
>     the node can be reopened; also, there is no need to wait for
>     completion when no app is waiting for a dump.
> 
> v5: change 'bool app_listening' to 'enum amdgpu_autodump_state'
>     add 'app_state_mutex' for race conditions:
>     (1)Only 1 user can open this file node
>     (2)wait_dump() can only take effect after poll() executed.
>     (3)eliminated the race condition between release() and
>    wait_dump()
> 
> Signed-off-by: Jiange Zhao 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h |  2 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 92 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h | 14 
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  |  2 +
>  4 files changed, 109 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index bc1e0fd71a09..6f8ef98c4b97 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -990,6 +990,8 @@ struct amdgpu_device {
>  char    product_number[16];
>  char    product_name[32];
>  char    serial[16];
> +
> +   struct amdgpu_autodump  autodump;
>  };
>  
>  static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device 
> *bdev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> index 1a4894fa3693..1d4a95e8ad5b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> @@ -27,7 +27,7 @@
>  #include 
>  #include 
>  #include 
> -
> +#include <linux/poll.h>
>  #include 
>  
>  #include "amdgpu.h"
> @@ -74,8 +74,96 @@ int amdgpu_debugfs_add_files(struct amdgpu_device *adev,
>  return 0;
>  }
>  
> +int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev)
> +{
> +#if defined(CONFIG_DEBUG_FS)

Re: drm/amdgpu: invalidate L2 before SDMA IBs (on gfx10)

2020-04-25 Thread Pierre-Eric Pelloux-Prayer
Hi Marek,

With this patch applied I could replay a trace (that usually caused an SDMA
timeout on the first run) several times in a row without any issues.

So you can tag the commit as:
Tested-by: Pierre-Eric Pelloux-Prayer 

Thanks,
Pierre-Eric



On 25/04/2020 10:52, Marek Olšák wrote:
> This should fix SDMA hangs on gfx10.
> 
> Marek
> 


Re: [PATCH] drm/amdgpu: Add autodump debugfs node for gpu reset

2020-04-24 Thread Pierre-Eric Pelloux-Prayer
Running "umr --autodump" currently does the following:
  - open the debugfs autodump file
  - wait for a hang (using poll)
  - dump gfx rings
  - close the file and exit
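
A minimal sketch of that flow (assuming the node ends up at a path like
/sys/kernel/debug/dri/0/amdgpu_autodump):

   int fd = open("/sys/kernel/debug/dri/0/amdgpu_autodump", O_RDONLY);
   struct pollfd p = { .fd = fd, .events = POLLIN | POLLRDNORM };

   poll(&p, 1, -1);   /* blocks until amdgpu signals a GPU hang */
   /* ... dump rings/registers here, e.g. through umr ... */
   close(fd);         /* closing tells amdgpu the dump is done */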

I don't think adding support for a "Done" message is necessary, but I'd expect
to be able to restart "umr --autodump" and perform another dump.

Pierre-Eric

On 24/04/2020 10:24, Zhao, Jiange wrote:
> [AMD Official Use Only - Internal Distribution Only]
> 
> 
> Hi,
> 
> Of course, considering all the suggestions, I will implement a write callback 
> for usermode app to notify KMD that a dump is finished by sending "Done".
> 
> In this way, usermode app can do multiple dumps without closing the node,
> 
> Jiange
> --
> *From:* Pelloux-prayer, Pierre-eric 
> *Sent:* Friday, April 24, 2020 3:46 PM
> *To:* Zhao, Jiange ; Koenig, Christian 
> ; amd-gfx@lists.freedesktop.org 
> 
> *Cc:* Deucher, Alexander ; Pelloux-prayer, 
> Pierre-eric ; Kuehling, Felix 
> ; Liu, Monk ; Zhang, Hawking 
> 
> *Subject:* Re: [PATCH] drm/amdgpu: Add autodump debugfs node for gpu reset
>  
> Hi Jiange,
> 
> FYI I'm working on adding a new "--autodump" command to umr that uses this 
> feature.
> This is not yet merged but you can find the code here: 
> https://gitlab.freedesktop.org/pepp/umr/-/tree/autodump
> 
>> (3) At the same time, considering the use case of this node, I believe that 
>> only the first GPU reset is worthy of a dump.
> 
> If it's possible I'd like to be able to do multiple dumps instead of limiting 
> ourselves to only the first one.
> 
> Thanks!
> Pierre-Eric
> 
> 
> 
>> (4) I didn't implement race condition guard because I believe that this node 
>> caters for a cautious super-user and a single client is enough to do all the 
>> work. I can add the logic if you think it is necessary.
>> 
>> Jiange
>> 
>> -Original Message-
>> From: Koenig, Christian  
>> Sent: Thursday, April 23, 2020 4:53 PM
>> To: Zhao, Jiange ; amd-gfx@lists.freedesktop.org
>> Cc: Kuehling, Felix ; Pelloux-prayer, Pierre-eric 
>> ; Deucher, Alexander 
>> ; Zhang, Hawking ; Liu, 
>> Monk ; Zhao, Jiange 
>> Subject: Re: [PATCH] drm/amdgpu: Add autodump debugfs node for gpu reset
>> 
>> Am 23.04.20 um 09:19 schrieb jia...@amd.com:
>>> From: Jiange Zhao 
>>>
>>> When GPU got timeout, it would notify an interested part of an 
>>> opportunity to dump info before actual GPU reset.
>>>
>>> A usermode app would open 'autodump' node under debugfs system and 
>>> poll() for readable/writable. When a GPU reset is due, amdgpu would 
>>> notify usermode app through wait_queue_head and give it 10 minutes to 
>>> dump info.
>>>
>>> After usermode app has done its work, this 'autodump' node is closed.
>>> On node closure, amdgpu gets to know the dump is done through the 
>>> completion that is triggered in release().
>>>
>>> There is no write or read callback because necessary info can be 
>>> obtained through dmesg and umr. Messages back and forth between 
>>> usermode app and amdgpu are unnecessary.
>>>
>>> Signed-off-by: Jiange Zhao 
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h |  9 +++
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 85 +
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h |  1 +
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  |  2 +
>>>   4 files changed, 97 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> index bc1e0fd71a09..a505b547f242 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> @@ -724,6 +724,13 @@ struct amd_powerplay {
>>>   const struct amd_pm_funcs *pp_funcs;
>>>   };
>>>   
>>> +struct amdgpu_autodump {
>>> +    bool    registered;
>>> +    struct completion   completed;
>> 
>> Registered and completed seems to have the same meaning.
>> 
>>> +    struct dentry   *dentry;
>>> +    struct wait_queue_head  gpu_hang_wait;
>>> +};
>>> +
>>>   #define 

Re: [PATCH] drm/amdgpu: Add autodump debugfs node for gpu reset

2020-04-24 Thread Pierre-Eric Pelloux-Prayer
Hi Jiange,

FYI I'm working on adding a new "--autodump" command to umr that uses this 
feature.
This is not yet merged but you can find the code here: 
https://gitlab.freedesktop.org/pepp/umr/-/tree/autodump

> (3) At the same time, considering the use case of this node, I believe that 
> only the first GPU reset is worthy of a dump.

If it's possible I'd like to be able to do multiple dumps instead of limiting 
ourselves to only the first one.

Thanks!
Pierre-Eric



> (4) I didn't implement race condition guard because I believe that this node 
> caters for a cautious super-user and a single client is enough to do all the 
> work. I can add the logic if you think it is necessary.
> 
> Jiange
> 
> -Original Message-
> From: Koenig, Christian  
> Sent: Thursday, April 23, 2020 4:53 PM
> To: Zhao, Jiange ; amd-gfx@lists.freedesktop.org
> Cc: Kuehling, Felix ; Pelloux-prayer, Pierre-eric 
> ; Deucher, Alexander 
> ; Zhang, Hawking ; Liu, 
> Monk ; Zhao, Jiange 
> Subject: Re: [PATCH] drm/amdgpu: Add autodump debugfs node for gpu reset
> 
> Am 23.04.20 um 09:19 schrieb jia...@amd.com:
>> From: Jiange Zhao 
>>
>> When GPU got timeout, it would notify an interested part of an 
>> opportunity to dump info before actual GPU reset.
>>
>> A usermode app would open 'autodump' node under debugfs system and 
>> poll() for readable/writable. When a GPU reset is due, amdgpu would 
>> notify usermode app through wait_queue_head and give it 10 minutes to 
>> dump info.
>>
>> After usermode app has done its work, this 'autodump' node is closed.
>> On node closure, amdgpu gets to know the dump is done through the 
>> completion that is triggered in release().
>>
>> There is no write or read callback because necessary info can be 
>> obtained through dmesg and umr. Messages back and forth between 
>> usermode app and amdgpu are unnecessary.
>>
>> Signed-off-by: Jiange Zhao 
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h |  9 +++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 85 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h |  1 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  |  2 +
>>   4 files changed, 97 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index bc1e0fd71a09..a505b547f242 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -724,6 +724,13 @@ struct amd_powerplay {
>>  const struct amd_pm_funcs *pp_funcs;
>>   };
>>   
>> +struct amdgpu_autodump {
>> +bool    registered;
>> +struct completion   completed;
> 
> Registered and completed seems to have the same meaning.
> 
>> +struct dentry   *dentry;
>> +struct wait_queue_head  gpu_hang_wait;
>> +};
>> +
>>   #define AMDGPU_RESET_MAGIC_NUM 64
>>   #define AMDGPU_MAX_DF_PERFMONS 4
>>   struct amdgpu_device {
>> @@ -990,6 +997,8 @@ struct amdgpu_device {
>>  charproduct_number[16];
>>  charproduct_name[32];
>>  charserial[16];
>> +
>> +struct amdgpu_autodump  autodump;
>>   };
>>   
>>   static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> index 1a4894fa3693..cdd4bf00adee 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> @@ -74,8 +74,91 @@ int amdgpu_debugfs_add_files(struct amdgpu_device *adev,
>>  return 0;
>>   }
>>   
>> +int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev)
>> +{
>> +#if defined(CONFIG_DEBUG_FS)
>> +int ret;
>> +unsigned long tmo = 600*HZ;
> 
> In general please declare constant declarations first and variables like "i" 
> or "r" last.
> 
>> +
>> +if (!adev->autodump.registered)
>> +return 0;
>> +
>> +wake_up_interruptible(&adev->autodump.gpu_hang_wait);
>> +
>> +ret = wait_for_completion_interruptible_timeout(&adev->autodump.completed, tmo);
> 
> This is racy, in other words it can happen that a new client opens up the 
> debugfs file without being signaled but blocks the reset here.
> 
> You could use two completion structures to avoid that.
> 
>> +if (ret == 0) { /* timed out and the dump tool still hasn't finished its dump */
>> +pr_err("autodump: timeout before dump finished, move on to gpu recovery\n");
>> +return -ETIMEDOUT;
>> +}
>> +#endif
>> +return 0;
>> +}
>> +
>>   #if defined(CONFIG_DEBUG_FS)
>>   
>> +static int amdgpu_debugfs_autodump_open(struct inode *inode, struct file *file)
>> +{
>> +int ret;
>> +struct amdgpu_device *adev;
>> +
>> +ret = simple_open(inode, file);
>> +if (ret)
>> +return ret;
>> +
>> +adev = file->private_data;
>> +if (adev->autodump.registered == true)
>> +  
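
On the race Christian mentions: whether it is solved with two completion
structures as he suggests, or with a single completion that doubles as the
"client present" flag, the key point is that the reset path must only ever
wait on a client that opened the node before the hang was signalled. A rough
sketch of the single-completion variant (illustrative only, not the patch as
posted; field and function names are assumptions, and open() would still need
a lock to make the check-and-rearm atomic):

/* 'dumping' replaces the 'registered' + 'completed' pair. */
static void autodump_init(struct amdgpu_device *adev)
{
	init_waitqueue_head(&adev->autodump.gpu_hang_wait);
	/* Starts out completed: "no client registered, don't wait". */
	init_completion(&adev->autodump.dumping);
	complete_all(&adev->autodump.dumping);
}

static int autodump_open(struct inode *inode, struct file *file)
{
	struct amdgpu_device *adev = inode->i_private;

	if (!completion_done(&adev->autodump.dumping))
		return -EBUSY;				/* one client at a time */

	reinit_completion(&adev->autodump.dumping);	/* arm the wait */
	file->private_data = adev;
	return 0;
}

static int autodump_release(struct inode *inode, struct file *file)
{
	struct amdgpu_device *adev = file->private_data;

	complete_all(&adev->autodump.dumping);		/* dump finished */
	return 0;
}

static int autodump_wait(struct amdgpu_device *adev)
{
	unsigned long tmo = 600 * HZ;

	wake_up_interruptible(&adev->autodump.gpu_hang_wait);

	/* Returns immediately when no client is registered, because the
	 * completion is left in the completed state in that case. */
	if (!wait_for_completion_interruptible_timeout(&adev->autodump.dumping, tmo))
		return -ETIMEDOUT;
	return 0;
}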

Re: [PATCH] drm/amdgpu: Add autodump debugfs node for gpu reset

2020-04-23 Thread Pierre-Eric Pelloux-Prayer
Hi,

The build fails for me with this patch applied (poll_wait, POLLIN,
POLLRDNORM and POLLWRNORM are undeclared).

Adding "#include " to amdgpu_debugfs.c fixes it.

Pierre-Eric

On 23/04/2020 09:19, jia...@amd.com wrote:
> From: Jiange Zhao 
> 
> When the GPU gets a timeout, it notifies an interested party
> of an opportunity to dump info before the actual GPU reset.
> 
> A usermode app would open the 'autodump' node under debugfs
> and poll() for readable/writable. When a GPU reset is due,
> amdgpu would notify the usermode app through the wait_queue_head
> and give it 10 minutes to dump info.
> 
> After the usermode app has done its work, the 'autodump' node is closed.
> On node closure, amdgpu learns that the dump is done through
> the completion that is triggered in release().
> 
> There is no write or read callback because the necessary info can be
> obtained through dmesg and umr. Messages back and forth between the
> usermode app and amdgpu are unnecessary.
> 
> Signed-off-by: Jiange Zhao 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h |  9 +++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 85 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h |  1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  |  2 +
>  4 files changed, 97 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index bc1e0fd71a09..a505b547f242 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -724,6 +724,13 @@ struct amd_powerplay {
>   const struct amd_pm_funcs *pp_funcs;
>  };
>  
> +struct amdgpu_autodump {
> + boolregistered;
> + struct completion   completed;
> + struct dentry   *dentry;
> + struct wait_queue_head  gpu_hang_wait;
> +};
> +
>  #define AMDGPU_RESET_MAGIC_NUM 64
>  #define AMDGPU_MAX_DF_PERFMONS 4
>  struct amdgpu_device {
> @@ -990,6 +997,8 @@ struct amdgpu_device {
>   charproduct_number[16];
>   charproduct_name[32];
>   charserial[16];
> +
> + struct amdgpu_autodump  autodump;
>  };
>  
>  static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device 
> *bdev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> index 1a4894fa3693..cdd4bf00adee 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> @@ -74,8 +74,91 @@ int amdgpu_debugfs_add_files(struct amdgpu_device *adev,
>   return 0;
>  }
>  
> +int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev)
> +{
> +#if defined(CONFIG_DEBUG_FS)
> + int ret;
> + unsigned long tmo = 600*HZ;
> +
> + if (!adev->autodump.registered)
> + return 0;
> +
> + wake_up_interruptible(&adev->autodump.gpu_hang_wait);
> +
> + ret = wait_for_completion_interruptible_timeout(&adev->autodump.completed, tmo);
> + if (ret == 0) { /* timed out and the dump tool still hasn't finished its dump */
> + pr_err("autodump: timeout before dump finished, move on to gpu recovery\n");
> + return -ETIMEDOUT;
> + }
> +#endif
> + return 0;
> +}
> +
>  #if defined(CONFIG_DEBUG_FS)
>  
> +static int amdgpu_debugfs_autodump_open(struct inode *inode, struct file *file)
> +{
> + int ret;
> + struct amdgpu_device *adev;
> +
> + ret = simple_open(inode, file);
> + if (ret)
> + return ret;
> +
> + adev = file->private_data;
> + if (adev->autodump.registered == true)
> + return -EINVAL;
> +
> + adev->autodump.registered = true;
> +
> + return 0;
> +}
> +
> +static int amdgpu_debugfs_autodump_release(struct inode *inode, struct file *file)
> +{
> + struct amdgpu_device *adev = file->private_data;
> +
> + complete(&adev->autodump.completed);
> + adev->autodump.registered = false;
> +
> + return 0;
> +}
> +
> +unsigned int amdgpu_debugfs_autodump_poll(struct file *file, struct poll_table_struct *poll_table)
> +{
> + struct amdgpu_device *adev = file->private_data;
> +
> + poll_wait(file, &adev->autodump.gpu_hang_wait, poll_table);
> +
> + if (adev->in_gpu_reset)
> + return POLLIN | POLLRDNORM | POLLWRNORM;
> +
> + return 0;
> +}
> +
> +static const struct file_operations autodump_debug_fops = {
> + .owner = THIS_MODULE,
> + .open = amdgpu_debugfs_autodump_open,
> + .poll = amdgpu_debugfs_autodump_poll,
> + .release = amdgpu_debugfs_autodump_release,
> +};
> +
> +static int amdgpu_debugfs_autodump_init(struct amdgpu_device *adev)
> +{
> + struct dentry *entry;
> +
> + init_completion(&adev->autodump.completed);
> + init_waitqueue_head(&adev->autodump.gpu_hang_wait);
> + adev->autodump.registered = false;
> +
> + entry = debugfs_create_file("autodump", 0600,
> + adev->ddev->primary->debugfs_root,
> 

Re: [PATCH 2/2] drm/amdgpu: add full TMZ support into amdgpu_ttm_map_buffer v2

2020-03-23 Thread Pierre-Eric Pelloux-Prayer
Hi Christian,

The 2 patches are:
  Tested-by: Pierre-Eric Pelloux-Prayer 

Thanks,
Pierre-Eric

On 22/03/2020 16:48, Christian König wrote:
> This should allow us to also support VRAM->GTT moves.
> 
> v2: fix missing vram_base_adjustment
> 
> Signed-off-by: Christian König 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 38 
> ++---
>  1 file changed, 30 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 53de99dbaead..e15a343a944b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -309,21 +309,21 @@ static int amdgpu_ttm_map_buffer(struct 
> ttm_buffer_object *bo,
>unsigned window, struct amdgpu_ring *ring,
>bool tmz, uint64_t *addr)
>  {
> - struct ttm_dma_tt *dma = container_of(bo->ttm, struct ttm_dma_tt, ttm);
>   struct amdgpu_device *adev = ring->adev;
>   struct amdgpu_job *job;
>   unsigned num_dw, num_bytes;
> - dma_addr_t *dma_address;
>   struct dma_fence *fence;
>   uint64_t src_addr, dst_addr;
> + void *cpu_addr;
>   uint64_t flags;
> + unsigned int i;
>   int r;
>  
>   BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
>  AMDGPU_GTT_MAX_TRANSFER_SIZE * 8);
>  
>   /* Map only what can't be accessed directly */
> - if (mem->start != AMDGPU_BO_INVALID_OFFSET) {
> + if (!tmz && mem->start != AMDGPU_BO_INVALID_OFFSET) {
>   *addr = amdgpu_mm_node_addr(bo, mm_node, mem) + offset;
>   return 0;
>   }
> @@ -351,15 +351,37 @@ static int amdgpu_ttm_map_buffer(struct 
> ttm_buffer_object *bo,
>   amdgpu_ring_pad_ib(ring, &job->ibs[0]);
>   WARN_ON(job->ibs[0].length_dw > num_dw);
>  
> - dma_address = &dma->dma_address[offset >> PAGE_SHIFT];
>   flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, mem);
>   if (tmz)
>   flags |= AMDGPU_PTE_TMZ;
>  
> - r = amdgpu_gart_map(adev, 0, num_pages, dma_address, flags,
> - &job->ibs[0].ptr[num_dw]);
> - if (r)
> - goto error_free;
> + cpu_addr = &job->ibs[0].ptr[num_dw];
> +
> + if (mem->mem_type == TTM_PL_TT) {
> + struct ttm_dma_tt *dma;
> + dma_addr_t *dma_address;
> +
> + dma = container_of(bo->ttm, struct ttm_dma_tt, ttm);
> + dma_address = &dma->dma_address[offset >> PAGE_SHIFT];
> + r = amdgpu_gart_map(adev, 0, num_pages, dma_address, flags,
> + cpu_addr);
> + if (r)
> + goto error_free;
> + } else {
> + dma_addr_t dma_address;
> +
> + dma_address = (mm_node->start << PAGE_SHIFT) + offset;
> + dma_address += adev->vm_manager.vram_base_offset;
> +
> + for (i = 0; i < num_pages; ++i) {
> + r = amdgpu_gart_map(adev, i << PAGE_SHIFT, 1,
> + &dma_address, flags, cpu_addr);
> + if (r)
> + goto error_free;
> +
> + dma_address += PAGE_SIZE;
> + }
> + }
>  
>   r = amdgpu_job_submit(job, &adev->mman.entity,
> AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
> 
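
The interesting bit of the v2 is the VRAM branch: amdgpu_gart_map() takes an
array with one DMA address per page, while contiguous VRAM only has a single
bus address, so the window is filled one page at a time. A standalone
restatement of that loop, with an illustrative stub in place of the real
driver call (names and signature here are assumptions, not the amdgpu API):

#include <stdint.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1ULL << PAGE_SHIFT)

/* Illustrative stub: the real amdgpu_gart_map() writes 'num' page table
 * entries for the addresses in 'pages' starting at GART offset 'gart_off'. */
static int gart_map(uint64_t gart_off, unsigned int num,
		    const uint64_t *pages, uint64_t flags, void *ptes)
{
	(void)gart_off; (void)num; (void)pages; (void)flags; (void)ptes;
	return 0;
}

/* VRAM has no per-page DMA address array (unlike GTT), so the single
 * contiguous bus address is submitted one page at a time, advanced by
 * PAGE_SIZE on each iteration. */
static int map_vram_window(uint64_t node_start, uint64_t offset,
			   uint64_t vram_base_offset, unsigned int num_pages,
			   uint64_t flags, void *ptes)
{
	uint64_t dma_address = (node_start << PAGE_SHIFT) + offset
			     + vram_base_offset;
	unsigned int i;

	for (i = 0; i < num_pages; ++i) {
		int r = gart_map((uint64_t)i << PAGE_SHIFT, 1, &dma_address,
				 flags, ptes);
		if (r)
			return r;
		dma_address += PAGE_SIZE;
	}
	return 0;
}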


Re: Few TMZ BO move patches

2020-03-02 Thread Pierre-Eric Pelloux-Prayer
Hi Christian,

The 3 patches are:
   Tested-by: Pierre-Eric Pelloux-Prayer 

Regards,
Pierre-Eric

On 02/03/2020 13:17, Christian König wrote:
> Because of a shift in priorities I won't work on TMZ this week.
> 
> So attached are a few smaller patches I already prepared, but the bounce copy 
> for system eviction is still missing.
> 
> Patches are totally untested, but if anybody finds them useful feel free to 
> test and review them.
> 
> Regards,
> Christian.
> 
> 


Re: [PATCH] drm/amdgpu: fix the gfx hang while use per-ib secure flag

2020-02-25 Thread Pierre-Eric Pelloux-Prayer
Hi Ray,

I confirm that this fixes the hang I had when running libdrm's amdgpu_test.
Thanks!

Tested-by: Pierre-Eric Pelloux-Prayer 


On 25/02/2020 14:57, Huang Rui wrote:
> Since 6643ba1, frame control packets are only issued in the presence of
> secure IB(s). This causes hangs on some hardware (e.g. Raven1). This patch
> restores the unconditional issuing of frame control packets, while keeping
> the per-IB logic regarding the secure flag.
> 
> Fixes: 6643ba1 drm/amdgpu: Move to a per-IB secure flag (TMZ)
> 
> Reported-by: Pierre-Eric Pelloux-Prayer 
> Signed-off-by: Huang Rui 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   | 41 
> +++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  5 ++--
>  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   | 15 ++--
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 13 +-
>  4 files changed, 43 insertions(+), 31 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index 4b2342d..9713a7d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -131,7 +131,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
> num_ibs,
>   uint64_t fence_ctx;
>   uint32_t status = 0, alloc_size;
>   unsigned fence_flags = 0;
> - bool secure;
> + int secure = -1;
>  
>   unsigned i;
>   int r = 0;
> @@ -216,7 +216,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
> num_ibs,
>   amdgpu_ring_emit_cntxcntl(ring, status);
>   }
>  
> - secure = false;
>   for (i = 0; i < num_ibs; ++i) {
> + ib = &ibs[i];
>  
> @@ -228,27 +227,37 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, 
> unsigned num_ibs,
>   !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble 
> CE ib must be inserted anyway */
>   continue;
>  
> - /* If this IB is TMZ, add frame TMZ start packet,
> -  * else, turn off TMZ.
> -  */
> - if (ib->flags & AMDGPU_IB_FLAGS_SECURE && 
> ring->funcs->emit_tmz) {
> - if (!secure) {
> - secure = true;
> - amdgpu_ring_emit_tmz(ring, true);
> + if (job && ring->funcs->emit_frame_cntl) {
> + if (secure == -1) {
> + if (ib->flags & AMDGPU_IB_FLAGS_SECURE) {
> + secure = 1;
> + amdgpu_ring_emit_frame_cntl(ring, true, 
> true);
> + } else {
> + secure = 0;
> + amdgpu_ring_emit_frame_cntl(ring, true, 
> false);
> + }
> + } else {
> + if (secure == 1 &&
> + !(ib->flags & AMDGPU_IB_FLAGS_SECURE)) {
> + secure = 0;
> + amdgpu_ring_emit_frame_cntl(ring, 
> false, true);
> + amdgpu_ring_emit_frame_cntl(ring, true, 
> false);
> + } else if (secure == 0 &&
> +ib->flags & AMDGPU_IB_FLAGS_SECURE) {
> + secure = 1;
> + amdgpu_ring_emit_frame_cntl(ring, 
> false, false);
> + amdgpu_ring_emit_frame_cntl(ring, true, 
> true);
> + }
>   }
> - } else if (secure) {
> - secure = false;
> - amdgpu_ring_emit_tmz(ring, false);
>   }
>  
>   amdgpu_ring_emit_ib(ring, job, ib, status);
>   status &= ~AMDGPU_HAVE_CTX_SWITCH;
>   }
>  
> - if (secure) {
> - secure = false;
> - amdgpu_ring_emit_tmz(ring, false);
> - }
> + if (job && ring->funcs->emit_frame_cntl)
> + amdgpu_ring_emit_frame_cntl(ring, false,
> + (secure == 1) ? true : false);
>  
>  #ifdef CONFIG_X86_64
>   if (!(adev->flags & AMD_IS_APU))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index 24caff0..4d019d6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -166,7 +166,8 @@ struct amdgpu_r
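
The per-IB handling above boils down to a small state machine: open a frame
with the first IB's secure state, close and reopen whenever the state flips,
and close after the last IB. A userspace-runnable restatement (illustrative
only; emit_frame_cntl() is a stand-in for amdgpu_ring_emit_frame_cntl()):

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for amdgpu_ring_emit_frame_cntl(); here it only traces. */
static void emit_frame_cntl(bool start, bool secure)
{
	printf("frame_cntl: start=%d secure=%d\n", start, secure);
}

static void schedule_ibs(const bool *ib_secure, int num_ibs)
{
	int secure = -1;	/* -1: no frame opened yet */
	int i;

	for (i = 0; i < num_ibs; ++i) {
		bool s = ib_secure[i];

		if (secure == -1) {
			secure = s;
			emit_frame_cntl(true, s);
		} else if (secure != (int)s) {
			emit_frame_cntl(false, secure);	/* close old frame */
			emit_frame_cntl(true, s);	/* open new frame */
			secure = s;
		}
		/* amdgpu_ring_emit_ib() would go here */
	}

	emit_frame_cntl(false, secure == 1);	/* close the last frame */
}

int main(void)
{
	const bool ibs[] = { false, true, true, false };

	schedule_ibs(ibs, 4);
	return 0;
}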

Re: [PATCH 1/2] drm/amdgpu: enable GPU reset by default on Navi

2020-01-30 Thread Pierre-Eric Pelloux-Prayer
Hi Alex,

I had one issue while testing this patch on a RX5700.

After a gfx hang a reset is executed.
Switching to a VT and restarting gdm works fine but the clocks seem messed up:
   - lots of graphical artifacts (underflows?)
   - pp_dpm_sclk and pp_dpm_socclk have strange values (see attached files)

dmesg output (from the gfx hang):
[  169.755071] [drm:amdgpu_dm_atomic_commit_tail [amdgpu]] *ERROR* Waiting for 
fences timed out!
[  169.755173] [drm:amdgpu_dm_atomic_commit_tail [amdgpu]] *ERROR* Waiting for 
fences timed out!
[  174.874847] [drm:amdgpu_job_timedout [amdgpu]] *ERROR* ring gfx_0.0.0 
timeout, signaled seq=10034, emitted seq=10036
[  174.874925] [drm:amdgpu_job_timedout [amdgpu]] *ERROR* Process information: 
process gnome-shell pid 1724 thread gnome-shel:cs0 pid 1741
[  174.874933] amdgpu :0b:00.0: GPU reset begin!
[  174.875192] [ cut here ]
[  174.875282] WARNING: CPU: 0 PID: 7 at 
drivers/gpu/drm/amd/amdgpu/../display/dc/dcn20/dcn20_resource.c:2969 
dcn20_validate_bandwidth+0x87/0xe0 [amdgpu]
[  174.875283] Modules linked in: binfmt_misc(E) nls_ascii(E) nls_cp437(E) 
vfat(E) fat(E) snd_hda_codec_realtek(E) snd_hda_codec_generic(E) 
ledtrig_audio(E) snd_hda_codec_hdmi(E) snd_hda_intel(E) snd_intel_nhlt(E) 
snd_hda_codec(E) snd_hwdep(E) efi_pstore(E) snd_hda_core(E) snd_pcm(E) 
snd_timer(E) snd(E) ccp(E) xpad(E) wmi_bmof(E) mxm_wmi(E) evdev(E) joydev(E) 
ff_memless(E) efivars(E) soundcore(E) pcspkr(E) k10temp(E) sp5100_tco(E) 
rng_core(E) sg(E) wmi(E) button(E) acpi_cpufreq(E) uinput(E) parport_pc(E) 
ppdev(E) lp(E) parport(E) efivarfs(E) ip_tables(E) autofs4(E) ext4(E) 
crc32c_generic(E) crc16(E) mbcache(E) jbd2(E) dm_crypt(E) dm_mod(E) 
hid_generic(E) usbhid(E) hid(E) sd_mod(E) crct10dif_pclmul(E) crc32_pclmul(E) 
crc32c_intel(E) ghash_clmulni_intel(E) amdgpu(E) gpu_sched(E) ttm(E) 
drm_kms_helper(E) aesni_intel(E) glue_helper(E) xhci_pci(E) ahci(E) 
crypto_simd(E) libahci(E) cryptd(E) drm(E) xhci_hcd(E) i2c_piix4(E) libata(E) 
igb(E) usbcore(E) scsi_mod(E) dca(E) i2c_algo_bit(E) gpio_amdpt(E)
[  174.875310]  gpio_generic(E)
[  174.875313] CPU: 0 PID: 7 Comm: kworker/0:1 Tainted: GE 
5.4.0-rc7-02679-g9d664d914f0e #10
[  174.875314] Hardware name: Gigabyte Technology Co., Ltd. X470 AORUS ULTRA 
GAMING/X470 AORUS ULTRA GAMING-CF, BIOS F6 01/25/2019
[  174.875318] Workqueue: events drm_sched_job_timedout [gpu_sched]
[  174.875404] RIP: 0010:dcn20_validate_bandwidth+0x87/0xe0 [amdgpu]
[  174.875406] Code: 2d 44 22 a5 e8 1d 00 00 75 26 f2 0f 11 85 a8 21 00 00 31 
d2 48 89 ee 4c 89 ef e8 d4 f5 ff ff 41 89 c4 22 85 e8 1d 00 00 75 4a <0f> 0b eb 
02 75 d1 f2 0f 10 14 24 f2 0f 11 95 a8 21 00 00 e8 f1 4b
[  174.875407] RSP: 0018:a87880067a90 EFLAGS: 00010246
[  174.875408] RAX:  RBX:  RCX: 1e61
[  174.875409] RDX: 1e60 RSI: 981f3ec2d880 RDI: 0002d880
[  174.875409] RBP: 981f25a6 R08: 0006 R09: 
[  174.875410] R10: 0001 R11: 00010001 R12: 0001
[  174.875411] R13: 981f1af4 R14:  R15: 981f25a6
[  174.875412] FS:  () GS:981f3ec0() 
knlGS:
[  174.875413] CS:  0010 DS:  ES:  CR0: 80050033
[  174.875414] CR2: 7f42858f3000 CR3: 0007f02ae000 CR4: 003406f0
[  174.875414] Call Trace:
[  174.875498]  dc_validate_global_state+0x25f/0x2d0 [amdgpu]
[  174.875583]  amdgpu_dm_atomic_check+0x8ff/0xf20 [amdgpu]
[  174.875587]  ? __ww_mutex_lock.isra.0+0x3a/0x760
[  174.875590]  ? _cond_resched+0x15/0x30
[  174.875591]  ? __ww_mutex_lock.isra.0+0x3a/0x760
[  174.875606]  drm_atomic_check_only+0x554/0x7e0 [drm]
[  174.875620]  ? drm_connector_list_iter_next+0x7d/0x90 [drm]
[  174.875632]  drm_atomic_commit+0x13/0x50 [drm]
[  174.875640]  drm_atomic_helper_disable_all+0x14c/0x160 [drm_kms_helper]
[  174.875647]  drm_atomic_helper_suspend+0x60/0xf0 [drm_kms_helper]
[  174.875730]  dm_suspend+0x1c/0x60 [amdgpu]
[  174.875782]  amdgpu_device_ip_suspend_phase1+0x81/0xe0 [amdgpu]
[  174.875836]  amdgpu_device_ip_suspend+0x1c/0x60 [amdgpu]
[  174.875923]  amdgpu_device_pre_asic_reset+0x191/0x1a4 [amdgpu]
[  174.876010]  amdgpu_device_gpu_recover.cold+0x43a/0xbca [amdgpu]
[  174.876084]  amdgpu_job_timedout+0x103/0x130 [amdgpu]
[  174.876088]  drm_sched_job_timedout+0x7f/0xe0 [gpu_sched]
[  174.876092]  process_one_work+0x1b5/0x360
[  174.876094]  worker_thread+0x50/0x3c0
[  174.876096]  kthread+0xf9/0x130
[  174.876097]  ? process_one_work+0x360/0x360
[  174.876099]  ? kthread_park+0x90/0x90
[  174.876100]  ret_from_fork+0x22/0x40
[  174.876103] ---[ end trace af4365804bf318ce ]---
[  175.346937] [drm:gfx_v10_0_hw_fini [amdgpu]] *ERROR* KGQ disable failed
[  175.600179] [drm:gfx_v10_0_hw_fini [amdgpu]] *ERROR* KCQ disable failed
[  175.853418] [drm:gfx_v10_0_cp_gfx_enable [amdgpu]] *ERROR* failed to halt cp 
gfx
[  175.874639] 

Re: [PATCH 3/5] drm/amdgpu/smu: add metrics table lock for navi

2019-12-17 Thread Pierre-Eric Pelloux-Prayer
Hi Alex,

Isn't this patch missing something like this:

pr_info("Failed to export SMU metrics table!\n");
+   mutex_unlock(&smu->metrics_lock);
return ret;

to release the lock in case of error?

Regards,
Pierre-Eric 
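
i.e. with the unlock on the error path added, the whole helper would read
roughly as below. This is a sketch assembled from the hunks quoted in the
patch; the metrics_time update in the elided middle of the function is an
assumption:

static int navi10_get_metrics_table(struct smu_context *smu,
				    SmuMetrics_t *metrics_table)
{
	struct smu_table_context *smu_table = &smu->smu_table;
	int ret = 0;

	mutex_lock(&smu->metrics_lock);
	if (!smu_table->metrics_time ||
	    time_after(jiffies, smu_table->metrics_time + msecs_to_jiffies(100))) {
		ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, 0,
				       (void *)smu_table->metrics_table, false);
		if (ret) {
			pr_info("Failed to export SMU metrics table!\n");
			mutex_unlock(&smu->metrics_lock);	/* don't leak the lock */
			return ret;
		}
		smu_table->metrics_time = jiffies;	/* assumed from context */
	}

	memcpy(metrics_table, smu_table->metrics_table, sizeof(SmuMetrics_t));
	mutex_unlock(&smu->metrics_lock);

	return ret;
}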


On 17/12/2019 15:55, Alex Deucher wrote:
> To protect access to the metrics table.
> 
> Bug: https://gitlab.freedesktop.org/drm/amd/issues/900
> Signed-off-by: Alex Deucher 
> ---
>  drivers/gpu/drm/amd/powerplay/navi10_ppt.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c 
> b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
> index 15403b7979d6..102fddda925b 100644
> --- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
> +++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
> @@ -564,6 +564,7 @@ static int navi10_get_metrics_table(struct smu_context 
> *smu,
>   struct smu_table_context *smu_table = &smu->smu_table;
>   int ret = 0;
>  
> + mutex_lock(&smu->metrics_lock);
>   if (!smu_table->metrics_time || time_after(jiffies, 
> smu_table->metrics_time + msecs_to_jiffies(100))) {
>   ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, 0,
>   (void *)smu_table->metrics_table, false);
> @@ -575,6 +576,7 @@ static int navi10_get_metrics_table(struct smu_context 
> *smu,
>   }
>  
>   memcpy(metrics_table, smu_table->metrics_table, sizeof(SmuMetrics_t));
> + mutex_unlock(&smu->metrics_lock);
>  
>   return ret;
>  }
> 


[PATCH] drm/amgdpu: add cache flush workaround to gfx8 emit_fence

2019-11-28 Thread Pierre-Eric Pelloux-Prayer
The same workaround is used for gfx7.
Both PAL and Mesa use it for gfx8 too, so port this commit to
gfx_v8_0_ring_emit_fence_gfx.

Signed-off-by: Pierre-Eric Pelloux-Prayer 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 22 +++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 80b79583dffe..dcd747bef391 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6183,7 +6183,23 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct 
amdgpu_ring *ring, u64 addr,
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
 
-   /* EVENT_WRITE_EOP - flush caches, send int */
+   /* Workaround for cache flush problems. First send a dummy EOP
+* event down the pipe with seq one below.
+*/
+   amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
+   amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
+EOP_TC_ACTION_EN |
+EOP_TC_WB_ACTION_EN |
+EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+EVENT_INDEX(5)));
+   amdgpu_ring_write(ring, addr & 0xfffffffc);
+   amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
+   DATA_SEL(1) | INT_SEL(0));
+   amdgpu_ring_write(ring, lower_32_bits(seq - 1));
+   amdgpu_ring_write(ring, upper_32_bits(seq - 1));
+
+   /* Then send the real EOP event down the pipe:
+* EVENT_WRITE_EOP - flush caches, send int */
amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
 EOP_TC_ACTION_EN |
@@ -6926,7 +6942,7 @@ static const struct amdgpu_ring_funcs 
gfx_v8_0_ring_funcs_gfx = {
5 +  /* COND_EXEC */
7 +  /* PIPELINE_SYNC */
VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
-   8 +  /* FENCE for VM_FLUSH */
+   12 +  /* FENCE for VM_FLUSH */
20 + /* GDS switch */
4 + /* double SWITCH_BUFFER,
   the first COND_EXEC jump to the place just
@@ -6938,7 +6954,7 @@ static const struct amdgpu_ring_funcs 
gfx_v8_0_ring_funcs_gfx = {
31 + /* DE_META */
3 + /* CNTX_CTRL */
5 + /* HDP_INVL */
-   8 + 8 + /* FENCE x2 */
+   12 + 12 + /* FENCE x2 */
2, /* SWITCH_BUFFER */
.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
-- 
2.24.0.rc0
