Re: [PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping (v3)

2017-08-08 Thread Tom St Denis

Thanks, I pushed v4 which moved the tracepoints into functions.

Cheers,
Tom

On 08/08/17 08:01 AM, Christian König wrote:

Am 08.08.2017 um 13:54 schrieb Tom St Denis:

ping?


Ups sorry, thought that I already send that out. One comment below.



Tom

On 02/08/17 07:52 AM, Tom St Denis wrote:

This helps map DMA addresses back to physical addresses.

Signed-off-by: Tom St Denis 

(v2):  Added tracepoints for USERPTR, SG mappings, and
  SWIOTBL mappings.  Reformatted trace call perform
  PCI decoding internal to the trace.

(v3):  Add unmap tracepoints as well
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 56 
+++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   | 55 
++

  2 files changed, 104 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h

index 509f7a63d40c..0d708e8fb391 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -14,6 +14,62 @@
  #define AMDGPU_JOB_GET_TIMELINE_NAME(job) \
job->base.s_fence->finished.ops->get_timeline_name(>base.s_fence->finished) 


  +TRACE_EVENT(amdgpu_ttm_tt_populate,
+TP_PROTO(struct amdgpu_device *adev, uint64_t dma_address, 
uint64_t phys_address),

+TP_ARGS(adev, dma_address, phys_address),
+TP_STRUCT__entry(
+__field(uint16_t, domain)
+__field(uint8_t, bus)
+__field(uint8_t, slot)
+__field(uint8_t, func)
+__field(uint64_t, dma)
+__field(uint64_t, phys)
+),
+TP_fast_assign(
+   __entry->domain = pci_domain_nr(adev->pdev->bus);
+   __entry->bus = adev->pdev->bus->number;
+   __entry->slot = PCI_SLOT(adev->pdev->devfn);
+   __entry->func = PCI_FUNC(adev->pdev->devfn);
+   __entry->dma = dma_address;
+   __entry->phys = phys_address;
+   ),
+TP_printk("%04x:%02x:%02x.%x: 0x%llx => 0x%llx",
+  (unsigned)__entry->domain,
+  (unsigned)__entry->bus,
+  (unsigned)__entry->slot,
+  (unsigned)__entry->func,
+  (unsigned long long)__entry->dma,
+  (unsigned long long)__entry->phys)
+);
+
+TRACE_EVENT(amdgpu_ttm_tt_unpopulate,
+TP_PROTO(struct amdgpu_device *adev, uint64_t dma_address, 
uint64_t phys_address),

+TP_ARGS(adev, dma_address, phys_address),
+TP_STRUCT__entry(
+__field(uint16_t, domain)
+__field(uint8_t, bus)
+__field(uint8_t, slot)
+__field(uint8_t, func)
+__field(uint64_t, dma)
+__field(uint64_t, phys)
+),
+TP_fast_assign(
+   __entry->domain = pci_domain_nr(adev->pdev->bus);
+   __entry->bus = adev->pdev->bus->number;
+   __entry->slot = PCI_SLOT(adev->pdev->devfn);
+   __entry->func = PCI_FUNC(adev->pdev->devfn);
+   __entry->dma = dma_address;
+   __entry->phys = phys_address;
+   ),
+TP_printk("%04x:%02x:%02x.%x: 0x%llx => 0x%llx",
+  (unsigned)__entry->domain,
+  (unsigned)__entry->bus,
+  (unsigned)__entry->slot,
+  (unsigned)__entry->func,
+  (unsigned long long)__entry->dma,
+  (unsigned long long)__entry->phys)
+);
+
  TRACE_EVENT(amdgpu_mm_rreg,
  TP_PROTO(unsigned did, uint32_t reg, uint32_t value),
  TP_ARGS(did, reg, value),
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

index 8da59d212b3b..d5f1b99b0ab2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -43,6 +43,7 @@
  #include 
  #include 
  #include "amdgpu.h"
+#include "amdgpu_trace.h"
  #include "bif/bif_4_1_d.h"
#define DRM_FILE_PAGE_OFFSET (0x1ULL >> PAGE_SHIFT)
@@ -667,7 +668,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct 
ttm_tt *ttm)

  {
  struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
  struct amdgpu_ttm_tt *gtt = (void *)ttm;
-unsigned nents;
+unsigned i, nents;
  int r;
int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
@@ -688,6 +689,15 @@ static int amdgpu_ttm_tt_pin_userptr(struct 
ttm_tt *ttm)

  drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
   gtt->ttm.dma_address, ttm->num_pages);
  +if (unlikely(trace_amdgpu_ttm_tt_populate_enabled())) {
+for (i = 0; i < ttm->num_pages; i++) {
+trace_amdgpu_ttm_tt_populate(
+adev,
+gtt->ttm.dma_address[i],
+page_to_phys(ttm->pages[i]));
+}
+}
+


It would still be nice to have that chunk in a separate function since 
it is used multiple 

Re: [PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping (v3)

2017-08-08 Thread Christian König

Am 08.08.2017 um 13:54 schrieb Tom St Denis:

ping?


Ups sorry, thought that I already send that out. One comment below.



Tom

On 02/08/17 07:52 AM, Tom St Denis wrote:

This helps map DMA addresses back to physical addresses.

Signed-off-by: Tom St Denis 

(v2):  Added tracepoints for USERPTR, SG mappings, and
  SWIOTBL mappings.  Reformatted trace call perform
  PCI decoding internal to the trace.

(v3):  Add unmap tracepoints as well
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 56 
+++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   | 55 
++

  2 files changed, 104 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h

index 509f7a63d40c..0d708e8fb391 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -14,6 +14,62 @@
  #define AMDGPU_JOB_GET_TIMELINE_NAME(job) \
job->base.s_fence->finished.ops->get_timeline_name(>base.s_fence->finished)
  +TRACE_EVENT(amdgpu_ttm_tt_populate,
+TP_PROTO(struct amdgpu_device *adev, uint64_t dma_address, 
uint64_t phys_address),

+TP_ARGS(adev, dma_address, phys_address),
+TP_STRUCT__entry(
+__field(uint16_t, domain)
+__field(uint8_t, bus)
+__field(uint8_t, slot)
+__field(uint8_t, func)
+__field(uint64_t, dma)
+__field(uint64_t, phys)
+),
+TP_fast_assign(
+   __entry->domain = pci_domain_nr(adev->pdev->bus);
+   __entry->bus = adev->pdev->bus->number;
+   __entry->slot = PCI_SLOT(adev->pdev->devfn);
+   __entry->func = PCI_FUNC(adev->pdev->devfn);
+   __entry->dma = dma_address;
+   __entry->phys = phys_address;
+   ),
+TP_printk("%04x:%02x:%02x.%x: 0x%llx => 0x%llx",
+  (unsigned)__entry->domain,
+  (unsigned)__entry->bus,
+  (unsigned)__entry->slot,
+  (unsigned)__entry->func,
+  (unsigned long long)__entry->dma,
+  (unsigned long long)__entry->phys)
+);
+
+TRACE_EVENT(amdgpu_ttm_tt_unpopulate,
+TP_PROTO(struct amdgpu_device *adev, uint64_t dma_address, 
uint64_t phys_address),

+TP_ARGS(adev, dma_address, phys_address),
+TP_STRUCT__entry(
+__field(uint16_t, domain)
+__field(uint8_t, bus)
+__field(uint8_t, slot)
+__field(uint8_t, func)
+__field(uint64_t, dma)
+__field(uint64_t, phys)
+),
+TP_fast_assign(
+   __entry->domain = pci_domain_nr(adev->pdev->bus);
+   __entry->bus = adev->pdev->bus->number;
+   __entry->slot = PCI_SLOT(adev->pdev->devfn);
+   __entry->func = PCI_FUNC(adev->pdev->devfn);
+   __entry->dma = dma_address;
+   __entry->phys = phys_address;
+   ),
+TP_printk("%04x:%02x:%02x.%x: 0x%llx => 0x%llx",
+  (unsigned)__entry->domain,
+  (unsigned)__entry->bus,
+  (unsigned)__entry->slot,
+  (unsigned)__entry->func,
+  (unsigned long long)__entry->dma,
+  (unsigned long long)__entry->phys)
+);
+
  TRACE_EVENT(amdgpu_mm_rreg,
  TP_PROTO(unsigned did, uint32_t reg, uint32_t value),
  TP_ARGS(did, reg, value),
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

index 8da59d212b3b..d5f1b99b0ab2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -43,6 +43,7 @@
  #include 
  #include 
  #include "amdgpu.h"
+#include "amdgpu_trace.h"
  #include "bif/bif_4_1_d.h"
#define DRM_FILE_PAGE_OFFSET (0x1ULL >> PAGE_SHIFT)
@@ -667,7 +668,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct 
ttm_tt *ttm)

  {
  struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
  struct amdgpu_ttm_tt *gtt = (void *)ttm;
-unsigned nents;
+unsigned i, nents;
  int r;
int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
@@ -688,6 +689,15 @@ static int amdgpu_ttm_tt_pin_userptr(struct 
ttm_tt *ttm)

  drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
   gtt->ttm.dma_address, ttm->num_pages);
  +if (unlikely(trace_amdgpu_ttm_tt_populate_enabled())) {
+for (i = 0; i < ttm->num_pages; i++) {
+trace_amdgpu_ttm_tt_populate(
+adev,
+gtt->ttm.dma_address[i],
+page_to_phys(ttm->pages[i]));
+}
+}
+


It would still be nice to have that chunk in a separate function since 
it is used multiple times, but that's not a must have.


Anyway patch is Reviewed-by: Christian König 

Christian.


 

Re: [PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping (v3)

2017-08-08 Thread Tom St Denis

ping?

Tom

On 02/08/17 07:52 AM, Tom St Denis wrote:

This helps map DMA addresses back to physical addresses.

Signed-off-by: Tom St Denis 

(v2):  Added tracepoints for USERPTR, SG mappings, and
  SWIOTBL mappings.  Reformatted trace call perform
  PCI decoding internal to the trace.

(v3):  Add unmap tracepoints as well
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 56 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   | 55 ++
  2 files changed, 104 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 509f7a63d40c..0d708e8fb391 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -14,6 +14,62 @@
  #define AMDGPU_JOB_GET_TIMELINE_NAME(job) \
 
job->base.s_fence->finished.ops->get_timeline_name(>base.s_fence->finished)
  
+TRACE_EVENT(amdgpu_ttm_tt_populate,

+   TP_PROTO(struct amdgpu_device *adev, uint64_t dma_address, uint64_t 
phys_address),
+   TP_ARGS(adev, dma_address, phys_address),
+   TP_STRUCT__entry(
+   __field(uint16_t, domain)
+   __field(uint8_t, bus)
+   __field(uint8_t, slot)
+   __field(uint8_t, func)
+   __field(uint64_t, dma)
+   __field(uint64_t, phys)
+   ),
+   TP_fast_assign(
+  __entry->domain = pci_domain_nr(adev->pdev->bus);
+  __entry->bus = adev->pdev->bus->number;
+  __entry->slot = PCI_SLOT(adev->pdev->devfn);
+  __entry->func = PCI_FUNC(adev->pdev->devfn);
+  __entry->dma = dma_address;
+  __entry->phys = phys_address;
+  ),
+   TP_printk("%04x:%02x:%02x.%x: 0x%llx => 0x%llx",
+ (unsigned)__entry->domain,
+ (unsigned)__entry->bus,
+ (unsigned)__entry->slot,
+ (unsigned)__entry->func,
+ (unsigned long long)__entry->dma,
+ (unsigned long long)__entry->phys)
+);
+
+TRACE_EVENT(amdgpu_ttm_tt_unpopulate,
+   TP_PROTO(struct amdgpu_device *adev, uint64_t dma_address, uint64_t 
phys_address),
+   TP_ARGS(adev, dma_address, phys_address),
+   TP_STRUCT__entry(
+   __field(uint16_t, domain)
+   __field(uint8_t, bus)
+   __field(uint8_t, slot)
+   __field(uint8_t, func)
+   __field(uint64_t, dma)
+   __field(uint64_t, phys)
+   ),
+   TP_fast_assign(
+  __entry->domain = pci_domain_nr(adev->pdev->bus);
+  __entry->bus = adev->pdev->bus->number;
+  __entry->slot = PCI_SLOT(adev->pdev->devfn);
+  __entry->func = PCI_FUNC(adev->pdev->devfn);
+  __entry->dma = dma_address;
+  __entry->phys = phys_address;
+  ),
+   TP_printk("%04x:%02x:%02x.%x: 0x%llx => 0x%llx",
+ (unsigned)__entry->domain,
+ (unsigned)__entry->bus,
+ (unsigned)__entry->slot,
+ (unsigned)__entry->func,
+ (unsigned long long)__entry->dma,
+ (unsigned long long)__entry->phys)
+);
+
  TRACE_EVENT(amdgpu_mm_rreg,
TP_PROTO(unsigned did, uint32_t reg, uint32_t value),
TP_ARGS(did, reg, value),
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 8da59d212b3b..d5f1b99b0ab2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -43,6 +43,7 @@
  #include 
  #include 
  #include "amdgpu.h"
+#include "amdgpu_trace.h"
  #include "bif/bif_4_1_d.h"
  
  #define DRM_FILE_PAGE_OFFSET (0x1ULL >> PAGE_SHIFT)

@@ -667,7 +668,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
  {
struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
struct amdgpu_ttm_tt *gtt = (void *)ttm;
-   unsigned nents;
+   unsigned i, nents;
int r;
  
  	int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);

@@ -688,6 +689,15 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
 gtt->ttm.dma_address, ttm->num_pages);
  
+	if (unlikely(trace_amdgpu_ttm_tt_populate_enabled())) {

+   for (i = 0; i < ttm->num_pages; i++) {

Re: [PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping

2017-08-01 Thread Christian König

Am 01.08.2017 um 19:00 schrieb Tom St Denis:

On 01/08/17 11:41 AM, Christian König wrote:

We can turn of the trace subsystem during compile time already.

Additional to that the trace points use self modifying code to make 
sure that they don't have any overhead as long as they are turned 
off. But I don't think this works with the "if (trace_*_enabled()" as 
well.


Anyway, not a performance critical code path so I don't really bother.


Well aside from that this will only really be useful in upstream 
kernels where we don't control the configuration.


Eventually if we come up with a better solution (tm) then we can 
revert this cleanly.


Actually even if we come up with some other approach to access the GPU 
VM of a process I would rather like to keep that trace point because it 
is rather useful for some userptr debugging as well.


Christian.



Tom
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping

2017-08-01 Thread Tom St Denis

On 01/08/17 11:41 AM, Christian König wrote:

We can turn of the trace subsystem during compile time already.

Additional to that the trace points use self modifying code to make sure 
that they don't have any overhead as long as they are turned off. But I 
don't think this works with the "if (trace_*_enabled()" as well.


Anyway, not a performance critical code path so I don't really bother.


Well aside from that this will only really be useful in upstream kernels 
where we don't control the configuration.


Eventually if we come up with a better solution (tm) then we can revert 
this cleanly.


Tom
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping

2017-08-01 Thread Tom St Denis
In v2 of the patch I used the _enabled() function around the blocks so 
the for loop is only reached if the trace is enabled.


Tom

On 01/08/17 11:56 AM, Xie, AlexBin wrote:
I don't know if compiler is smart enough to optimize the following for 
statement out...



+   for (i = 0; i < ttm->num_pages; i++) {
+   trace_amdgpu_ttm_tt_populate(
+   adev,
+   gtt->ttm.dma_address[i],
+   page_to_phys(ttm->pages[i]));
+   }


-Alex Bin



*From:* Christian König <deathsim...@vodafone.de>
*Sent:* Tuesday, August 1, 2017 11:41 AM
*To:* Xie, AlexBin; amd-gfx@lists.freedesktop.org
*Subject:* Re: [PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping
We can turn of the trace subsystem during compile time already.

Additional to that the trace points use self modifying code to make sure
that they don't have any overhead as long as they are turned off. But I
don't think this works with the "if (trace_*_enabled()" as well.

Anyway, not a performance critical code path so I don't really bother.

Christian.

Am 01.08.2017 um 17:00 schrieb axie:
Because this is used by a debug tool, can we use a macro to 
conditionally compile this feature?



On 2017-08-01 10:54 AM, Christian König wrote:

Am 01.08.2017 um 16:26 schrieb Tom St Denis:

On 01/08/17 10:10 AM, Christian König wrote:


You need to cover multiple code path here:
1. The one you currently implemented which uses ttm_dma_populate() 
and pci_map_page().

2. The one using ttm_dma_populate().


I'll have to look into this one though it's my understanding that 
code path is only followed if there's no (hardware) IOMMU right?


Wrong, that one is used when "anything" in the system used the 
SWIOTLB path once. So the detection doesn't always work reliable.


This one seems rather straight forward but the only catch is I don't 
have access to "adev" inside that drm function.


When you have a ttm you can always do "amdgpu_ttm_adev(ttm->bdev)" to 
get the adev.



  Would it be taboo to do something like

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

index 376078334f54..cd97ef2144c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -916,6 +916,12 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt 
*ttm)

drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
gtt->ttm.dma_address, ttm->num_pages);
ttm->state = tt_unbound;
+   for (i = 0; i < ttm->num_pages; i++) {
+   trace_amdgpu_ttm_tt_populate(
+   adev,
+   gtt->ttm.dma_address[i],
+ page_to_phys(ttm->pages[i]));
+   }
return 0;
}

This would normally result in a for loop around a nop which 
shouldn't be a huge performance hit but is one none the less.


Try using trace_trace_amdgpu_ttm_tt_populate_enabled(), those 
functions are generated by the trace subsystem automatically when you 
define a trace point.


It just doesn't use those nice tricks to modify the compiled binary 
on the fly.


Christian.



4. The in amdgpu_ttm_tt_pin_userptr() which uses dma_map_sg() for 
userptrs.


Basically all just take gtt->ttm.ttm.pages and fill 
gtt->ttm.dma_address.


Same comment as #3.  I can tackle this with a for loop around the 
trace if that is ok.


I suggest to add a helper you can call to trace all 
pages->dma_address mappings inside a ttm.


One thing at a time :-).  That would probably be a bit better since 
the trace log gets filled with remappings (which is why my 
proof-of-concept umr patch only grabs the latest mapping as it reads 
the trace).


Is there a handy place we can grab a list of ttm_tt's bound to our 
device?  If so then in theory I could write a debugfs interface 
instead.


Thanks for your patience as I'm definitely less familiar with the VM 
side of things :-)


Cheers,
Tom
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
amd-gfx Info Page - freedesktop.org 
<https://lists.freedesktop.org/mailman/listinfo/amd-gfx>

lists.freedesktop.org
Subscribing to amd-gfx: Subscribe to amd-gfx by filling out the 
following form. Use of all freedesktop.org lists is subject to our Code 
of ...






___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
amd-gfx Info Page - freedesktop.org 
<https://lists.freedesktop.org/mailman/listinfo/amd-gfx>

lists.freedesktop.org
Subscribing to amd-gfx: Sub

Re: [PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping

2017-08-01 Thread Christian König

We can turn of the trace subsystem during compile time already.

Additional to that the trace points use self modifying code to make sure 
that they don't have any overhead as long as they are turned off. But I 
don't think this works with the "if (trace_*_enabled()" as well.


Anyway, not a performance critical code path so I don't really bother.

Christian.

Am 01.08.2017 um 17:00 schrieb axie:
Because this is used by a debug tool, can we use a macro to 
conditionally compile this feature?



On 2017-08-01 10:54 AM, Christian König wrote:

Am 01.08.2017 um 16:26 schrieb Tom St Denis:

On 01/08/17 10:10 AM, Christian König wrote:


You need to cover multiple code path here:
1. The one you currently implemented which uses ttm_dma_populate() 
and pci_map_page().

2. The one using ttm_dma_populate().


I'll have to look into this one though it's my understanding that 
code path is only followed if there's no (hardware) IOMMU right?


Wrong, that one is used when "anything" in the system used the 
SWIOTLB path once. So the detection doesn't always work reliable.


This one seems rather straight forward but the only catch is I don't 
have access to "adev" inside that drm function.


When you have a ttm you can always do "amdgpu_ttm_adev(ttm->bdev)" to 
get the adev.



  Would it be taboo to do something like

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

index 376078334f54..cd97ef2144c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -916,6 +916,12 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt 
*ttm)

drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
gtt->ttm.dma_address, ttm->num_pages);
ttm->state = tt_unbound;
+   for (i = 0; i < ttm->num_pages; i++) {
+   trace_amdgpu_ttm_tt_populate(
+   adev,
+   gtt->ttm.dma_address[i],
+ page_to_phys(ttm->pages[i]));
+   }
return 0;
}

This would normally result in a for loop around a nop which 
shouldn't be a huge performance hit but is one none the less.


Try using trace_trace_amdgpu_ttm_tt_populate_enabled(), those 
functions are generated by the trace subsystem automatically when you 
define a trace point.


It just doesn't use those nice tricks to modify the compiled binary 
on the fly.


Christian.



4. The in amdgpu_ttm_tt_pin_userptr() which uses dma_map_sg() for 
userptrs.


Basically all just take gtt->ttm.ttm.pages and fill 
gtt->ttm.dma_address.


Same comment as #3.  I can tackle this with a for loop around the 
trace if that is ok.


I suggest to add a helper you can call to trace all 
pages->dma_address mappings inside a ttm.


One thing at a time :-).  That would probably be a bit better since 
the trace log gets filled with remappings (which is why my 
proof-of-concept umr patch only grabs the latest mapping as it reads 
the trace).


Is there a handy place we can grab a list of ttm_tt's bound to our 
device?  If so then in theory I could write a debugfs interface 
instead.


Thanks for your patience as I'm definitely less familiar with the VM 
side of things :-)


Cheers,
Tom
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping

2017-08-01 Thread axie
Because this is used by a debug tool, can we use a macro to 
conditionally compile this feature?



On 2017-08-01 10:54 AM, Christian König wrote:

Am 01.08.2017 um 16:26 schrieb Tom St Denis:

On 01/08/17 10:10 AM, Christian König wrote:


You need to cover multiple code path here:
1. The one you currently implemented which uses ttm_dma_populate() 
and pci_map_page().

2. The one using ttm_dma_populate().


I'll have to look into this one though it's my understanding that 
code path is only followed if there's no (hardware) IOMMU right?


Wrong, that one is used when "anything" in the system used the SWIOTLB 
path once. So the detection doesn't always work reliable.


This one seems rather straight forward but the only catch is I don't 
have access to "adev" inside that drm function.


When you have a ttm you can always do "amdgpu_ttm_adev(ttm->bdev)" to 
get the adev.



  Would it be taboo to do something like

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

index 376078334f54..cd97ef2144c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -916,6 +916,12 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt 
*ttm)

drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
gtt->ttm.dma_address, ttm->num_pages);
ttm->state = tt_unbound;
+   for (i = 0; i < ttm->num_pages; i++) {
+   trace_amdgpu_ttm_tt_populate(
+   adev,
+   gtt->ttm.dma_address[i],
+   page_to_phys(ttm->pages[i]));
+   }
return 0;
}

This would normally result in a for loop around a nop which shouldn't 
be a huge performance hit but is one none the less.


Try using trace_trace_amdgpu_ttm_tt_populate_enabled(), those 
functions are generated by the trace subsystem automatically when you 
define a trace point.


It just doesn't use those nice tricks to modify the compiled binary on 
the fly.


Christian.



4. The in amdgpu_ttm_tt_pin_userptr() which uses dma_map_sg() for 
userptrs.


Basically all just take gtt->ttm.ttm.pages and fill 
gtt->ttm.dma_address.


Same comment as #3.  I can tackle this with a for loop around the 
trace if that is ok.


I suggest to add a helper you can call to trace all 
pages->dma_address mappings inside a ttm.


One thing at a time :-).  That would probably be a bit better since 
the trace log gets filled with remappings (which is why my 
proof-of-concept umr patch only grabs the latest mapping as it reads 
the trace).


Is there a handy place we can grab a list of ttm_tt's bound to our 
device?  If so then in theory I could write a debugfs interface instead.


Thanks for your patience as I'm definitely less familiar with the VM 
side of things :-)


Cheers,
Tom
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping (v2)

2017-08-01 Thread Tom St Denis
This helps map DMA addresses back to physical addresses.

Signed-off-by: Tom St Denis 

(v2):  Added tracepoints for USERPTR, SG mappings, and
 SWIOTBL mappings.  Reformatted trace call perform
 PCI decoding internal to the trace.
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 28 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   | 35 ---
 2 files changed, 56 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 509f7a63d40c..3e0f1885a379 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -14,6 +14,34 @@
 #define AMDGPU_JOB_GET_TIMELINE_NAME(job) \
 
job->base.s_fence->finished.ops->get_timeline_name(>base.s_fence->finished)
 
+TRACE_EVENT(amdgpu_ttm_tt_populate,
+   TP_PROTO(struct amdgpu_device *adev, uint64_t dma_address, uint64_t 
phys_address),
+   TP_ARGS(adev, dma_address, phys_address),
+   TP_STRUCT__entry(
+   __field(uint16_t, domain)
+   __field(uint8_t, bus)
+   __field(uint8_t, slot)
+   __field(uint8_t, func)
+   __field(uint64_t, dma)
+   __field(uint64_t, phys)
+   ),
+   TP_fast_assign(
+  __entry->domain = pci_domain_nr(adev->pdev->bus);
+  __entry->bus = adev->pdev->bus->number;
+  __entry->slot = PCI_SLOT(adev->pdev->devfn);
+  __entry->func = PCI_FUNC(adev->pdev->devfn);
+  __entry->dma = dma_address;
+  __entry->phys = phys_address;
+  ),
+   TP_printk("%04x:%02x:%02x.%x: 0x%llx => 0x%llx",
+ (unsigned)__entry->domain,
+ (unsigned)__entry->bus,
+ (unsigned)__entry->slot,
+ (unsigned)__entry->func,
+ (unsigned long long)__entry->dma,
+ (unsigned long long)__entry->phys)
+);
+
 TRACE_EVENT(amdgpu_mm_rreg,
TP_PROTO(unsigned did, uint32_t reg, uint32_t value),
TP_ARGS(did, reg, value),
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 8da59d212b3b..7857fc581342 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -43,6 +43,7 @@
 #include 
 #include 
 #include "amdgpu.h"
+#include "amdgpu_trace.h"
 #include "bif/bif_4_1_d.h"
 
 #define DRM_FILE_PAGE_OFFSET (0x1ULL >> PAGE_SHIFT)
@@ -667,7 +668,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
 {
struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
struct amdgpu_ttm_tt *gtt = (void *)ttm;
-   unsigned nents;
+   unsigned i, nents;
int r;
 
int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
@@ -688,6 +689,15 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
 gtt->ttm.dma_address, ttm->num_pages);
 
+   if (unlikely(trace_amdgpu_ttm_tt_populate_enabled())) {
+   for (i = 0; i < ttm->num_pages; i++) {
+   trace_amdgpu_ttm_tt_populate(
+   adev,
+   gtt->ttm.dma_address[i],
+   page_to_phys(ttm->pages[i]));
+   }
+   }
+
return 0;
 
 release_sg:
@@ -892,7 +902,7 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct 
ttm_bo_device *bdev,
 
 static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm)
 {
-   struct amdgpu_device *adev;
+   struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
struct amdgpu_ttm_tt *gtt = (void *)ttm;
unsigned i;
int r;
@@ -915,14 +925,14 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm)
drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
 gtt->ttm.dma_address, 
ttm->num_pages);
ttm->state = tt_unbound;
-   return 0;
+   r = 0;
+   goto trace_mappings;
}
 
-   adev = amdgpu_ttm_adev(ttm->bdev);
-
 #ifdef CONFIG_SWIOTLB
if (swiotlb_nr_tbl()) {
-   return ttm_dma_populate(>ttm, adev->dev);
+   r = ttm_dma_populate(>ttm, adev->dev);
+   goto trace_mappings;
}
 #endif
 
@@ -945,7 +955,18 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm)
return -EFAULT;
}
}
-   return 0;
+
+   r = 0;
+trace_mappings:
+   if (!r && unlikely(trace_amdgpu_ttm_tt_populate_enabled())) {

Re: [PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping

2017-08-01 Thread Christian König

Am 01.08.2017 um 16:26 schrieb Tom St Denis:

On 01/08/17 10:10 AM, Christian König wrote:


You need to cover multiple code path here:
1. The one you currently implemented which uses ttm_dma_populate() 
and pci_map_page().

2. The one using ttm_dma_populate().


I'll have to look into this one though it's my understanding that code 
path is only followed if there's no (hardware) IOMMU right?


Wrong, that one is used when "anything" in the system used the SWIOTLB 
path once. So the detection doesn't always work reliable.


This one seems rather straight forward but the only catch is I don't 
have access to "adev" inside that drm function.


When you have a ttm you can always do "amdgpu_ttm_adev(ttm->bdev)" to 
get the adev.



  Would it be taboo to do something like

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

index 376078334f54..cd97ef2144c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -916,6 +916,12 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt 
*ttm)

drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
gtt->ttm.dma_address, ttm->num_pages);
ttm->state = tt_unbound;
+   for (i = 0; i < ttm->num_pages; i++) {
+   trace_amdgpu_ttm_tt_populate(
+   adev,
+   gtt->ttm.dma_address[i],
+   page_to_phys(ttm->pages[i]));
+   }
return 0;
}

This would normally result in a for loop around a nop which shouldn't 
be a huge performance hit but is one none the less.


Try using trace_trace_amdgpu_ttm_tt_populate_enabled(), those functions 
are generated by the trace subsystem automatically when you define a 
trace point.


It just doesn't use those nice tricks to modify the compiled binary on 
the fly.


Christian.



4. The in amdgpu_ttm_tt_pin_userptr() which uses dma_map_sg() for 
userptrs.


Basically all just take gtt->ttm.ttm.pages and fill 
gtt->ttm.dma_address.


Same comment as #3.  I can tackle this with a for loop around the 
trace if that is ok.


I suggest to add a helper you can call to trace all 
pages->dma_address mappings inside a ttm.


One thing at a time :-).  That would probably be a bit better since 
the trace log gets filled with remappings (which is why my 
proof-of-concept umr patch only grabs the latest mapping as it reads 
the trace).


Is there a handy place we can grab a list of ttm_tt's bound to our 
device?  If so then in theory I could write a debugfs interface instead.


Thanks for your patience as I'm definitely less familiar with the VM 
side of things :-)


Cheers,
Tom
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping

2017-08-01 Thread Tom St Denis

On 01/08/17 10:10 AM, Christian König wrote:


You need to cover multiple code path here:
1. The one you currently implemented which uses ttm_dma_populate() and 
pci_map_page().

2. The one using ttm_dma_populate().


I'll have to look into this one though it's my understanding that code 
path is only followed if there's no (hardware) IOMMU right?  Which while 
it'd have to be covered isn't a high priority right now (our devel 
platforms have hardware IOMMU).


None the less I'll look into it once I figure out #3 and #4 per your 
comments.


3. The one using drm_prime_sg_to_page_addr_arrays() a bit above for 
imported BOs.


This one seems rather straight forward but the only catch is I don't 
have access to "adev" inside that drm function.  Would it be taboo to do 
something like


diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

index 376078334f54..cd97ef2144c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -916,6 +916,12 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm)
drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
 gtt->ttm.dma_address, 
ttm->num_pages);

ttm->state = tt_unbound;
+   for (i = 0; i < ttm->num_pages; i++) {
+   trace_amdgpu_ttm_tt_populate(
+   adev,
+   gtt->ttm.dma_address[i],
+   page_to_phys(ttm->pages[i]));
+   }
return 0;
}

This would normally result in a for loop around a nop which shouldn't be 
a huge performance hit but is one none the less.




4. The in amdgpu_ttm_tt_pin_userptr() which uses dma_map_sg() for userptrs.

Basically all just take gtt->ttm.ttm.pages and fill gtt->ttm.dma_address.


Same comment as #3.  I can tackle this with a for loop around the trace 
if that is ok.


I suggest to add a helper you can call to trace all pages->dma_address 
mappings inside a ttm.


One thing at a time :-).  That would probably be a bit better since the 
trace log gets filled with remappings (which is why my proof-of-concept 
umr patch only grabs the latest mapping as it reads the trace).


Is there a handy place we can grab a list of ttm_tt's bound to our 
device?  If so then in theory I could write a debugfs interface instead.


Thanks for your patience as I'm definitely less familiar with the VM 
side of things :-)


Cheers,
Tom
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping

2017-08-01 Thread Tom St Denis

On 01/08/17 07:55 AM, Christian König wrote:

Am 01.08.2017 um 13:51 schrieb Tom St Denis:

This helps map DMA addresses back to physical addresses.

Signed-off-by: Tom St Denis 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 28 


  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   |  8 
  2 files changed, 36 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h

index 509f7a63d40c..5b2bb28da504 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -14,6 +14,34 @@
  #define AMDGPU_JOB_GET_TIMELINE_NAME(job) \
   
job->base.s_fence->finished.ops->get_timeline_name(>base.s_fence->finished) 


+TRACE_EVENT(amdgpu_ttm_tt_populate,
+TP_PROTO(uint16_t domain, uint8_t bus, uint8_t slot, uint8_t 
func, uint64_t dma_address, uint64_t phys_address),

+TP_ARGS(domain, bus, slot, func, dma_address, phys_address),
+TP_STRUCT__entry(
+__field(uint16_t, domain)
+__field(uint8_t, bus)
+__field(uint8_t, slot)
+__field(uint8_t, func)
+__field(uint64_t, dma)
+__field(uint64_t, phys)
+),


Better just give adev here and extract the values during the fast assign.


Easy enough, I've done this now.




+TP_fast_assign(
+   __entry->domain = domain;
+   __entry->bus = bus;
+   __entry->slot = slot;
+   __entry->func = func;
+   __entry->dma = dma_address;
+   __entry->phys = phys_address;
+   ),
+TP_printk("%04x:%02x:%02x.%x: 0x%llx => 0x%llx",
+  (unsigned)__entry->domain,
+  (unsigned)__entry->bus,
+  (unsigned)__entry->slot,
+  (unsigned)__entry->func,
+  (unsigned long long)__entry->dma,
+  (unsigned long long)__entry->phys)
+);
+
  TRACE_EVENT(amdgpu_mm_rreg,
  TP_PROTO(unsigned did, uint32_t reg, uint32_t value),
  TP_ARGS(did, reg, value),
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

index 8da59d212b3b..1cf274603476 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -43,6 +43,7 @@
  #include 
  #include 
  #include "amdgpu.h"
+#include "amdgpu_trace.h"
  #include "bif/bif_4_1_d.h"
  #define DRM_FILE_PAGE_OFFSET (0x1ULL >> PAGE_SHIFT)
@@ -944,6 +945,13 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt 
*ttm)

  ttm_pool_unpopulate(ttm);
  return -EFAULT;
  }
+trace_amdgpu_ttm_tt_populate(
+pci_domain_nr(adev->pdev->bus),
+adev->pdev->bus->number,
+PCI_SLOT(adev->pdev->devfn),
+PCI_FUNC(adev->pdev->devfn),
+gtt->ttm.dma_address[i],
+page_to_phys(ttm->pages[i]));


Please add that tracing for the dma pool path as well.

With that fixed the change looks good to me,
Christian.


Unsure what you mean here.  The ttm_pool_populate() seems to be 
preparing the page list to back the request.


Tom
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping

2017-08-01 Thread Christian König

Am 01.08.2017 um 13:51 schrieb Tom St Denis:

This helps map DMA addresses back to physical addresses.

Signed-off-by: Tom St Denis 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 28 
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   |  8 
  2 files changed, 36 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 509f7a63d40c..5b2bb28da504 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -14,6 +14,34 @@
  #define AMDGPU_JOB_GET_TIMELINE_NAME(job) \
 
job->base.s_fence->finished.ops->get_timeline_name(>base.s_fence->finished)
  
+TRACE_EVENT(amdgpu_ttm_tt_populate,

+   TP_PROTO(uint16_t domain, uint8_t bus, uint8_t slot, uint8_t func, 
uint64_t dma_address, uint64_t phys_address),
+   TP_ARGS(domain, bus, slot, func, dma_address, phys_address),
+   TP_STRUCT__entry(
+   __field(uint16_t, domain)
+   __field(uint8_t, bus)
+   __field(uint8_t, slot)
+   __field(uint8_t, func)
+   __field(uint64_t, dma)
+   __field(uint64_t, phys)
+   ),


Better just give adev here and extract the values during the fast assign.


+   TP_fast_assign(
+  __entry->domain = domain;
+  __entry->bus = bus;
+  __entry->slot = slot;
+  __entry->func = func;
+  __entry->dma = dma_address;
+  __entry->phys = phys_address;
+  ),
+   TP_printk("%04x:%02x:%02x.%x: 0x%llx => 0x%llx",
+ (unsigned)__entry->domain,
+ (unsigned)__entry->bus,
+ (unsigned)__entry->slot,
+ (unsigned)__entry->func,
+ (unsigned long long)__entry->dma,
+ (unsigned long long)__entry->phys)
+);
+
  TRACE_EVENT(amdgpu_mm_rreg,
TP_PROTO(unsigned did, uint32_t reg, uint32_t value),
TP_ARGS(did, reg, value),
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 8da59d212b3b..1cf274603476 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -43,6 +43,7 @@
  #include 
  #include 
  #include "amdgpu.h"
+#include "amdgpu_trace.h"
  #include "bif/bif_4_1_d.h"
  
  #define DRM_FILE_PAGE_OFFSET (0x1ULL >> PAGE_SHIFT)

@@ -944,6 +945,13 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm)
ttm_pool_unpopulate(ttm);
return -EFAULT;
}
+   trace_amdgpu_ttm_tt_populate(
+   pci_domain_nr(adev->pdev->bus),
+   adev->pdev->bus->number,
+   PCI_SLOT(adev->pdev->devfn),
+   PCI_FUNC(adev->pdev->devfn),
+   gtt->ttm.dma_address[i],
+   page_to_phys(ttm->pages[i]));


Please add that tracing for the dma pool path as well.

With that fixed the change looks good to me,
Christian.


}
return 0;
  }



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping

2017-08-01 Thread Tom St Denis
This helps map DMA addresses back to physical addresses.

Signed-off-by: Tom St Denis 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 28 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   |  8 
 2 files changed, 36 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 509f7a63d40c..5b2bb28da504 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -14,6 +14,34 @@
 #define AMDGPU_JOB_GET_TIMELINE_NAME(job) \
 
job->base.s_fence->finished.ops->get_timeline_name(>base.s_fence->finished)
 
+TRACE_EVENT(amdgpu_ttm_tt_populate,
+   TP_PROTO(uint16_t domain, uint8_t bus, uint8_t slot, uint8_t func, 
uint64_t dma_address, uint64_t phys_address),
+   TP_ARGS(domain, bus, slot, func, dma_address, phys_address),
+   TP_STRUCT__entry(
+   __field(uint16_t, domain)
+   __field(uint8_t, bus)
+   __field(uint8_t, slot)
+   __field(uint8_t, func)
+   __field(uint64_t, dma)
+   __field(uint64_t, phys)
+   ),
+   TP_fast_assign(
+  __entry->domain = domain;
+  __entry->bus = bus;
+  __entry->slot = slot;
+  __entry->func = func;
+  __entry->dma = dma_address;
+  __entry->phys = phys_address;
+  ),
+   TP_printk("%04x:%02x:%02x.%x: 0x%llx => 0x%llx",
+ (unsigned)__entry->domain,
+ (unsigned)__entry->bus,
+ (unsigned)__entry->slot,
+ (unsigned)__entry->func,
+ (unsigned long long)__entry->dma,
+ (unsigned long long)__entry->phys)
+);
+
 TRACE_EVENT(amdgpu_mm_rreg,
TP_PROTO(unsigned did, uint32_t reg, uint32_t value),
TP_ARGS(did, reg, value),
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 8da59d212b3b..1cf274603476 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -43,6 +43,7 @@
 #include 
 #include 
 #include "amdgpu.h"
+#include "amdgpu_trace.h"
 #include "bif/bif_4_1_d.h"
 
 #define DRM_FILE_PAGE_OFFSET (0x1ULL >> PAGE_SHIFT)
@@ -944,6 +945,13 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm)
ttm_pool_unpopulate(ttm);
return -EFAULT;
}
+   trace_amdgpu_ttm_tt_populate(
+   pci_domain_nr(adev->pdev->bus),
+   adev->pdev->bus->number,
+   PCI_SLOT(adev->pdev->devfn),
+   PCI_FUNC(adev->pdev->devfn),
+   gtt->ttm.dma_address[i],
+   page_to_phys(ttm->pages[i]));
}
return 0;
 }
-- 
2.12.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx