Re: [PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping (v3)
Thanks, I pushed v4 which moved the tracepoints into functions. Cheers, Tom On 08/08/17 08:01 AM, Christian König wrote: Am 08.08.2017 um 13:54 schrieb Tom St Denis: ping? Ups sorry, thought that I already send that out. One comment below. Tom On 02/08/17 07:52 AM, Tom St Denis wrote: This helps map DMA addresses back to physical addresses. Signed-off-by: Tom St Denis(v2): Added tracepoints for USERPTR, SG mappings, and SWIOTBL mappings. Reformatted trace call perform PCI decoding internal to the trace. (v3): Add unmap tracepoints as well --- drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 56 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 55 ++ 2 files changed, 104 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 509f7a63d40c..0d708e8fb391 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -14,6 +14,62 @@ #define AMDGPU_JOB_GET_TIMELINE_NAME(job) \ job->base.s_fence->finished.ops->get_timeline_name(>base.s_fence->finished) +TRACE_EVENT(amdgpu_ttm_tt_populate, +TP_PROTO(struct amdgpu_device *adev, uint64_t dma_address, uint64_t phys_address), +TP_ARGS(adev, dma_address, phys_address), +TP_STRUCT__entry( +__field(uint16_t, domain) +__field(uint8_t, bus) +__field(uint8_t, slot) +__field(uint8_t, func) +__field(uint64_t, dma) +__field(uint64_t, phys) +), +TP_fast_assign( + __entry->domain = pci_domain_nr(adev->pdev->bus); + __entry->bus = adev->pdev->bus->number; + __entry->slot = PCI_SLOT(adev->pdev->devfn); + __entry->func = PCI_FUNC(adev->pdev->devfn); + __entry->dma = dma_address; + __entry->phys = phys_address; + ), +TP_printk("%04x:%02x:%02x.%x: 0x%llx => 0x%llx", + (unsigned)__entry->domain, + (unsigned)__entry->bus, + (unsigned)__entry->slot, + (unsigned)__entry->func, + (unsigned long long)__entry->dma, + (unsigned long long)__entry->phys) +); + +TRACE_EVENT(amdgpu_ttm_tt_unpopulate, +TP_PROTO(struct amdgpu_device *adev, uint64_t dma_address, uint64_t phys_address), +TP_ARGS(adev, dma_address, phys_address), +TP_STRUCT__entry( +__field(uint16_t, domain) +__field(uint8_t, bus) +__field(uint8_t, slot) +__field(uint8_t, func) +__field(uint64_t, dma) +__field(uint64_t, phys) +), +TP_fast_assign( + __entry->domain = pci_domain_nr(adev->pdev->bus); + __entry->bus = adev->pdev->bus->number; + __entry->slot = PCI_SLOT(adev->pdev->devfn); + __entry->func = PCI_FUNC(adev->pdev->devfn); + __entry->dma = dma_address; + __entry->phys = phys_address; + ), +TP_printk("%04x:%02x:%02x.%x: 0x%llx => 0x%llx", + (unsigned)__entry->domain, + (unsigned)__entry->bus, + (unsigned)__entry->slot, + (unsigned)__entry->func, + (unsigned long long)__entry->dma, + (unsigned long long)__entry->phys) +); + TRACE_EVENT(amdgpu_mm_rreg, TP_PROTO(unsigned did, uint32_t reg, uint32_t value), TP_ARGS(did, reg, value), diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 8da59d212b3b..d5f1b99b0ab2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -43,6 +43,7 @@ #include #include #include "amdgpu.h" +#include "amdgpu_trace.h" #include "bif/bif_4_1_d.h" #define DRM_FILE_PAGE_OFFSET (0x1ULL >> PAGE_SHIFT) @@ -667,7 +668,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) { struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); struct amdgpu_ttm_tt *gtt = (void *)ttm; -unsigned nents; +unsigned i, nents; int r; int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); @@ -688,6 +689,15 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages, gtt->ttm.dma_address, ttm->num_pages); +if (unlikely(trace_amdgpu_ttm_tt_populate_enabled())) { +for (i = 0; i < ttm->num_pages; i++) { +trace_amdgpu_ttm_tt_populate( +adev, +gtt->ttm.dma_address[i], +page_to_phys(ttm->pages[i])); +} +} + It would still be nice to have that chunk in a separate function since it is used multiple
Re: [PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping (v3)
Am 08.08.2017 um 13:54 schrieb Tom St Denis: ping? Ups sorry, thought that I already send that out. One comment below. Tom On 02/08/17 07:52 AM, Tom St Denis wrote: This helps map DMA addresses back to physical addresses. Signed-off-by: Tom St Denis(v2): Added tracepoints for USERPTR, SG mappings, and SWIOTBL mappings. Reformatted trace call perform PCI decoding internal to the trace. (v3): Add unmap tracepoints as well --- drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 56 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 55 ++ 2 files changed, 104 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 509f7a63d40c..0d708e8fb391 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -14,6 +14,62 @@ #define AMDGPU_JOB_GET_TIMELINE_NAME(job) \ job->base.s_fence->finished.ops->get_timeline_name(>base.s_fence->finished) +TRACE_EVENT(amdgpu_ttm_tt_populate, +TP_PROTO(struct amdgpu_device *adev, uint64_t dma_address, uint64_t phys_address), +TP_ARGS(adev, dma_address, phys_address), +TP_STRUCT__entry( +__field(uint16_t, domain) +__field(uint8_t, bus) +__field(uint8_t, slot) +__field(uint8_t, func) +__field(uint64_t, dma) +__field(uint64_t, phys) +), +TP_fast_assign( + __entry->domain = pci_domain_nr(adev->pdev->bus); + __entry->bus = adev->pdev->bus->number; + __entry->slot = PCI_SLOT(adev->pdev->devfn); + __entry->func = PCI_FUNC(adev->pdev->devfn); + __entry->dma = dma_address; + __entry->phys = phys_address; + ), +TP_printk("%04x:%02x:%02x.%x: 0x%llx => 0x%llx", + (unsigned)__entry->domain, + (unsigned)__entry->bus, + (unsigned)__entry->slot, + (unsigned)__entry->func, + (unsigned long long)__entry->dma, + (unsigned long long)__entry->phys) +); + +TRACE_EVENT(amdgpu_ttm_tt_unpopulate, +TP_PROTO(struct amdgpu_device *adev, uint64_t dma_address, uint64_t phys_address), +TP_ARGS(adev, dma_address, phys_address), +TP_STRUCT__entry( +__field(uint16_t, domain) +__field(uint8_t, bus) +__field(uint8_t, slot) +__field(uint8_t, func) +__field(uint64_t, dma) +__field(uint64_t, phys) +), +TP_fast_assign( + __entry->domain = pci_domain_nr(adev->pdev->bus); + __entry->bus = adev->pdev->bus->number; + __entry->slot = PCI_SLOT(adev->pdev->devfn); + __entry->func = PCI_FUNC(adev->pdev->devfn); + __entry->dma = dma_address; + __entry->phys = phys_address; + ), +TP_printk("%04x:%02x:%02x.%x: 0x%llx => 0x%llx", + (unsigned)__entry->domain, + (unsigned)__entry->bus, + (unsigned)__entry->slot, + (unsigned)__entry->func, + (unsigned long long)__entry->dma, + (unsigned long long)__entry->phys) +); + TRACE_EVENT(amdgpu_mm_rreg, TP_PROTO(unsigned did, uint32_t reg, uint32_t value), TP_ARGS(did, reg, value), diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 8da59d212b3b..d5f1b99b0ab2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -43,6 +43,7 @@ #include #include #include "amdgpu.h" +#include "amdgpu_trace.h" #include "bif/bif_4_1_d.h" #define DRM_FILE_PAGE_OFFSET (0x1ULL >> PAGE_SHIFT) @@ -667,7 +668,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) { struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); struct amdgpu_ttm_tt *gtt = (void *)ttm; -unsigned nents; +unsigned i, nents; int r; int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); @@ -688,6 +689,15 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages, gtt->ttm.dma_address, ttm->num_pages); +if (unlikely(trace_amdgpu_ttm_tt_populate_enabled())) { +for (i = 0; i < ttm->num_pages; i++) { +trace_amdgpu_ttm_tt_populate( +adev, +gtt->ttm.dma_address[i], +page_to_phys(ttm->pages[i])); +} +} + It would still be nice to have that chunk in a separate function since it is used multiple times, but that's not a must have. Anyway patch is Reviewed-by: Christian König Christian.
Re: [PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping (v3)
ping? Tom On 02/08/17 07:52 AM, Tom St Denis wrote: This helps map DMA addresses back to physical addresses. Signed-off-by: Tom St Denis(v2): Added tracepoints for USERPTR, SG mappings, and SWIOTBL mappings. Reformatted trace call perform PCI decoding internal to the trace. (v3): Add unmap tracepoints as well --- drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 56 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 55 ++ 2 files changed, 104 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 509f7a63d40c..0d708e8fb391 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -14,6 +14,62 @@ #define AMDGPU_JOB_GET_TIMELINE_NAME(job) \ job->base.s_fence->finished.ops->get_timeline_name(>base.s_fence->finished) +TRACE_EVENT(amdgpu_ttm_tt_populate, + TP_PROTO(struct amdgpu_device *adev, uint64_t dma_address, uint64_t phys_address), + TP_ARGS(adev, dma_address, phys_address), + TP_STRUCT__entry( + __field(uint16_t, domain) + __field(uint8_t, bus) + __field(uint8_t, slot) + __field(uint8_t, func) + __field(uint64_t, dma) + __field(uint64_t, phys) + ), + TP_fast_assign( + __entry->domain = pci_domain_nr(adev->pdev->bus); + __entry->bus = adev->pdev->bus->number; + __entry->slot = PCI_SLOT(adev->pdev->devfn); + __entry->func = PCI_FUNC(adev->pdev->devfn); + __entry->dma = dma_address; + __entry->phys = phys_address; + ), + TP_printk("%04x:%02x:%02x.%x: 0x%llx => 0x%llx", + (unsigned)__entry->domain, + (unsigned)__entry->bus, + (unsigned)__entry->slot, + (unsigned)__entry->func, + (unsigned long long)__entry->dma, + (unsigned long long)__entry->phys) +); + +TRACE_EVENT(amdgpu_ttm_tt_unpopulate, + TP_PROTO(struct amdgpu_device *adev, uint64_t dma_address, uint64_t phys_address), + TP_ARGS(adev, dma_address, phys_address), + TP_STRUCT__entry( + __field(uint16_t, domain) + __field(uint8_t, bus) + __field(uint8_t, slot) + __field(uint8_t, func) + __field(uint64_t, dma) + __field(uint64_t, phys) + ), + TP_fast_assign( + __entry->domain = pci_domain_nr(adev->pdev->bus); + __entry->bus = adev->pdev->bus->number; + __entry->slot = PCI_SLOT(adev->pdev->devfn); + __entry->func = PCI_FUNC(adev->pdev->devfn); + __entry->dma = dma_address; + __entry->phys = phys_address; + ), + TP_printk("%04x:%02x:%02x.%x: 0x%llx => 0x%llx", + (unsigned)__entry->domain, + (unsigned)__entry->bus, + (unsigned)__entry->slot, + (unsigned)__entry->func, + (unsigned long long)__entry->dma, + (unsigned long long)__entry->phys) +); + TRACE_EVENT(amdgpu_mm_rreg, TP_PROTO(unsigned did, uint32_t reg, uint32_t value), TP_ARGS(did, reg, value), diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 8da59d212b3b..d5f1b99b0ab2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -43,6 +43,7 @@ #include #include #include "amdgpu.h" +#include "amdgpu_trace.h" #include "bif/bif_4_1_d.h" #define DRM_FILE_PAGE_OFFSET (0x1ULL >> PAGE_SHIFT) @@ -667,7 +668,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) { struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); struct amdgpu_ttm_tt *gtt = (void *)ttm; - unsigned nents; + unsigned i, nents; int r; int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); @@ -688,6 +689,15 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages, gtt->ttm.dma_address, ttm->num_pages); + if (unlikely(trace_amdgpu_ttm_tt_populate_enabled())) { + for (i = 0; i < ttm->num_pages; i++) {
Re: [PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping
Am 01.08.2017 um 19:00 schrieb Tom St Denis: On 01/08/17 11:41 AM, Christian König wrote: We can turn of the trace subsystem during compile time already. Additional to that the trace points use self modifying code to make sure that they don't have any overhead as long as they are turned off. But I don't think this works with the "if (trace_*_enabled()" as well. Anyway, not a performance critical code path so I don't really bother. Well aside from that this will only really be useful in upstream kernels where we don't control the configuration. Eventually if we come up with a better solution (tm) then we can revert this cleanly. Actually even if we come up with some other approach to access the GPU VM of a process I would rather like to keep that trace point because it is rather useful for some userptr debugging as well. Christian. Tom ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping
On 01/08/17 11:41 AM, Christian König wrote: We can turn of the trace subsystem during compile time already. Additional to that the trace points use self modifying code to make sure that they don't have any overhead as long as they are turned off. But I don't think this works with the "if (trace_*_enabled()" as well. Anyway, not a performance critical code path so I don't really bother. Well aside from that this will only really be useful in upstream kernels where we don't control the configuration. Eventually if we come up with a better solution (tm) then we can revert this cleanly. Tom ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping
In v2 of the patch I used the _enabled() function around the blocks so the for loop is only reached if the trace is enabled. Tom On 01/08/17 11:56 AM, Xie, AlexBin wrote: I don't know if compiler is smart enough to optimize the following for statement out... + for (i = 0; i < ttm->num_pages; i++) { + trace_amdgpu_ttm_tt_populate( + adev, + gtt->ttm.dma_address[i], + page_to_phys(ttm->pages[i])); + } -Alex Bin *From:* Christian König <deathsim...@vodafone.de> *Sent:* Tuesday, August 1, 2017 11:41 AM *To:* Xie, AlexBin; amd-gfx@lists.freedesktop.org *Subject:* Re: [PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping We can turn of the trace subsystem during compile time already. Additional to that the trace points use self modifying code to make sure that they don't have any overhead as long as they are turned off. But I don't think this works with the "if (trace_*_enabled()" as well. Anyway, not a performance critical code path so I don't really bother. Christian. Am 01.08.2017 um 17:00 schrieb axie: Because this is used by a debug tool, can we use a macro to conditionally compile this feature? On 2017-08-01 10:54 AM, Christian König wrote: Am 01.08.2017 um 16:26 schrieb Tom St Denis: On 01/08/17 10:10 AM, Christian König wrote: You need to cover multiple code path here: 1. The one you currently implemented which uses ttm_dma_populate() and pci_map_page(). 2. The one using ttm_dma_populate(). I'll have to look into this one though it's my understanding that code path is only followed if there's no (hardware) IOMMU right? Wrong, that one is used when "anything" in the system used the SWIOTLB path once. So the detection doesn't always work reliable. This one seems rather straight forward but the only catch is I don't have access to "adev" inside that drm function. When you have a ttm you can always do "amdgpu_ttm_adev(ttm->bdev)" to get the adev. Would it be taboo to do something like diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 376078334f54..cd97ef2144c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -916,6 +916,12 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm) drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages, gtt->ttm.dma_address, ttm->num_pages); ttm->state = tt_unbound; + for (i = 0; i < ttm->num_pages; i++) { + trace_amdgpu_ttm_tt_populate( + adev, + gtt->ttm.dma_address[i], + page_to_phys(ttm->pages[i])); + } return 0; } This would normally result in a for loop around a nop which shouldn't be a huge performance hit but is one none the less. Try using trace_trace_amdgpu_ttm_tt_populate_enabled(), those functions are generated by the trace subsystem automatically when you define a trace point. It just doesn't use those nice tricks to modify the compiled binary on the fly. Christian. 4. The in amdgpu_ttm_tt_pin_userptr() which uses dma_map_sg() for userptrs. Basically all just take gtt->ttm.ttm.pages and fill gtt->ttm.dma_address. Same comment as #3. I can tackle this with a for loop around the trace if that is ok. I suggest to add a helper you can call to trace all pages->dma_address mappings inside a ttm. One thing at a time :-). That would probably be a bit better since the trace log gets filled with remappings (which is why my proof-of-concept umr patch only grabs the latest mapping as it reads the trace). Is there a handy place we can grab a list of ttm_tt's bound to our device? If so then in theory I could write a debugfs interface instead. Thanks for your patience as I'm definitely less familiar with the VM side of things :-) Cheers, Tom ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx amd-gfx Info Page - freedesktop.org <https://lists.freedesktop.org/mailman/listinfo/amd-gfx> lists.freedesktop.org Subscribing to amd-gfx: Subscribe to amd-gfx by filling out the following form. Use of all freedesktop.org lists is subject to our Code of ... ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx amd-gfx Info Page - freedesktop.org <https://lists.freedesktop.org/mailman/listinfo/amd-gfx> lists.freedesktop.org Subscribing to amd-gfx: Sub
Re: [PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping
We can turn of the trace subsystem during compile time already. Additional to that the trace points use self modifying code to make sure that they don't have any overhead as long as they are turned off. But I don't think this works with the "if (trace_*_enabled()" as well. Anyway, not a performance critical code path so I don't really bother. Christian. Am 01.08.2017 um 17:00 schrieb axie: Because this is used by a debug tool, can we use a macro to conditionally compile this feature? On 2017-08-01 10:54 AM, Christian König wrote: Am 01.08.2017 um 16:26 schrieb Tom St Denis: On 01/08/17 10:10 AM, Christian König wrote: You need to cover multiple code path here: 1. The one you currently implemented which uses ttm_dma_populate() and pci_map_page(). 2. The one using ttm_dma_populate(). I'll have to look into this one though it's my understanding that code path is only followed if there's no (hardware) IOMMU right? Wrong, that one is used when "anything" in the system used the SWIOTLB path once. So the detection doesn't always work reliable. This one seems rather straight forward but the only catch is I don't have access to "adev" inside that drm function. When you have a ttm you can always do "amdgpu_ttm_adev(ttm->bdev)" to get the adev. Would it be taboo to do something like diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 376078334f54..cd97ef2144c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -916,6 +916,12 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm) drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages, gtt->ttm.dma_address, ttm->num_pages); ttm->state = tt_unbound; + for (i = 0; i < ttm->num_pages; i++) { + trace_amdgpu_ttm_tt_populate( + adev, + gtt->ttm.dma_address[i], + page_to_phys(ttm->pages[i])); + } return 0; } This would normally result in a for loop around a nop which shouldn't be a huge performance hit but is one none the less. Try using trace_trace_amdgpu_ttm_tt_populate_enabled(), those functions are generated by the trace subsystem automatically when you define a trace point. It just doesn't use those nice tricks to modify the compiled binary on the fly. Christian. 4. The in amdgpu_ttm_tt_pin_userptr() which uses dma_map_sg() for userptrs. Basically all just take gtt->ttm.ttm.pages and fill gtt->ttm.dma_address. Same comment as #3. I can tackle this with a for loop around the trace if that is ok. I suggest to add a helper you can call to trace all pages->dma_address mappings inside a ttm. One thing at a time :-). That would probably be a bit better since the trace log gets filled with remappings (which is why my proof-of-concept umr patch only grabs the latest mapping as it reads the trace). Is there a handy place we can grab a list of ttm_tt's bound to our device? If so then in theory I could write a debugfs interface instead. Thanks for your patience as I'm definitely less familiar with the VM side of things :-) Cheers, Tom ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping
Because this is used by a debug tool, can we use a macro to conditionally compile this feature? On 2017-08-01 10:54 AM, Christian König wrote: Am 01.08.2017 um 16:26 schrieb Tom St Denis: On 01/08/17 10:10 AM, Christian König wrote: You need to cover multiple code path here: 1. The one you currently implemented which uses ttm_dma_populate() and pci_map_page(). 2. The one using ttm_dma_populate(). I'll have to look into this one though it's my understanding that code path is only followed if there's no (hardware) IOMMU right? Wrong, that one is used when "anything" in the system used the SWIOTLB path once. So the detection doesn't always work reliable. This one seems rather straight forward but the only catch is I don't have access to "adev" inside that drm function. When you have a ttm you can always do "amdgpu_ttm_adev(ttm->bdev)" to get the adev. Would it be taboo to do something like diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 376078334f54..cd97ef2144c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -916,6 +916,12 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm) drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages, gtt->ttm.dma_address, ttm->num_pages); ttm->state = tt_unbound; + for (i = 0; i < ttm->num_pages; i++) { + trace_amdgpu_ttm_tt_populate( + adev, + gtt->ttm.dma_address[i], + page_to_phys(ttm->pages[i])); + } return 0; } This would normally result in a for loop around a nop which shouldn't be a huge performance hit but is one none the less. Try using trace_trace_amdgpu_ttm_tt_populate_enabled(), those functions are generated by the trace subsystem automatically when you define a trace point. It just doesn't use those nice tricks to modify the compiled binary on the fly. Christian. 4. The in amdgpu_ttm_tt_pin_userptr() which uses dma_map_sg() for userptrs. Basically all just take gtt->ttm.ttm.pages and fill gtt->ttm.dma_address. Same comment as #3. I can tackle this with a for loop around the trace if that is ok. I suggest to add a helper you can call to trace all pages->dma_address mappings inside a ttm. One thing at a time :-). That would probably be a bit better since the trace log gets filled with remappings (which is why my proof-of-concept umr patch only grabs the latest mapping as it reads the trace). Is there a handy place we can grab a list of ttm_tt's bound to our device? If so then in theory I could write a debugfs interface instead. Thanks for your patience as I'm definitely less familiar with the VM side of things :-) Cheers, Tom ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping (v2)
This helps map DMA addresses back to physical addresses. Signed-off-by: Tom St Denis(v2): Added tracepoints for USERPTR, SG mappings, and SWIOTBL mappings. Reformatted trace call perform PCI decoding internal to the trace. --- drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 28 + drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 35 --- 2 files changed, 56 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 509f7a63d40c..3e0f1885a379 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -14,6 +14,34 @@ #define AMDGPU_JOB_GET_TIMELINE_NAME(job) \ job->base.s_fence->finished.ops->get_timeline_name(>base.s_fence->finished) +TRACE_EVENT(amdgpu_ttm_tt_populate, + TP_PROTO(struct amdgpu_device *adev, uint64_t dma_address, uint64_t phys_address), + TP_ARGS(adev, dma_address, phys_address), + TP_STRUCT__entry( + __field(uint16_t, domain) + __field(uint8_t, bus) + __field(uint8_t, slot) + __field(uint8_t, func) + __field(uint64_t, dma) + __field(uint64_t, phys) + ), + TP_fast_assign( + __entry->domain = pci_domain_nr(adev->pdev->bus); + __entry->bus = adev->pdev->bus->number; + __entry->slot = PCI_SLOT(adev->pdev->devfn); + __entry->func = PCI_FUNC(adev->pdev->devfn); + __entry->dma = dma_address; + __entry->phys = phys_address; + ), + TP_printk("%04x:%02x:%02x.%x: 0x%llx => 0x%llx", + (unsigned)__entry->domain, + (unsigned)__entry->bus, + (unsigned)__entry->slot, + (unsigned)__entry->func, + (unsigned long long)__entry->dma, + (unsigned long long)__entry->phys) +); + TRACE_EVENT(amdgpu_mm_rreg, TP_PROTO(unsigned did, uint32_t reg, uint32_t value), TP_ARGS(did, reg, value), diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 8da59d212b3b..7857fc581342 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -43,6 +43,7 @@ #include #include #include "amdgpu.h" +#include "amdgpu_trace.h" #include "bif/bif_4_1_d.h" #define DRM_FILE_PAGE_OFFSET (0x1ULL >> PAGE_SHIFT) @@ -667,7 +668,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) { struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); struct amdgpu_ttm_tt *gtt = (void *)ttm; - unsigned nents; + unsigned i, nents; int r; int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); @@ -688,6 +689,15 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages, gtt->ttm.dma_address, ttm->num_pages); + if (unlikely(trace_amdgpu_ttm_tt_populate_enabled())) { + for (i = 0; i < ttm->num_pages; i++) { + trace_amdgpu_ttm_tt_populate( + adev, + gtt->ttm.dma_address[i], + page_to_phys(ttm->pages[i])); + } + } + return 0; release_sg: @@ -892,7 +902,7 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_bo_device *bdev, static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm) { - struct amdgpu_device *adev; + struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); struct amdgpu_ttm_tt *gtt = (void *)ttm; unsigned i; int r; @@ -915,14 +925,14 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm) drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages, gtt->ttm.dma_address, ttm->num_pages); ttm->state = tt_unbound; - return 0; + r = 0; + goto trace_mappings; } - adev = amdgpu_ttm_adev(ttm->bdev); - #ifdef CONFIG_SWIOTLB if (swiotlb_nr_tbl()) { - return ttm_dma_populate(>ttm, adev->dev); + r = ttm_dma_populate(>ttm, adev->dev); + goto trace_mappings; } #endif @@ -945,7 +955,18 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm) return -EFAULT; } } - return 0; + + r = 0; +trace_mappings: + if (!r && unlikely(trace_amdgpu_ttm_tt_populate_enabled())) {
Re: [PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping
Am 01.08.2017 um 16:26 schrieb Tom St Denis: On 01/08/17 10:10 AM, Christian König wrote: You need to cover multiple code path here: 1. The one you currently implemented which uses ttm_dma_populate() and pci_map_page(). 2. The one using ttm_dma_populate(). I'll have to look into this one though it's my understanding that code path is only followed if there's no (hardware) IOMMU right? Wrong, that one is used when "anything" in the system used the SWIOTLB path once. So the detection doesn't always work reliable. This one seems rather straight forward but the only catch is I don't have access to "adev" inside that drm function. When you have a ttm you can always do "amdgpu_ttm_adev(ttm->bdev)" to get the adev. Would it be taboo to do something like diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 376078334f54..cd97ef2144c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -916,6 +916,12 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm) drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages, gtt->ttm.dma_address, ttm->num_pages); ttm->state = tt_unbound; + for (i = 0; i < ttm->num_pages; i++) { + trace_amdgpu_ttm_tt_populate( + adev, + gtt->ttm.dma_address[i], + page_to_phys(ttm->pages[i])); + } return 0; } This would normally result in a for loop around a nop which shouldn't be a huge performance hit but is one none the less. Try using trace_trace_amdgpu_ttm_tt_populate_enabled(), those functions are generated by the trace subsystem automatically when you define a trace point. It just doesn't use those nice tricks to modify the compiled binary on the fly. Christian. 4. The in amdgpu_ttm_tt_pin_userptr() which uses dma_map_sg() for userptrs. Basically all just take gtt->ttm.ttm.pages and fill gtt->ttm.dma_address. Same comment as #3. I can tackle this with a for loop around the trace if that is ok. I suggest to add a helper you can call to trace all pages->dma_address mappings inside a ttm. One thing at a time :-). That would probably be a bit better since the trace log gets filled with remappings (which is why my proof-of-concept umr patch only grabs the latest mapping as it reads the trace). Is there a handy place we can grab a list of ttm_tt's bound to our device? If so then in theory I could write a debugfs interface instead. Thanks for your patience as I'm definitely less familiar with the VM side of things :-) Cheers, Tom ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping
On 01/08/17 10:10 AM, Christian König wrote: You need to cover multiple code path here: 1. The one you currently implemented which uses ttm_dma_populate() and pci_map_page(). 2. The one using ttm_dma_populate(). I'll have to look into this one though it's my understanding that code path is only followed if there's no (hardware) IOMMU right? Which while it'd have to be covered isn't a high priority right now (our devel platforms have hardware IOMMU). None the less I'll look into it once I figure out #3 and #4 per your comments. 3. The one using drm_prime_sg_to_page_addr_arrays() a bit above for imported BOs. This one seems rather straight forward but the only catch is I don't have access to "adev" inside that drm function. Would it be taboo to do something like diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 376078334f54..cd97ef2144c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -916,6 +916,12 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm) drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages, gtt->ttm.dma_address, ttm->num_pages); ttm->state = tt_unbound; + for (i = 0; i < ttm->num_pages; i++) { + trace_amdgpu_ttm_tt_populate( + adev, + gtt->ttm.dma_address[i], + page_to_phys(ttm->pages[i])); + } return 0; } This would normally result in a for loop around a nop which shouldn't be a huge performance hit but is one none the less. 4. The in amdgpu_ttm_tt_pin_userptr() which uses dma_map_sg() for userptrs. Basically all just take gtt->ttm.ttm.pages and fill gtt->ttm.dma_address. Same comment as #3. I can tackle this with a for loop around the trace if that is ok. I suggest to add a helper you can call to trace all pages->dma_address mappings inside a ttm. One thing at a time :-). That would probably be a bit better since the trace log gets filled with remappings (which is why my proof-of-concept umr patch only grabs the latest mapping as it reads the trace). Is there a handy place we can grab a list of ttm_tt's bound to our device? If so then in theory I could write a debugfs interface instead. Thanks for your patience as I'm definitely less familiar with the VM side of things :-) Cheers, Tom ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping
On 01/08/17 07:55 AM, Christian König wrote: Am 01.08.2017 um 13:51 schrieb Tom St Denis: This helps map DMA addresses back to physical addresses. Signed-off-by: Tom St Denis--- drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 28 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 8 2 files changed, 36 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 509f7a63d40c..5b2bb28da504 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -14,6 +14,34 @@ #define AMDGPU_JOB_GET_TIMELINE_NAME(job) \ job->base.s_fence->finished.ops->get_timeline_name(>base.s_fence->finished) +TRACE_EVENT(amdgpu_ttm_tt_populate, +TP_PROTO(uint16_t domain, uint8_t bus, uint8_t slot, uint8_t func, uint64_t dma_address, uint64_t phys_address), +TP_ARGS(domain, bus, slot, func, dma_address, phys_address), +TP_STRUCT__entry( +__field(uint16_t, domain) +__field(uint8_t, bus) +__field(uint8_t, slot) +__field(uint8_t, func) +__field(uint64_t, dma) +__field(uint64_t, phys) +), Better just give adev here and extract the values during the fast assign. Easy enough, I've done this now. +TP_fast_assign( + __entry->domain = domain; + __entry->bus = bus; + __entry->slot = slot; + __entry->func = func; + __entry->dma = dma_address; + __entry->phys = phys_address; + ), +TP_printk("%04x:%02x:%02x.%x: 0x%llx => 0x%llx", + (unsigned)__entry->domain, + (unsigned)__entry->bus, + (unsigned)__entry->slot, + (unsigned)__entry->func, + (unsigned long long)__entry->dma, + (unsigned long long)__entry->phys) +); + TRACE_EVENT(amdgpu_mm_rreg, TP_PROTO(unsigned did, uint32_t reg, uint32_t value), TP_ARGS(did, reg, value), diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 8da59d212b3b..1cf274603476 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -43,6 +43,7 @@ #include #include #include "amdgpu.h" +#include "amdgpu_trace.h" #include "bif/bif_4_1_d.h" #define DRM_FILE_PAGE_OFFSET (0x1ULL >> PAGE_SHIFT) @@ -944,6 +945,13 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm) ttm_pool_unpopulate(ttm); return -EFAULT; } +trace_amdgpu_ttm_tt_populate( +pci_domain_nr(adev->pdev->bus), +adev->pdev->bus->number, +PCI_SLOT(adev->pdev->devfn), +PCI_FUNC(adev->pdev->devfn), +gtt->ttm.dma_address[i], +page_to_phys(ttm->pages[i])); Please add that tracing for the dma pool path as well. With that fixed the change looks good to me, Christian. Unsure what you mean here. The ttm_pool_populate() seems to be preparing the page list to back the request. Tom ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping
Am 01.08.2017 um 13:51 schrieb Tom St Denis: This helps map DMA addresses back to physical addresses. Signed-off-by: Tom St Denis--- drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 28 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 8 2 files changed, 36 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 509f7a63d40c..5b2bb28da504 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -14,6 +14,34 @@ #define AMDGPU_JOB_GET_TIMELINE_NAME(job) \ job->base.s_fence->finished.ops->get_timeline_name(>base.s_fence->finished) +TRACE_EVENT(amdgpu_ttm_tt_populate, + TP_PROTO(uint16_t domain, uint8_t bus, uint8_t slot, uint8_t func, uint64_t dma_address, uint64_t phys_address), + TP_ARGS(domain, bus, slot, func, dma_address, phys_address), + TP_STRUCT__entry( + __field(uint16_t, domain) + __field(uint8_t, bus) + __field(uint8_t, slot) + __field(uint8_t, func) + __field(uint64_t, dma) + __field(uint64_t, phys) + ), Better just give adev here and extract the values during the fast assign. + TP_fast_assign( + __entry->domain = domain; + __entry->bus = bus; + __entry->slot = slot; + __entry->func = func; + __entry->dma = dma_address; + __entry->phys = phys_address; + ), + TP_printk("%04x:%02x:%02x.%x: 0x%llx => 0x%llx", + (unsigned)__entry->domain, + (unsigned)__entry->bus, + (unsigned)__entry->slot, + (unsigned)__entry->func, + (unsigned long long)__entry->dma, + (unsigned long long)__entry->phys) +); + TRACE_EVENT(amdgpu_mm_rreg, TP_PROTO(unsigned did, uint32_t reg, uint32_t value), TP_ARGS(did, reg, value), diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 8da59d212b3b..1cf274603476 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -43,6 +43,7 @@ #include #include #include "amdgpu.h" +#include "amdgpu_trace.h" #include "bif/bif_4_1_d.h" #define DRM_FILE_PAGE_OFFSET (0x1ULL >> PAGE_SHIFT) @@ -944,6 +945,13 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm) ttm_pool_unpopulate(ttm); return -EFAULT; } + trace_amdgpu_ttm_tt_populate( + pci_domain_nr(adev->pdev->bus), + adev->pdev->bus->number, + PCI_SLOT(adev->pdev->devfn), + PCI_FUNC(adev->pdev->devfn), + gtt->ttm.dma_address[i], + page_to_phys(ttm->pages[i])); Please add that tracing for the dma pool path as well. With that fixed the change looks good to me, Christian. } return 0; } ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping
This helps map DMA addresses back to physical addresses. Signed-off-by: Tom St Denis--- drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 28 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 8 2 files changed, 36 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 509f7a63d40c..5b2bb28da504 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -14,6 +14,34 @@ #define AMDGPU_JOB_GET_TIMELINE_NAME(job) \ job->base.s_fence->finished.ops->get_timeline_name(>base.s_fence->finished) +TRACE_EVENT(amdgpu_ttm_tt_populate, + TP_PROTO(uint16_t domain, uint8_t bus, uint8_t slot, uint8_t func, uint64_t dma_address, uint64_t phys_address), + TP_ARGS(domain, bus, slot, func, dma_address, phys_address), + TP_STRUCT__entry( + __field(uint16_t, domain) + __field(uint8_t, bus) + __field(uint8_t, slot) + __field(uint8_t, func) + __field(uint64_t, dma) + __field(uint64_t, phys) + ), + TP_fast_assign( + __entry->domain = domain; + __entry->bus = bus; + __entry->slot = slot; + __entry->func = func; + __entry->dma = dma_address; + __entry->phys = phys_address; + ), + TP_printk("%04x:%02x:%02x.%x: 0x%llx => 0x%llx", + (unsigned)__entry->domain, + (unsigned)__entry->bus, + (unsigned)__entry->slot, + (unsigned)__entry->func, + (unsigned long long)__entry->dma, + (unsigned long long)__entry->phys) +); + TRACE_EVENT(amdgpu_mm_rreg, TP_PROTO(unsigned did, uint32_t reg, uint32_t value), TP_ARGS(did, reg, value), diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 8da59d212b3b..1cf274603476 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -43,6 +43,7 @@ #include #include #include "amdgpu.h" +#include "amdgpu_trace.h" #include "bif/bif_4_1_d.h" #define DRM_FILE_PAGE_OFFSET (0x1ULL >> PAGE_SHIFT) @@ -944,6 +945,13 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm) ttm_pool_unpopulate(ttm); return -EFAULT; } + trace_amdgpu_ttm_tt_populate( + pci_domain_nr(adev->pdev->bus), + adev->pdev->bus->number, + PCI_SLOT(adev->pdev->devfn), + PCI_FUNC(adev->pdev->devfn), + gtt->ttm.dma_address[i], + page_to_phys(ttm->pages[i])); } return 0; } -- 2.12.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx