[PATCH] drm/amdgpu: validate shadow before restoring from it

2017-04-24 Thread Roger . He
Change-Id: Id925f4e241c4192127880d2017fbf2979aa09fc7
Signed-off-by: Roger.He 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 33 ++
 1 file changed, 33 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index f74149c..cebd546 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2468,6 +2468,27 @@ bool amdgpu_need_backup(struct amdgpu_device *adev)
return amdgpu_lockup_timeout > 0 ? true : false;
 }
 
+static int amdgpu_bo_validate(struct amdgpu_bo *bo)
+{
+   uint32_t domain;
+   int r;
+
+   if (bo->pin_count)
+   return 0;
+
+   domain = bo->prefered_domains;
+
+retry:
+   amdgpu_ttm_placement_from_domain(bo, domain);
+   r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
+   if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
+   domain = bo->allowed_domains;
+   goto retry;
+   }
+
+   return r;
+}
+
 static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev,
   struct amdgpu_ring *ring,
   struct amdgpu_bo *bo,
@@ -2485,6 +2506,18 @@ static int amdgpu_recover_vram_from_shadow(struct 
amdgpu_device *adev,
domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
/* if bo has been evicted, then no need to recover */
if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
+   r = amdgpu_bo_validate(bo->shadow);
+   if (r) {
+   DRM_ERROR("bo validate failed!\n");
+   goto err;
+   }
+
+   r = amdgpu_ttm_bind(&bo->shadow->tbo, &bo->shadow->tbo.mem);
+   if (r) {
+   DRM_ERROR("%p bind failed\n", bo->shadow);
+   goto err;
+   }
+
r = amdgpu_bo_restore_from_shadow(adev, ring, bo,
 NULL, fence, true);
if (r) {
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: validate shadow before restoring from it

2017-04-24 Thread zhoucm1



On 2017年04月25日 11:14, Roger.He wrote:

Change-Id: Id925f4e241c4192127880d2017fbf2979aa09fc7
Signed-off-by: Roger.He 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 33 ++
  1 file changed, 33 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index f74149c..cebd546 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2468,6 +2468,27 @@ bool amdgpu_need_backup(struct amdgpu_device *adev)
return amdgpu_lockup_timeout > 0 ? true : false;
  }
  
+static int amdgpu_bo_validate(struct amdgpu_bo *bo)

+{
+   uint32_t domain;
+   int r;
+
+   if (bo->pin_count)
+   return 0;
+
+   domain = bo->prefered_domains;
+
+retry:
+   amdgpu_ttm_placement_from_domain(bo, domain);
+   r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
+   if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
+   domain = bo->allowed_domains;
+   goto retry;
+   }
+
+   return r;
+}
you can move this function to amdgpu_object.c, with that fix, it looks 
ok to me, Reviewed-by: Chunming Zhou 



+
  static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev,
   struct amdgpu_ring *ring,
   struct amdgpu_bo *bo,
@@ -2485,6 +2506,18 @@ static int amdgpu_recover_vram_from_shadow(struct 
amdgpu_device *adev,
domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
/* if bo has been evicted, then no need to recover */
if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
+   r = amdgpu_bo_validate(bo->shadow);
+   if (r) {
+   DRM_ERROR("bo validate failed!\n");
+   goto err;
+   }
+
+   r = amdgpu_ttm_bind(&bo->shadow->tbo, &bo->shadow->tbo.mem);
+   if (r) {
+   DRM_ERROR("%p bind failed\n", bo->shadow);
+   goto err;
+   }
+
r = amdgpu_bo_restore_from_shadow(adev, ring, bo,
 NULL, fence, true);
if (r) {


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 3/6] drm: fourcc byteorder: add bigendian support to drm_mode_legacy_fb_format

2017-04-24 Thread Michel Dänzer
On 24/04/17 03:25 PM, Gerd Hoffmann wrote:
> Return correct fourcc codes on bigendian.  Drivers must be adapted to
> this change.
> 
> Signed-off-by: Gerd Hoffmann 

Just to reiterate, this won't work for the radeon driver, which programs
the GPU to use (effectively, per the current definition that these are
little endian GPU formats) DRM_FORMAT_XRGB with pre-R600 and
DRM_FORMAT_BGRX with >= R600.


> +#ifdef __BIG_ENDIAN
> + switch (bpp) {
> + case 8:
> + fmt = DRM_FORMAT_C8;
> + break;
> + case 24:
> + fmt = DRM_FORMAT_BGR888;
> + break;

BTW, endianness as a concept cannot apply to 8 or 24 bpp formats.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm: fourcc byteorder: brings header file comments in line with reality.

2017-04-24 Thread Michel Dänzer
On 25/04/17 10:12 AM, Michel Dänzer wrote:
> On 24/04/17 10:03 PM, Ville Syrjälä wrote:
>> On Mon, Apr 24, 2017 at 03:57:02PM +0900, Michel Dänzer wrote:
>>> On 22/04/17 07:05 PM, Ville Syrjälä wrote:
 On Fri, Apr 21, 2017 at 06:14:31PM +0200, Gerd Hoffmann wrote:
>   Hi,
>
>>> My personal opinion is that formats in drm_fourcc.h should be 
>>> independent of the CPU byte order and the function 
>>> drm_mode_legacy_fb_format() and drivers depending on that incorrect 
>>> assumption be fixed instead.
>>
>> The problem is this isn't a kernel-internal thing any more.  With the
>> addition of the ADDFB2 ioctl the fourcc codes became part of the
>> kernel/userspace abi ...
>
> Ok, added some printk's to the ADDFB and ADDFB2 code paths and tested a
> bit.  Apparently pretty much all userspace still uses the ADDFB ioctl.
> xorg (modesetting driver) does.  gnome-shell in wayland mode does.
> Seems the big transition to ADDFB2 didn't happen yet.
>
> I guess that makes changing drm_mode_legacy_fb_format + drivers a
> reasonable option ...

 Yeah, I came to the same conclusion after chatting with some
 folks on irc.

 So my current idea is that we change any driver that wants to follow the
 CPU endianness
>>>
>>> This isn't really optional for various reasons, some of which have been
>>> covered in this discussion.
>>>
>>>
 to declare support for big endian formats if the CPU is
 big endian. Presumably these are mostly the virtual GPU drivers.

 Additonally we'll make the mapping performed by drm_mode_legacy_fb_format()
 driver controlled. That way drivers that got changed to follow CPU
 endianness can return a framebuffer that matches CPU endianness. And
 drivers that expect the GPU endianness to not depend on the CPU
 endianness will keep working as they do now. The downside is that users
 of the legacy addfb ioctl will need to magically know which endianness
 they will get, but that is apparently already the case. And users of
 addfb2 will keep on specifying the endianness explicitly with
 DRM_FORMAT_BIG_ENDIAN vs. 0.
>>>
>>> I'm afraid it's not that simple.
>>>
>>> The display hardware of older (pre-R600 generation) Radeon GPUs does not
>>> support the "big endian" formats directly. In order to allow userspace
>>> to access pixel data in native endianness with the CPU, we instead use
>>> byte-swapping functionality which only affects CPU access.
>>
>> OK, I'm getting confused. Based on our irc discussion I got the
>> impression you don't byte swap CPU accesses.
> 
> Sorry for the confusion. The radeon kernel driver does support
> byte-swapping for CPU access to VRAM with pre-R600 GPUs, and this is
> used for fbdev emulation. What I meant on IRC is that the xf86-video-ati
> radeon driver doesn't make use of this, mostly because it only applies
> while a BO is in VRAM, and userspace can't control when that's the case
> (while a BO isn't being scanned out).
> 
> 
>> But since you do, how do you deal with mixing 8bpp vs. 16bpp vs. 32bpp?
> 
> The byte-swapping is configured per-BO via the
> RADEON_TILING_SWAP_16/32BIT flags.

... which means that it's disabled by default, so it shouldn't affect
generic userspace. So exposing the GPU format directly should be
feasible in this case as well after all. Sorry for the noise. :(


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/2] drm/amdgpu: fix gpu reset crash

2017-04-24 Thread zhoucm1



On 2017年04月24日 17:47, Christian König wrote:

The root cause is some ring doesn't have scheduler, like KIQ ring

Change-Id: I420e84add9cdd9a7fd1f9921b8a5d0afa3dd2058
Signed-off-by: Chunming Zhou 


Reviewed-by: Christian König  for both. 

I forgot to add RB when pushing patches, How can I add it again?

Sorry for that.
David Zhou
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: extend vm flags to 64-bit in tracepoint

2017-04-24 Thread Zhang, Jerry (Junwei)

On 04/24/2017 04:56 PM, Christian König wrote:

Am 24.04.2017 um 10:55 schrieb Christian König:

Am 24.04.2017 um 08:43 schrieb Junwei Zhang:

Signed-off-by: Junwei Zhang 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 18 +-
  1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 8676eff..998ff4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -221,7 +221,7 @@
   __field(long, start)
   __field(long, last)
   __field(u64, offset)
- __field(u32, flags)
+ __field(u64, flags)
   ),
TP_fast_assign(
@@ -231,7 +231,7 @@
 __entry->offset = mapping->offset;
 __entry->flags = mapping->flags;
 ),
-TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%08x",
+TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%010llx",


The full 64bit flags need to be printed with %016llx.

We only use %010llx for the 40bit addresses and even that needs to be
extended to %048llx for Vega10.


Ups, hit send to early. That should read "needs to be extended to %012llx for
Vega10 because it has 48bit addresses".


You reminder me that the flags may be printed as:
  - flags: %015llx (58-bit at most for vega10)

About the address, like offset/soffs/eoffs/pe/addr, need to be
  - addr: %012llx (48-bit)

How do you think about it?

BTW, anyway it turns out to print the correct result, even if the format is 
less than the output.


Jerry



Christian.



With that fixed the patch is Reviewed-by: Christian König


Regards,
Christian.


__entry->bo, __entry->start, __entry->last,
__entry->offset, __entry->flags)
  );
@@ -245,7 +245,7 @@
   __field(long, start)
   __field(long, last)
   __field(u64, offset)
- __field(u32, flags)
+ __field(u64, flags)
   ),
TP_fast_assign(
@@ -255,7 +255,7 @@
 __entry->offset = mapping->offset;
 __entry->flags = mapping->flags;
 ),
-TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%08x",
+TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%010llx",
__entry->bo, __entry->start, __entry->last,
__entry->offset, __entry->flags)
  );
@@ -266,7 +266,7 @@
  TP_STRUCT__entry(
   __field(u64, soffset)
   __field(u64, eoffset)
- __field(u32, flags)
+ __field(u64, flags)
   ),
TP_fast_assign(
@@ -274,7 +274,7 @@
 __entry->eoffset = mapping->it.last + 1;
 __entry->flags = mapping->flags;
 ),
-TP_printk("soffs=%010llx, eoffs=%010llx, flags=%08x",
+TP_printk("soffs=%010llx, eoffs=%010llx, flags=%010llx",
__entry->soffset, __entry->eoffset, __entry->flags)
  );
  @@ -290,14 +290,14 @@
TRACE_EVENT(amdgpu_vm_set_ptes,
  TP_PROTO(uint64_t pe, uint64_t addr, unsigned count,
- uint32_t incr, uint32_t flags),
+ uint32_t incr, uint64_t flags),
  TP_ARGS(pe, addr, count, incr, flags),
  TP_STRUCT__entry(
   __field(u64, pe)
   __field(u64, addr)
   __field(u32, count)
   __field(u32, incr)
- __field(u32, flags)
+ __field(u64, flags)
   ),
TP_fast_assign(
@@ -307,7 +307,7 @@
 __entry->incr = incr;
 __entry->flags = flags;
 ),
-TP_printk("pe=%010Lx, addr=%010Lx, incr=%u, flags=%08x, count=%u",
+TP_printk("pe=%010Lx, addr=%010Lx, incr=%u, flags=%010Lx, count=%u",
__entry->pe, __entry->addr, __entry->incr,
__entry->flags, __entry->count)
  );



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx




___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm: fourcc byteorder: brings header file comments in line with reality.

2017-04-24 Thread Michel Dänzer
On 24/04/17 10:03 PM, Ville Syrjälä wrote:
> On Mon, Apr 24, 2017 at 03:57:02PM +0900, Michel Dänzer wrote:
>> On 22/04/17 07:05 PM, Ville Syrjälä wrote:
>>> On Fri, Apr 21, 2017 at 06:14:31PM +0200, Gerd Hoffmann wrote:
   Hi,

>> My personal opinion is that formats in drm_fourcc.h should be 
>> independent of the CPU byte order and the function 
>> drm_mode_legacy_fb_format() and drivers depending on that incorrect 
>> assumption be fixed instead.
>
> The problem is this isn't a kernel-internal thing any more.  With the
> addition of the ADDFB2 ioctl the fourcc codes became part of the
> kernel/userspace abi ...

 Ok, added some printk's to the ADDFB and ADDFB2 code paths and tested a
 bit.  Apparently pretty much all userspace still uses the ADDFB ioctl.
 xorg (modesetting driver) does.  gnome-shell in wayland mode does.
 Seems the big transition to ADDFB2 didn't happen yet.

 I guess that makes changing drm_mode_legacy_fb_format + drivers a
 reasonable option ...
>>>
>>> Yeah, I came to the same conclusion after chatting with some
>>> folks on irc.
>>>
>>> So my current idea is that we change any driver that wants to follow the
>>> CPU endianness
>>
>> This isn't really optional for various reasons, some of which have been
>> covered in this discussion.
>>
>>
>>> to declare support for big endian formats if the CPU is
>>> big endian. Presumably these are mostly the virtual GPU drivers.
>>>
>>> Additonally we'll make the mapping performed by drm_mode_legacy_fb_format()
>>> driver controlled. That way drivers that got changed to follow CPU
>>> endianness can return a framebuffer that matches CPU endianness. And
>>> drivers that expect the GPU endianness to not depend on the CPU
>>> endianness will keep working as they do now. The downside is that users
>>> of the legacy addfb ioctl will need to magically know which endianness
>>> they will get, but that is apparently already the case. And users of
>>> addfb2 will keep on specifying the endianness explicitly with
>>> DRM_FORMAT_BIG_ENDIAN vs. 0.
>>
>> I'm afraid it's not that simple.
>>
>> The display hardware of older (pre-R600 generation) Radeon GPUs does not
>> support the "big endian" formats directly. In order to allow userspace
>> to access pixel data in native endianness with the CPU, we instead use
>> byte-swapping functionality which only affects CPU access.
> 
> OK, I'm getting confused. Based on our irc discussion I got the
> impression you don't byte swap CPU accesses.

Sorry for the confusion. The radeon kernel driver does support
byte-swapping for CPU access to VRAM with pre-R600 GPUs, and this is
used for fbdev emulation. What I meant on IRC is that the xf86-video-ati
radeon driver doesn't make use of this, mostly because it only applies
while a BO is in VRAM, and userspace can't control when that's the case
(while a BO isn't being scanned out).


> But since you do, how do you deal with mixing 8bpp vs. 16bpp vs. 32bpp?

The byte-swapping is configured per-BO via the
RADEON_TILING_SWAP_16/32BIT flags.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 0/6] drm: tackle byteorder issues, take two

2017-04-24 Thread Michel Dänzer
On 24/04/17 11:26 PM, Ville Syrjälä wrote:
> On Mon, Apr 24, 2017 at 04:54:25PM +0900, Michel Dänzer wrote:
>> On 24/04/17 04:36 PM, Gerd Hoffmann wrote:
>>>
>   drm: fourcc byteorder: add DRM_FORMAT_CPU_*
>   drm: fourcc byteorder: add bigendian support to
> drm_mode_legacy_fb_format

 As I explained in my last followup in the "[PATCH] drm: fourcc
 byteorder: brings header file comments in line with reality." thread,
 the mapping between GPU and CPU formats has to be provided by the
 driver, it cannot be done statically.
>>>
>>> Well, the drm fourcc codes represent the cpu view (i.e. what userspace
>>> will fill the ADDFB2-created framebuffers with).
>>
>> Ville is adamant that they represent the GPU view. This needs to be
>> resolved one way or the other.
> 
> Since the byte swapping can happen either for CPU or display access
> I guess we can't just consider the GPU and display as a single entity.
> 
> We may need to consider several agents:
> 1. display
> 2. GPU
> 3. CPU
> 4. other DMA
> 
> Not sure what we can say about 4. I presume it's going to be like the
> GPU or the CPU in the sense that it might go through the CPU byte
> swapping logic or not. I'm just going to ignore it.
> 
> Let's say we have the following bytes in memory
> (in order of increasing address): A,B,C,D
> We'll assume GPU and display are LE natively. Each component will see
> the resulting 32bpp  pixel as follows (msb left->lsb right):
> 
> LE CPU w/ no byte swapping:
>  display: DCBA
>  GPU: DCBA
>  CPU: DCBA
>  = everyone agrees
> 
> BE CPU w/ no byte swapping:
>  display: DCBA
>  GPU: DCBA
>  CPU: ABCD
>  = GPU and display agree
> 
> BE CPU w/ display byte swapping:
>  display: ABCD
>  GPU: DCBA
>  CPU: ABCD
>  = CPU and display agree
> 
> BE CPU w/ CPU access byte swapping:
>  display: DCBA
>  GPU: DCBA
>  CPU: DCBA
>  = everyone agrees

Beware that for this list, you're using a format definition which is
based on a packed 32-bit value. This does *not* match the current
DRM_FORMAT_* definitions. E.g. in the last case, display and GPU use
the same DRM_FORMAT, but the CPU uses the "inverse" one.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] LRU map compute/SDMA user ring ids to kernel ring ids v2

2017-04-24 Thread Alex Deucher
On Mon, Apr 24, 2017 at 11:36 AM, Andres Rodriguez  wrote:
>
>
> On 2017-04-24 11:25 AM, Alex Deucher wrote:
>>
>> On Mon, Apr 24, 2017 at 3:40 AM, Christian König
>>  wrote:
>>>
>>> I wanted to push that to our internal branch, but found that it doesn't
>>> apply cleanly. So please rebase on alex amd-staging-4.9 branch.
>>
>>
>> Please stick with my drm-next-wip tree.  It will make it easier for me
>> to integrate.
>>
>> Alex
>>
>>
>
> I'm guessing the tradeoff here would be a delay getting into drm-next. I'm
> okay with that as long as the patch series makes it in time for the 4.13
> feature merge window.
>
> One exception, I would like to get the IOCTL interface patch submitted into
> a branch that will land on drm-next sooner rather than later so that I can
> begin submitting the corresponding patches to the userspace components.
> Should this be amd-staging-4.9? Or should it be somewhere else?

No real trade-offs per se.  The code either needs to get backported to
our stable branch or forward ported to drm-next.  It ultimately needs
to land in both to be properly tested, etc.

Alex


>
> Regards,
> Andres
>
>
>>>
>>> Additional to that you forgot my rb on patch #4.
>>>
>>> Regards,
>>> Christian.
>>>
>>>
>>> Am 22.04.2017 um 01:48 schrieb Andres Rodriguez:


 V2 updates:
   * Rebased
   * All patches now have r-b

 Second part of the split of the series:
 Add support for high priority scheduling in amdgpu v8

 These patches should be close to being good enough to land.

 The first two patches are simple fixes I've ported from the ROCm branch.
 These
 still need review.

 I've fixed all of Christian's comments for patch 04:
 drm/amdgpu: implement lru amdgpu_queue_mgr policy for compute v4

 ___
 amd-gfx mailing list
 amd-gfx@lists.freedesktop.org
 https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>>
>>>
>>>
>>>
>>> ___
>>> amd-gfx mailing list
>>> amd-gfx@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/4] drm/amdgpu: Fix use of interruptible waiting 1. The signal interrupt can affect the expected behaviour. 2. There is no mechanism to handle the corresponding error.

2017-04-24 Thread Alex Xie
Change-Id: I43f3b0b420833a02ad5ece3ddeef04c6512667e9
Signed-off-by: Alex Xie 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 051696d..4d04af9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2181,7 +2181,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool 
suspend, bool fbcon)
 
if (amdgpu_crtc->cursor_bo) {
struct amdgpu_bo *aobj = 
gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
-   r = amdgpu_bo_reserve(aobj, false);
+   r = amdgpu_bo_reserve(aobj, true);
if (r == 0) {
amdgpu_bo_unpin(aobj);
amdgpu_bo_unreserve(aobj);
@@ -2194,7 +2194,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool 
suspend, bool fbcon)
robj = gem_to_amdgpu_bo(rfb->obj);
/* don't unpin kernel fb objects */
if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
-   r = amdgpu_bo_reserve(robj, false);
+   r = amdgpu_bo_reserve(robj, true);
if (r == 0) {
amdgpu_bo_unpin(robj);
amdgpu_bo_unreserve(robj);
@@ -2301,7 +2301,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool 
resume, bool fbcon)
 
if (amdgpu_crtc->cursor_bo) {
struct amdgpu_bo *aobj = 
gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
-   r = amdgpu_bo_reserve(aobj, false);
+   r = amdgpu_bo_reserve(aobj, true);
if (r == 0) {
r = amdgpu_bo_pin(aobj,
  AMDGPU_GEM_DOMAIN_VRAM,
-- 
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 3/4] drm/amdgpu: Fix use of interruptible waiting 1. The signal interrupt can affect the expected behaviour. 2. There is no good mechanism to handle the corresponding error.

2017-04-24 Thread Alex Xie
Change-Id: I43f3b0b420833a02ad5ece3ddeef04c6512667e9
Signed-off-by: Alex Xie 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 4d04af9..2ef4759 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2479,7 +2479,7 @@ static int amdgpu_recover_vram_from_shadow(struct 
amdgpu_device *adev,
if (!bo->shadow)
return 0;
 
-   r = amdgpu_bo_reserve(bo, false);
+   r = amdgpu_bo_reserve(bo, true);
if (r)
return r;
domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
-- 
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/4] drm/amdgpu: Fix potential issue if reserve function is interrupted If amdgpu_bo_reserve function is interrupted by signal, amdgpu_bo_kunmap function is not called.

2017-04-24 Thread Alex Xie
Change-Id: Ide2b3be6549b3afb8d6116094b5fff495b18addf
Signed-off-by: Alex Xie 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index a09ad3cf..051696d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -350,7 +350,7 @@ static void amdgpu_vram_scratch_fini(struct amdgpu_device 
*adev)
if (adev->vram_scratch.robj == NULL) {
return;
}
-   r = amdgpu_bo_reserve(adev->vram_scratch.robj, false);
+   r = amdgpu_bo_reserve(adev->vram_scratch.robj, true);
if (likely(r == 0)) {
amdgpu_bo_kunmap(adev->vram_scratch.robj);
amdgpu_bo_unpin(adev->vram_scratch.robj);
-- 
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/2] drm/amdgpu: Real return value can be over-written when clean up

2017-04-24 Thread Alex Xie
Change-Id: Ib69f035eeb213a1aec5025e0a9f4515065706118
Signed-off-by: Alex Xie 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
index 3453052..76be2d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
@@ -117,6 +117,11 @@ static void amdgpu_benchmark_move(struct amdgpu_device 
*adev, unsigned size,
}
 
 out_cleanup:
+   /* Check error value now. The value can be overwritten when clean up.*/
+   if (r) {
+   DRM_ERROR("Error while benchmarking BO move.\n");
+   }
+
if (sobj) {
r = amdgpu_bo_reserve(sobj, false);
if (likely(r == 0)) {
@@ -133,10 +138,6 @@ static void amdgpu_benchmark_move(struct amdgpu_device 
*adev, unsigned size,
}
amdgpu_bo_unref(&sobj);
}
-
-   if (r) {
-   DRM_ERROR("Error while benchmarking BO move.\n");
-   }
 }
 
 void amdgpu_benchmark(struct amdgpu_device *adev, int test_number)
-- 
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/2] drm/amdgpu: Fix use of interruptible waiting 1. The signal interrupt can affect the expected behaviour. 2. There is no good mechanism to handle the corresponding error. When signal interru

2017-04-24 Thread Alex Xie
Change-Id: I6889a4d9dd2703bcf5d448d18f6af51c496a93c9
Signed-off-by: Alex Xie 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
index 76be2d2..75bd76f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
@@ -123,7 +123,7 @@ static void amdgpu_benchmark_move(struct amdgpu_device 
*adev, unsigned size,
}
 
if (sobj) {
-   r = amdgpu_bo_reserve(sobj, false);
+   r = amdgpu_bo_reserve(sobj, true);
if (likely(r == 0)) {
amdgpu_bo_unpin(sobj);
amdgpu_bo_unreserve(sobj);
@@ -131,7 +131,7 @@ static void amdgpu_benchmark_move(struct amdgpu_device 
*adev, unsigned size,
amdgpu_bo_unref(&sobj);
}
if (dobj) {
-   r = amdgpu_bo_reserve(dobj, false);
+   r = amdgpu_bo_reserve(dobj, true);
if (likely(r == 0)) {
amdgpu_bo_unpin(dobj);
amdgpu_bo_unreserve(dobj);
-- 
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 2/2] drm/amd/display: Prevent premature pageflip when comitting in vblank. (v3)

2017-04-24 Thread Grodzovsky, Andrey


> -Original Message-
> From: Mario Kleiner [mailto:mario.kleiner...@gmail.com]
> Sent: Monday, April 24, 2017 12:54 PM
> To: dri-de...@lists.freedesktop.org
> Cc: amd-gfx@lists.freedesktop.org; Grodzovsky, Andrey;
> mario.kleiner...@gmail.com; Wentland, Harry; Deucher, Alexander;
> Daenzer, Michel
> Subject: [PATCH 2/2] drm/amd/display: Prevent premature pageflip when
> comitting in vblank. (v3)
> 
> Make sure we do not program a hw pageflip inside vblank 'n' iff the atomic
> flip is comitted while inside the same vblank 'n'. We must defer such a flip 
> by
> one refresh cycle to vblank 'n+1', unless this is a
> DRM_MODE_PAGE_FLIP_ASYNC async pageflip, which must always execute
> as soon as possible.
> 
> Without this, pageflips programmed via X11 GLX_OML_sync_control
> extensions glXSwapBuffersMscOML(..., target_msc, ...); call and/or via
> DRI3/Present PresentPixmap(..., target_msc, ...); request will complete one
> vblank too early whenever target_msc > current_msc + 1, ie. more than 1
> vblank in the future. In such a case, the call of the pageflip ioctl() would 
> be
> triggered by a queued drmWaitVblank() vblank event, which itself gets
> dispatched inside the vblank one frame before the target_msc vblank.
> 
> Testing with this patch does no longer show any problems with
> OML_sync_control swap scheduling or flip completion timestamps.
> Tested on R9 380 Tonga.
> 
> v2: Add acked/r-b by Harry and Michel.
> v3: Feedback from Andrey: Must not wait an extra frame for
> DRM_MODE_PAGE_FLIP_ASYNC flips.
> 
> Signed-off-by: Mario Kleiner 
> Acked-by: Harry Wentland 
> Reviewed-by: Michel Dänzer 
> 
> Cc: Harry Wentland 
> Cc: Alex Deucher 
> Cc: Michel Dänzer 
> ---
>  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_types.c  | 20
> ++--
>  1 file changed, 10 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_types.c
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_types.c
> index 086a842..82b2ce6 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_types.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_types.c
> @@ -2460,6 +2460,9 @@ static void amdgpu_dm_do_flip(
>   struct amdgpu_device *adev = crtc->dev->dev_private;
>   bool async_flip = (acrtc->flip_flags &
> DRM_MODE_PAGE_FLIP_ASYNC) != 0;
> 
> + /* Prepare wait for target vblank early - before the fence-waits */
> + target_vblank = target - drm_crtc_vblank_count(crtc) +
> + amdgpu_get_vblank_counter_kms(crtc->dev, acrtc-
> >crtc_id);
> 
>   /*TODO This might fail and hence better not used, wait
>* explicitly on fences instead
> @@ -2478,13 +2481,9 @@ static void amdgpu_dm_do_flip(
> 
>   amdgpu_bo_unreserve(abo);
> 
> - /* Wait for target vblank */
>   /* Wait until we're out of the vertical blank period before the one
>* targeted by the flip
>*/
> - target_vblank = target - drm_crtc_vblank_count(crtc) +
> - amdgpu_get_vblank_counter_kms(crtc->dev, acrtc-
> >crtc_id);
> -
>   while ((acrtc->enabled &&
>   (amdgpu_get_crtc_scanoutpos(adev->ddev, acrtc->crtc_id,
> 0,
>   , , NULL, NULL,
> @@ -2760,14 +2759,15 @@ void amdgpu_dm_atomic_commit_tail(
>   pflip_needed = !state->allow_modeset;
> 
>   if (pflip_needed) {
> + wait_for_vblank =
> + acrtc->flip_flags &
> DRM_MODE_PAGE_FLIP_ASYNC ?
> + false : true;
> +
>   amdgpu_dm_do_flip(
> - crtc,
> - fb,
> - drm_crtc_vblank_count(crtc));
> + crtc,
> + fb,
> + drm_crtc_vblank_count(crtc) +
> wait_for_vblank);
> 
> - wait_for_vblank =
> - acrtc->flip_flags &
> DRM_MODE_PAGE_FLIP_ASYNC ?
> - false : true;
>   /*clean up the flags for next usage*/
>   acrtc->flip_flags = 0;
>   }
> --
> 2.7.4

 
Reviewed-by: Andrey Grodzovsky 

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 4/4] drm/amdgpu: Fix use of interruptible waiting 1. The signal interrupt can affect the expected behaviour. 2. There is no good mechanism to handle the corresponding error.

2017-04-24 Thread Alex Xie
Change-Id: I504cc9c280c70fa832cb64e6a1e0f8b4a1f3cbfd
Signed-off-by: Alex Xie 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 8f5db2d..e4a6a40 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -295,7 +295,7 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 
*gpu_addr,
if (*bo == NULL)
return;
 
-   if (likely(amdgpu_bo_reserve(*bo, false) == 0)) {
+   if (likely(amdgpu_bo_reserve(*bo, true) == 0)) {
if (cpu_addr)
amdgpu_bo_kunmap(*bo);
 
-- 
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 0/6] drm: tackle byteorder issues, take two

2017-04-24 Thread Ville Syrjälä
On Mon, Apr 24, 2017 at 05:26:03PM +0300, Ville Syrjälä wrote:
> On Mon, Apr 24, 2017 at 04:54:25PM +0900, Michel Dänzer wrote:
> > On 24/04/17 04:36 PM, Gerd Hoffmann wrote:
> > > 
> > >>>   drm: fourcc byteorder: add DRM_FORMAT_CPU_*
> > >>>   drm: fourcc byteorder: add bigendian support to
> > >>> drm_mode_legacy_fb_format
> > >>
> > >> As I explained in my last followup in the "[PATCH] drm: fourcc
> > >> byteorder: brings header file comments in line with reality." thread,
> > >> the mapping between GPU and CPU formats has to be provided by the
> > >> driver, it cannot be done statically.
> > > 
> > > Well, the drm fourcc codes represent the cpu view (i.e. what userspace
> > > will fill the ADDFB2-created framebuffers with).
> > 
> > Ville is adamant that they represent the GPU view. This needs to be
> > resolved one way or the other.
> 
> Since the byte swapping can happen either for CPU or display access
> I guess we can't just consider the GPU and display as a single entity.
> 
> We may need to consider several agents:
> 1. display
> 2. GPU
> 3. CPU
> 4. other DMA
> 
> Not sure what we can say about 4. I presume it's going to be like the
> GPU or the CPU in the sense that it might go through the CPU byte
> swapping logic or not. I'm just going to ignore it.
> 
> Let's say we have the following bytes in memory
> (in order of increasing address): A,B,C,D
> We'll assume GPU and display are LE natively. Each component will see
> the resulting 32bpp  pixel as follows (msb left->lsb right):
> 
> LE CPU w/ no byte swapping:
>  display: DCBA
>  GPU: DCBA
>  CPU: DCBA
>  = everyone agrees
> 
> BE CPU w/ no byte swapping:
>  display: DCBA
>  GPU: DCBA
>  CPU: ABCD
>  = GPU and display agree
> 
> BE CPU w/ display byte swapping:
>  display: ABCD
>  GPU: DCBA
>  CPU: ABCD
>  = CPU and display agree

So after some further thought this seems like a somewhat crazy
combination. It does make sense from the simplicity POV in that 
the CPU byte swapping isn't needed, and thus the problems with
concurrent access to buffers with different pixel sizes vanish.

However the GPU has to somehow be able to produce data the display
can consume, so presumably there must be some knobs in the GPU to do
the opposite byte swapping that the display does, or the GPU must be
restricted to only use framebuffers in formats like .

> 
> BE CPU w/ CPU access byte swapping:
>  display: DCBA
>  GPU: DCBA
>  CPU: DCBA
>  = everyone agrees
> 
> BE CPU w/ both display and CPU byte swapping:
>  display: ABCD
>  GPU: DCBA
>  CPU: DCBA
>  = CPU and GPU agree (doesn't seem all that useful)
> 
> The different byte swapping tricks must have seemed like a great idea to
> someone, but in the end they're just making our life more miserable.
> 
> > > The gpu view can certainly differ from that.  Implementing this is up
> > > to the driver IMO.
> > > 
> > > When running on dumb framebuffers userspace doesn't need to know what
> > > the gpu view is.
> 
> True. So for that we'd just need to consider whether the CPU and display
> agree or disagree on the byte order. And I guess we'd have to pick from
> the following choices for a BE CPU:
> 
> CPU and display agree:
>  * FB is big endian, or FB is host endian (or whatever we would call it)
> CPU and display disagree:
>  * FB is little endian, or FB is foreign endian (or whatever)
> 
> > > 
> > > When running in opengl mode there will be a hardware-specific mesa
> > > driver in userspace, which will either know what the gpu view is (for
> > > example because there is only one way to implement this in hardware) or
> > > it can use hardware-specific ioctls to ask the kernel driver what the
> > > gpu view is.
> > 
> > Not sure this can be hidden in the OpenGL driver. How would e.g. a
> > Wayland compositor or the Xorg modesetting driver know which OpenGL
> > format corresponds to a given DRM_FORMAT?
> 
> How are GL formats defined? /me needs to go read the spec again.
> 
> -- 
> Ville Syrjälä
> Intel OTC

-- 
Ville Syrjälä
Intel OTC
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 0/6] drm: tackle byteorder issues, take two

2017-04-24 Thread Daniel Stone
Hi,

On 24 April 2017 at 15:26, Ville Syrjälä  wrote:
> On Mon, Apr 24, 2017 at 04:54:25PM +0900, Michel Dänzer wrote:
>> On 24/04/17 04:36 PM, Gerd Hoffmann wrote:
>> > When running in opengl mode there will be a hardware-specific mesa
>> > driver in userspace, which will either know what the gpu view is (for
>> > example because there is only one way to implement this in hardware) or
>> > it can use hardware-specific ioctls to ask the kernel driver what the
>> > gpu view is.
>>
>> Not sure this can be hidden in the OpenGL driver. How would e.g. a
>> Wayland compositor or the Xorg modesetting driver know which OpenGL
>> format corresponds to a given DRM_FORMAT?
>
> How are GL formats defined? /me needs to go read the spec again.

They aren't, per se. Only relative to 'native formats', which for this
discussion is the set of GBM formats, which is in turn just
drm_fourcc.h.

Cheers,
Daniel
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/2] drm/amd/display: Prevent premature pageflip when committing in vblank. (v3)

2017-04-24 Thread Mario Kleiner
Make sure we do not program a hw pageflip inside vblank 'n' iff the
atomic flip is committed while inside the same vblank 'n'. We must
defer such a flip by one refresh cycle to vblank 'n+1', unless this
is a DRM_MODE_PAGE_FLIP_ASYNC async pageflip, which must always
execute as soon as possible.

Without this, pageflips programmed via X11 GLX_OML_sync_control extensions
glXSwapBuffersMscOML(..., target_msc, ...); call and/or via DRI3/Present
PresentPixmap(..., target_msc, ...); request will complete one vblank
too early whenever target_msc > current_msc + 1, ie. more than 1 vblank
in the future. In such a case, the call of the pageflip ioctl() would be
triggered by a queued drmWaitVblank() vblank event, which itself gets
dispatched inside the vblank one frame before the target_msc vblank.

Testing with this patch does no longer show any problems with
OML_sync_control swap scheduling or flip completion timestamps.
Tested on R9 380 Tonga.

v2: Add acked/r-b by Harry and Michel.
v3: Feedback from Andrey: Must not wait an extra frame for
DRM_MODE_PAGE_FLIP_ASYNC flips.

Signed-off-by: Mario Kleiner 
Acked-by: Harry Wentland 
Reviewed-by: Michel Dänzer 

Cc: Harry Wentland 
Cc: Alex Deucher 
Cc: Michel Dänzer 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_types.c  | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_types.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_types.c
index 086a842..82b2ce6 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_types.c
@@ -2460,6 +2460,9 @@ static void amdgpu_dm_do_flip(
struct amdgpu_device *adev = crtc->dev->dev_private;
bool async_flip = (acrtc->flip_flags & DRM_MODE_PAGE_FLIP_ASYNC) != 0;
 
+   /* Prepare wait for target vblank early - before the fence-waits */
+   target_vblank = target - drm_crtc_vblank_count(crtc) +
+   amdgpu_get_vblank_counter_kms(crtc->dev, 
acrtc->crtc_id);
 
/*TODO This might fail and hence better not used, wait
 * explicitly on fences instead
@@ -2478,13 +2481,9 @@ static void amdgpu_dm_do_flip(
 
amdgpu_bo_unreserve(abo);
 
-   /* Wait for target vblank */
/* Wait until we're out of the vertical blank period before the one
 * targeted by the flip
 */
-   target_vblank = target - drm_crtc_vblank_count(crtc) +
-   amdgpu_get_vblank_counter_kms(crtc->dev, 
acrtc->crtc_id);
-
while ((acrtc->enabled &&
(amdgpu_get_crtc_scanoutpos(adev->ddev, acrtc->crtc_id, 0,
, , NULL, NULL,
@@ -2760,14 +2759,15 @@ void amdgpu_dm_atomic_commit_tail(
pflip_needed = !state->allow_modeset;
 
if (pflip_needed) {
+   wait_for_vblank =
+   acrtc->flip_flags & DRM_MODE_PAGE_FLIP_ASYNC ?
+   false : true;
+
amdgpu_dm_do_flip(
-   crtc,
-   fb,
-   drm_crtc_vblank_count(crtc));
+   crtc,
+   fb,
+   drm_crtc_vblank_count(crtc) + wait_for_vblank);
 
-   wait_for_vblank =
-   acrtc->flip_flags & 
DRM_MODE_PAGE_FLIP_ASYNC ?
-   false : true;
/*clean up the flags for next usage*/
acrtc->flip_flags = 0;
}
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amdgpu: fix s3 ring test failed on Vi caused by KIQ enabled.

2017-04-24 Thread Deucher, Alexander
> -Original Message-
> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf
> Of Rex Zhu
> Sent: Monday, April 24, 2017 10:07 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Zhu, Rex
> Subject: [PATCH] drm/amdgpu: fix s3 ring test failed on Vi caused by KIQ
> enabled.
> 
> commit 2f3ea04dc9258cc can't fix s3 issue completely.
> 
> Change-Id: I31d0ba7d143b11543bbb83273c022d8e77f34e08
> Signed-off-by: Rex Zhu 

Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 5 ++---
>  1 file changed, 2 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 563944a..2ff5f19 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -4972,7 +4972,6 @@ static int gfx_v8_0_kiq_init_queue(struct
> amdgpu_ring *ring)
>   /* reset ring buffer */
>   ring->wptr = 0;
>   amdgpu_ring_clear_ring(ring);
> -
>   mutex_lock(>srbm_mutex);
>   vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
>   gfx_v8_0_kiq_init_register(ring);
> @@ -5014,12 +5013,12 @@ static int gfx_v8_0_kcq_init_queue(struct
> amdgpu_ring *ring)
>   /* reset MQD to a clean status */
>   if (adev->gfx.mec.mqd_backup[mqd_idx])
>   memcpy(mqd, adev-
> >gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
> -
>   /* reset ring buffer */
>   ring->wptr = 0;
>   amdgpu_ring_clear_ring(ring);
> + } else {
> + amdgpu_ring_clear_ring(ring);
>   }
> -
>   return 0;
>  }
> 
> --
> 1.9.1
> 
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] Interface changes for prioritized contexts amd-staging-4.9

2017-04-24 Thread Andres Rodriguez
This patch has been rebased onto amd-staging-4.9.

It would be useful to have this patch outside the drm-next-4.12-wip branch as
it would unblock submission of the corresponding libdrm and mesa patches.

Splitting this off as it is my understanding that drm-next-4.12-wip may take a
while to land on drm-next due to some KIQ regressions.

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: add parameter to allocate high priority contexts v8

2017-04-24 Thread Andres Rodriguez
Add a new context creation parameter to express a global context priority.

The priority ranking in descending order is as follows:
 * AMDGPU_CTX_PRIORITY_HIGH
 * AMDGPU_CTX_PRIORITY_NORMAL
 * AMDGPU_CTX_PRIORITY_LOW

The driver will attempt to schedule work to the hardware according to
the priorities. No latency or throughput guarantees are provided by
this patch.

This interface intends to service the EGL_IMG_context_priority
extension, and vulkan equivalents.

v2: Instead of using flags, repurpose __pad
v3: Swap enum values of _NORMAL _HIGH for backwards compatibility
v4: Validate usermode priority and store it
v5: Move priority validation into amdgpu_ctx_ioctl(), headline reword
v6: add UAPI note regarding priorities requiring CAP_SYS_ADMIN
v7: remove ctx->priority
v8: added AMDGPU_CTX_PRIORITY_LOW, s/CAP_SYS_ADMIN/CAP_SYS_NICE

Reviewed-by: Emil Velikov 
Reviewed-by: Christian König 
Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c   | 38 ---
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.h |  4 ++-
 include/uapi/drm/amdgpu_drm.h |  9 ++-
 3 files changed, 45 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index b43..af75571 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -8,124 +8,134 @@
  * and/or sell copies of the Software, and to permit persons to whom the
  * Software is furnished to do so, subject to the following conditions:
  *
  * The above copyright notice and this permission notice shall be included in
  * all copies or substantial portions of the Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  * Authors: monk liu 
  */
 
 #include 
 #include "amdgpu.h"
 
-static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx)
+static int amdgpu_ctx_init(struct amdgpu_device *adev,
+  enum amd_sched_priority priority,
+  struct amdgpu_ctx *ctx)
 {
unsigned i, j;
int r;
 
+   if (priority < 0 || priority >= AMD_SCHED_PRIORITY_MAX)
+   return -EINVAL;
+
+   if (priority >= AMD_SCHED_PRIORITY_HIGH && !capable(CAP_SYS_NICE))
+   return -EACCES;
+
memset(ctx, 0, sizeof(*ctx));
ctx->adev = adev;
kref_init(>refcount);
spin_lock_init(>ring_lock);
ctx->fences = kcalloc(amdgpu_sched_jobs * AMDGPU_MAX_RINGS,
  sizeof(struct fence*), GFP_KERNEL);
if (!ctx->fences)
return -ENOMEM;
 
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
ctx->rings[i].sequence = 1;
ctx->rings[i].fences = >fences[amdgpu_sched_jobs * i];
}
 
ctx->reset_counter = atomic_read(>gpu_reset_counter);
 
/* create context entity for each ring */
for (i = 0; i < adev->num_rings; i++) {
struct amdgpu_ring *ring = adev->rings[i];
struct amd_sched_rq *rq;
 
-   rq = >sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
+   rq = >sched.sched_rq[priority];
r = amd_sched_entity_init(>sched, >rings[i].entity,
  rq, amdgpu_sched_jobs);
if (r)
goto failed;
}
 
return 0;
 
 failed:
for (j = 0; j < i; j++)
amd_sched_entity_fini(>rings[j]->sched,
  >rings[j].entity);
kfree(ctx->fences);
ctx->fences = NULL;
return r;
 }
 
 static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
 {
struct amdgpu_device *adev = ctx->adev;
unsigned i, j;
 
if (!adev)
return;
 
for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
for (j = 0; j < amdgpu_sched_jobs; ++j)
fence_put(ctx->rings[i].fences[j]);
kfree(ctx->fences);
ctx->fences = NULL;
 
for (i = 0; i < adev->num_rings; i++)
amd_sched_entity_fini(>rings[i]->sched,
  >rings[i].entity);
 }
 
 static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
struct amdgpu_fpriv *fpriv,
+   enum amd_sched_priority priority,
uint32_t *id)
 {
struct 

RE: [PATCH 08/11] drm/amdgpu/vce4: replaced with virt_alloc_mm_table

2017-04-24 Thread Deucher, Alexander
> -Original Message-
> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf
> Of Xiangliang Yu
> Sent: Monday, April 24, 2017 2:58 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Yu, Xiangliang
> Subject: [PATCH 08/11] drm/amdgpu/vce4: replaced with
> virt_alloc_mm_table
> 
> Used virt_alloc_mm_table function to allocate MM table memory.
> 
> Signed-off-by: Xiangliang Yu 

Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 20 +++-
>  1 file changed, 3 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
> b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
> index a3d9d4d..a34cdbd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
> @@ -444,20 +444,9 @@ static int vce_v4_0_sw_init(void *handle)
>   return r;
>   }
> 
> - if (amdgpu_sriov_vf(adev)) {
> - r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
> - AMDGPU_GEM_DOMAIN_VRAM,
> - >virt.mm_table.bo,
> - >virt.mm_table.gpu_addr,
> - (void *)
> >virt.mm_table.cpu_addr);
> - if (!r) {
> - memset((void *)adev->virt.mm_table.cpu_addr, 0,
> PAGE_SIZE);
> - printk("mm table gpu addr = 0x%llx, cpu addr = %p.
> \n",
> -adev->virt.mm_table.gpu_addr,
> -adev->virt.mm_table.cpu_addr);
> - }
> + r = amdgpu_virt_alloc_mm_table(adev);
> + if (r)
>   return r;
> - }
> 
>   return r;
>  }
> @@ -468,10 +457,7 @@ static int vce_v4_0_sw_fini(void *handle)
>   struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> 
>   /* free MM table */
> - if (amdgpu_sriov_vf(adev))
> - amdgpu_bo_free_kernel(>virt.mm_table.bo,
> -   >virt.mm_table.gpu_addr,
> -   (void *)>virt.mm_table.cpu_addr);
> + amdgpu_virt_free_mm_table(adev);
> 
>   r = amdgpu_vce_suspend(adev);
>   if (r)
> --
> 2.7.4
> 
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 07/11] drm/amdgpu/virt: add two functions for MM table

2017-04-24 Thread Deucher, Alexander
> -Original Message-
> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf
> Of Xiangliang Yu
> Sent: Monday, April 24, 2017 2:58 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Yu, Xiangliang
> Subject: [PATCH 07/11] drm/amdgpu/virt: add two functions for MM table
> 
> Add two functions to allocate & free MM table memory.
> 
> Signed-off-by: Xiangliang Yu 

Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 46
> 
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h |  2 ++
>  2 files changed, 48 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index 7fce7b5..1363239 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -227,3 +227,49 @@ int amdgpu_virt_reset_gpu(struct amdgpu_device
> *adev)
> 
>   return 0;
>  }
> +
> +/**
> + * amdgpu_virt_alloc_mm_table() - alloc memory for mm table
> + * @amdgpu:  amdgpu device.
> + * MM table is used by UVD and VCE for its initialization
> + * Return: Zero if allocate success.
> + */
> +int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev)
> +{
> + int r;
> +
> + if (!amdgpu_sriov_vf(adev) || adev->virt.mm_table.gpu_addr)
> + return 0;
> +
> + r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_VRAM,
> + >virt.mm_table.bo,
> + >virt.mm_table.gpu_addr,
> + (void *)>virt.mm_table.cpu_addr);
> + if (r) {
> + DRM_ERROR("failed to alloc mm table and error = %d.\n", r);
> + return r;
> + }
> +
> + memset((void *)adev->virt.mm_table.cpu_addr, 0, PAGE_SIZE);
> + DRM_INFO("MM table gpu addr = 0x%llx, cpu addr = %p.\n",
> +  adev->virt.mm_table.gpu_addr,
> +  adev->virt.mm_table.cpu_addr);
> + return 0;
> +}
> +
> +/**
> + * amdgpu_virt_free_mm_table() - free mm table memory
> + * @amdgpu:  amdgpu device.
> + * Free MM table memory
> + */
> +void amdgpu_virt_free_mm_table(struct amdgpu_device *adev)
> +{
> + if (!amdgpu_sriov_vf(adev) || !adev->virt.mm_table.gpu_addr)
> + return;
> +
> + amdgpu_bo_free_kernel(>virt.mm_table.bo,
> +   >virt.mm_table.gpu_addr,
> +   (void *)>virt.mm_table.cpu_addr);
> + adev->virt.mm_table.gpu_addr = 0;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> index 1ee0a19..a8ed162 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> @@ -98,5 +98,7 @@ int amdgpu_virt_request_full_gpu(struct
> amdgpu_device *adev, bool init);
>  int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
>  int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
>  int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary);
> +int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev);
> +void amdgpu_virt_free_mm_table(struct amdgpu_device *adev);
> 
>  #endif
> --
> 2.7.4
> 
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 11/11] drm/amdgpu/uvd7: add UVD hw init sequences for sriov

2017-04-24 Thread Deucher, Alexander
> -Original Message-
> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf
> Of Xiangliang Yu
> Sent: Monday, April 24, 2017 2:58 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Yu, Xiangliang; Min, Frank
> Subject: [PATCH 11/11] drm/amdgpu/uvd7: add UVD hw init sequences for
> sriov
> 
> From: Frank Min 
> 
> Add UVD hw init.
> 
> Signed-off-by: Frank Min 
> Signed-off-by: Xiangliang Yu 

This needs to land before patch 6 as well.  With that fixed:
Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 92 --
> -
>  1 file changed, 54 insertions(+), 38 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> index a294f05..e0b7ded 100644
> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> @@ -368,7 +368,10 @@ static int uvd_v7_0_early_init(void *handle)
>  {
>   struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> 
> - adev->uvd.num_enc_rings = 2;
> + if (amdgpu_sriov_vf(adev))
> + adev->uvd.num_enc_rings = 1;
> + else
> + adev->uvd.num_enc_rings = 2;
>   uvd_v7_0_set_ring_funcs(adev);
>   uvd_v7_0_set_enc_ring_funcs(adev);
>   uvd_v7_0_set_irq_funcs(adev);
> @@ -421,12 +424,14 @@ static int uvd_v7_0_sw_init(void *handle)
>   r = amdgpu_uvd_resume(adev);
>   if (r)
>   return r;
> + if (!amdgpu_sriov_vf(adev)) {
> + ring = >uvd.ring;
> + sprintf(ring->name, "uvd");
> + r = amdgpu_ring_init(adev, ring, 512, >uvd.irq, 0);
> + if (r)
> + return r;
> + }
> 
> - ring = >uvd.ring;
> - sprintf(ring->name, "uvd");
> - r = amdgpu_ring_init(adev, ring, 512, >uvd.irq, 0);
> - if (r)
> - return r;
> 
>   for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
>   ring = >uvd.ring_enc[i];
> @@ -445,6 +450,10 @@ static int uvd_v7_0_sw_init(void *handle)
>   return r;
>   }
> 
> + r = amdgpu_virt_alloc_mm_table(adev);
> + if (r)
> + return r;
> +
>   return r;
>  }
> 
> @@ -453,6 +462,8 @@ static int uvd_v7_0_sw_fini(void *handle)
>   int i, r;
>   struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> 
> + amdgpu_virt_free_mm_table(adev);
> +
>   r = amdgpu_uvd_suspend(adev);
>   if (r)
>   return r;
> @@ -479,48 +490,53 @@ static int uvd_v7_0_hw_init(void *handle)
>   uint32_t tmp;
>   int i, r;
> 
> - r = uvd_v7_0_start(adev);
> + if (amdgpu_sriov_vf(adev))
> + r = uvd_v7_0_sriov_start(adev);
> + else
> + r = uvd_v7_0_start(adev);
>   if (r)
>   goto done;
> 
> - ring->ready = true;
> - r = amdgpu_ring_test_ring(ring);
> - if (r) {
> - ring->ready = false;
> - goto done;
> - }
> + if (!amdgpu_sriov_vf(adev)) {
> + ring->ready = true;
> + r = amdgpu_ring_test_ring(ring);
> + if (r) {
> + ring->ready = false;
> + goto done;
> + }
> 
> - r = amdgpu_ring_alloc(ring, 10);
> - if (r) {
> - DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n",
> r);
> - goto done;
> - }
> + r = amdgpu_ring_alloc(ring, 10);
> + if (r) {
> + DRM_ERROR("amdgpu: ring failed to lock UVD ring
> (%d).\n", r);
> + goto done;
> + }
> 
> - tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
> - mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL), 0);
> - amdgpu_ring_write(ring, tmp);
> - amdgpu_ring_write(ring, 0xF);
> + tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
> + mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL), 0);
> + amdgpu_ring_write(ring, tmp);
> + amdgpu_ring_write(ring, 0xF);
> 
> - tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
> - mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL), 0);
> - amdgpu_ring_write(ring, tmp);
> - amdgpu_ring_write(ring, 0xF);
> + tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
> +
>   mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL), 0);
> + amdgpu_ring_write(ring, tmp);
> + amdgpu_ring_write(ring, 0xF);
> 
> - tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
> - mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL), 0);
> - amdgpu_ring_write(ring, tmp);
> - amdgpu_ring_write(ring, 0xF);
> + tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
> +
>   mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL), 0);
> + amdgpu_ring_write(ring, tmp);
> + amdgpu_ring_write(ring, 0xF);
> 
> - /* Clear timeout status bits */
> - amdgpu_ring_write(ring, 

RE: [PATCH 09/11] drm/amdgpu/uvd7: add sriov uvd initialization sequences

2017-04-24 Thread Deucher, Alexander
> -Original Message-
> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf
> Of Xiangliang Yu
> Sent: Monday, April 24, 2017 2:58 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Yu, Xiangliang; Min, Frank
> Subject: [PATCH 09/11] drm/amdgpu/uvd7: add sriov uvd initialization
> sequences
> 
> From: Frank Min 
> 
> Add UVD initialization for SRIOV.
> 
> Signed-off-by: Frank Min 
> Signed-off-by: Xiangliang Yu 

This patch should come before patch 6 since 6 enables the functionality on 
sr-iov and this patch makes it work.  With that fixed:
Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 246
> ++
>  1 file changed, 246 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> index bf35d56..fb3da07 100644
> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> @@ -27,10 +27,14 @@
>  #include "amdgpu_uvd.h"
>  #include "soc15d.h"
>  #include "soc15_common.h"
> +#include "mmsch_v1_0.h"
> 
>  #include "vega10/soc15ip.h"
>  #include "vega10/UVD/uvd_7_0_offset.h"
>  #include "vega10/UVD/uvd_7_0_sh_mask.h"
> +#include "vega10/VCE/vce_4_0_offset.h"
> +#include "vega10/VCE/vce_4_0_default.h"
> +#include "vega10/VCE/vce_4_0_sh_mask.h"
>  #include "vega10/NBIF/nbif_6_1_offset.h"
>  #include "vega10/HDP/hdp_4_0_offset.h"
>  #include "vega10/MMHUB/mmhub_1_0_offset.h"
> @@ -41,6 +45,7 @@ static void uvd_v7_0_set_enc_ring_funcs(struct
> amdgpu_device *adev);
>  static void uvd_v7_0_set_irq_funcs(struct amdgpu_device *adev);
>  static int uvd_v7_0_start(struct amdgpu_device *adev);
>  static void uvd_v7_0_stop(struct amdgpu_device *adev);
> +static int uvd_v7_0_sriov_start(struct amdgpu_device *adev);
> 
>  /**
>   * uvd_v7_0_ring_get_rptr - get read pointer
> @@ -618,6 +623,247 @@ static void uvd_v7_0_mc_resume(struct
> amdgpu_device *adev)
>   WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH4),
> adev->uvd.max_handles);
>  }
> 
> +static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev,
> + struct amdgpu_mm_table *table)
> +{
> + uint32_t data = 0, loop;
> + uint64_t addr = table->gpu_addr;
> + struct mmsch_v1_0_init_header *header = (struct
> mmsch_v1_0_init_header *)table->cpu_addr;
> + uint32_t size;
> +
> + size = header->header_size + header->vce_table_size + header-
> >uvd_table_size;
> +
> + /* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc
> addr of memory descriptor location */
> + WREG32(SOC15_REG_OFFSET(VCE, 0,
> mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
> + WREG32(SOC15_REG_OFFSET(VCE, 0,
> mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));
> +
> + /* 2, update vmid of descriptor */
> + data = RREG32(SOC15_REG_OFFSET(VCE, 0,
> mmVCE_MMSCH_VF_VMID));
> + data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
> + data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /*
> use domain0 for MM scheduler */
> + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID),
> data);
> +
> + /* 3, notify mmsch about the size of this descriptor */
> + WREG32(SOC15_REG_OFFSET(VCE, 0,
> mmVCE_MMSCH_VF_CTX_SIZE), size);
> +
> + /* 4, set resp to zero */
> + WREG32(SOC15_REG_OFFSET(VCE, 0,
> mmVCE_MMSCH_VF_MAILBOX_RESP), 0);
> +
> + /* 5, kick off the initialization and wait until
> VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
> + WREG32(SOC15_REG_OFFSET(VCE, 0,
> mmVCE_MMSCH_VF_MAILBOX_HOST), 0x1001);
> +
> + data = RREG32(SOC15_REG_OFFSET(VCE, 0,
> mmVCE_MMSCH_VF_MAILBOX_RESP));
> + loop = 1000;
> + while ((data & 0x1002) != 0x1002) {
> + udelay(10);
> + data = RREG32(SOC15_REG_OFFSET(VCE, 0,
> mmVCE_MMSCH_VF_MAILBOX_RESP));
> + loop--;
> + if (!loop)
> + break;
> + }
> +
> + if (!loop) {
> + dev_err(adev->dev, "failed to init MMSCH,
> mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
> + return -EBUSY;
> + }
> +
> + return 0;
> +}
> +
> +static int uvd_v7_0_sriov_start(struct amdgpu_device *adev)
> +{
> + struct amdgpu_ring *ring;
> + uint32_t offset, size, tmp;
> + uint32_t table_size = 0;
> + struct mmsch_v1_0_cmd_direct_write direct_wt = { {0} };
> + struct mmsch_v1_0_cmd_direct_read_modify_write
> direct_rd_mod_wt = { {0} };
> + struct mmsch_v1_0_cmd_direct_polling direct_poll = { {0} };
> + //struct mmsch_v1_0_cmd_indirect_write indirect_wt = {{0}};
> + struct mmsch_v1_0_cmd_end end = { {0} };
> + uint32_t *init_table = adev->virt.mm_table.cpu_addr;
> + struct mmsch_v1_0_init_header *header = (struct
> mmsch_v1_0_init_header *)init_table;
> +
> + direct_wt.cmd_header.command_type =
> MMSCH_COMMAND__DIRECT_REG_WRITE;
> + direct_rd_mod_wt.cmd_header.command_type =
> 

RE: [PATCH 06/11] drm/amdgpu/soc15: enable UVD code path for sriov

2017-04-24 Thread Deucher, Alexander
> -Original Message-
> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf
> Of Xiangliang Yu
> Sent: Monday, April 24, 2017 2:58 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Yu, Xiangliang; Min, Frank
> Subject: [PATCH 06/11] drm/amdgpu/soc15: enable UVD code path for sriov
> 
> From: Frank Min 
> 
> Enable UVD block for SRIOV.
> 
> Signed-off-by: Frank Min 
> Signed-off-by: Xiangliang Yu 

Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/soc15.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c
> b/drivers/gpu/drm/amd/amdgpu/soc15.c
> index 6999ac3..4e514b2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
> @@ -482,8 +482,7 @@ int soc15_set_ip_blocks(struct amdgpu_device
> *adev)
>  #endif
>   amdgpu_ip_block_add(adev, _v9_0_ip_block);
>   amdgpu_ip_block_add(adev, _v4_0_ip_block);
> - if (!amdgpu_sriov_vf(adev))
> - amdgpu_ip_block_add(adev, _v7_0_ip_block);
> + amdgpu_ip_block_add(adev, _v7_0_ip_block);
>   amdgpu_ip_block_add(adev, _v4_0_ip_block);
>   break;
>   default:
> --
> 2.7.4
> 
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 05/11] drm/amdgpu/vce4: move mm table constructions functions into mmsch header file

2017-04-24 Thread Deucher, Alexander
> -Original Message-
> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf
> Of Xiangliang Yu
> Sent: Monday, April 24, 2017 2:58 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Yu, Xiangliang; Min, Frank
> Subject: [PATCH 05/11] drm/amdgpu/vce4: move mm table constructions
> functions into mmsch header file
> 
> From: Frank Min 
> 
> Move mm table construction functions into mmsch header file so that
> UVD can reuse it.
> 
> Signed-off-by: Frank Min 
> Signed-off-by: Xiangliang Yu 
> ---
>  drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h | 57
> +
>  drivers/gpu/drm/amd/amdgpu/vce_v4_0.c   | 57 
> -
>  2 files changed, 57 insertions(+), 57 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h
> b/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h
> index 5f0fc8b..f048f91 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h
> +++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h
> @@ -84,4 +84,61 @@ struct mmsch_v1_0_cmd_indirect_write {
>   uint32_t reg_value;
>  };
> 
> +static inline void mmsch_insert_direct_wt(struct

Please change the names of the exported functions to have the v1_0 in the name. 
 E.g.,
mmsch_v1_0_insert_direct_wt()
With that fixed:
Reviewed-by: Alex Deucher 

> mmsch_v1_0_cmd_direct_write *direct_wt,
> +   uint32_t *init_table,
> +   uint32_t reg_offset,
> +   uint32_t value)
> +{
> + direct_wt->cmd_header.reg_offset = reg_offset;
> + direct_wt->reg_value = value;
> + memcpy((void *)init_table, direct_wt, sizeof(struct
> mmsch_v1_0_cmd_direct_write));
> +}
> +
> +static inline void mmsch_insert_direct_rd_mod_wt(struct
> mmsch_v1_0_cmd_direct_read_modify_write *direct_rd_mod_wt,
> +  uint32_t *init_table,
> +  uint32_t reg_offset,
> +  uint32_t mask, uint32_t data)
> +{
> + direct_rd_mod_wt->cmd_header.reg_offset = reg_offset;
> + direct_rd_mod_wt->mask_value = mask;
> + direct_rd_mod_wt->write_data = data;
> + memcpy((void *)init_table, direct_rd_mod_wt,
> +sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write));
> +}
> +
> +static inline void mmsch_insert_direct_poll(struct
> mmsch_v1_0_cmd_direct_polling *direct_poll,
> + uint32_t *init_table,
> + uint32_t reg_offset,
> + uint32_t mask, uint32_t wait)
> +{
> + direct_poll->cmd_header.reg_offset = reg_offset;
> + direct_poll->mask_value = mask;
> + direct_poll->wait_value = wait;
> + memcpy((void *)init_table, direct_poll, sizeof(struct
> mmsch_v1_0_cmd_direct_polling));
> +}
> +
> +#define INSERT_DIRECT_RD_MOD_WT(reg, mask, data) { \
> +	mmsch_insert_direct_rd_mod_wt(&direct_rd_mod_wt, \
> +   init_table, (reg), \
> +   (mask), (data)); \
> + init_table += sizeof(struct
> mmsch_v1_0_cmd_direct_read_modify_write)/4; \
> + table_size += sizeof(struct
> mmsch_v1_0_cmd_direct_read_modify_write)/4; \
> +}
> +
> +#define INSERT_DIRECT_WT(reg, value) { \
> +	mmsch_insert_direct_wt(&direct_wt, \
> +init_table, (reg), \
> +(value)); \
> + init_table += sizeof(struct mmsch_v1_0_cmd_direct_write)/4; \
> + table_size += sizeof(struct mmsch_v1_0_cmd_direct_write)/4; \
> +}
> +
> +#define INSERT_DIRECT_POLL(reg, mask, wait) { \
> +	mmsch_insert_direct_poll(&direct_poll, \
> +  init_table, (reg), \
> +  (mask), (wait)); \
> + init_table += sizeof(struct mmsch_v1_0_cmd_direct_polling)/4; \
> + table_size += sizeof(struct mmsch_v1_0_cmd_direct_polling)/4; \
> +}
> +
>  #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
> b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
> index 1deb546..a3d9d4d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
> @@ -49,63 +49,6 @@ static void vce_v4_0_mc_resume(struct
> amdgpu_device *adev);
>  static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
>  static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
> 
> -static inline void mmsch_insert_direct_wt(struct
> mmsch_v1_0_cmd_direct_write *direct_wt,
> -   uint32_t *init_table,
> -   uint32_t reg_offset,
> -   uint32_t value)
> -{
> - direct_wt->cmd_header.reg_offset = reg_offset;
> - direct_wt->reg_value = value;
> - memcpy((void *)init_table, direct_wt, sizeof(struct
> mmsch_v1_0_cmd_direct_write));
> -}
> -
> 

RE: [PATCH 04/11] drm/amdgpu/vce4: fix a PSP loading VCE issue

2017-04-24 Thread Deucher, Alexander
> -Original Message-
> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf
> Of Xiangliang Yu
> Sent: Monday, April 24, 2017 2:58 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Wang, Daniel(Xiaowei); Yu, Xiangliang
> Subject: [PATCH 04/11] drm/amdgpu/vce4: fix a PSP loading VCE issue
> 
> From: Daniel Wang 
> 
> Fixed PSP loading issue for sriov.
> 
> Signed-off-by: Daniel Wang 
> Signed-off-by: Xiangliang Yu 

Acked-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 18 +++---
>  1 file changed, 15 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
> b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
> index 76fc8ed..1deb546 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
> @@ -291,9 +291,21 @@ static int vce_v4_0_sriov_start(struct
> amdgpu_device *adev)
>   INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
> mmVCE_LMI_SWAP_CNTL1), 0);
>   INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
> mmVCE_LMI_VM_CTRL), 0);
> 
> - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
> mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), adev->vce.gpu_addr >> 8);
> - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
> mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), adev->vce.gpu_addr >> 8);
> - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
> mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), adev->vce.gpu_addr >> 8);
> + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)
> {
> + INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
> mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
> + adev-
> >firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
> + INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
> mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
> + adev-
> >firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
> + INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
> mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
> + adev-
> >firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
> + } else {
> + INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
> mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
> + adev->vce.gpu_addr >> 8);
> + INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
> mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
> + adev->vce.gpu_addr >> 8);
> + INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
> mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
> + adev->vce.gpu_addr >> 8);
> + }
> 
>   offset = AMDGPU_VCE_FIRMWARE_OFFSET;
>   size = VCE_V4_0_FW_SIZE;
> --
> 2.7.4
> 
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 02/11] drm/amdgpu/virt: change the place of virt_init_setting

2017-04-24 Thread Deucher, Alexander
> -Original Message-
> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf
> Of Xiangliang Yu
> Sent: Monday, April 24, 2017 2:58 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Yu, Xiangliang
> Subject: [PATCH 02/11] drm/amdgpu/virt: change the place of
> virt_init_setting
> 
> Change place of virt_init_setting function so that can cover the
> cg and pg flags configuration.
> 
> Signed-off-by: Xiangliang Yu 

Reviewed-by: Alex Deucher 


> ---
>  drivers/gpu/drm/amd/amdgpu/soc15.c | 10 +-
>  drivers/gpu/drm/amd/amdgpu/vi.c| 10 +-
>  2 files changed, 10 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c
> b/drivers/gpu/drm/amd/amdgpu/soc15.c
> index 2c05dab..6999ac3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
> @@ -538,11 +538,6 @@ static int soc15_common_early_init(void *handle)
>   (amdgpu_ip_block_mask & (1 <<
> AMD_IP_BLOCK_TYPE_PSP)))
>   psp_enabled = true;
> 
> - if (amdgpu_sriov_vf(adev)) {
> - amdgpu_virt_init_setting(adev);
> - xgpu_ai_mailbox_set_irq_funcs(adev);
> - }
> -
>   /*
>* nbio need be used for both sdma and gfx9, but only
>* initializes once
> @@ -586,6 +581,11 @@ static int soc15_common_early_init(void *handle)
>   return -EINVAL;
>   }
> 
> + if (amdgpu_sriov_vf(adev)) {
> + amdgpu_virt_init_setting(adev);
> + xgpu_ai_mailbox_set_irq_funcs(adev);
> + }
> +
>   adev->firmware.load_type = amdgpu_ucode_get_load_type(adev,
> amdgpu_fw_load_type);
> 
>   amdgpu_get_pcie_info(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c
> b/drivers/gpu/drm/amd/amdgpu/vi.c
> index 505c17a..48fb373 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vi.c
> @@ -895,11 +895,6 @@ static int vi_common_early_init(void *handle)
>   (amdgpu_ip_block_mask & (1 <<
> AMD_IP_BLOCK_TYPE_SMC)))
>   smc_enabled = true;
> 
> - if (amdgpu_sriov_vf(adev)) {
> - amdgpu_virt_init_setting(adev);
> - xgpu_vi_mailbox_set_irq_funcs(adev);
> - }
> -
>   adev->rev_id = vi_get_rev_id(adev);
>   adev->external_rev_id = 0xFF;
>   switch (adev->asic_type) {
> @@ -1072,6 +1067,11 @@ static int vi_common_early_init(void *handle)
>   return -EINVAL;
>   }
> 
> + if (amdgpu_sriov_vf(adev)) {
> + amdgpu_virt_init_setting(adev);
> + xgpu_vi_mailbox_set_irq_funcs(adev);
> + }
> +
>   /* vi use smc load by default */
>   adev->firmware.load_type = amdgpu_ucode_get_load_type(adev,
> amdgpu_fw_load_type);
> 
> --
> 2.7.4
> 
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 01/11] drm/amdgpu/virt: bypass cg and pg setting for SRIOV

2017-04-24 Thread Deucher, Alexander
> -Original Message-
> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf
> Of Xiangliang Yu
> Sent: Monday, April 24, 2017 2:58 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Min, Frank; Yu, Xiangliang
> Subject: [PATCH 01/11] drm/amdgpu/virt: bypass cg and pg setting for SRIOV
> 
> GPU hypervisor cover all settings of CG and PG, so guest doesn't
> need to do anything. Bypass it.
> 
> Signed-off-by: Frank Min 
> Signed-off-by: Xiangliang Yu 

Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index be43823..7fce7b5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -105,6 +105,8 @@ void amdgpu_virt_init_setting(struct amdgpu_device
> *adev)
>   /* enable virtual display */
>   adev->mode_info.num_crtc = 1;
>   adev->enable_virtual_display = true;
> + adev->cg_flags = 0;
> + adev->pg_flags = 0;
> 
>   mutex_init(>virt.lock_kiq);
>   mutex_init(>virt.lock_reset);
> --
> 2.7.4
> 
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] LRU map compute/SDMA user ring ids to kernel ring ids v2

2017-04-24 Thread Andres Rodriguez



On 2017-04-24 11:25 AM, Alex Deucher wrote:

On Mon, Apr 24, 2017 at 3:40 AM, Christian König
 wrote:

I wanted to push that to our internal branch, but found that it doesn't
apply cleanly. So please rebase on alex amd-staging-4.9 branch.


Please stick with my drm-next-wip tree.  It will make it easier for me
to integrate.

Alex




I'm guessing the tradeoff here would be a delay getting into drm-next. 
I'm okay with that as long as the patch series makes it in time for the 
4.13 feature merge window.


One exception, I would like to get the IOCTL interface patch submitted 
into a branch that will land on drm-next sooner rather than later so 
that I can begin submitting the corresponding patches to the userspace 
components. Should this be amd-staging-4.9? Or should it be somewhere else?


Regards,
Andres



Additional to that you forgot my rb on patch #4.

Regards,
Christian.


Am 22.04.2017 um 01:48 schrieb Andres Rodriguez:


V2 updates:
  * Rebased
  * All patches now have r-b

Second part of the split of the series:
Add support for high priority scheduling in amdgpu v8

These patches should be close to being good enough to land.

The first two patches are simple fixes I've ported from the ROCm branch.
These
still need review.

I've fixed all of Christian's comments for patch 04:
drm/amdgpu: implement lru amdgpu_queue_mgr policy for compute v4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx




___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] LRU map compute/SDMA user ring ids to kernel ring ids v2

2017-04-24 Thread Alex Deucher
On Mon, Apr 24, 2017 at 3:40 AM, Christian König
 wrote:
> I wanted to push that to our internal branch, but found that it doesn't
> apply cleanly. So please rebase on alex amd-staging-4.9 branch.

Please stick with my drm-next-wip tree.  It will make it easier for me
to integrate.

Alex


>
> Additional to that you forgot my rb on patch #4.
>
> Regards,
> Christian.
>
>
> Am 22.04.2017 um 01:48 schrieb Andres Rodriguez:
>>
>> V2 updates:
>>   * Rebased
>>   * All patches now have r-b
>>
>> Second part of the split of the series:
>> Add support for high priority scheduling in amdgpu v8
>>
>> These patches should be close to being good enough to land.
>>
>> The first two patches are simple fixes I've ported from the ROCm branch.
>> These
>> still need review.
>>
>> I've fixed all of Christian's comments for patch 04:
>> drm/amdgpu: implement lru amdgpu_queue_mgr policy for compute v4
>>
>> ___
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
>
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/1] drm/radeon: check return value of radeon_ring_lock

2017-04-24 Thread Pan Bian
From: Pan Bian 

Function radeon_ring_lock() returns an errno on failure, and its return
value should be validated. However, in functions r420_cp_errata_init()
and r420_cp_errata_fini(), its return value is not checked. This patch
adds the checks.

Signed-off-by: Pan Bian 
---
 drivers/gpu/drm/radeon/r420.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c
index 2828605..391c764 100644
--- a/drivers/gpu/drm/radeon/r420.c
+++ b/drivers/gpu/drm/radeon/r420.c
@@ -206,6 +206,7 @@ static void r420_clock_resume(struct radeon_device *rdev)
 
 static void r420_cp_errata_init(struct radeon_device *rdev)
 {
+   int r;
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 
/* RV410 and R420 can lock up if CP DMA to host memory happens
@@ -215,7 +216,8 @@ static void r420_cp_errata_init(struct radeon_device *rdev)
 * of the CP init, apparently.
 */
	radeon_scratch_get(rdev, &rdev->config.r300.resync_scratch);
-   radeon_ring_lock(rdev, ring, 8);
+   r = radeon_ring_lock(rdev, ring, 8);
+   WARN_ON(r);
radeon_ring_write(ring, PACKET0(R300_CP_RESYNC_ADDR, 1));
radeon_ring_write(ring, rdev->config.r300.resync_scratch);
radeon_ring_write(ring, 0xDEADBEEF);
@@ -224,12 +226,14 @@ static void r420_cp_errata_init(struct radeon_device 
*rdev)
 
 static void r420_cp_errata_fini(struct radeon_device *rdev)
 {
+   int r;
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 
/* Catch the RESYNC we dispatched all the way back,
 * at the very beginning of the CP init.
 */
-   radeon_ring_lock(rdev, ring, 8);
+   r = radeon_ring_lock(rdev, ring, 8);
+   WARN_ON(r);
radeon_ring_write(ring, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
radeon_ring_write(ring, R300_RB3D_DC_FINISH);
radeon_ring_unlock_commit(rdev, ring, false);
-- 
1.9.1


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/1] drm/radeon: check return value of radeon_fence_emit

2017-04-24 Thread Pan Bian
From: Pan Bian 

Function radeon_fence_emit() returns -ENOMEM if there is no enough
memory. And in this case, function radeon_ring_unlock_undo() rather than
function radeon_ring_unlock_commit() should be called. However, in
function radeon_test_create_and_emit_fence(), the return value of
radeon_fence_emit() is ignored. This patch adds the check.

Signed-off-by: Pan Bian 
---
 drivers/gpu/drm/radeon/radeon_test.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/radeon_test.c 
b/drivers/gpu/drm/radeon/radeon_test.c
index 79181816..623768e 100644
--- a/drivers/gpu/drm/radeon/radeon_test.c
+++ b/drivers/gpu/drm/radeon/radeon_test.c
@@ -298,7 +298,12 @@ static int radeon_test_create_and_emit_fence(struct 
radeon_device *rdev,
DRM_ERROR("Failed to lock ring A %d\n", ring->idx);
return r;
}
-   radeon_fence_emit(rdev, fence, ring->idx);
+   r = radeon_fence_emit(rdev, fence, ring->idx);
+   if (r) {
+   DRM_ERROR("Failed to emit fence\n");
+   radeon_ring_unlock_undo(rdev, ring);
+   return r;
+   }
radeon_ring_unlock_commit(rdev, ring, false);
}
return 0;
-- 
1.9.1


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: fix s3 ring test failed on Vi caused by KIQ enabled.

2017-04-24 Thread Rex Zhu
commit 2f3ea04dc9258cc can't fix s3 issue completely.

Change-Id: I31d0ba7d143b11543bbb83273c022d8e77f34e08
Signed-off-by: Rex Zhu 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 563944a..2ff5f19 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4972,7 +4972,6 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring 
*ring)
/* reset ring buffer */
ring->wptr = 0;
amdgpu_ring_clear_ring(ring);
-
	mutex_lock(&adev->srbm_mutex);
vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
gfx_v8_0_kiq_init_register(ring);
@@ -5014,12 +5013,12 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring 
*ring)
/* reset MQD to a clean status */
if (adev->gfx.mec.mqd_backup[mqd_idx])
memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], 
sizeof(*mqd));
-
/* reset ring buffer */
ring->wptr = 0;
amdgpu_ring_clear_ring(ring);
+   } else {
+   amdgpu_ring_clear_ring(ring);
}
-
return 0;
 }
 
-- 
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 0/6] drm: tackle byteorder issues, take two

2017-04-24 Thread Ville Syrjälä
On Mon, Apr 24, 2017 at 04:54:25PM +0900, Michel Dänzer wrote:
> On 24/04/17 04:36 PM, Gerd Hoffmann wrote:
> > 
> >>>   drm: fourcc byteorder: add DRM_FORMAT_CPU_*
> >>>   drm: fourcc byteorder: add bigendian support to
> >>> drm_mode_legacy_fb_format
> >>
> >> As I explained in my last followup in the "[PATCH] drm: fourcc
> >> byteorder: brings header file comments in line with reality." thread,
> >> the mapping between GPU and CPU formats has to be provided by the
> >> driver, it cannot be done statically.
> > 
> > Well, the drm fourcc codes represent the cpu view (i.e. what userspace
> > will fill the ADDFB2-created framebuffers with).
> 
> Ville is adamant that they represent the GPU view. This needs to be
> resolved one way or the other.

Since the byte swapping can happen either for CPU or display access
I guess we can't just consider the GPU and display as a single entity.

We may need to consider several agents:
1. display
2. GPU
3. CPU
4. other DMA

Not sure what we can say about 4. I presume it's going to be like the
GPU or the CPU in the sense that it might go through the CPU byte
swapping logic or not. I'm just going to ignore it.

Let's say we have the following bytes in memory
(in order of increasing address): A,B,C,D
We'll assume GPU and display are LE natively. Each component will see
the resulting 32bpp  pixel as follows (msb left->lsb right):

LE CPU w/ no byte swapping:
 display: DCBA
 GPU: DCBA
 CPU: DCBA
 = everyone agrees

BE CPU w/ no byte swapping:
 display: DCBA
 GPU: DCBA
 CPU: ABCD
 = GPU and display agree

BE CPU w/ display byte swapping:
 display: ABCD
 GPU: DCBA
 CPU: ABCD
 = CPU and display agree

BE CPU w/ CPU access byte swapping:
 display: DCBA
 GPU: DCBA
 CPU: DCBA
 = everyone agrees

BE CPU w/ both display and CPU byte swapping:
 display: ABCD
 GPU: DCBA
 CPU: DCBA
 = CPU and GPU agree (doesn't seem all that useful)

The different byte swapping tricks must have seemed like a great idea to
someone, but in the end they're just making our life more miserable.

> > The gpu view can certainly differ from that.  Implementing this is up
> > to the driver IMO.
> > 
> > When running on dumb framebuffers userspace doesn't need to know what
> > the gpu view is.

True. So for that we'd just need to consider whether the CPU and display
agree or disagree on the byte order. And I guess we'd have to pick from
the following choices for a BE CPU:

CPU and display agree:
 * FB is big endian, or FB is host endian (or whatever we would call it)
CPU and display disagree:
 * FB is little endian, or FB is foreign endian (or whatever)

> > 
> > When running in opengl mode there will be a hardware-specific mesa
> > driver in userspace, which will either know what the gpu view is (for
> > example because there is only one way to implement this in hardware) or
> > it can use hardware-specific ioctls to ask the kernel driver what the
> > gpu view is.
> 
> Not sure this can be hidden in the OpenGL driver. How would e.g. a
> Wayland compositor or the Xorg modesetting driver know which OpenGL
> format corresponds to a given DRM_FORMAT?

How are GL formats defined? /me needs to go read the spec again.

-- 
Ville Syrjälä
Intel OTC
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm: fourcc byteorder: brings header file comments in line with reality.

2017-04-24 Thread Ville Syrjälä
On Mon, Apr 24, 2017 at 03:57:02PM +0900, Michel Dänzer wrote:
> On 22/04/17 07:05 PM, Ville Syrjälä wrote:
> > On Fri, Apr 21, 2017 at 06:14:31PM +0200, Gerd Hoffmann wrote:
> >>   Hi,
> >>
>  My personal opinion is that formats in drm_fourcc.h should be 
>  independent of the CPU byte order and the function 
>  drm_mode_legacy_fb_format() and drivers depending on that incorrect 
>  assumption be fixed instead.
> >>>
> >>> The problem is this isn't a kernel-internal thing any more.  With the
> >>> addition of the ADDFB2 ioctl the fourcc codes became part of the
> >>> kernel/userspace abi ...
> >>
> >> Ok, added some printk's to the ADDFB and ADDFB2 code paths and tested a
> >> bit.  Apparently pretty much all userspace still uses the ADDFB ioctl.
> >> xorg (modesetting driver) does.  gnome-shell in wayland mode does.
> >> Seems the big transition to ADDFB2 didn't happen yet.
> >>
> >> I guess that makes changing drm_mode_legacy_fb_format + drivers a
> >> reasonable option ...
> > 
> > Yeah, I came to the same conclusion after chatting with some
> > folks on irc.
> > 
> > So my current idea is that we change any driver that wants to follow the
> > CPU endianness
> 
> This isn't really optional for various reasons, some of which have been
> covered in this discussion.
> 
> 
> > to declare support for big endian formats if the CPU is
> > big endian. Presumably these are mostly the virtual GPU drivers.
> > 
> > Additonally we'll make the mapping performed by drm_mode_legacy_fb_format()
> > driver controlled. That way drivers that got changed to follow CPU
> > endianness can return a framebuffer that matches CPU endianness. And
> > drivers that expect the GPU endianness to not depend on the CPU
> > endianness will keep working as they do now. The downside is that users
> > of the legacy addfb ioctl will need to magically know which endianness
> > they will get, but that is apparently already the case. And users of
> > addfb2 will keep on specifying the endianness explicitly with
> > DRM_FORMAT_BIG_ENDIAN vs. 0.
> 
> I'm afraid it's not that simple.
> 
> The display hardware of older (pre-R600 generation) Radeon GPUs does not
> support the "big endian" formats directly. In order to allow userspace
> to access pixel data in native endianness with the CPU, we instead use
> byte-swapping functionality which only affects CPU access.

OK, I'm getting confused. Based on our irc discussion I got the
impression you don't byte swap CPU accesses. But since you do, how
do you deal with mixing 8bpp vs. 16bpp vs. 32bpp?

> This means
> that the GPU and CPU effectively see different representations of the
> same video memory contents.
> 
> Userspace code dealing with GPU access to pixel data needs to know the
> format as seen by the GPU, whereas code dealing with CPU access needs to
> know the format as seen by the CPU. I don't see any way to express this
> with a single format definition.

Hmm. Well that certainly makes life even more interesting.

-- 
Ville Syrjälä
Intel OTC
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH libdrm] amdgpu: Use the canonical form in branch predicate

2017-04-24 Thread Nicolai Hähnle

On 22.04.2017 08:47, Edward O'Callaghan wrote:

Suggested-by: Emil Velikov 
Signed-off-by: Edward O'Callaghan 


Reviewed-by: Nicolai Hähnle 


---
 amdgpu/amdgpu_cs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/amdgpu/amdgpu_cs.c b/amdgpu/amdgpu_cs.c
index 0993a6d..868eb7b 100644
--- a/amdgpu/amdgpu_cs.c
+++ b/amdgpu/amdgpu_cs.c
@@ -559,7 +559,7 @@ int amdgpu_cs_wait_semaphore(amdgpu_context_handle ctx,
if (ring >= AMDGPU_CS_MAX_RINGS)
return -EINVAL;
/* must signal first */
-   if (NULL == sem->signal_fence.context)
+   if (!sem->signal_fence.context)
return -EINVAL;

	pthread_mutex_lock(&sem->sequence_mutex);




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] [RFC] drm/radeon: clear WC flag when moving bo from vram to gtt

2017-04-24 Thread Michel Dänzer
On 24/04/17 07:01 PM, Julien Isorce wrote:
> 
> On 24 April 2017 at 10:51, Christian König  > wrote:
> 
> Am 24.04.2017 um 11:42 schrieb Julien Isorce:
> 
But re-add the flag if the bo is moved back to vram.
> 
> This fixes "ring 0/3 stalled" issue which happens when the driver
> evicts bo from vram to gtt, at least on TAHITI and CAPVERDE.
> 
> 
> Interesting find, but NAK on the approach for fixing it.
> 
> 
> Thx for the comments.
>  
> 
> 
> If WC mappings don't work for TAHITI and CAPVERDE we need to figure
> out why or at least disable them for those hardware generations in
> general.
> 
> 
> Should I extend
> https://cgit.freedesktop.org/~agd5f/linux/tree/drivers/gpu/drm/radeon/radeon_object.c?h=amd-staging-4.9#n228
> to BONAIRE (which will include VERDE and TAHITI) ? (to match
> https://cgit.freedesktop.org/~agd5f/linux/tree/drivers/gpu/drm/radeon/radeon_ib.c?h=amd-staging-4.9#n199
>  )

Not sure the issue is widespread / systemic enough to justify that
upstream. You can do whatever you deem appropriate in your project, of
course.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] [RFC] drm/radeon: clear WC flag when moving bo from vram to gtt

2017-04-24 Thread Michel Dänzer
On 24/04/17 06:51 PM, Christian König wrote:
> Am 24.04.2017 um 11:42 schrieb Julien Isorce:
>> But re-add the flag if the bo is moved back to vram.
>>
>> This fixes "ring 0/3 stalled" issue which happens when the driver
>> evicts bo from vram to gtt, at least on TAHITI and CAPVERDE.
> 
> Interesting find, but NAK on the approach for fixing it.
> 
> If WC mappings don't work for TAHITI and CAPVERDE we need to figure out
> why or at least disable them for those hardware generations in general.
> 
> Disabling WC for BOs swapped out from VRAM won't buy us much if the BO
> was initially created in GTT anyway.

Moreover, RADEON_GEM_GTT_WC shouldn't have any effect at all for a BO
which is currently in VRAM, so it's not clear how the patch makes any
difference. I suspect it might accidentally cause RADEON_GEM_GTT_WC to
be ignored altogether in radeon_vm_bo_update and/or
radeon_ttm_placement_from_domain.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] [RFC] drm/radeon: clear WC flag when moving bo from vram to gtt

2017-04-24 Thread Julien Isorce
On 24 April 2017 at 10:51, Christian König  wrote:

> Am 24.04.2017 um 11:42 schrieb Julien Isorce:
>
>> But re-add the flag if the bo is moved back to vram.
>>
>> This fixes "ring 0/3 stalled" issue which happens when the driver
>> evicts bo from vram to gtt, at least on TAHITI and CAPVERDE.
>>
>
> Interesting find, but NAK on the approach for fixing it.
>

Thx for the comments.


>
> If WC mappings don't work for TAHITI and CAPVERDE we need to figure out
> why or at least disable them for those hardware generations in general.
>

Should I extend
https://cgit.freedesktop.org/~agd5f/linux/tree/drivers/gpu/drm/radeon/radeon_object.c?h=amd-staging-4.9#n228
to BONAIRE (which will include VERDE and TAHITI) ? (to match
https://cgit.freedesktop.org/~agd5f/linux/tree/drivers/gpu/drm/radeon/radeon_ib.c?h=amd-staging-4.9#n199
 )

>
> Disabling WC for BOs swapped out from VRAM won't buy us much if the BO was
> initially created in GTT anyway.


Initially created in VRAM:
https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/radeon/r600_buffer_common.c#n164


>
>
> Christian.
>
>
>
>> I do not know the exact reason among the following:
>>- si_copy_dma from vram to gtt is slow if WC
>> (only for the non-visible part ? specific cases ?)
>>- Allow snooping (SNOOPED flag from radeon_vm_bo_update).
>>- WC should not be set at all for bo in the GTT
>>  (same reason why WC is only set for vram domain,
>>   see mesa::r600_init_resource_fields since mesa commit 5b6a0b7
>>   "gallium/radeon: set GTT WC on tiled textures")
>>- Bug in WC
>>- Same reason as why radeon_sa_bo_manager_init is not passing
>>  WC flags if older than CHIP_BONAIRE (see 810b73d1
>>  drm/radeon: Use write-combined CPU mappings of IBs on >= CIK)
>>- Same as b738ca5d
>>  Revert "drm/radeon: Use write-combined CPU mappings of ring ..."
>>- Same as 96ea47c0
>>  drm/radeon: Disable uncacheable CPU mappings of GTT with RV6xx
>>  see https://bugs.freedesktop.org/show_bug.cgi?id=91268#c2
>>
>> https://bugs.freedesktop.org/show_bug.cgi?id=100712
>> ---
>>   drivers/gpu/drm/radeon/radeon.h|  1 +
>>   drivers/gpu/drm/radeon/radeon_object.c |  1 +
>>   drivers/gpu/drm/radeon/radeon_ttm.c| 13 +
>>   3 files changed, 15 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/radeon/radeon.h
>> b/drivers/gpu/drm/radeon/radeon.h
>> index 7a39a35..9847f4e 100644
>> --- a/drivers/gpu/drm/radeon/radeon.h
>> +++ b/drivers/gpu/drm/radeon/radeon.h
>> @@ -518,6 +518,7 @@ struct radeon_bo {
>> struct radeon_mn*mn;
>> struct list_headmn_list;
>> +   u32 vram_flags;
>>   };
>>   #define gem_to_radeon_bo(gobj) container_of((gobj), struct radeon_bo,
>> gem_base)
>>   diff --git a/drivers/gpu/drm/radeon/radeon_object.c
>> b/drivers/gpu/drm/radeon/radeon_object.c
>> index a557869..870f6b0 100644
>> --- a/drivers/gpu/drm/radeon/radeon_object.c
>> +++ b/drivers/gpu/drm/radeon/radeon_object.c
>> @@ -228,6 +228,7 @@ int radeon_bo_create(struct radeon_device *rdev,
>>RADEON_GEM_DOMAIN_CPU);
>> bo->flags = flags;
>> +   bo->vram_flags = 0;
>> /* PCI GART is always snooped */
>> if (!(rdev->flags & RADEON_IS_PCIE))
>> bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
>> diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c
>> b/drivers/gpu/drm/radeon/radeon_ttm.c
>> index d07ff84..a8743bd 100644
>> --- a/drivers/gpu/drm/radeon/radeon_ttm.c
>> +++ b/drivers/gpu/drm/radeon/radeon_ttm.c
>> @@ -440,6 +440,19 @@ static int radeon_bo_move(struct ttm_buffer_object
>> *bo,
>> r = radeon_move_ram_vram(bo, evict, interruptible,
>> no_wait_gpu, new_mem);
>> } else {
>> +   /* Clear WC flag when moving bo from vram to gtt. */
>> +   if (old_mem->mem_type == TTM_PL_VRAM && new_mem->mem_type
>> == TTM_PL_TT) {
>> +   if (rbo->flags & RADEON_GEM_GTT_WC) {
>> +   rbo->vram_flags |= RADEON_GEM_GTT_WC;
>> +   rbo->flags &= ~RADEON_GEM_GTT_WC;
>> +   }
>> +   /* Re-add WC flag when moving back from gtt to vram. */
>> +   } else if (old_mem->mem_type == TTM_PL_TT &&
>> new_mem->mem_type == TTM_PL_VRAM) {
>> +   if (rbo->vram_flags & RADEON_GEM_GTT_WC) {
>> +   rbo->flags |= RADEON_GEM_GTT_WC;
>> +   rbo->vram_flags &= ~RADEON_GEM_GTT_WC;
>> +   }
>> +   }
>> r = radeon_move_blit(bo, evict, no_wait_gpu, new_mem,
>> old_mem);
>> }
>>
>>
>
>
>
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org

Re: [PATCH libdrm] amdgpu: Use the canonical form in branch predicate

2017-04-24 Thread Emil Velikov
On 22 April 2017 at 07:47, Edward O'Callaghan
 wrote:
> Suggested-by: Emil Velikov 
> Signed-off-by: Edward O'Callaghan 
Reviewed-by: Emil Velikov 

Thanks
Emil
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] [RFC] drm/radeon: clear WC flag when moving bo from vram to gtt

2017-04-24 Thread Christian König

Am 24.04.2017 um 11:42 schrieb Julien Isorce:

But re-add the flag if the bo is moved back to vram.

This fixes "ring 0/3 stalled" issue which happens when the driver
evicts bo from vram to gtt, at least on TAHITI and CAPVERDE.


Interesting find, but NAK on the approach for fixing it.

If WC mappings don't work for TAHITI and CAPVERDE we need to figure out 
why or at least disable them for those hardware generations in general.


Disabling WC for BOs swapped out from VRAM won't buy us much if the BO 
was initially created in GTT anyway.


Christian.



I do not know the exact reason among the following:
   - si_copy_dma from vram to gtt is slow if WC
(only for the non-visible part ? specific cases ?)
   - Allow snooping (SNOOPED flag from radeon_vm_bo_update).
   - WC should not be set at all for bo in the GTT
 (same reason why WC is only set for vram domain,
  see mesa::r600_init_resource_fields since mesa commit 5b6a0b7
  "gallium/radeon: set GTT WC on tiled textures")
   - Bug in WC
   - Same reason as why radeon_sa_bo_manager_init is not passing
 WC flags if older than CHIP_BONAIRE (see 810b73d1
 drm/radeon: Use write-combined CPU mappings of IBs on >= CIK)
   - Same as b738ca5d
 Revert "drm/radeon: Use write-combined CPU mappings of ring ..."
   - Same as 96ea47c0
 drm/radeon: Disable uncacheable CPU mappings of GTT with RV6xx
 see https://bugs.freedesktop.org/show_bug.cgi?id=91268#c2

https://bugs.freedesktop.org/show_bug.cgi?id=100712
---
  drivers/gpu/drm/radeon/radeon.h|  1 +
  drivers/gpu/drm/radeon/radeon_object.c |  1 +
  drivers/gpu/drm/radeon/radeon_ttm.c| 13 +
  3 files changed, 15 insertions(+)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 7a39a35..9847f4e 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -518,6 +518,7 @@ struct radeon_bo {
  
  	struct radeon_mn		*mn;

struct list_headmn_list;
+   u32 vram_flags;
  };
  #define gem_to_radeon_bo(gobj) container_of((gobj), struct radeon_bo, 
gem_base)
  
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c

index a557869..870f6b0 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -228,6 +228,7 @@ int radeon_bo_create(struct radeon_device *rdev,
   RADEON_GEM_DOMAIN_CPU);
  
  	bo->flags = flags;

+   bo->vram_flags = 0;
/* PCI GART is always snooped */
if (!(rdev->flags & RADEON_IS_PCIE))
bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c 
b/drivers/gpu/drm/radeon/radeon_ttm.c
index d07ff84..a8743bd 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -440,6 +440,19 @@ static int radeon_bo_move(struct ttm_buffer_object *bo,
r = radeon_move_ram_vram(bo, evict, interruptible,
no_wait_gpu, new_mem);
} else {
+   /* Clear WC flag when moving bo from vram to gtt. */
+   if (old_mem->mem_type == TTM_PL_VRAM && new_mem->mem_type == 
TTM_PL_TT) {
+   if (rbo->flags & RADEON_GEM_GTT_WC) {
+   rbo->vram_flags |= RADEON_GEM_GTT_WC;
+   rbo->flags &= ~RADEON_GEM_GTT_WC;
+   }
+   /* Re-add WC flag when moving back from gtt to vram. */
+   } else if (old_mem->mem_type == TTM_PL_TT && new_mem->mem_type 
== TTM_PL_VRAM) {
+   if (rbo->vram_flags & RADEON_GEM_GTT_WC) {
+   rbo->flags |= RADEON_GEM_GTT_WC;
+   rbo->vram_flags &= ~RADEON_GEM_GTT_WC;
+   }
+   }
r = radeon_move_blit(bo, evict, no_wait_gpu, new_mem, old_mem);
}
  



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/2] drm/amdgpu: fix gpu reset crash

2017-04-24 Thread Christian König

Am 24.04.2017 um 11:40 schrieb Chunming Zhou:

[  413.687439] BUG: unable to handle kernel NULL pointer dereference at 
0548
[  413.687479] IP: [] to_live_kthread+0x5/0x60
[  413.687507] PGD 1efd12067
[  413.687519] PUD 1efd11067
[  413.687531] PMD 0

[  413.687543] Oops:  [#1] SMP
[  413.687557] Modules linked in: amdgpu(OE) ttm(OE) drm_kms_helper(E) drm(E) 
i2c_algo_bit(E) fb_sys_fops(E) syscopyarea(E) sysfillrect(E) sysimgblt(E) 
rpcsec_gss_krb5(E) nfsv4(E) nfs(E) fscache(E) snd_hda_codec_realtek(E) 
snd_hda_codec_generic(E) snd_hda_codec_hdmi(E) snd_hda_intel(E) eeepc_wmi(E) 
snd_hda_codec(E) asus_wmi(E) snd_hda_core(E) sparse_keymap(E) snd_hwdep(E) 
video(E) snd_pcm(E) snd_seq_midi(E) joydev(E) snd_seq_midi_event(E) 
snd_rawmidi(E) snd_seq(E) snd_seq_device(E) snd_timer(E) kvm(E) irqbypass(E) 
crct10dif_pclmul(E) snd(E) crc32_pclmul(E) ghash_clmulni_intel(E) soundcore(E) 
aesni_intel(E) aes_x86_64(E) lrw(E) gf128mul(E) glue_helper(E) ablk_helper(E) 
cryptd(E) shpchp(E) serio_raw(E) i2c_piix4(E) 8250_dw(E) 
i2c_designware_platform(E) i2c_designware_core(E) mac_hid(E) binfmt_misc(E)
[  413.687894]  parport_pc(E) ppdev(E) lp(E) parport(E) nfsd(E) auth_rpcgss(E) 
nfs_acl(E) lockd(E) grace(E) sunrpc(E) autofs4(E) hid_generic(E) usbhid(E) 
hid(E) psmouse(E) ahci(E) r8169(E) mii(E) libahci(E) wmi(E)
[  413.687989] CPU: 13 PID: 1134 Comm: kworker/13:2 Tainted: G   OE   
4.9.0-custom #4
[  413.688019] Hardware name: System manufacturer System Product Name/PRIME 
B350-PLUS, BIOS 0606 04/06/2017
[  413.688089] Workqueue: events amd_sched_job_timedout [amdgpu]
[  413.688116] task: 88020f9657c0 task.stack: c90001a88000
[  413.688139] RIP: 0010:[]  [] 
to_live_kthread+0x5/0x60
[  413.688171] RSP: 0018:c90001a8bd60  EFLAGS: 00010282
[  413.688191] RAX: 88020f0073f8 RBX: 88020f00 RCX: 
[  413.688217] RDX: 0001 RSI: 88020f9670c0 RDI: 
[  413.688243] RBP: c90001a8bd78 R08:  R09: 1000
[  413.688269] R10: 006051b11a82 R11: 0001 R12: 
[  413.688295] R13: 88020f002770 R14: 88020f004838 R15: 8801b23c2c60
[  413.688321] FS:  () GS:88021ef4() 
knlGS:
[  413.688352] CS:  0010 DS:  ES:  CR0: 80050033
[  413.688373] CR2: 0548 CR3: 0001efd0f000 CR4: 003406e0
[  413.688399] Stack:
[  413.688407]  8109b304 88020f00 0070 
c90001a8bdf0
[  413.688439]  a05ce29d a052feb7 a07b5820 
c90001a8bda0
[  413.688470]  0018 8801bb88f060 01a8bdb8 
88021ef59280
[  413.688502] Call Trace:
[  413.688514]  [] ? kthread_park+0x14/0x60
[  413.688555]  [] amdgpu_gpu_reset+0x7d/0x670 [amdgpu]
[  413.688589]  [] ? drm_printk+0x97/0xa0 [drm]
[  413.688643]  [] amdgpu_job_timedout+0x46/0x50 [amdgpu]
[  413.688700]  [] amd_sched_job_timedout+0x17/0x20 [amdgpu]
[  413.688727]  [] process_one_work+0x153/0x3f0
[  413.688751]  [] worker_thread+0x12b/0x4b0
[  413.688773]  [] ? do_syscall_64+0x6e/0x180
[  413.688795]  [] ? rescuer_thread+0x350/0x350
[  413.688818]  [] ? do_syscall_64+0x6e/0x180
[  413.688839]  [] kthread+0xd3/0xf0
[  413.688858]  [] ? kthread_park+0x60/0x60
[  413.61]  [] ret_from_fork+0x25/0x30
[  413.688901] Code: 25 40 d3 00 00 48 8b 80 48 05 00 00 48 89 e5 5d 48 8b 40 c8 48 
c1 e8 02 83 e0 01 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 <48> 8b b7 48 
05 00 00 55 48 89 e5 48 85 f6 74 31 8b 97 f8 18 00
[  413.689045] RIP  [] to_live_kthread+0x5/0x60
[  413.689064]  RSP 
[  413.689076] CR2: 0548
[  413.697985] ---[ end trace 0a314a64821f84e9 ]---

The root cause is some ring doesn't have scheduler, like KIQ ring

Change-Id: I420e84add9cdd9a7fd1f9921b8a5d0afa3dd2058
Signed-off-by: Chunming Zhou 


Reviewed-by: Christian König  for both.


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 7 ---
  1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 9993085..168a9de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2675,7 +2675,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
  
-		if (!ring)

+   if (!ring || !ring->sched.thread)
continue;
kcl_kthread_park(ring->sched.thread);
amd_sched_hw_job_reset(>sched);
@@ -2770,7 +2770,8 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
}
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
-   if (!ring)
+
+   if (!ring || !ring->sched.thread)

[PATCH 2/2] drm/amd/display: Prevent premature pageflip when committing in vblank. (v2)

2017-04-24 Thread Mario Kleiner
Make sure we do not program a hw pageflip inside vblank 'n' iff the
atomic flip is committed while inside the same vblank 'n'. We must
defer such a flip by one refresh cycle to vblank 'n+1'.

Without this, pageflips programmed via X11 GLX_OML_sync_control extensions
glXSwapBuffersMscOML(..., target_msc, ...); call and/or via DRI3/Present
PresentPixmap(..., target_msc, ...); request will complete one vblank
too early whenever target_msc > current_msc + 1, ie. more than 1 vblank
in the future. In such a case, the call of the pageflip ioctl() would be
triggered by a queued drmWaitVblank() vblank event, which itself gets
dispatched inside the vblank one frame before the target_msc vblank.

Testing with this patch does no longer show any problems with
OML_sync_control swap scheduling or flip completion timestamps.
Tested on R9 380 Tonga.

v2: Add acked/r-b by Harry and Michel.

Signed-off-by: Mario Kleiner 
Acked-by: Harry Wentland 
Reviewed-by: Michel Dänzer 

Cc: Harry Wentland 
Cc: Alex Deucher 
Cc: Michel Dänzer 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_types.c | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_types.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_types.c
index 086a842..19be2d9 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_types.c
@@ -2460,6 +2460,9 @@ static void amdgpu_dm_do_flip(
struct amdgpu_device *adev = crtc->dev->dev_private;
bool async_flip = (acrtc->flip_flags & DRM_MODE_PAGE_FLIP_ASYNC) != 0;
 
+   /* Prepare wait for target vblank early - before the fence-waits */
+   target_vblank = target - drm_crtc_vblank_count(crtc) +
+   amdgpu_get_vblank_counter_kms(crtc->dev, 
acrtc->crtc_id);
 
/*TODO This might fail and hence better not used, wait
 * explicitly on fences instead
@@ -2478,13 +2481,9 @@ static void amdgpu_dm_do_flip(
 
amdgpu_bo_unreserve(abo);
 
-   /* Wait for target vblank */
/* Wait until we're out of the vertical blank period before the one
 * targeted by the flip
 */
-   target_vblank = target - drm_crtc_vblank_count(crtc) +
-   amdgpu_get_vblank_counter_kms(crtc->dev, 
acrtc->crtc_id);
-
while ((acrtc->enabled &&
(amdgpu_get_crtc_scanoutpos(adev->ddev, acrtc->crtc_id, 0,
, , NULL, NULL,
@@ -2763,7 +2762,7 @@ void amdgpu_dm_atomic_commit_tail(
amdgpu_dm_do_flip(
crtc,
fb,
-   drm_crtc_vblank_count(crtc));
+   drm_crtc_vblank_count(crtc) + 1);
 
wait_for_vblank =
acrtc->flip_flags & 
DRM_MODE_PAGE_FLIP_ASYNC ?
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/2] drm/amd/display: Fix race between vblank irq and pageflip irq. (v2)

2017-04-24 Thread Mario Kleiner
Since DC now uses CRTC_VERTICAL_INTERRUPT0 as VBLANK irq trigger
and vblank interrupts actually happen earliest at start of vblank,
instead of a bit before vblank, we no longer need some of the
fudging logic to deal with too early vblank irq handling (grep for
lb_vblank_lead_lines). This itself fixes a pageflip scheduling
bug in DC, caused by uninitialized  use of lb_vblank_lead_lines,
with a wrong startup value of 0. Thanks to the new vblank irq
trigger this value of zero is now actually correct for DC :).

A new problem is that vblank irq's race against pflip irq's,
and as both can fire at first line of vblank, it is no longer
guaranteed that vblank irq handling (therefore -> drm_handle_vblank()
-> drm_update_vblank_count()) executes before pflip irq handling
for a given vblank interval when a pageflip completes. Therefore
the vblank count and timestamps emitted to user-space as part of
the pageflip completion event will be often stale and cause new
timestamping and swap scheduling errors in user-space.

This was observed with large frequency on R9 380 Tonga Pro.

Fix this by enforcing a vblank count+timestamp update right
before emitting the pageflip completion event from the pflip
irq handler. The logic in core drm_update_vblank_count() makes
sure that no redundant or conflicting updates happen, iow. the
call turns into a no-op if it wasn't needed for that vblank,
burning a few microseconds of cpu time though.

Successfully tested on AMD R9 380 "Tonga Pro" (VI/DCE 10)
with DC enabled on the current DC staging branch. Independent
measurement of pageflip completion timing with special hardware
measurement equipment now confirms correct pageflip timestamps
and counts in the pageflip completion events.

v2: Review comments by Michel, drop outdated paragraph
about problem already fixed in 2nd patch of the series.
Add acked/r-b by Harry and Michel.

Signed-off-by: Mario Kleiner 
Acked-by: Harry Wentland 
Reviewed-by: Michel Dänzer 

Cc: Andrey Grodzovsky 
Cc: Alex Deucher 
Cc: Michel Dänzer 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 794362e..0d77b0a 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -208,6 +208,9 @@ static void dm_pflip_high_irq(void *interrupt_params)
if (amdgpu_crtc->event
&& amdgpu_crtc->event->event.base.type
== DRM_EVENT_FLIP_COMPLETE) {
+   /* Update to correct count/ts if racing with vblank irq */
+   drm_accurate_vblank_count(_crtc->base);
+
drm_crtc_send_vblank_event(_crtc->base, 
amdgpu_crtc->event);
/* page flip completed. clean up */
amdgpu_crtc->event = NULL;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] [RFC] drm/radeon: clear WC flag when moving bo from vram to gtt

2017-04-24 Thread Julien Isorce
But re-add the flag if the bo is moved back to vram.

This fixes "ring 0/3 stalled" issue which happens when the driver
evicts bo from vram to gtt, at least on TAHITI and CAPVERDE.

I do not know the exact reason among the following:
  - si_copy_dma from vram to gtt is slow if WC
(only for the non-visible part ? specific cases ?)
  - Allow snooping (SNOOPED flag from radeon_vm_bo_update).
  - WC should not be set at all for bo in the GTT
(same reason why WC is only set for vram domain,
 see mesa::r600_init_resource_fields since mesa commit 5b6a0b7
 "gallium/radeon: set GTT WC on tiled textures")
  - Bug in WC
  - Same reason as why radeon_sa_bo_manager_init is not passing
WC flags if older than CHIP_BONAIRE (see 810b73d1
drm/radeon: Use write-combined CPU mappings of IBs on >= CIK)
  - Same as b738ca5d
Revert "drm/radeon: Use write-combined CPU mappings of ring ..."
  - Same as 96ea47c0
drm/radeon: Disable uncacheable CPU mappings of GTT with RV6xx
see https://bugs.freedesktop.org/show_bug.cgi?id=91268#c2

https://bugs.freedesktop.org/show_bug.cgi?id=100712
---
 drivers/gpu/drm/radeon/radeon.h|  1 +
 drivers/gpu/drm/radeon/radeon_object.c |  1 +
 drivers/gpu/drm/radeon/radeon_ttm.c| 13 +
 3 files changed, 15 insertions(+)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 7a39a35..9847f4e 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -518,6 +518,7 @@ struct radeon_bo {
 
struct radeon_mn*mn;
struct list_headmn_list;
+   u32 vram_flags;
 };
 #define gem_to_radeon_bo(gobj) container_of((gobj), struct radeon_bo, gem_base)
 
diff --git a/drivers/gpu/drm/radeon/radeon_object.c 
b/drivers/gpu/drm/radeon/radeon_object.c
index a557869..870f6b0 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -228,6 +228,7 @@ int radeon_bo_create(struct radeon_device *rdev,
   RADEON_GEM_DOMAIN_CPU);
 
bo->flags = flags;
+   bo->vram_flags = 0;
/* PCI GART is always snooped */
if (!(rdev->flags & RADEON_IS_PCIE))
bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c 
b/drivers/gpu/drm/radeon/radeon_ttm.c
index d07ff84..a8743bd 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -440,6 +440,19 @@ static int radeon_bo_move(struct ttm_buffer_object *bo,
r = radeon_move_ram_vram(bo, evict, interruptible,
no_wait_gpu, new_mem);
} else {
+   /* Clear WC flag when moving bo from vram to gtt. */
+   if (old_mem->mem_type == TTM_PL_VRAM && new_mem->mem_type == 
TTM_PL_TT) {
+   if (rbo->flags & RADEON_GEM_GTT_WC) {
+   rbo->vram_flags |= RADEON_GEM_GTT_WC;
+   rbo->flags &= ~RADEON_GEM_GTT_WC;
+   }
+   /* Re-add WC flag when moving back from gtt to vram. */
+   } else if (old_mem->mem_type == TTM_PL_TT && new_mem->mem_type 
== TTM_PL_VRAM) {
+   if (rbo->vram_flags & RADEON_GEM_GTT_WC) {
+   rbo->flags |= RADEON_GEM_GTT_WC;
+   rbo->vram_flags &= ~RADEON_GEM_GTT_WC;
+   }
+   }
r = radeon_move_blit(bo, evict, no_wait_gpu, new_mem, old_mem);
}
 
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/2] drm/amdgpu: fix gpu reset crash

2017-04-24 Thread Chunming Zhou
[  413.687439] BUG: unable to handle kernel NULL pointer dereference at 
0548
[  413.687479] IP: [] to_live_kthread+0x5/0x60
[  413.687507] PGD 1efd12067
[  413.687519] PUD 1efd11067
[  413.687531] PMD 0

[  413.687543] Oops:  [#1] SMP
[  413.687557] Modules linked in: amdgpu(OE) ttm(OE) drm_kms_helper(E) drm(E) 
i2c_algo_bit(E) fb_sys_fops(E) syscopyarea(E) sysfillrect(E) sysimgblt(E) 
rpcsec_gss_krb5(E) nfsv4(E) nfs(E) fscache(E) snd_hda_codec_realtek(E) 
snd_hda_codec_generic(E) snd_hda_codec_hdmi(E) snd_hda_intel(E) eeepc_wmi(E) 
snd_hda_codec(E) asus_wmi(E) snd_hda_core(E) sparse_keymap(E) snd_hwdep(E) 
video(E) snd_pcm(E) snd_seq_midi(E) joydev(E) snd_seq_midi_event(E) 
snd_rawmidi(E) snd_seq(E) snd_seq_device(E) snd_timer(E) kvm(E) irqbypass(E) 
crct10dif_pclmul(E) snd(E) crc32_pclmul(E) ghash_clmulni_intel(E) soundcore(E) 
aesni_intel(E) aes_x86_64(E) lrw(E) gf128mul(E) glue_helper(E) ablk_helper(E) 
cryptd(E) shpchp(E) serio_raw(E) i2c_piix4(E) 8250_dw(E) 
i2c_designware_platform(E) i2c_designware_core(E) mac_hid(E) binfmt_misc(E)
[  413.687894]  parport_pc(E) ppdev(E) lp(E) parport(E) nfsd(E) auth_rpcgss(E) 
nfs_acl(E) lockd(E) grace(E) sunrpc(E) autofs4(E) hid_generic(E) usbhid(E) 
hid(E) psmouse(E) ahci(E) r8169(E) mii(E) libahci(E) wmi(E)
[  413.687989] CPU: 13 PID: 1134 Comm: kworker/13:2 Tainted: G   OE   
4.9.0-custom #4
[  413.688019] Hardware name: System manufacturer System Product Name/PRIME 
B350-PLUS, BIOS 0606 04/06/2017
[  413.688089] Workqueue: events amd_sched_job_timedout [amdgpu]
[  413.688116] task: 88020f9657c0 task.stack: c90001a88000
[  413.688139] RIP: 0010:[]  [] 
to_live_kthread+0x5/0x60
[  413.688171] RSP: 0018:c90001a8bd60  EFLAGS: 00010282
[  413.688191] RAX: 88020f0073f8 RBX: 88020f00 RCX: 
[  413.688217] RDX: 0001 RSI: 88020f9670c0 RDI: 
[  413.688243] RBP: c90001a8bd78 R08:  R09: 1000
[  413.688269] R10: 006051b11a82 R11: 0001 R12: 
[  413.688295] R13: 88020f002770 R14: 88020f004838 R15: 8801b23c2c60
[  413.688321] FS:  () GS:88021ef4() 
knlGS:
[  413.688352] CS:  0010 DS:  ES:  CR0: 80050033
[  413.688373] CR2: 0548 CR3: 0001efd0f000 CR4: 003406e0
[  413.688399] Stack:
[  413.688407]  8109b304 88020f00 0070 
c90001a8bdf0
[  413.688439]  a05ce29d a052feb7 a07b5820 
c90001a8bda0
[  413.688470]  0018 8801bb88f060 01a8bdb8 
88021ef59280
[  413.688502] Call Trace:
[  413.688514]  [] ? kthread_park+0x14/0x60
[  413.688555]  [] amdgpu_gpu_reset+0x7d/0x670 [amdgpu]
[  413.688589]  [] ? drm_printk+0x97/0xa0 [drm]
[  413.688643]  [] amdgpu_job_timedout+0x46/0x50 [amdgpu]
[  413.688700]  [] amd_sched_job_timedout+0x17/0x20 [amdgpu]
[  413.688727]  [] process_one_work+0x153/0x3f0
[  413.688751]  [] worker_thread+0x12b/0x4b0
[  413.688773]  [] ? do_syscall_64+0x6e/0x180
[  413.688795]  [] ? rescuer_thread+0x350/0x350
[  413.688818]  [] ? do_syscall_64+0x6e/0x180
[  413.688839]  [] kthread+0xd3/0xf0
[  413.688858]  [] ? kthread_park+0x60/0x60
[  413.61]  [] ret_from_fork+0x25/0x30
[  413.688901] Code: 25 40 d3 00 00 48 8b 80 48 05 00 00 48 89 e5 5d 48 8b 40 
c8 48 c1 e8 02 83 e0 01 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 <48> 8b 
b7 48 05 00 00 55 48 89 e5 48 85 f6 74 31 8b 97 f8 18 00
[  413.689045] RIP  [] to_live_kthread+0x5/0x60
[  413.689064]  RSP 
[  413.689076] CR2: 0548
[  413.697985] ---[ end trace 0a314a64821f84e9 ]---

The root cause is some ring doesn't have scheduler, like KIQ ring

Change-Id: I420e84add9cdd9a7fd1f9921b8a5d0afa3dd2058
Signed-off-by: Chunming Zhou 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 9993085..168a9de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2675,7 +2675,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
 
-   if (!ring)
+   if (!ring || !ring->sched.thread)
continue;
kcl_kthread_park(ring->sched.thread);
amd_sched_hw_job_reset(>sched);
@@ -2770,7 +2770,8 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
}
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
-   if (!ring)
+
+   if (!ring || !ring->sched.thread)
continue;
 

[PATCH 2/2] drm/amdgpu: fix NULL pointer error

2017-04-24 Thread Chunming Zhou
[  141.420491] BUG: unable to handle kernel NULL pointer dereference at 
0030
[  141.420532] IP: [] fence_remove_callback+0x11/0x60
[  141.420563] PGD 20a030067
[  141.420575] PUD 2088ca067
[  141.420587] PMD 0

[  141.420599] Oops:  [#1] SMP
[  141.420612] Modules linked in: amdgpu(OE) ttm(OE) drm_kms_helper(E) drm(E) 
i2c_algo_bit(E) fb_sys_fops(E) syscopyarea(E) sysfillrect(E) sysimgblt(E) 
rpcsec_gss_krb5(E) nfsv4(E) nfs(E) fscache(E) eeepc_wmi(E) asus_wmi(E) 
sparse_keymap(E) snd_hda_codec_realtek(E) video(E) snd_hda_codec_generic(E) 
snd_hda_codec_hdmi(E) snd_hda_intel(E) joydev(E) snd_hda_codec(E) 
snd_seq_midi(E) snd_seq_midi_event(E) snd_hda_core(E) snd_hwdep(E) 
snd_rawmidi(E) snd_pcm(E) kvm(E) irqbypass(E) crct10dif_pclmul(E) snd_seq(E) 
crc32_pclmul(E) ghash_clmulni_intel(E) snd_seq_device(E) snd_timer(E) 
aesni_intel(E) aes_x86_64(E) lrw(E) gf128mul(E) glue_helper(E) ablk_helper(E) 
cryptd(E) snd(E) soundcore(E) serio_raw(E) shpchp(E) i2c_piix4(E) 
i2c_designware_platform(E) 8250_dw(E) i2c_designware_core(E) mac_hid(E) 
binfmt_misc(E)
[  141.420948]  nfsd(E) auth_rpcgss(E) nfs_acl(E) lockd(E) grace(E) sunrpc(E) 
parport_pc(E) ppdev(E) lp(E) parport(E) autofs4(E) hid_generic(E) usbhid(E) 
hid(E) psmouse(E) r8169(E) ahci(E) mii(E) libahci(E) wmi(E)
[  141.421042] CPU: 14 PID: 223 Comm: kworker/14:2 Tainted: G   OE   
4.9.0-custom #4
[  141.421074] Hardware name: System manufacturer System Product Name/PRIME 
B350-PLUS, BIOS 0606 04/06/2017
[  141.421146] Workqueue: events amd_sched_job_timedout [amdgpu]
[  141.421169] task: 88020b03ba80 task.stack: c900016f4000
[  141.421193] RIP: 0010:[]  [] 
fence_remove_callback+0x11/0x60
[  141.421229] RSP: 0018:c900016f7d30  EFLAGS: 00010202
[  141.421250] RAX: 8801c049fc00 RBX: 8801d4d8dc00 RCX: 
[  141.421278] RDX: 0001 RSI: 8801c049fcc0 RDI: 
[  141.421307] RBP: c900016f7d48 R08:  R09: 
[  141.421334] R10: 0020ed512a30 R11: 0001 R12: 
[  141.421362] R13: 880209ba4ba0 R14: 880209ba4c58 R15: 8801c055cc60
[  141.421390] FS:  () GS:88021ef8() 
knlGS:
[  141.421421] CS:  0010 DS:  ES:  CR0: 80050033
[  141.421443] CR2: 0030 CR3: 00020b554000 CR4: 003406e0
[  141.421471] Stack:
[  141.421480]  8801d4d8dc00 880209ba4c48 880209ba4ba0 
c900016f7d78
[  141.421513]  a0697920 880209ba  
880209ba2770
[  141.421549]  880209ba4b08 c900016f7df0 a05ce2ae 
a0509eb7
[  141.421583] Call Trace:
[  141.421628]  [] amd_sched_hw_job_reset+0x50/0xb0 [amdgpu]
[  141.421676]  [] amdgpu_gpu_reset+0x8e/0x690 [amdgpu]
[  141.421712]  [] ? drm_printk+0x97/0xa0 [drm]
[  141.421770]  [] amdgpu_job_timedout+0x46/0x50 [amdgpu]
[  141.421829]  [] amd_sched_job_timedout+0x17/0x20 [amdgpu]
[  141.421859]  [] process_one_work+0x153/0x3f0
[  141.421884]  [] worker_thread+0x12b/0x4b0
[  141.421907]  [] ? rescuer_thread+0x350/0x350
[  141.421931]  [] kthread+0xd3/0xf0
[  141.421951]  [] ? kthread_park+0x60/0x60
[  141.421975]  [] ret_from_fork+0x25/0x30
[  141.421996] Code: ac 81 e8 a3 1f b0 ff 48 c7 c0 ea ff ff ff e9 48 ff ff ff 
0f 1f 80 00 00 00 00 0f 1f 44 00 00 55 48 89 e5 41 55 41 54 49 89 fc 53 <48> 8b 
7f 30 48 89 f3 e8 73 7c 26 00 48 8b 13 48 39 d3 41 0f 95
[  141.422156] RIP  [] fence_remove_callback+0x11/0x60
[  141.422183]  RSP 
[  141.422197] CR2: 0030
[  141.433483] ---[ end trace bc0949bf7ddd6d4b ]---

if the job is reset twice, then the parent could be NULL.

Change-Id: I234887f5c26cf1fb9c7bdec3fc6c25a75f6dd3c0
Signed-off-by: Chunming Zhou 
---
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c 
b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index 8ab345d..80bc4f7 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -385,7 +385,9 @@ void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched)
 
spin_lock(>job_list_lock);
list_for_each_entry_reverse(s_job, >ring_mirror_list, node) {
-   if (fence_remove_callback(s_job->s_fence->parent, 
_job->s_fence->cb)) {
+   if (s_job->s_fence->parent &&
+   fence_remove_callback(s_job->s_fence->parent,
+ _job->s_fence->cb)) {
fence_put(s_job->s_fence->parent);
s_job->s_fence->parent = NULL;
}
-- 
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/1] drm/radeon: check return value of radeon_ring_lock

2017-04-24 Thread Christian König

Am 24.04.2017 um 10:38 schrieb Pan Bian:

From: Pan Bian 

Function radeon_ring_lock() returns an errno on failure, and its return
value should be validated. However, in functions r420_cp_errata_init()
and r420_cp_errata_fini(), its return value is not checked. This patch
adds the checks.

Signed-off-by: Pan Bian 


Reviewed-by: Christian König 


---
  drivers/gpu/drm/radeon/r420.c | 8 ++--
  1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c
index 2828605..391c764 100644
--- a/drivers/gpu/drm/radeon/r420.c
+++ b/drivers/gpu/drm/radeon/r420.c
@@ -206,6 +206,7 @@ static void r420_clock_resume(struct radeon_device *rdev)
  
  static void r420_cp_errata_init(struct radeon_device *rdev)

  {
+   int r;
struct radeon_ring *ring = >ring[RADEON_RING_TYPE_GFX_INDEX];
  
  	/* RV410 and R420 can lock up if CP DMA to host memory happens

@@ -215,7 +216,8 @@ static void r420_cp_errata_init(struct radeon_device *rdev)
 * of the CP init, apparently.
 */
radeon_scratch_get(rdev, >config.r300.resync_scratch);
-   radeon_ring_lock(rdev, ring, 8);
+   r = radeon_ring_lock(rdev, ring, 8);
+   WARN_ON(r);
radeon_ring_write(ring, PACKET0(R300_CP_RESYNC_ADDR, 1));
radeon_ring_write(ring, rdev->config.r300.resync_scratch);
radeon_ring_write(ring, 0xDEADBEEF);
@@ -224,12 +226,14 @@ static void r420_cp_errata_init(struct radeon_device 
*rdev)
  
  static void r420_cp_errata_fini(struct radeon_device *rdev)

  {
+   int r;
struct radeon_ring *ring = >ring[RADEON_RING_TYPE_GFX_INDEX];
  
  	/* Catch the RESYNC we dispatched all the way back,

 * at the very beginning of the CP init.
 */
-   radeon_ring_lock(rdev, ring, 8);
+   r = radeon_ring_lock(rdev, ring, 8);
+   WARN_ON(r);
radeon_ring_write(ring, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
radeon_ring_write(ring, R300_RB3D_DC_FINISH);
radeon_ring_unlock_commit(rdev, ring, false);



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/1] drm/radeon: check return value of radeon_fence_emit

2017-04-24 Thread Christian König

Am 24.04.2017 um 10:45 schrieb Pan Bian:

From: Pan Bian 

Function radeon_fence_emit() returns -ENOMEM if there is no enough
memory. And in this case, function radeon_ring_unlock_undo() rather than
function radeon_ring_unlock_commit() should be called. However, in
function radeon_test_create_and_emit_fence(), the return value of
radeon_fence_emit() is ignored. This patch adds the check.

Signed-off-by: Pan Bian 


Reviewed-by: Christian König 


---
  drivers/gpu/drm/radeon/radeon_test.c | 7 ++-
  1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/radeon_test.c 
b/drivers/gpu/drm/radeon/radeon_test.c
index 79181816..623768e 100644
--- a/drivers/gpu/drm/radeon/radeon_test.c
+++ b/drivers/gpu/drm/radeon/radeon_test.c
@@ -298,7 +298,12 @@ static int radeon_test_create_and_emit_fence(struct 
radeon_device *rdev,
DRM_ERROR("Failed to lock ring A %d\n", ring->idx);
return r;
}
-   radeon_fence_emit(rdev, fence, ring->idx);
+   r = radeon_fence_emit(rdev, fence, ring->idx);
+   if (r) {
+   DRM_ERROR("Failed to emit fence\n");
+   radeon_ring_unlock_undo(rdev, ring);
+   return r;
+   }
radeon_ring_unlock_commit(rdev, ring, false);
}
return 0;



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: extend vm flags to 64-bit in tracepoint

2017-04-24 Thread Christian König

Am 24.04.2017 um 10:55 schrieb Christian König:

Am 24.04.2017 um 08:43 schrieb Junwei Zhang:

Signed-off-by: Junwei Zhang 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 18 +-
  1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h

index 8676eff..998ff4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -221,7 +221,7 @@
   __field(long, start)
   __field(long, last)
   __field(u64, offset)
- __field(u32, flags)
+ __field(u64, flags)
   ),
TP_fast_assign(
@@ -231,7 +231,7 @@
 __entry->offset = mapping->offset;
 __entry->flags = mapping->flags;
 ),
-TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, 
flags=%08x",
+TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, 
flags=%010llx",


The full 64bit flags need to be printed with %016llx.

We only use %010llx for the 40bit addresses and even that needs to be 
extended to %048llx for Vega10.


Ups, hit send to early. That should read "needs to be extended to 
%012llx for Vega10 because it has 48bit addresses".


Christian.



With that fixed the patch is Reviewed-by: Christian König 



Regards,
Christian.


__entry->bo, __entry->start, __entry->last,
__entry->offset, __entry->flags)
  );
@@ -245,7 +245,7 @@
   __field(long, start)
   __field(long, last)
   __field(u64, offset)
- __field(u32, flags)
+ __field(u64, flags)
   ),
TP_fast_assign(
@@ -255,7 +255,7 @@
 __entry->offset = mapping->offset;
 __entry->flags = mapping->flags;
 ),
-TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, 
flags=%08x",
+TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, 
flags=%010llx",

__entry->bo, __entry->start, __entry->last,
__entry->offset, __entry->flags)
  );
@@ -266,7 +266,7 @@
  TP_STRUCT__entry(
   __field(u64, soffset)
   __field(u64, eoffset)
- __field(u32, flags)
+ __field(u64, flags)
   ),
TP_fast_assign(
@@ -274,7 +274,7 @@
 __entry->eoffset = mapping->it.last + 1;
 __entry->flags = mapping->flags;
 ),
-TP_printk("soffs=%010llx, eoffs=%010llx, flags=%08x",
+TP_printk("soffs=%010llx, eoffs=%010llx, flags=%010llx",
__entry->soffset, __entry->eoffset, __entry->flags)
  );
  @@ -290,14 +290,14 @@
TRACE_EVENT(amdgpu_vm_set_ptes,
  TP_PROTO(uint64_t pe, uint64_t addr, unsigned count,
- uint32_t incr, uint32_t flags),
+ uint32_t incr, uint64_t flags),
  TP_ARGS(pe, addr, count, incr, flags),
  TP_STRUCT__entry(
   __field(u64, pe)
   __field(u64, addr)
   __field(u32, count)
   __field(u32, incr)
- __field(u32, flags)
+ __field(u64, flags)
   ),
TP_fast_assign(
@@ -307,7 +307,7 @@
 __entry->incr = incr;
 __entry->flags = flags;
 ),
-TP_printk("pe=%010Lx, addr=%010Lx, incr=%u, flags=%08x, 
count=%u",
+TP_printk("pe=%010Lx, addr=%010Lx, incr=%u, flags=%010Lx, 
count=%u",

__entry->pe, __entry->addr, __entry->incr,
__entry->flags, __entry->count)
  );



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: extend vm flags to 64-bit in tracepoint

2017-04-24 Thread Christian König

Am 24.04.2017 um 08:43 schrieb Junwei Zhang:

Signed-off-by: Junwei Zhang 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 18 +-
  1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 8676eff..998ff4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -221,7 +221,7 @@
 __field(long, start)
 __field(long, last)
 __field(u64, offset)
-__field(u32, flags)
+__field(u64, flags)
 ),
  
  	TP_fast_assign(

@@ -231,7 +231,7 @@
   __entry->offset = mapping->offset;
   __entry->flags = mapping->flags;
   ),
-   TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%08x",
+   TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, 
flags=%010llx",


The full 64bit flags need to be printed with %016llx.

We only use %010llx for the 40bit addresses and even that needs to be 
extended to %012llx for the 48bit addresses on Vega10.


With that fixed the patch is Reviewed-by: Christian König 



Regards,
Christian.


  __entry->bo, __entry->start, __entry->last,
  __entry->offset, __entry->flags)
  );
@@ -245,7 +245,7 @@
 __field(long, start)
 __field(long, last)
 __field(u64, offset)
-__field(u32, flags)
+__field(u64, flags)
 ),
  
  	TP_fast_assign(

@@ -255,7 +255,7 @@
   __entry->offset = mapping->offset;
   __entry->flags = mapping->flags;
   ),
-   TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%08x",
+   TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, 
flags=%010llx",
  __entry->bo, __entry->start, __entry->last,
  __entry->offset, __entry->flags)
  );
@@ -266,7 +266,7 @@
TP_STRUCT__entry(
 __field(u64, soffset)
 __field(u64, eoffset)
-__field(u32, flags)
+__field(u64, flags)
 ),
  
  	TP_fast_assign(

@@ -274,7 +274,7 @@
   __entry->eoffset = mapping->it.last + 1;
   __entry->flags = mapping->flags;
   ),
-   TP_printk("soffs=%010llx, eoffs=%010llx, flags=%08x",
+   TP_printk("soffs=%010llx, eoffs=%010llx, flags=%010llx",
  __entry->soffset, __entry->eoffset, __entry->flags)
  );
  
@@ -290,14 +290,14 @@
  
  TRACE_EVENT(amdgpu_vm_set_ptes,

TP_PROTO(uint64_t pe, uint64_t addr, unsigned count,
-uint32_t incr, uint32_t flags),
+uint32_t incr, uint64_t flags),
TP_ARGS(pe, addr, count, incr, flags),
TP_STRUCT__entry(
 __field(u64, pe)
 __field(u64, addr)
 __field(u32, count)
 __field(u32, incr)
-__field(u32, flags)
+__field(u64, flags)
 ),
  
  	TP_fast_assign(

@@ -307,7 +307,7 @@
   __entry->incr = incr;
   __entry->flags = flags;
   ),
-   TP_printk("pe=%010Lx, addr=%010Lx, incr=%u, flags=%08x, count=%u",
+   TP_printk("pe=%010Lx, addr=%010Lx, incr=%u, flags=%010Lx, count=%u",
  __entry->pe, __entry->addr, __entry->incr,
  __entry->flags, __entry->count)
  );



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: fix trace error for amdgpu_vm_bo_unmap

2017-04-24 Thread Christian König

Am 24.04.2017 um 08:15 schrieb Junwei Zhang:

Signed-off-by: Junwei Zhang 


Reviewed-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index f38e5e2..8676eff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -249,7 +249,7 @@
 ),
  
  	TP_fast_assign(

-  __entry->bo = bo_va->bo;
+  __entry->bo = bo_va ? bo_va->bo : NULL;
   __entry->start = mapping->it.start;
   __entry->last = mapping->it.last;
   __entry->offset = mapping->offset;



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] Improve pipe split between amdgpu and amdkfd v4

2017-04-24 Thread Christian König
If you ask me that set of patches and your other work are more than 
ready to land.


I advise to rebase on top of Alex's amd-staging-4.9 branch for that, that's 
where most of our internal development is based on.


We are going to move to drm-next based development model sooner or 
later, but that will still take a while.


Regards,
Christian.

Am 21.04.2017 um 22:02 schrieb Andres Rodriguez:

V4 updates:
  * Rebased onto latest 4.12-wip (ba16d6c), since I got test reports that the
patch series failed to apply

V3 updates:
  * Fixed kfd set resources grabbing MEC1 queues
  * kfdtest now passes on Kaveri with the amdgpu or radeon driver

V2 updates:
  * Fixed wrong HPD offset in compute_pipe_init for gfx7
  * Fixed compute_pipe_init using wrong ME for gfx7


This is a split of patches that are ready to land from the series:
Add support for high priority scheduling in amdgpu v8

I've included Felix and Alex's feedback from the thread above. This includes:
  * Separate MEC_HPD_SIZE rename into a separate patch (patch 01)
  * Added a patch to fix the kgd_hqd_load bug Felix pointed out (patch 06)
  * Fixes for various off-by-one errors
  * Use gfx_v8_0_deactivate_hqd

Only comment I didn't address was changing the queue allocation policy for
gfx9 (similar to gfx7/8). See inline reply in that thread for more details
on why this was skipped.


Series available in the wip-queue-policy-v4 branch at:
g...@github.com:lostgoat/linux.git
Or:
https://github.com/lostgoat/linux.git


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 0/6] drm: tackle byteorder issues, take two

2017-04-24 Thread Michel Dänzer
On 24/04/17 04:36 PM, Gerd Hoffmann wrote:
> 
>>>   drm: fourcc byteorder: add DRM_FORMAT_CPU_*
>>>   drm: fourcc byteorder: add bigendian support to
>>> drm_mode_legacy_fb_format
>>
>> As I explained in my last followup in the "[PATCH] drm: fourcc
>> byteorder: brings header file comments in line with reality." thread,
>> the mapping between GPU and CPU formats has to be provided by the
>> driver, it cannot be done statically.
> 
> Well, the drm fourcc codes represent the cpu view (i.e. what userspace
> will fill the ADDFB2-created framebuffers with).

Ville is adamant that they represent the GPU view. This needs to be
resolved one way or the other.


> The gpu view can certainly differ from that.  Implementing this is up
> to the driver IMO.
> 
> When running on dumb framebuffers userspace doesn't need to know what
> the gpu view is.
> 
> When running in opengl mode there will be a hardware-specific mesa
> driver in userspace, which will either know what the gpu view is (for
> example because there is only one way to implement this in hardware) or
> it can use hardware-specific ioctls to ask the kernel driver what the
> gpu view is.

Not sure this can be hidden in the OpenGL driver. How would e.g. a
Wayland compositor or the Xorg modesetting driver know which OpenGL
format corresponds to a given DRM_FORMAT?


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/1] drm/radeon: check return value of radeon_fence_emit

2017-04-24 Thread Christian König

Am 23.04.2017 um 15:50 schrieb Pan Bian:

From: Pan Bian 

Function radeon_fence_emit() returns -ENOMEM if there is no enough
memory. And in this case, function radeon_ring_unlock_undo() rather than
function radeon_ring_unlock_commit() should be called. However, in
function radeon_test_create_and_emit_fence(), the return value of
radeon_fence_emit() is ignored. This patch adds the check.

Signed-off-by: Pan Bian 
---
  drivers/gpu/drm/radeon/radeon_test.c | 6 +-
  1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/radeon_test.c 
b/drivers/gpu/drm/radeon/radeon_test.c
index 79181816..7a44703 100644
--- a/drivers/gpu/drm/radeon/radeon_test.c
+++ b/drivers/gpu/drm/radeon/radeon_test.c
@@ -298,7 +298,11 @@ static int radeon_test_create_and_emit_fence(struct 
radeon_device *rdev,
DRM_ERROR("Failed to lock ring A %d\n", ring->idx);
return r;
}
-   radeon_fence_emit(rdev, fence, ring->idx);
+   r = radeon_fence_emit(rdev, fence, ring->idx);
+   if (r) {
+   radeon_ring_unlock_undo(rdev, ring);


Please add a DRM_ERROR() here as well.

Apart from that it looks good to me.

Christian.


+   return r;
+   }
radeon_ring_unlock_commit(rdev, ring, false);
}
return 0;



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/1] drm/radeon: check return value of radeon_ring_lock

2017-04-24 Thread Christian König

Am 23.04.2017 um 15:36 schrieb Pan Bian:

From: Pan Bian 

Function radeon_ring_lock() returns an errno on failure, and its return
value should be validated. However, in functions r420_cp_errata_init()
and r420_cp_errata_fini(), its return value is not checked. This patch
adds the checks.

Signed-off-by: Pan Bian 
---
  drivers/gpu/drm/radeon/r420.c | 6 --
  1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c
index 2828605..a8c2b37 100644
--- a/drivers/gpu/drm/radeon/r420.c
+++ b/drivers/gpu/drm/radeon/r420.c
@@ -215,7 +215,8 @@ static void r420_cp_errata_init(struct radeon_device *rdev)
 * of the CP init, apparently.
 */
radeon_scratch_get(rdev, >config.r300.resync_scratch);
-   radeon_ring_lock(rdev, ring, 8);
+   if (radeon_ring_lock(rdev, ring, 8))
+   return;


Nice that somebody wants to clean that up, but just returning here is 
not a good idea.


In addition to that, radeon_ring_lock() can only fail if we try to 
allocate too many dw (impossible with only 8) or the hardware is crashed 
and then it doesn't matter anyway.


I suggest to just add a WARN_ON() here.

Regards,
Christian.


radeon_ring_write(ring, PACKET0(R300_CP_RESYNC_ADDR, 1));
radeon_ring_write(ring, rdev->config.r300.resync_scratch);
radeon_ring_write(ring, 0xDEADBEEF);
@@ -229,7 +230,8 @@ static void r420_cp_errata_fini(struct radeon_device *rdev)
/* Catch the RESYNC we dispatched all the way back,
 * at the very beginning of the CP init.
 */
-   radeon_ring_lock(rdev, ring, 8);
+   if (radeon_ring_lock(rdev, ring, 8))
+   return;
radeon_ring_write(ring, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
radeon_ring_write(ring, R300_RB3D_DC_FINISH);
radeon_ring_unlock_commit(rdev, ring, false);



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/2] drm/amd/display: Prevent premature pageflip when committing in vblank.

2017-04-24 Thread Michel Dänzer
On 22/04/17 01:23 AM, Mario Kleiner wrote:
> Make sure we do not program a hw pageflip inside vblank 'n' iff the
> atomic flip is committed while inside the same vblank 'n'. We must
> defer such a flip by one refresh cycle to vblank 'n+1'.
> 
> Without this, pageflips programmed via X11 GLX_OML_sync_control extensions
> glXSwapBuffersMscOML(..., target_msc, ...); call and/or via DRI3/Present
> PresentPixmap(..., target_msc, ...); request will complete one vblank
> too early whenever target_msc > current_msc + 1, ie. more than 1 vblank
> in the future. In such a case, the call of the pageflip ioctl() would be
> triggered by a queued drmWaitVblank() vblank event, which itself gets
> dispatched inside the vblank one frame before the target_msc vblank.
> 
> Testing with this patch does no longer show any problems with
> OML_sync_control swap scheduling or flip completion timestamps.
> Tested on R9 380 Tonga.
> 
> Signed-off-by: Mario Kleiner 
> Cc: Harry Wentland 
> Cc: Alex Deucher 
> Cc: Michel Dänzer 
> ---
>  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_types.c | 9 -
>  1 file changed, 4 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_types.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_types.c
> index 086a842..19be2d9 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_types.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_types.c
> @@ -2460,6 +2460,9 @@ static void amdgpu_dm_do_flip(
>   struct amdgpu_device *adev = crtc->dev->dev_private;
>   bool async_flip = (acrtc->flip_flags & DRM_MODE_PAGE_FLIP_ASYNC) != 0;
>  
> + /* Prepare wait for target vblank early - before the fence-waits */
> + target_vblank = target - drm_crtc_vblank_count(crtc) +
> + amdgpu_get_vblank_counter_kms(crtc->dev, 
> acrtc->crtc_id);
>  
>   /*TODO This might fail and hence better not used, wait
>* explicitly on fences instead
> @@ -2478,13 +2481,9 @@ static void amdgpu_dm_do_flip(
>  
>   amdgpu_bo_unreserve(abo);
>  
> - /* Wait for target vblank */
>   /* Wait until we're out of the vertical blank period before the one
>* targeted by the flip
>*/
> - target_vblank = target - drm_crtc_vblank_count(crtc) +
> - amdgpu_get_vblank_counter_kms(crtc->dev, 
> acrtc->crtc_id);
> -
>   while ((acrtc->enabled &&
>   (amdgpu_get_crtc_scanoutpos(adev->ddev, acrtc->crtc_id, 0,
>   , , NULL, NULL,

Makes sense.


> @@ -2763,7 +2762,7 @@ void amdgpu_dm_atomic_commit_tail(
>   amdgpu_dm_do_flip(
>   crtc,
>   fb,
> - drm_crtc_vblank_count(crtc));
> + drm_crtc_vblank_count(crtc) + 1);
>  
>   wait_for_vblank =
>   acrtc->flip_flags & 
> DRM_MODE_PAGE_FLIP_ASYNC ?
> 

I suspect this code runs relatively late, so if userspace calls
DRM_IOCTL_MODE_PAGE_FLIP shortly before the start of vertical blank, it
could result in the flip being unnecessarily delayed by one frame. I
added drm_crtc_funcs::page_flip_target to address this.

Anyway, this is okay for now.

Reviewed-by: Michel Dänzer 


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/2] drm/amd/display: Fix race between vblank irq and pageflip irq.

2017-04-24 Thread Michel Dänzer
On 22/04/17 01:23 AM, Mario Kleiner wrote:
> Since DC now uses CRTC_VERTICAL_INTERRUPT0 as VBLANK irq trigger
> and vblank interrupts actually happen earliest at start of vblank,
> instead of a bit before vblank, we no longer need some of the
> fudging logic to deal with too early vblank irq handling (grep for
> lb_vblank_lead_lines). This itself fixes a pageflip scheduling
> bug in DC, caused by uninitialized  use of lb_vblank_lead_lines,
> with a wrong startup value of 0. Thanks to the new vblank irq
> trigger this value of zero is now actually correct for DC :).
> 
> A new problem is that vblank irq's race against pflip irq's,
> and as both can fire at first line of vblank, it is no longer
> guaranteed that vblank irq handling (therefore -> drm_handle_vblank()
> -> drm_update_vblank_count()) executes before pflip irq handling
> for a given vblank interval when a pageflip completes. Therefore
> the vblank count and timestamps emitted to user-space as part of
> the pageflip completion event will be often stale and cause new
> timestamping and swap scheduling errors in user-space.
> 
> This was observed with large frequency on R9 380 Tonga Pro.
> 
> Fix this by enforcing a vblank count+timestamp update right
> before emitting the pageflip completion event from the pflip
> irq handler. The logic in core drm_update_vblank_count() makes
> sure that no redundant or conflicting updates happen, iow. the
> call turns into a no-op if it wasn't needed for that vblank,
> burning a few microseconds of cpu time though.
> 
> Successfully tested on AMD R9 380 "Tonga Pro" (VI/DCE 10)
> with DC enabled on the current DC staging branch. Independent
> measurement of pageflip completion timing with special hardware
> measurement equipment now confirms correct pageflip timestamps
> and counts in the pageflip completion events.
> 
> Note that there is another unresolved pageflip bug present in current
> dc staging, which causes pageflips to complete one vblank too early
> when the pageflip ioctl gets called while in vblank. Something seems
> to be amiss in the way amdgpu_dm_do_flip() handles 'target_vblank',
> or how amdgpu_dm_atomic_commit_tail() computes 'target' for calling
> amdgpu_dm_do_flip().

If the last paragraph refers to the problem fixed by patch 2, I'd drop
that paragraph. With that,

Reviewed-by: Michel Dänzer 


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 03/11] drm/amdgpu/psp: skip loading SDMA/RLCG under SRIOV VF

2017-04-24 Thread Xiangliang Yu
From: Daniel Wang 

Now GPU hypervisor will load SDMA and RLCG ucode, so skip it
in guest.

Signed-off-by: Daniel Wang 
Signed-off-by: Xiangliang Yu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 1e380fe..ac5e92e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -289,6 +289,12 @@ static int psp_np_fw_load(struct psp_context *psp)
if (ucode->ucode_id == AMDGPU_UCODE_ID_SMC &&
psp_smu_reload_quirk(psp))
continue;
+   if (amdgpu_sriov_vf(adev) &&
+  (ucode->ucode_id == AMDGPU_UCODE_ID_SDMA0
+   || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA1
+   || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_G))
+   /*skip ucode loading in SRIOV VF */
+   continue;
 
ret = psp_prep_cmd_buf(ucode, psp->cmd);
if (ret)
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 0/6] drm: tackle byteorder issues, take two

2017-04-24 Thread Michel Dänzer
On 24/04/17 03:25 PM, Gerd Hoffmann wrote:
>   Hi,
> 
> Ok, different approach up for discussion.  Given that userspace didn't
> made the transition from ADDFB to ADDFB2 yet it seems we still can muck
> with the fourcc codes without breaking everything, as long as we
> maintain ADDFB and fbdev behavior (use cpu byte order format) so nothing
> changes for userspace.
> 
> So, this series basically makes drm_mode_legacy_fb_format return correct
> formats in bigendian mode and adapts the bochs and virtio drivers to
> this change.  Other drivers must be adapted to this change too.
> 
> Ilia Mirkin figured the dispnv04 backend in nouveau turns on/off
> byteswapping depending on cpu byte order.  So, one way to adapt the
> driver would be to simply use the new #defines added by patch #2.  The
> other way would be to support both XRGB and BGRX and turn on/off
> byteswapping depending on framebuffer format instead of cpu byte order.
> 
> cheers,
>   Gerd
> 
> Gerd Hoffmann (6):
>   drm: fourcc byteorder: drop DRM_FORMAT_BIG_ENDIAN

I don't see how it can be dropped. It's only optional for formats where
all components have 8 bits.


>   drm: fourcc byteorder: add DRM_FORMAT_CPU_*
>   drm: fourcc byteorder: add bigendian support to
> drm_mode_legacy_fb_format

As I explained in my last followup in the "[PATCH] drm: fourcc
byteorder: brings header file comments in line with reality." thread,
the mapping between GPU and CPU formats has to be provided by the
driver, it cannot be done statically.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 11/11] drm/amdgpu/uvd7: add UVD hw init sequences for sriov

2017-04-24 Thread Xiangliang Yu
From: Frank Min 

Add UVD hw init.

Signed-off-by: Frank Min 
Signed-off-by: Xiangliang Yu 
---
 drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 92 ---
 1 file changed, 54 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index a294f05..e0b7ded 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -368,7 +368,10 @@ static int uvd_v7_0_early_init(void *handle)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-   adev->uvd.num_enc_rings = 2;
+   if (amdgpu_sriov_vf(adev))
+   adev->uvd.num_enc_rings = 1;
+   else
+   adev->uvd.num_enc_rings = 2;
uvd_v7_0_set_ring_funcs(adev);
uvd_v7_0_set_enc_ring_funcs(adev);
uvd_v7_0_set_irq_funcs(adev);
@@ -421,12 +424,14 @@ static int uvd_v7_0_sw_init(void *handle)
r = amdgpu_uvd_resume(adev);
if (r)
return r;
+   if (!amdgpu_sriov_vf(adev)) {
+   ring = >uvd.ring;
+   sprintf(ring->name, "uvd");
+   r = amdgpu_ring_init(adev, ring, 512, >uvd.irq, 0);
+   if (r)
+   return r;
+   }
 
-   ring = >uvd.ring;
-   sprintf(ring->name, "uvd");
-   r = amdgpu_ring_init(adev, ring, 512, >uvd.irq, 0);
-   if (r)
-   return r;
 
for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
ring = >uvd.ring_enc[i];
@@ -445,6 +450,10 @@ static int uvd_v7_0_sw_init(void *handle)
return r;
}
 
+   r = amdgpu_virt_alloc_mm_table(adev);
+   if (r)
+   return r;
+
return r;
 }
 
@@ -453,6 +462,8 @@ static int uvd_v7_0_sw_fini(void *handle)
int i, r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+   amdgpu_virt_free_mm_table(adev);
+
r = amdgpu_uvd_suspend(adev);
if (r)
return r;
@@ -479,48 +490,53 @@ static int uvd_v7_0_hw_init(void *handle)
uint32_t tmp;
int i, r;
 
-   r = uvd_v7_0_start(adev);
+   if (amdgpu_sriov_vf(adev))
+   r = uvd_v7_0_sriov_start(adev);
+   else
+   r = uvd_v7_0_start(adev);
if (r)
goto done;
 
-   ring->ready = true;
-   r = amdgpu_ring_test_ring(ring);
-   if (r) {
-   ring->ready = false;
-   goto done;
-   }
+   if (!amdgpu_sriov_vf(adev)) {
+   ring->ready = true;
+   r = amdgpu_ring_test_ring(ring);
+   if (r) {
+   ring->ready = false;
+   goto done;
+   }
 
-   r = amdgpu_ring_alloc(ring, 10);
-   if (r) {
-   DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r);
-   goto done;
-   }
+   r = amdgpu_ring_alloc(ring, 10);
+   if (r) {
+   DRM_ERROR("amdgpu: ring failed to lock UVD ring 
(%d).\n", r);
+   goto done;
+   }
 
-   tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
-   mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL), 0);
-   amdgpu_ring_write(ring, tmp);
-   amdgpu_ring_write(ring, 0xF);
+   tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
+   mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL), 0);
+   amdgpu_ring_write(ring, tmp);
+   amdgpu_ring_write(ring, 0xF);
 
-   tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
-   mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL), 0);
-   amdgpu_ring_write(ring, tmp);
-   amdgpu_ring_write(ring, 0xF);
+   tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
+   mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL), 0);
+   amdgpu_ring_write(ring, tmp);
+   amdgpu_ring_write(ring, 0xF);
 
-   tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
-   mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL), 0);
-   amdgpu_ring_write(ring, tmp);
-   amdgpu_ring_write(ring, 0xF);
+   tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
+   mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL), 0);
+   amdgpu_ring_write(ring, tmp);
+   amdgpu_ring_write(ring, 0xF);
 
-   /* Clear timeout status bits */
-   amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0,
-   mmUVD_SEMA_TIMEOUT_STATUS), 0));
-   amdgpu_ring_write(ring, 0x8);
+   /* Clear timeout status bits */
+   amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0,
+   mmUVD_SEMA_TIMEOUT_STATUS), 0));
+   amdgpu_ring_write(ring, 0x8);
 
-   amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0,
-   mmUVD_SEMA_CNTL), 0));
-   amdgpu_ring_write(ring, 3);
+  

[PATCH 04/11] drm/amdgpu/vce4: fix a PSP loading VCE issue

2017-04-24 Thread Xiangliang Yu
From: Daniel Wang 

Fixed PSP loading issue for sriov.

Signed-off-by: Daniel Wang 
Signed-off-by: Xiangliang Yu 
---
 drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 18 +++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index 76fc8ed..1deb546 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -291,9 +291,21 @@ static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, 
mmVCE_LMI_SWAP_CNTL1), 0);
INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 
0);
 
-   INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, 
mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), adev->vce.gpu_addr >> 8);
-   INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, 
mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), adev->vce.gpu_addr >> 8);
-   INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, 
mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), adev->vce.gpu_addr >> 8);
+   if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+   INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, 
mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
+   
adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
+   INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, 
mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
+   
adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
+   INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, 
mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
+   
adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
+   } else {
+   INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, 
mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
+   adev->vce.gpu_addr >> 8);
+   INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, 
mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
+   adev->vce.gpu_addr >> 8);
+   INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, 
mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
+   adev->vce.gpu_addr >> 8);
+   }
 
offset = AMDGPU_VCE_FIRMWARE_OFFSET;
size = VCE_V4_0_FW_SIZE;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 10/11] drm/amdgpu/uvd7: add uvd doorbell initialization for sriov

2017-04-24 Thread Xiangliang Yu
From: Frank Min 

Add UVD doorbell for SRIOV.

Signed-off-by: Frank Min 
Signed-off-by: Xiangliang Yu 
---
 drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index fb3da07..a294f05 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -103,6 +103,9 @@ static uint64_t uvd_v7_0_enc_ring_get_wptr(struct 
amdgpu_ring *ring)
 {
struct amdgpu_device *adev = ring->adev;
 
+   if (ring->use_doorbell)
+   return adev->wb.wb[ring->wptr_offs];
+
if (ring == >uvd.ring_enc[0])
return RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_WPTR));
else
@@ -134,6 +137,13 @@ static void uvd_v7_0_enc_ring_set_wptr(struct amdgpu_ring 
*ring)
 {
struct amdgpu_device *adev = ring->adev;
 
+   if (ring->use_doorbell) {
+   /* XXX check if swapping is necessary on BE */
+   adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+   WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+   return;
+   }
+
if (ring == >uvd.ring_enc[0])
WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_WPTR),
lower_32_bits(ring->wptr));
@@ -421,6 +431,15 @@ static int uvd_v7_0_sw_init(void *handle)
for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
ring = >uvd.ring_enc[i];
sprintf(ring->name, "uvd_enc%d", i);
+   if (amdgpu_sriov_vf(adev)) {
+   ring->use_doorbell = true;
+   if (i == 0)
+   ring->doorbell_index = 
AMDGPU_DOORBELL64_UVD_RING0_1 * 2;
+   else if (i == 1)
+   ring->doorbell_index = 
AMDGPU_DOORBELL64_UVD_RING2_3 * 2;
+   else
+   ring->doorbell_index = 
AMDGPU_DOORBELL64_UVD_RING4_5 * 2;
+   }
r = amdgpu_ring_init(adev, ring, 512, >uvd.irq, 0);
if (r)
return r;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 06/11] drm/amdgpu/soc15: enable UVD code path for sriov

2017-04-24 Thread Xiangliang Yu
From: Frank Min 

Enable UVD block for SRIOV.

Signed-off-by: Frank Min 
Signed-off-by: Xiangliang Yu 
---
 drivers/gpu/drm/amd/amdgpu/soc15.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c 
b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 6999ac3..4e514b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -482,8 +482,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
 #endif
amdgpu_ip_block_add(adev, _v9_0_ip_block);
amdgpu_ip_block_add(adev, _v4_0_ip_block);
-   if (!amdgpu_sriov_vf(adev))
-   amdgpu_ip_block_add(adev, _v7_0_ip_block);
+   amdgpu_ip_block_add(adev, _v7_0_ip_block);
amdgpu_ip_block_add(adev, _v4_0_ip_block);
break;
default:
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 09/11] drm/amdgpu/uvd7: add sriov uvd initialization sequences

2017-04-24 Thread Xiangliang Yu
From: Frank Min 

Add UVD initialization for SRIOV.

Signed-off-by: Frank Min 
Signed-off-by: Xiangliang Yu 
---
 drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 246 ++
 1 file changed, 246 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index bf35d56..fb3da07 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -27,10 +27,14 @@
 #include "amdgpu_uvd.h"
 #include "soc15d.h"
 #include "soc15_common.h"
+#include "mmsch_v1_0.h"
 
 #include "vega10/soc15ip.h"
 #include "vega10/UVD/uvd_7_0_offset.h"
 #include "vega10/UVD/uvd_7_0_sh_mask.h"
+#include "vega10/VCE/vce_4_0_offset.h"
+#include "vega10/VCE/vce_4_0_default.h"
+#include "vega10/VCE/vce_4_0_sh_mask.h"
 #include "vega10/NBIF/nbif_6_1_offset.h"
 #include "vega10/HDP/hdp_4_0_offset.h"
 #include "vega10/MMHUB/mmhub_1_0_offset.h"
@@ -41,6 +45,7 @@ static void uvd_v7_0_set_enc_ring_funcs(struct amdgpu_device 
*adev);
 static void uvd_v7_0_set_irq_funcs(struct amdgpu_device *adev);
 static int uvd_v7_0_start(struct amdgpu_device *adev);
 static void uvd_v7_0_stop(struct amdgpu_device *adev);
+static int uvd_v7_0_sriov_start(struct amdgpu_device *adev);
 
 /**
  * uvd_v7_0_ring_get_rptr - get read pointer
@@ -618,6 +623,247 @@ static void uvd_v7_0_mc_resume(struct amdgpu_device *adev)
WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH4), 
adev->uvd.max_handles);
 }
 
+static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev,
+   struct amdgpu_mm_table *table)
+{
+   uint32_t data = 0, loop;
+   uint64_t addr = table->gpu_addr;
+   struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header 
*)table->cpu_addr;
+   uint32_t size;
+
+   size = header->header_size + header->vce_table_size + 
header->uvd_table_size;
+
+   /* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of 
memory descriptor location */
+   WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), 
lower_32_bits(addr));
+   WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), 
upper_32_bits(addr));
+
+   /* 2, update vmid of descriptor */
+   data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
+   data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+   data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 
for MM scheduler */
+   WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);
+
+   /* 3, notify mmsch about the size of this descriptor */
+   WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);
+
+   /* 4, set resp to zero */
+   WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);
+
+   /* 5, kick off the initialization and wait until 
VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
+   WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 
0x1001);
+
+   data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
+   loop = 1000;
+   while ((data & 0x1002) != 0x1002) {
+   udelay(10);
+   data = RREG32(SOC15_REG_OFFSET(VCE, 0, 
mmVCE_MMSCH_VF_MAILBOX_RESP));
+   loop--;
+   if (!loop)
+   break;
+   }
+
+   if (!loop) {
+   dev_err(adev->dev, "failed to init MMSCH, 
mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
+   return -EBUSY;
+   }
+
+   return 0;
+}
+
+static int uvd_v7_0_sriov_start(struct amdgpu_device *adev)
+{
+   struct amdgpu_ring *ring;
+   uint32_t offset, size, tmp;
+   uint32_t table_size = 0;
+   struct mmsch_v1_0_cmd_direct_write direct_wt = { {0} };
+   struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { {0} 
};
+   struct mmsch_v1_0_cmd_direct_polling direct_poll = { {0} };
+   //struct mmsch_v1_0_cmd_indirect_write indirect_wt = {{0}};
+   struct mmsch_v1_0_cmd_end end = { {0} };
+   uint32_t *init_table = adev->virt.mm_table.cpu_addr;
+   struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header 
*)init_table;
+
+   direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
+   direct_rd_mod_wt.cmd_header.command_type = 
MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
+   direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
+   end.cmd_header.command_type = MMSCH_COMMAND__END;
+
+   if (header->uvd_table_offset == 0 && header->uvd_table_size == 0) {
+   header->version = MMSCH_VERSION;
+   header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 
2;
+
+   if (header->vce_table_offset == 0 && header->vce_table_size == 
0)
+   header->uvd_table_offset = header->header_size;
+   else
+   header->uvd_table_offset = header->vce_table_size + 

[PATCH 08/11] drm/amdgpu/vce4: replaced with virt_alloc_mm_table

2017-04-24 Thread Xiangliang Yu
Used virt_alloc_mm_table function to allocate MM table memory.

Signed-off-by: Xiangliang Yu 
---
 drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 20 +++-
 1 file changed, 3 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index a3d9d4d..a34cdbd 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -444,20 +444,9 @@ static int vce_v4_0_sw_init(void *handle)
return r;
}
 
-   if (amdgpu_sriov_vf(adev)) {
-   r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
-   AMDGPU_GEM_DOMAIN_VRAM,
-   >virt.mm_table.bo,
-   >virt.mm_table.gpu_addr,
-   (void 
*)>virt.mm_table.cpu_addr);
-   if (!r) {
-   memset((void *)adev->virt.mm_table.cpu_addr, 0, 
PAGE_SIZE);
-   printk("mm table gpu addr = 0x%llx, cpu addr = %p. \n",
-  adev->virt.mm_table.gpu_addr,
-  adev->virt.mm_table.cpu_addr);
-   }
+   r = amdgpu_virt_alloc_mm_table(adev);
+   if (r)
return r;
-   }
 
return r;
 }
@@ -468,10 +457,7 @@ static int vce_v4_0_sw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
/* free MM table */
-   if (amdgpu_sriov_vf(adev))
-   amdgpu_bo_free_kernel(>virt.mm_table.bo,
- >virt.mm_table.gpu_addr,
- (void *)>virt.mm_table.cpu_addr);
+   amdgpu_virt_free_mm_table(adev);
 
r = amdgpu_vce_suspend(adev);
if (r)
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 07/11] drm/amdgpu/virt: add two functions for MM table

2017-04-24 Thread Xiangliang Yu
Add two functions to allocate & free MM table memory.

Signed-off-by: Xiangliang Yu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 46 
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h |  2 ++
 2 files changed, 48 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 7fce7b5..1363239 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -227,3 +227,49 @@ int amdgpu_virt_reset_gpu(struct amdgpu_device *adev)
 
return 0;
 }
+
+/**
+ * amdgpu_virt_alloc_mm_table() - alloc memory for mm table
+ * @amdgpu:amdgpu device.
+ * MM table is used by UVD and VCE for its initialization
+ * Return: Zero if allocate success.
+ */
+int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev)
+{
+   int r;
+
+   if (!amdgpu_sriov_vf(adev) || adev->virt.mm_table.gpu_addr)
+   return 0;
+
+   r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+   AMDGPU_GEM_DOMAIN_VRAM,
+   >virt.mm_table.bo,
+   >virt.mm_table.gpu_addr,
+   (void *)>virt.mm_table.cpu_addr);
+   if (r) {
+   DRM_ERROR("failed to alloc mm table and error = %d.\n", r);
+   return r;
+   }
+
+   memset((void *)adev->virt.mm_table.cpu_addr, 0, PAGE_SIZE);
+   DRM_INFO("MM table gpu addr = 0x%llx, cpu addr = %p.\n",
+adev->virt.mm_table.gpu_addr,
+adev->virt.mm_table.cpu_addr);
+   return 0;
+}
+
+/**
+ * amdgpu_virt_free_mm_table() - free mm table memory
+ * @amdgpu:amdgpu device.
+ * Free MM table memory
+ */
+void amdgpu_virt_free_mm_table(struct amdgpu_device *adev)
+{
+   if (!amdgpu_sriov_vf(adev) || !adev->virt.mm_table.gpu_addr)
+   return;
+
+   amdgpu_bo_free_kernel(>virt.mm_table.bo,
+ >virt.mm_table.gpu_addr,
+ (void *)>virt.mm_table.cpu_addr);
+   adev->virt.mm_table.gpu_addr = 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 1ee0a19..a8ed162 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -98,5 +98,7 @@ int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, 
bool init);
 int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
 int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
 int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary);
+int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev);
+void amdgpu_virt_free_mm_table(struct amdgpu_device *adev);
 
 #endif
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 05/11] drm/amdgpu/vce4: move mm table constructions functions into mmsch header file

2017-04-24 Thread Xiangliang Yu
From: Frank Min 

Move mm table construction functions into mmsch header file so that
UVD can reuse it.

Signed-off-by: Frank Min 
Signed-off-by: Xiangliang Yu 
---
 drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h | 57 +
 drivers/gpu/drm/amd/amdgpu/vce_v4_0.c   | 57 -
 2 files changed, 57 insertions(+), 57 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h 
b/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h
index 5f0fc8b..f048f91 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h
@@ -84,4 +84,61 @@ struct mmsch_v1_0_cmd_indirect_write {
uint32_t reg_value;
 };
 
+static inline void mmsch_insert_direct_wt(struct mmsch_v1_0_cmd_direct_write 
*direct_wt,
+ uint32_t *init_table,
+ uint32_t reg_offset,
+ uint32_t value)
+{
+   direct_wt->cmd_header.reg_offset = reg_offset;
+   direct_wt->reg_value = value;
+   memcpy((void *)init_table, direct_wt, sizeof(struct 
mmsch_v1_0_cmd_direct_write));
+}
+
+static inline void mmsch_insert_direct_rd_mod_wt(struct 
mmsch_v1_0_cmd_direct_read_modify_write *direct_rd_mod_wt,
+uint32_t *init_table,
+uint32_t reg_offset,
+uint32_t mask, uint32_t data)
+{
+   direct_rd_mod_wt->cmd_header.reg_offset = reg_offset;
+   direct_rd_mod_wt->mask_value = mask;
+   direct_rd_mod_wt->write_data = data;
+   memcpy((void *)init_table, direct_rd_mod_wt,
+  sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write));
+}
+
+static inline void mmsch_insert_direct_poll(struct 
mmsch_v1_0_cmd_direct_polling *direct_poll,
+   uint32_t *init_table,
+   uint32_t reg_offset,
+   uint32_t mask, uint32_t wait)
+{
+   direct_poll->cmd_header.reg_offset = reg_offset;
+   direct_poll->mask_value = mask;
+   direct_poll->wait_value = wait;
+   memcpy((void *)init_table, direct_poll, sizeof(struct 
mmsch_v1_0_cmd_direct_polling));
+}
+
+#define INSERT_DIRECT_RD_MOD_WT(reg, mask, data) { \
+   mmsch_insert_direct_rd_mod_wt(_rd_mod_wt, \
+ init_table, (reg), \
+ (mask), (data)); \
+   init_table += sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write)/4; 
\
+   table_size += sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write)/4; 
\
+}
+
+#define INSERT_DIRECT_WT(reg, value) { \
+   mmsch_insert_direct_wt(_wt, \
+  init_table, (reg), \
+  (value)); \
+   init_table += sizeof(struct mmsch_v1_0_cmd_direct_write)/4; \
+   table_size += sizeof(struct mmsch_v1_0_cmd_direct_write)/4; \
+}
+
+#define INSERT_DIRECT_POLL(reg, mask, wait) { \
+   mmsch_insert_direct_poll(_poll, \
+init_table, (reg), \
+(mask), (wait)); \
+   init_table += sizeof(struct mmsch_v1_0_cmd_direct_polling)/4; \
+   table_size += sizeof(struct mmsch_v1_0_cmd_direct_polling)/4; \
+}
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index 1deb546..a3d9d4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -49,63 +49,6 @@ static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
 
-static inline void mmsch_insert_direct_wt(struct mmsch_v1_0_cmd_direct_write 
*direct_wt,
- uint32_t *init_table,
- uint32_t reg_offset,
- uint32_t value)
-{
-   direct_wt->cmd_header.reg_offset = reg_offset;
-   direct_wt->reg_value = value;
-   memcpy((void *)init_table, direct_wt, sizeof(struct 
mmsch_v1_0_cmd_direct_write));
-}
-
-static inline void mmsch_insert_direct_rd_mod_wt(struct 
mmsch_v1_0_cmd_direct_read_modify_write *direct_rd_mod_wt,
-uint32_t *init_table,
-uint32_t reg_offset,
-uint32_t mask, uint32_t data)
-{
-   direct_rd_mod_wt->cmd_header.reg_offset = reg_offset;
-   direct_rd_mod_wt->mask_value = mask;
-   direct_rd_mod_wt->write_data = data;
-   memcpy((void *)init_table, direct_rd_mod_wt,
-  sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write));
-}
-
-static inline void 

[PATCH 00/11] Enable UVD and PSP loading for SRIOV

2017-04-24 Thread Xiangliang Yu
This series will enable UVD and PSP firmware loading for SRIOV.

Daniel Wang (2):
  drm/amdgpu/psp: skip loading SDMA/RLCG under SRIOV VF
  drm/amdgpu/vce4: fix a PSP loading VCE issue

Frank Min (5):
  drm/amdgpu/vce4: move mm table constructions functions into mmsch
header file
  drm/amdgpu/soc15: enable UVD code path for sriov
  drm/amdgpu/uvd7: add sriov uvd initialization sequences
  drm/amdgpu/uvd7: add uvd doorbell initialization for sriov
  drm/amdgpu/uvd7: add UVD hw init sequences for sriov

Xiangliang Yu (4):
  drm/amdgpu/virt: bypass cg and pg setting for SRIOV
  drm/amdgpu/virt: change the place of virt_init_setting
  drm/amdgpu/virt: add two functions for MM table
  drm/amdgpu/vce4: replaced with virt_alloc_mm_table

 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c  |   6 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c |  48 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h |   2 +
 drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h  |  57 +
 drivers/gpu/drm/amd/amdgpu/soc15.c   |  13 +-
 drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c| 357 +++
 drivers/gpu/drm/amd/amdgpu/vce_v4_0.c|  95 ++--
 drivers/gpu/drm/amd/amdgpu/vi.c  |  10 +-
 8 files changed, 461 insertions(+), 127 deletions(-)

-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 01/11] drm/amdgpu/virt: bypass cg and pg setting for SRIOV

2017-04-24 Thread Xiangliang Yu
GPU hypervisor cover all settings of CG and PG, so guest doesn't
need to do anything. Bypass it.

Signed-off-by: Frank Min 
Signed-off-by: Xiangliang Yu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index be43823..7fce7b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -105,6 +105,8 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
/* enable virtual display */
adev->mode_info.num_crtc = 1;
adev->enable_virtual_display = true;
+   adev->cg_flags = 0;
+   adev->pg_flags = 0;
 
mutex_init(>virt.lock_kiq);
mutex_init(>virt.lock_reset);
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 02/11] drm/amdgpu/virt: change the place of virt_init_setting

2017-04-24 Thread Xiangliang Yu
Change the place of the virt_init_setting function so that it can cover the
cg and pg flags configuration.

Signed-off-by: Xiangliang Yu 
---
 drivers/gpu/drm/amd/amdgpu/soc15.c | 10 +-
 drivers/gpu/drm/amd/amdgpu/vi.c| 10 +-
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c 
b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 2c05dab..6999ac3 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -538,11 +538,6 @@ static int soc15_common_early_init(void *handle)
(amdgpu_ip_block_mask & (1 << AMD_IP_BLOCK_TYPE_PSP)))
psp_enabled = true;
 
-   if (amdgpu_sriov_vf(adev)) {
-   amdgpu_virt_init_setting(adev);
-   xgpu_ai_mailbox_set_irq_funcs(adev);
-   }
-
/*
 * nbio need be used for both sdma and gfx9, but only
 * initializes once
@@ -586,6 +581,11 @@ static int soc15_common_early_init(void *handle)
return -EINVAL;
}
 
+   if (amdgpu_sriov_vf(adev)) {
+   amdgpu_virt_init_setting(adev);
+   xgpu_ai_mailbox_set_irq_funcs(adev);
+   }
+
adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, 
amdgpu_fw_load_type);
 
amdgpu_get_pcie_info(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 505c17a..48fb373 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -895,11 +895,6 @@ static int vi_common_early_init(void *handle)
(amdgpu_ip_block_mask & (1 << AMD_IP_BLOCK_TYPE_SMC)))
smc_enabled = true;
 
-   if (amdgpu_sriov_vf(adev)) {
-   amdgpu_virt_init_setting(adev);
-   xgpu_vi_mailbox_set_irq_funcs(adev);
-   }
-
adev->rev_id = vi_get_rev_id(adev);
adev->external_rev_id = 0xFF;
switch (adev->asic_type) {
@@ -1072,6 +1067,11 @@ static int vi_common_early_init(void *handle)
return -EINVAL;
}
 
+   if (amdgpu_sriov_vf(adev)) {
+   amdgpu_virt_init_setting(adev);
+   xgpu_vi_mailbox_set_irq_funcs(adev);
+   }
+
/* vi use smc load by default */
adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, 
amdgpu_fw_load_type);
 
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm: fourcc byteorder: brings header file comments in line with reality.

2017-04-24 Thread Michel Dänzer
On 22/04/17 07:05 PM, Ville Syrjälä wrote:
> On Fri, Apr 21, 2017 at 06:14:31PM +0200, Gerd Hoffmann wrote:
>>   Hi,
>>
 My personal opinion is that formats in drm_fourcc.h should be 
 independent of the CPU byte order and the function 
 drm_mode_legacy_fb_format() and drivers depending on that incorrect 
 assumption be fixed instead.
>>>
>>> The problem is this isn't a kernel-internal thing any more.  With the
>>> addition of the ADDFB2 ioctl the fourcc codes became part of the
>>> kernel/userspace abi ...
>>
>> Ok, added some printk's to the ADDFB and ADDFB2 code paths and tested a
>> bit.  Apparently pretty much all userspace still uses the ADDFB ioctl.
>> xorg (modesetting driver) does.  gnome-shell in wayland mode does.
>> Seems the big transition to ADDFB2 didn't happen yet.
>>
>> I guess that makes changing drm_mode_legacy_fb_format + drivers a
>> reasonable option ...
> 
> Yeah, I came to the same conclusion after chatting with some
> folks on irc.
> 
> So my current idea is that we change any driver that wants to follow the
> CPU endianness

This isn't really optional for various reasons, some of which have been
covered in this discussion.


> to declare support for big endian formats if the CPU is
> big endian. Presumably these are mostly the virtual GPU drivers.
> 
> Additonally we'll make the mapping performed by drm_mode_legacy_fb_format()
> driver controlled. That way drivers that got changed to follow CPU
> endianness can return a framebuffer that matches CPU endianness. And
> drivers that expect the GPU endianness to not depend on the CPU
> endianness will keep working as they do now. The downside is that users
> of the legacy addfb ioctl will need to magically know which endianness
> they will get, but that is apparently already the case. And users of
> addfb2 will keep on specifying the endianness explicitly with
> DRM_FORMAT_BIG_ENDIAN vs. 0.

I'm afraid it's not that simple.

The display hardware of older (pre-R600 generation) Radeon GPUs does not
support the "big endian" formats directly. In order to allow userspace
to access pixel data in native endianness with the CPU, we instead use
byte-swapping functionality which only affects CPU access. This means
that the GPU and CPU effectively see different representations of the
same video memory contents.

Userspace code dealing with GPU access to pixel data needs to know the
format as seen by the GPU, whereas code dealing with CPU access needs to
know the format as seen by the CPU. I don't see any way to express this
with a single format definition.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: extend vm flags to 64-bit in tracepoint

2017-04-24 Thread Junwei Zhang
Signed-off-by: Junwei Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 8676eff..998ff4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -221,7 +221,7 @@
 __field(long, start)
 __field(long, last)
 __field(u64, offset)
-__field(u32, flags)
+__field(u64, flags)
 ),
 
TP_fast_assign(
@@ -231,7 +231,7 @@
   __entry->offset = mapping->offset;
   __entry->flags = mapping->flags;
   ),
-   TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%08x",
+   TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, 
flags=%010llx",
  __entry->bo, __entry->start, __entry->last,
  __entry->offset, __entry->flags)
 );
@@ -245,7 +245,7 @@
 __field(long, start)
 __field(long, last)
 __field(u64, offset)
-__field(u32, flags)
+__field(u64, flags)
 ),
 
TP_fast_assign(
@@ -255,7 +255,7 @@
   __entry->offset = mapping->offset;
   __entry->flags = mapping->flags;
   ),
-   TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%08x",
+   TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, 
flags=%010llx",
  __entry->bo, __entry->start, __entry->last,
  __entry->offset, __entry->flags)
 );
@@ -266,7 +266,7 @@
TP_STRUCT__entry(
 __field(u64, soffset)
 __field(u64, eoffset)
-__field(u32, flags)
+__field(u64, flags)
 ),
 
TP_fast_assign(
@@ -274,7 +274,7 @@
   __entry->eoffset = mapping->it.last + 1;
   __entry->flags = mapping->flags;
   ),
-   TP_printk("soffs=%010llx, eoffs=%010llx, flags=%08x",
+   TP_printk("soffs=%010llx, eoffs=%010llx, flags=%010llx",
  __entry->soffset, __entry->eoffset, __entry->flags)
 );
 
@@ -290,14 +290,14 @@
 
 TRACE_EVENT(amdgpu_vm_set_ptes,
TP_PROTO(uint64_t pe, uint64_t addr, unsigned count,
-uint32_t incr, uint32_t flags),
+uint32_t incr, uint64_t flags),
TP_ARGS(pe, addr, count, incr, flags),
TP_STRUCT__entry(
 __field(u64, pe)
 __field(u64, addr)
 __field(u32, count)
 __field(u32, incr)
-__field(u32, flags)
+__field(u64, flags)
 ),
 
TP_fast_assign(
@@ -307,7 +307,7 @@
   __entry->incr = incr;
   __entry->flags = flags;
   ),
-   TP_printk("pe=%010Lx, addr=%010Lx, incr=%u, flags=%08x, count=%u",
+   TP_printk("pe=%010Lx, addr=%010Lx, incr=%u, flags=%010Lx, count=%u",
  __entry->pe, __entry->addr, __entry->incr,
  __entry->flags, __entry->count)
 );
-- 
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm: fourcc byteorder: brings header file comments in line with reality.

2017-04-24 Thread Michel Dänzer
On 23/04/17 04:24 AM, Ilia Mirkin wrote:
> 
> fbdev also creates fb's that expect cpu endianness, as disabling the
> byteswap logic caused a green fbcon terminal to show up. (So at least
> something somewhere in the fbcon -> nouveau's fbdev emulation pipeline
> is expecting cpu endianness. This happens both with nouveau's fbdev
> accel logic and without.)

In theory, there's FB_FOREIGN_ENDIAN for that. But in practice it's
probably useless because little if any userspace even checks for it, let
alone handles it correctly.


> So I think the current situation, at least wrt pre-nv50 nouveau, is
> that XRGB/ARGB are "special", since they are the only things
> exposed by drm_crtc_init. I believe those definitions should be
> updated to note that they're cpu-endian-specific (or another way of
> phrasing it more diplomatically is that they're array formats rather
> than packed formats).

That would be incorrect. :) The memory layout of 8-bit-per-component
array formats doesn't depend on endianness, that of packed formats does.
(DRM_FORMAT_*8 as currently defined are thus effectively array formats)


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 4/6] drm: fourcc byteorder: adapt bochs-drm to drm_mode_legacy_fb_format update

2017-04-24 Thread Gerd Hoffmann
Signed-off-by: Gerd Hoffmann 
---
 drivers/gpu/drm/bochs/bochs_mm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/bochs/bochs_mm.c b/drivers/gpu/drm/bochs/bochs_mm.c
index 857755ac2d..781d35bdff 100644
--- a/drivers/gpu/drm/bochs/bochs_mm.c
+++ b/drivers/gpu/drm/bochs/bochs_mm.c
@@ -508,7 +508,7 @@ bochs_user_framebuffer_create(struct drm_device *dev,
   (mode_cmd->pixel_format >> 16) & 0xff,
   (mode_cmd->pixel_format >> 24) & 0xff);
 
-   if (mode_cmd->pixel_format != DRM_FORMAT_XRGB)
+   if (mode_cmd->pixel_format != DRM_FORMAT_CPU_XRGB)
return ERR_PTR(-ENOENT);
 
obj = drm_gem_object_lookup(filp, mode_cmd->handles[0]);
-- 
2.9.3

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 5/6] drm: fourcc byteorder: adapt virtio to drm_mode_legacy_fb_format update

2017-04-24 Thread Gerd Hoffmann
Signed-off-by: Gerd Hoffmann 
---
 drivers/gpu/drm/virtio/virtgpu_gem.c   |  2 +-
 drivers/gpu/drm/virtio/virtgpu_plane.c | 31 ---
 2 files changed, 1 insertion(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/virtio/virtgpu_gem.c 
b/drivers/gpu/drm/virtio/virtgpu_gem.c
index cc025d8fbe..4f2c2dc731 100644
--- a/drivers/gpu/drm/virtio/virtgpu_gem.c
+++ b/drivers/gpu/drm/virtio/virtgpu_gem.c
@@ -99,7 +99,7 @@ int virtio_gpu_mode_dumb_create(struct drm_file *file_priv,
if (ret)
goto fail;
 
-   format = virtio_gpu_translate_format(DRM_FORMAT_XRGB);
+   format = virtio_gpu_translate_format(DRM_FORMAT_CPU_XRGB);
virtio_gpu_resource_id_get(vgdev, );
virtio_gpu_cmd_create_resource(vgdev, resid, format,
   args->width, args->height);
diff --git a/drivers/gpu/drm/virtio/virtgpu_plane.c 
b/drivers/gpu/drm/virtio/virtgpu_plane.c
index adcdbd0abe..f40ffc9a70 100644
--- a/drivers/gpu/drm/virtio/virtgpu_plane.c
+++ b/drivers/gpu/drm/virtio/virtgpu_plane.c
@@ -39,11 +39,7 @@ static const uint32_t virtio_gpu_formats[] = {
 };
 
 static const uint32_t virtio_gpu_cursor_formats[] = {
-#ifdef __BIG_ENDIAN
-   DRM_FORMAT_BGRA,
-#else
DRM_FORMAT_ARGB,
-#endif
 };
 
 uint32_t virtio_gpu_translate_format(uint32_t drm_fourcc)
@@ -51,32 +47,6 @@ uint32_t virtio_gpu_translate_format(uint32_t drm_fourcc)
uint32_t format;
 
switch (drm_fourcc) {
-#ifdef __BIG_ENDIAN
-   case DRM_FORMAT_XRGB:
-   format = VIRTIO_GPU_FORMAT_X8R8G8B8_UNORM;
-   break;
-   case DRM_FORMAT_ARGB:
-   format = VIRTIO_GPU_FORMAT_A8R8G8B8_UNORM;
-   break;
-   case DRM_FORMAT_BGRX:
-   format = VIRTIO_GPU_FORMAT_B8G8R8X8_UNORM;
-   break;
-   case DRM_FORMAT_BGRA:
-   format = VIRTIO_GPU_FORMAT_B8G8R8A8_UNORM;
-   break;
-   case DRM_FORMAT_RGBX:
-   format = VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM;
-   break;
-   case DRM_FORMAT_RGBA:
-   format = VIRTIO_GPU_FORMAT_R8G8B8A8_UNORM;
-   break;
-   case DRM_FORMAT_XBGR:
-   format = VIRTIO_GPU_FORMAT_X8B8G8R8_UNORM;
-   break;
-   case DRM_FORMAT_ABGR:
-   format = VIRTIO_GPU_FORMAT_A8B8G8R8_UNORM;
-   break;
-#else
case DRM_FORMAT_XRGB:
format = VIRTIO_GPU_FORMAT_B8G8R8X8_UNORM;
break;
@@ -101,7 +71,6 @@ uint32_t virtio_gpu_translate_format(uint32_t drm_fourcc)
case DRM_FORMAT_ABGR:
format = VIRTIO_GPU_FORMAT_R8G8B8A8_UNORM;
break;
-#endif
default:
/*
 * This should not happen, we handle everything listed
-- 
2.9.3

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 0/6] drm: tackle byteorder issues, take two

2017-04-24 Thread Gerd Hoffmann
  Hi,

Ok, different approach up for discussion.  Given that userspace hasn't
made the transition from ADDFB to ADDFB2 yet, it seems we can still muck
with the fourcc codes without breaking everything, as long as we
maintain ADDFB and fbdev behavior (use cpu byte order format) so nothing
changes for userspace.

So, this series basically makes drm_mode_legacy_fb_format return correct
formats in bigendian mode and adapts the bochs and virtio drivers to
this change.  Other drivers must be adapted to this change too.

Ilia Mirkin figured the dispnv04 backend in nouveau turns on/off
byteswapping depending on cpu byte order.  So, one way to adapt the
driver would be to simply use the new #defines added by patch #2.  The
other way would be to support both XRGB and BGRX and turn on/off
byteswapping depending on framebuffer format instead of cpu byte order.

cheers,
  Gerd

Gerd Hoffmann (6):
  drm: fourcc byteorder: drop DRM_FORMAT_BIG_ENDIAN
  drm: fourcc byteorder: add DRM_FORMAT_CPU_*
  drm: fourcc byteorder: add bigendian support to
drm_mode_legacy_fb_format
  drm: fourcc byteorder: adapt bochs-drm to drm_mode_legacy_fb_format
update
  drm: fourcc byteorder: adapt virtio to drm_mode_legacy_fb_format
update
  drm: fourcc byteorder: virtio restrict to XRGB

 include/drm/drm_fourcc.h   | 12 ++
 include/uapi/drm/drm_fourcc.h  |  2 --
 drivers/gpu/drm/bochs/bochs_mm.c   |  2 +-
 drivers/gpu/drm/drm_fourcc.c   | 27 +--
 drivers/gpu/drm/drm_framebuffer.c  |  2 +-
 drivers/gpu/drm/virtio/virtgpu_gem.c   |  7 --
 drivers/gpu/drm/virtio/virtgpu_plane.c | 40 +-
 7 files changed, 45 insertions(+), 47 deletions(-)

-- 
2.9.3

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 3/6] drm: fourcc byteorder: add bigendian support to drm_mode_legacy_fb_format

2017-04-24 Thread Gerd Hoffmann
Return correct fourcc codes on bigendian.  Drivers must be adapted to
this change.

Signed-off-by: Gerd Hoffmann 
---
 drivers/gpu/drm/drm_fourcc.c | 24 
 1 file changed, 24 insertions(+)

diff --git a/drivers/gpu/drm/drm_fourcc.c b/drivers/gpu/drm/drm_fourcc.c
index adb3ff59a4..28401d3745 100644
--- a/drivers/gpu/drm/drm_fourcc.c
+++ b/drivers/gpu/drm/drm_fourcc.c
@@ -42,11 +42,34 @@ static char printable_char(int c)
  *
  * Computes a drm fourcc pixel format code for the given @bpp/@depth values.
  * Useful in fbdev emulation code, since that deals in those values.
+ *
+ * DRM_FORMAT_* are little endian, we'll pick cpu endian here, therefore we
+ * results differ depending on byte order.
  */
 uint32_t drm_mode_legacy_fb_format(uint32_t bpp, uint32_t depth)
 {
uint32_t fmt;
 
+#ifdef __BIG_ENDIAN
+   switch (bpp) {
+   case 8:
+   fmt = DRM_FORMAT_C8;
+   break;
+   case 24:
+   fmt = DRM_FORMAT_BGR888;
+   break;
+   case 32:
+   if (depth == 24)
+   fmt = DRM_FORMAT_BGRX;
+   else
+   fmt = DRM_FORMAT_BGRA;
+   break;
+   default:
+   DRM_ERROR("bad bpp, assuming b8g8r8x8 pixel format\n");
+   fmt = DRM_FORMAT_BGRX;
+   break;
+   }
+#else
switch (bpp) {
case 8:
fmt = DRM_FORMAT_C8;
@@ -73,6 +96,7 @@ uint32_t drm_mode_legacy_fb_format(uint32_t bpp, uint32_t 
depth)
fmt = DRM_FORMAT_XRGB;
break;
}
+#endif
 
return fmt;
 }
-- 
2.9.3

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 6/6] drm: fourcc byteorder: virtio restrict to XRGB8888

2017-04-24 Thread Gerd Hoffmann
While wading through the code I've noticed we have a little issue in
virtio:  We attach a format to the bo when it is created
(DRM_IOCTL_MODE_CREATE_DUMB), not when we map it as framebuffer
(DRM_IOCTL_MODE_ADDFB).

Easy way out:  support a single format only.

Signed-off-by: Gerd Hoffmann 
---
 drivers/gpu/drm/virtio/virtgpu_gem.c   | 5 -
 drivers/gpu/drm/virtio/virtgpu_plane.c | 9 +
 2 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/virtio/virtgpu_gem.c 
b/drivers/gpu/drm/virtio/virtgpu_gem.c
index 4f2c2dc731..b09e5e5ae4 100644
--- a/drivers/gpu/drm/virtio/virtgpu_gem.c
+++ b/drivers/gpu/drm/virtio/virtgpu_gem.c
@@ -90,7 +90,10 @@ int virtio_gpu_mode_dumb_create(struct drm_file *file_priv,
uint32_t resid;
uint32_t format;
 
-   pitch = args->width * ((args->bpp + 1) / 8);
+   if (args->bpp != 32)
+   return -EINVAL;
+
+   pitch = args->width * 4;
args->size = pitch * args->height;
args->size = ALIGN(args->size, PAGE_SIZE);
 
diff --git a/drivers/gpu/drm/virtio/virtgpu_plane.c 
b/drivers/gpu/drm/virtio/virtgpu_plane.c
index f40ffc9a70..3a4498a223 100644
--- a/drivers/gpu/drm/virtio/virtgpu_plane.c
+++ b/drivers/gpu/drm/virtio/virtgpu_plane.c
@@ -28,14 +28,7 @@
 #include 
 
 static const uint32_t virtio_gpu_formats[] = {
-   DRM_FORMAT_XRGB,
-   DRM_FORMAT_ARGB,
-   DRM_FORMAT_BGRX,
-   DRM_FORMAT_BGRA,
-   DRM_FORMAT_RGBX,
-   DRM_FORMAT_RGBA,
-   DRM_FORMAT_XBGR,
-   DRM_FORMAT_ABGR,
+   DRM_FORMAT_CPU_XRGB,
 };
 
 static const uint32_t virtio_gpu_cursor_formats[] = {
-- 
2.9.3

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/6] drm: fourcc byteorder: add DRM_FORMAT_CPU_*

2017-04-24 Thread Gerd Hoffmann
Add fourcc variants in cpu byte order.  With these at hand we don't
need #ifdefs in drivers that want to support framebuffers in cpu endianness.

Signed-off-by: Gerd Hoffmann 
---
 include/drm/drm_fourcc.h | 12 
 1 file changed, 12 insertions(+)

diff --git a/include/drm/drm_fourcc.h b/include/drm/drm_fourcc.h
index 6942e84b6e..cae05153e8 100644
--- a/include/drm/drm_fourcc.h
+++ b/include/drm/drm_fourcc.h
@@ -25,6 +25,18 @@
 #include 
 #include 
 
+/*
+ * DRM formats are little endian.  define cpu endian variants here, to
+ * reduce the #ifdefs needed in drivers.
+ */
+#ifdef __BIG_ENDIAN
+# define DRM_FORMAT_CPU_XRGB DRM_FORMAT_BGRX
+# define DRM_FORMAT_CPU_ARGB DRM_FORMAT_BGRA
+#else
+# define DRM_FORMAT_CPU_XRGB DRM_FORMAT_XRGB
+# define DRM_FORMAT_CPU_ARGB DRM_FORMAT_ARGB
+#endif
+
 struct drm_device;
 struct drm_mode_fb_cmd2;
 
-- 
2.9.3

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/6] drm: fourcc byteorder: drop DRM_FORMAT_BIG_ENDIAN

2017-04-24 Thread Gerd Hoffmann
It's unused.

Suggested-by: Daniel Vetter 
Signed-off-by: Gerd Hoffmann 
---
 include/uapi/drm/drm_fourcc.h | 2 --
 drivers/gpu/drm/drm_fourcc.c  | 3 +--
 drivers/gpu/drm/drm_framebuffer.c | 2 +-
 3 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h
index 995c8f9c69..305bc34be0 100644
--- a/include/uapi/drm/drm_fourcc.h
+++ b/include/uapi/drm/drm_fourcc.h
@@ -33,8 +33,6 @@ extern "C" {
 #define fourcc_code(a, b, c, d) ((__u32)(a) | ((__u32)(b) << 8) | \
 ((__u32)(c) << 16) | ((__u32)(d) << 24))
 
-#define DRM_FORMAT_BIG_ENDIAN (1<<31) /* format is big endian instead of 
little endian */
-
 /* color index */
 #define DRM_FORMAT_C8  fourcc_code('C', '8', ' ', ' ') /* [7:0] C */
 
diff --git a/drivers/gpu/drm/drm_fourcc.c b/drivers/gpu/drm/drm_fourcc.c
index 9c0152df45..adb3ff59a4 100644
--- a/drivers/gpu/drm/drm_fourcc.c
+++ b/drivers/gpu/drm/drm_fourcc.c
@@ -86,12 +86,11 @@ EXPORT_SYMBOL(drm_mode_legacy_fb_format);
 const char *drm_get_format_name(uint32_t format, struct drm_format_name_buf 
*buf)
 {
snprintf(buf->str, sizeof(buf->str),
-"%c%c%c%c %s-endian (0x%08x)",
+"%c%c%c%c (0x%08x)",
 printable_char(format & 0xff),
 printable_char((format >> 8) & 0xff),
 printable_char((format >> 16) & 0xff),
 printable_char((format >> 24) & 0x7f),
-format & DRM_FORMAT_BIG_ENDIAN ? "big" : "little",
 format);
 
return buf->str;
diff --git a/drivers/gpu/drm/drm_framebuffer.c 
b/drivers/gpu/drm/drm_framebuffer.c
index fc8ef42203..efe8b5ece5 100644
--- a/drivers/gpu/drm/drm_framebuffer.c
+++ b/drivers/gpu/drm/drm_framebuffer.c
@@ -152,7 +152,7 @@ static int framebuffer_check(struct drm_device *dev,
int i;
 
/* check if the format is supported at all */
-   info = __drm_format_info(r->pixel_format & ~DRM_FORMAT_BIG_ENDIAN);
+   info = __drm_format_info(r->pixel_format);
if (!info) {
struct drm_format_name_buf format_name;
DRM_DEBUG_KMS("bad framebuffer format %s\n",
-- 
2.9.3

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: fix trace error for amdgpu_vm_bo_unmap

2017-04-24 Thread Junwei Zhang
Signed-off-by: Junwei Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index f38e5e2..8676eff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -249,7 +249,7 @@
 ),
 
TP_fast_assign(
-  __entry->bo = bo_va->bo;
+  __entry->bo = bo_va ? bo_va->bo : NULL;
   __entry->start = mapping->it.start;
   __entry->last = mapping->it.last;
   __entry->offset = mapping->offset;
-- 
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 0/6] *** Dedicated vmid per process v2 ***

2017-04-24 Thread Chunming Zhou
The current kernel implementation, which grabs the idle VMID from pool when 
emitting the job may:

The back-to-back submission from one process could use different VMID.
The submission to different queues from single process could use different 
VMID

It works well in most case but cannot work for the SQ thread trace capture.

The VMID for the submission that set the {SQTT}_BASE, which refers to the 
address of the trace buffer, is stored in shader engine.

If the profiling application have to use different VMIDs to submit IBs in its 
life cycle:

Some trace is not captured since it actually uses different VMID to submit 
jobs.
Some part of the captured trace may come from a different application since it 
accidentally uses the owner's VMID to submit jobs.

V2:
1. address Christian's comments:
a. drop context flags for tag process, instead, add vm ioctl.
b. change order of patches.
c. sync waiting only when vm flush needs.

2. address Alex's comments;
bump module version

Chunming Zhou (6):
  drm/amdgpu: add vm ioctl
  drm/amdgpu: add dedicated vmid field in vm struct
  drm/amdgpu: reserve vmid by vm ioctl
  drm/amdgpu: add limitation for dedicated vm number v2
  drm/amdgpu: implement grab dedicated vmid V2
  drm/amdgpu: bump module version for reserved vmid

 drivers/gpu/drm/amd/amdgpu/amdgpu.h|   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c|   3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c|   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 159 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |   5 +
 include/uapi/drm/amdgpu_drm.h  |  20 
 7 files changed, 188 insertions(+), 2 deletions(-)

-- 
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 4/6] drm/amdgpu: add limitation for dedicated vm number v2

2017-04-24 Thread Chunming Zhou
v2: move #define to amdgpu_vm.h

Change-Id: Ie5958cf6dbdc1c8278e61d9158483472d6f5c6e3
Signed-off-by: Chunming Zhou 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h| 1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 9 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 ++
 4 files changed, 13 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 0831cd2..ba9d3d0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1591,6 +1591,7 @@ struct amdgpu_device {
struct amdgpu_dummy_pagedummy_page;
struct amdgpu_vm_managervm_manager;
struct amdgpu_vmhub vmhub[AMDGPU_MAX_VMHUBS];
+   atomic_t                        reserved_vmid;
 
/* memory management */
struct amdgpu_mman  mman;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index a175dfd..9993085 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1889,6 +1889,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->vm_manager.vm_pte_num_rings = 0;
adev->gart.gart_funcs = NULL;
adev->fence_context = kcl_fence_context_alloc(AMDGPU_MAX_RINGS);
+   atomic_set(&adev->reserved_vmid, 0);
 
adev->smc_rreg = _invalid_rreg;
adev->smc_wreg = _invalid_wreg;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 5f4dcc9..f7113b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -565,6 +565,10 @@ static int amdgpu_vm_alloc_dedicated_vmid(struct 
amdgpu_device *adev,
unsigned vmhub;
int r;
 
+   if (atomic_read(&adev->reserved_vmid) >= AMDGPU_VM_MAX_RESERVED_VMID) {
+   DRM_ERROR("Over limitation of reserved vmid\n");
+   return -EINVAL;
+   }
for (vmhub = 0; vmhub < AMDGPU_MAX_VMHUBS; vmhub++) {
		id_mgr = &adev->vm_manager.id_mgr[vmhub];
 
@@ -580,6 +584,7 @@ static int amdgpu_vm_alloc_dedicated_vmid(struct 
amdgpu_device *adev,
if (r)
goto err;
}
+   atomic_inc(&adev->reserved_vmid);
 
return 0;
 err:
@@ -2302,6 +2307,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct 
amdgpu_vm *vm)
 {
struct amdgpu_bo_va_mapping *mapping, *tmp;
bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt;
+   bool dedicated = false;
int i;
 
if (vm->is_kfd_vm) {
@@ -2354,9 +2360,12 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct 
amdgpu_vm *vm)
			list_add(&vm->dedicated_vmid[i]->list,
				 &id_mgr->ids_lru);
vm->dedicated_vmid[i] = NULL;
+   dedicated = true;
}
		mutex_unlock(&id_mgr->lock);
}
+   if (dedicated)
+   atomic_dec(&adev->reserved_vmid);
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 23981ee..2d3e6ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -83,6 +83,8 @@
 
 /* hardcode that limit for now */
 #define AMDGPU_VA_RESERVED_SIZE(8 << 20)
+/* max vmids dedicated for process */
+#define AMDGPU_VM_MAX_RESERVED_VMID 2
 
 struct amdgpu_vm_pt {
struct amdgpu_bo*bo;
-- 
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 6/6] drm/amdgpu: bump module version for reserved vmid

2017-04-24 Thread Chunming Zhou
Change-Id: I1065e0430ed44f7ee6c29214b72e35a7343ea02b
Signed-off-by: Chunming Zhou 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 55322b4..6799829 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -64,9 +64,10 @@
  * - 3.12.0 - Add query for double offchip LDS buffers
  * - 3.13.0 - Add PRT support
  * - 3.14.0 - Fix race in amdgpu_ctx_get_fence() and note new functionality
+ * - 3.15.0 - Add reserved vmid support
  */
 #define KMS_DRIVER_MAJOR   3
-#define KMS_DRIVER_MINOR   14
+#define KMS_DRIVER_MINOR   15
 #define KMS_DRIVER_PATCHLEVEL  0
 
 int amdgpu_vram_limit = 0;
-- 
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 3/6] drm/amdgpu: reserve vmid by vm ioctl

2017-04-24 Thread Chunming Zhou
Change-Id: I5f80dc39dc9d44660a96a2b710b0dbb4d3b9039d
Signed-off-by: Chunming Zhou 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 56 ++
 1 file changed, 56 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index acf9102..5f4dcc9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -397,6 +397,17 @@ static bool amdgpu_vm_had_gpu_reset(struct amdgpu_device 
*adev,
		atomic_read(&adev->gpu_reset_counter);
 }
 
+static bool amdgpu_vm_dedicated_vmid_ready(struct amdgpu_vm *vm)
+{
+   unsigned vmhub;
+
+   for (vmhub = 0; vmhub < AMDGPU_MAX_VMHUBS; vmhub++) {
+   if (!vm->dedicated_vmid[vmhub])
+   return false;
+   }
+   return true;
+}
+
 /**
  * amdgpu_vm_grab_id - allocate the next free VMID
  *
@@ -546,6 +557,45 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct 
amdgpu_ring *ring,
return r;
 }
 
+static int amdgpu_vm_alloc_dedicated_vmid(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm)
+{
+   struct amdgpu_vm_id_manager *id_mgr;
+   struct amdgpu_vm_id *idle;
+   unsigned vmhub;
+   int r;
+
+   for (vmhub = 0; vmhub < AMDGPU_MAX_VMHUBS; vmhub++) {
+   id_mgr = &adev->vm_manager.id_mgr[vmhub];
+
+   mutex_lock(&id_mgr->lock);
+   /* Select the first entry VMID */
+   idle = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vm_id,
+   list);
+   list_del_init(&idle->list);
+   vm->dedicated_vmid[vmhub] = idle;
+   mutex_unlock(&id_mgr->lock);
+
+   r = amdgpu_sync_wait(&idle->active);
+   if (r)
+   goto err;
+   }
+
+   return 0;
+err:
+   for (vmhub = 0; vmhub < AMDGPU_MAX_VMHUBS; vmhub++) {
+   id_mgr = &adev->vm_manager.id_mgr[vmhub];
+
+   mutex_lock(&id_mgr->lock);
+   if (vm->dedicated_vmid[vmhub])
+   list_add(&vm->dedicated_vmid[vmhub]->list,
+&id_mgr->ids_lru);
+   vm->dedicated_vmid[vmhub] = NULL;
+   mutex_unlock(&id_mgr->lock);
+   }
+   return r;
+}
+
 static bool amdgpu_vm_ring_has_compute_vm_bug(struct amdgpu_ring *ring)
 {
struct amdgpu_device *adev = ring->adev;
@@ -2379,9 +2429,15 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
union drm_amdgpu_vm *args = data;
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_fpriv *fpriv = filp->driver_priv;
+   int r;
 
switch (args->in.op) {
case AMDGPU_VM_OP_RESERVE_VMID:
+   if (!amdgpu_vm_dedicated_vmid_ready(&fpriv->vm)) {
+   r = amdgpu_vm_alloc_dedicated_vmid(adev, &fpriv->vm);
+   if (r)
+   return r;
+   }
break;
default:
return -EINVAL;
-- 
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/6] drm/amdgpu: add vm ioctl

2017-04-24 Thread Chunming Zhou
It will be used for reserving vmid.

Change-Id: Ib7169ea999690c8e82d0dcbccdd2d97760c0270a
Signed-off-by: Chunming Zhou 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 16 
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  1 +
 include/uapi/drm/amdgpu_drm.h   | 20 
 4 files changed, 38 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index cad589a..7004e6c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1051,6 +1051,7 @@ int amdgpu_get_vblank_timestamp_kms(struct drm_device 
*dev, unsigned int pipe,
 const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
DRM_IOCTL_DEF_DRV(AMDGPU_GEM_CREATE, amdgpu_gem_create_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(AMDGPU_CTX, amdgpu_ctx_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF_DRV(AMDGPU_VM, amdgpu_vm_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(AMDGPU_BO_LIST, amdgpu_bo_list_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
/* KMS */
DRM_IOCTL_DEF_DRV(AMDGPU_GEM_MMAP, amdgpu_gem_mmap_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index f804d38..eb429c5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2358,3 +2358,19 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
}
}
 }
+
+int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+   union drm_amdgpu_vm *args = data;
+   struct amdgpu_device *adev = dev->dev_private;
+   struct amdgpu_fpriv *fpriv = filp->driver_priv;
+
+   switch (args->in.op) {
+   case AMDGPU_VM_OP_RESERVE_VMID:
+   break;
+   default:
+   return -EINVAL;
+   }
+
+   return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 0f547c6..62dbace 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -247,5 +247,6 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
 void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
  struct amdgpu_bo_va *bo_va);
 void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size);
+int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
 
 #endif
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 56b7a2f3..5ee639b 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -51,6 +51,7 @@
 #define DRM_AMDGPU_GEM_OP  0x10
 #define DRM_AMDGPU_GEM_USERPTR 0x11
 #define DRM_AMDGPU_WAIT_FENCES 0x12
+#define DRM_AMDGPU_VM  0x13
 
 /* hybrid specific ioctls */
 #define DRM_AMDGPU_SEM 0x5b
@@ -71,6 +72,7 @@
 #define DRM_IOCTL_AMDGPU_GEM_OPDRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_GEM_OP, struct drm_amdgpu_gem_op)
 #define DRM_IOCTL_AMDGPU_GEM_USERPTR   DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr)
 #define DRM_IOCTL_AMDGPU_WAIT_FENCES   DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences)
+#define DRM_IOCTL_AMDGPU_VMDRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_VM, union drm_amdgpu_vm)
 
 /* hybrid specific ioctls */
 #define DRM_IOCTL_AMDGPU_GEM_DGMA  DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_GEM_DGMA, struct drm_amdgpu_gem_dgma)
@@ -212,6 +214,24 @@ struct drm_amdgpu_ctx_in {
union drm_amdgpu_ctx_out out;
 };
 
+/* vm ioctl */
+#define AMDGPU_VM_OP_RESERVE_VMID  1
+struct drm_amdgpu_vm_in {
+   /** AMDGPU_VM_OP_* */
+   __u32   op;
+   __u32   flags;
+};
+
+struct drm_amdgpu_vm_out {
+   /** For future use, no flags defined so far */
+   __u64   flags;
+};
+
+union drm_amdgpu_vm {
+   struct drm_amdgpu_vm_in in;
+   struct drm_amdgpu_vm_out out;
+};
+
 /* sem related */
 #define AMDGPU_SEM_OP_CREATE_SEM1
 #define AMDGPU_SEM_OP_WAIT_SEM 2
-- 
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx