RE: [PATCH 1/2] drm/amdgpu: implement read_sensor() for pre-powerplay chips

2017-02-13 Thread Deucher, Alexander
> -Original Message-
> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf
> Of Samuel Pitoiset
> Sent: Monday, February 13, 2017 5:02 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Samuel Pitoiset
> Subject: [PATCH 1/2] drm/amdgpu: implement read_sensor() for pre-
> powerplay chips
> 
> Currently, only the GPU temperature, the shader clock and
> eventually the memory clock are implemented. The main goal
> is to expose this info to the userspace like Radeon.
> 
> Signed-off-by: Samuel Pitoiset 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h |  4 +++-
>  drivers/gpu/drm/amd/amdgpu/ci_dpm.c | 26 +
>  drivers/gpu/drm/amd/amdgpu/kv_dpm.c | 34
> +++
>  drivers/gpu/drm/amd/amdgpu/si_dpm.c | 41
> +
>  4 files changed, 104 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
> index 98698dcf15c7..f1876808ff58 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
> @@ -270,6 +270,8 @@ struct amdgpu_dpm_funcs {
>   struct amdgpu_ps *cps,
>   struct amdgpu_ps *rps,
>   bool *equal);
> + int (*read_sensor)(struct amdgpu_device *adev, int idx, void *value,
> +int *size);
> 
>   struct amd_vce_state* (*get_vce_clock_state)(struct
> amdgpu_device *adev, unsigned idx);
>   int (*reset_power_profile_state)(struct amdgpu_device *adev,
> @@ -293,7 +295,7 @@ struct amdgpu_dpm_funcs {
>  #define amdgpu_dpm_read_sensor(adev, idx, value, size) \
>   ((adev)->pp_enabled ? \
>   (adev)->powerplay.pp_funcs->read_sensor(adev-
> >powerplay.pp_handle, (idx), (value), (size)) : \
> - -EINVAL)
> + (adev)->pm.funcs->read_sensor((adev), (idx), (value),
> (size)))
> 
>  #define amdgpu_dpm_get_temperature(adev) \
>   ((adev)->pp_enabled ?   \
> diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
> b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
> index 578878d1d4c0..e3a06d6d9e99 100644
> --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
> @@ -6936,6 +6936,31 @@ static int ci_dpm_switch_power_profile(struct
> amdgpu_device *adev,
>   return 0;
>  }
> 
> +static int ci_dpm_read_sensor(struct amdgpu_device *adev, int idx,
> +   void *value, int *size)
> +{
> + /* size must be at least 4 bytes for all sensors */
> + if (*size < 4)
> + return -EINVAL;
> +
> + switch (idx) {
> + case AMDGPU_PP_SENSOR_GFX_SCLK:
> + *((uint32_t *)value) = ci_get_average_sclk_freq(adev);
> + *size = 4;
> + return 0;
> + case AMDGPU_PP_SENSOR_GFX_MCLK:
> + *((uint32_t *)value) = ci_get_average_mclk_freq(adev);
> + *size = 4;
> + return 0;
> + case AMDGPU_PP_SENSOR_GPU_TEMP:
> + *((uint32_t *)value) = ci_dpm_get_temp(adev);
> + *size = 4;
> + return 0;

While you are here you could add AMDGPU_PP_SENSOR_GPU_LOAD.  See 
ci_dpm_debugfs_print_current_performance_level() for the activity percent  
calculations.

Either way:
Reviewed-by: Alex Deucher 

> + default:
> + return -EINVAL;
> + }
> +}
> +
>  const struct amd_ip_funcs ci_dpm_ip_funcs = {
>   .name = "ci_dpm",
>   .early_init = ci_dpm_early_init,
> @@ -6982,6 +7007,7 @@ static const struct amdgpu_dpm_funcs
> ci_dpm_funcs = {
>   .set_power_profile_state = ci_dpm_set_power_profile_state,
>   .reset_power_profile_state = ci_dpm_reset_power_profile_state,
>   .switch_power_profile = ci_dpm_switch_power_profile,
> + .read_sensor = ci_dpm_read_sensor,
>  };
> 
>  static void ci_dpm_set_dpm_funcs(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
> b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
> index f5a343cb0010..13f323745729 100644
> --- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
> @@ -3260,6 +3260,39 @@ static int kv_check_state_equal(struct
> amdgpu_device *adev,
>   return 0;
>  }
> 
> +static int kv_dpm_read_sensor(struct amdgpu_device *adev, int idx,
> +   void *value, int *size)
> +{
> + struct kv_power_info *pi = kv_get_pi(adev);
> + uint32_t sclk;
> + u32 pl_index =
> + (RREG32_SMC(ixTARGET_AND_CURRENT_PROFILE_INDEX)
> &
> +
>   TARGET_AND_CURRENT_PROFILE_INDEX__CURR_SCLK_INDEX_MA
> SK) >>
> +
>   TARGET_AND_CURRENT_PROFILE_INDEX__CURR_SCLK_INDEX__SH
> IFT;
> +
> + /* size must be at least 4 bytes for all sensors */
> + if (*size < 4)
> + return -EINVAL;
> +
> + switch (idx) {
> + case AMDGPU_PP_SENSOR_GFX_SCLK:
> + if (pl_index 

Re: [PATCH v2 1/2] drm/amdgpu: fix a potential deadlock in amdgpu_bo_create_restricted()

2017-02-13 Thread Samuel Pitoiset



On 02/13/2017 07:58 PM, Nicolai Hähnle wrote:

On 13.02.2017 19:38, Samuel Pitoiset wrote:



On 02/13/2017 07:09 PM, Nicolai Hähnle wrote:

On 13.02.2017 19:04, Nicolai Hähnle wrote:

On 13.02.2017 18:49, Samuel Pitoiset wrote:



On 02/13/2017 05:25 PM, Nicolai Hähnle wrote:

On 09.02.2017 11:33, Samuel Pitoiset wrote:

When ttm_bo_init() fails, the reservation mutex should be unlocked.

In debug build, the kernel reported "possible recursive locking
detected" in this codepath. For debugging purposes, I also added
a "WARN_ON(ww_mutex_is_locked())" when ttm_bo_init() fails and the
mutex was locked as expected.

This should fix (random) GPU hangs. The easy way to reproduce the
issue is to change the "Super Sampling" option from 1.0 to 2.0 in
Hitman. It will create a huge buffer, evict a bunch of buffers
(around ~5k) and deadlock.

This regression has been introduced pretty recently.

v2: only release the mutex if resv is NULL

Fixes: 12a852219583 ("drm/amdgpu: improve
AMDGPU_GEM_CREATE_VRAM_CLEARED handling (v2)")
Signed-off-by: Samuel Pitoiset 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index d1ef1d064de4..556236a112c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -403,8 +403,11 @@ int amdgpu_bo_create_restricted(struct
amdgpu_device *adev,
 &bo->placement, page_align, !kernel, NULL,
 acc_size, sg, resv ? resv : &bo->tbo.ttm_resv,
 &amdgpu_ttm_bo_destroy);
-if (unlikely(r != 0))
+if (unlikely(r != 0)) {
+if (!resv)
+ww_mutex_unlock(&bo->tbo.resv->lock);
 return r;
+}


I was looking at this myself a couple of weeks back, and I'm pretty
sure
I had this exact same patch just to realize that it's actually
incorrect.

The problem is that ttm_bo_init will actually call the destroy
function
(in our case, amdgpu_ttm_bo_destroy), so at this point, bo has been
freed.

This code is a huge mess. I'm surprised though: have you verified
that
this patch actually fixes a hang?


Yes, I triple-checked. I can't reproduce the hangs with Hitman.


That's surprising, but a relief. Maybe it ties into some of the other
problems I'm seeing as well.

This means we need a real fix for this; I still think the current patch
is broken.



This fixes a deadlock, here's the report:
https://hastebin.com/durodivoma.xml

The resv->lock has to be unlocked when ttm_bo_init() fails (I checked
with a WARN_ON(is_locked)) because it doesn't call the destroy
function
in all situations. Presumably, when drm_vma_offset_add() fails and
resv
is not NULL, the mutex is not unlocked.


On which code path is the destroy function not called? If that is the
case, we're leaking memory.

With the patch as-is, the error paths are either leaking memory (if
you're right) or accessing memory after it's freed (otherwise).
Obviously, neither is good.


Actually, I find it extremely suspicious that this patch resolves hangs.
By all rights, no other task should have a pointer to this bo left. It
points at problems elsewhere in the code, possibly the precise problem
I've been trying to track down.


Well, maybe we are just lucky, but as I said, I tried many times to
reproduce the issue with that patch applied, without any success; you can
trust me. That said, I'm also starting to think that it's not the right
solution (and that it could introduce other issues).



Could you please revert the patch, reproduce the hang, and report
/proc/$pid/stack for all the hung tasks?


Sure. The thing is: Hitman's branch has been updated during the weekend
and my local installation is broken. I need to re-download the whole
game (will take a while).

I will let you know when I'm able to grab that report.


Hmm, so I thought about this some more, and I'm no longer so sure that
your bug and mine are the same. If it was related, I'd somehow expect
you to get an error about a mutex being destroyed while it's held (at
least with lock debugging enabled).


I enabled a bunch of debugging options (including lock debugging) when I
investigated that issue.


Yeah, unclear if it's actually related but something definitely needs to 
be fixed.




Anyway... we need to change the contract of ttm_bo_init, I'm just not
yet sure how, because there are two points of failure: one quite early
on, and the second rather late which gets cleaned up by ttm_bo_unref.

Cheers,
Nicolai


Thanks Nicolai.


Thanks,
Nicolai



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/2] drm/amdgpu: implement read_sensor() for pre-powerplay chips

2017-02-13 Thread Tom St Denis

On 02/13/2017 05:13 PM, Deucher, Alexander wrote:

-----Original Message-----
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf
Of Samuel Pitoiset
Sent: Monday, February 13, 2017 5:02 PM
To: amd-gfx@lists.freedesktop.org
Cc: Samuel Pitoiset
Subject: [PATCH 1/2] drm/amdgpu: implement read_sensor() for pre-
powerplay chips

Currently, only the GPU temperature, the shader clock and
eventually the memory clock are implemented. The main goal
is to expose this info to the userspace like Radeon.

Signed-off-by: Samuel Pitoiset 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h |  4 +++-
 drivers/gpu/drm/amd/amdgpu/ci_dpm.c | 26 +
 drivers/gpu/drm/amd/amdgpu/kv_dpm.c | 34
+++
 drivers/gpu/drm/amd/amdgpu/si_dpm.c | 41
+
 4 files changed, 104 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
index 98698dcf15c7..f1876808ff58 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
@@ -270,6 +270,8 @@ struct amdgpu_dpm_funcs {
struct amdgpu_ps *cps,
struct amdgpu_ps *rps,
bool *equal);
+   int (*read_sensor)(struct amdgpu_device *adev, int idx, void *value,
+  int *size);

struct amd_vce_state* (*get_vce_clock_state)(struct
amdgpu_device *adev, unsigned idx);
int (*reset_power_profile_state)(struct amdgpu_device *adev,
@@ -293,7 +295,7 @@ struct amdgpu_dpm_funcs {
 #define amdgpu_dpm_read_sensor(adev, idx, value, size) \
((adev)->pp_enabled ? \
(adev)->powerplay.pp_funcs->read_sensor(adev-

powerplay.pp_handle, (idx), (value), (size)) : \

-   -EINVAL)
+   (adev)->pm.funcs->read_sensor((adev), (idx), (value),
(size)))

 #define amdgpu_dpm_get_temperature(adev) \
((adev)->pp_enabled ?\
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index 578878d1d4c0..e3a06d6d9e99 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -6936,6 +6936,31 @@ static int ci_dpm_switch_power_profile(struct
amdgpu_device *adev,
return 0;
 }

+static int ci_dpm_read_sensor(struct amdgpu_device *adev, int idx,
+ void *value, int *size)
+{
+   /* size must be at least 4 bytes for all sensors */
+   if (*size < 4)
+   return -EINVAL;
+
+   switch (idx) {
+   case AMDGPU_PP_SENSOR_GFX_SCLK:
+   *((uint32_t *)value) = ci_get_average_sclk_freq(adev);
+   *size = 4;
+   return 0;
+   case AMDGPU_PP_SENSOR_GFX_MCLK:
+   *((uint32_t *)value) = ci_get_average_mclk_freq(adev);
+   *size = 4;
+   return 0;
+   case AMDGPU_PP_SENSOR_GPU_TEMP:
+   *((uint32_t *)value) = ci_dpm_get_temp(adev);
+   *size = 4;
+   return 0;


While you are here you could add AMDGPU_PP_SENSOR_GPU_LOAD.  See 
ci_dpm_debugfs_print_current_performance_level() for the activity percent  
calculations.

Either way:
Reviewed-by: Alex Deucher 


Once these land I can submit a patch to extend umr to read them.  Right 
now I only track sensors from ST/CZ and VI hardware.


Tom
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 0/2] drm/amdgpu: expose temp and shader/memory clocks

2017-02-13 Thread Samuel Pitoiset
Hi,

This series exposes the current GPU temperature and the current shader
clock (and, where available, the memory clock for non-APU boards). This adds
the same functionality as the Radeon driver. The main goal is to expose
the info through the GALLIUM_HUD in Mesa.

Alex Deucher suggested wiring up the read_sensor() interface for
pre-powerplay chips. The rest of the series is quite simple.

This has only been tested on Polaris (RX480).

Please review!
Thanks.

Samuel Pitoiset (2):
  drm/amdgpu: implement read_sensor() for pre-powerplay chips
  drm/amdgpu: expose the current temperature and shader/memory clocks

 drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h |  4 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 27 ++
 drivers/gpu/drm/amd/amdgpu/ci_dpm.c | 26 +
 drivers/gpu/drm/amd/amdgpu/kv_dpm.c | 34 +++
 drivers/gpu/drm/amd/amdgpu/si_dpm.c | 41 +
 include/uapi/drm/amdgpu_drm.h   |  8 ++-
 7 files changed, 140 insertions(+), 3 deletions(-)

-- 
2.11.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/2] drm/amdgpu: implement read_sensor() for pre-powerplay chips

2017-02-13 Thread Samuel Pitoiset
Currently, only the GPU temperature, the shader clock and
eventually the memory clock are implemented. The main goal
is to expose this info to the userspace like Radeon.

Signed-off-by: Samuel Pitoiset 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h |  4 +++-
 drivers/gpu/drm/amd/amdgpu/ci_dpm.c | 26 +
 drivers/gpu/drm/amd/amdgpu/kv_dpm.c | 34 +++
 drivers/gpu/drm/amd/amdgpu/si_dpm.c | 41 +
 4 files changed, 104 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
index 98698dcf15c7..f1876808ff58 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
@@ -270,6 +270,8 @@ struct amdgpu_dpm_funcs {
struct amdgpu_ps *cps,
struct amdgpu_ps *rps,
bool *equal);
+   int (*read_sensor)(struct amdgpu_device *adev, int idx, void *value,
+  int *size);
 
struct amd_vce_state* (*get_vce_clock_state)(struct amdgpu_device 
*adev, unsigned idx);
int (*reset_power_profile_state)(struct amdgpu_device *adev,
@@ -293,7 +295,7 @@ struct amdgpu_dpm_funcs {
 #define amdgpu_dpm_read_sensor(adev, idx, value, size) \
((adev)->pp_enabled ? \

(adev)->powerplay.pp_funcs->read_sensor(adev->powerplay.pp_handle, (idx), 
(value), (size)) : \
-   -EINVAL)
+   (adev)->pm.funcs->read_sensor((adev), (idx), (value), (size)))
 
 #define amdgpu_dpm_get_temperature(adev) \
((adev)->pp_enabled ?   \
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c 
b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index 578878d1d4c0..e3a06d6d9e99 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -6936,6 +6936,31 @@ static int ci_dpm_switch_power_profile(struct 
amdgpu_device *adev,
return 0;
 }
 
+static int ci_dpm_read_sensor(struct amdgpu_device *adev, int idx,
+ void *value, int *size)
+{
+   /* size must be at least 4 bytes for all sensors */
+   if (*size < 4)
+   return -EINVAL;
+
+   switch (idx) {
+   case AMDGPU_PP_SENSOR_GFX_SCLK:
+   *((uint32_t *)value) = ci_get_average_sclk_freq(adev);
+   *size = 4;
+   return 0;
+   case AMDGPU_PP_SENSOR_GFX_MCLK:
+   *((uint32_t *)value) = ci_get_average_mclk_freq(adev);
+   *size = 4;
+   return 0;
+   case AMDGPU_PP_SENSOR_GPU_TEMP:
+   *((uint32_t *)value) = ci_dpm_get_temp(adev);
+   *size = 4;
+   return 0;
+   default:
+   return -EINVAL;
+   }
+}
+
 const struct amd_ip_funcs ci_dpm_ip_funcs = {
.name = "ci_dpm",
.early_init = ci_dpm_early_init,
@@ -6982,6 +7007,7 @@ static const struct amdgpu_dpm_funcs ci_dpm_funcs = {
.set_power_profile_state = ci_dpm_set_power_profile_state,
.reset_power_profile_state = ci_dpm_reset_power_profile_state,
.switch_power_profile = ci_dpm_switch_power_profile,
+   .read_sensor = ci_dpm_read_sensor,
 };
 
 static void ci_dpm_set_dpm_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c 
b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
index f5a343cb0010..13f323745729 100644
--- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
@@ -3260,6 +3260,39 @@ static int kv_check_state_equal(struct amdgpu_device 
*adev,
return 0;
 }
 
+static int kv_dpm_read_sensor(struct amdgpu_device *adev, int idx,
+ void *value, int *size)
+{
+   struct kv_power_info *pi = kv_get_pi(adev);
+   uint32_t sclk;
+   u32 pl_index =
+   (RREG32_SMC(ixTARGET_AND_CURRENT_PROFILE_INDEX) &
+   TARGET_AND_CURRENT_PROFILE_INDEX__CURR_SCLK_INDEX_MASK) >>
+   TARGET_AND_CURRENT_PROFILE_INDEX__CURR_SCLK_INDEX__SHIFT;
+
+   /* size must be at least 4 bytes for all sensors */
+   if (*size < 4)
+   return -EINVAL;
+
+   switch (idx) {
+   case AMDGPU_PP_SENSOR_GFX_SCLK:
+   if (pl_index < SMU__NUM_SCLK_DPM_STATE) {
+   sclk = be32_to_cpu(
+   pi->graphics_level[pl_index].SclkFrequency);
+   *((uint32_t *)value) = sclk;
+   *size = 4;
+   return 0;
+   }
+   return -EINVAL;
+   case AMDGPU_PP_SENSOR_GPU_TEMP:
+   *((uint32_t *)value) = kv_dpm_get_temp(adev);
+   *size = 4;
+   return 0;
+   default:
+   return -EINVAL;
+   }
+}
+
 const struct amd_ip_funcs kv_dpm_ip_funcs = {
.name = "kv_dpm",
.early_init = 

[PATCH 2/2] drm/amdgpu: expose the current temperature and shader/memory clocks

2017-02-13 Thread Samuel Pitoiset
The clocks are returned in MHz and the temperature in millidegrees Celsius.

Signed-off-by: Samuel Pitoiset 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 27 +++
 include/uapi/drm/amdgpu_drm.h   |  8 +++-
 3 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index f275a6b54e9f..bae3ab8407b0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -59,9 +59,10 @@
  * - 3.7.0 - Add support for VCE clock list packet
  * - 3.8.0 - Add support raster config init in the kernel
  * - 3.9.0 - Add support for memory query info about VRAM and GTT.
+ * - 3.10.0 - Add support for clocks/temperature query info.
  */
 #define KMS_DRIVER_MAJOR   3
-#define KMS_DRIVER_MINOR   9
+#define KMS_DRIVER_MINOR   10
 #define KMS_DRIVER_PATCHLEVEL  0
 
 int amdgpu_vram_limit = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index d5f9d6a4b661..f032d0882bc5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -241,6 +241,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void 
*data, struct drm_file
uint32_t ui32 = 0;
uint64_t ui64 = 0;
int i, found;
+   int ui32_size = sizeof(ui32);
 
if (!info->return_size || !info->return_pointer)
return -EINVAL;
@@ -597,6 +598,32 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void 
*data, struct drm_file
return -EINVAL;
}
}
+   case AMDGPU_INFO_CURRENT_GPU_SCLK:
+   /* get sclk in Mhz */
+   if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_SCLK,
+   (void *)&ui32, &ui32_size)) {
+   ui32 /= 100;
+   return copy_to_user(out, &ui32,
+   min(size, 4u)) ? -EFAULT : 0;
+   }
+   return -EINVAL;
+   case AMDGPU_INFO_CURRENT_GPU_MCLK:
+   /* get mclk in Mhz */
+   if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_MCLK,
+   (void *)&ui32, &ui32_size)) {
+   ui32 /= 100;
+   return copy_to_user(out, &ui32,
+   min(size, 4u)) ? -EFAULT : 0;
+   }
+   return -EINVAL;
+   case AMDGPU_INFO_CURRENT_GPU_TEMP:
+   /* get temperature in millidegrees C */
+   if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP,
+   (void *)&ui32, &ui32_size)) {
+   return copy_to_user(out, &ui32,
+   min(size, 4u)) ? -EFAULT : 0;
+   }
+   return -EINVAL;
default:
DRM_DEBUG_KMS("Invalid request %d\n", info->query);
return -EINVAL;
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 07e3710f91cc..0db7a481046a 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -532,8 +532,14 @@ struct drm_amdgpu_cs_chunk_data {
#define AMDGPU_INFO_VBIOS_SIZE  0x1
/* Subquery id: Query vbios image */
#define AMDGPU_INFO_VBIOS_IMAGE 0x2
+/* Query the current shader clock */
+#define AMDGPU_INFO_CURRENT_GPU_SCLK   0x1c
+/* Query the current memory clock */
+#define AMDGPU_INFO_CURRENT_GPU_MCLK   0x1d
+/* Query the current temperature */
+#define AMDGPU_INFO_CURRENT_GPU_TEMP   0x1e
 /* Query UVD handles */
-#define AMDGPU_INFO_NUM_HANDLES0x1C
+#define AMDGPU_INFO_NUM_HANDLES0x1f
 
 #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0
 #define AMDGPU_INFO_MMR_SE_INDEX_MASK  0xff
-- 
2.11.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 2/2] drm/amdgpu: expose the current temperature and shader/memory clocks

2017-02-13 Thread Deucher, Alexander
> -Original Message-
> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf
> Of Samuel Pitoiset
> Sent: Monday, February 13, 2017 5:02 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Samuel Pitoiset
> Subject: [PATCH 2/2] drm/amdgpu: expose the current temperature and
> shader/memory clocks
> 
> The clocks are returned in Mhz and the temperature in millidegrees.
> 
> Signed-off-by: Samuel Pitoiset 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  3 ++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 27
> +++
>  include/uapi/drm/amdgpu_drm.h   |  8 +++-
>  3 files changed, 36 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index f275a6b54e9f..bae3ab8407b0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -59,9 +59,10 @@
>   * - 3.7.0 - Add support for VCE clock list packet
>   * - 3.8.0 - Add support raster config init in the kernel
>   * - 3.9.0 - Add support for memory query info about VRAM and GTT.
> + * - 3.10.0 - Add support for clocks/temperature query info.
>   */
>  #define KMS_DRIVER_MAJOR 3
> -#define KMS_DRIVER_MINOR 9
> +#define KMS_DRIVER_MINOR 10
>  #define KMS_DRIVER_PATCHLEVEL0
> 
>  int amdgpu_vram_limit = 0;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index d5f9d6a4b661..f032d0882bc5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -241,6 +241,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev,
> void *data, struct drm_file
>   uint32_t ui32 = 0;
>   uint64_t ui64 = 0;
>   int i, found;
> + int ui32_size = sizeof(ui32);
> 
>   if (!info->return_size || !info->return_pointer)
>   return -EINVAL;
> @@ -597,6 +598,32 @@ static int amdgpu_info_ioctl(struct drm_device
> *dev, void *data, struct drm_file
>   return -EINVAL;
>   }
>   }
> + case AMDGPU_INFO_CURRENT_GPU_SCLK:
> + /* get sclk in Mhz */
> + if (!amdgpu_dpm_read_sensor(adev,
> AMDGPU_PP_SENSOR_GFX_SCLK,
> + (void *), _size)) {
> + ui32 /= 100;
> + return copy_to_user(out, ,
> + min(size, 4u)) ? -EFAULT : 0;
> + }
> + return -EINVAL;
> + case AMDGPU_INFO_CURRENT_GPU_MCLK:
> + /* get mclk in Mhz */
> + if (!amdgpu_dpm_read_sensor(adev,
> AMDGPU_PP_SENSOR_GFX_MCLK,
> + (void *), _size)) {
> + ui32 /= 100;
> + return copy_to_user(out, ,
> + min(size, 4u)) ? -EFAULT : 0;
> + }
> + return -EINVAL;
> + case AMDGPU_INFO_CURRENT_GPU_TEMP:
> + /* get temperature in millidegrees C */
> + if (!amdgpu_dpm_read_sensor(adev,
> AMDGPU_PP_SENSOR_GPU_TEMP,
> + (void *), _size)) {
> + return copy_to_user(out, ,
> + min(size, 4u)) ? -EFAULT : 0;
> + }
> + return -EINVAL;
>   default:
>   DRM_DEBUG_KMS("Invalid request %d\n", info->query);
>   return -EINVAL;
> diff --git a/include/uapi/drm/amdgpu_drm.h
> b/include/uapi/drm/amdgpu_drm.h
> index 07e3710f91cc..0db7a481046a 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -532,8 +532,14 @@ struct drm_amdgpu_cs_chunk_data {
>   #define AMDGPU_INFO_VBIOS_SIZE  0x1
>   /* Subquery id: Query vbios image */
>   #define AMDGPU_INFO_VBIOS_IMAGE 0x2
> +/* Query the current shader clock */
> +#define AMDGPU_INFO_CURRENT_GPU_SCLK 0x1c
> +/* Query the current memory clock */
> +#define AMDGPU_INFO_CURRENT_GPU_MCLK 0x1d
> +/* Query the current temperature */
> +#define AMDGPU_INFO_CURRENT_GPU_TEMP 0x1e

I would suggest adding a single query for power related items.  E.g., 
AMDGPU_INFO_GPU_SENSOR and then add sub-queries for the different things you 
might want to query (temp, sclk, mclk, gpu load, voltages, etc.).  E.g., 
AMDGPU_INFO_GPU_SENSOR_SCLK, AMDGPU_INFO_GPU_SENSOR_MCLK, etc.

>  /* Query UVD handles */
> -#define AMDGPU_INFO_NUM_HANDLES  0x1C
> +#define AMDGPU_INFO_NUM_HANDLES  0x1f

Can't change an existing query.  New ones should be added to the end.

> 
>  #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT   0
>  #define AMDGPU_INFO_MMR_SE_INDEX_MASK0xff
> --
> 2.11.1
> 
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH v2 1/2] drm/amdgpu: implement read_sensor() for pre-powerplay chips

2017-02-13 Thread Samuel Pitoiset
Add the GPU temperature, the shader clock and eventually the
memory clock (as well as the GPU load on CI). The main goal is
to expose this info to the userspace like Radeon.

v2: - add AMDGPU_PP_SENSOR_GPU_LOAD on CI
- update the commit description

Signed-off-by: Samuel Pitoiset 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h |  4 ++-
 drivers/gpu/drm/amd/amdgpu/ci_dpm.c | 43 +
 drivers/gpu/drm/amd/amdgpu/kv_dpm.c | 34 ++
 drivers/gpu/drm/amd/amdgpu/si_dpm.c | 41 +++
 4 files changed, 121 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
index 98698dcf15c7..f1876808ff58 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
@@ -270,6 +270,8 @@ struct amdgpu_dpm_funcs {
struct amdgpu_ps *cps,
struct amdgpu_ps *rps,
bool *equal);
+   int (*read_sensor)(struct amdgpu_device *adev, int idx, void *value,
+  int *size);
 
struct amd_vce_state* (*get_vce_clock_state)(struct amdgpu_device 
*adev, unsigned idx);
int (*reset_power_profile_state)(struct amdgpu_device *adev,
@@ -293,7 +295,7 @@ struct amdgpu_dpm_funcs {
 #define amdgpu_dpm_read_sensor(adev, idx, value, size) \
((adev)->pp_enabled ? \

(adev)->powerplay.pp_funcs->read_sensor(adev->powerplay.pp_handle, (idx), 
(value), (size)) : \
-   -EINVAL)
+   (adev)->pm.funcs->read_sensor((adev), (idx), (value), (size)))
 
 #define amdgpu_dpm_get_temperature(adev) \
((adev)->pp_enabled ?   \
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c 
b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index 578878d1d4c0..b00e81db522d 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -6936,6 +6936,48 @@ static int ci_dpm_switch_power_profile(struct 
amdgpu_device *adev,
return 0;
 }
 
+static int ci_dpm_read_sensor(struct amdgpu_device *adev, int idx,
+ void *value, int *size)
+{
+   u32 activity_percent = 50;
+   int ret;
+
+   /* size must be at least 4 bytes for all sensors */
+   if (*size < 4)
+   return -EINVAL;
+
+   switch (idx) {
+   case AMDGPU_PP_SENSOR_GFX_SCLK:
+   *((uint32_t *)value) = ci_get_average_sclk_freq(adev);
+   *size = 4;
+   return 0;
+   case AMDGPU_PP_SENSOR_GFX_MCLK:
+   *((uint32_t *)value) = ci_get_average_mclk_freq(adev);
+   *size = 4;
+   return 0;
+   case AMDGPU_PP_SENSOR_GPU_TEMP:
+   *((uint32_t *)value) = ci_dpm_get_temp(adev);
+   *size = 4;
+   return 0;
+   case AMDGPU_PP_SENSOR_GPU_LOAD:
+   ret = ci_read_smc_soft_register(adev,
+   offsetof(SMU7_SoftRegisters,
+AverageGraphicsA),
+   &activity_percent);
+   if (ret == 0) {
+   activity_percent += 0x80;
+   activity_percent >>= 8;
+   activity_percent =
+   activity_percent > 100 ? 100 : activity_percent;
+   }
+   *((uint32_t *)value) = activity_percent;
+   *size = 4;
+   return 0;
+   default:
+   return -EINVAL;
+   }
+}
+
 const struct amd_ip_funcs ci_dpm_ip_funcs = {
.name = "ci_dpm",
.early_init = ci_dpm_early_init,
@@ -6982,6 +7024,7 @@ static const struct amdgpu_dpm_funcs ci_dpm_funcs = {
.set_power_profile_state = ci_dpm_set_power_profile_state,
.reset_power_profile_state = ci_dpm_reset_power_profile_state,
.switch_power_profile = ci_dpm_switch_power_profile,
+   .read_sensor = ci_dpm_read_sensor,
 };
 
 static void ci_dpm_set_dpm_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c 
b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
index f5a343cb0010..13f323745729 100644
--- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
@@ -3260,6 +3260,39 @@ static int kv_check_state_equal(struct amdgpu_device 
*adev,
return 0;
 }
 
+static int kv_dpm_read_sensor(struct amdgpu_device *adev, int idx,
+ void *value, int *size)
+{
+   struct kv_power_info *pi = kv_get_pi(adev);
+   uint32_t sclk;
+   u32 pl_index =
+   (RREG32_SMC(ixTARGET_AND_CURRENT_PROFILE_INDEX) &
+   TARGET_AND_CURRENT_PROFILE_INDEX__CURR_SCLK_INDEX_MASK) >>
+   TARGET_AND_CURRENT_PROFILE_INDEX__CURR_SCLK_INDEX__SHIFT;
+
+   /* size must 

[PATCH v2 2/2] drm/amdgpu: expose the current temperature and shader/memory clocks

2017-02-13 Thread Samuel Pitoiset
The clocks are returned in MHz and the temperature in millidegrees Celsius.

v2: - add sub-queries for AMDGPU_INFO_GPU_SENSOR_*
- do not break the ABI

Signed-off-by: Samuel Pitoiset 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 38 +
 include/uapi/drm/amdgpu_drm.h   | 12 +++
 3 files changed, 52 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index f275a6b54e9f..bae3ab8407b0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -59,9 +59,10 @@
  * - 3.7.0 - Add support for VCE clock list packet
  * - 3.8.0 - Add support raster config init in the kernel
  * - 3.9.0 - Add support for memory query info about VRAM and GTT.
+ * - 3.10.0 - Add support for clocks/temperature query info.
  */
 #define KMS_DRIVER_MAJOR   3
-#define KMS_DRIVER_MINOR   9
+#define KMS_DRIVER_MINOR   10
 #define KMS_DRIVER_PATCHLEVEL  0
 
 int amdgpu_vram_limit = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index d5f9d6a4b661..16365528b6a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -241,6 +241,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void 
*data, struct drm_file
uint32_t ui32 = 0;
uint64_t ui64 = 0;
int i, found;
+   int ui32_size = sizeof(ui32);
 
if (!info->return_size || !info->return_pointer)
return -EINVAL;
@@ -597,6 +598,43 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void 
*data, struct drm_file
return -EINVAL;
}
}
+   case AMDGPU_INFO_GPU_SENSOR: {
+   switch (info->gpu_sensor_info.type) {
+   case AMDGPU_INFO_GPU_SENSOR_SCLK:
+   /* get sclk in Mhz */
+   if (!amdgpu_dpm_read_sensor(adev,
+   AMDGPU_PP_SENSOR_GFX_SCLK,
+   (void *), _size)) 
{
+   ui32 /= 100;
+   return copy_to_user(out, ,
+   min(size, 4u)) ? -EFAULT : 
0;
+   }
+   return -EINVAL;
+   case AMDGPU_INFO_GPU_SENSOR_MCLK:
+   /* get mclk in Mhz */
+   if (!amdgpu_dpm_read_sensor(adev,
+   AMDGPU_PP_SENSOR_GFX_MCLK,
+   (void *), _size)) 
{
+   ui32 /= 100;
+   return copy_to_user(out, ,
+   min(size, 4u)) ? -EFAULT : 
0;
+   }
+   return -EINVAL;
+   case AMDGPU_INFO_GPU_SENSOR_TEMP:
+   /* get temperature in millidegrees C */
+   if (!amdgpu_dpm_read_sensor(adev,
+   AMDGPU_PP_SENSOR_GPU_TEMP,
+   (void *), _size)) 
{
+   return copy_to_user(out, ,
+   min(size, 4u)) ? -EFAULT : 
0;
+   }
+   return -EINVAL;
+   default:
+   DRM_DEBUG_KMS("Invalid request %d\n",
+ info->gpu_sensor_info.type);
+   return -EINVAL;
+   }
+   }
default:
DRM_DEBUG_KMS("Invalid request %d\n", info->query);
return -EINVAL;
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 07e3710f91cc..8235bb20c9dc 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -534,6 +534,14 @@ struct drm_amdgpu_cs_chunk_data {
#define AMDGPU_INFO_VBIOS_IMAGE 0x2
 /* Query UVD handles */
 #define AMDGPU_INFO_NUM_HANDLES0x1C
+/* Query GPU sensor related information */
+#define AMDGPU_INFO_GPU_SENSOR 0x1D
+   /* Query the current shader clock */
+   #define AMDGPU_INFO_GPU_SENSOR_SCLK 0x1
+   /* Query the current memory clock */
+   #define AMDGPU_INFO_GPU_SENSOR_MCLK 0x2
+   /* Query the current temperature */
+   #define AMDGPU_INFO_GPU_SENSOR_TEMP 0x3
 
 #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0
 #define AMDGPU_INFO_MMR_SE_INDEX_MASK  0xff
@@ -597,6 +605,10 @@ struct drm_amdgpu_info {
__u32 type;
__u32 offset;
} vbios_info;
+
+   struct {
+   __u32 type;
+   } 

Re: [PATCH 1/2] drm/amdgpu: implement read_sensor() for pre-powerplay chips

2017-02-13 Thread Samuel Pitoiset



On 02/14/2017 12:17 AM, Tom St Denis wrote:

Hi Samuel,


Hi Tom,




It would be helpful to modify amdgpu_debugfs_sensor_read() to support
dpm based sensors as well.  This will let me add it to umr.


You mean removing the sanity check (for powerplay boards)? I can do that 
in a follow-up patch yes.




If you can swing that in here that would be helpful if not I can submit
my own patch when this lands.

Cheers,
Tom



On 02/13/2017 05:01 PM, Samuel Pitoiset wrote:

Currently, only the GPU temperature, the shader clock and
eventually the memory clock are implemented. The main goal
is to expose this info to the userspace like Radeon.

Signed-off-by: Samuel Pitoiset 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h |  4 +++-
 drivers/gpu/drm/amd/amdgpu/ci_dpm.c | 26 +
 drivers/gpu/drm/amd/amdgpu/kv_dpm.c | 34 +++
 drivers/gpu/drm/amd/amdgpu/si_dpm.c | 41
+
 4 files changed, 104 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
index 98698dcf15c7..f1876808ff58 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
@@ -270,6 +270,8 @@ struct amdgpu_dpm_funcs {
 struct amdgpu_ps *cps,
 struct amdgpu_ps *rps,
 bool *equal);
+int (*read_sensor)(struct amdgpu_device *adev, int idx, void *value,
+   int *size);

 struct amd_vce_state* (*get_vce_clock_state)(struct amdgpu_device
*adev, unsigned idx);
 int (*reset_power_profile_state)(struct amdgpu_device *adev,
@@ -293,7 +295,7 @@ struct amdgpu_dpm_funcs {
 #define amdgpu_dpm_read_sensor(adev, idx, value, size) \
 ((adev)->pp_enabled ? \

(adev)->powerplay.pp_funcs->read_sensor(adev->powerplay.pp_handle,
(idx), (value), (size)) : \
--EINVAL)
+(adev)->pm.funcs->read_sensor((adev), (idx), (value), (size)))

 #define amdgpu_dpm_get_temperature(adev) \
 ((adev)->pp_enabled ?\
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index 578878d1d4c0..e3a06d6d9e99 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -6936,6 +6936,31 @@ static int ci_dpm_switch_power_profile(struct
amdgpu_device *adev,
 return 0;
 }

+static int ci_dpm_read_sensor(struct amdgpu_device *adev, int idx,
+  void *value, int *size)
+{
+/* size must be at least 4 bytes for all sensors */
+if (*size < 4)
+return -EINVAL;
+
+switch (idx) {
+case AMDGPU_PP_SENSOR_GFX_SCLK:
+*((uint32_t *)value) = ci_get_average_sclk_freq(adev);
+*size = 4;
+return 0;
+case AMDGPU_PP_SENSOR_GFX_MCLK:
+*((uint32_t *)value) = ci_get_average_mclk_freq(adev);
+*size = 4;
+return 0;
+case AMDGPU_PP_SENSOR_GPU_TEMP:
+*((uint32_t *)value) = ci_dpm_get_temp(adev);
+*size = 4;
+return 0;
+default:
+return -EINVAL;
+}
+}
+
 const struct amd_ip_funcs ci_dpm_ip_funcs = {
 .name = "ci_dpm",
 .early_init = ci_dpm_early_init,
@@ -6982,6 +7007,7 @@ static const struct amdgpu_dpm_funcs
ci_dpm_funcs = {
 .set_power_profile_state = ci_dpm_set_power_profile_state,
 .reset_power_profile_state = ci_dpm_reset_power_profile_state,
 .switch_power_profile = ci_dpm_switch_power_profile,
+.read_sensor = ci_dpm_read_sensor,
 };

 static void ci_dpm_set_dpm_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
index f5a343cb0010..13f323745729 100644
--- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
@@ -3260,6 +3260,39 @@ static int kv_check_state_equal(struct
amdgpu_device *adev,
 return 0;
 }

+static int kv_dpm_read_sensor(struct amdgpu_device *adev, int idx,
+  void *value, int *size)
+{
+struct kv_power_info *pi = kv_get_pi(adev);
+uint32_t sclk;
+u32 pl_index =
+(RREG32_SMC(ixTARGET_AND_CURRENT_PROFILE_INDEX) &
+TARGET_AND_CURRENT_PROFILE_INDEX__CURR_SCLK_INDEX_MASK) >>
+TARGET_AND_CURRENT_PROFILE_INDEX__CURR_SCLK_INDEX__SHIFT;
+
+/* size must be at least 4 bytes for all sensors */
+if (*size < 4)
+return -EINVAL;
+
+switch (idx) {
+case AMDGPU_PP_SENSOR_GFX_SCLK:
+if (pl_index < SMU__NUM_SCLK_DPM_STATE) {
+sclk = be32_to_cpu(
+pi->graphics_level[pl_index].SclkFrequency);
+*((uint32_t *)value) = sclk;
+*size = 4;
+return 0;
+}
+return -EINVAL;
+case AMDGPU_PP_SENSOR_GPU_TEMP:
+*((uint32_t *)value) = kv_dpm_get_temp(adev);
+*size = 4;
+return 0;
+default:
+return -EINVAL;
+}
+}
+
 const struct amd_ip_funcs kv_dpm_ip_funcs = {
 .name = "kv_dpm",
 

Re: [PATCH 1/2] drm/amdgpu: implement read_sensor() for pre-powerplay chips

2017-02-13 Thread Samuel Pitoiset



On 02/14/2017 12:46 AM, Tom St Denis wrote:

On 02/13/2017 06:40 PM, Samuel Pitoiset wrote:



On 02/14/2017 12:17 AM, Tom St Denis wrote:

Hi Samuel,


Hi Tom,




It would be helpful to modify amdgpu_debugfs_sensor_read() to support
dpm based sensors as well.  This will let me add it to umr.


You mean removing the sanity check (for powerplay boards)? I can do that
in a follow-up patch yes.


As long as the path to or the pointer for read_sensor is not null.

It's been a while since I looked at the pm code but I wonder if a sanity
check is still required even if you use the macro to read the sensor.


Ah yeah, not the function I was thinking of but should be pretty simple 
I think.




Tom






If you can swing that in here that would be helpful if not I can submit
my own patch when this lands.

Cheers,
Tom



On 02/13/2017 05:01 PM, Samuel Pitoiset wrote:

Currently, only the GPU temperature, the shader clock and
eventually the memory clock are implemented. The main goal
is to expose this info to the userspace like Radeon.

Signed-off-by: Samuel Pitoiset 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h |  4 +++-
 drivers/gpu/drm/amd/amdgpu/ci_dpm.c | 26 +
 drivers/gpu/drm/amd/amdgpu/kv_dpm.c | 34
+++
 drivers/gpu/drm/amd/amdgpu/si_dpm.c | 41
+
 4 files changed, 104 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
index 98698dcf15c7..f1876808ff58 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
@@ -270,6 +270,8 @@ struct amdgpu_dpm_funcs {
 struct amdgpu_ps *cps,
 struct amdgpu_ps *rps,
 bool *equal);
+int (*read_sensor)(struct amdgpu_device *adev, int idx, void
*value,
+   int *size);

 struct amd_vce_state* (*get_vce_clock_state)(struct amdgpu_device
*adev, unsigned idx);
 int (*reset_power_profile_state)(struct amdgpu_device *adev,
@@ -293,7 +295,7 @@ struct amdgpu_dpm_funcs {
 #define amdgpu_dpm_read_sensor(adev, idx, value, size) \
 ((adev)->pp_enabled ? \

(adev)->powerplay.pp_funcs->read_sensor(adev->powerplay.pp_handle,
(idx), (value), (size)) : \
--EINVAL)
+(adev)->pm.funcs->read_sensor((adev), (idx), (value), (size)))

 #define amdgpu_dpm_get_temperature(adev) \
 ((adev)->pp_enabled ?\
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index 578878d1d4c0..e3a06d6d9e99 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -6936,6 +6936,31 @@ static int ci_dpm_switch_power_profile(struct
amdgpu_device *adev,
 return 0;
 }

+static int ci_dpm_read_sensor(struct amdgpu_device *adev, int idx,
+  void *value, int *size)
+{
+/* size must be at least 4 bytes for all sensors */
+if (*size < 4)
+return -EINVAL;
+
+switch (idx) {
+case AMDGPU_PP_SENSOR_GFX_SCLK:
+*((uint32_t *)value) = ci_get_average_sclk_freq(adev);
+*size = 4;
+return 0;
+case AMDGPU_PP_SENSOR_GFX_MCLK:
+*((uint32_t *)value) = ci_get_average_mclk_freq(adev);
+*size = 4;
+return 0;
+case AMDGPU_PP_SENSOR_GPU_TEMP:
+*((uint32_t *)value) = ci_dpm_get_temp(adev);
+*size = 4;
+return 0;
+default:
+return -EINVAL;
+}
+}
+
 const struct amd_ip_funcs ci_dpm_ip_funcs = {
 .name = "ci_dpm",
 .early_init = ci_dpm_early_init,
@@ -6982,6 +7007,7 @@ static const struct amdgpu_dpm_funcs
ci_dpm_funcs = {
 .set_power_profile_state = ci_dpm_set_power_profile_state,
 .reset_power_profile_state = ci_dpm_reset_power_profile_state,
 .switch_power_profile = ci_dpm_switch_power_profile,
+.read_sensor = ci_dpm_read_sensor,
 };

 static void ci_dpm_set_dpm_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
index f5a343cb0010..13f323745729 100644
--- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
@@ -3260,6 +3260,39 @@ static int kv_check_state_equal(struct
amdgpu_device *adev,
 return 0;
 }

+static int kv_dpm_read_sensor(struct amdgpu_device *adev, int idx,
+  void *value, int *size)
+{
+struct kv_power_info *pi = kv_get_pi(adev);
+uint32_t sclk;
+u32 pl_index =
+(RREG32_SMC(ixTARGET_AND_CURRENT_PROFILE_INDEX) &
+TARGET_AND_CURRENT_PROFILE_INDEX__CURR_SCLK_INDEX_MASK) >>
+TARGET_AND_CURRENT_PROFILE_INDEX__CURR_SCLK_INDEX__SHIFT;
+
+/* size must be at least 4 bytes for all sensors */
+if (*size < 4)
+return -EINVAL;
+
+switch (idx) {
+case AMDGPU_PP_SENSOR_GFX_SCLK:
+if (pl_index < SMU__NUM_SCLK_DPM_STATE) {
+sclk = be32_to_cpu(
+

Re: [PATCH 1/2] drm/amdgpu: implement read_sensor() for pre-powerplay chips

2017-02-13 Thread Tom St Denis

Hi Samuel,

It would be helpful to modify amdgpu_debugfs_sensor_read() to support 
dpm based sensors as well.  This will let me add it to umr.


If you can swing that in here that would be helpful if not I can submit 
my own patch when this lands.


Cheers,
Tom



On 02/13/2017 05:01 PM, Samuel Pitoiset wrote:

Currently, only the GPU temperature, the shader clock and
eventually the memory clock are implemented. The main goal
is to expose this info to the userspace like Radeon.

Signed-off-by: Samuel Pitoiset 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h |  4 +++-
 drivers/gpu/drm/amd/amdgpu/ci_dpm.c | 26 +
 drivers/gpu/drm/amd/amdgpu/kv_dpm.c | 34 +++
 drivers/gpu/drm/amd/amdgpu/si_dpm.c | 41 +
 4 files changed, 104 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
index 98698dcf15c7..f1876808ff58 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
@@ -270,6 +270,8 @@ struct amdgpu_dpm_funcs {
struct amdgpu_ps *cps,
struct amdgpu_ps *rps,
bool *equal);
+   int (*read_sensor)(struct amdgpu_device *adev, int idx, void *value,
+  int *size);

struct amd_vce_state* (*get_vce_clock_state)(struct amdgpu_device 
*adev, unsigned idx);
int (*reset_power_profile_state)(struct amdgpu_device *adev,
@@ -293,7 +295,7 @@ struct amdgpu_dpm_funcs {
 #define amdgpu_dpm_read_sensor(adev, idx, value, size) \
((adev)->pp_enabled ? \

(adev)->powerplay.pp_funcs->read_sensor(adev->powerplay.pp_handle, (idx), 
(value), (size)) : \
-   -EINVAL)
+   (adev)->pm.funcs->read_sensor((adev), (idx), (value), (size)))

 #define amdgpu_dpm_get_temperature(adev) \
((adev)->pp_enabled ?\
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c 
b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index 578878d1d4c0..e3a06d6d9e99 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -6936,6 +6936,31 @@ static int ci_dpm_switch_power_profile(struct 
amdgpu_device *adev,
return 0;
 }

+static int ci_dpm_read_sensor(struct amdgpu_device *adev, int idx,
+ void *value, int *size)
+{
+   /* size must be at least 4 bytes for all sensors */
+   if (*size < 4)
+   return -EINVAL;
+
+   switch (idx) {
+   case AMDGPU_PP_SENSOR_GFX_SCLK:
+   *((uint32_t *)value) = ci_get_average_sclk_freq(adev);
+   *size = 4;
+   return 0;
+   case AMDGPU_PP_SENSOR_GFX_MCLK:
+   *((uint32_t *)value) = ci_get_average_mclk_freq(adev);
+   *size = 4;
+   return 0;
+   case AMDGPU_PP_SENSOR_GPU_TEMP:
+   *((uint32_t *)value) = ci_dpm_get_temp(adev);
+   *size = 4;
+   return 0;
+   default:
+   return -EINVAL;
+   }
+}
+
 const struct amd_ip_funcs ci_dpm_ip_funcs = {
.name = "ci_dpm",
.early_init = ci_dpm_early_init,
@@ -6982,6 +7007,7 @@ static const struct amdgpu_dpm_funcs ci_dpm_funcs = {
.set_power_profile_state = ci_dpm_set_power_profile_state,
.reset_power_profile_state = ci_dpm_reset_power_profile_state,
.switch_power_profile = ci_dpm_switch_power_profile,
+   .read_sensor = ci_dpm_read_sensor,
 };

 static void ci_dpm_set_dpm_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c 
b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
index f5a343cb0010..13f323745729 100644
--- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
@@ -3260,6 +3260,39 @@ static int kv_check_state_equal(struct amdgpu_device 
*adev,
return 0;
 }

+static int kv_dpm_read_sensor(struct amdgpu_device *adev, int idx,
+ void *value, int *size)
+{
+   struct kv_power_info *pi = kv_get_pi(adev);
+   uint32_t sclk;
+   u32 pl_index =
+   (RREG32_SMC(ixTARGET_AND_CURRENT_PROFILE_INDEX) &
+   TARGET_AND_CURRENT_PROFILE_INDEX__CURR_SCLK_INDEX_MASK) >>
+   TARGET_AND_CURRENT_PROFILE_INDEX__CURR_SCLK_INDEX__SHIFT;
+
+   /* size must be at least 4 bytes for all sensors */
+   if (*size < 4)
+   return -EINVAL;
+
+   switch (idx) {
+   case AMDGPU_PP_SENSOR_GFX_SCLK:
+   if (pl_index < SMU__NUM_SCLK_DPM_STATE) {
+   sclk = be32_to_cpu(
+   pi->graphics_level[pl_index].SclkFrequency);
+   *((uint32_t *)value) = sclk;
+   *size = 4;
+   return 0;
+   }
+   return -EINVAL;
+   

[PATCH] drm/amdgpu: expose amdgpu_sensors on pre-powerplay chips

2017-02-13 Thread Samuel Pitoiset
Totally untested, but since read_sensor() has recently been
implemented for dpm based boards, amdgpu_sensors can now be
exposed.

Cc: Tom St Denis 
Signed-off-by: Samuel Pitoiset 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 6f021e70f15f..1a8e3b9a2268 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3202,10 +3202,7 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file 
*f, char __user *buf,
idx = *pos >> 2;
 
valuesize = sizeof(values);
-   if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->read_sensor)
-   r = 
adev->powerplay.pp_funcs->read_sensor(adev->powerplay.pp_handle, idx, 
&values[0], &valuesize);
-   else
-   return -EINVAL;
+   r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize);
 
if (size > valuesize)
return -EINVAL;
-- 
2.11.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/2] drm/amdgpu: implement read_sensor() for pre-powerplay chips

2017-02-13 Thread Tom St Denis

On 02/13/2017 06:40 PM, Samuel Pitoiset wrote:



On 02/14/2017 12:17 AM, Tom St Denis wrote:

Hi Samuel,


Hi Tom,




It would be helpful to modify amdgpu_debugfs_sensor_read() to support
dpm based sensors as well.  This will let me add it to umr.


You mean removing the sanity check (for powerplay boards)? I can do that
in a follow-up patch yes.


As long as the path to or the pointer for read_sensor is not null.

It's been a while since I looked at the pm code but I wonder if a sanity 
check is still required even if you use the macro to read the sensor.


Tom






If you can swing that in here that would be helpful if not I can submit
my own patch when this lands.

Cheers,
Tom



On 02/13/2017 05:01 PM, Samuel Pitoiset wrote:

Currently, only the GPU temperature, the shader clock and
eventually the memory clock are implemented. The main goal
is to expose this info to the userspace like Radeon.

Signed-off-by: Samuel Pitoiset 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h |  4 +++-
 drivers/gpu/drm/amd/amdgpu/ci_dpm.c | 26 +
 drivers/gpu/drm/amd/amdgpu/kv_dpm.c | 34
+++
 drivers/gpu/drm/amd/amdgpu/si_dpm.c | 41
+
 4 files changed, 104 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
index 98698dcf15c7..f1876808ff58 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
@@ -270,6 +270,8 @@ struct amdgpu_dpm_funcs {
 struct amdgpu_ps *cps,
 struct amdgpu_ps *rps,
 bool *equal);
+int (*read_sensor)(struct amdgpu_device *adev, int idx, void
*value,
+   int *size);

 struct amd_vce_state* (*get_vce_clock_state)(struct amdgpu_device
*adev, unsigned idx);
 int (*reset_power_profile_state)(struct amdgpu_device *adev,
@@ -293,7 +295,7 @@ struct amdgpu_dpm_funcs {
 #define amdgpu_dpm_read_sensor(adev, idx, value, size) \
 ((adev)->pp_enabled ? \

(adev)->powerplay.pp_funcs->read_sensor(adev->powerplay.pp_handle,
(idx), (value), (size)) : \
--EINVAL)
+(adev)->pm.funcs->read_sensor((adev), (idx), (value), (size)))

 #define amdgpu_dpm_get_temperature(adev) \
 ((adev)->pp_enabled ?\
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index 578878d1d4c0..e3a06d6d9e99 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -6936,6 +6936,31 @@ static int ci_dpm_switch_power_profile(struct
amdgpu_device *adev,
 return 0;
 }

+static int ci_dpm_read_sensor(struct amdgpu_device *adev, int idx,
+  void *value, int *size)
+{
+/* size must be at least 4 bytes for all sensors */
+if (*size < 4)
+return -EINVAL;
+
+switch (idx) {
+case AMDGPU_PP_SENSOR_GFX_SCLK:
+*((uint32_t *)value) = ci_get_average_sclk_freq(adev);
+*size = 4;
+return 0;
+case AMDGPU_PP_SENSOR_GFX_MCLK:
+*((uint32_t *)value) = ci_get_average_mclk_freq(adev);
+*size = 4;
+return 0;
+case AMDGPU_PP_SENSOR_GPU_TEMP:
+*((uint32_t *)value) = ci_dpm_get_temp(adev);
+*size = 4;
+return 0;
+default:
+return -EINVAL;
+}
+}
+
 const struct amd_ip_funcs ci_dpm_ip_funcs = {
 .name = "ci_dpm",
 .early_init = ci_dpm_early_init,
@@ -6982,6 +7007,7 @@ static const struct amdgpu_dpm_funcs
ci_dpm_funcs = {
 .set_power_profile_state = ci_dpm_set_power_profile_state,
 .reset_power_profile_state = ci_dpm_reset_power_profile_state,
 .switch_power_profile = ci_dpm_switch_power_profile,
+.read_sensor = ci_dpm_read_sensor,
 };

 static void ci_dpm_set_dpm_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
index f5a343cb0010..13f323745729 100644
--- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
@@ -3260,6 +3260,39 @@ static int kv_check_state_equal(struct
amdgpu_device *adev,
 return 0;
 }

+static int kv_dpm_read_sensor(struct amdgpu_device *adev, int idx,
+  void *value, int *size)
+{
+struct kv_power_info *pi = kv_get_pi(adev);
+uint32_t sclk;
+u32 pl_index =
+(RREG32_SMC(ixTARGET_AND_CURRENT_PROFILE_INDEX) &
+TARGET_AND_CURRENT_PROFILE_INDEX__CURR_SCLK_INDEX_MASK) >>
+TARGET_AND_CURRENT_PROFILE_INDEX__CURR_SCLK_INDEX__SHIFT;
+
+/* size must be at least 4 bytes for all sensors */
+if (*size < 4)
+return -EINVAL;
+
+switch (idx) {
+case AMDGPU_PP_SENSOR_GFX_SCLK:
+if (pl_index < SMU__NUM_SCLK_DPM_STATE) {
+sclk = be32_to_cpu(
+pi->graphics_level[pl_index].SclkFrequency);
+*((uint32_t *)value) = sclk;
+*size = 4;
+return 0;
+}
+

Re: PRT support for amdgpu v3

2017-02-13 Thread Christian König

Hi Nicolai,

that one should be fixed by "drm/amdgpu: fix PRT cleanup order in the 
VM". Please test and/or review.


Thanks,
Christian.

Am 12.02.2017 um 12:36 schrieb Nicolai Hähnle:

Hi,

Some more testing uncovered a bug in cleanup paths. When the 
application segfaults while PRT mappings exist, I get a WARN_ON (which 
seems fairly straightforward) and occasionally also an RCU error 
warning -- see the attached dmesg logs.


Regular application shutdown works fine, though.

Cheers,
Nicolai

On 08.02.2017 16:04, Christian König wrote:

Hi guys,

ok I finally found time to write a unit test for this and hammered 
out the last few bugs.


Seems to work fine on my Tonga now. Please note that this set is 
based on "fix race in GEM VA map IOCTL v2", without that patch you 
will run into a NULL pointer dereference during PRT mapping.


Going to send out the unit test in a minute.

Regards,
Christian.

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx





___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] Add missing CIK devices

2017-02-13 Thread Tom St Denis

On 13/02/17 08:12 AM, Christian König wrote:

Am 13.02.2017 um 13:46 schrieb Tom St Denis:

Adds mullins, kabini, and hawaii ASICs to the library.

Signed-off-by: Tom St Denis 


Acked-by: Christian König .


Thanks.

If there are no objections I'll push it out in a couple hours.

Cheers,
Tom
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] Add new gmc/smu registers

2017-02-13 Thread Tom St Denis
Signed-off-by: Tom St Denis 
---
 src/lib/ip/gmc60_bits.i  | 2 ++
 src/lib/ip/smu701_bits.i | 4 
 src/lib/ip/smu701_regs.i | 1 +
 src/lib/ip/smu711_bits.i | 4 
 src/lib/ip/smu711_regs.i | 1 +
 src/lib/ip/smu712_bits.i | 4 
 src/lib/ip/smu712_regs.i | 1 +
 src/lib/ip/smu713_bits.i | 4 
 src/lib/ip/smu713_regs.i | 1 +
 9 files changed, 22 insertions(+)

diff --git a/src/lib/ip/gmc60_bits.i b/src/lib/ip/gmc60_bits.i
index 746dd64fb392..4e4c052f6e79 100644
--- a/src/lib/ip/gmc60_bits.i
+++ b/src/lib/ip/gmc60_bits.i
@@ -3230,6 +3230,8 @@ static struct umr_bitfield mmVM_PRT_APERTURE3_HIGH_ADDR[] 
= {
 static struct umr_bitfield mmVM_PRT_CNTL[] = {
 { "L1_TLB_STORE_INVALID_ENTRIES", 3, 3, _bitfield_default },
 { "L2_CACHE_STORE_INVALID_ENTRIES", 2, 2, _bitfield_default },
+{ "CB_DISABLE_FAULT_ON_UNMAPPED_ACCESS", 0, 0, _bitfield_default },
+{ "TC_DISABLE_FAULT_ON_UNMAPPED_ACCESS", 1, 1, _bitfield_default },
 };
 static struct umr_bitfield mmVM_CONTEXTS_DISABLE[] = {
 { "DISABLE_CONTEXT_0", 0, 0, _bitfield_default },
diff --git a/src/lib/ip/smu701_bits.i b/src/lib/ip/smu701_bits.i
index 972d8b74b5db..2f50eb9f0b67 100644
--- a/src/lib/ip/smu701_bits.i
+++ b/src/lib/ip/smu701_bits.i
@@ -4391,6 +4391,10 @@ static struct umr_bitfield ixCG_FREQ_TRAN_VOTING_7[] = {
 static struct umr_bitfield ixCG_DISPLAY_GAP_CNTL2[] = {
 { "VBI_PREDICTION", 0, 31, _bitfield_default },
 };
+static struct umr_bitfield ixCURRENT_PG_STATUS[] = {
+{ "VCE_PG_STATUS", 1, 1, _bitfield_default },
+{ "UVD_PG_STATUS", 2, 2, _bitfield_default },
+};
 static struct umr_bitfield ixSCLK_MIN_DIV[] = {
 { "FRACV", 0, 11, _bitfield_default },
 { "INTV", 12, 18, _bitfield_default },
diff --git a/src/lib/ip/smu701_regs.i b/src/lib/ip/smu701_regs.i
index 63f85e1173cc..3ff965359c89 100644
--- a/src/lib/ip/smu701_regs.i
+++ b/src/lib/ip/smu701_regs.i
@@ -1091,6 +1091,7 @@
{ "ixCG_FREQ_TRAN_VOTING_6", REG_SMC, 0xc02001c0, 
_FREQ_TRAN_VOTING_6[0], 
sizeof(ixCG_FREQ_TRAN_VOTING_6)/sizeof(ixCG_FREQ_TRAN_VOTING_6[0]), 0, 0 },
{ "ixCG_FREQ_TRAN_VOTING_7", REG_SMC, 0xc02001c4, 
_FREQ_TRAN_VOTING_7[0], 
sizeof(ixCG_FREQ_TRAN_VOTING_7)/sizeof(ixCG_FREQ_TRAN_VOTING_7[0]), 0, 0 },
{ "ixCG_DISPLAY_GAP_CNTL2", REG_SMC, 0xc0200230, 
_DISPLAY_GAP_CNTL2[0], 
sizeof(ixCG_DISPLAY_GAP_CNTL2)/sizeof(ixCG_DISPLAY_GAP_CNTL2[0]), 0, 0 },
+   { "ixCURRENT_PG_STATUS", REG_SMC, 0xc020029c, &ixCURRENT_PG_STATUS[0], 
sizeof(ixCURRENT_PG_STATUS)/sizeof(ixCURRENT_PG_STATUS[0]), 0, 0 },
{ "ixSCLK_MIN_DIV", REG_SMC, 0xc0200308, _MIN_DIV[0], 
sizeof(ixSCLK_MIN_DIV)/sizeof(ixSCLK_MIN_DIV[0]), 0, 0 },
{ "ixLCLK_DEEP_SLEEP_CNTL2", REG_SMC, 0xc0200310, 
_DEEP_SLEEP_CNTL2[0], 
sizeof(ixLCLK_DEEP_SLEEP_CNTL2)/sizeof(ixLCLK_DEEP_SLEEP_CNTL2[0]), 0, 0 },
{ "ixCG_THERMAL_CTRL", REG_SMC, 0xc034, _THERMAL_CTRL[0], 
sizeof(ixCG_THERMAL_CTRL)/sizeof(ixCG_THERMAL_CTRL[0]), 0, 0 },
diff --git a/src/lib/ip/smu711_bits.i b/src/lib/ip/smu711_bits.i
index 6d803259e970..afd90220d5d5 100644
--- a/src/lib/ip/smu711_bits.i
+++ b/src/lib/ip/smu711_bits.i
@@ -3577,6 +3577,10 @@ static struct umr_bitfield ixCG_FREQ_TRAN_VOTING_7[] = {
 static struct umr_bitfield ixCG_DISPLAY_GAP_CNTL2[] = {
 { "VBI_PREDICTION", 0, 31, _bitfield_default },
 };
+static struct umr_bitfield ixCURRENT_PG_STATUS[] = {
+{ "VCE_PG_STATUS", 1, 1, _bitfield_default },
+{ "UVD_PG_STATUS", 2, 2, _bitfield_default },
+};
 static struct umr_bitfield ixLCLK_DEEP_SLEEP_CNTL2[] = {
 { "RFE_BUSY_MASK", 0, 0, _bitfield_default },
 { "BIF_CG_LCLK_BUSY_MASK", 1, 1, _bitfield_default },
diff --git a/src/lib/ip/smu711_regs.i b/src/lib/ip/smu711_regs.i
index 6e66ecd4ac09..efeac147288e 100644
--- a/src/lib/ip/smu711_regs.i
+++ b/src/lib/ip/smu711_regs.i
@@ -886,6 +886,7 @@
{ "ixCG_FREQ_TRAN_VOTING_6", REG_SMC, 0xc02001c0, 
_FREQ_TRAN_VOTING_6[0], 
sizeof(ixCG_FREQ_TRAN_VOTING_6)/sizeof(ixCG_FREQ_TRAN_VOTING_6[0]), 0, 0 },
{ "ixCG_FREQ_TRAN_VOTING_7", REG_SMC, 0xc02001c4, 
_FREQ_TRAN_VOTING_7[0], 
sizeof(ixCG_FREQ_TRAN_VOTING_7)/sizeof(ixCG_FREQ_TRAN_VOTING_7[0]), 0, 0 },
{ "ixCG_DISPLAY_GAP_CNTL2", REG_SMC, 0xc0200230, 
_DISPLAY_GAP_CNTL2[0], 
sizeof(ixCG_DISPLAY_GAP_CNTL2)/sizeof(ixCG_DISPLAY_GAP_CNTL2[0]), 0, 0 },
+   { "ixCURRENT_PG_STATUS", REG_SMC, 0xc020029c, &ixCURRENT_PG_STATUS[0], 
sizeof(ixCURRENT_PG_STATUS)/sizeof(ixCURRENT_PG_STATUS[0]), 0, 0 },
{ "ixLCLK_DEEP_SLEEP_CNTL2", REG_SMC, 0xc0200310, 
_DEEP_SLEEP_CNTL2[0], 
sizeof(ixLCLK_DEEP_SLEEP_CNTL2)/sizeof(ixLCLK_DEEP_SLEEP_CNTL2[0]), 0, 0 },
{ "ixVDDGFX_IDLE_PARAMETER", REG_SMC, 0xc020036c, 
_IDLE_PARAMETER[0], 
sizeof(ixVDDGFX_IDLE_PARAMETER)/sizeof(ixVDDGFX_IDLE_PARAMETER[0]), 0, 0 },
{ "ixVDDGFX_IDLE_CONTROL", REG_SMC, 0xc0200370, 
_IDLE_CONTROL[0], 
sizeof(ixVDDGFX_IDLE_CONTROL)/sizeof(ixVDDGFX_IDLE_CONTROL[0]), 0, 0 },
diff --git 

Re: [PATCH v2 2/2] drm/amdgpu: report the number of bytes moved at buffer creation

2017-02-13 Thread Christian König

Am 13.02.2017 um 17:28 schrieb Nicolai Hähnle:

On 09.02.2017 11:33, Samuel Pitoiset wrote:

Like ttm_bo_validate(), ttm_bo_init() might need to move BO and
the number of bytes moved by TTM should be reported. This can help
the throttle buffer migration mechanism to make a better decision.


Hmm, this could double-count bytes if there's a concurrent CS 
submission going on.


It's only a heuristic, so I guess it's not too bad, but still - having 
at least a comment about this would be nice.


Yeah, already working on this.

I want to give ttm_bo_init and ttm_bo_validate a "context" parameter 
to count the bytes moved as well as the bytes it is allowed to move (as 
a new feature) before returning -EBUSY.


Regards,
Christian.



Nicolai


v2: fix computation

Signed-off-by: Samuel Pitoiset 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h| 1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 3 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 6 ++
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h

index 402a8954c6d8..5227e4d1d5db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1720,6 +1720,7 @@ int amdgpu_cs_parser_init(struct 
amdgpu_cs_parser *p, void *data);

 int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
u32 ip_instance, u32 ring,
struct amdgpu_ring **out_ring);
+void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 
num_bytes);
 void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 
domain);

 bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page 
**pages);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

index 6e948e4986ec..dade2fa9593a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -351,8 +351,7 @@ static u64 
amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
  * submission. This can result in a debt that can stop buffer 
migrations

  * temporarily.
  */
-static void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev,
- u64 num_bytes)
+void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 
num_bytes)

 {
 spin_lock(&adev->mm_stats.lock);
 adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c

index 556236a112c1..4aa2c8a94347 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -323,6 +323,7 @@ int amdgpu_bo_create_restricted(struct 
amdgpu_device *adev,

 struct amdgpu_bo *bo;
 enum ttm_bo_type type;
 unsigned long page_align;
+u64 initial_bytes_moved;
 size_t acc_size;
 int r;

@@ -399,10 +400,15 @@ int amdgpu_bo_create_restricted(struct 
amdgpu_device *adev,

 locked = ww_mutex_trylock(&bo->tbo.ttm_resv.lock);
 WARN_ON(!locked);
 }
+
+initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
 r = ttm_bo_init(&adev->mman.bdev, &bo->tbo, size, type,
 &bo->placement, page_align, !kernel, NULL,
 acc_size, sg, resv ? resv : &bo->tbo.ttm_resv,
 &amdgpu_ttm_bo_destroy);
+amdgpu_cs_report_moved_bytes(adev,
+atomic64_read(&adev->num_bytes_moved) - initial_bytes_moved);
+
 if (unlikely(r != 0)) {
 if (!resv)
 ww_mutex_unlock(&bo->tbo.resv->lock);



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/radeon: allow unaligned shader loads on CIK

2017-02-13 Thread Deucher, Alexander
> -Original Message-
> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf
> Of Marek Olšák
> Sent: Monday, February 13, 2017 11:58 AM
> To: amd-gfx@lists.freedesktop.org
> Subject: [PATCH] drm/radeon: allow unaligned shader loads on CIK
> 
> From: Marek Olšák 
> 
> Signed-off-by: Marek Olšák 

Add a better patch description.  With that fixed:
Reviewed-by: Alex Deucher 


> ---
>  drivers/gpu/drm/radeon/cik.c| 7 +--
>  drivers/gpu/drm/radeon/radeon_drv.c | 3 ++-
>  2 files changed, 7 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
> index f6ff41a..ac0d939 100644
> --- a/drivers/gpu/drm/radeon/cik.c
> +++ b/drivers/gpu/drm/radeon/cik.c
> @@ -28,20 +28,23 @@
>  #include "radeon.h"
>  #include "radeon_asic.h"
>  #include "radeon_audio.h"
>  #include "cikd.h"
>  #include "atom.h"
>  #include "cik_blit_shaders.h"
>  #include "radeon_ucode.h"
>  #include "clearstate_ci.h"
>  #include "radeon_kfd.h"
> 
> +#define SH_MEM_CONFIG_GFX_DEFAULT \
> + ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
> +
>  MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
>  MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
>  MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
>  MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
>  MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
>  MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
>  MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
>  MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
>  MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
> 
> @@ -5580,21 +5583,21 @@ static int cik_pcie_gart_enable(struct
> radeon_device *rdev)
>   tmp &= ~BYPASS_VM;
>   WREG32(CHUB_CONTROL, tmp);
>   }
> 
>   /* XXX SH_MEM regs */
>   /* where to put LDS, scratch, GPUVM in FSA64 space */
>   mutex_lock(&rdev->srbm_mutex);
>   for (i = 0; i < 16; i++) {
>   cik_srbm_select(rdev, 0, 0, 0, i);
>   /* CP and shaders */
> - WREG32(SH_MEM_CONFIG, 0);
> + WREG32(SH_MEM_CONFIG,
> SH_MEM_CONFIG_GFX_DEFAULT);
>   WREG32(SH_MEM_APE1_BASE, 1);
>   WREG32(SH_MEM_APE1_LIMIT, 0);
>   WREG32(SH_MEM_BASES, 0);
>   /* SDMA GFX */
>   WREG32(SDMA0_GFX_VIRTUAL_ADDR +
> SDMA0_REGISTER_OFFSET, 0);
>   WREG32(SDMA0_GFX_APE1_CNTL +
> SDMA0_REGISTER_OFFSET, 0);
>   WREG32(SDMA0_GFX_VIRTUAL_ADDR +
> SDMA1_REGISTER_OFFSET, 0);
>   WREG32(SDMA0_GFX_APE1_CNTL +
> SDMA1_REGISTER_OFFSET, 0);
>   /* XXX SDMA RLC - todo */
>   }
> @@ -5787,21 +5790,21 @@ void cik_vm_flush(struct radeon_device *rdev,
> struct radeon_ring *ring,
>   radeon_ring_write(ring, 0);
>   radeon_ring_write(ring, VMID(vm_id));
> 
>   radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
>   radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
>WRITE_DATA_DST_SEL(0)));
>   radeon_ring_write(ring, SH_MEM_BASES >> 2);
>   radeon_ring_write(ring, 0);
> 
>   radeon_ring_write(ring, 0); /* SH_MEM_BASES */
> - radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
> + radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /*
> SH_MEM_CONFIG */
>   radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
>   radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
> 
>   radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
>   radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
>WRITE_DATA_DST_SEL(0)));
>   radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
>   radeon_ring_write(ring, 0);
>   radeon_ring_write(ring, VMID(0));
> 
> diff --git a/drivers/gpu/drm/radeon/radeon_drv.c
> b/drivers/gpu/drm/radeon/radeon_drv.c
> index 30bd4a6..2e5d680 100644
> --- a/drivers/gpu/drm/radeon/radeon_drv.c
> +++ b/drivers/gpu/drm/radeon/radeon_drv.c
> @@ -91,23 +91,24 @@
>   *CS to GPU on >= r600
>   *   2.41.0 - evergreen/cayman: Add SET_BASE/DRAW_INDIRECT command
> parsing support
>   *   2.42.0 - Add VCE/VUI (Video Usability Information) support
>   *   2.43.0 - RADEON_INFO_GPU_RESET_COUNTER
>   *   2.44.0 - SET_APPEND_CNT packet3 support
>   *   2.45.0 - Allow setting shader registers using DMA/COPY packet3 on SI
>   *   2.46.0 - Add PFP_SYNC_ME support on evergreen
>   *   2.47.0 - Add UVD_NO_OP register support
>   *   2.48.0 - TA_CS_BC_BASE_ADDR allowed on SI
>   *   2.49.0 - DRM_RADEON_GEM_INFO ioctl returns correct vram_size/visible
> values
> + *   2.50.0 - Allows unaligned shader loads on CIK. (needed by OpenGL)
>   */
>  #define KMS_DRIVER_MAJOR 2
> -#define KMS_DRIVER_MINOR 49
> +#define KMS_DRIVER_MINOR 50
>  #define KMS_DRIVER_PATCHLEVEL0
>  int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
>  int radeon_driver_unload_kms(struct drm_device *dev);
>  void radeon_driver_lastclose_kms(struct drm_device 

Re: [PATCH] drm/radeon: allow unaligned shader loads on CIK

2017-02-13 Thread Marek Olšák
On Mon, Feb 13, 2017 at 6:00 PM, Deucher, Alexander
 wrote:
>> -Original Message-
>> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf
>> Of Marek Olšák
>> Sent: Monday, February 13, 2017 11:58 AM
>> To: amd-gfx@lists.freedesktop.org
>> Subject: [PATCH] drm/radeon: allow unaligned shader loads on CIK
>>
>> From: Marek Olšák 
>>
>> Signed-off-by: Marek Olšák 
>
> Add a better patch description.  With that fixed:
> Reviewed-by: Alex Deucher 

What's a better patch description? "drm/radeon: set
SH_MEM_CONFIG.ALIGNMENT_MODE = UNALIGNED on CIK"?

Marek

>
>
>> ---
>>  drivers/gpu/drm/radeon/cik.c| 7 +--
>>  drivers/gpu/drm/radeon/radeon_drv.c | 3 ++-
>>  2 files changed, 7 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
>> index f6ff41a..ac0d939 100644
>> --- a/drivers/gpu/drm/radeon/cik.c
>> +++ b/drivers/gpu/drm/radeon/cik.c
>> @@ -28,20 +28,23 @@
>>  #include "radeon.h"
>>  #include "radeon_asic.h"
>>  #include "radeon_audio.h"
>>  #include "cikd.h"
>>  #include "atom.h"
>>  #include "cik_blit_shaders.h"
>>  #include "radeon_ucode.h"
>>  #include "clearstate_ci.h"
>>  #include "radeon_kfd.h"
>>
>> +#define SH_MEM_CONFIG_GFX_DEFAULT \
>> + ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
>> +
>>  MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
>>  MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
>>  MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
>>  MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
>>  MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
>>  MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
>>  MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
>>  MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
>>  MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
>>
>> @@ -5580,21 +5583,21 @@ static int cik_pcie_gart_enable(struct
>> radeon_device *rdev)
>>   tmp &= ~BYPASS_VM;
>>   WREG32(CHUB_CONTROL, tmp);
>>   }
>>
>>   /* XXX SH_MEM regs */
>>   /* where to put LDS, scratch, GPUVM in FSA64 space */
>>   mutex_lock(&rdev->srbm_mutex);
>>   for (i = 0; i < 16; i++) {
>>   cik_srbm_select(rdev, 0, 0, 0, i);
>>   /* CP and shaders */
>> - WREG32(SH_MEM_CONFIG, 0);
>> + WREG32(SH_MEM_CONFIG,
>> SH_MEM_CONFIG_GFX_DEFAULT);
>>   WREG32(SH_MEM_APE1_BASE, 1);
>>   WREG32(SH_MEM_APE1_LIMIT, 0);
>>   WREG32(SH_MEM_BASES, 0);
>>   /* SDMA GFX */
>>   WREG32(SDMA0_GFX_VIRTUAL_ADDR +
>> SDMA0_REGISTER_OFFSET, 0);
>>   WREG32(SDMA0_GFX_APE1_CNTL +
>> SDMA0_REGISTER_OFFSET, 0);
>>   WREG32(SDMA0_GFX_VIRTUAL_ADDR +
>> SDMA1_REGISTER_OFFSET, 0);
>>   WREG32(SDMA0_GFX_APE1_CNTL +
>> SDMA1_REGISTER_OFFSET, 0);
>>   /* XXX SDMA RLC - todo */
>>   }
>> @@ -5787,21 +5790,21 @@ void cik_vm_flush(struct radeon_device *rdev,
>> struct radeon_ring *ring,
>>   radeon_ring_write(ring, 0);
>>   radeon_ring_write(ring, VMID(vm_id));
>>
>>   radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
>>   radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
>>WRITE_DATA_DST_SEL(0)));
>>   radeon_ring_write(ring, SH_MEM_BASES >> 2);
>>   radeon_ring_write(ring, 0);
>>
>>   radeon_ring_write(ring, 0); /* SH_MEM_BASES */
>> - radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
>> + radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /*
>> SH_MEM_CONFIG */
>>   radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
>>   radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
>>
>>   radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
>>   radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
>>WRITE_DATA_DST_SEL(0)));
>>   radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
>>   radeon_ring_write(ring, 0);
>>   radeon_ring_write(ring, VMID(0));
>>
>> diff --git a/drivers/gpu/drm/radeon/radeon_drv.c
>> b/drivers/gpu/drm/radeon/radeon_drv.c
>> index 30bd4a6..2e5d680 100644
>> --- a/drivers/gpu/drm/radeon/radeon_drv.c
>> +++ b/drivers/gpu/drm/radeon/radeon_drv.c
>> @@ -91,23 +91,24 @@
>>   *CS to GPU on >= r600
>>   *   2.41.0 - evergreen/cayman: Add SET_BASE/DRAW_INDIRECT command
>> parsing support
>>   *   2.42.0 - Add VCE/VUI (Video Usability Information) support
>>   *   2.43.0 - RADEON_INFO_GPU_RESET_COUNTER
>>   *   2.44.0 - SET_APPEND_CNT packet3 support
>>   *   2.45.0 - Allow setting shader registers using DMA/COPY packet3 on SI
>>   *   2.46.0 - Add PFP_SYNC_ME support on evergreen
>>   *   2.47.0 - Add UVD_NO_OP register support
>>   *   2.48.0 - TA_CS_BC_BASE_ADDR allowed on SI
>>   *   2.49.0 - DRM_RADEON_GEM_INFO ioctl returns correct vram_size/visible
>> values
>> + *   2.50.0 - Allows unaligned shader loads on CIK. (needed by OpenGL)
>>   */
>>  #define 

[PATCH 02/10] drm/amd/display: use disp clock value in context rather than bw_results

2017-02-13 Thread Harry Wentland
From: Dmytro Laktyushkin 

Change-Id: I05aeb2db7f2d43ec586436b406bb3b78886ff41b
Signed-off-by: Dmytro Laktyushkin 
Reviewed-by: Jordan Lazare 
Acked-by: Harry Wentland 
---
 drivers/gpu/drm/amd/display/dc/core/dc.c   | 14 ++
 drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c|  2 +-
 .../gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c| 10 +-
 drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c|  1 +
 drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c|  1 +
 drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c  |  2 +-
 6 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c 
b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 9d2f78f21748..4c405f4b880e 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -925,7 +925,7 @@ void pplib_apply_display_requirements(
/* TODO: dce11.2*/
pp_display_cfg->avail_mclk_switch_time_in_disp_active_us = 0;
 
-   pp_display_cfg->disp_clk_khz = context->bw_results.dispclk_khz;
+   pp_display_cfg->disp_clk_khz = context->dispclk_khz;
 
fill_display_configs(context, pp_display_cfg);
 
@@ -1065,8 +1065,7 @@ bool dc_pre_update_surfaces_to_stream(
 {
int i, j;
struct core_dc *core_dc = DC_TO_CORE(dc);
-   int prev_disp_clk = core_dc->current_context->bw_results.dispclk_khz;
-   int new_disp_clk;
+   int prev_disp_clk = core_dc->current_context->dispclk_khz;
struct dc_stream_status *stream_status = NULL;
struct validate_context *context;
struct validate_context *temp_context;
@@ -1152,17 +1151,16 @@ bool dc_pre_update_surfaces_to_stream(
ret = false;
goto unexpected_fail;
}
-   new_disp_clk = context->bw_results.dispclk_khz;
 
if (!IS_FPGA_MAXIMUS_DC(core_dc->ctx->dce_environment)
-   && prev_disp_clk < new_disp_clk) {
+   && prev_disp_clk < context->dispclk_khz) {
pplib_apply_display_requirements(core_dc, context,
&context->pp_display_cfg);
context->res_ctx.pool->display_clock->funcs->set_clock(
context->res_ctx.pool->display_clock,
-   new_disp_clk * 115 / 100);
-   core_dc->current_context->bw_results.dispclk_khz = new_disp_clk;
-   core_dc->current_context->dispclk_khz = new_disp_clk;
+   context->dispclk_khz * 115 / 100);
+   core_dc->current_context->bw_results.dispclk_khz = 
context->dispclk_khz;
+   core_dc->current_context->dispclk_khz = context->dispclk_khz;
}
 
for (i = 0; i < new_surface_count; i++)
diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c 
b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c
index 800b22e70c7e..55501b381692 100644
--- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c
@@ -770,7 +770,7 @@ enum dc_status dce100_validate_bandwidth(
struct validate_context *context)
 {
/* TODO implement when needed but for now hardcode max value*/
-   context->bw_results.dispclk_khz = 681000;
+   context->dispclk_khz = 681000;
 
return DC_OK;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c 
b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
index a1a5dc6fcb67..5e99f6c27c9a 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
@@ -1651,7 +1651,7 @@ static void apply_min_clocks(
/* get the required state based on state dependent clocks:
 * display clock and pixel clock
 */
-   req_clocks.display_clk_khz = context->bw_results.dispclk_khz;
+   req_clocks.display_clk_khz = context->dispclk_khz;
 
req_clocks.pixel_clk_khz = get_max_pixel_clock_for_all_paths(
dc, context, true);
@@ -1776,11 +1776,11 @@ enum dc_status dce110_apply_ctx_to_hw(
/*TODO: when pplib works*/
apply_min_clocks(dc, context, &clocks_state, true);
 
-   if (context->bw_results.dispclk_khz
-   > dc->current_context->bw_results.dispclk_khz)
+   if (context->dispclk_khz
+   > dc->current_context->dispclk_khz)
context->res_ctx.pool->display_clock->funcs->set_clock(
context->res_ctx.pool->display_clock,
-   context->bw_results.dispclk_khz * 115 / 100);
+   context->dispclk_khz * 115 / 100);
 
for (i = 0; i < context->res_ctx.pool->pipe_count; 

[PATCH 04/10] drm/amd/display: Audio is not switching to DP when HDMI/DP hot plug/unplug

2017-02-13 Thread Harry Wentland
From: Hersen Wu 

Change-Id: I0bc1946decf41316a0fb27df3269418e94621625
Signed-off-by: Hersen Wu 
Reviewed-by: Tony Cheng 
Acked-by: Harry Wentland 
---
 .../amd/display/dc/dce110/dce110_hw_sequencer.c| 100 +++--
 1 file changed, 75 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c 
b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
index 5e99f6c27c9a..6a5cec0d4e1f 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
@@ -1732,7 +1732,6 @@ enum dc_status dce110_apply_ctx_to_hw(
struct dc_bios *dcb = dc->ctx->dc_bios;
enum dc_status status;
int i;
-   bool programmed_audio_dto = false;
enum dm_pp_clocks_state clocks_state = DM_PP_CLOCKS_STATE_INVALID;
 
/* Reset old context */
@@ -1782,6 +1781,80 @@ enum dc_status dce110_apply_ctx_to_hw(
context->res_ctx.pool->display_clock,
context->dispclk_khz * 115 / 100);
 
+   /* program audio wall clock. use HDMI as clock source if HDMI
+* audio active. Otherwise, use DP as clock source
+* first, loop to find any HDMI audio, if not, loop find DP audio
+*/
+   /* Setup audio rate clock source */
+   /* Issue:
+   * Audio lag happened on DP monitor when unplug a HDMI monitor
+   *
+   * Cause:
+   * In case of DP and HDMI connected or HDMI only, DCCG_AUDIO_DTO_SEL
+   * is set to either dto0 or dto1, audio should work fine.
+   * In case of DP connected only, DCCG_AUDIO_DTO_SEL should be dto1,
+   * set to dto0 will cause audio lag.
+   *
+   * Solution:
+   * Not optimized audio wall dto setup. When mode set, iterate pipe_ctx,
+   * find first available pipe with audio, setup audio wall DTO per 
topology
+   * instead of per pipe.
+   */
+   for (i = 0; i < context->res_ctx.pool->pipe_count; i++) {
+   struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+
+   if (pipe_ctx->stream == NULL)
+   continue;
+
+   if (pipe_ctx->top_pipe)
+   continue;
+
+   if (pipe_ctx->stream->signal != SIGNAL_TYPE_HDMI_TYPE_A)
+   continue;
+
+   if (pipe_ctx->audio != NULL) {
+   struct audio_output audio_output;
+
+   build_audio_output(pipe_ctx, &audio_output);
+
+   pipe_ctx->audio->funcs->wall_dto_setup(
+   pipe_ctx->audio,
+   pipe_ctx->stream->signal,
+   &audio_output.crtc_info,
+   &audio_output.pll_info);
+   break;
+   }
+   }
+
+   /* no HDMI audio is found, try DP audio */
+   if (i == context->res_ctx.pool->pipe_count) {
+   for (i = 0; i < context->res_ctx.pool->pipe_count; i++) {
+   struct pipe_ctx *pipe_ctx = 
&context->res_ctx.pipe_ctx[i];
+
+   if (pipe_ctx->stream == NULL)
+   continue;
+
+   if (pipe_ctx->top_pipe)
+   continue;
+
+   if (!dc_is_dp_signal(pipe_ctx->stream->signal))
+   continue;
+
+   if (pipe_ctx->audio != NULL) {
+   struct audio_output audio_output;
+
+   build_audio_output(pipe_ctx, &audio_output);
+
+   pipe_ctx->audio->funcs->wall_dto_setup(
+   pipe_ctx->audio,
+   pipe_ctx->stream->signal,
+   &audio_output.crtc_info,
+   &audio_output.pll_info);
+   break;
+   }
+   }
+   }
+
for (i = 0; i < context->res_ctx.pool->pipe_count; i++) {
struct pipe_ctx *pipe_ctx_old =

&dc->current_context->res_ctx.pipe_ctx[i];
@@ -1797,21 +1870,7 @@ enum dc_status dce110_apply_ctx_to_hw(
continue;
 
if (context->res_ctx.pipe_ctx[i].audio != NULL) {
-   /* Setup audio rate clock source */
-   /* Issue:
-   * Audio lag happened on DP monitor when unplug a HDMI 
monitor
-   *
-   * Cause:
-   * In case of DP and HDMI connected or HDMI only, 
DCCG_AUDIO_DTO_SEL
-   * is set to either dto0 or dto1, audio should work fine.
-   * In case of DP connected only, DCCG_AUDIO_DTO_SEL 
should be dto1,
-   

[PATCH 01/10] Revert "drm/amd/display: Changes for enable dcc mode."

2017-02-13 Thread Harry Wentland
I goofed this one up when pulling from the internal DC
repo.

This reverts commit a73e57356fb89b6aaec3434256efb5210200d77d.

Change-Id: Ib0a30b69677b8365b5b1809e476962a488d5505d
---
 drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.h  |  7 +-
 .../gpu/drm/amd/display/dc/dce/dce_link_encoder.h  | 26 +++---
 2 files changed, 9 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.h 
b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.h
index b026157a2eea..33c1754f04f1 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.h
@@ -30,6 +30,7 @@
 #include "dmcu.h"
 
 #define DMCU_COMMON_REG_LIST_DCE_BASE() \
+   SR(DMCU_CTRL), \
SR(DMCU_RAM_ACCESS_CTRL), \
SR(DMCU_IRAM_WR_CTRL), \
SR(DMCU_IRAM_WR_DATA)
@@ -42,6 +43,8 @@
.field_name = reg_name ## __ ## field_name ## post_fix
 
 #define DMCU_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(mask_sh) \
+   DMCU_SF(DMCU_CTRL, \
+   DMCU_ENABLE, mask_sh), \
DMCU_SF(DMCU_RAM_ACCESS_CTRL, \
IRAM_HOST_ACCESS_EN, mask_sh), \
DMCU_SF(DMCU_RAM_ACCESS_CTRL, \
@@ -55,7 +58,8 @@
 #define DMCU_REG_FIELD_LIST(type) \
type DMCU_IRAM_MEM_PWR_STATE; \
type IRAM_HOST_ACCESS_EN; \
-   type IRAM_WR_ADDR_AUTO_INC
+   type IRAM_WR_ADDR_AUTO_INC; \
+   type DMCU_ENABLE
 
 struct dce_dmcu_shift {
DMCU_REG_FIELD_LIST(uint8_t);
@@ -66,6 +70,7 @@ struct dce_dmcu_mask {
 };
 
 struct dce_dmcu_registers {
+   uint32_t DMCU_CTRL;
uint32_t DMCU_RAM_ACCESS_CTRL;
uint32_t DCI_MEM_PWR_STATUS;
uint32_t DMU_MEM_PWR_CNTL;
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h 
b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h
index 053f72b91b3c..8a07665e693b 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h
@@ -39,18 +39,13 @@
SRI(DC_HPD_CONTROL, HPD, id)
 
 #define LE_COMMON_REG_LIST_BASE(id) \
-   SR(BL_PWM_CNTL), \
-   SR(BL_PWM_GRP1_REG_LOCK), \
-   SR(BL_PWM_PERIOD_CNTL), \
-   SR(LVTMA_PWRSEQ_CNTL), \
-   SR(LVTMA_PWRSEQ_STATE), \
-   SR(BL_PWM_CNTL2), \
-   SR(LVTMA_PWRSEQ_REF_DIV), \
SR(MASTER_COMM_DATA_REG1), \
SR(MASTER_COMM_DATA_REG2), \
SR(MASTER_COMM_DATA_REG3), \
SR(MASTER_COMM_CMD_REG), \
SR(MASTER_COMM_CNTL_REG), \
+   SR(LVTMA_PWRSEQ_CNTL), \
+   SR(LVTMA_PWRSEQ_STATE), \
SR(DMCU_RAM_ACCESS_CTRL), \
SR(DMCU_IRAM_RD_CTRL), \
SR(DMCU_IRAM_RD_DATA), \
@@ -81,22 +76,16 @@
LE_COMMON_REG_LIST_BASE(id), \
SRI(DP_DPHY_BS_SR_SWAP_CNTL, DP, id), \
SRI(DP_DPHY_INTERNAL_CTRL, DP, id), \
-   SR(BIOS_SCRATCH_2), \
-   SR(BL1_PWM_USER_LEVEL), \
SR(DCI_MEM_PWR_STATUS)
 
#define LE_DCE110_REG_LIST(id)\
LE_COMMON_REG_LIST_BASE(id), \
SRI(DP_DPHY_BS_SR_SWAP_CNTL, DP, id), \
SRI(DP_DPHY_INTERNAL_CTRL, DP, id), \
-   SR(BIOS_SCRATCH_2), \
-   SR(BL1_PWM_USER_LEVEL), \
SR(DCI_MEM_PWR_STATUS)
 
#define LE_DCE80_REG_LIST(id)\
-   SR(BIOS_SCRATCH_2), \
SRI(DP_DPHY_INTERNAL_CTRL, DP, id), \
-   SR(BL1_PWM_USER_LEVEL), \
LE_COMMON_REG_LIST_BASE(id)
 
 
@@ -110,24 +99,16 @@ struct dce110_link_enc_hpd_registers {
 };
 
 struct dce110_link_enc_registers {
-   /* BL registers */
-   uint32_t BL_PWM_CNTL;
-   uint32_t BL_PWM_GRP1_REG_LOCK;
-   uint32_t BL_PWM_PERIOD_CNTL;
+   /* Backlight registers */
uint32_t LVTMA_PWRSEQ_CNTL;
uint32_t LVTMA_PWRSEQ_STATE;
-   uint32_t BL_PWM_CNTL2;
-   uint32_t LVTMA_PWRSEQ_REF_DIV;
 
/* DMCU registers */
-   uint32_t BL1_PWM_USER_LEVEL;
-   uint32_t ABM0_BL1_PWM_USER_LEVEL;
uint32_t MASTER_COMM_DATA_REG1;
uint32_t MASTER_COMM_DATA_REG2;
uint32_t MASTER_COMM_DATA_REG3;
uint32_t MASTER_COMM_CMD_REG;
uint32_t MASTER_COMM_CNTL_REG;
-   uint32_t BIOS_SCRATCH_2;
uint32_t DMCU_RAM_ACCESS_CTRL;
uint32_t DCI_MEM_PWR_STATUS;
uint32_t DMU_MEM_PWR_CNTL;
@@ -136,7 +117,6 @@ struct dce110_link_enc_registers {
uint32_t DMCU_INTERRUPT_TO_UC_EN_MASK;
uint32_t SMU_INTERRUPT_CONTROL;
 
-
/* Common DP registers */
uint32_t DIG_BE_CNTL;
uint32_t DIG_BE_EN_CNTL;
-- 
2.9.3

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 10/10] drm/amd/display: color distortion after DPMS+ background color fix

2017-02-13 Thread Harry Wentland
From: Charlene Liu 

Change-Id: I1573c7aa95f857d126aadd2f61f152779795aff4
Signed-off-by: Charlene Liu 
Reviewed-by: Dmytro Laktyushkin 
Acked-by: Harry Wentland 
---
 drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c 
b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
index 88a2fa99f5da..0afb3c64b651 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
@@ -1054,6 +1054,8 @@ static enum dc_status apply_single_controller_ctx_to_hw(
if (!pipe_ctx_old->stream) {
core_link_enable_stream(pipe_ctx);
 
+   resource_build_info_frame(pipe_ctx);
+   dce110_update_info_frame(pipe_ctx);
if (dc_is_dp_signal(pipe_ctx->stream->signal))
dce110_unblank_stream(pipe_ctx,
&stream->sink->link->public.cur_link_settings);
-- 
2.9.3

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 09/10] drm/amd/display: report cursor size base on the ASIC

2017-02-13 Thread Harry Wentland
From: Tony Cheng 

Change-Id: I257828f5e768e746b9ee0596e4b1dbd26fcbbf01
Signed-off-by: Tony Cheng 
Reviewed-by: Yongqiang Sun 
Acked-by: Harry Wentland 
---
 drivers/gpu/drm/amd/display/dc/dc.h | 2 ++
 drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c | 1 +
 drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c | 1 +
 drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c | 1 +
 drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c   | 1 +
 5 files changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h 
b/drivers/gpu/drm/amd/display/dc/dc.h
index d7f848495d8a..13e7134fccb8 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -47,6 +47,8 @@ struct dc_caps {
uint32_t max_slave_planes;
uint32_t max_downscale_ratio;
uint32_t i2c_speed_in_khz;
+
+   unsigned int max_cursor_size;
 };
 
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c 
b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c
index 55501b381692..9ed7c06132fe 100644
--- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c
@@ -984,6 +984,7 @@ static bool construct(
pool->base.pipe_count = res_cap.num_timing_generator;
dc->public.caps.max_downscale_ratio = 200;
dc->public.caps.i2c_speed_in_khz = 40;
+   dc->public.caps.max_cursor_size = 128;
 
for (i = 0; i < pool->base.pipe_count; i++) {
pool->base.timing_generators[i] =
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c 
b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
index ee4fda514de6..f6e546addbd3 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
@@ -1274,6 +1274,7 @@ static bool construct(
 
dc->public.caps.max_downscale_ratio = 150;
dc->public.caps.i2c_speed_in_khz = 100;
+   dc->public.caps.max_cursor_size = 128;
 
/*
 *  Create resources *
diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c 
b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c
index 54af84ebf2bc..526c106f6ebf 100644
--- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c
@@ -1256,6 +1256,7 @@ static bool construct(
pool->base.pipe_count = pool->base.res_cap->num_timing_generator;
dc->public.caps.max_downscale_ratio = 200;
dc->public.caps.i2c_speed_in_khz = 100;
+   dc->public.caps.max_cursor_size = 128;
 
/*
 *  Create resources *
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c 
b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
index 3de8bfb47dce..272c7fc31406 100644
--- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
@@ -935,6 +935,7 @@ static bool construct(
pool->base.pipe_count = res_cap.num_timing_generator;
dc->public.caps.max_downscale_ratio = 200;
dc->public.caps.i2c_speed_in_khz = 40;
+   dc->public.caps.max_cursor_size = 128;
 
/*
 *  Create resources *
-- 
2.9.3

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 00/10] DC Patches Feb 13, 2017

2017-02-13 Thread Harry Wentland
A bunch of fixes today and one cleanup in regamma code.

Charlene Liu (1):
  drm/amd/display: color distortion after DPMS+ background color fix

Dmytro Laktyushkin (2):
  drm/amd/display: use disp clock value in context rather than
bw_results
  drm/amd/display: fix psr status wait

Harry Wentland (1):
  Revert "drm/amd/display: Changes for enable dcc mode."

Hersen Wu (1):
  drm/amd/display: Audio is not switching to DP when HDMI/DP hot
plug/unplug

Reza Amini (1):
  drm/amd/display: handle unsupported sink types

Sylvia Tsai (1):
  drm/amd/display: Set ignore_msa_timing flag for freesync modes

Tony Cheng (1):
  drm/amd/display: report cursor size base on the ASIC

Vitaly Prosyak (1):
  drm/amd/display: Enable regamma 25 segments and use double buffer.

Zeyu Fan (1):
  drm/amd/display: Fix program pix clk logic to unblock deep color set.

 drivers/gpu/drm/amd/display/dc/calcs/Makefile  |   2 +-
 .../gpu/drm/amd/display/dc/calcs/custom_float.c| 197 ++
 drivers/gpu/drm/amd/display/dc/core/dc.c   |  14 +-
 drivers/gpu/drm/amd/display/dc/core/dc_sink.c  |   4 +
 drivers/gpu/drm/amd/display/dc/dc.h|   2 +
 .../gpu/drm/amd/display/dc/dce/dce_clock_source.c  |  10 +-
 drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.h  |   7 +-
 .../gpu/drm/amd/display/dc/dce/dce_link_encoder.c  |  18 +-
 .../gpu/drm/amd/display/dc/dce/dce_link_encoder.h  |  46 +---
 .../drm/amd/display/dc/dce100/dce100_resource.c|   3 +-
 .../amd/display/dc/dce110/dce110_hw_sequencer.c| 283 ++---
 .../amd/display/dc/dce110/dce110_hw_sequencer.h|   3 -
 .../drm/amd/display/dc/dce110/dce110_resource.c|   2 +
 .../drm/amd/display/dc/dce112/dce112_resource.c|   2 +
 .../gpu/drm/amd/display/dc/dce80/dce80_resource.c  |   3 +-
 drivers/gpu/drm/amd/display/dc/dm_services.h   |   2 +-
 drivers/gpu/drm/amd/display/dc/inc/custom_float.h  |  40 +++
 .../drm/amd/display/modules/freesync/freesync.c|   6 +-
 18 files changed, 387 insertions(+), 257 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/display/dc/calcs/custom_float.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/inc/custom_float.h

-- 
2.9.3

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 07/10] drm/amd/display: Set ignore_msa_timing flag for freesync modes

2017-02-13 Thread Harry Wentland
From: Sylvia Tsai 

- Set ignore_msa_timing_param to 1 only for modes that can support freesync

Change-Id: I94122df078976933ba48326f3b32567bc1a9d628
Signed-off-by: Sylvia Tsai 
Reviewed-by: Tony Cheng 
Acked-by: Harry Wentland 
---
 drivers/gpu/drm/amd/display/modules/freesync/freesync.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c 
b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
index 1ee732768f6e..b00b1df71f3e 100644
--- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
+++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
@@ -262,7 +262,11 @@ bool mod_freesync_add_stream(struct mod_freesync 
*mod_freesync,
enable_for_video = false;
}
 
-   if (caps->supported)
+   unsigned int nom_refresh_rate_micro_hz = (unsigned int)
+   (((unsigned long long) 
core_stream->public.timing.pix_clk_khz) * 1000ULL * 1000ULL * 1000ULL
+   / core_stream->public.timing.h_total / 
core_stream->public.timing.v_total);
+
+   if (caps->supported && nom_refresh_rate_micro_hz >= 
caps->min_refresh_in_micro_hz && nom_refresh_rate_micro_hz <= 
caps->max_refresh_in_micro_hz)
core_stream->public.ignore_msa_timing_param = 1;
 
core_freesync->num_entities++;
-- 
2.9.3

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 03/10] drm/amd/display: fix psr status wait

2017-02-13 Thread Harry Wentland
From: Dmytro Laktyushkin 

Change-Id: I66ad31b2a32b4aec0497e9bdbf69442c1b9f447a
Signed-off-by: Dmytro Laktyushkin 
Reviewed-by: Eagle Yeh 
Acked-by: Harry Wentland 
---
 .../gpu/drm/amd/display/dc/dce/dce_link_encoder.c  | 18 +++---
 .../gpu/drm/amd/display/dc/dce/dce_link_encoder.h  | 22 +++---
 drivers/gpu/drm/amd/display/dc/dm_services.h   |  2 +-
 3 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c 
b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c
index 3847764688dd..60fdf58fc5cc 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c
@@ -38,6 +38,14 @@
 #include "dce/dce_11_0_sh_mask.h"
 #include "dce/dce_11_0_enum.h"
 
+#ifndef DMU_MEM_PWR_CNTL__DMCU_IRAM_MEM_PWR_STATE__SHIFT
+#define DMU_MEM_PWR_CNTL__DMCU_IRAM_MEM_PWR_STATE__SHIFT 0xa
+#endif
+
+#ifndef DMU_MEM_PWR_CNTL__DMCU_IRAM_MEM_PWR_STATE_MASK
+#define DMU_MEM_PWR_CNTL__DMCU_IRAM_MEM_PWR_STATE_MASK 0x0400L
+#endif
+
 #ifndef HPD0_DC_HPD_CONTROL__DC_HPD_EN_MASK
 #define HPD0_DC_HPD_CONTROL__DC_HPD_EN_MASK  0x1000L
 #endif
@@ -1557,15 +1565,19 @@ static void get_dmcu_psr_state(struct link_encoder 
*enc, uint32_t *psr_state)
 
uint32_t count = 0;
uint32_t psrStateOffset = 0xf0;
-   uint32_t value;
+   uint32_t value = -1;
 
/* Enable write access to IRAM */
REG_UPDATE(DMCU_RAM_ACCESS_CTRL, IRAM_HOST_ACCESS_EN, 1);
 
-   do {
+   while (REG(DCI_MEM_PWR_STATUS) && value != 0 && count++ < 10) {
dm_delay_in_microseconds(ctx, 2);
REG_GET(DCI_MEM_PWR_STATUS, DMCU_IRAM_MEM_PWR_STATE, &value);
-   } while (value != 0 && count++ < 10);
+   }
+   while (REG(DMU_MEM_PWR_CNTL) && value != 0 && count++ < 10) {
+   dm_delay_in_microseconds(ctx, 2);
+   REG_GET(DMU_MEM_PWR_CNTL, DMCU_IRAM_MEM_PWR_STATE, &value);
+   }
 
/* Write address to IRAM_RD_ADDR in DMCU_IRAM_RD_CTRL */
REG_WRITE(DMCU_IRAM_RD_CTRL, psrStateOffset);
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h 
b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h
index 8a07665e693b..d382a6882d95 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h
@@ -72,17 +72,17 @@
SRI(DP_DPHY_FAST_TRAINING, DP, id), \
SRI(DP_SEC_CNTL1, DP, id)
 
-   #define LE_COMMON_REG_LIST(id)\
-   LE_COMMON_REG_LIST_BASE(id), \
-   SRI(DP_DPHY_BS_SR_SWAP_CNTL, DP, id), \
-   SRI(DP_DPHY_INTERNAL_CTRL, DP, id), \
-   SR(DCI_MEM_PWR_STATUS)
-
-   #define LE_DCE110_REG_LIST(id)\
-   LE_COMMON_REG_LIST_BASE(id), \
-   SRI(DP_DPHY_BS_SR_SWAP_CNTL, DP, id), \
-   SRI(DP_DPHY_INTERNAL_CTRL, DP, id), \
-   SR(DCI_MEM_PWR_STATUS)
+#define LE_COMMON_REG_LIST(id)\
+   LE_COMMON_REG_LIST_BASE(id), \
+   SRI(DP_DPHY_BS_SR_SWAP_CNTL, DP, id), \
+   SRI(DP_DPHY_INTERNAL_CTRL, DP, id), \
+   SR(DCI_MEM_PWR_STATUS)
+
+#define LE_DCE110_REG_LIST(id)\
+   LE_COMMON_REG_LIST_BASE(id), \
+   SRI(DP_DPHY_BS_SR_SWAP_CNTL, DP, id), \
+   SRI(DP_DPHY_INTERNAL_CTRL, DP, id), \
+   SR(DCI_MEM_PWR_STATUS)
 
#define LE_DCE80_REG_LIST(id)\
SRI(DP_DPHY_INTERNAL_CTRL, DP, id), \
diff --git a/drivers/gpu/drm/amd/display/dc/dm_services.h 
b/drivers/gpu/drm/amd/display/dc/dm_services.h
index 052a43af1bd3..73c0f1f83999 100644
--- a/drivers/gpu/drm/amd/display/dc/dm_services.h
+++ b/drivers/gpu/drm/amd/display/dc/dm_services.h
@@ -103,7 +103,7 @@ static inline uint32_t dm_read_reg_func(
uint32_t value;
 
if (address == 0) {
-   DC_ERR("invalid register read. address = 0");
+   DC_ERR("invalid register read; address = 0\n");
return 0;
}
 
-- 
2.9.3

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 08/10] drm/amd/display: Fix program pix clk logic to unblock deep color set.

2017-02-13 Thread Harry Wentland
From: Zeyu Fan 

Change-Id: I27a24b526d7bdb8241ffe5ad1dfac58e56d71f22
Signed-off-by: Zeyu Fan 
Reviewed-by: Tony Cheng 
Acked-by: Harry Wentland 
---
 drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c 
b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
index a9f39218ce82..87eba4be3249 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
@@ -854,16 +854,16 @@ static bool dce110_program_pix_clk(
if (clock_source->id != CLOCK_SOURCE_ID_EXTERNAL
&& pix_clk_params->flags.ENABLE_SS && !dc_is_dp_signal(

pix_clk_params->signal_type)) {
-
if (!enable_spread_spectrum(clk_src,

pix_clk_params->signal_type,
pll_settings))
return false;
-   /* Resync deep color DTO */
-   dce110_program_pixel_clk_resync(clk_src,
-   pix_clk_params->signal_type,
-   pix_clk_params->color_depth);
}
+   /* Resync deep color DTO */
+   dce110_program_pixel_clk_resync(clk_src,
+   pix_clk_params->signal_type,
+   pix_clk_params->color_depth);
+
break;
case DCE_VERSION_11_2:
if (clock_source->id != CLOCK_SOURCE_ID_DP_DTO) {
-- 
2.9.3

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 06/10] drm/amd/display: Enable regamma 25 segments and use double buffer.

2017-02-13 Thread Harry Wentland
From: Vitaly Prosyak 

Move the custom floating-point calculation to a location shared
between the DCE versions.

Change-Id: I21b6ddaec514924c520219f04c70934e5e1b6715
Signed-off-by: Vitaly Prosyak 
Reviewed-by: Tony Cheng 
Acked-by: Harry Wentland 
---
 drivers/gpu/drm/amd/display/dc/calcs/Makefile  |   2 +-
 .../gpu/drm/amd/display/dc/calcs/custom_float.c| 197 +
 .../amd/display/dc/dce110/dce110_hw_sequencer.c| 171 +-
 .../amd/display/dc/dce110/dce110_hw_sequencer.h|   3 -
 drivers/gpu/drm/amd/display/dc/inc/custom_float.h  |  40 +
 5 files changed, 240 insertions(+), 173 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/display/dc/calcs/custom_float.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/inc/custom_float.h

diff --git a/drivers/gpu/drm/amd/display/dc/calcs/Makefile 
b/drivers/gpu/drm/amd/display/dc/calcs/Makefile
index 4bb08aea6a03..2f4c8e771b8f 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/calcs/Makefile
@@ -3,7 +3,7 @@
 # It calculates Bandwidth and Watermarks values for HW programming
 #
 
-BW_CALCS = bandwidth_calcs.o bw_fixed.o
+BW_CALCS = bandwidth_calcs.o bw_fixed.o custom_float.o
 
 AMD_DAL_BW_CALCS = $(addprefix $(AMDDALPATH)/dc/calcs/,$(BW_CALCS))
 
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/custom_float.c 
b/drivers/gpu/drm/amd/display/dc/calcs/custom_float.c
new file mode 100644
index 000000000000..7243c37f569e
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/calcs/custom_float.c
@@ -0,0 +1,197 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+#include "dm_services.h"
+#include "custom_float.h"
+
+
+static bool build_custom_float(
+   struct fixed31_32 value,
+   const struct custom_float_format *format,
+   bool *negative,
+   uint32_t *mantissa,
+   uint32_t *exponenta)
+{
+   uint32_t exp_offset = (1 << (format->exponenta_bits - 1)) - 1;
+
+   const struct fixed31_32 mantissa_constant_plus_max_fraction =
+   dal_fixed31_32_from_fraction(
+   (1LL << (format->mantissa_bits + 1)) - 1,
+   1LL << format->mantissa_bits);
+
+   struct fixed31_32 mantiss;
+
+   if (dal_fixed31_32_eq(
+   value,
+   dal_fixed31_32_zero)) {
+   *negative = false;
+   *mantissa = 0;
+   *exponenta = 0;
+   return true;
+   }
+
+   if (dal_fixed31_32_lt(
+   value,
+   dal_fixed31_32_zero)) {
+   *negative = format->sign;
+   value = dal_fixed31_32_neg(value);
+   } else {
+   *negative = false;
+   }
+
+   if (dal_fixed31_32_lt(
+   value,
+   dal_fixed31_32_one)) {
+   uint32_t i = 1;
+
+   do {
+   value = dal_fixed31_32_shl(value, 1);
+   ++i;
+   } while (dal_fixed31_32_lt(
+   value,
+   dal_fixed31_32_one));
+
+   --i;
+
+   if (exp_offset <= i) {
+   *mantissa = 0;
+   *exponenta = 0;
+   return true;
+   }
+
+   *exponenta = exp_offset - i;
+   } else if (dal_fixed31_32_le(
+   mantissa_constant_plus_max_fraction,
+   value)) {
+   uint32_t i = 1;
+
+   do {
+   value = dal_fixed31_32_shr(value, 1);
+   ++i;
+   } while (dal_fixed31_32_lt(
+   mantissa_constant_plus_max_fraction,
+   value));
+
+   *exponenta = exp_offset + i - 1;
+   } else {
+  

[PATCH] Fix SMC read/write

2017-02-13 Thread Tom St Denis
The registers in umr are stored as byte addresses
(mm registers are word addresses).

Signed-off-by: Tom St Denis 
---
 src/app/scan.c| 2 +-
 src/app/set_bit.c | 2 +-
 src/app/set_reg.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/app/scan.c b/src/app/scan.c
index afcadc4c507e..19c97fe1499d 100644
--- a/src/app/scan.c
+++ b/src/app/scan.c
@@ -55,7 +55,7 @@ int umr_scan_asic(struct umr_asic *asic, char *asicname, char 
*ipname, char *reg
case REG_PCIE: fd = 
asic->fd.pcie; scale = 1; break;
case REG_SMC:
if 
(options.read_smc) {
-   fd = 
asic->fd.smc; scale = 4;
+   fd = 
asic->fd.smc; scale = 1;
} else {

continue;
}
diff --git a/src/app/set_bit.c b/src/app/set_bit.c
index 899bf1a17459..d9ee7d8f3a55 100644
--- a/src/app/set_bit.c
+++ b/src/app/set_bit.c
@@ -61,7 +61,7 @@ int umr_set_register_bit(struct umr_asic *asic, char 
*regpath, char *regvalue)
case REG_MMIO: 
fd = asic->fd.mmio; scale = 4; break;
case REG_DIDT: 
fd = asic->fd.didt; scale = 1; break;
case REG_PCIE: 
fd = asic->fd.pcie; scale = 1; break;
-   case REG_SMC:  
fd = asic->fd.smc; scale = 4; break;
+   case REG_SMC:  
fd = asic->fd.smc;  scale = 1; break;
default: return 
-1;
}
if 
(asic->blocks[i]->grant) {
diff --git a/src/app/set_reg.c b/src/app/set_reg.c
index 8c5060f2dbe8..9861170d55c3 100644
--- a/src/app/set_reg.c
+++ b/src/app/set_reg.c
@@ -57,7 +57,7 @@ int umr_set_register(struct umr_asic *asic, char *regpath, 
char *regvalue)
case REG_MMIO: fd = 
asic->fd.mmio; scale = 4; break;
case REG_DIDT: fd = 
asic->fd.didt; scale = 1; break;
case REG_PCIE: fd = 
asic->fd.pcie; scale = 1; break;
-   case REG_SMC:  fd = 
asic->fd.smc; scale = 4; break;
+   case REG_SMC:  fd = 
asic->fd.smc; scale = 1; break;
default: return -1;
}

-- 
2.11.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v2 1/2] drm/amdgpu: fix a potential deadlock in amdgpu_bo_create_restricted()

2017-02-13 Thread Nicolai Hähnle

On 09.02.2017 11:33, Samuel Pitoiset wrote:

When ttm_bo_init() fails, the reservation mutex should be unlocked.

In debug build, the kernel reported "possible recursive locking
detected" in this codepath. For debugging purposes, I also added
a "WARN_ON(ww_mutex_is_locked())" when ttm_bo_init() fails and the
mutex was locked as expected.

This should fix (random) GPU hangs. The easy way to reproduce the
issue is to change the "Super Sampling" option from 1.0 to 2.0 in
Hitman. It will create a huge buffer, evict a bunch of buffers
(around ~5k) and deadlock.

This regression has been introduced pretty recently.

v2: only release the mutex if resv is NULL

Fixes: 12a852219583 ("drm/amdgpu: improve AMDGPU_GEM_CREATE_VRAM_CLEARED handling 
(v2)")
Signed-off-by: Samuel Pitoiset 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index d1ef1d064de4..556236a112c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -403,8 +403,11 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
&bo->placement, page_align, !kernel, NULL,
acc_size, sg, resv ? resv : &bo->tbo.ttm_resv,
&amdgpu_ttm_bo_destroy);
-   if (unlikely(r != 0))
+   if (unlikely(r != 0)) {
+   if (!resv)
+   ww_mutex_unlock(&bo->tbo.resv->lock);
return r;
+   }


I was looking at this myself a couple of weeks back, and I'm pretty sure 
I had this exact same patch, only to realize that it's actually incorrect.


The problem is that ttm_bo_init will actually call the destroy function 
(in our case, amdgpu_ttm_bo_destroy), so at this point, bo has been freed.


This code is a huge mess. I'm surprised though: have you verified that 
this patch actually fixes a hang?


Cheers,
Nicolai




bo->tbo.priority = ilog2(bo->tbo.num_pages);
if (kernel)



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] Add new gmc/smu registers

2017-02-13 Thread Nicolai Hähnle

Hi Tom,

it's probably a good idea to use subject prefixes for umr patches.

git config format.subjectPrefix "PATCH umr"

or edit .git/config accordingly, e.g. for libdrm I have this in .git/config:

[format]
subjectPrefix = PATCH libdrm

Then format-patch and friends will automatically use [PATCH umr] as a 
prefix by default. Makes it a bit easier to follow and grep mailing 
lists. (And if you agree, you should then continue to tell other people 
to do the same ;))


Thanks,
Nicolai

On 13.02.2017 16:27, Tom St Denis wrote:

Signed-off-by: Tom St Denis 
---
 src/lib/ip/gmc60_bits.i  | 2 ++
 src/lib/ip/smu701_bits.i | 4 
 src/lib/ip/smu701_regs.i | 1 +
 src/lib/ip/smu711_bits.i | 4 
 src/lib/ip/smu711_regs.i | 1 +
 src/lib/ip/smu712_bits.i | 4 
 src/lib/ip/smu712_regs.i | 1 +
 src/lib/ip/smu713_bits.i | 4 
 src/lib/ip/smu713_regs.i | 1 +
 9 files changed, 22 insertions(+)

diff --git a/src/lib/ip/gmc60_bits.i b/src/lib/ip/gmc60_bits.i
index 746dd64fb392..4e4c052f6e79 100644
--- a/src/lib/ip/gmc60_bits.i
+++ b/src/lib/ip/gmc60_bits.i
@@ -3230,6 +3230,8 @@ static struct umr_bitfield mmVM_PRT_APERTURE3_HIGH_ADDR[] 
= {
 static struct umr_bitfield mmVM_PRT_CNTL[] = {
 { "L1_TLB_STORE_INVALID_ENTRIES", 3, 3, _bitfield_default },
 { "L2_CACHE_STORE_INVALID_ENTRIES", 2, 2, _bitfield_default },
+{ "CB_DISABLE_FAULT_ON_UNMAPPED_ACCESS", 0, 0, _bitfield_default },
+{ "TC_DISABLE_FAULT_ON_UNMAPPED_ACCESS", 1, 1, _bitfield_default },
 };
 static struct umr_bitfield mmVM_CONTEXTS_DISABLE[] = {
 { "DISABLE_CONTEXT_0", 0, 0, _bitfield_default },
diff --git a/src/lib/ip/smu701_bits.i b/src/lib/ip/smu701_bits.i
index 972d8b74b5db..2f50eb9f0b67 100644
--- a/src/lib/ip/smu701_bits.i
+++ b/src/lib/ip/smu701_bits.i
@@ -4391,6 +4391,10 @@ static struct umr_bitfield ixCG_FREQ_TRAN_VOTING_7[] = {
 static struct umr_bitfield ixCG_DISPLAY_GAP_CNTL2[] = {
 { "VBI_PREDICTION", 0, 31, _bitfield_default },
 };
+static struct umr_bitfield ixCURRENT_PG_STATUS[] = {
+{ "VCE_PG_STATUS", 1, 1, _bitfield_default },
+{ "UVD_PG_STATUS", 2, 2, _bitfield_default },
+};
 static struct umr_bitfield ixSCLK_MIN_DIV[] = {
 { "FRACV", 0, 11, _bitfield_default },
 { "INTV", 12, 18, _bitfield_default },
diff --git a/src/lib/ip/smu701_regs.i b/src/lib/ip/smu701_regs.i
index 63f85e1173cc..3ff965359c89 100644
--- a/src/lib/ip/smu701_regs.i
+++ b/src/lib/ip/smu701_regs.i
@@ -1091,6 +1091,7 @@
{ "ixCG_FREQ_TRAN_VOTING_6", REG_SMC, 0xc02001c0, 
_FREQ_TRAN_VOTING_6[0], 
sizeof(ixCG_FREQ_TRAN_VOTING_6)/sizeof(ixCG_FREQ_TRAN_VOTING_6[0]), 0, 0 },
{ "ixCG_FREQ_TRAN_VOTING_7", REG_SMC, 0xc02001c4, 
_FREQ_TRAN_VOTING_7[0], 
sizeof(ixCG_FREQ_TRAN_VOTING_7)/sizeof(ixCG_FREQ_TRAN_VOTING_7[0]), 0, 0 },
{ "ixCG_DISPLAY_GAP_CNTL2", REG_SMC, 0xc0200230, 
_DISPLAY_GAP_CNTL2[0], 
sizeof(ixCG_DISPLAY_GAP_CNTL2)/sizeof(ixCG_DISPLAY_GAP_CNTL2[0]), 0, 0 },
+   { "ixCURRENT_PG_STATUS", REG_SMC, 0xc020029c, _PG_STATUS[0], 
sizeof(ixCURRENT_PG_STATUS)/sizeof(ixCURRENT_PG_STATUS[0]), 0, 0 },
{ "ixSCLK_MIN_DIV", REG_SMC, 0xc0200308, _MIN_DIV[0], 
sizeof(ixSCLK_MIN_DIV)/sizeof(ixSCLK_MIN_DIV[0]), 0, 0 },
{ "ixLCLK_DEEP_SLEEP_CNTL2", REG_SMC, 0xc0200310, 
_DEEP_SLEEP_CNTL2[0], 
sizeof(ixLCLK_DEEP_SLEEP_CNTL2)/sizeof(ixLCLK_DEEP_SLEEP_CNTL2[0]), 0, 0 },
{ "ixCG_THERMAL_CTRL", REG_SMC, 0xc034, _THERMAL_CTRL[0], 
sizeof(ixCG_THERMAL_CTRL)/sizeof(ixCG_THERMAL_CTRL[0]), 0, 0 },
diff --git a/src/lib/ip/smu711_bits.i b/src/lib/ip/smu711_bits.i
index 6d803259e970..afd90220d5d5 100644
--- a/src/lib/ip/smu711_bits.i
+++ b/src/lib/ip/smu711_bits.i
@@ -3577,6 +3577,10 @@ static struct umr_bitfield ixCG_FREQ_TRAN_VOTING_7[] = {
 static struct umr_bitfield ixCG_DISPLAY_GAP_CNTL2[] = {
 { "VBI_PREDICTION", 0, 31, _bitfield_default },
 };
+static struct umr_bitfield ixCURRENT_PG_STATUS[] = {
+{ "VCE_PG_STATUS", 1, 1, _bitfield_default },
+{ "UVD_PG_STATUS", 2, 2, _bitfield_default },
+};
 static struct umr_bitfield ixLCLK_DEEP_SLEEP_CNTL2[] = {
 { "RFE_BUSY_MASK", 0, 0, _bitfield_default },
 { "BIF_CG_LCLK_BUSY_MASK", 1, 1, _bitfield_default },
diff --git a/src/lib/ip/smu711_regs.i b/src/lib/ip/smu711_regs.i
index 6e66ecd4ac09..efeac147288e 100644
--- a/src/lib/ip/smu711_regs.i
+++ b/src/lib/ip/smu711_regs.i
@@ -886,6 +886,7 @@
{ "ixCG_FREQ_TRAN_VOTING_6", REG_SMC, 0xc02001c0, 
_FREQ_TRAN_VOTING_6[0], 
sizeof(ixCG_FREQ_TRAN_VOTING_6)/sizeof(ixCG_FREQ_TRAN_VOTING_6[0]), 0, 0 },
{ "ixCG_FREQ_TRAN_VOTING_7", REG_SMC, 0xc02001c4, 
_FREQ_TRAN_VOTING_7[0], 
sizeof(ixCG_FREQ_TRAN_VOTING_7)/sizeof(ixCG_FREQ_TRAN_VOTING_7[0]), 0, 0 },
{ "ixCG_DISPLAY_GAP_CNTL2", REG_SMC, 0xc0200230, 
_DISPLAY_GAP_CNTL2[0], 
sizeof(ixCG_DISPLAY_GAP_CNTL2)/sizeof(ixCG_DISPLAY_GAP_CNTL2[0]), 0, 0 },
+   { "ixCURRENT_PG_STATUS", REG_SMC, 0xc020029c, _PG_STATUS[0], 

RE: [PATCH 2/2] drm/amdgpu: fix PTE defines

2017-02-13 Thread Deucher, Alexander
> -Original Message-
> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf
> Of Christian König
> Sent: Monday, February 13, 2017 8:24 AM
> To: amd-gfx@lists.freedesktop.org
> Subject: [PATCH 2/2] drm/amdgpu: fix PTE defines
> 
> From: Christian König 
> 
> Those should be 64bit, even on a 32bit system.
> 
> Signed-off-by: Christian König 

Series is:
Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 14 +++---
>  1 file changed, 7 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 4d26e9b..51fa12f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -53,19 +53,19 @@ struct amdgpu_bo_list_entry;
>  /* LOG2 number of continuous pages for the fragment field */
>  #define AMDGPU_LOG2_PAGES_PER_FRAG 4
> 
> -#define AMDGPU_PTE_VALID (1 << 0)
> -#define AMDGPU_PTE_SYSTEM(1 << 1)
> -#define AMDGPU_PTE_SNOOPED   (1 << 2)
> +#define AMDGPU_PTE_VALID (1ULL << 0)
> +#define AMDGPU_PTE_SYSTEM(1ULL << 1)
> +#define AMDGPU_PTE_SNOOPED   (1ULL << 2)
> 
>  /* VI only */
> -#define AMDGPU_PTE_EXECUTABLE(1 << 4)
> +#define AMDGPU_PTE_EXECUTABLE(1ULL << 4)
> 
> -#define AMDGPU_PTE_READABLE  (1 << 5)
> -#define AMDGPU_PTE_WRITEABLE (1 << 6)
> +#define AMDGPU_PTE_READABLE  (1ULL << 5)
> +#define AMDGPU_PTE_WRITEABLE (1ULL << 6)
> 
>  #define AMDGPU_PTE_FRAG(x)   ((x & 0x1f) << 7)
> 
> -#define AMDGPU_PTE_PRT   (1UL << 63)
> +#define AMDGPU_PTE_PRT   (1ULL << 63)
> 
>  /* How to programm VM fault handling */
>  #define AMDGPU_VM_FAULT_STOP_NEVER   0
> --
> 2.5.0
> 
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/2] drm/amdgpu: fix PRT cleanup order in the VM

2017-02-13 Thread Nicolai Hähnle

On 13.02.2017 14:23, Christian König wrote:

From: Christian König 

We need to unmap the PRTs first and then free our scheduler entity.


Thanks for the quick fix! Both patches are

Reviewed-by: Nicolai Hähnle 

... and I'll probably get around to testing them soon, as well.



Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index bc32239..0b7386e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1687,8 +1687,6 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct 
amdgpu_vm *vm)
struct amdgpu_bo_va_mapping *mapping, *tmp;
int i;

-   amd_sched_entity_fini(vm->entity.sched, &vm->entity);
-
if (!RB_EMPTY_ROOT(&vm->va)) {
dev_err(adev->dev, "still active bo inside vm\n");
}
@@ -1706,6 +1704,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct 
amdgpu_vm *vm)
}
amdgpu_vm_clear_freed(adev, vm);

+   amd_sched_entity_fini(vm->entity.sched, &vm->entity);
+
for (i = 0; i < amdgpu_vm_num_pdes(adev); i++) {
struct amdgpu_bo *pt = vm->page_tables[i].bo;




___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/radeon: allow unaligned shader loads on CIK

2017-02-13 Thread Marek Olšák
From: Marek Olšák 

Signed-off-by: Marek Olšák 
---
 drivers/gpu/drm/radeon/cik.c| 7 +--
 drivers/gpu/drm/radeon/radeon_drv.c | 3 ++-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index f6ff41a..ac0d939 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -28,20 +28,23 @@
 #include "radeon.h"
 #include "radeon_asic.h"
 #include "radeon_audio.h"
 #include "cikd.h"
 #include "atom.h"
 #include "cik_blit_shaders.h"
 #include "radeon_ucode.h"
 #include "clearstate_ci.h"
 #include "radeon_kfd.h"
 
+#define SH_MEM_CONFIG_GFX_DEFAULT \
+   ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
+
 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
 
@@ -5580,21 +5583,21 @@ static int cik_pcie_gart_enable(struct radeon_device 
*rdev)
tmp &= ~BYPASS_VM;
WREG32(CHUB_CONTROL, tmp);
}
 
/* XXX SH_MEM regs */
/* where to put LDS, scratch, GPUVM in FSA64 space */
mutex_lock(&rdev->srbm_mutex);
for (i = 0; i < 16; i++) {
cik_srbm_select(rdev, 0, 0, 0, i);
/* CP and shaders */
-   WREG32(SH_MEM_CONFIG, 0);
+   WREG32(SH_MEM_CONFIG, SH_MEM_CONFIG_GFX_DEFAULT);
WREG32(SH_MEM_APE1_BASE, 1);
WREG32(SH_MEM_APE1_LIMIT, 0);
WREG32(SH_MEM_BASES, 0);
/* SDMA GFX */
WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
/* XXX SDMA RLC - todo */
}
@@ -5787,21 +5790,21 @@ void cik_vm_flush(struct radeon_device *rdev, struct 
radeon_ring *ring,
radeon_ring_write(ring, 0);
radeon_ring_write(ring, VMID(vm_id));
 
radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
 WRITE_DATA_DST_SEL(0)));
radeon_ring_write(ring, SH_MEM_BASES >> 2);
radeon_ring_write(ring, 0);
 
radeon_ring_write(ring, 0); /* SH_MEM_BASES */
-   radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
+   radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /* SH_MEM_CONFIG */
radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
 
radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
 WRITE_DATA_DST_SEL(0)));
radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
radeon_ring_write(ring, 0);
radeon_ring_write(ring, VMID(0));
 
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c 
b/drivers/gpu/drm/radeon/radeon_drv.c
index 30bd4a6..2e5d680 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -91,23 +91,24 @@
  *CS to GPU on >= r600
  *   2.41.0 - evergreen/cayman: Add SET_BASE/DRAW_INDIRECT command parsing 
support
  *   2.42.0 - Add VCE/VUI (Video Usability Information) support
  *   2.43.0 - RADEON_INFO_GPU_RESET_COUNTER
  *   2.44.0 - SET_APPEND_CNT packet3 support
  *   2.45.0 - Allow setting shader registers using DMA/COPY packet3 on SI
  *   2.46.0 - Add PFP_SYNC_ME support on evergreen
  *   2.47.0 - Add UVD_NO_OP register support
  *   2.48.0 - TA_CS_BC_BASE_ADDR allowed on SI
  *   2.49.0 - DRM_RADEON_GEM_INFO ioctl returns correct vram_size/visible 
values
+ *   2.50.0 - Allows unaligned shader loads on CIK. (needed by OpenGL)
  */
 #define KMS_DRIVER_MAJOR   2
-#define KMS_DRIVER_MINOR   49
+#define KMS_DRIVER_MINOR   50
 #define KMS_DRIVER_PATCHLEVEL  0
 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
 int radeon_driver_unload_kms(struct drm_device *dev);
 void radeon_driver_lastclose_kms(struct drm_device *dev);
 int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv);
 void radeon_driver_postclose_kms(struct drm_device *dev,
 struct drm_file *file_priv);
 void radeon_driver_preclose_kms(struct drm_device *dev,
struct drm_file *file_priv);
 int radeon_suspend_kms(struct drm_device *dev, bool suspend,
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org

RE: [PATCH] Add new gmc/smu registers

2017-02-13 Thread Deucher, Alexander
> -Original Message-
> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf
> Of Tom St Denis
> Sent: Monday, February 13, 2017 10:28 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: StDenis, Tom
> Subject: [PATCH] Add new gmc/smu registers
> 
> Signed-off-by: Tom St Denis 

Acked-by: Alex Deucher 

> ---
>  src/lib/ip/gmc60_bits.i  | 2 ++
>  src/lib/ip/smu701_bits.i | 4 
>  src/lib/ip/smu701_regs.i | 1 +
>  src/lib/ip/smu711_bits.i | 4 
>  src/lib/ip/smu711_regs.i | 1 +
>  src/lib/ip/smu712_bits.i | 4 
>  src/lib/ip/smu712_regs.i | 1 +
>  src/lib/ip/smu713_bits.i | 4 
>  src/lib/ip/smu713_regs.i | 1 +
>  9 files changed, 22 insertions(+)
> 
> diff --git a/src/lib/ip/gmc60_bits.i b/src/lib/ip/gmc60_bits.i
> index 746dd64fb392..4e4c052f6e79 100644
> --- a/src/lib/ip/gmc60_bits.i
> +++ b/src/lib/ip/gmc60_bits.i
> @@ -3230,6 +3230,8 @@ static struct umr_bitfield
> mmVM_PRT_APERTURE3_HIGH_ADDR[] = {
>  static struct umr_bitfield mmVM_PRT_CNTL[] = {
>{ "L1_TLB_STORE_INVALID_ENTRIES", 3, 3, _bitfield_default },
>{ "L2_CACHE_STORE_INVALID_ENTRIES", 2, 2, _bitfield_default
> },
> +  { "CB_DISABLE_FAULT_ON_UNMAPPED_ACCESS", 0, 0,
> _bitfield_default },
> +  { "TC_DISABLE_FAULT_ON_UNMAPPED_ACCESS", 1, 1,
> _bitfield_default },
>  };
>  static struct umr_bitfield mmVM_CONTEXTS_DISABLE[] = {
>{ "DISABLE_CONTEXT_0", 0, 0, _bitfield_default },
> diff --git a/src/lib/ip/smu701_bits.i b/src/lib/ip/smu701_bits.i
> index 972d8b74b5db..2f50eb9f0b67 100644
> --- a/src/lib/ip/smu701_bits.i
> +++ b/src/lib/ip/smu701_bits.i
> @@ -4391,6 +4391,10 @@ static struct umr_bitfield
> ixCG_FREQ_TRAN_VOTING_7[] = {
>  static struct umr_bitfield ixCG_DISPLAY_GAP_CNTL2[] = {
>{ "VBI_PREDICTION", 0, 31, _bitfield_default },
>  };
> +static struct umr_bitfield ixCURRENT_PG_STATUS[] = {
> +  { "VCE_PG_STATUS", 1, 1, _bitfield_default },
> +  { "UVD_PG_STATUS", 2, 2, _bitfield_default },
> +};
>  static struct umr_bitfield ixSCLK_MIN_DIV[] = {
>{ "FRACV", 0, 11, _bitfield_default },
>{ "INTV", 12, 18, _bitfield_default },
> diff --git a/src/lib/ip/smu701_regs.i b/src/lib/ip/smu701_regs.i
> index 63f85e1173cc..3ff965359c89 100644
> --- a/src/lib/ip/smu701_regs.i
> +++ b/src/lib/ip/smu701_regs.i
> @@ -1091,6 +1091,7 @@
>   { "ixCG_FREQ_TRAN_VOTING_6", REG_SMC, 0xc02001c0,
> _FREQ_TRAN_VOTING_6[0],
> sizeof(ixCG_FREQ_TRAN_VOTING_6)/sizeof(ixCG_FREQ_TRAN_VOTING_6[
> 0]), 0, 0 },
>   { "ixCG_FREQ_TRAN_VOTING_7", REG_SMC, 0xc02001c4,
> _FREQ_TRAN_VOTING_7[0],
> sizeof(ixCG_FREQ_TRAN_VOTING_7)/sizeof(ixCG_FREQ_TRAN_VOTING_7[
> 0]), 0, 0 },
>   { "ixCG_DISPLAY_GAP_CNTL2", REG_SMC, 0xc0200230,
> _DISPLAY_GAP_CNTL2[0],
> sizeof(ixCG_DISPLAY_GAP_CNTL2)/sizeof(ixCG_DISPLAY_GAP_CNTL2[0]), 0,
> 0 },
> + { "ixCURRENT_PG_STATUS", REG_SMC, 0xc020029c,
> _PG_STATUS[0],
> sizeof(ixCURRENT_PG_STATUS)/sizeof(ixCURRENT_PG_STATUS[0]), 0, 0 },
>   { "ixSCLK_MIN_DIV", REG_SMC, 0xc0200308, _MIN_DIV[0],
> sizeof(ixSCLK_MIN_DIV)/sizeof(ixSCLK_MIN_DIV[0]), 0, 0 },
>   { "ixLCLK_DEEP_SLEEP_CNTL2", REG_SMC, 0xc0200310,
> _DEEP_SLEEP_CNTL2[0],
> sizeof(ixLCLK_DEEP_SLEEP_CNTL2)/sizeof(ixLCLK_DEEP_SLEEP_CNTL2[0]), 0,
> 0 },
>   { "ixCG_THERMAL_CTRL", REG_SMC, 0xc034,
> _THERMAL_CTRL[0],
> sizeof(ixCG_THERMAL_CTRL)/sizeof(ixCG_THERMAL_CTRL[0]), 0, 0 },
> diff --git a/src/lib/ip/smu711_bits.i b/src/lib/ip/smu711_bits.i
> index 6d803259e970..afd90220d5d5 100644
> --- a/src/lib/ip/smu711_bits.i
> +++ b/src/lib/ip/smu711_bits.i
> @@ -3577,6 +3577,10 @@ static struct umr_bitfield
> ixCG_FREQ_TRAN_VOTING_7[] = {
>  static struct umr_bitfield ixCG_DISPLAY_GAP_CNTL2[] = {
>{ "VBI_PREDICTION", 0, 31, _bitfield_default },
>  };
> +static struct umr_bitfield ixCURRENT_PG_STATUS[] = {
> +  { "VCE_PG_STATUS", 1, 1, _bitfield_default },
> +  { "UVD_PG_STATUS", 2, 2, _bitfield_default },
> +};
>  static struct umr_bitfield ixLCLK_DEEP_SLEEP_CNTL2[] = {
>{ "RFE_BUSY_MASK", 0, 0, _bitfield_default },
>{ "BIF_CG_LCLK_BUSY_MASK", 1, 1, _bitfield_default },
> diff --git a/src/lib/ip/smu711_regs.i b/src/lib/ip/smu711_regs.i
> index 6e66ecd4ac09..efeac147288e 100644
> --- a/src/lib/ip/smu711_regs.i
> +++ b/src/lib/ip/smu711_regs.i
> @@ -886,6 +886,7 @@
>   { "ixCG_FREQ_TRAN_VOTING_6", REG_SMC, 0xc02001c0,
> _FREQ_TRAN_VOTING_6[0],
> sizeof(ixCG_FREQ_TRAN_VOTING_6)/sizeof(ixCG_FREQ_TRAN_VOTING_6[
> 0]), 0, 0 },
>   { "ixCG_FREQ_TRAN_VOTING_7", REG_SMC, 0xc02001c4,
> _FREQ_TRAN_VOTING_7[0],
> sizeof(ixCG_FREQ_TRAN_VOTING_7)/sizeof(ixCG_FREQ_TRAN_VOTING_7[
> 0]), 0, 0 },
>   { "ixCG_DISPLAY_GAP_CNTL2", REG_SMC, 0xc0200230,
> _DISPLAY_GAP_CNTL2[0],
> sizeof(ixCG_DISPLAY_GAP_CNTL2)/sizeof(ixCG_DISPLAY_GAP_CNTL2[0]), 0,
> 0 },
> + { "ixCURRENT_PG_STATUS", REG_SMC, 0xc020029c,
> _PG_STATUS[0],
> sizeof(ixCURRENT_PG_STATUS)/sizeof(ixCURRENT_PG_STATUS[0]), 0, 0 },
>   { 

Re: [PATCH] Add missing CIK devices

2017-02-13 Thread Tom St Denis

On 13/02/17 10:32 AM, Deucher, Alexander wrote:

-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf
Of Tom St Denis
Sent: Monday, February 13, 2017 7:46 AM
To: amd-gfx@lists.freedesktop.org
Cc: StDenis, Tom
Subject: [PATCH] Add missing CIK devices

Adds mullins, kabini, and hawaii ASICs to the library.

Signed-off-by: Tom St Denis 


The smu blocks are slightly different on CI dGPUs vs. APUs.  Hawaii should 
follow Bonaire and kabini and mullins should follow kaveri.
Acked-by: Alex Deucher 


Thanks.  Corrected.  Bonaire also had the wrong version.  I've pushed 
out v2 of the patch with those corrections.


Cheers,
Tom
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: Initialize pipe priority order on graphic initialization

2017-02-13 Thread Alex Deucher
On Fri, Feb 10, 2017 at 7:27 PM, ozeng  wrote:
> Initialize the PIPE_ORDER_TS0/1/2/3 fields of the SPI_ARB_PRIORITY register to 2.
> This sets the pipe priority order to:
> 02 - HP3D, CS_H, GFX, CS_M, CS_L
>
> Change-Id: I1e89a2fdcf45a99808f0f5b3cbd83ae537174023
> Signed-off-by: Oak Zeng 

Reviewed-by: Alex Deucher 


> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 8 
>  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 8 
>  2 files changed, 16 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> index 8dcb929..e9d6344 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> @@ -1983,6 +1983,14 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device 
> *adev)
> WREG32(mmPA_CL_ENHANCE, PA_CL_ENHANCE__CLIP_VTX_REORDER_ENA_MASK |
> (3 << PA_CL_ENHANCE__NUM_CLIP_SEQ__SHIFT));
> WREG32(mmPA_SC_ENHANCE, 
> PA_SC_ENHANCE__ENABLE_PA_SC_OUT_OF_ORDER_MASK);
> +
> +   tmp = RREG32(mmSPI_ARB_PRIORITY);
> +   tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
> +   tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
> +   tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
> +   tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
> +   WREG32(mmSPI_ARB_PRIORITY, tmp);
> +
> mutex_unlock(&adev->grbm_idx_mutex);
>
> udelay(50);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 9542fed..772c42b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -3905,6 +3905,14 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device 
> *adev)
> PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
>(adev->gfx.config.sc_earlyz_tile_fifo_size <<
> PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
> +
> +   tmp = RREG32(mmSPI_ARB_PRIORITY);
> +   tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
> +   tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
> +   tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
> +   tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
> +   WREG32(mmSPI_ARB_PRIORITY, tmp);
> +
> mutex_unlock(&adev->grbm_idx_mutex);
>
>  }
> --
> 2.7.4
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v2 2/2] drm/amdgpu: report the number of bytes moved at buffer creation

2017-02-13 Thread Nicolai Hähnle

On 09.02.2017 11:33, Samuel Pitoiset wrote:

Like ttm_bo_validate(), ttm_bo_init() might need to move the BO, and
the number of bytes moved by TTM should be reported. This can help
the buffer migration throttling mechanism make a better decision.


Hmm, this could double-count bytes if there's a concurrent CS submission 
going on.


It's only a heuristic, so I guess it's not too bad, but still - having 
at least a comment about this would be nice.


Nicolai


v2: fix computation

Signed-off-by: Samuel Pitoiset 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h| 1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 3 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 6 ++
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 402a8954c6d8..5227e4d1d5db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1720,6 +1720,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, 
void *data);
 int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
   u32 ip_instance, u32 ring,
   struct amdgpu_ring **out_ring);
+void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes);
 void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain);
 bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 6e948e4986ec..dade2fa9593a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -351,8 +351,7 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct 
amdgpu_device *adev)
  * submission. This can result in a debt that can stop buffer migrations
  * temporarily.
  */
-static void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev,
-u64 num_bytes)
+void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes)
 {
spin_lock(&adev->mm_stats.lock);
adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 556236a112c1..4aa2c8a94347 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -323,6 +323,7 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
struct amdgpu_bo *bo;
enum ttm_bo_type type;
unsigned long page_align;
+   u64 initial_bytes_moved;
size_t acc_size;
int r;

@@ -399,10 +400,15 @@ int amdgpu_bo_create_restricted(struct amdgpu_device 
*adev,
locked = ww_mutex_trylock(&bo->tbo.ttm_resv.lock);
WARN_ON(!locked);
}
+
+   initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
r = ttm_bo_init(&adev->mman.bdev, &bo->tbo, size, type,
&bo->placement, page_align, !kernel, NULL,
acc_size, sg, resv ? resv : &bo->tbo.ttm_resv,
&amdgpu_ttm_bo_destroy);
+   amdgpu_cs_report_moved_bytes(adev,
+   atomic64_read(&adev->num_bytes_moved) - initial_bytes_moved);
+
if (unlikely(r != 0)) {
if (!resv)
ww_mutex_unlock(&bo->tbo.resv->lock);



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] Add new gmc/smu registers

2017-02-13 Thread Tom St Denis

On 13/02/17 11:35 AM, Nicolai Hähnle wrote:

Hi Tom,

it's probably a good idea to use subject prefixes for umr patches.

git config format.subjectPrefix "PATCH umr"

or edit .git/config accordingly, e.g. for libdrm I have this in
.git/config:

[format]
subjectPrefix = PATCH libdrm

Then format-patch and friends will automatically use [PATCH umr] as a
prefix by default. Makes it a bit easier to follow and grep mailing
lists. (And if you agree, you should then continue to tell other people
to do the same ;))


Ah that's useful.  Thanks. I've added it.

Cheers,
Tom
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] Fix SMC read/write

2017-02-13 Thread Deucher, Alexander
> -----Original Message-----
> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf
> Of Tom St Denis
> Sent: Monday, February 13, 2017 11:26 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: StDenis, Tom
> Subject: [PATCH] Fix SMC read/write
> 
> The registers in umr are stored as byte addresses
> (mm registers are word addresses).
> 
> Signed-off-by: Tom St Denis 

Acked-by: Alex Deucher 

> ---
>  src/app/scan.c| 2 +-
>  src/app/set_bit.c | 2 +-
>  src/app/set_reg.c | 2 +-
>  3 files changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/src/app/scan.c b/src/app/scan.c
> index afcadc4c507e..19c97fe1499d 100644
> --- a/src/app/scan.c
> +++ b/src/app/scan.c
> @@ -55,7 +55,7 @@ int umr_scan_asic(struct umr_asic *asic, char
> *asicname, char *ipname, char *reg
>   case REG_PCIE: fd =
> asic->fd.pcie; scale = 1; break;
>   case REG_SMC:
>   if
> (options.read_smc) {
> - fd =
> asic->fd.smc; scale = 4;
> + fd =
> asic->fd.smc; scale = 1;
>   } else {
> 
>   continue;
>   }
> diff --git a/src/app/set_bit.c b/src/app/set_bit.c
> index 899bf1a17459..d9ee7d8f3a55 100644
> --- a/src/app/set_bit.c
> +++ b/src/app/set_bit.c
> @@ -61,7 +61,7 @@ int umr_set_register_bit(struct umr_asic *asic, char
> *regpath, char *regvalue)
>   case
> REG_MMIO: fd = asic->fd.mmio; scale = 4; break;
>   case
> REG_DIDT: fd = asic->fd.didt; scale = 1; break;
>   case
> REG_PCIE: fd = asic->fd.pcie; scale = 1; break;
> - case
> REG_SMC:  fd = asic->fd.smc; scale = 4; break;
> + case
> REG_SMC:  fd = asic->fd.smc;  scale = 1; break;
>   default:
> return -1;
>   }
>   if (asic-
> >blocks[i]->grant) {
> diff --git a/src/app/set_reg.c b/src/app/set_reg.c
> index 8c5060f2dbe8..9861170d55c3 100644
> --- a/src/app/set_reg.c
> +++ b/src/app/set_reg.c
> @@ -57,7 +57,7 @@ int umr_set_register(struct umr_asic *asic, char
> *regpath, char *regvalue)
>   case REG_MMIO: fd = asic-
> >fd.mmio; scale = 4; break;
>   case REG_DIDT: fd = asic-
> >fd.didt; scale = 1; break;
>   case REG_PCIE: fd = asic-
> >fd.pcie; scale = 1; break;
> - case REG_SMC:  fd = asic-
> >fd.smc; scale = 4; break;
> + case REG_SMC:  fd = asic-
> >fd.smc; scale = 1; break;
>   default: return -1;
>   }
> 
> --
> 2.11.0
> 
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: Initial Public Release of AMDGPU debugger

2017-02-13 Thread Nils Holland
On Mon, Feb 13, 2017 at 06:55:33AM -0500, Tom St Denis wrote:
> On 13/02/17 05:00 AM, Nils Holland wrote:
> > > On Sat, Feb 04, 2017 at 06:44:12PM +0000, StDenis, Tom wrote:
> >> Hello all,
> >>
> >>
> >> We're pleased to announce the initial public release of the AMDGPU
> >> User Mode Register debugger (umr).  This tool allows privileged
> >> users to read and write GPU registers in order to diagnose, debug,
> >> and aid in development of AMDGPU features.  The tool supports a
> >> variety of other commands for actions such as decoding ring
> >> contents, analyzing wavefronts, viewing machine status, and more.
> >> It supports SI through VI devices and requires a very recent kernel
> >> (what will be 4.10).
> >
> > Just a short question: Am I correct in noticing that the tool
> > currently only supports standalone GPUs and not the APUs, e.g. I have
> > a:
> >
> > VGA compatible controller: Advanced Micro Devices, Inc. [AMD/ATI]
> > Kabini [Radeon HD 8210]
> >
> > as part of an AMD E1-2100 APU and the tool tells me:
> >
> > nils@teela ~ $ umr -c
> > ERROR: Device 0x9834 not found in UMR device table
> > ASIC not found (instance=0, did=)
> >
> > I also don't see any reference to kabini in the tool's code, so unless
> > I'm doing something wrong, this might be expected, right?
> 
> You're right that kabini hasn't been added (nor mullins or hawaii). 
> That's purely an oversight.  The focus has been largely on VI devices 
> during development but I can easily circle back and add the missing CIK 
> devices today.

Sounds very good, thanks! I'll definitely play around with it a little
when my card is supported as that will certainly help me learn and
understand more about the GPUs and their drivers. :-)

Greetings
Nils
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: Initial Public Release of AMDGPU debugger

2017-02-13 Thread Tom St Denis

On 13/02/17 05:00 AM, Nils Holland wrote:

On Sat, Feb 04, 2017 at 06:44:12PM +0000, StDenis, Tom wrote:

Hello all,


We're pleased to announce the initial public release of the AMDGPU
User Mode Register debugger (umr).  This tool allows privileged
users to read and write GPU registers in order to diagnose, debug,
and aid in development of AMDGPU features.  The tool supports a
variety of other commands for actions such as decoding ring
contents, analyzing wavefronts, viewing machine status, and more.
It supports SI through VI devices and requires a very recent kernel
(what will be 4.10).


Just a short question: Am I correct in noticing that the tool
currently only supports standalone GPUs and not the APUs, e.g. I have
a:

VGA compatible controller: Advanced Micro Devices, Inc. [AMD/ATI]
Kabini [Radeon HD 8210]

as part of an AMD E1-2100 APU and the tool tells me:

nils@teela ~ $ umr -c
ERROR: Device 0x9834 not found in UMR device table
ASIC not found (instance=0, did=)

I also don't see any reference to kabini in the tool's code, so unless
I'm doing something wrong, this might be expected, right?


You're right that kabini hasn't been added (nor mullins or hawaii). 
That's purely an oversight.  The focus has been largely on VI devices 
during development but I can easily circle back and add the missing CIK 
devices today.


For ref, it does support APUs (Carrizo, Stoney, Kaveri) :-).

Tom
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/radeon: allow unaligned shader loads on CIK

2017-02-13 Thread Deucher, Alexander


> -----Original Message-----
> From: Marek Olšák [mailto:mar...@gmail.com]
> Sent: Monday, February 13, 2017 12:04 PM
> To: Deucher, Alexander
> Cc: amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/radeon: allow unaligned shader loads on CIK
> 
> On Mon, Feb 13, 2017 at 6:00 PM, Deucher, Alexander
>  wrote:
> >> -Original Message-
> >> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On
> Behalf
> >> Of Marek Olšák
> >> Sent: Monday, February 13, 2017 11:58 AM
> >> To: amd-gfx@lists.freedesktop.org
> >> Subject: [PATCH] drm/radeon: allow unaligned shader loads on CIK
> >>
> >> From: Marek Olšák 
> >>
> >> Signed-off-by: Marek Olšák 
> >
> > Add a better patch description.  With that fixed:
> > Reviewed-by: Alex Deucher 
> 
> What's a better patch description? "drm/radeon: set
> SH_MEM_CONFIG.ALIGNMENT_MODE = UNALIGNED on CIK"?

The title is fine.  Just add a description.  Something like:

Set alignment mode to unaligned on CIK to align with amdgpu.  This is needed for
unaligned loads to work properly in mesa.  The current setting requires dword 
alignment.

Alex

> 
> Marek
> 
> >
> >
> >> ---
> >>  drivers/gpu/drm/radeon/cik.c| 7 +--
> >>  drivers/gpu/drm/radeon/radeon_drv.c | 3 ++-
> >>  2 files changed, 7 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
> >> index f6ff41a..ac0d939 100644
> >> --- a/drivers/gpu/drm/radeon/cik.c
> >> +++ b/drivers/gpu/drm/radeon/cik.c
> >> @@ -28,20 +28,23 @@
> >>  #include "radeon.h"
> >>  #include "radeon_asic.h"
> >>  #include "radeon_audio.h"
> >>  #include "cikd.h"
> >>  #include "atom.h"
> >>  #include "cik_blit_shaders.h"
> >>  #include "radeon_ucode.h"
> >>  #include "clearstate_ci.h"
> >>  #include "radeon_kfd.h"
> >>
> >> +#define SH_MEM_CONFIG_GFX_DEFAULT \
> >> + ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
> >> +
> >>  MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
> >>  MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
> >>  MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
> >>  MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
> >>  MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
> >>  MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
> >>  MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
> >>  MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
> >>  MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
> >>
> >> @@ -5580,21 +5583,21 @@ static int cik_pcie_gart_enable(struct
> >> radeon_device *rdev)
> >>   tmp &= ~BYPASS_VM;
> >>   WREG32(CHUB_CONTROL, tmp);
> >>   }
> >>
> >>   /* XXX SH_MEM regs */
> >>   /* where to put LDS, scratch, GPUVM in FSA64 space */
> >>   mutex_lock(>srbm_mutex);
> >>   for (i = 0; i < 16; i++) {
> >>   cik_srbm_select(rdev, 0, 0, 0, i);
> >>   /* CP and shaders */
> >> - WREG32(SH_MEM_CONFIG, 0);
> >> + WREG32(SH_MEM_CONFIG,
> >> SH_MEM_CONFIG_GFX_DEFAULT);
> >>   WREG32(SH_MEM_APE1_BASE, 1);
> >>   WREG32(SH_MEM_APE1_LIMIT, 0);
> >>   WREG32(SH_MEM_BASES, 0);
> >>   /* SDMA GFX */
> >>   WREG32(SDMA0_GFX_VIRTUAL_ADDR +
> >> SDMA0_REGISTER_OFFSET, 0);
> >>   WREG32(SDMA0_GFX_APE1_CNTL +
> >> SDMA0_REGISTER_OFFSET, 0);
> >>   WREG32(SDMA0_GFX_VIRTUAL_ADDR +
> >> SDMA1_REGISTER_OFFSET, 0);
> >>   WREG32(SDMA0_GFX_APE1_CNTL +
> >> SDMA1_REGISTER_OFFSET, 0);
> >>   /* XXX SDMA RLC - todo */
> >>   }
> >> @@ -5787,21 +5790,21 @@ void cik_vm_flush(struct radeon_device
> *rdev,
> >> struct radeon_ring *ring,
> >>   radeon_ring_write(ring, 0);
> >>   radeon_ring_write(ring, VMID(vm_id));
> >>
> >>   radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
> >>   radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
> >>WRITE_DATA_DST_SEL(0)));
> >>   radeon_ring_write(ring, SH_MEM_BASES >> 2);
> >>   radeon_ring_write(ring, 0);
> >>
> >>   radeon_ring_write(ring, 0); /* SH_MEM_BASES */
> >> - radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
> >> + radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /*
> >> SH_MEM_CONFIG */
> >>   radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
> >>   radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
> >>
> >>   radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
> >>   radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
> >>WRITE_DATA_DST_SEL(0)));
> >>   radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
> >>   radeon_ring_write(ring, 0);
> >>   radeon_ring_write(ring, VMID(0));
> >>
> >> diff --git a/drivers/gpu/drm/radeon/radeon_drv.c
> >> b/drivers/gpu/drm/radeon/radeon_drv.c
> >> index 30bd4a6..2e5d680 100644
> >> --- a/drivers/gpu/drm/radeon/radeon_drv.c
> >> +++ b/drivers/gpu/drm/radeon/radeon_drv.c
> >> @@ -91,23 

Re: [PATCH v2 2/2] drm/amdgpu: report the number of bytes moved at buffer creation

2017-02-13 Thread Samuel Pitoiset



On 02/13/2017 05:28 PM, Nicolai Hähnle wrote:

On 09.02.2017 11:33, Samuel Pitoiset wrote:

Like ttm_bo_validate(), ttm_bo_init() might need to move BO and
the number of bytes moved by TTM should be reported. This can help
the throttle buffer migration mechanism to make a better decision.


Hmm, this could double-count bytes if there's a concurrent CS submission
going on.


You are right.

Thanks Christian for taking care of this.



It's only a heuristic, so I guess it's not too bad, but still - having
at least a comment about this would be nice.

Nicolai


v2: fix computation

Signed-off-by: Samuel Pitoiset 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h| 1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 3 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 6 ++
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 402a8954c6d8..5227e4d1d5db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1720,6 +1720,7 @@ int amdgpu_cs_parser_init(struct
amdgpu_cs_parser *p, void *data);
 int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
u32 ip_instance, u32 ring,
struct amdgpu_ring **out_ring);
+void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64
num_bytes);
 void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32
domain);
 bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page
**pages);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 6e948e4986ec..dade2fa9593a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -351,8 +351,7 @@ static u64
amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
  * submission. This can result in a debt that can stop buffer migrations
  * temporarily.
  */
-static void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev,
- u64 num_bytes)
+void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64
num_bytes)
 {
 spin_lock(>mm_stats.lock);
 adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 556236a112c1..4aa2c8a94347 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -323,6 +323,7 @@ int amdgpu_bo_create_restricted(struct
amdgpu_device *adev,
 struct amdgpu_bo *bo;
 enum ttm_bo_type type;
 unsigned long page_align;
+u64 initial_bytes_moved;
 size_t acc_size;
 int r;

@@ -399,10 +400,15 @@ int amdgpu_bo_create_restricted(struct
amdgpu_device *adev,
 locked = ww_mutex_trylock(>tbo.ttm_resv.lock);
 WARN_ON(!locked);
 }
+
+initial_bytes_moved = atomic64_read(>num_bytes_moved);
 r = ttm_bo_init(>mman.bdev, >tbo, size, type,
 >placement, page_align, !kernel, NULL,
 acc_size, sg, resv ? resv : >tbo.ttm_resv,
 _ttm_bo_destroy);
+amdgpu_cs_report_moved_bytes(adev,
+atomic64_read(>num_bytes_moved) - initial_bytes_moved);
+
 if (unlikely(r != 0)) {
 if (!resv)
 ww_mutex_unlock(>tbo.resv->lock);




___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v2 1/2] drm/amdgpu: fix a potential deadlock in amdgpu_bo_create_restricted()

2017-02-13 Thread Samuel Pitoiset



On 02/13/2017 06:49 PM, Samuel Pitoiset wrote:



On 02/13/2017 05:25 PM, Nicolai Hähnle wrote:

On 09.02.2017 11:33, Samuel Pitoiset wrote:

When ttm_bo_init() fails, the reservation mutex should be unlocked.

In debug build, the kernel reported "possible recursive locking
detected" in this codepath. For debugging purposes, I also added
a "WARN_ON(ww_mutex_is_locked())" when ttm_bo_init() fails and the
mutex was locked as expected.

This should fix (random) GPU hangs. The easy way to reproduce the
issue is to change the "Super Sampling" option from 1.0 to 2.0 in
Hitman. It will create a huge buffer, evict a bunch of buffers
(around ~5k) and deadlock.

This regression has been introduced pretty recently.

v2: only release the mutex if resv is NULL

Fixes: 12a852219583 ("drm/amdgpu: improve
AMDGPU_GEM_CREATE_VRAM_CLEARED handling (v2)")
Signed-off-by: Samuel Pitoiset 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index d1ef1d064de4..556236a112c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -403,8 +403,11 @@ int amdgpu_bo_create_restricted(struct
amdgpu_device *adev,
 >placement, page_align, !kernel, NULL,
 acc_size, sg, resv ? resv : >tbo.ttm_resv,
 _ttm_bo_destroy);
-if (unlikely(r != 0))
+if (unlikely(r != 0)) {
+if (!resv)
+ww_mutex_unlock(>tbo.resv->lock);
 return r;
+}


I was looking at this myself a couple of weeks back, and I'm pretty sure
I had this exact same patch just to realize that it's actually incorrect.

The problem is that ttm_bo_init will actually call the destroy function
(in our case, amdgpu_ttm_bo_destroy), so at this point, bo has been
freed.

This code is a huge mess. I'm surprised though: have you verified that
this patch actually fixes a hang?


Yes, I triple-checked. I can't reproduce the hangs with Hitman.

This fixes a deadlock, here's the report:
https://hastebin.com/durodivoma.xml

The resv->lock has to be unlocked when ttm_bo_init() fails (I checked
with a WARN_ON(is_locked)) because it doesn't call the destroy function
in all situations. Presumably, when drm_vma_offset_add() fails and resv
is not NULL, the mutex is not unlocked.


err, resv is always NULL in this situation.





Cheers,
Nicolai




 bo->tbo.priority = ilog2(bo->tbo.num_pages);
 if (kernel)




___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v2 1/2] drm/amdgpu: fix a potential deadlock in amdgpu_bo_create_restricted()

2017-02-13 Thread Nicolai Hähnle

On 13.02.2017 19:04, Nicolai Hähnle wrote:

On 13.02.2017 18:49, Samuel Pitoiset wrote:



On 02/13/2017 05:25 PM, Nicolai Hähnle wrote:

On 09.02.2017 11:33, Samuel Pitoiset wrote:

When ttm_bo_init() fails, the reservation mutex should be unlocked.

In debug build, the kernel reported "possible recursive locking
detected" in this codepath. For debugging purposes, I also added
a "WARN_ON(ww_mutex_is_locked())" when ttm_bo_init() fails and the
mutex was locked as expected.

This should fix (random) GPU hangs. The easy way to reproduce the
issue is to change the "Super Sampling" option from 1.0 to 2.0 in
Hitman. It will create a huge buffer, evict a bunch of buffers
(around ~5k) and deadlock.

This regression has been introduced pretty recently.

v2: only release the mutex if resv is NULL

Fixes: 12a852219583 ("drm/amdgpu: improve
AMDGPU_GEM_CREATE_VRAM_CLEARED handling (v2)")
Signed-off-by: Samuel Pitoiset 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index d1ef1d064de4..556236a112c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -403,8 +403,11 @@ int amdgpu_bo_create_restricted(struct
amdgpu_device *adev,
 >placement, page_align, !kernel, NULL,
 acc_size, sg, resv ? resv : >tbo.ttm_resv,
 _ttm_bo_destroy);
-if (unlikely(r != 0))
+if (unlikely(r != 0)) {
+if (!resv)
+ww_mutex_unlock(>tbo.resv->lock);
 return r;
+}


I was looking at this myself a couple of weeks back, and I'm pretty sure
I had this exact same patch just to realize that it's actually
incorrect.

The problem is that ttm_bo_init will actually call the destroy function
(in our case, amdgpu_ttm_bo_destroy), so at this point, bo has been
freed.

This code is a huge mess. I'm surprised though: have you verified that
this patch actually fixes a hang?


Yes, I triple-checked. I can't reproduce the hangs with Hitman.


That's surprising, but a relief. Maybe it ties into some of the other
problems I'm seeing as well.

This means we need a real fix for this; I still think the current patch
is broken.



This fixes a deadlock, here's the report:
https://hastebin.com/durodivoma.xml

The resv->lock has to be unlocked when ttm_bo_init() fails (I checked
with a WARN_ON(is_locked)) because it doesn't call the destroy function
in all situations. Presumably, when drm_vma_offset_add() fails and resv
is not NULL, the mutex is not unlocked.


On which code path is the destroy function not called? If that is the
case, we're leaking memory.

With the patch as-is, the error paths are either leaking memory (if
you're right) or accessing memory after it's freed (otherwise).
Obviously, neither is good.


Actually, I find it extremely suspicious that this patch resolves hangs. 
By all rights, no other task should have a pointer to this bo left. It 
points at problems elsewhere in the code, possibly the precise problem 
I've been trying to track down.


Could you please revert the patch, reproduce the hang, and report 
/proc/$pid/stack for all the hung tasks?


Thanks,
Nicolai
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v2 1/2] drm/amdgpu: fix a potential deadlock in amdgpu_bo_create_restricted()

2017-02-13 Thread Samuel Pitoiset



On 02/13/2017 07:04 PM, Nicolai Hähnle wrote:

On 13.02.2017 18:49, Samuel Pitoiset wrote:



On 02/13/2017 05:25 PM, Nicolai Hähnle wrote:

On 09.02.2017 11:33, Samuel Pitoiset wrote:

When ttm_bo_init() fails, the reservation mutex should be unlocked.

In debug build, the kernel reported "possible recursive locking
detected" in this codepath. For debugging purposes, I also added
a "WARN_ON(ww_mutex_is_locked())" when ttm_bo_init() fails and the
mutex was locked as expected.

This should fix (random) GPU hangs. The easy way to reproduce the
issue is to change the "Super Sampling" option from 1.0 to 2.0 in
Hitman. It will create a huge buffer, evict a bunch of buffers
(around ~5k) and deadlock.

This regression has been introduced pretty recently.

v2: only release the mutex if resv is NULL

Fixes: 12a852219583 ("drm/amdgpu: improve
AMDGPU_GEM_CREATE_VRAM_CLEARED handling (v2)")
Signed-off-by: Samuel Pitoiset 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index d1ef1d064de4..556236a112c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -403,8 +403,11 @@ int amdgpu_bo_create_restricted(struct
amdgpu_device *adev,
 >placement, page_align, !kernel, NULL,
 acc_size, sg, resv ? resv : >tbo.ttm_resv,
 _ttm_bo_destroy);
-if (unlikely(r != 0))
+if (unlikely(r != 0)) {
+if (!resv)
+ww_mutex_unlock(>tbo.resv->lock);
 return r;
+}


I was looking at this myself a couple of weeks back, and I'm pretty sure
I had this exact same patch just to realize that it's actually
incorrect.

The problem is that ttm_bo_init will actually call the destroy function
(in our case, amdgpu_ttm_bo_destroy), so at this point, bo has been
freed.

This code is a huge mess. I'm surprised though: have you verified that
this patch actually fixes a hang?


Yes, I triple-checked. I can't reproduce the hangs with Hitman.


That's surprising, but a relief. Maybe it ties into some of the other
problems I'm seeing as well.

This means we need a real fix for this; I still think the current patch
is broken.


Maybe the issue is somewhere else and this is not the proper solution, but 
I don't think the given patch is broken as-is. It fixes deadlocks which 
are pretty easy to reproduce with Hitman (as explained in the commit 
description).






This fixes a deadlock, here's the report:
https://hastebin.com/durodivoma.xml

The resv->lock has to be unlocked when ttm_bo_init() fails (I checked
with a WARN_ON(is_locked)) because it doesn't call the destroy function
in all situations. Presumably, when drm_vma_offset_add() fails and resv
is not NULL, the mutex is not unlocked.


On which code path is the destroy function not called? If that is the
case, we're leaking memory.

With the patch as-is, the error paths are either leaking memory (if
you're right) or accessing memory after it's freed (otherwise).
Obviously, neither is good.


No, I was wrong. resv is always NULL in this situation. The best 
solution is probably to try to clean up that code path because I do 
agree: it's a bit messy.




Nicolai

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v2 1/2] drm/amdgpu: fix a potential deadlock in amdgpu_bo_create_restricted()

2017-02-13 Thread Nicolai Hähnle

On 13.02.2017 19:11, Samuel Pitoiset wrote:



On 02/13/2017 07:04 PM, Nicolai Hähnle wrote:

On 13.02.2017 18:49, Samuel Pitoiset wrote:



On 02/13/2017 05:25 PM, Nicolai Hähnle wrote:

On 09.02.2017 11:33, Samuel Pitoiset wrote:

When ttm_bo_init() fails, the reservation mutex should be unlocked.

In debug build, the kernel reported "possible recursive locking
detected" in this codepath. For debugging purposes, I also added
a "WARN_ON(ww_mutex_is_locked())" when ttm_bo_init() fails and the
mutex was locked as expected.

This should fix (random) GPU hangs. The easy way to reproduce the
issue is to change the "Super Sampling" option from 1.0 to 2.0 in
Hitman. It will create a huge buffer, evict a bunch of buffers
(around ~5k) and deadlock.

This regression has been introduced pretty recently.

v2: only release the mutex if resv is NULL

Fixes: 12a852219583 ("drm/amdgpu: improve
AMDGPU_GEM_CREATE_VRAM_CLEARED handling (v2)")
Signed-off-by: Samuel Pitoiset 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index d1ef1d064de4..556236a112c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -403,8 +403,11 @@ int amdgpu_bo_create_restricted(struct
amdgpu_device *adev,
 >placement, page_align, !kernel, NULL,
 acc_size, sg, resv ? resv : >tbo.ttm_resv,
 _ttm_bo_destroy);
-if (unlikely(r != 0))
+if (unlikely(r != 0)) {
+if (!resv)
+ww_mutex_unlock(>tbo.resv->lock);
 return r;
+}


I was looking at this myself a couple of weeks back, and I'm pretty
sure
I had this exact same patch just to realize that it's actually
incorrect.

The problem is that ttm_bo_init will actually call the destroy function
(in our case, amdgpu_ttm_bo_destroy), so at this point, bo has been
freed.

This code is a huge mess. I'm surprised though: have you verified that
this patch actually fixes a hang?


Yes, I triple-checked. I can't reproduce the hangs with Hitman.


That's surprising, but a relief. Maybe it ties into some of the other
problems I'm seeing as well.

This means we need a real fix for this; I still think the current patch
is broken.


Maybe the issue is somewhere else and this is not the proper solution, but
I don't think the given patch is broken as-is. It fixes deadlocks which
are pretty easy to reproduce with Hitman (as explained in the commit
description).


I'm sorry, but a use-after-free is clearly broken.

Nicolai







This fixes a deadlock, here's the report:
https://hastebin.com/durodivoma.xml

The resv->lock has to be unlocked when ttm_bo_init() fails (I checked
with a WARN_ON(is_locked)) because it doesn't call the destroy function
in all situations. Presumably, when drm_vma_offset_add() fails and resv
is not NULL, the mutex is not unlocked.


On which code path is the destroy function not called? If that is the
case, we're leaking memory.

With the patch as-is, the error paths are either leaking memory (if
you're right) or accessing memory after it's freed (otherwise).
Obviously, neither is good.


No, I was wrong. resv is always NULL in this situation. The best
solution is probably to try to clean up that code path because I do
agree: it's a bit messy.



Nicolai


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/2] drm/amdgpu: fix PRT cleanup order in the VM

2017-02-13 Thread Nicolai Hähnle

On 13.02.2017 17:40, Nicolai Hähnle wrote:

On 13.02.2017 14:23, Christian König wrote:

From: Christian König 

We need to unmap the PRTs first and then free our scheduler entity.


Thanks for the quick fix! Both patches are

Reviewed-by: Nicolai Hähnle 

... and I'll probably get around to testing them soon, as well.


Hmm, I still get both the "suspicious RCU" and amdgpu_bo_gpu_offset warning.

Nicolai





Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index bc32239..0b7386e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1687,8 +1687,6 @@ void amdgpu_vm_fini(struct amdgpu_device *adev,
struct amdgpu_vm *vm)
 struct amdgpu_bo_va_mapping *mapping, *tmp;
 int i;

-amd_sched_entity_fini(vm->entity.sched, &vm->entity);
-
 if (!RB_EMPTY_ROOT(&vm->va)) {
 dev_err(adev->dev, "still active bo inside vm\n");
 }
@@ -1706,6 +1704,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev,
struct amdgpu_vm *vm)
 }
 amdgpu_vm_clear_freed(adev, vm);

+amd_sched_entity_fini(vm->entity.sched, &vm->entity);
+
 for (i = 0; i < amdgpu_vm_num_pdes(adev); i++) {
 struct amdgpu_bo *pt = vm->page_tables[i].bo;






___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: Add to initialization of mmVCE_VCPU_CNTL register

2017-02-13 Thread Alex Deucher
From: Alan Harrison 

Add a bit needed during initialization into the driver, where it is supposed
to be set.  Currently, this is happening in the VCE firmware; although that is
functional, the driver is the correct place to perform this initialization.

Reviewed-by: Leo Liu 
Signed-off-by: Alan Harrison 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c 
b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index a8c40ee..a782985 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -510,6 +510,8 @@ static void vce_v3_0_mc_resume(struct amdgpu_device *adev, 
int idx)
WREG32(mmVCE_LMI_SWAP_CNTL, 0);
WREG32(mmVCE_LMI_SWAP_CNTL1, 0);
WREG32(mmVCE_LMI_VM_CTRL, 0);
+   WREG32_OR(mmVCE_VCPU_CNTL, 0x00100000);
+
if (adev->asic_type >= CHIP_STONEY) {
WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR0, (adev->vce.gpu_addr >> 
8));
WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR1, (adev->vce.gpu_addr >> 
8));
-- 
2.5.5

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v2 1/2] drm/amdgpu: fix a potential deadlock in amdgpu_bo_create_restricted()

2017-02-13 Thread Nicolai Hähnle

On 13.02.2017 18:49, Samuel Pitoiset wrote:



On 02/13/2017 05:25 PM, Nicolai Hähnle wrote:

On 09.02.2017 11:33, Samuel Pitoiset wrote:

When ttm_bo_init() fails, the reservation mutex should be unlocked.

In debug build, the kernel reported "possible recursive locking
detected" in this codepath. For debugging purposes, I also added
a "WARN_ON(ww_mutex_is_locked())" when ttm_bo_init() fails and the
mutex was locked as expected.

This should fix (random) GPU hangs. The easy way to reproduce the
issue is to change the "Super Sampling" option from 1.0 to 2.0 in
Hitman. It will create a huge buffer, evict a bunch of buffers
(around ~5k) and deadlock.

This regression has been introduced pretty recently.

v2: only release the mutex if resv is NULL

Fixes: 12a852219583 ("drm/amdgpu: improve
AMDGPU_GEM_CREATE_VRAM_CLEARED handling (v2)")
Signed-off-by: Samuel Pitoiset 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index d1ef1d064de4..556236a112c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -403,8 +403,11 @@ int amdgpu_bo_create_restricted(struct
amdgpu_device *adev,
 &bo->placement, page_align, !kernel, NULL,
 acc_size, sg, resv ? resv : &bo->tbo.ttm_resv,
 &amdgpu_ttm_bo_destroy);
-if (unlikely(r != 0))
+if (unlikely(r != 0)) {
+if (!resv)
+ww_mutex_unlock(&bo->tbo.resv->lock);
 return r;
+}


I was looking at this myself a couple of weeks back, and I'm pretty sure
I had this exact same patch just to realize that it's actually incorrect.

The problem is that ttm_bo_init will actually call the destroy function
(in our case, amdgpu_ttm_bo_destroy), so at this point, bo has been
freed.

This code is a huge mess. I'm surprised though: have you verified that
this patch actually fixes a hang?


Yes, I triple-checked. I can't reproduce the hangs with Hitman.


That's surprising, but a relief. Maybe it ties into some of the other 
problems I'm seeing as well.


This means we need a real fix for this; I still think the current patch 
is broken.




This fixes a deadlock, here's the report:
https://hastebin.com/durodivoma.xml

The resv->lock has to be unlocked when ttm_bo_init() fails (I checked
with a WARN_ON(is_locked)) because it doesn't call the destroy function
in all situations. Presumably, when drm_vma_offset_add() fails and resv
is not NULL, the mutex is not unlocked.


On which code path is the destroy function not called? If that is the 
case, we're leaking memory.


With the patch as-is, the error paths are either leaking memory (if 
you're right) or accessing memory after it's freed (otherwise). 
Obviously, neither is good.


Nicolai
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: Initial Public Release of AMDGPU debugger

2017-02-13 Thread Nils Holland
On Sat, Feb 04, 2017 at 06:44:12PM +0000, StDenis, Tom wrote:
> Hello all,
> 
> 
> We're pleased to announce the initial public release of the AMDGPU
> User Mode Register debugger (umr).  This tool allows privileged
> users to read and write GPU registers in order to diagnose, debug,
> and aid in development of AMDGPU features.  The tool supports a
> variety of other commands for actions such as decoding ring
> contents, analyzing wavefronts, viewing machine status, and more.
> It supports SI through VI devices and requires a very recent kernel
> (what will be 4.10).

Just a short question: Am I correct in noticing that the tool
currently only supports standalone GPUs and not the APUs, e.g. I have
a:

VGA compatible controller: Advanced Micro Devices, Inc. [AMD/ATI]
Kabini [Radeon HD 8210]

as part of an AMD E1-2100 APU and the tool tells me:

nils@teela ~ $ umr -c
ERROR: Device 0x9834 not found in UMR device table
ASIC not found (instance=0, did=)

I also don't see any reference to kabini in the tool's code, so unless
I'm doing something wrong, this might be expected, right?

Greetings
Nils
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: Initial Public Release of AMDGPU debugger

2017-02-13 Thread Nicolai Hähnle

On 13.02.2017 03:39, Dave Airlie wrote:

Are there any plans or would it be possible to add some sort of info on what you
are looking at with UMR. Say the GRBM busy states what sort of meaning
can be extracted from the percentage values etc, can you say with how busy
some of the blocks are what should be done next to try and optimise things
or to look for problems etc.


Honestly, I think the GRBM bits are a bit too coarse-grained to give you 
a lot of information for performance work.


You can say some extremely rough things, like if VGT is busier than CB, 
you know that something's the issue with geometry processing, but I'm 
not sure it's much more help than that.


Similarly, you _might_ be able to tell how far down the pipeline the 
hang is. (FWIW, the wave debugging is actually pretty cool, e.g. it can 
help you isolate bugs where a shader has an infinite loop.)


I think part of the issue is that for a block to count as idle for the 
purpose of GRBM, it really has to be _completely_ idle. If block A is 
stalled waiting on block B, both blocks will show as busy in the GRBM 
status. The performance counters, i.e. GALLIUM_HUD, can give you a more 
detailed picture.


I'd say that for performance work, umr is about the level where you 
might whip it out for an extremely rough view of the situation, similar 
to how you'd whip out top to get an extremely rough view of what's 
happening on the CPU. It's useful, but many/most times, you'll need 
something else to go deeper.


Cheers,
Nicolai
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v2 1/2] drm/amdgpu: fix a potential deadlock in amdgpu_bo_create_restricted()

2017-02-13 Thread Nicolai Hähnle

On 13.02.2017 19:58, Nicolai Hähnle wrote:

On 13.02.2017 19:38, Samuel Pitoiset wrote:



On 02/13/2017 07:09 PM, Nicolai Hähnle wrote:

On 13.02.2017 19:04, Nicolai Hähnle wrote:

On 13.02.2017 18:49, Samuel Pitoiset wrote:



On 02/13/2017 05:25 PM, Nicolai Hähnle wrote:

On 09.02.2017 11:33, Samuel Pitoiset wrote:

When ttm_bo_init() fails, the reservation mutex should be unlocked.

In debug build, the kernel reported "possible recursive locking
detected" in this codepath. For debugging purposes, I also added
a "WARN_ON(ww_mutex_is_locked())" when ttm_bo_init() fails and the
mutex was locked as expected.

This should fix (random) GPU hangs. The easy way to reproduce the
issue is to change the "Super Sampling" option from 1.0 to 2.0 in
Hitman. It will create a huge buffer, evict a bunch of buffers
(around ~5k) and deadlock.

This regression has been introduced pretty recently.

v2: only release the mutex if resv is NULL

Fixes: 12a852219583 ("drm/amdgpu: improve
AMDGPU_GEM_CREATE_VRAM_CLEARED handling (v2)")
Signed-off-by: Samuel Pitoiset 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index d1ef1d064de4..556236a112c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -403,8 +403,11 @@ int amdgpu_bo_create_restricted(struct
amdgpu_device *adev,
 &bo->placement, page_align, !kernel, NULL,
 acc_size, sg, resv ? resv : &bo->tbo.ttm_resv,
 &amdgpu_ttm_bo_destroy);
-if (unlikely(r != 0))
+if (unlikely(r != 0)) {
+if (!resv)
+ww_mutex_unlock(&bo->tbo.resv->lock);
 return r;
+}


I was looking at this myself a couple of weeks back, and I'm pretty
sure
I had this exact same patch just to realize that it's actually
incorrect.

The problem is that ttm_bo_init will actually call the destroy
function
(in our case, amdgpu_ttm_bo_destroy), so at this point, bo has been
freed.

This code is a huge mess. I'm surprised though: have you verified
that
this patch actually fixes a hang?


Yes, I triple-checked. I can't reproduce the hangs with Hitman.


That's surprising, but a relief. Maybe it ties into some of the other
problems I'm seeing as well.

This means we need a real fix for this; I still think the current patch
is broken.



This fixes a deadlock, here's the report:
https://hastebin.com/durodivoma.xml

The resv->lock has to be unlocked when ttm_bo_init() fails (I checked
with a WARN_ON(is_locked)) because it doesn't call the destroy
function
in all situations. Presumably, when drm_vma_offset_add() fails and
resv
is not NULL, the mutex is not unlocked.


On which code path is the destroy function not called? If that is the
case, we're leaking memory.

With the patch as-is, the error paths are either leaking memory (if
you're right) or accessing memory after it's freed (otherwise).
Obviously, neither is good.


Actually, I find it extremely suspicious that this patch resolves hangs.
By all rights, no other task should have a pointer to this bo left. It
points at problems elsewhere in the code, possibly the precise problem
I've been trying to track down.


Well, maybe we are just lucky but as I said, I checked many times to
reproduce the issue with that patch applied without any success, you can
trust me. Although I'm also starting to think that's not the right
solution (and could introduce other ones).



Could you please revert the patch, reproduce the hang, and report
/proc/$pid/stack for all the hung tasks?


Sure. The thing is: Hitman's branch has been updated during the weekend
and my local installation is broken. I need to re-download the whole
game (will take a while).

I will let you know when I'm able to grab that report.


Hmm, so I thought about this some more, and I'm no longer so sure that
your bug and mine are the same. If it was related, I'd somehow expect
you to get an error about a mutex being destroyed while it's held (at
least with lock debugging enabled).

Anyway... we need to change the contract of ttm_bo_init, I'm just not
yet sure how, because there are two points of failure: one quite early
on, and the second rather late which gets cleaned up by ttm_bo_unref.


Maybe it would actually be best to split ttm_bo_init into two parts: the 
initial bulk of structure initialization as the first half, and the 
ttm_bo_validate in the second half.


Cheers,
Nicolai



Cheers,
Nicolai


Thanks Nicolai.


Thanks,
Nicolai




___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amd/amdgpu: Fix flow control in uvd_v4_2_stop()

2017-02-13 Thread Tom St Denis
Break out of outer loop properly.

Signed-off-by: Tom St Denis 
Reported-by: Dan Carpenter 
---
 drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c 
b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index 97af4827f652..b34cefc7ebd5 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -401,7 +401,8 @@ static void uvd_v4_2_stop(struct amdgpu_device *adev)
break;
mdelay(1);
}
-   break;
+   if (status & 2)
+   break;
}
 
for (i = 0; i < 10; ++i) {
@@ -411,7 +412,8 @@ static void uvd_v4_2_stop(struct amdgpu_device *adev)
break;
mdelay(1);
}
-   break;
+   if (status & 0xf)
+   break;
}
 
/* Stall UMC and register bus before resetting VCPU */
@@ -424,7 +426,8 @@ static void uvd_v4_2_stop(struct amdgpu_device *adev)
break;
mdelay(1);
}
-   break;
+   if (status & 0x240)
+   break;
}
 
WREG32_P(0x3D49, 0, ~(1 << 2));
-- 
2.11.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v2 1/2] drm/amdgpu: fix a potential deadlock in amdgpu_bo_create_restricted()

2017-02-13 Thread Samuel Pitoiset



On 02/13/2017 07:19 PM, Nicolai Hähnle wrote:

On 13.02.2017 19:11, Samuel Pitoiset wrote:



On 02/13/2017 07:04 PM, Nicolai Hähnle wrote:

On 13.02.2017 18:49, Samuel Pitoiset wrote:



On 02/13/2017 05:25 PM, Nicolai Hähnle wrote:

On 09.02.2017 11:33, Samuel Pitoiset wrote:

When ttm_bo_init() fails, the reservation mutex should be unlocked.

In debug build, the kernel reported "possible recursive locking
detected" in this codepath. For debugging purposes, I also added
a "WARN_ON(ww_mutex_is_locked())" when ttm_bo_init() fails and the
mutex was locked as expected.

This should fix (random) GPU hangs. The easy way to reproduce the
issue is to change the "Super Sampling" option from 1.0 to 2.0 in
Hitman. It will create a huge buffer, evict a bunch of buffers
(around ~5k) and deadlock.

This regression has been introduced pretty recently.

v2: only release the mutex if resv is NULL

Fixes: 12a852219583 ("drm/amdgpu: improve
AMDGPU_GEM_CREATE_VRAM_CLEARED handling (v2)")
Signed-off-by: Samuel Pitoiset 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index d1ef1d064de4..556236a112c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -403,8 +403,11 @@ int amdgpu_bo_create_restricted(struct
amdgpu_device *adev,
 &bo->placement, page_align, !kernel, NULL,
 acc_size, sg, resv ? resv : &bo->tbo.ttm_resv,
 &amdgpu_ttm_bo_destroy);
-if (unlikely(r != 0))
+if (unlikely(r != 0)) {
+if (!resv)
+ww_mutex_unlock(&bo->tbo.resv->lock);
 return r;
+}


I was looking at this myself a couple of weeks back, and I'm pretty
sure
I had this exact same patch just to realize that it's actually
incorrect.

The problem is that ttm_bo_init will actually call the destroy
function
(in our case, amdgpu_ttm_bo_destroy), so at this point, bo has been
freed.

This code is a huge mess. I'm surprised though: have you verified that
this patch actually fixes a hang?


Yes, I triple-checked. I can't reproduce the hangs with Hitman.


That's surprising, but a relief. Maybe it ties into some of the other
problems I'm seeing as well.

This means we need a real fix for this; I still think the current patch
is broken.


Maybe the issue is somewhere else and this is not the proper solution, but
I don't think the given patch is broken as-is. It fixes deadlocks which
are pretty easy to reproduce with Hitman (as explained in the commit
description).


I'm sorry, but a use-after-free is clearly broken.


You are right. If the destroy callback is called, there is a 
use-after-free which is bad, really..




Nicolai







This fixes a deadlock, here's the report:
https://hastebin.com/durodivoma.xml

The resv->lock has to be unlocked when ttm_bo_init() fails (I checked
with a WARN_ON(is_locked)) because it doesn't call the destroy function
in all situations. Presumably, when drm_vma_offset_add() fails and resv
is not NULL, the mutex is not unlocked.


On which code path is the destroy function not called? If that is the
case, we're leaking memory.

With the patch as-is, the error paths are either leaking memory (if
you're right) or accessing memory after it's freed (otherwise).
Obviously, neither is good.


No, I was wrong. resv is always NULL in this situation. The best
solution is probably to try to clean up that code path because I do
agree: it's a bit messy.



Nicolai



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/2] drm/amdgpu: fix PRT cleanup order in the VM

2017-02-13 Thread Christian König

Am 13.02.2017 um 18:32 schrieb Nicolai Hähnle:

On 13.02.2017 17:40, Nicolai Hähnle wrote:

On 13.02.2017 14:23, Christian König wrote:

From: Christian König 

We need to unmap the PRTs first and then free our scheduler entity.


Thanks for the quick fix! Both patches are

Reviewed-by: Nicolai Hähnle 

... and I'll probably get around to testing them soon, as well.


Hmm, I still get both the "suspicious RCU" and amdgpu_bo_gpu_offset 
warning.


Yeah, that's actually another issue. Going to work on that as well.

I probably should come up with a more complete solution covering both cases.

Regards,
Christian.



Nicolai





Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index bc32239..0b7386e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1687,8 +1687,6 @@ void amdgpu_vm_fini(struct amdgpu_device *adev,
struct amdgpu_vm *vm)
 struct amdgpu_bo_va_mapping *mapping, *tmp;
 int i;

-amd_sched_entity_fini(vm->entity.sched, &vm->entity);
-
 if (!RB_EMPTY_ROOT(&vm->va)) {
 dev_err(adev->dev, "still active bo inside vm\n");
 }
@@ -1706,6 +1704,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev,
struct amdgpu_vm *vm)
 }
 amdgpu_vm_clear_freed(adev, vm);

+amd_sched_entity_fini(vm->entity.sched, &vm->entity);
+
 for (i = 0; i < amdgpu_vm_num_pdes(adev); i++) {
 struct amdgpu_bo *pt = vm->page_tables[i].bo;








___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v2 1/2] drm/amdgpu: fix a potential deadlock in amdgpu_bo_create_restricted()

2017-02-13 Thread Samuel Pitoiset



On 02/13/2017 07:09 PM, Nicolai Hähnle wrote:

On 13.02.2017 19:04, Nicolai Hähnle wrote:

On 13.02.2017 18:49, Samuel Pitoiset wrote:



On 02/13/2017 05:25 PM, Nicolai Hähnle wrote:

On 09.02.2017 11:33, Samuel Pitoiset wrote:

When ttm_bo_init() fails, the reservation mutex should be unlocked.

In debug build, the kernel reported "possible recursive locking
detected" in this codepath. For debugging purposes, I also added
a "WARN_ON(ww_mutex_is_locked())" when ttm_bo_init() fails and the
mutex was locked as expected.

This should fix (random) GPU hangs. The easy way to reproduce the
issue is to change the "Super Sampling" option from 1.0 to 2.0 in
Hitman. It will create a huge buffer, evict a bunch of buffers
(around ~5k) and deadlock.

This regression has been introduced pretty recently.

v2: only release the mutex if resv is NULL

Fixes: 12a852219583 ("drm/amdgpu: improve
AMDGPU_GEM_CREATE_VRAM_CLEARED handling (v2)")
Signed-off-by: Samuel Pitoiset 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index d1ef1d064de4..556236a112c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -403,8 +403,11 @@ int amdgpu_bo_create_restricted(struct
amdgpu_device *adev,
 &bo->placement, page_align, !kernel, NULL,
 acc_size, sg, resv ? resv : &bo->tbo.ttm_resv,
 &amdgpu_ttm_bo_destroy);
-if (unlikely(r != 0))
+if (unlikely(r != 0)) {
+if (!resv)
+ww_mutex_unlock(&bo->tbo.resv->lock);
 return r;
+}


I was looking at this myself a couple of weeks back, and I'm pretty
sure
I had this exact same patch just to realize that it's actually
incorrect.

The problem is that ttm_bo_init will actually call the destroy function
(in our case, amdgpu_ttm_bo_destroy), so at this point, bo has been
freed.

This code is a huge mess. I'm surprised though: have you verified that
this patch actually fixes a hang?


Yes, I triple-checked. I can't reproduce the hangs with Hitman.


That's surprising, but a relief. Maybe it ties into some of the other
problems I'm seeing as well.

This means we need a real fix for this; I still think the current patch
is broken.



This fixes a deadlock, here's the report:
https://hastebin.com/durodivoma.xml

The resv->lock has to be unlocked when ttm_bo_init() fails (I checked
with a WARN_ON(is_locked)) because it doesn't call the destroy function
in all situations. Presumably, when drm_vma_offset_add() fails and resv
is not NULL, the mutex is not unlocked.


On which code path is the destroy function not called? If that is the
case, we're leaking memory.

With the patch as-is, the error paths are either leaking memory (if
you're right) or accessing memory after it's freed (otherwise).
Obviously, neither is good.


Actually, I find it extremely suspicious that this patch resolves hangs.
By all rights, no other task should have a pointer to this bo left. It
points at problems elsewhere in the code, possibly the precise problem
I've been trying to track down.


Well, maybe we are just lucky but as I said, I checked many times to 
reproduce the issue with that patch applied without any success, you can 
trust me. Although I'm also starting to think that's not the right 
solution (and could introduce other ones).




Could you please revert the patch, reproduce the hang, and report
/proc/$pid/stack for all the hung tasks?


Sure. The thing is: Hitman's branch has been updated during the weekend 
and my local installation is broken. I need to re-download the whole 
game (will take a while).


I will let you know when I'm able to grab that report.

Thanks Nicolai.


Thanks,
Nicolai

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v2 1/2] drm/amdgpu: fix a potential deadlock in amdgpu_bo_create_restricted()

2017-02-13 Thread Samuel Pitoiset



On 02/13/2017 07:41 PM, Christian König wrote:

Am 13.02.2017 um 19:32 schrieb Samuel Pitoiset:



On 02/13/2017 07:19 PM, Nicolai Hähnle wrote:

On 13.02.2017 19:11, Samuel Pitoiset wrote:



On 02/13/2017 07:04 PM, Nicolai Hähnle wrote:

On 13.02.2017 18:49, Samuel Pitoiset wrote:



On 02/13/2017 05:25 PM, Nicolai Hähnle wrote:

On 09.02.2017 11:33, Samuel Pitoiset wrote:

When ttm_bo_init() fails, the reservation mutex should be unlocked.

In debug build, the kernel reported "possible recursive locking
detected" in this codepath. For debugging purposes, I also added
a "WARN_ON(ww_mutex_is_locked())" when ttm_bo_init() fails and the
mutex was locked as expected.

This should fix (random) GPU hangs. The easy way to reproduce the
issue is to change the "Super Sampling" option from 1.0 to 2.0 in
Hitman. It will create a huge buffer, evict a bunch of buffers
(around ~5k) and deadlock.

This regression has been introduced pretty recently.

v2: only release the mutex if resv is NULL

Fixes: 12a852219583 ("drm/amdgpu: improve
AMDGPU_GEM_CREATE_VRAM_CLEARED handling (v2)")
Signed-off-by: Samuel Pitoiset 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index d1ef1d064de4..556236a112c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -403,8 +403,11 @@ int amdgpu_bo_create_restricted(struct
amdgpu_device *adev,
 &bo->placement, page_align, !kernel, NULL,
 acc_size, sg, resv ? resv : &bo->tbo.ttm_resv,
 &amdgpu_ttm_bo_destroy);
-if (unlikely(r != 0))
+if (unlikely(r != 0)) {
+if (!resv)
+ ww_mutex_unlock(&bo->tbo.resv->lock);
 return r;
+}


I was looking at this myself a couple of weeks back, and I'm pretty
sure
I had this exact same patch just to realize that it's actually
incorrect.

The problem is that ttm_bo_init will actually call the destroy
function
(in our case, amdgpu_ttm_bo_destroy), so at this point, bo has been
freed.

This code is a huge mess. I'm surprised though: have you verified
that
this patch actually fixes a hang?


Yes, I triple-checked. I can't reproduce the hangs with Hitman.


That's surprising, but a relief. Maybe it ties into some of the other
problems I'm seeing as well.

This means we need a real fix for this; I still think the current
patch
is broken.


Maybe the issue is somewhere else and this is not the proper solution, but
I don't think the given patch is broken as-is. It fixes deadlocks which
are pretty easy to reproduce with Hitman (as explained in the commit
description).


I'm sorry, but a use-after-free is clearly broken.


You are right. If the destroy callback is called, there is a
use-after-free which is bad, really..


bad. Calling the destroy callback when something goes wrong sounds fishy
to me in the first place when the structure initialized here is
allocated by the caller.

Probably best to clean that up from the beginning.


Yes.
I wonder where the original issue comes from though. I will need to 
investigate more.




Regards,
Christian.





Nicolai







This fixes a deadlock, here's the report:
https://hastebin.com/durodivoma.xml

The resv->lock has to be unlocked when ttm_bo_init() fails (I checked
with a WARN_ON(is_locked)) because it doesn't call the destroy
function
in all situations. Presumably, when drm_vma_offset_add() fails and
resv
is not NULL, the mutex is not unlocked.


On which code path is the destroy function not called? If that is the
case, we're leaking memory.

With the patch as-is, the error paths are either leaking memory (if
you're right) or accessing memory after it's freed (otherwise).
Obviously, neither is good.


No, I was wrong. resv is always NULL in this situation. The best
solution is probably to try to clean up that code path because I do
agree: it's a bit messy.



Nicolai



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx




___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v2 1/2] drm/amdgpu: fix a potential deadlock in amdgpu_bo_create_restricted()

2017-02-13 Thread Christian König

Am 13.02.2017 um 19:32 schrieb Samuel Pitoiset:



On 02/13/2017 07:19 PM, Nicolai Hähnle wrote:

On 13.02.2017 19:11, Samuel Pitoiset wrote:



On 02/13/2017 07:04 PM, Nicolai Hähnle wrote:

On 13.02.2017 18:49, Samuel Pitoiset wrote:



On 02/13/2017 05:25 PM, Nicolai Hähnle wrote:

On 09.02.2017 11:33, Samuel Pitoiset wrote:

When ttm_bo_init() fails, the reservation mutex should be unlocked.

In debug build, the kernel reported "possible recursive locking
detected" in this codepath. For debugging purposes, I also added
a "WARN_ON(ww_mutex_is_locked())" when ttm_bo_init() fails and the
mutex was locked as expected.

This should fix (random) GPU hangs. The easy way to reproduce the
issue is to change the "Super Sampling" option from 1.0 to 2.0 in
Hitman. It will create a huge buffer, evict a bunch of buffers
(around ~5k) and deadlock.

This regression has been introduced pretty recently.

v2: only release the mutex if resv is NULL

Fixes: 12a852219583 ("drm/amdgpu: improve
AMDGPU_GEM_CREATE_VRAM_CLEARED handling (v2)")
Signed-off-by: Samuel Pitoiset 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index d1ef1d064de4..556236a112c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -403,8 +403,11 @@ int amdgpu_bo_create_restricted(struct
amdgpu_device *adev,
 &bo->placement, page_align, !kernel, NULL,
 acc_size, sg, resv ? resv : &bo->tbo.ttm_resv,
 &amdgpu_ttm_bo_destroy);
-if (unlikely(r != 0))
+if (unlikely(r != 0)) {
+if (!resv)
+ ww_mutex_unlock(&bo->tbo.resv->lock);
 return r;
+}


I was looking at this myself a couple of weeks back, and I'm pretty
sure
I had this exact same patch just to realize that it's actually
incorrect.

The problem is that ttm_bo_init will actually call the destroy
function
(in our case, amdgpu_ttm_bo_destroy), so at this point, bo has been
freed.

This code is a huge mess. I'm surprised though: have you verified 
that

this patch actually fixes a hang?


Yes, I triple-checked. I can't reproduce the hangs with Hitman.


That's surprising, but a relief. Maybe it ties into some of the other
problems I'm seeing as well.

This means we need a real fix for this; I still think the current 
patch

is broken.


Maybe the issue is somewhere else and this is not the proper solution, but
I don't think the given patch is broken as-is. It fixes deadlocks which
are pretty easy to reproduce with Hitman (as explained in the commit
description).


I'm sorry, but a use-after-free is clearly broken.


You are right. If the destroy callback is called, there is a 
use-after-free which is bad, really..


bad. Calling the destroy callback when something goes wrong sounds fishy 
to me in the first place when the structure initialized here is 
allocated by the caller.


Probably best to clean that up from the beginning.

Regards,
Christian.





Nicolai







This fixes a deadlock, here's the report:
https://hastebin.com/durodivoma.xml

The resv->lock has to be unlocked when ttm_bo_init() fails (I checked
with a WARN_ON(is_locked)) because it doesn't call the destroy 
function
in all situations. Presumably, when drm_vma_offset_add() fails and 
resv

is not NULL, the mutex is not unlocked.


On which code path is the destroy function not called? If that is the
case, we're leaking memory.

With the patch as-is, the error paths are either leaking memory (if
you're right) or accessing memory after it's freed (otherwise).
Obviously, neither is good.


No, I was wrong. resv is always NULL in this situation. The best
solution is probably to try to clean up that code path because I do
agree: it's a bit messy.



Nicolai



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v2 1/2] drm/amdgpu: fix a potential deadlock in amdgpu_bo_create_restricted()

2017-02-13 Thread Nicolai Hähnle

On 13.02.2017 19:38, Samuel Pitoiset wrote:



On 02/13/2017 07:09 PM, Nicolai Hähnle wrote:

On 13.02.2017 19:04, Nicolai Hähnle wrote:

On 13.02.2017 18:49, Samuel Pitoiset wrote:



On 02/13/2017 05:25 PM, Nicolai Hähnle wrote:

On 09.02.2017 11:33, Samuel Pitoiset wrote:

When ttm_bo_init() fails, the reservation mutex should be unlocked.

In debug build, the kernel reported "possible recursive locking
detected" in this codepath. For debugging purposes, I also added
a "WARN_ON(ww_mutex_is_locked())" when ttm_bo_init() fails and the
mutex was locked as expected.

This should fix (random) GPU hangs. The easy way to reproduce the
issue is to change the "Super Sampling" option from 1.0 to 2.0 in
Hitman. It will create a huge buffer, evict a bunch of buffers
(around ~5k) and deadlock.

This regression has been introduced pretty recently.

v2: only release the mutex if resv is NULL

Fixes: 12a852219583 ("drm/amdgpu: improve
AMDGPU_GEM_CREATE_VRAM_CLEARED handling (v2)")
Signed-off-by: Samuel Pitoiset 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index d1ef1d064de4..556236a112c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -403,8 +403,11 @@ int amdgpu_bo_create_restricted(struct
amdgpu_device *adev,
 &bo->placement, page_align, !kernel, NULL,
 acc_size, sg, resv ? resv : &bo->tbo.ttm_resv,
 &amdgpu_ttm_bo_destroy);
-if (unlikely(r != 0))
+if (unlikely(r != 0)) {
+if (!resv)
+ww_mutex_unlock(&bo->tbo.resv->lock);
 return r;
+}


I was looking at this myself a couple of weeks back, and I'm pretty
sure
I had this exact same patch just to realize that it's actually
incorrect.

The problem is that ttm_bo_init will actually call the destroy
function
(in our case, amdgpu_ttm_bo_destroy), so at this point, bo has been
freed.

This code is a huge mess. I'm surprised though: have you verified that
this patch actually fixes a hang?


Yes, I triple-checked. I can't reproduce the hangs with Hitman.


That's surprising, but a relief. Maybe it ties into some of the other
problems I'm seeing as well.

This means we need a real fix for this; I still think the current patch
is broken.



This fixes a deadlock, here's the report:
https://hastebin.com/durodivoma.xml

The resv->lock has to be unlocked when ttm_bo_init() fails (I checked
with a WARN_ON(is_locked)) because it doesn't call the destroy function
in all situations. Presumably, when drm_vma_offset_add() fails and resv
is not NULL, the mutex is not unlocked.


On which code path is the destroy function not called? If that is the
case, we're leaking memory.

With the patch as-is, the error paths are either leaking memory (if
you're right) or accessing memory after it's freed (otherwise).
Obviously, neither is good.


Actually, I find it extremely suspicious that this patch resolves hangs.
By all rights, no other task should have a pointer to this bo left. It
points at problems elsewhere in the code, possibly the precise problem
I've been trying to track down.


Well, maybe we are just lucky but as I said, I tried many times to
reproduce the issue with that patch applied without any success, you can
trust me. Although I'm also starting to think that's not the right
solution (and it could introduce other issues).



Could you please revert the patch, reproduce the hang, and report
/proc/$pid/stack for all the hung tasks?


Sure. The thing is: Hitman's branch has been updated during the weekend
and my local installation is broken. I need to re-download the whole
game (will take a while).

I will let you know when I'm able to grab that report.


Hmm, so I thought about this some more, and I'm no longer so sure that 
your bug and mine are the same. If it was related, I'd somehow expect 
you to get an error about a mutex being destroyed while it's held (at 
least with lock debugging enabled).


Anyway... we need to change the contract of ttm_bo_init, I'm just not 
yet sure how, because there are two points of failure: one quite early 
on, and the second rather late which gets cleaned up by ttm_bo_unref.


Cheers,
Nicolai


Thanks Nicolai.


Thanks,
Nicolai


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[bug report] drm/amdgpu: refine uvd4.2 init/stop code.

2017-02-13 Thread Dan Carpenter
Hello Rex Zhu,

The patch 8b55d17eeea7: "drm/amdgpu: refine uvd4.2 init/stop code."
from Jan 20, 2017, leads to the following static checker warning:

drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c:397 uvd_v4_2_stop()
info: ignoring unreachable code.

drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c:407 uvd_v4_2_stop()
info: ignoring unreachable code.

drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c:420 uvd_v4_2_stop()
info: ignoring unreachable code.


drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
   390  static void uvd_v4_2_stop(struct amdgpu_device *adev)
   391  {
   392  uint32_t i, j;
   393  uint32_t status;
   394  
   395  WREG32(mmUVD_RBC_RB_CNTL, 0x11010101);
   396  
   397  for (i = 0; i < 10; ++i) {
   398  for (j = 0; j < 100; ++j) {
   399  status = RREG32(mmUVD_STATUS);
   400  if (status & 2)
   401  break;
   402  mdelay(1);
   403  }
   404  break;

These i < 10 loops don't make sense because we break after the first
iteration...  What is intended?

   405  }
   406  
   407  for (i = 0; i < 10; ++i) {
   408  for (j = 0; j < 100; ++j) {
   409  status = RREG32(mmUVD_LMI_STATUS);
   410  if (status & 0xf)
   411  break;
   412  mdelay(1);
   413  }
   414  break;
^^
   415  }
   416  
   417  /* Stall UMC and register bus before resetting VCPU */
   418  WREG32_P(mmUVD_LMI_CTRL2, 1 << 8, ~(1 << 8));
   419  
   420  for (i = 0; i < 10; ++i) {
   421  for (j = 0; j < 100; ++j) {
   422  status = RREG32(mmUVD_LMI_STATUS);
   423  if (status & 0x240)
   424  break;
   425  mdelay(1);
   426  }
   427  break;
^^
   428  }
   429  

regards,
dan carpenter
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] Add missing CIK devices

2017-02-13 Thread Christian König

Am 13.02.2017 um 13:46 schrieb Tom St Denis:

Adds mullins, kabini, and hawaii ASICs to the library.

Signed-off-by: Tom St Denis 


Acked-by: Christian König .


---
  src/lib/asic/CMakeLists.txt |  3 +++
  src/lib/asic/hawaii.c   | 40 
  src/lib/asic/kabini.c   | 40 
  src/lib/asic/mullins.c  | 40 
  src/lib/discover_by_did.c   | 44 
  src/lib/discover_by_name.c  |  3 +++
  src/umr.h   |  3 +++
  7 files changed, 173 insertions(+)
  create mode 100644 src/lib/asic/hawaii.c
  create mode 100644 src/lib/asic/kabini.c
  create mode 100644 src/lib/asic/mullins.c

diff --git a/src/lib/asic/CMakeLists.txt b/src/lib/asic/CMakeLists.txt
index 6cfec309b6a5..07e9ad8cca4f 100644
--- a/src/lib/asic/CMakeLists.txt
+++ b/src/lib/asic/CMakeLists.txt
@@ -6,7 +6,10 @@ add_library(asic OBJECT
carrizo.c
fiji.c
hainan.c
+  hawaii.c
+  kabini.c
kaveri.c
+  mullins.c
oland.c
pitcairn.c
polaris10.c
diff --git a/src/lib/asic/hawaii.c b/src/lib/asic/hawaii.c
new file mode 100644
index ..07cbcac31a07
--- /dev/null
+++ b/src/lib/asic/hawaii.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Tom St Denis 
+ *
+ */
+#include "umr.h"
+
+struct umr_asic *umr_create_hawaii(struct umr_options *options)
+{
+   return
+   umr_create_asic_helper("hawaii", FAMILY_CIK,
+   umr_create_uvd42(options),
+   umr_create_vce2(options),
+   umr_create_gmc70(options),
+   umr_create_dce80(options),
+   umr_create_gfx72(options),
+   umr_create_smu700(options),
+   umr_create_oss20(options),
+   umr_create_bif41(options),
+   NULL);
+}
diff --git a/src/lib/asic/kabini.c b/src/lib/asic/kabini.c
new file mode 100644
index ..08c3eb4da684
--- /dev/null
+++ b/src/lib/asic/kabini.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Tom St Denis 
+ *
+ */
+#include "umr.h"
+
+struct umr_asic *umr_create_kabini(struct umr_options *options)
+{
+   return
+   umr_create_asic_helper("kabini", FAMILY_CIK,
+   umr_create_uvd42(options),
+   umr_create_vce2(options),
+   umr_create_gmc70(options),
+   umr_create_dce80(options),
+   umr_create_gfx72(options),
+   

[PATCH] Add missing CIK devices

2017-02-13 Thread Tom St Denis
Adds mullins, kabini, and hawaii ASICs to the library.

Signed-off-by: Tom St Denis 
---
 src/lib/asic/CMakeLists.txt |  3 +++
 src/lib/asic/hawaii.c   | 40 
 src/lib/asic/kabini.c   | 40 
 src/lib/asic/mullins.c  | 40 
 src/lib/discover_by_did.c   | 44 
 src/lib/discover_by_name.c  |  3 +++
 src/umr.h   |  3 +++
 7 files changed, 173 insertions(+)
 create mode 100644 src/lib/asic/hawaii.c
 create mode 100644 src/lib/asic/kabini.c
 create mode 100644 src/lib/asic/mullins.c

diff --git a/src/lib/asic/CMakeLists.txt b/src/lib/asic/CMakeLists.txt
index 6cfec309b6a5..07e9ad8cca4f 100644
--- a/src/lib/asic/CMakeLists.txt
+++ b/src/lib/asic/CMakeLists.txt
@@ -6,7 +6,10 @@ add_library(asic OBJECT
   carrizo.c
   fiji.c
   hainan.c
+  hawaii.c
+  kabini.c
   kaveri.c
+  mullins.c
   oland.c
   pitcairn.c
   polaris10.c
diff --git a/src/lib/asic/hawaii.c b/src/lib/asic/hawaii.c
new file mode 100644
index ..07cbcac31a07
--- /dev/null
+++ b/src/lib/asic/hawaii.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Tom St Denis 
+ *
+ */
+#include "umr.h"
+
+struct umr_asic *umr_create_hawaii(struct umr_options *options)
+{
+   return
+   umr_create_asic_helper("hawaii", FAMILY_CIK,
+   umr_create_uvd42(options),
+   umr_create_vce2(options),
+   umr_create_gmc70(options),
+   umr_create_dce80(options),
+   umr_create_gfx72(options),
+   umr_create_smu700(options),
+   umr_create_oss20(options),
+   umr_create_bif41(options),
+   NULL);
+}
diff --git a/src/lib/asic/kabini.c b/src/lib/asic/kabini.c
new file mode 100644
index ..08c3eb4da684
--- /dev/null
+++ b/src/lib/asic/kabini.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Tom St Denis 
+ *
+ */
+#include "umr.h"
+
+struct umr_asic *umr_create_kabini(struct umr_options *options)
+{
+   return
+   umr_create_asic_helper("kabini", FAMILY_CIK,
+   umr_create_uvd42(options),
+   umr_create_vce2(options),
+   umr_create_gmc70(options),
+   umr_create_dce80(options),
+   umr_create_gfx72(options),
+   umr_create_smu700(options),
+   umr_create_oss20(options),
+   umr_create_bif41(options),
+  

[PATCH 2/2] drm/amdgpu: fix PTE defines

2017-02-13 Thread Christian König
From: Christian König 

Those should be 64bit, even on a 32bit system.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 4d26e9b..51fa12f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -53,19 +53,19 @@ struct amdgpu_bo_list_entry;
 /* LOG2 number of continuous pages for the fragment field */
 #define AMDGPU_LOG2_PAGES_PER_FRAG 4
 
-#define AMDGPU_PTE_VALID   (1 << 0)
-#define AMDGPU_PTE_SYSTEM  (1 << 1)
-#define AMDGPU_PTE_SNOOPED (1 << 2)
+#define AMDGPU_PTE_VALID   (1ULL << 0)
+#define AMDGPU_PTE_SYSTEM  (1ULL << 1)
+#define AMDGPU_PTE_SNOOPED (1ULL << 2)
 
 /* VI only */
-#define AMDGPU_PTE_EXECUTABLE  (1 << 4)
+#define AMDGPU_PTE_EXECUTABLE  (1ULL << 4)
 
-#define AMDGPU_PTE_READABLE(1 << 5)
-#define AMDGPU_PTE_WRITEABLE   (1 << 6)
+#define AMDGPU_PTE_READABLE(1ULL << 5)
+#define AMDGPU_PTE_WRITEABLE   (1ULL << 6)
 
 #define AMDGPU_PTE_FRAG(x) ((x & 0x1f) << 7)
 
-#define AMDGPU_PTE_PRT (1UL << 63)
+#define AMDGPU_PTE_PRT (1ULL << 63)
 
 /* How to programm VM fault handling */
 #define AMDGPU_VM_FAULT_STOP_NEVER 0
-- 
2.5.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/2] drm/amdgpu: fix PRT cleanup order in the VM

2017-02-13 Thread Christian König
From: Christian König 

We need to unmap the PRTs first and then free our scheduler entity.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index bc32239..0b7386e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1687,8 +1687,6 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct 
amdgpu_vm *vm)
struct amdgpu_bo_va_mapping *mapping, *tmp;
int i;
 
-   amd_sched_entity_fini(vm->entity.sched, &vm->entity);
-
if (!RB_EMPTY_ROOT(&vm->va)) {
dev_err(adev->dev, "still active bo inside vm\n");
}
@@ -1706,6 +1704,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct 
amdgpu_vm *vm)
}
amdgpu_vm_clear_freed(adev, vm);
 
+   amd_sched_entity_fini(vm->entity.sched, &vm->entity);
+
for (i = 0; i < amdgpu_vm_num_pdes(adev); i++) {
struct amdgpu_bo *pt = vm->page_tables[i].bo;
 
-- 
2.5.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx