[PATCH] drm/amdgpu: tighten gpu_recover in mailbox_flr to avoid duplicate recover in sriov

2019-01-29 Thread wentalou
SR-IOV's gpu_recover call inside xgpu_ai_mailbox_flr_work would cause a
duplicate recover during TDR.
TDR's gpu_recover is already triggered by amdgpu_job_timedout, so gating the
mailbox-FLR recover avoids vk-cts failures caused by the unexpected extra recover.

Change-Id: I840dfc145e4e1be9ece6eac8d9f3501da9b28ebf
Signed-off-by: wentalou 
---
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index b11a1c17..73851eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -266,7 +266,8 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct 
*work)
}
 
/* Trigger recovery for world switch failure if no TDR */
-   if (amdgpu_device_should_recover_gpu(adev))
+   if (amdgpu_device_should_recover_gpu(adev)
+   && amdgpu_lockup_timeout == MAX_SCHEDULE_TIMEOUT)
amdgpu_device_gpu_recover(adev, NULL);
 }
 
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v2 2/4] drm/amdgpu: Only add rqs for initialized rings.

2019-01-29 Thread Bas Nieuwenhuizen
I don't see another way to figure out whether a ring is initialized when
the hardware block itself might not be initialized.

Entities have been fixed up to handle num_rqs = 0.

Signed-off-by: Bas Nieuwenhuizen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index d85184b5b35c..30407e55593b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -124,6 +124,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
unsigned num_rings;
+   unsigned num_rqs = 0;
 
switch (i) {
case AMDGPU_HW_IP_GFX:
@@ -166,12 +167,16 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
break;
}
 
-   for (j = 0; j < num_rings; ++j)
-   rqs[j] = &rings[j]->sched.sched_rq[priority];
+   for (j = 0; j < num_rings; ++j) {
+   if (rings[j]->adev) {
+   rqs[num_rqs++] =
+   &rings[j]->sched.sched_rq[priority];
+   }
+   }
 
for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
r = drm_sched_entity_init(&ctx->entities[i][j].entity,
- rqs, num_rings, &ctx->guilty);
+ rqs, num_rqs, &ctx->guilty);
if (r)
goto error_cleanup_entities;
}
-- 
2.20.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v2 4/4] drm/amdgpu: Add command to override the context priority.

2019-01-29 Thread Bas Nieuwenhuizen
Given a master fd we can then override the priority of the context
in another fd.

Using these overrides was recommended by Christian instead of trying
to submit from a master fd, and I am adding a way to override a
single context instead of the entire process so we can only upgrade
a single Vulkan queue and not effectively the entire process.

Reused the flags field since it was checked to be 0 anyway, so nothing
used it. This is source-incompatible (due to the name change), but
ABI compatible.

Signed-off-by: Bas Nieuwenhuizen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 41 ++-
 include/uapi/drm/amdgpu_drm.h |  3 +-
 2 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
index 0b70410488b6..0767a93e4d91 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
@@ -76,6 +76,39 @@ static int amdgpu_sched_process_priority_override(struct 
amdgpu_device *adev,
return 0;
 }
 
+static int amdgpu_sched_context_priority_override(struct amdgpu_device *adev,
+ int fd,
+ unsigned ctx_id,
+ enum drm_sched_priority 
priority)
+{
+   struct file *filp = fget(fd);
+   struct amdgpu_fpriv *fpriv;
+   struct amdgpu_ctx *ctx;
+   int r;
+
+   if (!filp)
+   return -EINVAL;
+
+   r = amdgpu_file_to_fpriv(filp, &fpriv);
+   if (r) {
+   fput(filp);
+   return r;
+   }
+
+   ctx = amdgpu_ctx_get(fpriv, ctx_id);
+
+   if (!ctx) {
+   fput(filp);
+   return -EINVAL;
+   }
+
+   amdgpu_ctx_priority_override(ctx, priority);
+   amdgpu_ctx_put(ctx);
+   fput(filp);
+
+   return 0;
+}
+
 int amdgpu_sched_ioctl(struct drm_device *dev, void *data,
   struct drm_file *filp)
 {
@@ -85,7 +118,7 @@ int amdgpu_sched_ioctl(struct drm_device *dev, void *data,
int r;
 
priority = amdgpu_to_sched_priority(args->in.priority);
-   if (args->in.flags || priority == DRM_SCHED_PRIORITY_INVALID)
+   if (priority == DRM_SCHED_PRIORITY_INVALID)
return -EINVAL;
 
switch (args->in.op) {
@@ -94,6 +127,12 @@ int amdgpu_sched_ioctl(struct drm_device *dev, void *data,
   args->in.fd,
   priority);
break;
+   case AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE:
+   r = amdgpu_sched_context_priority_override(adev,
+  args->in.fd,
+  args->in.ctx_id,
+  priority);
+   break;
default:
DRM_ERROR("Invalid sched op specified: %d\n", args->in.op);
r = -EINVAL;
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index faaad04814e4..30fa340790b2 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -275,13 +275,14 @@ union drm_amdgpu_vm {
 
 /* sched ioctl */
 #define AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE  1
+#define AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE  2
 
 struct drm_amdgpu_sched_in {
/* AMDGPU_SCHED_OP_* */
__u32   op;
__u32   fd;
__s32   priority;
-   __u32   flags;
+   __u32   ctx_id;
 };
 
 union drm_amdgpu_sched {
-- 
2.20.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v2 3/4] drm/amdgpu: Check if fd really is an amdgpu fd.

2019-01-29 Thread Bas Nieuwenhuizen
Otherwise we interpret the file private data as drm & amdgpu data
while it might not be, possibly allowing one to get memory corruption.

Signed-off-by: Bas Nieuwenhuizen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   |  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   | 16 
 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 10 +++---
 3 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index d67f8b1dfe80..17290cdb8ed8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -411,6 +411,8 @@ struct amdgpu_fpriv {
struct amdgpu_ctx_mgr   ctx_mgr;
 };
 
+int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv);
+
 int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  unsigned size, struct amdgpu_ib *ib);
 void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index c806f984bcc5..90a520034c89 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1176,6 +1176,22 @@ static const struct file_operations 
amdgpu_driver_kms_fops = {
 #endif
 };
 
+int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv)
+{
+struct drm_file *file;
+
+   if (!filp)
+   return -EINVAL;
+
+   if (filp->f_op != &amdgpu_driver_kms_fops) {
+   return -EINVAL;
+   }
+
+   file = filp->private_data;
+   *fpriv = file->driver_priv;
+   return 0;
+}
+
 static bool
 amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe,
 bool in_vblank_irq, int *vpos, int *hpos,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
index 1cafe8d83a4d..0b70410488b6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
@@ -54,16 +54,20 @@ static int amdgpu_sched_process_priority_override(struct 
amdgpu_device *adev,
  enum drm_sched_priority 
priority)
 {
struct file *filp = fget(fd);
-   struct drm_file *file;
struct amdgpu_fpriv *fpriv;
struct amdgpu_ctx *ctx;
uint32_t id;
+   int r;
 
if (!filp)
return -EINVAL;
 
-   file = filp->private_data;
-   fpriv = file->driver_priv;
+   r = amdgpu_file_to_fpriv(filp, &fpriv);
+   if (r) {
+   fput(filp);
+   return r;
+   }
+
idr_for_each_entry(&fpriv->ctx_mgr.ctx_handles, ctx, id)
amdgpu_ctx_priority_override(ctx, priority);
 
-- 
2.20.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v2 1/4] drm/sched: Fix entities with 0 rqs.

2019-01-29 Thread Bas Nieuwenhuizen
Some blocks in amdgpu can have 0 rqs.

Job creation already fails with -ENOENT when entity->rq is NULL,
so jobs cannot be pushed. Without a rq there is no scheduler to
pop jobs, and rq selection already does the right thing with a
list of length 0.

So the operations we need to fix are:
  - Creation, do not set rq to rq_list[0] if the list can have length 0.
  - Do not flush any jobs when there is no rq.
  - On entity destruction handle the rq = NULL case.
  - on set_priority, do not try to change the rq if it is NULL.

Signed-off-by: Bas Nieuwenhuizen 
---
 drivers/gpu/drm/scheduler/sched_entity.c | 39 
 1 file changed, 26 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_entity.c 
b/drivers/gpu/drm/scheduler/sched_entity.c
index 4463d3826ecb..8e31b6628d09 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -52,12 +52,12 @@ int drm_sched_entity_init(struct drm_sched_entity *entity,
 {
int i;
 
-   if (!(entity && rq_list && num_rq_list > 0 && rq_list[0]))
+   if (!(entity && rq_list && (num_rq_list == 0 || rq_list[0])))
return -EINVAL;
 
memset(entity, 0, sizeof(struct drm_sched_entity));
INIT_LIST_HEAD(&entity->list);
-   entity->rq = rq_list[0];
+   entity->rq = NULL;
entity->guilty = guilty;
entity->num_rq_list = num_rq_list;
entity->rq_list = kcalloc(num_rq_list, sizeof(struct drm_sched_rq *),
@@ -67,6 +67,10 @@ int drm_sched_entity_init(struct drm_sched_entity *entity,
 
for (i = 0; i < num_rq_list; ++i)
entity->rq_list[i] = rq_list[i];
+
+   if (num_rq_list)
+   entity->rq = rq_list[0];
+
entity->last_scheduled = NULL;
 
spin_lock_init(&entity->rq_lock);
@@ -165,6 +169,9 @@ long drm_sched_entity_flush(struct drm_sched_entity 
*entity, long timeout)
struct task_struct *last_user;
long ret = timeout;
 
+   if (!entity->rq)
+   return 0;
+
sched = entity->rq->sched;
/**
 * The client will not queue more IBs during this fini, consume existing
@@ -264,20 +271,24 @@ static void drm_sched_entity_kill_jobs(struct 
drm_sched_entity *entity)
  */
 void drm_sched_entity_fini(struct drm_sched_entity *entity)
 {
-   struct drm_gpu_scheduler *sched;
+   struct drm_gpu_scheduler *sched = NULL;
 
-   sched = entity->rq->sched;
-   drm_sched_rq_remove_entity(entity->rq, entity);
+   if (entity->rq) {
+   sched = entity->rq->sched;
+   drm_sched_rq_remove_entity(entity->rq, entity);
+   }
 
/* Consumption of existing IBs wasn't completed. Forcefully
 * remove them here.
 */
if (spsc_queue_peek(&entity->job_queue)) {
-   /* Park the kernel for a moment to make sure it isn't processing
-* our enity.
-*/
-   kthread_park(sched->thread);
-   kthread_unpark(sched->thread);
+   if (sched) {
+   /* Park the kernel for a moment to make sure it isn't 
processing
+* our enity.
+*/
+   kthread_park(sched->thread);
+   kthread_unpark(sched->thread);
+   }
if (entity->dependency) {
dma_fence_remove_callback(entity->dependency,
  &entity->cb);
@@ -362,9 +373,11 @@ void drm_sched_entity_set_priority(struct drm_sched_entity 
*entity,
for (i = 0; i < entity->num_rq_list; ++i)
drm_sched_entity_set_rq_priority(&entity->rq_list[i], priority);
 
-   drm_sched_rq_remove_entity(entity->rq, entity);
-   drm_sched_entity_set_rq_priority(&entity->rq, priority);
-   drm_sched_rq_add_entity(entity->rq, entity);
+   if (entity->rq) {
+   drm_sched_rq_remove_entity(entity->rq, entity);
+   drm_sched_entity_set_rq_priority(&entity->rq, priority);
+   drm_sched_rq_add_entity(entity->rq, entity);
+   }
 
spin_unlock(&entity->rq_lock);
 }
-- 
2.20.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: csa_vaddr should not larger than AMDGPU_GMC_HOLE_START

2019-01-29 Thread Kuehling, Felix
On 2019-01-21 4:55 a.m., Koenig, Christian wrote:
> Am 21.01.19 um 05:35 schrieb Liu, Monk:
>>
>> > Actually that's not so crazy at all. See the ATC uses the CPU page
>> tables to provide parts of the virtual GPU address space.
>>
>>  
>>
>> So aperture 0->hole-start will be translated **not** by GMC9’s page
>> table but instead by CPU’s (or IOMMU ?) MMU table after ATC enabled ?
>> like GMC9 give up the role to do the translate but just deliver the
>> address to CPU MMU (or IOMMU), is that correct ?
>>
>
> More or less yes. The GMC9 page tables still take precedence before
> the ATC IIRC, but the problem is simply that two hardware engines want
> to use the same address space.

The GPU looks at GPUVM page tables first. There is a special combination
of PTE bits that tells it to forward the translation to ATC. IIRC the
important bits are V=0, S=1. The full value of the ATC PTEs is defined
in amdgpu_vm.h:

#define AMDGPU_PTE_DEFAULT_ATC  (AMDGPU_PTE_SYSTEM  \
    | AMDGPU_PTE_SNOOPED    \
    | AMDGPU_PTE_EXECUTABLE \
    | AMDGPU_PTE_READABLE   \
    | AMDGPU_PTE_WRITEABLE  \
    | AMDGPU_PTE_MTYPE(AMDGPU_MTYPE_CC))

When we enable ATC for a VM, we program the PTEs and PDEs for unmapped
addresses in the range from 0->hole-start to forward all address
translation to ATC. That means, any addresses where nothing else is
explicitly mapped, ATC handles the address translation so that the GPU
can transparently access CPU virtual addresses.

If you start mapping the CSA into that address range, you override PDEs
and PTEs for the CSA address range, so they can no longer be handled by
ATC. In the CSA address range, the GPU will no longer be able to access
SVM memory in that range.

Regards,
  Felix

>
> So when you have a collision you just mess things up no matter what
> you do.
>
>>  Besides, where did you read that HOLE_START is set to be
>> 0x8000 ? any registers actually controls this number ? I want
>> to take a close look in the GMC registers regarding it
>>
>
> As far as I know that is hard wired on Vega. I was once in a meeting
> where they discussed if it should be configurable on Navi, but I'm not
> sure if they actually did that.
>
> Christian.
>
>>  
>>
>> No offending received, don’t be sorry at all, I don’t know that part
>> before and thanks for sharing it.
>>
>>  
>>
>> Thank you
>>
>> /Monk
>>
>>  
>>
>> *From:*amd-gfx  *On Behalf Of
>> *Koenig, Christian
>> *Sent:* Friday, January 18, 2019 8:21 PM
>> *To:* Liu, Monk ; Lou, Wentao ;
>> amd-gfx@lists.freedesktop.org; Zhu, Rex 
>> *Cc:* Deng, Emily 
>> *Subject:* Re: [PATCH] drm/amdgpu: csa_vaddr should not larger than
>> AMDGPU_GMC_HOLE_START
>>
>>  
>>
>> You know what, …  when you explained range 0 to HOLE-START is
>> even not good to exposed to UMD I thought you made a typo and
>> that’s why I repeat my question again …
>>
>> Sorry my fault then. Didn't wanted to sound offending.
>>
>>
>> it’s the first time I heard that GMC9  cannot use 0 -> HOLE-START
>> even for UMD general usage …
>>
>> Well you actually can do it, but then you can't use the ATC or other
>> SVA mechanism.
>>
>>
>> With your assert in DEV_INFO the “virtual_address_offset/max” is
>> now **totally** wrong … I saw current kmd still give that range
>> from 0 to HOLE_START
>>
>> That is actually correct and for backward compatibility with old
>> userspace. But since old userspace won't use the ATC that is also not
>> a problem.
>>
>> As I said one possibility to solve this issue would be to use a low
>> CSA address for SRIOV, because the ATC isn't usable with SRIOV anyway.
>>
>> I would just like to avoid that because it sounds like the CSA for
>> some reason doesn't work at all in the higher address range and we
>> will certainly then run into issues with that on bare metal as well.
>>
>>
>> I need to check what you said with some HW guys, that sounds crazy …
>>
>> Actually that's not so crazy at all. See the ATC uses the CPU page
>> tables to provide parts of the virtual GPU address space.
>>
>> E.g. when it is enabled you can then use the same pointer to memory
>> on the CPU and the GPU.
>>
>> The problem is when the UMD now manually maps something into this
>> range you can have a clash of the address space and the MC doesn't
>> know any more if it should send a request to the CPU or the GPU page
>> tables.
>>
>> Regards,
>> Christian.
>>
>> Am 18.01.19 um 11:57 schrieb Liu, Monk:
>>
>> You know what, …  when you explained range 0 to HOLE-START is
>> even not good to exposed to UMD I thought you made a typo and
>> that’s why I repeat my question again …
>>
>> it’s the first time I heard that GMC9  cannot use 0 -> HOLE-START
>> even for UMD general usage …
>>
>> With your assert in DEV_INFO the “virtual_address_offset/max” is
>> now **totally** wrong … I saw current

Re: [PATCH 1/4] drm/amd/powerplay: support enabled ppfeatures retrieving and setting

2019-01-29 Thread Kuehling, Felix

On 2019-01-14 5:01 a.m., Evan Quan wrote:
> + features_to_disable =
> + (features_enabled ^ new_ppfeature_masks) & features_enabled;
> + features_to_enable =
> + (features_enabled ^ new_ppfeature_masks) ^ features_to_disable;

This is confusing and unnecessarily complicated. I think you can achieve
the same thing in a more obvious way like this:

features_to_disable = features_enabled & ~new_pp_feature_masks;
features_to_enable = ~features_enabled & new_pp_feature_masks;

Regards,
  Felix

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: add AMDGPU_IB_FLAG_GET_START_SYNCOBJ to expose scheduled fence

2019-01-29 Thread Marek Olšák
On Tue, Jan 29, 2019 at 3:01 AM Christian König <
ckoenig.leichtzumer...@gmail.com> wrote:

> Am 28.01.19 um 22:52 schrieb Marek Olšák:
> > From: Marek Olšák 
> >
> > Normal syncobjs signal when an IB finishes. Start syncobjs signal when
> > an IB starts.
>
> That approach has quite a number of problems (for example you can't
> allocate memory at this point).
>

Even if I drop this patch, can you describe all the problems with it?
Andrey and I would like to understand this.

Thanks,
Marek
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/display: Clean up coding style violations

2019-01-29 Thread Alex Deucher
On Tue, Jan 29, 2019 at 4:03 PM David Francis  wrote:
>
> Some of the grandfathered amd display code does not follow
> Linux coding style and emits warnings or errors on checkpatch
>
> No functional changes here - just cleanup
>
> Signed-off-by: David Francis 

Acked-by: Alex Deucher 

> ---
>  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 96 +--
>  1 file changed, 47 insertions(+), 49 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index cdda68aba70e..973517d35d5c 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -176,56 +176,56 @@ static const enum drm_plane_type 
> dm_plane_type_stoney[AMDGPU_MAX_PLANES] = {
>   */
>  static u32 dm_vblank_get_counter(struct amdgpu_device *adev, int crtc)
>  {
> +   struct amdgpu_crtc *acrtc;
> +   struct dm_crtc_state *acrtc_state;
> +
> if (crtc >= adev->mode_info.num_crtc)
> return 0;
> -   else {
> -   struct amdgpu_crtc *acrtc = adev->mode_info.crtcs[crtc];
> -   struct dm_crtc_state *acrtc_state = to_dm_crtc_state(
> -   acrtc->base.state);
>
> +   acrtc = adev->mode_info.crtcs[crtc];
> +   acrtc_state = to_dm_crtc_state(acrtc->base.state);
>
> -   if (acrtc_state->stream == NULL) {
> -   DRM_ERROR("dc_stream_state is NULL for crtc '%d'!\n",
> - crtc);
> -   return 0;
> -   }
> -
> -   return dc_stream_get_vblank_counter(acrtc_state->stream);
> +   if (acrtc_state->stream == NULL) {
> +   DRM_ERROR("dc_stream_state is NULL for crtc '%d'!\n",
> + crtc);
> +   return 0;
> }
> +
> +   return dc_stream_get_vblank_counter(acrtc_state->stream);
>  }
>
>  static int dm_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
>   u32 *vbl, u32 *position)
>  {
> uint32_t v_blank_start, v_blank_end, h_position, v_position;
> +   struct amdgpu_crtc *acrtc;
> +   struct dm_crtc_state *acrtc_state;
>
> if ((crtc < 0) || (crtc >= adev->mode_info.num_crtc))
> return -EINVAL;
> -   else {
> -   struct amdgpu_crtc *acrtc = adev->mode_info.crtcs[crtc];
> -   struct dm_crtc_state *acrtc_state = to_dm_crtc_state(
> -   acrtc->base.state);
> -
> -   if (acrtc_state->stream ==  NULL) {
> -   DRM_ERROR("dc_stream_state is NULL for crtc '%d'!\n",
> - crtc);
> -   return 0;
> -   }
>
> -   /*
> -* TODO rework base driver to use values directly.
> -* for now parse it back into reg-format
> -*/
> -   dc_stream_get_scanoutpos(acrtc_state->stream,
> -&v_blank_start,
> -&v_blank_end,
> -&h_position,
> -&v_position);
> +   acrtc = adev->mode_info.crtcs[crtc];
> +   acrtc_state = to_dm_crtc_state(acrtc->base.state);
>
> -   *position = v_position | (h_position << 16);
> -   *vbl = v_blank_start | (v_blank_end << 16);
> +   if (acrtc_state->stream ==  NULL) {
> +   DRM_ERROR("dc_stream_state is NULL for crtc '%d'!\n",
> + crtc);
> +   return 0;
> }
>
> +   /*
> +* TODO rework base driver to use values directly.
> +* for now parse it back into reg-format
> +*/
> +   dc_stream_get_scanoutpos(acrtc_state->stream,
> +&v_blank_start,
> +&v_blank_end,
> +&h_position,
> +&v_position);
> +
> +   *position = v_position | (h_position << 16);
> +   *vbl = v_blank_start | (v_blank_end << 16);
> +
> return 0;
>  }
>
> @@ -293,8 +293,8 @@ static void dm_pflip_high_irq(void *interrupt_params)
>
> spin_lock_irqsave(&adev->ddev->event_lock, flags);
>
> -   if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED){
> -   DRM_DEBUG_DRIVER("amdgpu_crtc->pflip_status = %d 
> !=AMDGPU_FLIP_SUBMITTED(%d) on crtc:%d[%p] \n",
> +   if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED) {
> +   DRM_DEBUG_DRIVER("amdgpu_crtc->pflip_status = %d 
> !=AMDGPU_FLIP_SUBMITTED(%d) on crtc:%d[%p]\n",
>  amdgpu_crtc->pflip_status,
>  AMDGPU_FLIP_SUBMITTED,
>  amdgpu_crtc->crtc_id,
>

[PATCH] drm/amd/display: Clean up coding style violations

2019-01-29 Thread David Francis
Some of the grandfathered amd display code does not follow
Linux coding style and emits warnings or errors on checkpatch

No functional changes here - just cleanup

Signed-off-by: David Francis 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 96 +--
 1 file changed, 47 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index cdda68aba70e..973517d35d5c 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -176,56 +176,56 @@ static const enum drm_plane_type 
dm_plane_type_stoney[AMDGPU_MAX_PLANES] = {
  */
 static u32 dm_vblank_get_counter(struct amdgpu_device *adev, int crtc)
 {
+   struct amdgpu_crtc *acrtc;
+   struct dm_crtc_state *acrtc_state;
+
if (crtc >= adev->mode_info.num_crtc)
return 0;
-   else {
-   struct amdgpu_crtc *acrtc = adev->mode_info.crtcs[crtc];
-   struct dm_crtc_state *acrtc_state = to_dm_crtc_state(
-   acrtc->base.state);
 
+   acrtc = adev->mode_info.crtcs[crtc];
+   acrtc_state = to_dm_crtc_state(acrtc->base.state);
 
-   if (acrtc_state->stream == NULL) {
-   DRM_ERROR("dc_stream_state is NULL for crtc '%d'!\n",
- crtc);
-   return 0;
-   }
-
-   return dc_stream_get_vblank_counter(acrtc_state->stream);
+   if (acrtc_state->stream == NULL) {
+   DRM_ERROR("dc_stream_state is NULL for crtc '%d'!\n",
+ crtc);
+   return 0;
}
+
+   return dc_stream_get_vblank_counter(acrtc_state->stream);
 }
 
 static int dm_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
  u32 *vbl, u32 *position)
 {
uint32_t v_blank_start, v_blank_end, h_position, v_position;
+   struct amdgpu_crtc *acrtc;
+   struct dm_crtc_state *acrtc_state;
 
if ((crtc < 0) || (crtc >= adev->mode_info.num_crtc))
return -EINVAL;
-   else {
-   struct amdgpu_crtc *acrtc = adev->mode_info.crtcs[crtc];
-   struct dm_crtc_state *acrtc_state = to_dm_crtc_state(
-   acrtc->base.state);
-
-   if (acrtc_state->stream ==  NULL) {
-   DRM_ERROR("dc_stream_state is NULL for crtc '%d'!\n",
- crtc);
-   return 0;
-   }
 
-   /*
-* TODO rework base driver to use values directly.
-* for now parse it back into reg-format
-*/
-   dc_stream_get_scanoutpos(acrtc_state->stream,
-&v_blank_start,
-&v_blank_end,
-&h_position,
-&v_position);
+   acrtc = adev->mode_info.crtcs[crtc];
+   acrtc_state = to_dm_crtc_state(acrtc->base.state);
 
-   *position = v_position | (h_position << 16);
-   *vbl = v_blank_start | (v_blank_end << 16);
+   if (acrtc_state->stream ==  NULL) {
+   DRM_ERROR("dc_stream_state is NULL for crtc '%d'!\n",
+ crtc);
+   return 0;
}
 
+   /*
+* TODO rework base driver to use values directly.
+* for now parse it back into reg-format
+*/
+   dc_stream_get_scanoutpos(acrtc_state->stream,
+&v_blank_start,
+&v_blank_end,
+&h_position,
+&v_position);
+
+   *position = v_position | (h_position << 16);
+   *vbl = v_blank_start | (v_blank_end << 16);
+
return 0;
 }
 
@@ -293,8 +293,8 @@ static void dm_pflip_high_irq(void *interrupt_params)
 
spin_lock_irqsave(&adev->ddev->event_lock, flags);
 
-   if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED){
-   DRM_DEBUG_DRIVER("amdgpu_crtc->pflip_status = %d 
!=AMDGPU_FLIP_SUBMITTED(%d) on crtc:%d[%p] \n",
+   if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED) {
+   DRM_DEBUG_DRIVER("amdgpu_crtc->pflip_status = %d 
!=AMDGPU_FLIP_SUBMITTED(%d) on crtc:%d[%p]\n",
 amdgpu_crtc->pflip_status,
 AMDGPU_FLIP_SUBMITTED,
 amdgpu_crtc->crtc_id,
@@ -370,7 +370,7 @@ static int dm_set_powergating_state(void *handle,
 }
 
 /* Prototypes of private functions */
-static int dm_early_init(void* handle);
+static int dm_early_init(void *handle);
 
 /* Allocate memory for FBC compressed data  */
 static void amdgpu_dm_fbc_init(struct drm_connect

Re: [PATCH] drm/amd/display: Fix fclk idle state

2019-01-29 Thread Deucher, Alexander
Acked-by: Alex Deucher 


From: amd-gfx  on behalf of 
roman...@amd.com 
Sent: Tuesday, January 29, 2019 3:16:53 PM
To: amd-gfx@lists.freedesktop.org; Xu, Feifei; Quan, Evan
Cc: Chan, Carl; Wentland, Harry; Li, Roman
Subject: [PATCH] drm/amd/display: Fix fclk idle state

From: Roman Li 

[Why]
The earlier change 'Fix 6x4K displays' led to fclk value
idling at higher DPM level.

[How]
Apply the fix only to respective multi-display configuration.

Signed-off-by: Roman Li 
---
 drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c 
b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c
index 3c52a4fc921d..bbe051736a18 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c
@@ -627,7 +627,15 @@ static void dce11_pplib_apply_display_requirements(
 dc,
 context->bw.dce.sclk_khz);

-   pp_display_cfg->min_dcfclock_khz = pp_display_cfg->min_engine_clock_khz;
+   /*
+* As workaround for >4x4K lightup set dcfclock to min_engine_clock 
value.
+* This is not required for less than 5 displays,
+* thus don't request decfclk in dc to avoid impact
+* on power saving.
+*
+*/
+   pp_display_cfg->min_dcfclock_khz = (context->stream_count > 4)?
+   pp_display_cfg->min_engine_clock_khz : 0;

 pp_display_cfg->min_engine_clock_deep_sleep_khz
 = context->bw.dce.sclk_deep_sleep_khz;
--
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amd/display: Fix fclk idle state

2019-01-29 Thread Roman.Li
From: Roman Li 

[Why]
The earlier change 'Fix 6x4K displays' led to fclk value
idling at higher DPM level.

[How]
Apply the fix only to respective multi-display configuration.

Signed-off-by: Roman Li 
---
 drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c 
b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c
index 3c52a4fc921d..bbe051736a18 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c
@@ -627,7 +627,15 @@ static void dce11_pplib_apply_display_requirements(
dc,
context->bw.dce.sclk_khz);
 
-   pp_display_cfg->min_dcfclock_khz = pp_display_cfg->min_engine_clock_khz;
+   /*
+* As workaround for >4x4K lightup set dcfclock to min_engine_clock 
value.
+* This is not required for less than 5 displays,
+* thus don't request decfclk in dc to avoid impact
+* on power saving.
+*
+*/
+   pp_display_cfg->min_dcfclock_khz = (context->stream_count > 4)?
+   pp_display_cfg->min_engine_clock_khz : 0;
 
pp_display_cfg->min_engine_clock_deep_sleep_khz
= context->bw.dce.sclk_deep_sleep_khz;
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/display: add -msse2 to prevent Clang from emitting libcalls to undefined SW FP routines

2019-01-29 Thread Nick Desaulniers
Suggestions:
1. revert patch
2. get me the disassembly from gcc of the translation unit in question.
3. land patch that adds clang guards or something different based on 2.

Revert first; ask questions later.

On Tue, Jan 29, 2019 at 11:01 AM Wentland, Harry  wrote:
>
> On 2019-01-29 1:56 p.m., Guenter Roeck wrote:
> > On Tue, Jan 29, 2019 at 10:30:31AM -0500, Alex Deucher wrote:
> >> On Fri, Jan 25, 2019 at 10:29 AM Wentland, Harry  
> >> wrote:
> >>>
> >>> On 2019-01-24 7:52 p.m., ndesaulni...@google.com wrote:
>  arch/x86/Makefile disables SSE and SSE2 for the whole kernel.  The
>  AMDGPU drivers modified in this patch re-enable SSE but not SSE2.  Turn
>  on SSE2 to support emitting double precision floating point instructions
>  rather than calls to non-existent (usually available from gcc_s or
>  compiler_rt) floating point helper routines.
> 
>  Link: 
>  https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html
>  Link: https://github.com/ClangBuiltLinux/linux/issues/327
>  Cc: sta...@vger.kernel.org # 4.19
>  Reported-by: S, Shirish 
>  Reported-by: Matthias Kaehlcke 
>  Suggested-by: James Y Knight 
>  Suggested-by: Nathan Chancellor 
>  Signed-off-by: Nick Desaulniers 
>  Tested-by: Guenter Roeck 
> >>>
> >>> Reviewed-by: Harry Wentland 
> >>>
> >>> and applied.
> >>>
> >>
> >> This patch causes a segfault:
> >> https://bugs.freedesktop.org/show_bug.cgi?id=109487
> >>
> >> Any ideas?
> >>
> >
> > Set -msse2 only for clang ? I suspect, though, that this might only
> > solve the compile problem. If I understand correctly, the first
> > warning in the log is due to BREAK_TO_DEBUGGER(), suggesting that
> > something is seriously wrong. Maybe enabling sse2 results in different
> > results from floating point operations.
> >
> > Unfortunately I don't have a system with the affected GPU,
> > so I can't run any tests on real hardware. Unless someone can test
> > with real hardware, I think we have no choice but to revert the
> > patch.
> >
>
> Might be a good idea. Even though, best to revert for now until we understand 
> what's going on.
>
> It seems like people at AMD building with gcc 5.4 are fine, but those using 
> gcc 7.3 or 8.2 experience the crash.
>
> Harry
>
> > Guenter
> >
> >> Alex
> >>
> >>> Harry
> >>>
>  ---
>   drivers/gpu/drm/amd/display/dc/calcs/Makefile | 2 +-
>   drivers/gpu/drm/amd/display/dc/dml/Makefile   | 2 +-
>   2 files changed, 2 insertions(+), 2 deletions(-)
> 
>  diff --git a/drivers/gpu/drm/amd/display/dc/calcs/Makefile 
>  b/drivers/gpu/drm/amd/display/dc/calcs/Makefile
>  index 95f332ee3e7e..dc85a3c088af 100644
>  --- a/drivers/gpu/drm/amd/display/dc/calcs/Makefile
>  +++ b/drivers/gpu/drm/amd/display/dc/calcs/Makefile
>  @@ -30,7 +30,7 @@ else ifneq ($(call cc-option, -mstack-alignment=16),)
>    cc_stack_align := -mstack-alignment=16
>   endif
> 
>  -calcs_ccflags := -mhard-float -msse $(cc_stack_align)
>  +calcs_ccflags := -mhard-float -msse -msse2 $(cc_stack_align)
> 
>   CFLAGS_dcn_calcs.o := $(calcs_ccflags)
>   CFLAGS_dcn_calc_auto.o := $(calcs_ccflags)
>  diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile 
>  b/drivers/gpu/drm/amd/display/dc/dml/Makefile
>  index d97ca6528f9d..33c7d7588712 100644
>  --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
>  +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
>  @@ -30,7 +30,7 @@ else ifneq ($(call cc-option, -mstack-alignment=16),)
>    cc_stack_align := -mstack-alignment=16
>   endif
> 
>  -dml_ccflags := -mhard-float -msse $(cc_stack_align)
>  +dml_ccflags := -mhard-float -msse -msse2 $(cc_stack_align)
> 
>   CFLAGS_display_mode_lib.o := $(dml_ccflags)
>   CFLAGS_display_pipe_clocks.o := $(dml_ccflags)
> 
> >>> ___
> >>> amd-gfx mailing list
> >>> amd-gfx@lists.freedesktop.org
> >>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx



-- 
Thanks,
~Nick Desaulniers
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/display: add -msse2 to prevent Clang from emitting libcalls to undefined SW FP routines

2019-01-29 Thread Wentland, Harry
On 2019-01-29 1:56 p.m., Guenter Roeck wrote:
> On Tue, Jan 29, 2019 at 10:30:31AM -0500, Alex Deucher wrote:
>> On Fri, Jan 25, 2019 at 10:29 AM Wentland, Harry  
>> wrote:
>>>
>>> On 2019-01-24 7:52 p.m., ndesaulni...@google.com wrote:
 arch/x86/Makefile disables SSE and SSE2 for the whole kernel.  The
 AMDGPU drivers modified in this patch re-enable SSE but not SSE2.  Turn
 on SSE2 to support emitting double precision floating point instructions
 rather than calls to non-existent (usually available from gcc_s or
 compiler_rt) floating point helper routines.

 Link: 
 https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html
 Link: https://github.com/ClangBuiltLinux/linux/issues/327
 Cc: sta...@vger.kernel.org # 4.19
 Reported-by: S, Shirish 
 Reported-by: Matthias Kaehlcke 
 Suggested-by: James Y Knight 
 Suggested-by: Nathan Chancellor 
 Signed-off-by: Nick Desaulniers 
 Tested-by: Guenter Roeck 
>>>
>>> Reviewed-by: Harry Wentland 
>>>
>>> and applied.
>>>
>>
>> This patch causes a segfault:
>> https://bugs.freedesktop.org/show_bug.cgi?id=109487
>>
>> Any ideas?
>>
> 
> Set -msse2 only for clang ? I suspect, though, that this might only
> solve the compile problem. If I understand correctly, the first
> warning in the log is due to BREAK_TO_DEBUGGER(), suggesting that
> something is seriously wrong. Maybe enabling sse2 results in different
> results from floating point operations.
> 
> Unfortunately I don't have a system with the affected GPU,
> so I can't run any tests on real hardware. Unless someone can test
> with real hardware, I think we have no choice but to revert the
> patch.
> 

Might be a good idea. Even though, best to revert for now until we understand 
what's going on.

It seems like people at AMD building with gcc 5.4 are fine, but those using gcc 
7.3 or 8.2 experience the crash.

Harry

> Guenter
> 
>> Alex
>>
>>> Harry
>>>
 ---
  drivers/gpu/drm/amd/display/dc/calcs/Makefile | 2 +-
  drivers/gpu/drm/amd/display/dc/dml/Makefile   | 2 +-
  2 files changed, 2 insertions(+), 2 deletions(-)

 diff --git a/drivers/gpu/drm/amd/display/dc/calcs/Makefile 
 b/drivers/gpu/drm/amd/display/dc/calcs/Makefile
 index 95f332ee3e7e..dc85a3c088af 100644
 --- a/drivers/gpu/drm/amd/display/dc/calcs/Makefile
 +++ b/drivers/gpu/drm/amd/display/dc/calcs/Makefile
 @@ -30,7 +30,7 @@ else ifneq ($(call cc-option, -mstack-alignment=16),)
   cc_stack_align := -mstack-alignment=16
  endif

 -calcs_ccflags := -mhard-float -msse $(cc_stack_align)
 +calcs_ccflags := -mhard-float -msse -msse2 $(cc_stack_align)

  CFLAGS_dcn_calcs.o := $(calcs_ccflags)
  CFLAGS_dcn_calc_auto.o := $(calcs_ccflags)
 diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile 
 b/drivers/gpu/drm/amd/display/dc/dml/Makefile
 index d97ca6528f9d..33c7d7588712 100644
 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
 +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
 @@ -30,7 +30,7 @@ else ifneq ($(call cc-option, -mstack-alignment=16),)
   cc_stack_align := -mstack-alignment=16
  endif

 -dml_ccflags := -mhard-float -msse $(cc_stack_align)
 +dml_ccflags := -mhard-float -msse -msse2 $(cc_stack_align)

  CFLAGS_display_mode_lib.o := $(dml_ccflags)
  CFLAGS_display_pipe_clocks.o := $(dml_ccflags)

>>> ___
>>> amd-gfx mailing list
>>> amd-gfx@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/display: add -msse2 to prevent Clang from emitting libcalls to undefined SW FP routines

2019-01-29 Thread Guenter Roeck
On Tue, Jan 29, 2019 at 10:30:31AM -0500, Alex Deucher wrote:
> On Fri, Jan 25, 2019 at 10:29 AM Wentland, Harry  
> wrote:
> >
> > On 2019-01-24 7:52 p.m., ndesaulni...@google.com wrote:
> > > arch/x86/Makefile disables SSE and SSE2 for the whole kernel.  The
> > > AMDGPU drivers modified in this patch re-enable SSE but not SSE2.  Turn
> > > on SSE2 to support emitting double precision floating point instructions
> > > rather than calls to non-existent (usually available from gcc_s or
> > > compiler_rt) floating point helper routines.
> > >
> > > Link: 
> > > https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html
> > > Link: https://github.com/ClangBuiltLinux/linux/issues/327
> > > Cc: sta...@vger.kernel.org # 4.19
> > > Reported-by: S, Shirish 
> > > Reported-by: Matthias Kaehlcke 
> > > Suggested-by: James Y Knight 
> > > Suggested-by: Nathan Chancellor 
> > > Signed-off-by: Nick Desaulniers 
> > > Tested-by: Guenter Roeck 
> >
> > Reviewed-by: Harry Wentland 
> >
> > and applied.
> >
> 
> This patch causes a segfault:
> https://bugs.freedesktop.org/show_bug.cgi?id=109487
> 
> Any ideas?
> 

Set -msse2 only for clang ? I suspect, though, that this might only
solve the compile problem. If I understand correctly, the first
warning in the log is due to BREAK_TO_DEBUGGER(), suggesting that
something is seriously wrong. Maybe enabling sse2 results in different
results from floating point operations.

Unfortunately I don't have a system with the affected GPU,
so I can't run any tests on real hardware. Unless someone can test
with real hardware, I think we have no choice but to revert the
patch.

Guenter

> Alex
> 
> > Harry
> >
> > > ---
> > >  drivers/gpu/drm/amd/display/dc/calcs/Makefile | 2 +-
> > >  drivers/gpu/drm/amd/display/dc/dml/Makefile   | 2 +-
> > >  2 files changed, 2 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/amd/display/dc/calcs/Makefile 
> > > b/drivers/gpu/drm/amd/display/dc/calcs/Makefile
> > > index 95f332ee3e7e..dc85a3c088af 100644
> > > --- a/drivers/gpu/drm/amd/display/dc/calcs/Makefile
> > > +++ b/drivers/gpu/drm/amd/display/dc/calcs/Makefile
> > > @@ -30,7 +30,7 @@ else ifneq ($(call cc-option, -mstack-alignment=16),)
> > >   cc_stack_align := -mstack-alignment=16
> > >  endif
> > >
> > > -calcs_ccflags := -mhard-float -msse $(cc_stack_align)
> > > +calcs_ccflags := -mhard-float -msse -msse2 $(cc_stack_align)
> > >
> > >  CFLAGS_dcn_calcs.o := $(calcs_ccflags)
> > >  CFLAGS_dcn_calc_auto.o := $(calcs_ccflags)
> > > diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile 
> > > b/drivers/gpu/drm/amd/display/dc/dml/Makefile
> > > index d97ca6528f9d..33c7d7588712 100644
> > > --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
> > > +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
> > > @@ -30,7 +30,7 @@ else ifneq ($(call cc-option, -mstack-alignment=16),)
> > >   cc_stack_align := -mstack-alignment=16
> > >  endif
> > >
> > > -dml_ccflags := -mhard-float -msse $(cc_stack_align)
> > > +dml_ccflags := -mhard-float -msse -msse2 $(cc_stack_align)
> > >
> > >  CFLAGS_display_mode_lib.o := $(dml_ccflags)
> > >  CFLAGS_display_pipe_clocks.o := $(dml_ccflags)
> > >
> > ___
> > amd-gfx mailing list
> > amd-gfx@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] Revert "drm/amd/display: add -msse2 to prevent Clang from emitting libcalls to undefined SW FP routines"

2019-01-29 Thread Wentland, Harry
On 2019-01-29 11:57 a.m., Alex Deucher wrote:
> This reverts commit 10117450735c7a7c0858095fb46a860e7037cb9a.
> 
> Causes a crash.
> 
> Bug: https://bugs.freedesktop.org/show_bug.cgi?id=109487
> Signed-off-by: Alex Deucher 
> Cc: sta...@vger.kernel.org # 4.19

Thanks for finding and reverting.

Reviewed-by: Harry Wentland 

Harry

> ---
>  drivers/gpu/drm/amd/display/dc/calcs/Makefile | 2 +-
>  drivers/gpu/drm/amd/display/dc/dml/Makefile   | 2 +-
>  2 files changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/dc/calcs/Makefile 
> b/drivers/gpu/drm/amd/display/dc/calcs/Makefile
> index dc85a3c088af..95f332ee3e7e 100644
> --- a/drivers/gpu/drm/amd/display/dc/calcs/Makefile
> +++ b/drivers/gpu/drm/amd/display/dc/calcs/Makefile
> @@ -30,7 +30,7 @@ else ifneq ($(call cc-option, -mstack-alignment=16),)
>   cc_stack_align := -mstack-alignment=16
>  endif
>  
> -calcs_ccflags := -mhard-float -msse -msse2 $(cc_stack_align)
> +calcs_ccflags := -mhard-float -msse $(cc_stack_align)
>  
>  CFLAGS_dcn_calcs.o := $(calcs_ccflags)
>  CFLAGS_dcn_calc_auto.o := $(calcs_ccflags)
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile 
> b/drivers/gpu/drm/amd/display/dc/dml/Makefile
> index 33c7d7588712..d97ca6528f9d 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
> +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
> @@ -30,7 +30,7 @@ else ifneq ($(call cc-option, -mstack-alignment=16),)
>   cc_stack_align := -mstack-alignment=16
>  endif
>  
> -dml_ccflags := -mhard-float -msse -msse2 $(cc_stack_align)
> +dml_ccflags := -mhard-float -msse $(cc_stack_align)
>  
>  CFLAGS_display_mode_lib.o := $(dml_ccflags)
>  CFLAGS_display_pipe_clocks.o := $(dml_ccflags)
> 
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] Revert "drm/amd/display: add -msse2 to prevent Clang from emitting libcalls to undefined SW FP routines"

2019-01-29 Thread Greg KH
On Tue, Jan 29, 2019 at 11:57:25AM -0500, Alex Deucher wrote:
> This reverts commit 10117450735c7a7c0858095fb46a860e7037cb9a.

$ git show  10117450735c7a7c0858095fb46a860e7037cb9a
fatal: bad object 10117450735c7a7c0858095fb46a860e7037cb9a

odd...

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] Revert "drm/amd/display: add -msse2 to prevent Clang from emitting libcalls to undefined SW FP routines"

2019-01-29 Thread Alex Deucher
On Tue, Jan 29, 2019 at 12:38 PM Greg KH  wrote:
>
> On Tue, Jan 29, 2019 at 11:57:25AM -0500, Alex Deucher wrote:
> > This reverts commit 10117450735c7a7c0858095fb46a860e7037cb9a.
>
> $ git show  10117450735c7a7c0858095fb46a860e7037cb9a
> fatal: bad object 10117450735c7a7c0858095fb46a860e7037cb9a
>
> odd...
>

Sorry, it's queued up for drm-next, but hasn't made it to mainline yet.

Alex
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] Revert "drm/amd/display: add -msse2 to prevent Clang from emitting libcalls to undefined SW FP routines"

2019-01-29 Thread Alex Deucher
This reverts commit 10117450735c7a7c0858095fb46a860e7037cb9a.

Causes a crash.

Bug: https://bugs.freedesktop.org/show_bug.cgi?id=109487
Signed-off-by: Alex Deucher 
Cc: sta...@vger.kernel.org # 4.19
---
 drivers/gpu/drm/amd/display/dc/calcs/Makefile | 2 +-
 drivers/gpu/drm/amd/display/dc/dml/Makefile   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/calcs/Makefile 
b/drivers/gpu/drm/amd/display/dc/calcs/Makefile
index dc85a3c088af..95f332ee3e7e 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/calcs/Makefile
@@ -30,7 +30,7 @@ else ifneq ($(call cc-option, -mstack-alignment=16),)
cc_stack_align := -mstack-alignment=16
 endif
 
-calcs_ccflags := -mhard-float -msse -msse2 $(cc_stack_align)
+calcs_ccflags := -mhard-float -msse $(cc_stack_align)
 
 CFLAGS_dcn_calcs.o := $(calcs_ccflags)
 CFLAGS_dcn_calc_auto.o := $(calcs_ccflags)
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile 
b/drivers/gpu/drm/amd/display/dc/dml/Makefile
index 33c7d7588712..d97ca6528f9d 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
@@ -30,7 +30,7 @@ else ifneq ($(call cc-option, -mstack-alignment=16),)
cc_stack_align := -mstack-alignment=16
 endif
 
-dml_ccflags := -mhard-float -msse -msse2 $(cc_stack_align)
+dml_ccflags := -mhard-float -msse $(cc_stack_align)
 
 CFLAGS_display_mode_lib.o := $(dml_ccflags)
 CFLAGS_display_pipe_clocks.o := $(dml_ccflags)
-- 
2.20.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/display: add -msse2 to prevent Clang from emitting libcalls to undefined SW FP routines

2019-01-29 Thread Alex Deucher
On Fri, Jan 25, 2019 at 10:29 AM Wentland, Harry  wrote:
>
> On 2019-01-24 7:52 p.m., ndesaulni...@google.com wrote:
> > arch/x86/Makefile disables SSE and SSE2 for the whole kernel.  The
> > AMDGPU drivers modified in this patch re-enable SSE but not SSE2.  Turn
> > on SSE2 to support emitting double precision floating point instructions
> > rather than calls to non-existent (usually available from gcc_s or
> > compiler_rt) floating point helper routines.
> >
> > Link: https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html
> > Link: https://github.com/ClangBuiltLinux/linux/issues/327
> > Cc: sta...@vger.kernel.org # 4.19
> > Reported-by: S, Shirish 
> > Reported-by: Matthias Kaehlcke 
> > Suggested-by: James Y Knight 
> > Suggested-by: Nathan Chancellor 
> > Signed-off-by: Nick Desaulniers 
> > Tested-by: Guenter Roeck 
>
> Reviewed-by: Harry Wentland 
>
> and applied.
>

This patch causes a segfault:
https://bugs.freedesktop.org/show_bug.cgi?id=109487

Any ideas?

Alex

> Harry
>
> > ---
> >  drivers/gpu/drm/amd/display/dc/calcs/Makefile | 2 +-
> >  drivers/gpu/drm/amd/display/dc/dml/Makefile   | 2 +-
> >  2 files changed, 2 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/display/dc/calcs/Makefile 
> > b/drivers/gpu/drm/amd/display/dc/calcs/Makefile
> > index 95f332ee3e7e..dc85a3c088af 100644
> > --- a/drivers/gpu/drm/amd/display/dc/calcs/Makefile
> > +++ b/drivers/gpu/drm/amd/display/dc/calcs/Makefile
> > @@ -30,7 +30,7 @@ else ifneq ($(call cc-option, -mstack-alignment=16),)
> >   cc_stack_align := -mstack-alignment=16
> >  endif
> >
> > -calcs_ccflags := -mhard-float -msse $(cc_stack_align)
> > +calcs_ccflags := -mhard-float -msse -msse2 $(cc_stack_align)
> >
> >  CFLAGS_dcn_calcs.o := $(calcs_ccflags)
> >  CFLAGS_dcn_calc_auto.o := $(calcs_ccflags)
> > diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile 
> > b/drivers/gpu/drm/amd/display/dc/dml/Makefile
> > index d97ca6528f9d..33c7d7588712 100644
> > --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
> > +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
> > @@ -30,7 +30,7 @@ else ifneq ($(call cc-option, -mstack-alignment=16),)
> >   cc_stack_align := -mstack-alignment=16
> >  endif
> >
> > -dml_ccflags := -mhard-float -msse $(cc_stack_align)
> > +dml_ccflags := -mhard-float -msse -msse2 $(cc_stack_align)
> >
> >  CFLAGS_display_mode_lib.o := $(dml_ccflags)
> >  CFLAGS_display_pipe_clocks.o := $(dml_ccflags)
> >
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: regression in dcn linked to -msse2

2019-01-29 Thread Deucher, Alexander
See:

https://bugs.freedesktop.org/show_bug.cgi?id=109487



From: amd-gfx  on behalf of StDenis, Tom 

Sent: Tuesday, January 29, 2019 8:44:03 AM
To: Wentland, Harry; amd-gfx mailing list
Cc: Deucher, Alexander
Subject: regression in dcn linked to -msse2

Testing with the new 5.0.0-rc1 amd-staging-drm-next branch results in
this commit causing the attached lockup on init with my Raven + Polaris
system:

10117450735c7a7c0858095fb46a860e7037cb9a is the first bad commit
commit 10117450735c7a7c0858095fb46a860e7037cb9a
Author: ndesaulni...@google.com 
Date:   Thu Jan 24 16:52:59 2019 -0800

 drm/amd/display: add -msse2 to prevent Clang from emitting libcalls
to undefined SW FP routines

 arch/x86/Makefile disables SSE and SSE2 for the whole kernel.  The
 AMDGPU drivers modified in this patch re-enable SSE but not SSE2.  Turn
 on SSE2 to support emitting double precision floating point
instructions
 rather than calls to non-existent (usually available from gcc_s or
 compiler_rt) floating point helper routines.

 Link:
https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html
 Link: https://github.com/ClangBuiltLinux/linux/issues/327
 Cc: sta...@vger.kernel.org # 4.19
 Reported-by: S, Shirish 
 Reported-by: Matthias Kaehlcke 
 Suggested-by: James Y Knight 
 Suggested-by: Nathan Chancellor 
 Signed-off-by: Nick Desaulniers 
 Tested-by: Guenter Roeck 
 Tested-by:  Matthias Kaehlcke 
 Tested-by: Nathan Chancellor 
 Reviewed-by: Harry Wentland 
 Signed-off-by: Harry Wentland 
 Signed-off-by: Alex Deucher 

:04 04 39f89ea177b8a69a3a2aa098aef8b1a7e58056b9
71edb262be0608ffae0996253f47492b2e714b5c M  drivers


Cheers,
Tom
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: add AMDGPU_IB_FLAG_GET_START_SYNCOBJ to expose scheduled fence

2019-01-29 Thread Koenig, Christian
Am 29.01.19 um 14:32 schrieb Marek Olšák:


On Tue, Jan 29, 2019, 3:01 AM Christian König 
mailto:ckoenig.leichtzumer...@gmail.com> 
wrote:
Am 28.01.19 um 22:52 schrieb Marek Olšák:
> From: Marek Olšák mailto:marek.ol...@amd.com>>
>
> Normal syncobjs signal when an IB finishes. Start syncobjs signal when
> an IB starts.

That approach has quite a number of problems (for example you can't
allocate memory at this point).

Better add a flag that we should only sync on scheduling for a
dependency/syncobj instead.

I don't understand. Can you give me an example of the interface and how the 
implementation would look?

For example we add a new chunk type AMDGPU_CHUNK_ID_SCHEDULED which is handled 
the same way as AMDGPU_CHUNK_ID_DEPENDENCIES.

Then in amdgpu_cs_process_fence_dep() we check if its a 
AMDGPU_CHUNK_ID_SCHEDULED and if yes extract the scheduled fence from the fence 
we got from amdgpu_ctx_get_fence().

Christian.


Thanks,
Marek


Christian.

>
> Signed-off-by: Marek Olšák mailto:marek.ol...@amd.com>>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h |  1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 18 ++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  3 ++-
>   include/uapi/drm/amdgpu_drm.h   | 13 -
>   4 files changed, 33 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index d67f8b1dfe80..8e2f7e558bc9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -453,20 +453,21 @@ struct amdgpu_cs_parser {
>   struct dma_fence*fence;
>   uint64_tbytes_moved_threshold;
>   uint64_tbytes_moved_vis_threshold;
>   uint64_tbytes_moved;
>   uint64_tbytes_moved_vis;
>   struct amdgpu_bo_list_entry *evictable;
>
>   /* user fence */
>   struct amdgpu_bo_list_entry uf_entry;
>
> + boolget_start_syncobj;
>   unsigned num_post_dep_syncobjs;
>   struct drm_syncobj **post_dep_syncobjs;
>   };
>
>   static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p,
> uint32_t ib_idx, int idx)
>   {
>   return p->job->ibs[ib_idx].ptr[idx];
>   }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 1c49b8266d69..917f3818c61c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -1022,20 +1022,23 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device 
> *adev,
>   r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,
> chunk_ib->ip_instance, chunk_ib->ring,
> &entity);
>   if (r)
>   return r;
>
>   if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
>   parser->job->preamble_status |=
>   AMDGPU_PREAMBLE_IB_PRESENT;
>
> + if (chunk_ib->flags & AMDGPU_IB_FLAG_GET_START_SYNCOBJ)
> + parser->get_start_syncobj = true;
> +
>   if (parser->entity && parser->entity != entity)
>   return -EINVAL;
>
>   parser->entity = entity;
>
>   ring = to_amdgpu_ring(entity->rq->sched);
>   r =  amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
>  chunk_ib->ib_bytes : 0, ib);
>   if (r) {
>   DRM_ERROR("Failed to get ib !\n");
> @@ -1227,20 +1230,35 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser 
> *p,
>   amdgpu_mn_lock(p->mn);
>   amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
>   struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
>
>   if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
>   r = -ERESTARTSYS;
>   goto error_abort;
>   }
>   }
>
> + if (p->get_start_syncobj) {
> + struct drm_syncobj *syncobj;
> +
> + r = drm_syncobj_create(&syncobj, 0,
> +&job->base.s_fence->scheduled);
> + if (r)
> + goto error_abort;
> +
> + r = drm_syncobj_get_handle(p->filp, syncobj,
> +&cs->out.start_syncobj);
> + if (r)
> + goto error_abort;
> + drm_syncobj_put(syncobj);
> + }
> +
>   job->owner = p->filp;
>   p->fence = dma_fence_get(&job->base.s_fence->finished);
>
>   amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
>   amdgpu_cs_post_dependencies(p);
>
>   if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
>   !p->ctx->preamble_presented) {
>   job->preamble_

regression in dcn linked to -msse2

2019-01-29 Thread StDenis, Tom
Testing with the new 5.0.0-rc1 amd-staging-drm-next branch results in 
this commit causing the attached lockup on init with my Raven + Polaris 
system:

10117450735c7a7c0858095fb46a860e7037cb9a is the first bad commit
commit 10117450735c7a7c0858095fb46a860e7037cb9a
Author: ndesaulni...@google.com 
Date:   Thu Jan 24 16:52:59 2019 -0800

 drm/amd/display: add -msse2 to prevent Clang from emitting libcalls 
to undefined SW FP routines

 arch/x86/Makefile disables SSE and SSE2 for the whole kernel.  The
 AMDGPU drivers modified in this patch re-enable SSE but not SSE2.  Turn
 on SSE2 to support emitting double precision floating point 
instructions
 rather than calls to non-existent (usually available from gcc_s or
 compiler_rt) floating point helper routines.

 Link: 
https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html
 Link: https://github.com/ClangBuiltLinux/linux/issues/327
 Cc: sta...@vger.kernel.org # 4.19
 Reported-by: S, Shirish 
 Reported-by: Matthias Kaehlcke 
 Suggested-by: James Y Knight 
 Suggested-by: Nathan Chancellor 
 Signed-off-by: Nick Desaulniers 
 Tested-by: Guenter Roeck 
 Tested-by:  Matthias Kaehlcke 
 Tested-by: Nathan Chancellor 
 Reviewed-by: Harry Wentland 
 Signed-off-by: Harry Wentland 
 Signed-off-by: Alex Deucher 

:04 04 39f89ea177b8a69a3a2aa098aef8b1a7e58056b9 
71edb262be0608ffae0996253f47492b2e714b5c M  drivers


Cheers,
Tom


log.gz
Description: log.gz
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: add AMDGPU_IB_FLAG_GET_START_SYNCOBJ to expose scheduled fence

2019-01-29 Thread Marek Olšák
On Tue, Jan 29, 2019, 3:01 AM Christian König <
ckoenig.leichtzumer...@gmail.com wrote:

> Am 28.01.19 um 22:52 schrieb Marek Olšák:
> > From: Marek Olšák 
> >
> > Normal syncobjs signal when an IB finishes. Start syncobjs signal when
> > an IB starts.
>
> That approach has quite a number of problems (for example you can't
> allocate memory at this point).
>
> Better add a flag that we should only sync on scheduling for a
> dependency/syncobj instead.
>

I don't understand. Can you give me an example of the interface and how the
implementation would look?

Thanks,
Marek


> Christian.
>
> >
> > Signed-off-by: Marek Olšák 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu.h |  1 +
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 18 ++
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  3 ++-
> >   include/uapi/drm/amdgpu_drm.h   | 13 -
> >   4 files changed, 33 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > index d67f8b1dfe80..8e2f7e558bc9 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > @@ -453,20 +453,21 @@ struct amdgpu_cs_parser {
> >   struct dma_fence*fence;
> >   uint64_tbytes_moved_threshold;
> >   uint64_tbytes_moved_vis_threshold;
> >   uint64_tbytes_moved;
> >   uint64_tbytes_moved_vis;
> >   struct amdgpu_bo_list_entry *evictable;
> >
> >   /* user fence */
> >   struct amdgpu_bo_list_entry uf_entry;
> >
> > + boolget_start_syncobj;
> >   unsigned num_post_dep_syncobjs;
> >   struct drm_syncobj **post_dep_syncobjs;
> >   };
> >
> >   static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p,
> > uint32_t ib_idx, int idx)
> >   {
> >   return p->job->ibs[ib_idx].ptr[idx];
> >   }
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > index 1c49b8266d69..917f3818c61c 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > @@ -1022,20 +1022,23 @@ static int amdgpu_cs_ib_fill(struct
> amdgpu_device *adev,
> >   r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,
> > chunk_ib->ip_instance,
> chunk_ib->ring,
> > &entity);
> >   if (r)
> >   return r;
> >
> >   if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
> >   parser->job->preamble_status |=
> >   AMDGPU_PREAMBLE_IB_PRESENT;
> >
> > + if (chunk_ib->flags & AMDGPU_IB_FLAG_GET_START_SYNCOBJ)
> > + parser->get_start_syncobj = true;
> > +
> >   if (parser->entity && parser->entity != entity)
> >   return -EINVAL;
> >
> >   parser->entity = entity;
> >
> >   ring = to_amdgpu_ring(entity->rq->sched);
> >   r =  amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
> >  chunk_ib->ib_bytes : 0, ib);
> >   if (r) {
> >   DRM_ERROR("Failed to get ib !\n");
> > @@ -1227,20 +1230,35 @@ static int amdgpu_cs_submit(struct
> amdgpu_cs_parser *p,
> >   amdgpu_mn_lock(p->mn);
> >   amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
> >   struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
> >
> >   if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
> >   r = -ERESTARTSYS;
> >   goto error_abort;
> >   }
> >   }
> >
> > + if (p->get_start_syncobj) {
> > + struct drm_syncobj *syncobj;
> > +
> > + r = drm_syncobj_create(&syncobj, 0,
> > +&job->base.s_fence->scheduled);
> > + if (r)
> > + goto error_abort;
> > +
> > + r = drm_syncobj_get_handle(p->filp, syncobj,
> > +&cs->out.start_syncobj);
> > + if (r)
> > + goto error_abort;
> > + drm_syncobj_put(syncobj);
> > + }
> > +
> >   job->owner = p->filp;
> >   p->fence = dma_fence_get(&job->base.s_fence->finished);
> >
> >   amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
> >   amdgpu_cs_post_dependencies(p);
> >
> >   if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
> >   !p->ctx->preamble_presented) {
> >   job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
> >   p->ctx->preamble_presented = true;
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > index c806f984bc

Re: [PATCH 1/3] drm/amdgpu: Fix entities for disabled HW blocks.

2019-01-29 Thread Koenig, Christian
Am 29.01.19 um 11:41 schrieb Bas Nieuwenhuizen:
> On Tue, Jan 29, 2019 at 11:33 AM Christian König
>  wrote:
>> Am 29.01.19 um 11:20 schrieb Bas Nieuwenhuizen:
>>> If we have some disabled HW blocks (say VCN), then the rings are
>>> not initialized. This results in entities that refer to uninitialized
>>> rqs (runqueues?).
>>>
>>> In normal usage this does not result in issues because userspace
>>> generally knows to ignore the unsupported blocks, but e.g. setting
>>> the priorities on all the entities resulted in a NULL access while
>>> locking the rq spinlock.
>>>
>>> This could probably also be induced when actually adding a job for
> >>> the blocks with some less smart userspace.
>> In general I agree that we need to improve the handling here. But this
>> looks completely incorrect to me.
> In what sense is this incorrect?
>
> I'm fencing off all access to entities which use an uninitialized ring
> (and therefore rq) and do not initialize/deinitialize them.

Ok, my fault — you don't seem to understand the problem here :) An entity 
is not associated with one runqueue, but multiple ones.

And right now it is perfectly normal that one or more of those are not 
present. See for example most AMD GPUs have only one SDMA block with two 
instance, but we add all of them to the rq list anyway.

We should probably stop that and then make sure that entities can also 
initialize properly with zero runqueues in the list and return an error 
if you try to use those.

Christian.

>
>> We should always initialize all entities, but only with rq which are
>> initialized as well.
>>
>> When this results in zero rq then we can later on handle that gracefully.
>>
>> Regards,
>> Christian.
>>
>>> Signed-off-by: Bas Nieuwenhuizen 
>>> ---
>>>drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 31 +++--
>>>drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h |  1 +
>>>2 files changed, 25 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>>> index d85184b5b35c..6f72ce785b32 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>>> @@ -169,9 +169,13 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
>>>for (j = 0; j < num_rings; ++j)
>>>rqs[j] = &rings[j]->sched.sched_rq[priority];
>>>
>>> - for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
>>> - r = drm_sched_entity_init(&ctx->entities[i][j].entity,
>>> -   rqs, num_rings, 
>>> &ctx->guilty);
>>> + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
>>> + ctx->entities[i][j].enabled = rings[0]->adev != NULL;
>>> + if (ctx->entities[i][j].enabled) {
>>> + r = 
>>> drm_sched_entity_init(&ctx->entities[i][j].entity,
>>> +   rqs, num_rings, 
>>> &ctx->guilty);
>>> + }
>>> + }
>>>if (r)
>>>goto error_cleanup_entities;
>>>}
>>> @@ -180,7 +184,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
>>>
>>>error_cleanup_entities:
>>>for (i = 0; i < num_entities; ++i)
>>> - drm_sched_entity_destroy(&ctx->entities[0][i].entity);
>>> + if (ctx->entities[0][i].enabled)
>>> + drm_sched_entity_destroy(&ctx->entities[0][i].entity);
>>>kfree(ctx->entities[0]);
>>>
>>>error_free_fences:
>>> @@ -229,6 +234,11 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 
>>> hw_ip, u32 instance,
>>>return -EINVAL;
>>>}
>>>
>>> + if (!ctx->entities[hw_ip][ring].enabled) {
>>> + DRM_DEBUG("disabled ring: %d %d\n", hw_ip, ring);
>>> + return -EINVAL;
>>> + }
>>> +
>>>*entity = &ctx->entities[hw_ip][ring].entity;
>>>return 0;
>>>}
>>> @@ -279,7 +289,8 @@ static void amdgpu_ctx_do_release(struct kref *ref)
>>>num_entities += amdgpu_ctx_num_entities[i];
>>>
>>>for (i = 0; i < num_entities; i++)
>>> - drm_sched_entity_destroy(&ctx->entities[0][i].entity);
>>> + if (ctx->entities[0][i].enabled)
>>> + drm_sched_entity_destroy(&ctx->entities[0][i].entity);
>>>
>>>amdgpu_ctx_fini(ref);
>>>}
>>> @@ -505,7 +516,9 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx 
>>> *ctx,
>>>for (i = 0; i < num_entities; i++) {
>>>struct drm_sched_entity *entity = 
>>> &ctx->entities[0][i].entity;
>>>
>>> - drm_sched_entity_set_priority(entity, ctx_prio);
>>> +
>>> + if (ctx->entities[0][1].enabled)
>>> + drm_sched_entity_set_priority(entity, ctx_prio);
>>>}
>>>}
>>>
>>> @@ -557,6 +570,9 @@ void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr 
>>> *

[PATCH 1/3] drm/irq: Don't check for DRIVER_HAVE_IRQ in drm_irq_(un)install

2019-01-29 Thread Daniel Vetter
If a non-legacy driver calls these it's valid to assume there is
interrupt support. The flag is really only needed for legacy drivers,
which control IRQ enabling/disabling through the DRM_IOCTL_CONTROL
legacy IOCTL.

Also remove all the flag usage from non-legacy drivers.

v2: Review from Emil:
- improve commit message
- I forgot hibmc, fix that

Cc: linux-arm-ker...@lists.infradead.org
Cc: intel-...@lists.freedesktop.org
Cc: linux-amlo...@lists.infradead.org
Cc: linux-arm-...@vger.kernel.org
Cc: freedr...@lists.freedesktop.org
Cc: virtualizat...@lists.linux-foundation.org
Cc: spice-de...@lists.freedesktop.org
Cc: amd-gfx@lists.freedesktop.org
Cc: linux-renesas-...@vger.kernel.org
Reviewed-by: Emil Velikov 
Reviewed-by: Sam Ravnborg 
Signed-off-by: Daniel Vetter 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +-
 drivers/gpu/drm/arm/hdlcd_drv.c | 2 +-
 drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c| 2 +-
 drivers/gpu/drm/drm_irq.c   | 6 --
 drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c   | 2 +-
 drivers/gpu/drm/gma500/psb_drv.c| 2 +-
 drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c | 3 +--
 drivers/gpu/drm/i915/i915_drv.c | 2 +-
 drivers/gpu/drm/meson/meson_drv.c   | 2 +-
 drivers/gpu/drm/msm/msm_drv.c   | 3 +--
 drivers/gpu/drm/mxsfb/mxsfb_drv.c   | 3 +--
 drivers/gpu/drm/qxl/qxl_drv.c   | 2 +-
 drivers/gpu/drm/radeon/radeon_drv.c | 2 +-
 drivers/gpu/drm/shmobile/shmob_drm_drv.c| 2 +-
 drivers/gpu/drm/tilcdc/tilcdc_drv.c | 2 +-
 drivers/gpu/drm/vc4/vc4_drv.c   | 1 -
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2 +-
 drivers/staging/vboxvideo/vbox_drv.c| 2 +-
 18 files changed, 16 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 0c22bae0c736..22502417c18c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1189,7 +1189,7 @@ amdgpu_get_crtc_scanout_position(struct drm_device *dev, 
unsigned int pipe,
 static struct drm_driver kms_driver = {
.driver_features =
DRIVER_USE_AGP | DRIVER_ATOMIC |
-   DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM |
+   DRIVER_IRQ_SHARED | DRIVER_GEM |
DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ,
.load = amdgpu_driver_load_kms,
.open = amdgpu_driver_open_kms,
diff --git a/drivers/gpu/drm/arm/hdlcd_drv.c b/drivers/gpu/drm/arm/hdlcd_drv.c
index e68935b80917..8fc0b884c428 100644
--- a/drivers/gpu/drm/arm/hdlcd_drv.c
+++ b/drivers/gpu/drm/arm/hdlcd_drv.c
@@ -229,7 +229,7 @@ static int hdlcd_debugfs_init(struct drm_minor *minor)
 DEFINE_DRM_GEM_CMA_FOPS(fops);
 
 static struct drm_driver hdlcd_driver = {
-   .driver_features = DRIVER_HAVE_IRQ | DRIVER_GEM |
+   .driver_features = DRIVER_GEM |
   DRIVER_MODESET | DRIVER_PRIME |
   DRIVER_ATOMIC,
.irq_handler = hdlcd_irq,
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c 
b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
index 034a91112098..0be13eceedba 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
@@ -720,7 +720,7 @@ static void atmel_hlcdc_dc_irq_uninstall(struct drm_device 
*dev)
 DEFINE_DRM_GEM_CMA_FOPS(fops);
 
 static struct drm_driver atmel_hlcdc_dc_driver = {
-   .driver_features = DRIVER_HAVE_IRQ | DRIVER_GEM |
+   .driver_features = DRIVER_GEM |
   DRIVER_MODESET | DRIVER_PRIME |
   DRIVER_ATOMIC,
.irq_handler = atmel_hlcdc_dc_irq_handler,
diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index 45a07652fa00..c5babb3e4752 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -103,9 +103,6 @@ int drm_irq_install(struct drm_device *dev, int irq)
int ret;
unsigned long sh_flags = 0;
 
-   if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ))
-   return -EOPNOTSUPP;
-
if (irq == 0)
return -EINVAL;
 
@@ -174,9 +171,6 @@ int drm_irq_uninstall(struct drm_device *dev)
bool irq_enabled;
int i;
 
-   if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ))
-   return -EOPNOTSUPP;
-
irq_enabled = dev->irq_enabled;
dev->irq_enabled = false;
 
diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c 
b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c
index 54ace3436605..dfc73aade325 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c
+++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c
@@ -137,7 +137,7 @@ static irqreturn_t fsl_dcu_drm_irq(int irq, void *arg)
 DEFINE_DRM_GEM_CMA_FOPS(fsl_dcu_drm_fops);
 
 static struct drm_driver fsl_dcu_drm_driver = {
-   .driver_features= DRIVER_HAVE_IRQ | DRIVER_

Re: [PATCH 1/3] drm/amdgpu: Fix entities for disabled HW blocks.

2019-01-29 Thread Bas Nieuwenhuizen
On Tue, Jan 29, 2019 at 11:33 AM Christian König
 wrote:
>
> Am 29.01.19 um 11:20 schrieb Bas Nieuwenhuizen:
> > If we have some disabled HW blocks (say VCN), then the rings are
> > not initialized. This results in entities that refer to uninitialized
> > rqs (runqueues?).
> >
> > In normal usage this does not result in issues because userspace
> > generally knows to ignore the unsupported blocks, but e.g. setting
> > the priorities on all the entities resulted in a NULL access while
> > locking the rq spinlock.
> >
> > This could probably also be induced when actually adding a job for
> > the blocks with some less smart userspace.
>
> In general I agree that we need to improve the handling here. But this
> looks completely incorrect to me.

In what sense is this incorrect?

I'm fencing off all access to entities which use an uninitialized ring
(and therefore rq) and do not initialize/deinitialize them.

>
> We should always initialize all entities, but only with rq which are
> initialized as well.
>
> When this results in zero rq then we can later on handle that gracefully.
>
> Regards,
> Christian.
>
> >
> > Signed-off-by: Bas Nieuwenhuizen 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 31 +++--
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h |  1 +
> >   2 files changed, 25 insertions(+), 7 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> > index d85184b5b35c..6f72ce785b32 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> > @@ -169,9 +169,13 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
> >   for (j = 0; j < num_rings; ++j)
> >   rqs[j] = &rings[j]->sched.sched_rq[priority];
> >
> > - for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
> > - r = drm_sched_entity_init(&ctx->entities[i][j].entity,
> > -   rqs, num_rings, 
> > &ctx->guilty);
> > + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> > + ctx->entities[i][j].enabled = rings[0]->adev != NULL;
> > + if (ctx->entities[i][j].enabled) {
> > + r = 
> > drm_sched_entity_init(&ctx->entities[i][j].entity,
> > +   rqs, num_rings, 
> > &ctx->guilty);
> > + }
> > + }
> >   if (r)
> >   goto error_cleanup_entities;
> >   }
> > @@ -180,7 +184,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
> >
> >   error_cleanup_entities:
> >   for (i = 0; i < num_entities; ++i)
> > - drm_sched_entity_destroy(&ctx->entities[0][i].entity);
> > + if (ctx->entities[0][i].enabled)
> > + drm_sched_entity_destroy(&ctx->entities[0][i].entity);
> >   kfree(ctx->entities[0]);
> >
> >   error_free_fences:
> > @@ -229,6 +234,11 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 
> > hw_ip, u32 instance,
> >   return -EINVAL;
> >   }
> >
> > + if (!ctx->entities[hw_ip][ring].enabled) {
> > + DRM_DEBUG("disabled ring: %d %d\n", hw_ip, ring);
> > + return -EINVAL;
> > + }
> > +
> >   *entity = &ctx->entities[hw_ip][ring].entity;
> >   return 0;
> >   }
> > @@ -279,7 +289,8 @@ static void amdgpu_ctx_do_release(struct kref *ref)
> >   num_entities += amdgpu_ctx_num_entities[i];
> >
> >   for (i = 0; i < num_entities; i++)
> > - drm_sched_entity_destroy(&ctx->entities[0][i].entity);
> > + if (ctx->entities[0][i].enabled)
> > + drm_sched_entity_destroy(&ctx->entities[0][i].entity);
> >
> >   amdgpu_ctx_fini(ref);
> >   }
> > @@ -505,7 +516,9 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx 
> > *ctx,
> >   for (i = 0; i < num_entities; i++) {
> >   struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
> >
> > - drm_sched_entity_set_priority(entity, ctx_prio);
> > +
> > + if (ctx->entities[0][1].enabled)
> > + drm_sched_entity_set_priority(entity, ctx_prio);
> >   }
> >   }
> >
> > @@ -557,6 +570,9 @@ void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr 
> > *mgr)
> >   for (i = 0; i < num_entities; i++) {
> >   struct drm_sched_entity *entity;
> >
> > + if (!ctx->entities[0][i].enabled)
> > + continue;
> > +
> >   entity = &ctx->entities[0][i].entity;
> >   max_wait = drm_sched_entity_flush(entity, max_wait);
> >   }
> > @@ -584,7 +600,8 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr 
> > *mgr)
> >   }
> >
> >   for (i = 0; i < num_entities; i++)
> > - drm_sched_entity_fi

Re: [PATCH 1/3] drm/amdgpu: Fix entities for disabled HW blocks.

2019-01-29 Thread Christian König

Am 29.01.19 um 11:20 schrieb Bas Nieuwenhuizen:

If we have some disabled HW blocks (say VCN), then the rings are
not initialized. This results in entities that refer to uninitialized
rqs (runqueues?).

In normal usage this does not result in issues because userspace
generally knows to ignore the unsupported blocks, but e.g. setting
the priorities on all the entities resulted in a NULL access while
locking the rq spinlock.

This could probably also be induced when actually adding a job for
the blocks with some less smart userspace.


In general I agree that we need to improve the handling here. But this 
looks completely incorrect to me.


We should always initialize all entities, but only with rq which are 
initialized as well.


When this results in zero rq then we can later on handle that gracefully.

Regards,
Christian.



Signed-off-by: Bas Nieuwenhuizen 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 31 +++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h |  1 +
  2 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index d85184b5b35c..6f72ce785b32 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -169,9 +169,13 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
for (j = 0; j < num_rings; ++j)
rqs[j] = &rings[j]->sched.sched_rq[priority];
  
-		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)

-   r = drm_sched_entity_init(&ctx->entities[i][j].entity,
- rqs, num_rings, &ctx->guilty);
+   for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+   ctx->entities[i][j].enabled = rings[0]->adev != NULL;
+   if (ctx->entities[i][j].enabled) {
+   r = 
drm_sched_entity_init(&ctx->entities[i][j].entity,
+ rqs, num_rings, 
&ctx->guilty);
+   }
+   }
if (r)
goto error_cleanup_entities;
}
@@ -180,7 +184,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
  
  error_cleanup_entities:

for (i = 0; i < num_entities; ++i)
-   drm_sched_entity_destroy(&ctx->entities[0][i].entity);
+   if (ctx->entities[0][i].enabled)
+   drm_sched_entity_destroy(&ctx->entities[0][i].entity);
kfree(ctx->entities[0]);
  
  error_free_fences:

@@ -229,6 +234,11 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 
hw_ip, u32 instance,
return -EINVAL;
}
  
+	if (!ctx->entities[hw_ip][ring].enabled) {

+   DRM_DEBUG("disabled ring: %d %d\n", hw_ip, ring);
+   return -EINVAL;
+   }
+
*entity = &ctx->entities[hw_ip][ring].entity;
return 0;
  }
@@ -279,7 +289,8 @@ static void amdgpu_ctx_do_release(struct kref *ref)
num_entities += amdgpu_ctx_num_entities[i];
  
  	for (i = 0; i < num_entities; i++)

-   drm_sched_entity_destroy(&ctx->entities[0][i].entity);
+   if (ctx->entities[0][i].enabled)
+   drm_sched_entity_destroy(&ctx->entities[0][i].entity);
  
  	amdgpu_ctx_fini(ref);

  }
@@ -505,7 +516,9 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
for (i = 0; i < num_entities; i++) {
struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
  
-		drm_sched_entity_set_priority(entity, ctx_prio);

+
+   if (ctx->entities[0][1].enabled)
+   drm_sched_entity_set_priority(entity, ctx_prio);
}
  }
  
@@ -557,6 +570,9 @@ void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr)

for (i = 0; i < num_entities; i++) {
struct drm_sched_entity *entity;
  
+			if (!ctx->entities[0][i].enabled)

+   continue;
+
entity = &ctx->entities[0][i].entity;
max_wait = drm_sched_entity_flush(entity, max_wait);
}
@@ -584,7 +600,8 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
}
  
  		for (i = 0; i < num_entities; i++)

-   drm_sched_entity_fini(&ctx->entities[0][i].entity);
+   if (ctx->entities[0][i].enabled)
+   
drm_sched_entity_fini(&ctx->entities[0][i].entity);
}
  }
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h

index b3b012c0a7da..183a783aedd8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -33,6 +33,7 @@ struct amdgpu_ctx_entity {
uint64_tsequence;
struct dma_fence**fences;
struct drm_sched_entity entity;
+  

[PATCH 1/3] drm/amdgpu: Fix entities for disabled HW blocks.

2019-01-29 Thread Bas Nieuwenhuizen
If we have some disabled HW blocks (say VCN), then the rings are
not initialized. This results in entities that refer to uninitialized
rqs (runqueues?).

In normal usage this does not result in issues because userspace
generally knows to ignore the unsupported blocks, but e.g. setting
the priorities on all the entities resulted in a NULL access while
locking the rq spinlock.

This could probably also be induced when actually adding a job for
the blocks with some less smart userspace.

Signed-off-by: Bas Nieuwenhuizen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 31 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h |  1 +
 2 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index d85184b5b35c..6f72ce785b32 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -169,9 +169,13 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
for (j = 0; j < num_rings; ++j)
rqs[j] = &rings[j]->sched.sched_rq[priority];
 
-   for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
-   r = drm_sched_entity_init(&ctx->entities[i][j].entity,
- rqs, num_rings, &ctx->guilty);
+   for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+   ctx->entities[i][j].enabled = rings[0]->adev != NULL;
+   if (ctx->entities[i][j].enabled) {
+   r = 
drm_sched_entity_init(&ctx->entities[i][j].entity,
+ rqs, num_rings, 
&ctx->guilty);
+   }
+   }
if (r)
goto error_cleanup_entities;
}
@@ -180,7 +184,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 
 error_cleanup_entities:
for (i = 0; i < num_entities; ++i)
-   drm_sched_entity_destroy(&ctx->entities[0][i].entity);
+   if (ctx->entities[0][i].enabled)
+   drm_sched_entity_destroy(&ctx->entities[0][i].entity);
kfree(ctx->entities[0]);
 
 error_free_fences:
@@ -229,6 +234,11 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 
hw_ip, u32 instance,
return -EINVAL;
}
 
+   if (!ctx->entities[hw_ip][ring].enabled) {
+   DRM_DEBUG("disabled ring: %d %d\n", hw_ip, ring);
+   return -EINVAL;
+   }
+
*entity = &ctx->entities[hw_ip][ring].entity;
return 0;
 }
@@ -279,7 +289,8 @@ static void amdgpu_ctx_do_release(struct kref *ref)
num_entities += amdgpu_ctx_num_entities[i];
 
for (i = 0; i < num_entities; i++)
-   drm_sched_entity_destroy(&ctx->entities[0][i].entity);
+   if (ctx->entities[0][i].enabled)
+   drm_sched_entity_destroy(&ctx->entities[0][i].entity);
 
amdgpu_ctx_fini(ref);
 }
@@ -505,7 +516,9 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
for (i = 0; i < num_entities; i++) {
struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
 
-   drm_sched_entity_set_priority(entity, ctx_prio);
+
+   if (ctx->entities[0][1].enabled)
+   drm_sched_entity_set_priority(entity, ctx_prio);
}
 }
 
@@ -557,6 +570,9 @@ void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr)
for (i = 0; i < num_entities; i++) {
struct drm_sched_entity *entity;
 
+   if (!ctx->entities[0][i].enabled)
+   continue;
+
entity = &ctx->entities[0][i].entity;
max_wait = drm_sched_entity_flush(entity, max_wait);
}
@@ -584,7 +600,8 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
}
 
for (i = 0; i < num_entities; i++)
-   drm_sched_entity_fini(&ctx->entities[0][i].entity);
+   if (ctx->entities[0][i].enabled)
+   
drm_sched_entity_fini(&ctx->entities[0][i].entity);
}
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index b3b012c0a7da..183a783aedd8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -33,6 +33,7 @@ struct amdgpu_ctx_entity {
uint64_tsequence;
struct dma_fence**fences;
struct drm_sched_entity entity;
+   boolenabled;
 };
 
 struct amdgpu_ctx {
-- 
2.20.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 3/3] drm/amdgpu: Add command to override the context priority.

2019-01-29 Thread Bas Nieuwenhuizen
Given a master fd we can then override the priority of the context
in another fd.

Using these overrides was recommended by Christian instead of trying
to submit from a master fd, and I am adding a way to override a
single context instead of the entire process so we can only upgrade
a single Vulkan queue and not effectively the entire process.

Reused the flags field as it was checked to be 0 anyways, so nothing
used it. This is source-incompatible (due to the name change), but
ABI compatible.

Signed-off-by: Bas Nieuwenhuizen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 45 ++-
 include/uapi/drm/amdgpu_drm.h |  3 +-
 2 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
index 0b70410488b6..78f5a42f36e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
@@ -76,6 +76,39 @@ static int amdgpu_sched_process_priority_override(struct 
amdgpu_device *adev,
return 0;
 }
 
+static int amdgpu_sched_context_priority_override(struct amdgpu_device *adev,
+ int fd,
+ unsigned ctx_id,
+ enum drm_sched_priority 
priority)
+{
+   struct file *filp = fget(fd);
+   struct amdgpu_fpriv *fpriv;
+   struct amdgpu_ctx *ctx;
+   int r;
+
+   if (!filp)
+   return -EINVAL;
+
+   r = amdgpu_file_to_fpriv(filp, &fpriv);
+   if (r) {
+   fput(filp);
+   return r;
+   }
+
+   ctx = amdgpu_ctx_get(fpriv, ctx_id);
+
+   if (!ctx) {
+   fput(filp);
+   return -EINVAL;
+   }
+
+   amdgpu_ctx_priority_override(ctx, priority);
+   amdgpu_ctx_put(ctx);
+   fput(filp);
+
+   return 0;
+}
+
 int amdgpu_sched_ioctl(struct drm_device *dev, void *data,
   struct drm_file *filp)
 {
@@ -85,15 +118,25 @@ int amdgpu_sched_ioctl(struct drm_device *dev, void *data,
int r;
 
priority = amdgpu_to_sched_priority(args->in.priority);
-   if (args->in.flags || priority == DRM_SCHED_PRIORITY_INVALID)
+   if (priority == DRM_SCHED_PRIORITY_INVALID)
return -EINVAL;
 
+if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
+return -EINVAL;
+
+   WARN(1, "priority %d %d\n", args->in.priority, priority);
switch (args->in.op) {
case AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE:
r = amdgpu_sched_process_priority_override(adev,
   args->in.fd,
   priority);
break;
+   case AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE:
+   r = amdgpu_sched_context_priority_override(adev,
+  args->in.fd,
+  args->in.ctx_id,
+  priority);
+   break;
default:
DRM_ERROR("Invalid sched op specified: %d\n", args->in.op);
r = -EINVAL;
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index faaad04814e4..30fa340790b2 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -275,13 +275,14 @@ union drm_amdgpu_vm {
 
 /* sched ioctl */
 #define AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE  1
+#define AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE  2
 
 struct drm_amdgpu_sched_in {
/* AMDGPU_SCHED_OP_* */
__u32   op;
__u32   fd;
__s32   priority;
-   __u32   flags;
+   __u32   ctx_id;
 };
 
 union drm_amdgpu_sched {
-- 
2.20.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/3] drm/amdgpu: Check if fd really is an amdgpu fd.

2019-01-29 Thread Bas Nieuwenhuizen
Otherwise we interpret the file private data as drm & amdgpu data
while it might not be, possibly allowing one to get memory corruption.

Signed-off-by: Bas Nieuwenhuizen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   |  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   | 16 
 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 10 +++---
 3 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index d67f8b1dfe80..17290cdb8ed8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -411,6 +411,8 @@ struct amdgpu_fpriv {
struct amdgpu_ctx_mgr   ctx_mgr;
 };
 
+int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv);
+
 int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  unsigned size, struct amdgpu_ib *ib);
 void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index c806f984bcc5..90a520034c89 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1176,6 +1176,22 @@ static const struct file_operations 
amdgpu_driver_kms_fops = {
 #endif
 };
 
+int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv)
+{
+struct drm_file *file;
+
+   if (!filp)
+   return -EINVAL;
+
+   if (filp->f_op != &amdgpu_driver_kms_fops) {
+   return -EINVAL;
+   }
+
+   file = filp->private_data;
+   *fpriv = file->driver_priv;
+   return 0;
+}
+
 static bool
 amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe,
 bool in_vblank_irq, int *vpos, int *hpos,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
index 1cafe8d83a4d..0b70410488b6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
@@ -54,16 +54,20 @@ static int amdgpu_sched_process_priority_override(struct 
amdgpu_device *adev,
  enum drm_sched_priority 
priority)
 {
struct file *filp = fget(fd);
-   struct drm_file *file;
struct amdgpu_fpriv *fpriv;
struct amdgpu_ctx *ctx;
uint32_t id;
+   int r;
 
if (!filp)
return -EINVAL;
 
-   file = filp->private_data;
-   fpriv = file->driver_priv;
+   r = amdgpu_file_to_fpriv(filp, &fpriv);
+   if (r) {
+   fput(filp);
+   return r;
+   }
+
idr_for_each_entry(&fpriv->ctx_mgr.ctx_handles, ctx, id)
amdgpu_ctx_priority_override(ctx, priority);
 
-- 
2.20.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: add AMDGPU_IB_FLAG_GET_START_SYNCOBJ to expose scheduled fence

2019-01-29 Thread Christian König

Am 28.01.19 um 22:52 schrieb Marek Olšák:

From: Marek Olšák 

Normal syncobjs signal when an IB finishes. Start syncobjs signal when
an IB starts.


That approach has quite a number of problems (for example you can't 
allocate memory at this point).


Better add a flag that we should only sync on scheduling for a 
dependency/syncobj instead.


Christian.



Signed-off-by: Marek Olšák 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h |  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 18 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  3 ++-
  include/uapi/drm/amdgpu_drm.h   | 13 -
  4 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index d67f8b1dfe80..8e2f7e558bc9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -453,20 +453,21 @@ struct amdgpu_cs_parser {
struct dma_fence*fence;
uint64_tbytes_moved_threshold;
uint64_tbytes_moved_vis_threshold;
uint64_tbytes_moved;
uint64_tbytes_moved_vis;
struct amdgpu_bo_list_entry *evictable;
  
  	/* user fence */

struct amdgpu_bo_list_entry uf_entry;
  
+	boolget_start_syncobj;

unsigned num_post_dep_syncobjs;
struct drm_syncobj **post_dep_syncobjs;
  };
  
  static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p,

  uint32_t ib_idx, int idx)
  {
return p->job->ibs[ib_idx].ptr[idx];
  }
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

index 1c49b8266d69..917f3818c61c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1022,20 +1022,23 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,
  chunk_ib->ip_instance, chunk_ib->ring,
  &entity);
if (r)
return r;
  
  		if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)

parser->job->preamble_status |=
AMDGPU_PREAMBLE_IB_PRESENT;
  
+		if (chunk_ib->flags & AMDGPU_IB_FLAG_GET_START_SYNCOBJ)

+   parser->get_start_syncobj = true;
+
if (parser->entity && parser->entity != entity)
return -EINVAL;
  
  		parser->entity = entity;
  
  		ring = to_amdgpu_ring(entity->rq->sched);

r =  amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
   chunk_ib->ib_bytes : 0, ib);
if (r) {
DRM_ERROR("Failed to get ib !\n");
@@ -1227,20 +1230,35 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
amdgpu_mn_lock(p->mn);
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
  
  		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {

r = -ERESTARTSYS;
goto error_abort;
}
}
  
+	if (p->get_start_syncobj) {

+   struct drm_syncobj *syncobj;
+
+   r = drm_syncobj_create(&syncobj, 0,
+  &job->base.s_fence->scheduled);
+   if (r)
+   goto error_abort;
+
+   r = drm_syncobj_get_handle(p->filp, syncobj,
+  &cs->out.start_syncobj);
+   if (r)
+   goto error_abort;
+   drm_syncobj_put(syncobj);
+   }
+
job->owner = p->filp;
p->fence = dma_fence_get(&job->base.s_fence->finished);
  
  	amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);

amdgpu_cs_post_dependencies(p);
  
  	if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&

!p->ctx->preamble_presented) {
job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
p->ctx->preamble_presented = true;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index c806f984bcc5..a230a30722d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -64,23 +64,24 @@
   * - 3.18.0 - Export gpu always on cu bitmap
   * - 3.19.0 - Add support for UVD MJPEG decode
   * - 3.20.0 - Add support for local BOs
   * - 3.21.0 - Add DRM_AMDGPU_FENCE_TO_HANDLE ioctl
   * - 3.22.0 - Add DRM_AMDGPU_SCHED ioctl
   * - 3.23.0 - Add query for VRAM lost counter
   * - 3.24.0 - Add high priority compute support for gfx9
   * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
   * - 3.26.0 - GFX9: Process AMDGPU_I