[PATCH 2/2] drm/sched: serialize job_timeout and scheduler

2021-08-31 Thread Monk Liu
tested-by: jingwen chen 
Signed-off-by: Monk Liu 
Signed-off-by: jingwen chen 
---
 drivers/gpu/drm/scheduler/sched_main.c | 24 
 1 file changed, 4 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index 3e0bbc7..87d72e9 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -319,19 +319,17 @@ static void drm_sched_job_timedout(struct work_struct 
*work)
sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
 
/* Protects against concurrent deletion in drm_sched_get_cleanup_job */
+   if (!__kthread_should_park(sched->thread))
+   kthread_park(sched->thread);
+
spin_lock(>job_list_lock);
job = list_first_entry_or_null(>pending_list,
   struct drm_sched_job, list);
 
if (job) {
-   /*
-* Remove the bad job so it cannot be freed by concurrent
-* drm_sched_cleanup_jobs. It will be reinserted back after 
sched->thread
-* is parked at which point it's safe.
-*/
-   list_del_init(>list);
spin_unlock(>job_list_lock);
 
+   /* vendor's timeout_job should call drm_sched_start() */
status = job->sched->ops->timedout_job(job);
 
/*
@@ -393,20 +391,6 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched, 
struct drm_sched_job *bad)
kthread_park(sched->thread);
 
/*
-* Reinsert back the bad job here - now it's safe as
-* drm_sched_get_cleanup_job cannot race against us and release the
-* bad job at this point - we parked (waited for) any in progress
-* (earlier) cleanups and drm_sched_get_cleanup_job will not be called
-* now until the scheduler thread is unparked.
-*/
-   if (bad && bad->sched == sched)
-   /*
-* Add at the head of the queue to reflect it was the earliest
-* job extracted.
-*/
-   list_add(>list, >pending_list);
-
-   /*
 * Iterate the job list from later to  earlier one and either deactive
 * their HW callbacks or remove them from pending list if they already
 * signaled.
-- 
2.7.4



[PATCH 1/2] drm/sched: fix the bug of time out calculation(v4)

2021-08-31 Thread Monk Liu
issue:
in cleanup_job the cancel_delayed_work will cancel a TO timer
even if its corresponding job is still running.

fix:
do not cancel the timer in cleanup_job, instead do the cancelling
only when the heading job is signaled, and if there is a "next" job
we start_timeout again.

v2:
further cleanup the logic, and do the TDR timer cancelling if the signaled job
is the last one in its scheduler.

v3:
change the issue description
remove the cancel_delayed_work in the beginning of the cleanup_job
recover the implementation of drm_sched_job_begin.

v4:
remove the kthread_should_park() checking in cleanup_job routine,
we should cleanup the signaled job asap

TODO:
1) introduce pause/resume scheduler in job_timeout to serialize the handling
of scheduler and job_timeout.
2) drop the bad job's del and insert in scheduler due to the above serialization
(no race issue anymore with the serialization)

tested-by: jingwen 
Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/scheduler/sched_main.c | 26 +-
 1 file changed, 9 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index a2a9536..3e0bbc7 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -676,15 +676,6 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
 {
struct drm_sched_job *job, *next;
 
-   /*
-* Don't destroy jobs while the timeout worker is running  OR thread
-* is being parked and hence assumed to not touch pending_list
-*/
-   if ((sched->timeout != MAX_SCHEDULE_TIMEOUT &&
-   !cancel_delayed_work(>work_tdr)) ||
-   kthread_should_park())
-   return NULL;
-
spin_lock(>job_list_lock);
 
job = list_first_entry_or_null(>pending_list,
@@ -693,17 +684,21 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
if (job && dma_fence_is_signaled(>s_fence->finished)) {
/* remove job from pending_list */
list_del_init(>list);
+
+   /* cancel this job's TO timer */
+   cancel_delayed_work(>work_tdr);
/* make the scheduled timestamp more accurate */
next = list_first_entry_or_null(>pending_list,
typeof(*next), list);
-   if (next)
+
+   if (next) {
next->s_fence->scheduled.timestamp =
job->s_fence->finished.timestamp;
-
+   /* start TO timer for next job */
+   drm_sched_start_timeout(sched);
+   }
} else {
job = NULL;
-   /* queue timeout for next job */
-   drm_sched_start_timeout(sched);
}
 
spin_unlock(>job_list_lock);
@@ -791,11 +786,8 @@ static int drm_sched_main(void *param)
  (entity = 
drm_sched_select_entity(sched))) ||
 kthread_should_stop());
 
-   if (cleanup_job) {
+   if (cleanup_job)
sched->ops->free_job(cleanup_job);
-   /* queue timeout for next job */
-   drm_sched_start_timeout(sched);
-   }
 
if (!entity)
continue;
-- 
2.7.4



[PATCH 2/2] drm/sched: serialize job_timeout and scheduler

2021-08-31 Thread Monk Liu
tested-by: jingwen chen 
Signed-off-by: Monk Liu 
Signed-off-by: jingwen chen 
---
 drivers/gpu/drm/scheduler/sched_main.c | 24 
 1 file changed, 4 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index ecf8140..894fdb24 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -319,19 +319,17 @@ static void drm_sched_job_timedout(struct work_struct 
*work)
sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
 
/* Protects against concurrent deletion in drm_sched_get_cleanup_job */
+   if (!__kthread_should_park(sched->thread))
+   kthread_park(sched->thread);
+
spin_lock(>job_list_lock);
job = list_first_entry_or_null(>pending_list,
   struct drm_sched_job, list);
 
if (job) {
-   /*
-* Remove the bad job so it cannot be freed by concurrent
-* drm_sched_cleanup_jobs. It will be reinserted back after 
sched->thread
-* is parked at which point it's safe.
-*/
-   list_del_init(>list);
spin_unlock(>job_list_lock);
 
+   /* vendor's timeout_job should call drm_sched_start() */
status = job->sched->ops->timedout_job(job);
 
/*
@@ -393,20 +391,6 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched, 
struct drm_sched_job *bad)
kthread_park(sched->thread);
 
/*
-* Reinsert back the bad job here - now it's safe as
-* drm_sched_get_cleanup_job cannot race against us and release the
-* bad job at this point - we parked (waited for) any in progress
-* (earlier) cleanups and drm_sched_get_cleanup_job will not be called
-* now until the scheduler thread is unparked.
-*/
-   if (bad && bad->sched == sched)
-   /*
-* Add at the head of the queue to reflect it was the earliest
-* job extracted.
-*/
-   list_add(>list, >pending_list);
-
-   /*
 * Iterate the job list from later to  earlier one and either deactive
 * their HW callbacks or remove them from pending list if they already
 * signaled.
-- 
2.7.4



[PATCH 1/2] drm/sched: fix the bug of time out calculation(v3)

2021-08-31 Thread Monk Liu
issue:
in cleanup_job the cancel_delayed_work will cancel a TO timer
even if its corresponding job is still running.

fix:
do not cancel the timer in cleanup_job, instead do the cancelling
only when the heading job is signaled, and if there is a "next" job
we start_timeout again.

v2:
further cleanup the logic, and do the TDR timer cancelling if the signaled job
is the last one in its scheduler.

v3:
change the issue description
remove the cancel_delayed_work in the beginning of the cleanup_job
recover the implementation of drm_sched_job_begin.

TODO:
1) introduce pause/resume scheduler in job_timeout to serialize the handling
of scheduler and job_timeout.
2) drop the bad job's del and insert in scheduler due to the above serialization
(no race issue anymore with the serialization)

tested-by: jingwen 
Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/scheduler/sched_main.c | 25 ++---
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index a2a9536..ecf8140 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -676,13 +676,7 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
 {
struct drm_sched_job *job, *next;
 
-   /*
-* Don't destroy jobs while the timeout worker is running  OR thread
-* is being parked and hence assumed to not touch pending_list
-*/
-   if ((sched->timeout != MAX_SCHEDULE_TIMEOUT &&
-   !cancel_delayed_work(>work_tdr)) ||
-   kthread_should_park())
+   if (kthread_should_park())
return NULL;
 
spin_lock(>job_list_lock);
@@ -693,17 +687,21 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
if (job && dma_fence_is_signaled(>s_fence->finished)) {
/* remove job from pending_list */
list_del_init(>list);
+
+   /* cancel this job's TO timer */
+   cancel_delayed_work(>work_tdr);
/* make the scheduled timestamp more accurate */
next = list_first_entry_or_null(>pending_list,
typeof(*next), list);
-   if (next)
+
+   if (next) {
next->s_fence->scheduled.timestamp =
job->s_fence->finished.timestamp;
-
+   /* start TO timer for next job */
+   drm_sched_start_timeout(sched);
+   }
} else {
job = NULL;
-   /* queue timeout for next job */
-   drm_sched_start_timeout(sched);
}
 
spin_unlock(>job_list_lock);
@@ -791,11 +789,8 @@ static int drm_sched_main(void *param)
  (entity = 
drm_sched_select_entity(sched))) ||
 kthread_should_stop());
 
-   if (cleanup_job) {
+   if (cleanup_job)
sched->ops->free_job(cleanup_job);
-   /* queue timeout for next job */
-   drm_sched_start_timeout(sched);
-   }
 
if (!entity)
continue;
-- 
2.7.4



[PATCH] drm/sched: fix the bug of time out calculation(v3)

2021-08-25 Thread Monk Liu
issue:
in cleanup_job the cancel_delayed_work will cancel a TO timer
even if its corresponding job is still running.

fix:
do not cancel the timer in cleanup_job, instead do the cancelling
only when the heading job is signaled, and if there is a "next" job
we start_timeout again.

v2:
further cleanup the logic, and do the TDR timer cancelling if the signaled job
is the last one in its scheduler.

v3:
change the issue description
remove the cancel_delayed_work in the beginning of the cleanup_job
recover the implementation of drm_sched_job_begin.

TODO:
1) introduce pause/resume scheduler in job_timeout to serialize the handling
of scheduler and job_timeout.
2) drop the bad job's del and insert in scheduler due to the above serialization
(no race issue anymore with the serialization)

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/scheduler/sched_main.c | 25 ++---
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index a2a9536..ecf8140 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -676,13 +676,7 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
 {
struct drm_sched_job *job, *next;
 
-   /*
-* Don't destroy jobs while the timeout worker is running  OR thread
-* is being parked and hence assumed to not touch pending_list
-*/
-   if ((sched->timeout != MAX_SCHEDULE_TIMEOUT &&
-   !cancel_delayed_work(>work_tdr)) ||
-   kthread_should_park())
+   if (kthread_should_park())
return NULL;
 
spin_lock(>job_list_lock);
@@ -693,17 +687,21 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
if (job && dma_fence_is_signaled(>s_fence->finished)) {
/* remove job from pending_list */
list_del_init(>list);
+
+   /* cancel this job's TO timer */
+   cancel_delayed_work(>work_tdr);
/* make the scheduled timestamp more accurate */
next = list_first_entry_or_null(>pending_list,
typeof(*next), list);
-   if (next)
+
+   if (next) {
next->s_fence->scheduled.timestamp =
job->s_fence->finished.timestamp;
-
+   /* start TO timer for next job */
+   drm_sched_start_timeout(sched);
+   }
} else {
job = NULL;
-   /* queue timeout for next job */
-   drm_sched_start_timeout(sched);
}
 
spin_unlock(>job_list_lock);
@@ -791,11 +789,8 @@ static int drm_sched_main(void *param)
  (entity = 
drm_sched_select_entity(sched))) ||
 kthread_should_stop());
 
-   if (cleanup_job) {
+   if (cleanup_job)
sched->ops->free_job(cleanup_job);
-   /* queue timeout for next job */
-   drm_sched_start_timeout(sched);
-   }
 
if (!entity)
continue;
-- 
2.7.4



[PATCH] drm/sched: fix the bug of time out calculation(v2)

2021-08-24 Thread Monk Liu
the original logic is wrong in that the timeout will not be retriggered
after the previous job signaled, and that leads to the scenario where all
jobs in the same scheduler share the same timeout timer from the very
beginning job in this scheduler, which is wrong.

we should modify the timer every time a previous job signals.

v2:
further cleanup the logic, and do the TDR timer cancelling if the signaled job
is the last one in its scheduler.

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/scheduler/sched_main.c | 29 -
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index a2a9536..8c102ac 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -305,8 +305,17 @@ static void drm_sched_job_begin(struct drm_sched_job 
*s_job)
struct drm_gpu_scheduler *sched = s_job->sched;
 
spin_lock(>job_list_lock);
-   list_add_tail(_job->list, >pending_list);
-   drm_sched_start_timeout(sched);
+   if (list_empty(>pending_list)) {
+   list_add_tail(_job->list, >pending_list);
+   drm_sched_start_timeout(sched);
+   } else {
+   /* the old jobs in pending list are not finished yet
+* no need to restart TDR timer here, it is already
+* handled by drm_sched_get_cleanup_job
+*/
+   list_add_tail(_job->list, >pending_list);
+   }
+
spin_unlock(>job_list_lock);
 }
 
@@ -693,17 +702,22 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
if (job && dma_fence_is_signaled(>s_fence->finished)) {
/* remove job from pending_list */
list_del_init(>list);
+
/* make the scheduled timestamp more accurate */
next = list_first_entry_or_null(>pending_list,
typeof(*next), list);
-   if (next)
+   if (next) {
+   /* if we still have job in pending list we need modify 
the TDR timer */
+   mod_delayed_work(system_wq, >work_tdr, 
sched->timeout);
next->s_fence->scheduled.timestamp =
job->s_fence->finished.timestamp;
+   } else {
+   /* cancel the TDR timer if no job in pending list */
+   cancel_delayed_work(>work_tdr);
+   }
 
} else {
job = NULL;
-   /* queue timeout for next job */
-   drm_sched_start_timeout(sched);
}
 
spin_unlock(>job_list_lock);
@@ -791,11 +805,8 @@ static int drm_sched_main(void *param)
  (entity = 
drm_sched_select_entity(sched))) ||
 kthread_should_stop());
 
-   if (cleanup_job) {
+   if (cleanup_job)
sched->ops->free_job(cleanup_job);
-   /* queue timeout for next job */
-   drm_sched_start_timeout(sched);
-   }
 
if (!entity)
continue;
-- 
2.7.4



[PATCH] drm/sched: fix the bug of time out calculation

2021-08-24 Thread Monk Liu
the original logic is wrong in that the timeout will not be retriggered
after the previous job signaled, and that leads to the scenario where all
jobs in the same scheduler share the same timeout timer from the very
beginning job in this scheduler, which is wrong.

we should modify the timer every time a previous job signals.

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/scheduler/sched_main.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index a2a9536..fb27025 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -235,6 +235,13 @@ static void drm_sched_start_timeout(struct 
drm_gpu_scheduler *sched)
schedule_delayed_work(>work_tdr, sched->timeout);
 }
 
+static void drm_sched_restart_timeout(struct drm_gpu_scheduler *sched)
+{
+   if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
+   !list_empty(>pending_list))
+   mod_delayed_work(system_wq, >work_tdr, sched->timeout);
+}
+
 /**
  * drm_sched_fault - immediately start timeout handler
  *
@@ -693,6 +700,11 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
if (job && dma_fence_is_signaled(>s_fence->finished)) {
/* remove job from pending_list */
list_del_init(>list);
+
+   /* once the job deleted from pending list we should restart
+* the timeout calculation for the next job.
+*/
+   drm_sched_restart_timeout(sched);
/* make the scheduled timestamp more accurate */
next = list_first_entry_or_null(>pending_list,
typeof(*next), list);
-- 
2.7.4



[PATCH] drm/amdgpu: fix reload KMD hang on GFX10 KIQ

2020-08-09 Thread Monk Liu
GFX10 KIQ will hang if we try below steps:
modprobe amdgpu
rmmod amdgpu
modprobe amdgpu sched_hw_submission=4

Due to KIQ always living there even after KMD is unloaded,
when doing the reload KIQ will crash upon its registers
being programmed with different values from the previous loading
(the config like HQD addr, ring size, is easily changed if we alter
the sched_hw_submission)

the fix is we must inactivate KIQ first before touching any
of its registers

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 622f442..0702c94 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -6435,6 +6435,10 @@ static int gfx_v10_0_kiq_init_register(struct 
amdgpu_ring *ring)
struct v10_compute_mqd *mqd = ring->mqd_ptr;
int j;
 
+   /* inactivate the queue */
+   if (amdgpu_sriov_vf(adev))
+   WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, 0);
+
/* disable wptr polling */
WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
 
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/2] drm/amdgpu: introduce a new parameter to configure how many KCQ we want(v5)

2020-07-31 Thread Monk Liu
what:
the MQD's save and restore of KCQ (kernel compute queue)
cost lots of clocks during world switch which impacts a lot
to multi-VF performance

how:
introduce a parameter to control the number of KCQ to avoid
performance drop if there is no kernel compute queue needed

notes:
this parameter only affects gfx 8/9/10

v2:
refine namings

v3:
choose queues for each ring, trying our best to spread them across pipes evenly.

v4:
fix indentation
some cleanups in the gfx_compute_queue_acquire()

v5:
further fixes on indentation
more cleanups in gfx_compute_queue_acquire()

TODO:
in the future we will let the hypervisor driver set this parameter
automatically, thus no need for the user to configure it through
modprobe in the virtual machine

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  5 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c|  4 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c| 49 --
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 30 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 29 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 31 ++-
 7 files changed, 76 insertions(+), 73 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index e97c088..de11136 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -201,6 +201,7 @@ extern int amdgpu_si_support;
 #ifdef CONFIG_DRM_AMDGPU_CIK
 extern int amdgpu_cik_support;
 #endif
+extern int amdgpu_num_kcq;
 
 #define AMDGPU_VM_MAX_NUM_CTX  4096
 #define AMDGPU_SG_THRESHOLD(256*1024*1024)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 62ecac9..cf445bab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1199,6 +1199,11 @@ static int amdgpu_device_check_arguments(struct 
amdgpu_device *adev)
 
amdgpu_gmc_tmz_set(adev);
 
+   if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) {
+   amdgpu_num_kcq = 8;
+   dev_warn(adev->dev, "set kernel compute queue number to 8 due 
to invalid paramter provided by user\n");
+   }
+
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 6291f5f..b545c40 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -150,6 +150,7 @@ int amdgpu_noretry;
 int amdgpu_force_asic_type = -1;
 int amdgpu_tmz = 0;
 int amdgpu_reset_method = -1; /* auto */
+int amdgpu_num_kcq = -1;
 
 struct amdgpu_mgpu_info mgpu_info = {
.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
@@ -765,6 +766,9 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);
 MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = 
legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
 module_param_named(reset_method, amdgpu_reset_method, int, 0444);
 
+MODULE_PARM_DESC(num_kcq, "number of kernel compute queue user want to setup 
(8 if set to greater than 8 or less than 0, only affect gfx 8+)");
+module_param_named(num_kcq, amdgpu_num_kcq, int, 0444);
+
 static const struct pci_device_id pciidlist[] = {
 #ifdef  CONFIG_DRM_AMDGPU_SI
{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 8eff017..0cd9de6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -202,40 +202,29 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct 
amdgpu_device *adev,
 
 void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
 {
-   int i, queue, pipe, mec;
+   int i, queue, pipe;
bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
-
-   /* policy for amdgpu compute queue ownership */
-   for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
-   queue = i % adev->gfx.mec.num_queue_per_pipe;
-   pipe = (i / adev->gfx.mec.num_queue_per_pipe)
-   % adev->gfx.mec.num_pipe_per_mec;
-   mec = (i / adev->gfx.mec.num_queue_per_pipe)
-   / adev->gfx.mec.num_pipe_per_mec;
-
-   /* we've run out of HW */
-   if (mec >= adev->gfx.mec.num_mec)
-   break;
-
-   if (multipipe_policy) {
-   /* policy: amdgpu owns the first two queues of the 
first MEC */
-   if (mec == 0 && queue < 2)
-   set_bit(i, adev->gfx.mec.queue_bitmap);
-   } else {
-   /* policy: amdgpu owns all queues in the first pipe */
-   if (mec == 0 && pipe == 0)
-

[PATCH 1/2] drm/amdgpu: fix reload KMD hang on KIQ

2020-07-31 Thread Monk Liu
KIQ will hang if we try below steps:
modprobe amdgpu
rmmod amdgpu
modprobe amdgpu sched_hw_submission=4

the cause is that due to KIQ always living there even
after we unload KMD, when doing the reload of KMD
KIQ will crash upon its registers being programmed with
different values from the previous configuration (the config
like HQD addr, ring size, is easily changed if we alter
the sched_hw_submission)

the fix is we must inactivate KIQ first before touching any
of its registers

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index db9f1e8..f571e25 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -6433,6 +6433,9 @@ static int gfx_v10_0_kiq_init_register(struct amdgpu_ring 
*ring)
struct v10_compute_mqd *mqd = ring->mqd_ptr;
int j;
 
+   /* activate the queue */
+   WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, 0);
+
/* disable wptr polling */
WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
 
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want(v4)

2020-07-30 Thread Monk Liu
what:
the MQD's save and restore of KCQ (kernel compute queue)
cost lots of clocks during world switch which impacts a lot
to multi-VF performance

how:
introduce a parameter to control the number of KCQ to avoid
performance drop if there is no kernel compute queue needed

notes:
this parameter only affects gfx 8/9/10

v2:
refine namings

v3:
choose queues for each ring, trying our best to spread them across pipes evenly.

v4:
fix indentation
some cleanups in the gfx_compute_queue_acquire() function

TODO:
in the future we will let the hypervisor driver set this parameter
automatically, thus no need for the user to configure it through
modprobe in the virtual machine

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  5 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c|  4 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c| 52 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 30 +
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 29 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 31 +-
 7 files changed, 80 insertions(+), 72 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index e97c088..de11136 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -201,6 +201,7 @@ extern int amdgpu_si_support;
 #ifdef CONFIG_DRM_AMDGPU_CIK
 extern int amdgpu_cik_support;
 #endif
+extern int amdgpu_num_kcq;
 
 #define AMDGPU_VM_MAX_NUM_CTX  4096
 #define AMDGPU_SG_THRESHOLD(256*1024*1024)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 62ecac9..cf445bab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1199,6 +1199,11 @@ static int amdgpu_device_check_arguments(struct 
amdgpu_device *adev)
 
amdgpu_gmc_tmz_set(adev);
 
+   if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) {
+   amdgpu_num_kcq = 8;
+   dev_warn(adev->dev, "set kernel compute queue number to 8 due 
to invalid paramter provided by user\n");
+   }
+
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 6291f5f..b545c40 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -150,6 +150,7 @@ int amdgpu_noretry;
 int amdgpu_force_asic_type = -1;
 int amdgpu_tmz = 0;
 int amdgpu_reset_method = -1; /* auto */
+int amdgpu_num_kcq = -1;
 
 struct amdgpu_mgpu_info mgpu_info = {
.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
@@ -765,6 +766,9 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);
 MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = 
legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
 module_param_named(reset_method, amdgpu_reset_method, int, 0444);
 
+MODULE_PARM_DESC(num_kcq, "number of kernel compute queue user want to setup 
(8 if set to greater than 8 or less than 0, only affect gfx 8+)");
+module_param_named(num_kcq, amdgpu_num_kcq, int, 0444);
+
 static const struct pci_device_id pciidlist[] = {
 #ifdef  CONFIG_DRM_AMDGPU_SI
{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 8eff017..b43df8e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -202,40 +202,34 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct 
amdgpu_device *adev,
 
 void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
 {
-   int i, queue, pipe, mec;
+   int i, queue, pipe;
bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
+   int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
+adev->gfx.mec.num_queue_per_pipe,
+adev->gfx.num_compute_rings);
+
+   if (multipipe_policy) {
+   /* policy: make queues evenly cross all pipes on MEC1 only */
+   for (i = 0; i < max_queues_per_mec; i++) {
+   pipe = i % adev->gfx.mec.num_pipe_per_mec;
+   queue = (i / adev->gfx.mec.num_pipe_per_mec) %
+   adev->gfx.mec.num_queue_per_pipe;
+
+   set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
+   adev->gfx.mec.queue_bitmap);
+   }
+   } else {
+   /* policy: amdgpu owns all queues in the given pipe */
+   for (i = 0; i < max_queues_per_mec; ++i) {
+   queue = i % adev->gfx.mec.num_queue_per_pipe;
+   pipe = (i / adev->gfx.mec.num_queue_per_pipe)
+ 

[PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want(v4)

2020-07-30 Thread Monk Liu
what:
the MQD's save and restore of KCQ (kernel compute queue)
cost lots of clocks during world switch which impacts a lot
to multi-VF performance

how:
introduce a parameter to control the number of KCQ to avoid
performance drop if there is no kernel compute queue needed

notes:
this parameter only affects gfx 8/9/10

v2:
refine namings

v3:
choose queues for each ring, trying our best to spread them across pipes evenly.

v4:
fix indentation
some cleanups in the gfx_compute_queue_acquire() function

TODO:
in the future we will let the hypervisor driver set this parameter
automatically, thus no need for the user to configure it through
modprobe in the virtual machine

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  5 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c|  4 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c| 52 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 30 +
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 29 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 31 +-
 7 files changed, 80 insertions(+), 72 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index e97c088..de11136 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -201,6 +201,7 @@ extern int amdgpu_si_support;
 #ifdef CONFIG_DRM_AMDGPU_CIK
 extern int amdgpu_cik_support;
 #endif
+extern int amdgpu_num_kcq;
 
 #define AMDGPU_VM_MAX_NUM_CTX  4096
 #define AMDGPU_SG_THRESHOLD(256*1024*1024)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 62ecac9..cf445bab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1199,6 +1199,11 @@ static int amdgpu_device_check_arguments(struct 
amdgpu_device *adev)
 
amdgpu_gmc_tmz_set(adev);
 
+   if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) {
+   amdgpu_num_kcq = 8;
+   dev_warn(adev->dev, "set kernel compute queue number to 8 due 
to invalid paramter provided by user\n");
+   }
+
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 6291f5f..b545c40 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -150,6 +150,7 @@ int amdgpu_noretry;
 int amdgpu_force_asic_type = -1;
 int amdgpu_tmz = 0;
 int amdgpu_reset_method = -1; /* auto */
+int amdgpu_num_kcq = -1;
 
 struct amdgpu_mgpu_info mgpu_info = {
.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
@@ -765,6 +766,9 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);
 MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = 
legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
 module_param_named(reset_method, amdgpu_reset_method, int, 0444);
 
+MODULE_PARM_DESC(num_kcq, "number of kernel compute queue user want to setup 
(8 if set to greater than 8 or less than 0, only affect gfx 8+)");
+module_param_named(num_kcq, amdgpu_num_kcq, int, 0444);
+
 static const struct pci_device_id pciidlist[] = {
 #ifdef  CONFIG_DRM_AMDGPU_SI
{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 8eff017..24b3461 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -202,40 +202,34 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct 
amdgpu_device *adev,
 
 void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
 {
-   int i, queue, pipe, mec;
+   int i, queue, pipe;
bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
+   int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
+adev->gfx.mec.num_queue_per_pipe,
+adev->gfx.num_compute_rings);
+
+   if (multipipe_policy) {
+   /* policy: make queues evenly cross all pipes on MEC1 only */
+   for (i = 0; i < max_queues_per_mec; i++) {
+   pipe = i % adev->gfx.mec.num_pipe_per_mec;
+   queue = (i / adev->gfx.mec.num_pipe_per_mec) %
+   adev->gfx.mec.num_queue_per_pipe;
+
+   set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
+   adev->gfx.mec.queue_bitmap);
+   }
+   } else {
+   /* policy: amdgpu owns all queues in the given pipe */
+   for (i = 0; i < max_queues_per_mec; ++i) {
+   queue = i % adev->gfx.mec.num_queue_per_pipe;
+   pipe = (i / adev->gfx.mec.num_queue_per_pipe)
+ 

[PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want(v3)

2020-07-28 Thread Monk Liu
what:
the MQD's save and restore of KCQ (kernel compute queue)
cost lots of clocks during world switch which impacts a lot
to multi-VF performance

how:
introduce a parameter to control the number of KCQ to avoid
performance drop if there is no kernel compute queue needed

notes:
this parameter only affects gfx 8/9/10

v2:
refine namings

v3:
choose queues for each ring to that try best to cross pipes evenly.

TODO:
in the future we will let the hypervisor driver set this parameter
automatically, thus there will be no need for the user to configure it
through modprobe in the virtual machine

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  5 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c|  4 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c| 58 +++---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 30 
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 29 +++
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 31 
 7 files changed, 87 insertions(+), 71 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index e97c088..de11136 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -201,6 +201,7 @@ extern int amdgpu_si_support;
 #ifdef CONFIG_DRM_AMDGPU_CIK
 extern int amdgpu_cik_support;
 #endif
+extern int amdgpu_num_kcq;
 
 #define AMDGPU_VM_MAX_NUM_CTX  4096
 #define AMDGPU_SG_THRESHOLD(256*1024*1024)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 62ecac9..cf445bab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1199,6 +1199,11 @@ static int amdgpu_device_check_arguments(struct 
amdgpu_device *adev)
 
amdgpu_gmc_tmz_set(adev);
 
+   if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) {
+   amdgpu_num_kcq = 8;
+   dev_warn(adev->dev, "set kernel compute queue number to 8 due 
to invalid paramter provided by user\n");
+   }
+
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 6291f5f..b545c40 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -150,6 +150,7 @@ int amdgpu_noretry;
 int amdgpu_force_asic_type = -1;
 int amdgpu_tmz = 0;
 int amdgpu_reset_method = -1; /* auto */
+int amdgpu_num_kcq = -1;
 
 struct amdgpu_mgpu_info mgpu_info = {
.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
@@ -765,6 +766,9 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);
 MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = 
legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
 module_param_named(reset_method, amdgpu_reset_method, int, 0444);
 
+MODULE_PARM_DESC(num_kcq, "number of kernel compute queue user want to setup 
(8 if set to greater than 8 or less than 0, only affect gfx 8+)");
+module_param_named(num_kcq, amdgpu_num_kcq, int, 0444);
+
 static const struct pci_device_id pciidlist[] = {
 #ifdef  CONFIG_DRM_AMDGPU_SI
{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 8eff017..f83a9a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -202,40 +202,42 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct 
amdgpu_device *adev,
 
 void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
 {
-   int i, queue, pipe, mec;
+   int i, queue, pipe;
bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
+   int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
+
adev->gfx.mec.num_queue_per_pipe,
+
adev->gfx.num_compute_rings);
+
+   if (multipipe_policy) {
+   /* policy: make queues evenly cross all pipes on MEC1 only */
+   for (i = 0; i < max_queues_per_mec; i++) {
+   pipe = i % adev->gfx.mec.num_pipe_per_mec;
+   queue = (i / adev->gfx.mec.num_pipe_per_mec) %
+   adev->gfx.mec.num_queue_per_pipe;
+
+   set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
+   adev->gfx.mec.queue_bitmap);
+   }
+   } else {
+   int mec;
 
-   /* policy for amdgpu compute queue ownership */
-   for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
-   queue = i % adev->gfx.mec.num_queue_per_pipe;
-   pipe = (i / adev->gfx.mec.num_queue_per_pipe)
-   % adev->gfx.mec

[PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want(v2)

2020-07-27 Thread Monk Liu
what:
the MQD's save and restore of kernel compute queues cost lots of clocks
during world switch which impacts a lot to multi-VF performance

how:
introduce a parameter to control the number of kernel compute queues to
avoid performance drop if there is no kernel compute queue needed

notes:
this parameter only affects gfx 8/9/10

TODO:
in the future we will let the hypervisor driver set this parameter
automatically, thus there will be no need for the user to configure it
through modprobe in the virtual machine

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  5 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c|  4 
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c| 27 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 30 +++--
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 29 ++--
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 31 +++---
 7 files changed, 71 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index e97c088..71a3d6a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -201,6 +201,7 @@ extern int amdgpu_si_support;
 #ifdef CONFIG_DRM_AMDGPU_CIK
 extern int amdgpu_cik_support;
 #endif
+extern int amdgpu_num_kcq_user_set;
 
 #define AMDGPU_VM_MAX_NUM_CTX  4096
 #define AMDGPU_SG_THRESHOLD(256*1024*1024)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 62ecac9..18b93ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1199,6 +1199,11 @@ static int amdgpu_device_check_arguments(struct 
amdgpu_device *adev)
 
amdgpu_gmc_tmz_set(adev);
 
+   if (amdgpu_num_kcq_user_set > 8 || amdgpu_num_kcq_user_set < 0) {
+   amdgpu_num_kcq_user_set = 8;
+   dev_warn(adev-dev, "set KCQ number to 8 due to invalid paramter 
provided by user\n");
+   }
+
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 6291f5f..03a94e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -150,6 +150,7 @@ int amdgpu_noretry;
 int amdgpu_force_asic_type = -1;
 int amdgpu_tmz = 0;
 int amdgpu_reset_method = -1; /* auto */
+int amdgpu_num_kcq_user_set = 8;
 
 struct amdgpu_mgpu_info mgpu_info = {
.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
@@ -765,6 +766,9 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);
 MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = 
legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
 module_param_named(reset_method, amdgpu_reset_method, int, 0444);
 
+MODULE_PARM_DESC(num_kcq, "number of KCQ user want to setup (8 if set to 
greater than 8 or less than 0, only affect gfx 8+)");
+module_param_named(num_kcq, amdgpu_num_kcq_user_set, int, 0444);
+
 static const struct pci_device_id pciidlist[] = {
 #ifdef  CONFIG_DRM_AMDGPU_SI
{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 8eff017..0b59049 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -202,7 +202,7 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct 
amdgpu_device *adev,
 
 void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
 {
-   int i, queue, pipe, mec;
+   int i, queue, pipe, mec, j = 0;
bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
 
/* policy for amdgpu compute queue ownership */
@@ -219,23 +219,24 @@ void amdgpu_gfx_compute_queue_acquire(struct 
amdgpu_device *adev)
 
if (multipipe_policy) {
/* policy: amdgpu owns the first two queues of the 
first MEC */
-   if (mec == 0 && queue < 2)
-   set_bit(i, adev->gfx.mec.queue_bitmap);
+   if (mec == 0 && queue < 2) {
+   if (j++ < adev->gfx.num_compute_rings)
+   set_bit(i, adev->gfx.mec.queue_bitmap);
+   else
+   break;
+   }
} else {
/* policy: amdgpu owns all queues in the first pipe */
-   if (mec == 0 && pipe == 0)
-   set_bit(i, adev->gfx.mec.queue_bitmap);
+   if (mec == 0 && pipe == 0) {
+   if (j++ < adev->gfx.num_compute_rings)
+

[PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want

2020-07-27 Thread Monk Liu
what:
KCQ cost many clocks during world switch which impacts a lot to multi-VF
performance

how:
introduce a parameter to control the number of KCQ to avoid performance
drop if there is no KCQ needed

notes:
this parameter only affects gfx 8/9/10

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  3 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c|  4 
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c| 27 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 30 +++--
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 29 ++--
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 31 +++---
 7 files changed, 69 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index e97c088..71a3d6a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -201,6 +201,7 @@ extern int amdgpu_si_support;
 #ifdef CONFIG_DRM_AMDGPU_CIK
 extern int amdgpu_cik_support;
 #endif
+extern int amdgpu_num_kcq_user_set;
 
 #define AMDGPU_VM_MAX_NUM_CTX  4096
 #define AMDGPU_SG_THRESHOLD(256*1024*1024)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 62ecac9..61c7583 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1199,6 +1199,9 @@ static int amdgpu_device_check_arguments(struct 
amdgpu_device *adev)
 
amdgpu_gmc_tmz_set(adev);
 
+   if (amdgpu_num_kcq_user_set > 8 || amdgpu_num_kcq_user_set < 0)
+   amdgpu_num_kcq_user_set = 8;
+
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 6291f5f..03a94e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -150,6 +150,7 @@ int amdgpu_noretry;
 int amdgpu_force_asic_type = -1;
 int amdgpu_tmz = 0;
 int amdgpu_reset_method = -1; /* auto */
+int amdgpu_num_kcq_user_set = 8;
 
 struct amdgpu_mgpu_info mgpu_info = {
.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
@@ -765,6 +766,9 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);
 MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = 
legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
 module_param_named(reset_method, amdgpu_reset_method, int, 0444);
 
+MODULE_PARM_DESC(num_kcq, "number of KCQ user want to setup (8 if set to 
greater than 8 or less than 0, only affect gfx 8+)");
+module_param_named(num_kcq, amdgpu_num_kcq_user_set, int, 0444);
+
 static const struct pci_device_id pciidlist[] = {
 #ifdef  CONFIG_DRM_AMDGPU_SI
{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 8eff017..0b59049 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -202,7 +202,7 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct 
amdgpu_device *adev,
 
 void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
 {
-   int i, queue, pipe, mec;
+   int i, queue, pipe, mec, j = 0;
bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
 
/* policy for amdgpu compute queue ownership */
@@ -219,23 +219,24 @@ void amdgpu_gfx_compute_queue_acquire(struct 
amdgpu_device *adev)
 
if (multipipe_policy) {
/* policy: amdgpu owns the first two queues of the 
first MEC */
-   if (mec == 0 && queue < 2)
-   set_bit(i, adev->gfx.mec.queue_bitmap);
+   if (mec == 0 && queue < 2) {
+   if (j++ < adev->gfx.num_compute_rings)
+   set_bit(i, adev->gfx.mec.queue_bitmap);
+   else
+   break;
+   }
} else {
/* policy: amdgpu owns all queues in the first pipe */
-   if (mec == 0 && pipe == 0)
-   set_bit(i, adev->gfx.mec.queue_bitmap);
+   if (mec == 0 && pipe == 0) {
+   if (j++ < adev->gfx.num_compute_rings)
+   set_bit(i, adev->gfx.mec.queue_bitmap);
+   else
+   break;
+   }
}
}
 
-   /* update the number of active compute rings */
-   adev->gfx.num_compute_rings =
-   bitmap_weight(adev->gfx.mec.queue_bitmap, 
AMDGPU_MAX_COMPUTE_QUEUES);
-
- 

[PATCH] drm/amdgpu: make IB test synchronize with init for SRIOV(v2)

2020-06-29 Thread Monk Liu
issue:
originally we kicked off the IB test asynchronously with the driver's
init, thus the IB test may still be running when the driver loading is
done (modprobe amdgpu done).
if we shut down the VM immediately after the amdgpu driver is loaded,
the GPU may hang because the IB test is still running

fix:
flush the delayed_init routine at the bottom of device_init
to avoid driver loading done prior to the IB test completes

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 457f5d2..7a4e965 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3295,6 +3295,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
queue_delayed_work(system_wq, >delayed_init_work,
   msecs_to_jiffies(AMDGPU_RESUME_MS));
 
+   if (amdgpu_sriov_vf(adev))
+   flush_delayed_work(>delayed_init_work);
+
r = sysfs_create_files(>dev->kobj, amdgpu_dev_attributes);
if (r) {
dev_err(adev->dev, "Could not create amdgpu device attr\n");
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: make IB test synchronize with init for SRIOV

2020-06-29 Thread Monk Liu
From: pengzhou 

issue:
originally we kicked off the IB test asynchronously with the driver's init,
thus the IB test may still be running when the driver loading is done
(modprobe amdgpu done).
if we shutdown VM immediately after amdgpu driver loaded then GPU may
hang because the IB test is still running

fix:
make the IB test synchronize with driver init so that it won't still be
running when we shut down the VM.

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 29 -
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 457f5d2..4f54660 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3292,8 +3292,16 @@ int amdgpu_device_init(struct amdgpu_device *adev,
/* must succeed. */
amdgpu_ras_resume(adev);
 
-   queue_delayed_work(system_wq, >delayed_init_work,
+   if (amdgpu_sriov_vf(adev)) {
+   r = amdgpu_ib_ring_tests(adev);
+   if (r) {
+   DRM_ERROR("ib ring test failed (%d).\n", r);
+   return r;
+   }
+   } else {
+   queue_delayed_work(system_wq, >delayed_init_work,
   msecs_to_jiffies(AMDGPU_RESUME_MS));
+   }
 
r = sysfs_create_files(>dev->kobj, amdgpu_dev_attributes);
if (r) {
@@ -3329,7 +3337,8 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
int r;
 
DRM_INFO("amdgpu: finishing device.\n");
-   flush_delayed_work(>delayed_init_work);
+   if (!amdgpu_sriov_vf(adev))
+   flush_delayed_work(>delayed_init_work);
adev->shutdown = true;
 
/* make sure IB test finished before entering exclusive mode
@@ -3425,7 +3434,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool 
fbcon)
if (fbcon)
amdgpu_fbdev_set_suspend(adev, 1);
 
-   cancel_delayed_work_sync(>delayed_init_work);
+   if (!amdgpu_sriov_vf(adev))
+   cancel_delayed_work_sync(>delayed_init_work);
 
if (!amdgpu_device_has_dc_support(adev)) {
/* turn off display hw */
@@ -3528,8 +3538,16 @@ int amdgpu_device_resume(struct drm_device *dev, bool 
fbcon)
if (r)
return r;
 
-   queue_delayed_work(system_wq, >delayed_init_work,
+   if (amdgpu_sriov_vf(adev)) {
+   r = amdgpu_ib_ring_tests(adev);
+   if (r) {
+   DRM_ERROR("ib ring test failed (%d).\n", r);
+   return r;
+   }
+   } else {
+   queue_delayed_work(system_wq, >delayed_init_work,
   msecs_to_jiffies(AMDGPU_RESUME_MS));
+   }
 
if (!amdgpu_device_has_dc_support(adev)) {
/* pin cursors */
@@ -3554,7 +3572,8 @@ int amdgpu_device_resume(struct drm_device *dev, bool 
fbcon)
return r;
 
/* Make sure IB tests flushed */
-   flush_delayed_work(>delayed_init_work);
+   if (!amdgpu_sriov_vf(adev))
+   flush_delayed_work(>delayed_init_work);
 
/* blat the mode back in */
if (fbcon) {
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: fix one vf mode

2020-04-26 Thread Monk Liu
still need to call system_enable_features for one vf mode
but need to block the SMU request from SRIOV case and allows
the software side change pass in "smu_v11_0_system_features_control"

by this patch the pp_dpm_mclk/sclk now show correct output

Signed-off-by: Monk Liu 
Signed-off-by: Rohit 
---
 drivers/gpu/drm/amd/powerplay/amdgpu_smu.c |  8 
 drivers/gpu/drm/amd/powerplay/smu_v11_0.c  | 13 +
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c 
b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
index 5964d63..bfb026c 100644
--- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
@@ -1183,7 +1183,15 @@ static int smu_smc_table_hw_init(struct smu_context *smu,
return ret;
}
}
+   } else {
+   /* we need to enable some SMU features for one vf mode */
+   if (amdgpu_sriov_is_pp_one_vf(adev)) {
+   ret = smu_system_features_control(smu, true);
+   if (ret)
+   return ret;
+   }
}
+
if (adev->asic_type != CHIP_ARCTURUS) {
ret = smu_notify_display_change(smu);
if (ret)
diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c 
b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
index 3e1b3ed..6fb2fd1 100644
--- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
@@ -764,6 +764,9 @@ int smu_v11_0_write_pptable(struct smu_context *smu)
struct smu_table_context *table_context = >smu_table;
int ret = 0;
 
+   if (amdgpu_sriov_vf(smu->adev))
+   return 0;
+
ret = smu_update_table(smu, SMU_TABLE_PPTABLE, 0,
   table_context->driver_pptable, true);
 
@@ -922,10 +925,12 @@ int smu_v11_0_system_features_control(struct smu_context 
*smu,
uint32_t feature_mask[2];
int ret = 0;
 
-   ret = smu_send_smc_msg(smu, (en ? SMU_MSG_EnableAllSmuFeatures :
-SMU_MSG_DisableAllSmuFeatures), NULL);
-   if (ret)
-   return ret;
+   if (!amdgpu_sriov_vf(smu->adev)) {
+   ret = smu_send_smc_msg(smu, (en ? SMU_MSG_EnableAllSmuFeatures :
+
SMU_MSG_DisableAllSmuFeatures), NULL);
+   if (ret)
+   return ret;
+   }
 
bitmap_zero(feature->enabled, feature->feature_num);
bitmap_zero(feature->supported, feature->feature_num);
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/2] drm/amdgpu: extend threshold of waiting FLR_COMPLETE

2020-04-23 Thread Monk Liu
to 5s to satisfy WHOLE GPU reset, which needs 3+ seconds to
finish

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h | 2 +-
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
index 52a6975..83b453f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
@@ -26,7 +26,7 @@
 
 #define AI_MAILBOX_POLL_ACK_TIMEDOUT   500
 #define AI_MAILBOX_POLL_MSG_TIMEDOUT   12000
-#define AI_MAILBOX_POLL_FLR_TIMEDOUT   500
+#define AI_MAILBOX_POLL_FLR_TIMEDOUT   5000
 
 enum idh_request {
IDH_REQ_GPU_INIT_ACCESS = 1,
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
index 45bcf43..52605e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
@@ -26,7 +26,7 @@
 
 #define NV_MAILBOX_POLL_ACK_TIMEDOUT   500
 #define NV_MAILBOX_POLL_MSG_TIMEDOUT   6000
-#define NV_MAILBOX_POLL_FLR_TIMEDOUT   500
+#define NV_MAILBOX_POLL_FLR_TIMEDOUT   5000
 
 enum idh_request {
IDH_REQ_GPU_INIT_ACCESS = 1,
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/2] drm/amdgpu: limit smu_set_mp1_state to pp_one_vf or bare-metal

2020-04-23 Thread Monk Liu
Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 3d601d5..810141f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2465,7 +2465,7 @@ static int amdgpu_device_ip_suspend_phase2(struct 
amdgpu_device *adev)
}
adev->ip_blocks[i].status.hw = false;
/* handle putting the SMC in the appropriate state */
-   if(!amdgpu_sriov_vf(adev)){
+   if (!amdgpu_sriov_vf(adev) || amdgpu_sriov_is_pp_one_vf(adev)) {
if (adev->ip_blocks[i].version->type == 
AMD_IP_BLOCK_TYPE_SMC) {
r = amdgpu_dpm_set_mp1_state(adev, 
adev->mp1_state);
if (r) {
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 8/8] drm/amdgpu: for nv12 always need smu ip

2020-04-23 Thread Monk Liu
because nv12 SRIOV support one vf mode

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/nv.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index 995bdec..9c42316 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -498,8 +498,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, _v10_0_ip_block);
amdgpu_device_ip_block_add(adev, _ih_ip_block);
amdgpu_device_ip_block_add(adev, _v11_0_ip_block);
-   if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
-   !amdgpu_sriov_vf(adev))
+   if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)
amdgpu_device_ip_block_add(adev, _v11_0_ip_block);
if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, _virtual_ip_block);
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/8] drm/amdgpu: skip cg/pg set for SRIOV

2020-04-23 Thread Monk Liu
Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 8a579ce..909ef08 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -7095,6 +7095,10 @@ static int gfx_v10_0_set_powergating_state(void *handle,
 {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
bool enable = (state == AMD_PG_STATE_GATE);
+
+   if (amdgpu_sriov_vf(adev))
+   return 0;
+
switch (adev->asic_type) {
case CHIP_NAVI10:
case CHIP_NAVI14:
@@ -7115,6 +7119,9 @@ static int gfx_v10_0_set_clockgating_state(void *handle,
 {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+   if (amdgpu_sriov_vf(adev))
+   return 0;
+
switch (adev->asic_type) {
case CHIP_NAVI10:
case CHIP_NAVI14:
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 5/8] drm/amdgpu: clear the messed up checking logic

2020-04-23 Thread Monk Liu
for MI100 + ASICS, we always support SW_SMU for bare-metal
and for SRIOV one_vf_mode

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 11 +++
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c 
b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
index 2bb1e0c..361a5b6 100644
--- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
@@ -571,15 +571,10 @@ bool is_support_sw_smu(struct amdgpu_device *adev)
if (adev->asic_type == CHIP_VEGA20)
return (amdgpu_dpm == 2) ? true : false;
else if (adev->asic_type >= CHIP_ARCTURUS) {
-   if (amdgpu_sriov_vf(adev) &&
-   !(adev->asic_type == CHIP_ARCTURUS &&
- amdgpu_sriov_is_pp_one_vf(adev)))
-
-   return false;
-   else
+ if (amdgpu_sriov_is_pp_one_vf(adev) || !amdgpu_sriov_vf(adev))
return true;
-   } else
-   return false;
+   }
+   return false;
 }
 
 bool is_support_sw_smu_xgmi(struct amdgpu_device *adev)
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 4/8] drm/amdgpu: provide RREG32_SOC15_NO_KIQ, will be used later

2020-04-23 Thread Monk Liu
Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/soc15_common.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h 
b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index c893c64..56d02aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -35,6 +35,9 @@
 #define RREG32_SOC15(ip, inst, reg) \
RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg)
 
+#define RREG32_SOC15_NO_KIQ(ip, inst, reg) \
+   RREG32_NO_KIQ(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg)
+
 #define RREG32_SOC15_OFFSET(ip, inst, reg, offset) \
RREG32((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + 
offset)
 
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 7/8] drm/amdgpu: skip sysfs node not belong to one vf mode

2020-04-23 Thread Monk Liu
Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 48 --
 1 file changed, 28 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 49e2e43..c762deb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -3271,26 +3271,27 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
return ret;
}
 
-
-   ret = device_create_file(adev->dev, _attr_pp_num_states);
-   if (ret) {
-   DRM_ERROR("failed to create device file pp_num_states\n");
-   return ret;
-   }
-   ret = device_create_file(adev->dev, _attr_pp_cur_state);
-   if (ret) {
-   DRM_ERROR("failed to create device file pp_cur_state\n");
-   return ret;
-   }
-   ret = device_create_file(adev->dev, _attr_pp_force_state);
-   if (ret) {
-   DRM_ERROR("failed to create device file pp_force_state\n");
-   return ret;
-   }
-   ret = device_create_file(adev->dev, _attr_pp_table);
-   if (ret) {
-   DRM_ERROR("failed to create device file pp_table\n");
-   return ret;
+   if (!amdgpu_sriov_vf(adev)) {
+   ret = device_create_file(adev->dev, _attr_pp_num_states);
+   if (ret) {
+   DRM_ERROR("failed to create device file 
pp_num_states\n");
+   return ret;
+   }
+   ret = device_create_file(adev->dev, _attr_pp_cur_state);
+   if (ret) {
+   DRM_ERROR("failed to create device file 
pp_cur_state\n");
+   return ret;
+   }
+   ret = device_create_file(adev->dev, _attr_pp_force_state);
+   if (ret) {
+   DRM_ERROR("failed to create device file 
pp_force_state\n");
+   return ret;
+   }
+   ret = device_create_file(adev->dev, _attr_pp_table);
+   if (ret) {
+   DRM_ERROR("failed to create device file pp_table\n");
+   return ret;
+   }
}
 
ret = device_create_file(adev->dev, _attr_pp_dpm_sclk);
@@ -3337,6 +3338,13 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
return ret;
}
}
+
+   /* the reset are not needed for SRIOV one vf mode */
+   if (amdgpu_sriov_vf(adev)) {
+   adev->pm.sysfs_initialized = true;
+   return ret;
+   }
+
if (adev->asic_type != CHIP_ARCTURUS) {
ret = device_create_file(adev->dev, _attr_pp_dpm_pcie);
if (ret) {
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 6/8] drm/amdgpu: enable one vf mode for nv12

2020-04-23 Thread Monk Liu
Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 12 +++-
 drivers/gpu/drm/amd/powerplay/navi10_ppt.c |  6 +++-
 drivers/gpu/drm/amd/powerplay/smu_v11_0.c  | 49 +-
 3 files changed, 52 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c 
b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
index 361a5b6..5964d63 100644
--- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
@@ -347,13 +347,13 @@ int smu_get_dpm_freq_by_index(struct smu_context *smu, 
enum smu_clk_type clk_typ
param = (uint32_t)(((clk_id & 0x) << 16) | (level & 0x));
 
ret = smu_send_smc_msg_with_param(smu, SMU_MSG_GetDpmFreqByIndex,
- param, );
+ param, value);
if (ret)
return ret;
 
/* BIT31:  0 - Fine grained DPM, 1 - Dicrete DPM
 * now, we un-support it */
-   *value = param & 0x7fff;
+   *value = *value & 0x7fff;
 
return ret;
 }
@@ -535,7 +535,6 @@ int smu_update_table(struct smu_context *smu, enum 
smu_table_id table_index, int
int table_id = smu_table_get_index(smu, table_index);
uint32_t table_size;
int ret = 0;
-
if (!table_data || table_id >= SMU_TABLE_COUNT || table_id < 0)
return -EINVAL;
 
@@ -691,7 +690,6 @@ int smu_feature_is_enabled(struct smu_context *smu, enum 
smu_feature_mask mask)
 
if (smu->is_apu)
return 1;
-
feature_id = smu_feature_get_index(smu, mask);
if (feature_id < 0)
return 0;
@@ -1339,6 +1337,9 @@ static int smu_hw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct smu_context *smu = >smu;
 
+   if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+   return 0;
+
ret = smu_start_smc_engine(smu);
if (ret) {
pr_err("SMU is not ready yet!\n");
@@ -1352,9 +1353,6 @@ static int smu_hw_init(void *handle)
smu_set_gfx_cgpg(>smu, true);
}
 
-   if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
-   return 0;
-
if (!smu->pm_enabled)
return 0;
 
diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c 
b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
index c94270f..2184d24 100644
--- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
@@ -1817,7 +1817,8 @@ static int navi10_get_power_limit(struct smu_context *smu,
int power_src;
 
if (!smu->power_limit) {
-   if (smu_feature_is_enabled(smu, SMU_FEATURE_PPT_BIT)) {
+   if (smu_feature_is_enabled(smu, SMU_FEATURE_PPT_BIT) &&
+   !amdgpu_sriov_vf(smu->adev)) {
power_src = smu_power_get_index(smu, 
SMU_POWER_SOURCE_AC);
if (power_src < 0)
return -EINVAL;
@@ -1960,6 +1961,9 @@ static int navi10_set_default_od_settings(struct 
smu_context *smu, bool initiali
OverDriveTable_t *od_table, *boot_od_table;
int ret = 0;
 
+   if (amdgpu_sriov_vf(smu->adev))
+   return 0;
+
ret = smu_v11_0_set_default_od_settings(smu, initialize, 
sizeof(OverDriveTable_t));
if (ret)
return ret;
diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c 
b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
index a97b296..3e1b3ed 100644
--- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
@@ -57,7 +57,7 @@ static int smu_v11_0_send_msg_without_waiting(struct 
smu_context *smu,
  uint16_t msg)
 {
struct amdgpu_device *adev = smu->adev;
-   WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_66, msg);
+   WREG32_SOC15_NO_KIQ(MP1, 0, mmMP1_SMN_C2PMSG_66, msg);
return 0;
 }
 
@@ -65,7 +65,7 @@ static int smu_v11_0_read_arg(struct smu_context *smu, 
uint32_t *arg)
 {
struct amdgpu_device *adev = smu->adev;
 
-   *arg = RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82);
+   *arg = RREG32_SOC15_NO_KIQ(MP1, 0, mmMP1_SMN_C2PMSG_82);
return 0;
 }
 
@@ -75,7 +75,7 @@ static int smu_v11_0_wait_for_response(struct smu_context 
*smu)
uint32_t cur_value, i, timeout = adev->usec_timeout * 10;
 
for (i = 0; i < timeout; i++) {
-   cur_value = RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90);
+   cur_value = RREG32_SOC15_NO_KIQ(MP1, 0, mmMP1_SMN_C2PMSG_90);
if ((cur_value & MP1_C2PMSG_90__CONTENT_MASK) != 0)
return cur_value == 0x1 ? 0 : -EIO;
 
@@ -83,7 +83,10 @@ static int smu_v11_0_wait_for_response(struct smu_context 

[PATCH 3/8] drm/amdgpu: sriov is forbidden to call disable DPM

2020-04-23 Thread Monk Liu
Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c 
b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
index 88b4e56..2bb1e0c 100644
--- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
@@ -1403,6 +1403,9 @@ static int smu_hw_init(void *handle)
 
 static int smu_stop_dpms(struct smu_context *smu)
 {
+   if (amdgpu_sriov_vf(smu->adev))
+   return 0;
+
return smu_system_features_control(smu, false);
 }
 
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/8] drm/amdgpu: ignore TA ucode for SRIOV

2020-04-23 Thread Monk Liu
Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
index 0afd610..b4b0242 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -194,6 +194,8 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
case CHIP_NAVI10:
case CHIP_NAVI14:
case CHIP_NAVI12:
+   if (amdgpu_sriov_vf(adev))
+   break;
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", 
chip_name);
err = request_firmware(>psp.ta_fw, fw_name, adev->dev);
if (err) {
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/7] drm/amdgpu: cleanup idh event/req for NV headers

2020-03-24 Thread Monk Liu
1) drop the headers from AI in mxgpu_nv.c, should refer to mxgpu_nv.h

2) the IDH_EVENT_MAX is not used and not aligned with the host side,
   so drop it
3) the IDH_TEXT_MESSAGE was provided by the host but not defined in the guest

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h |  3 ++-
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c |  1 -
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h | 22 ++
 drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h |  3 ++-
 4 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
index 37dbe0f..52a6975 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
@@ -46,7 +46,8 @@ enum idh_event {
IDH_SUCCESS,
IDH_FAIL,
IDH_QUERY_ALIVE,
-   IDH_EVENT_MAX
+
+   IDH_TEXT_MESSAGE = 255,
 };
 
 extern const struct amdgpu_virt_ops xgpu_ai_virt_ops;
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index 237fa5e..d9ce12c 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -30,7 +30,6 @@
 #include "navi10_ih.h"
 #include "soc15_common.h"
 #include "mxgpu_nv.h"
-#include "mxgpu_ai.h"
 
 static void xgpu_nv_mailbox_send_ack(struct amdgpu_device *adev)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
index 99b15f6..c80bbc7 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
@@ -28,6 +28,28 @@
 #define NV_MAILBOX_POLL_MSG_TIMEDOUT   12000
 #define NV_MAILBOX_POLL_FLR_TIMEDOUT   500
 
+enum idh_request {
+   IDH_REQ_GPU_INIT_ACCESS = 1,
+   IDH_REL_GPU_INIT_ACCESS,
+   IDH_REQ_GPU_FINI_ACCESS,
+   IDH_REL_GPU_FINI_ACCESS,
+   IDH_REQ_GPU_RESET_ACCESS,
+
+   IDH_LOG_VF_ERROR   = 200,
+};
+
+enum idh_event {
+   IDH_CLR_MSG_BUF = 0,
+   IDH_READY_TO_ACCESS_GPU,
+   IDH_FLR_NOTIFICATION,
+   IDH_FLR_NOTIFICATION_CMPL,
+   IDH_SUCCESS,
+   IDH_FAIL,
+   IDH_QUERY_ALIVE,
+
+   IDH_TEXT_MESSAGE = 255,
+};
+
 extern const struct amdgpu_virt_ops xgpu_nv_virt_ops;
 
 void xgpu_nv_mailbox_set_irq_funcs(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h
index f13dc6c..713ee66 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h
@@ -43,7 +43,8 @@ enum idh_event {
IDH_READY_TO_ACCESS_GPU,
IDH_FLR_NOTIFICATION,
IDH_FLR_NOTIFICATION_CMPL,
-   IDH_EVENT_MAX
+
+   IDH_TEXT_MESSAGE = 255
 };
 
 extern const struct amdgpu_virt_ops xgpu_vi_virt_ops;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 6/7] drm/amdgpu: adjust sequence of ip_discovery init and timeout_setting

2020-03-24 Thread Monk Liu
what:
1) move the timeout setting before ip_early_init to reduce exclusive mode
cost for SRIOV

2)move ip_discovery_init() to inside of amdgpu_discovery_reg_base_init()
it is a prepare for the later upcoming patches.

why:
in later upcoming patches we would use a new mailbox event --
"req_gpu_init_data", which is a callback hooked in adev->virt.ops and
this callback send a new event "REQ_GPU_INIT_DAT" to host to notify
host to do some preparation like "IP discovery/vbios on the VF FB"
and this callback must be:

A) invoked after set_ip_block() because virt.ops is configured during
set_ip_block()

B) invoked before ip_discovery_init() because ip_discovery_init()
needs the host side to prepare everything in the VF FB first.

current place of ip_discovery_init() is before we can invoke callback
of adev->virt.ops, thus we must move ip_discovery_init() to a place
after the adev->virt.ops all settle done, and the perfect place is in
amdgpu_discovery_reg_base_init()

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c| 16 
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 10 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h |  1 -
 3 files changed, 10 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 273706b..724ad84 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3079,12 +3079,10 @@ int amdgpu_device_init(struct amdgpu_device *adev,
/* detect hw virtualization here */
amdgpu_detect_virtualization(adev);
 
-   if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) {
-   r = amdgpu_discovery_init(adev);
-   if (r) {
-   dev_err(adev->dev, "amdgpu_discovery_init failed\n");
-   return r;
-   }
+   r = amdgpu_device_get_job_timeout_settings(adev);
+   if (r) {
+   dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
+   return r;
}
 
/* early init functions */
@@ -3092,12 +3090,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (r)
return r;
 
-   r = amdgpu_device_get_job_timeout_settings(adev);
-   if (r) {
-   dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
-   return r;
-   }
-
/* doorbell bar mapping and doorbell index init*/
amdgpu_device_doorbell_init(adev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 37e1fcf..43bb22a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -156,7 +156,7 @@ static inline bool amdgpu_discovery_verify_checksum(uint8_t 
*data, uint32_t size
return !!(amdgpu_discovery_calculate_checksum(data, size) == expected);
 }
 
-int amdgpu_discovery_init(struct amdgpu_device *adev)
+static int amdgpu_discovery_init(struct amdgpu_device *adev)
 {
struct table_info *info;
struct binary_header *bhdr;
@@ -255,10 +255,12 @@ int amdgpu_discovery_reg_base_init(struct amdgpu_device 
*adev)
uint8_t num_base_address;
int hw_ip;
int i, j, k;
+   int r;
 
-   if (!adev->discovery) {
-   DRM_ERROR("ip discovery uninitialized\n");
-   return -EINVAL;
+   r = amdgpu_discovery_init(adev);
+   if (r) {
+   DRM_ERROR("amdgpu_discovery_init failed\n");
+   return r;
}
 
bhdr = (struct binary_header *)adev->discovery;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
index ba78e15..d50d597 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
@@ -26,7 +26,6 @@
 
 #define DISCOVERY_TMR_SIZE  (64 << 10)
 
-int amdgpu_discovery_init(struct amdgpu_device *adev);
 void amdgpu_discovery_fini(struct amdgpu_device *adev);
 int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev);
 int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id,
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/7] drm/amdgpu: introduce new idh_request/event enum

2020-03-24 Thread Monk Liu
new idh_request and idh_event to prepare for the
new handshake protocol implementation later

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
index c80bbc7..598ed2c 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
@@ -34,6 +34,7 @@ enum idh_request {
IDH_REQ_GPU_FINI_ACCESS,
IDH_REL_GPU_FINI_ACCESS,
IDH_REQ_GPU_RESET_ACCESS,
+   IDH_REQ_GPU_INIT_DATA,
 
IDH_LOG_VF_ERROR   = 200,
 };
@@ -46,6 +47,7 @@ enum idh_event {
IDH_SUCCESS,
IDH_FAIL,
IDH_QUERY_ALIVE,
+   IDH_REQ_GPU_INIT_DATA_READY,
 
IDH_TEXT_MESSAGE = 255,
 };
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 5/7] drm/amdgpu: equip new req_init_data handshake

2020-03-24 Thread Monk Liu
by this new handshake the host side can prepare vbios/ip-discovery
and pf exchange data upon receiving this request without
stopping world switch.

this way the world switch is less impacted by VF's exclusive mode
request

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 19 +++
 drivers/gpu/drm/amd/amdgpu/nv.c| 15 +--
 2 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index ca609b6..273706b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1800,6 +1800,21 @@ static int amdgpu_device_ip_early_init(struct 
amdgpu_device *adev)
amdgpu_amdkfd_device_probe(adev);
 
if (amdgpu_sriov_vf(adev)) {
+   /* handle vbios stuff prior full access mode for new handshake 
*/
+   if (adev->virt.req_init_data_ver == 1) {
+   if (!amdgpu_get_bios(adev)) {
+   DRM_ERROR("failed to get vbios\n");
+   return -EINVAL;
+   }
+
+   r = amdgpu_atombios_init(adev);
+   if (r) {
+   dev_err(adev->dev, "amdgpu_atombios_init 
failed\n");
+   amdgpu_vf_error_put(adev, 
AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
+   return r;
+   }
+   }
+
r = amdgpu_virt_request_full_gpu(adev, true);
if (r)
return -EAGAIN;
@@ -1832,6 +1847,10 @@ static int amdgpu_device_ip_early_init(struct 
amdgpu_device *adev)
}
/* get the vbios after the asic_funcs are set up */
if (adev->ip_blocks[i].version->type == 
AMD_IP_BLOCK_TYPE_COMMON) {
+   /* skip vbios handling for new handshake */
+   if (amdgpu_sriov_vf(adev) && 
adev->virt.req_init_data_ver == 1)
+   continue;
+
/* Read BIOS */
if (!amdgpu_get_bios(adev))
return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index a67d78d..7768880 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -457,16 +457,19 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
 {
int r;
 
-   /* Set IP register base before any HW register access */
-   r = nv_reg_base_init(adev);
-   if (r)
-   return r;
-
adev->nbio.funcs = _v2_3_funcs;
adev->nbio.hdp_flush_reg = _v2_3_hdp_flush_reg;
 
-   if (amdgpu_sriov_vf(adev))
+   if (amdgpu_sriov_vf(adev)) {
adev->virt.ops = _nv_virt_ops;
+   /* try send GPU_INIT_DATA request to host */
+   amdgpu_virt_request_init_data(adev);
+   }
+
+   /* Set IP register base before any HW register access */
+   r = nv_reg_base_init(adev);
+   if (r)
+   return r;
 
switch (adev->asic_type) {
case CHIP_NAVI10:
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 7/7] drm/amdgpu: postpone entering fullaccess mode

2020-03-24 Thread Monk Liu
if the host supports the new handshake we only need to enter
fullaccess_mode in the ip_init() part, otherwise we need
to do it before reading the vbios (because the host prepares the vbios
for the VF only after receiving the REQ_GPU_INIT event under the
legacy handshake)

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 724ad84..b61161a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1814,10 +1814,14 @@ static int amdgpu_device_ip_early_init(struct 
amdgpu_device *adev)
return r;
}
}
+   }
 
+   /* we need to send REQ_GPU here for legacy handshaker otherwise the 
vbios
+* will not be prepared by host for this VF */
+   if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver < 1) {
r = amdgpu_virt_request_full_gpu(adev, true);
if (r)
-   return -EAGAIN;
+   return r;
}
 
adev->pm.pp_feature = amdgpu_pp_feature_mask;
@@ -1977,6 +1981,12 @@ static int amdgpu_device_ip_init(struct amdgpu_device 
*adev)
if (r)
return r;
 
+   if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver > 0) {
+   r = amdgpu_virt_request_full_gpu(adev, true);
+   if (r)
+   return -EAGAIN;
+   }
+
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.valid)
continue;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 4/7] drm/amdgpu: use static mmio offset for NV mailbox

2020-03-24 Thread Monk Liu
what:
with the new "req_init_data" handshake we need to use the mailbox
before doing IP discovery, so in the mxgpu_nv.c file the original
SOC15_REG method won't work because it depends on IP discovery
completing first.

how:
so the solution is to always use static MMIO offsets for NV+ mailbox
registers.
The HW team confirmed that all MAILBOX registers will be at the same
offset for all ASICs, so no IP discovery is needed for those registers

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c | 52 +++
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h | 18 ++--
 2 files changed, 38 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index 6b9e390..ce2bf1f 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -52,8 +52,7 @@ static void xgpu_nv_mailbox_set_valid(struct amdgpu_device 
*adev, bool val)
  */
 static enum idh_event xgpu_nv_mailbox_peek_msg(struct amdgpu_device *adev)
 {
-   return RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
-   mmBIF_BX_PF_MAILBOX_MSGBUF_RCV_DW0));
+   return RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW0);
 }
 
 
@@ -62,8 +61,7 @@ static int xgpu_nv_mailbox_rcv_msg(struct amdgpu_device *adev,
 {
u32 reg;
 
-   reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
-
mmBIF_BX_PF_MAILBOX_MSGBUF_RCV_DW0));
+   reg = RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW0);
if (reg != event)
return -ENOENT;
 
@@ -116,7 +114,6 @@ static int xgpu_nv_poll_msg(struct amdgpu_device *adev, 
enum idh_event event)
 static void xgpu_nv_mailbox_trans_msg (struct amdgpu_device *adev,
  enum idh_request req, u32 data1, u32 data2, u32 data3)
 {
-   u32 reg;
int r;
uint8_t trn;
 
@@ -135,19 +132,10 @@ static void xgpu_nv_mailbox_trans_msg (struct 
amdgpu_device *adev,
}
} while (trn);
 
-   reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
-
mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW0));
-   reg = REG_SET_FIELD(reg, BIF_BX_PF_MAILBOX_MSGBUF_TRN_DW0,
-   MSGBUF_DATA, req);
-   WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, 
mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW0),
- reg);
-   WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, 
mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW1),
-   data1);
-   WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, 
mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW2),
-   data2);
-   WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, 
mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW3),
-   data3);
-
+   WREG32_NO_KIQ(mmMAILBOX_MSGBUF_TRN_DW0, req);
+   WREG32_NO_KIQ(mmMAILBOX_MSGBUF_TRN_DW1, data1);
+   WREG32_NO_KIQ(mmMAILBOX_MSGBUF_TRN_DW2, data2);
+   WREG32_NO_KIQ(mmMAILBOX_MSGBUF_TRN_DW3, data3);
xgpu_nv_mailbox_set_valid(adev, true);
 
/* start to poll ack */
@@ -192,8 +180,7 @@ static int xgpu_nv_send_access_requests(struct 
amdgpu_device *adev,
if (req == IDH_REQ_GPU_INIT_DATA)
{
adev->virt.req_init_data_ver =
-   RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
-   
mmBIF_BX_PF_MAILBOX_MSGBUF_RCV_DW1));
+   RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW1);
 
/* assume V1 in case host doesn't set version 
number */
if (adev->virt.req_init_data_ver < 1)
@@ -204,8 +191,7 @@ static int xgpu_nv_send_access_requests(struct 
amdgpu_device *adev,
/* Retrieve checksum from mailbox2 */
if (req == IDH_REQ_GPU_INIT_ACCESS || req == 
IDH_REQ_GPU_RESET_ACCESS) {
adev->virt.fw_reserve.checksum_key =
-   RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
-   mmBIF_BX_PF_MAILBOX_MSGBUF_RCV_DW2));
+   RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW2);
}
}
 
@@ -256,11 +242,14 @@ static int xgpu_nv_set_mailbox_ack_irq(struct 
amdgpu_device *adev,
unsigned type,
enum amdgpu_interrupt_state state)
 {
-   u32 tmp = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, 
mmBIF_BX_PF_MAILBOX_INT_CNTL));
+   u32 tmp = RREG32_NO_KIQ(mmMAILBOX_INT_CNTL);
 
-   tmp = REG_SET_FIELD(tmp, BIF_BX_PF_MAILBOX_INT_CNTL, ACK_INT_EN,
-   (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0);
-   WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_INT_CNTL), 
tmp);
+   if (state == AMDGPU_IRQ_STATE_ENABLE)
+   tmp |= 2;
+   else
+   tmp &= ~2;
+
+   WREG32

[PATCH 3/7] drm/amdgpu: introduce new request and its function

2020-03-24 Thread Monk Liu
1) modify xgpu_nv_send_access_requests to support
new idh request

2) introduce new function: req_gpu_init_data() which
is used to notify host to prepare vbios/ip-discovery/pfvf exchange

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 13 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h |  3 ++
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c| 48 ++--
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h|  2 +-
 4 files changed, 57 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 43a1ee3..135a16c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -152,6 +152,19 @@ int amdgpu_virt_reset_gpu(struct amdgpu_device *adev)
return 0;
 }
 
+void amdgpu_virt_request_init_data(struct amdgpu_device *adev)
+{
+   struct amdgpu_virt *virt = >virt;
+
+   if (virt->ops && virt->ops->req_init_data)
+   virt->ops->req_init_data(adev);
+
+   if (adev->virt.req_init_data_ver > 0)
+   DRM_INFO("host supports REQ_INIT_DATA handshake\n");
+   else
+   DRM_WARN("host doesn't support REQ_INIT_DATA handshake\n");
+}
+
 /**
  * amdgpu_virt_wait_reset() - wait for reset gpu completed
  * @amdgpu:amdgpu device.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 74f9843..f6ae3c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -59,6 +59,7 @@ struct amdgpu_vf_error_buffer {
 struct amdgpu_virt_ops {
int (*req_full_gpu)(struct amdgpu_device *adev, bool init);
int (*rel_full_gpu)(struct amdgpu_device *adev, bool init);
+   int (*req_init_data)(struct amdgpu_device *adev);
int (*reset_gpu)(struct amdgpu_device *adev);
int (*wait_reset)(struct amdgpu_device *adev);
void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 
data2, u32 data3);
@@ -263,6 +264,7 @@ struct amdgpu_virt {
struct amdgpu_virt_fw_reserve   fw_reserve;
uint32_t gim_feature;
uint32_t reg_access_mode;
+   int req_init_data_ver;
 };
 
 #define amdgpu_sriov_enabled(adev) \
@@ -303,6 +305,7 @@ void amdgpu_virt_kiq_reg_write_reg_wait(struct 
amdgpu_device *adev,
 int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
 int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
 int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
+void amdgpu_virt_request_init_data(struct amdgpu_device *adev);
 int amdgpu_virt_wait_reset(struct amdgpu_device *adev);
 int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev);
 void amdgpu_virt_free_mm_table(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index d9ce12c..6b9e390 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -109,7 +109,6 @@ static int xgpu_nv_poll_msg(struct amdgpu_device *adev, 
enum idh_event event)
timeout -= 10;
} while (timeout > 1);
 
-   pr_err("Doesn't get msg:%d from pf, error=%d\n", event, r);
 
return -ETIME;
 }
@@ -163,18 +162,45 @@ static int xgpu_nv_send_access_requests(struct 
amdgpu_device *adev,
enum idh_request req)
 {
int r;
+   enum idh_event event = -1;
 
xgpu_nv_mailbox_trans_msg(adev, req, 0, 0, 0);
 
-   /* start to check msg if request is idh_req_gpu_init_access */
-   if (req == IDH_REQ_GPU_INIT_ACCESS ||
-   req == IDH_REQ_GPU_FINI_ACCESS ||
-   req == IDH_REQ_GPU_RESET_ACCESS) {
-   r = xgpu_nv_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
+   switch (req) {
+   case IDH_REQ_GPU_INIT_ACCESS:
+   case IDH_REQ_GPU_FINI_ACCESS:
+   case IDH_REQ_GPU_RESET_ACCESS:
+   event = IDH_READY_TO_ACCESS_GPU;
+   break;
+   case IDH_REQ_GPU_INIT_DATA:
+   event = IDH_REQ_GPU_INIT_DATA_READY;
+   break;
+   default:
+   break;
+   }
+
+   if (event != -1) {
+   r = xgpu_nv_poll_msg(adev, event);
if (r) {
-   pr_err("Doesn't get READY_TO_ACCESS_GPU from pf, give 
up\n");
-   return r;
+   if (req != IDH_REQ_GPU_INIT_DATA) {
+   pr_err("Doesn't get msg:%d from pf, 
error=%d\n", event, r);
+   return r;
+   }
+   else /* host doesn't support REQ_GPU_INIT_DATA 
handshake */
+   adev->virt.req_init_data_ver = 0;
+   } else {
+   if (req == IDH_REQ_GPU_INIT_DATA

[PATCH 2/4] drm/amdgpu: purge ip_discovery headers

2020-03-24 Thread Monk Liu
those two headers are not needed for ip discovery

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 27d8ae1..37e1fcf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -23,9 +23,7 @@
 
 #include "amdgpu.h"
 #include "amdgpu_discovery.h"
-#include "soc15_common.h"
 #include "soc15_hw_ip.h"
-#include "nbio/nbio_2_3_offset.h"
 #include "discovery.h"
 
 #define mmRCC_CONFIG_MEMSIZE   0xde3
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/4] drm/amdgpu: don't try to reserve training bo for sriov

2020-03-24 Thread Monk Liu
1) SRIOV guest KMD doesn't care about the training buffer
2) if we reserved the training buffer it would overlap with the IP discovery
reservation, because the training buffer is at vram_size - 0x8000 and
IP discovery is at (vram_size - 0x1 => vram_size - 1)

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 665db23..54cfa3a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1859,9 +1859,10 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 *The reserved vram for memory training must be pinned to the specified
 *place on the VRAM, so reserve it early.
 */
-   r = amdgpu_ttm_training_reserve_vram_init(adev);
-   if (r)
-   return r;
+   if (!amdgpu_sriov_vf(adev))
+   r = amdgpu_ttm_training_reserve_vram_init(adev);
+   if (r)
+   return r;
 
/* allocate memory as required for VGA
 * This is used for VGA emulation and pre-OS scanout buffers to
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 4/4] drm/amdgpu: cleanup all virtualization detection routine

2020-03-24 Thread Monk Liu
we need to move virt detection much earlier because:
1) the HW team confirmed that RCC_IOV_FUNC_IDENTIFIER will always
be at the DE5 (dw) mmio offset from vega10 onward, this way there is no
need to implement a detect_hw_virt() routine in each nbio/chip file.
for VI SRIOV chips (tonga & fiji), the BIF_IOV_FUNC_IDENTIFIER is at
0x1503

2) we need to acknowledge we are an SRIOV VF before we do IP discovery because
the IP discovery content will be updated by the host every time after it receives
a new incoming "REQ_GPU_INIT_DATA" request from the guest (there will be patches
for this new handshake soon).

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  3 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h   |  1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c   | 33 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h   |  6 
 drivers/gpu/drm/amd/amdgpu/cik.c   |  8 --
 drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c | 18 
 drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c | 18 
 drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c |  7 -
 drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 18 
 drivers/gpu/drm/amd/amdgpu/nv.c|  2 --
 drivers/gpu/drm/amd/amdgpu/si.c|  8 --
 drivers/gpu/drm/amd/amdgpu/soc15.c |  1 -
 drivers/gpu/drm/amd/amdgpu/vi.c| 24 
 .../amd/include/asic_reg/nbif/nbif_6_1_offset.h|  2 ++
 .../amd/include/asic_reg/nbio/nbio_7_0_offset.h|  2 ++
 .../amd/include/asic_reg/nbio/nbio_7_4_offset.h|  2 ++
 16 files changed, 48 insertions(+), 105 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index e55dbcd..ca609b6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3057,6 +3057,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
adev->enable_mes = true;
 
+   /* detect hw virtualization here */
+   amdgpu_detect_virtualization(adev);
+
if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) {
r = amdgpu_discovery_init(adev);
if (r) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
index 919bd56..edaac24 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
@@ -77,7 +77,6 @@ struct amdgpu_nbio_funcs {
  u32 *flags);
void (*ih_control)(struct amdgpu_device *adev);
void (*init_registers)(struct amdgpu_device *adev);
-   void (*detect_hw_virt)(struct amdgpu_device *adev);
void (*remap_hdp_registers)(struct amdgpu_device *adev);
void (*handle_ras_controller_intr_no_bifring)(struct amdgpu_device 
*adev);
void (*handle_ras_err_event_athub_intr_no_bifring)(struct amdgpu_device 
*adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index adc813c..43a1ee3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -287,3 +287,36 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device 
*adev)
}
}
 }
+
+void amdgpu_detect_virtualization(struct amdgpu_device *adev)
+{
+   uint32_t reg;
+
+   switch (adev->asic_type) {
+   case CHIP_TONGA:
+   case CHIP_FIJI:
+   reg = RREG32(mmBIF_IOV_FUNC_IDENTIFIER);
+   break;
+   case CHIP_VEGA10:
+   case CHIP_VEGA20:
+   case CHIP_NAVI10:
+   case CHIP_NAVI12:
+   case CHIP_ARCTURUS:
+   reg = RREG32(mmRCC_IOV_FUNC_IDENTIFIER);
+   break;
+   default: /* other chip doesn't support SRIOV */
+   reg = 0;
+   break;
+   }
+
+   if (reg & 1)
+   adev->virt.caps |= AMDGPU_SRIOV_CAPS_IS_VF;
+
+   if (reg & 0x8000)
+   adev->virt.caps |= AMDGPU_SRIOV_CAPS_ENABLE_IOV;
+
+   if (!reg) {
+   if (is_virtual_machine())   /* passthrough mode exclus 
sriov mod */
+   adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
+   }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 0a95b13..74f9843 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -30,6 +30,11 @@
 #define AMDGPU_PASSTHROUGH_MODE(1 << 3) /* thw whole GPU is pass 
through for VM */
 #define AMDGPU_SRIOV_CAPS_RUNTIME  (1 << 4) /* is out of full access mode 
*/
 
+/* all asic after AI use this offset */
+#define mmRCC_IOV_FUNC_IDENTIFIER 0xDE5
+/* tonga/fiji use this offset */
+#define mmBIF_IO

[PATCH 3/4] drm/amdgpu: amends feature bits for MM bandwidth mgr

2020-03-24 Thread Monk Liu
Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index f0128f7..0a95b13 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -83,6 +83,8 @@ enum AMDGIM_FEATURE_FLAG {
AMDGIM_FEATURE_GIM_LOAD_UCODES   = 0x2,
/* VRAM LOST by GIM */
AMDGIM_FEATURE_GIM_FLR_VRAMLOST = 0x4,
+   /* MM bandwidth */
+   AMDGIM_FEATURE_GIM_MM_BW_MGR = 0x8,
/* PP ONE VF MODE in GIM */
AMDGIM_FEATURE_PP_ONE_VF = (1 << 4),
 };
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: revise RLCG access path

2020-03-15 Thread Monk Liu
what changed:
1) provide a new implementation interface for the RLCG access path
2) put SQ_CMD/SQ_IND_INDEX on the GFX9 RLCG path so that debugfs's reg_op
function can access registers that need the RLCG path's help

now even debugfs's reg_op can be used to dump waves.

tested-by: Monk Liu 
tested-by: Zhou pengju 
Signed-off-by: Zhou pengju 
Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h |  2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  | 50 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h |  2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h|  3 +
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 74 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 95 -
 drivers/gpu/drm/amd/amdgpu/soc15.h  |  7 +++
 drivers/gpu/drm/amd/amdgpu/soc15_common.h   |  5 +-
 9 files changed, 221 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index c00831f..87c25230 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -999,6 +999,8 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, 
uint32_t reg,
uint32_t acc_flags);
 void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
uint32_t acc_flags);
+void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, 
uint32_t v,
+   uint32_t acc_flags);
 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t 
value);
 uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 02bb1be1..c0f9a65 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -179,7 +179,7 @@ static int  amdgpu_debugfs_process_reg_op(bool read, struct 
file *f,
} else {
r = get_user(value, (uint32_t *)buf);
if (!r)
-   WREG32(*pos >> 2, value);
+   amdgpu_mm_wreg_mmio_rlc(adev, *pos >> 2, value, 
0);
}
if (r) {
result = r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index a35c899..729565f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -306,6 +306,26 @@ void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t 
offset, uint8_t value)
BUG();
 }
 
+void static inline amdgpu_mm_wreg_mmio(struct amdgpu_device *adev, uint32_t 
reg, uint32_t v, uint32_t acc_flags)
+{
+   trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);
+
+   if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
+   writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
+   else {
+   unsigned long flags;
+
+   spin_lock_irqsave(>mmio_idx_lock, flags);
+   writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 
4));
+   writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
+   spin_unlock_irqrestore(>mmio_idx_lock, flags);
+   }
+
+   if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index 
== 0x5702C) {
+   udelay(500);
+   }
+}
+
 /**
  * amdgpu_mm_wreg - write to a memory mapped IO register
  *
@@ -319,8 +339,6 @@ void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t 
offset, uint8_t value)
 void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
uint32_t acc_flags)
 {
-   trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);
-
if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
adev->last_mm_index = v;
}
@@ -328,20 +346,26 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t 
reg, uint32_t v,
if ((acc_flags & AMDGPU_REGS_KIQ) || (!(acc_flags & AMDGPU_REGS_NO_KIQ) 
&& amdgpu_sriov_runtime(adev)))
return amdgpu_kiq_wreg(adev, reg, v);
 
-   if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
-   writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
-   else {
-   unsigned long flags;
+   amdgpu_mm_wreg_mmio(adev, reg, v, acc_flags);
+}
 
-   spin_lock_irqsave(>mmio_idx_lock, flags);
-   writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 
4));
-   writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
-   spin_unlock_irqrestore(>mmio_idx_lock, flags);
-   }
+/*
+ * amdgpu_mm_wreg_mmio_rlc -  write register either w

[refactor RLCG wreg path 1/2] drm/amdgpu: refactor RLCG access path part 1

2020-03-10 Thread Monk Liu
what changed:
1)provide new implementation interface for the rlcg access path
2)put SQ_CMD/SQ_IND_INDEX/SQ_IND_DATA to GFX9 RLCG path to align with
SRIOV RLCG logic

background:
we want to clean up the code path for WREG32_RLC, to make it covered
and handled only by the amdgpu_mm_wreg() routine; this way we can let
RLCG serve the register access even through UMR (via the debugfs
interface). the current implementation cannot achieve that goal because
it is hardcoded everywhere, but UMR only passes "offset" as a variable
to the driver

tested-by: Monk Liu 
tested-by: Zhou pengju 
Signed-off-by: Zhou pengju 
Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h |   2 +
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  |  80 ++-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 177 +++-
 drivers/gpu/drm/amd/amdgpu/soc15.h  |   7 ++
 4 files changed, 264 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
index 52509c2..60bb3e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
@@ -127,6 +127,8 @@ struct amdgpu_rlc_funcs {
void (*reset)(struct amdgpu_device *adev);
void (*start)(struct amdgpu_device *adev);
void (*update_spm_vmid)(struct amdgpu_device *adev, unsigned vmid);
+   void (*rlcg_wreg)(struct amdgpu_device *adev, u32 offset, u32 v);
+   bool (*is_rlcg_access_range)(struct amdgpu_device *adev, uint32_t reg);
 };
 
 struct amdgpu_rlc {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 82ef08d..3222cd3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -224,6 +224,56 @@ static const struct soc15_reg_golden 
golden_settings_gc_10_1_2[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x, 0x0080)
 };
 
+static const struct soc15_reg_rlcg rlcg_access_gc_10_0[] = {
+   {SOC15_REG_ENTRY(GC, 0, mmRLC_CSIB_ADDR_HI)},
+   {SOC15_REG_ENTRY(GC, 0, mmRLC_CSIB_ADDR_LO)},
+   {SOC15_REG_ENTRY(GC, 0, mmRLC_CSIB_LENGTH)},
+   {SOC15_REG_ENTRY(GC, 0, mmCP_ME_CNTL)},
+};
+
+static void gfx_v10_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v)
+{
+   static void *scratch_reg0;
+   static void *scratch_reg1;
+   static void *scratch_reg2;
+   static void *scratch_reg3;
+   static void *spare_int;
+   static uint32_t grbm_cntl;
+   static uint32_t grbm_idx;
+   uint32_t i = 0;
+   uint32_t retries = 5;
+
+   scratch_reg0 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
+   scratch_reg1 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
+   scratch_reg2 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4;
+   scratch_reg3 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4;
+   spare_int = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;
+
+   grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + 
mmGRBM_GFX_CNTL;
+   grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + 
mmGRBM_GFX_INDEX;
+
+   if (amdgpu_sriov_runtime(adev)) {
+   pr_err("shoudn't call rlcg write register during runtime\n");
+   return;
+   }
+
+   writel(v, scratch_reg0);
+   writel(offset | 0x8000, scratch_reg1);
+   writel(1, spare_int);
+   for (i = 0; i < retries; i++) {
+   u32 tmp;
+
+   tmp = readl(scratch_reg1);
+   if (!(tmp & 0x8000))
+   break;
+
+   udelay(10);
+   }
+
+   if (i >= retries)
+   pr_err("timeout: rlcg program reg:0x%05x failed !\n", offset);
+}
+
 static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] =
 {
/* Pending on emulation bring up */
@@ -4247,6 +4297,32 @@ static void gfx_v10_0_update_spm_vmid(struct 
amdgpu_device *adev, unsigned vmid)
WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
 }
 
+static bool gfx_v10_0_check_rlcg_range(struct amdgpu_device *adev,
+   uint32_t offset,
+   struct soc15_reg_rlcg *entries, int 
arr_size)
+{
+   int i;
+   uint32_t reg;
+
+   for (i = 0; i < arr_size; i++) {
+   const struct soc15_reg_rlcg *entry;
+
+   entry = [i];
+   reg = 
adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
+   if (offset == reg)
+   return true;
+   }
+
+   return false;
+}
+
+static bool gfx_v10_0_is_rlcg_access_range(stru

[refactor RLCG wreg path 2/2] drm/amdgpu: refactor RLCG access path part 2

2020-03-10 Thread Monk Liu
switch to new RLCG access path, and drop the legacy
WREG32_RLC macros

tested-by: Monk Liu 
tested-by: Zhou pengju 
Signed-off-by: Zhou pengju 
Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  30 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c|   5 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c|   8 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 104 +++---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c |   2 +-
 drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c  |  28 +++---
 drivers/gpu/drm/amd/amdgpu/soc15.c|  11 +--
 drivers/gpu/drm/amd/amdgpu/soc15_common.h |  57 
 8 files changed, 93 insertions(+), 152 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index df841c2..a21f005 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -105,8 +105,8 @@ void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev 
*kgd, uint32_t vmid,
 
lock_srbm(kgd, 0, 0, 0, vmid);
 
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
/* APE1 no longer exists on GFX9 */
 
unlock_srbm(kgd);
@@ -242,13 +242,13 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, 
uint32_t pipe_id,
 
for (reg = hqd_base;
 reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
-   WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
+   WREG32(reg, mqd_hqd[reg - hqd_base]);
 
 
/* Activate doorbell logic before triggering WPTR poll. */
data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
 CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
 
if (wptr) {
/* Don't read wptr with get_user because the user
@@ -277,25 +277,25 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, 
uint32_t pipe_id,
guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
 
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
   lower_32_bits(guessed_wptr));
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
   upper_32_bits(guessed_wptr));
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
   lower_32_bits((uintptr_t)wptr));
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, 
mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
   upper_32_bits((uintptr_t)wptr));
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
-  (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
+  get_queue_mask(adev, pipe_id, queue_id));
}
 
/* Start the EOP fetcher */
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
   REG_SET_FIELD(m->cp_hqd_eop_rptr,
 CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
 
data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
 
release_queue(kgd);
 
@@ -547,7 +547,7 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd,
acquire_queue(kgd, pipe_id, queue_id);
 
if (m->cp_hqd_vmid == 0)
-   WREG32_FIELD15_RLC(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
+   WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
 
switch (reset_type) {
case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
@@ -561,7 +561,7 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd,
break;
}
 
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
 
end_jiffies = (utimeout * HZ / 1000) + jiffies;
while (true) {
@@ -656,7 +656,7 @@ int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd,
 
mutex_lock(>grbm_idx_mutex);
 
-   WREG32_SOC15_

[PATCH 1/2] drm/amdgpu: refactor RLCG access path part 1

2020-03-10 Thread Monk Liu
what changed:
1)provide new implementation interface for the rlcg access path
2)put SQ_CMD/SQ_IND_INDEX/SQ_IND_DATA to GFX9 RLCG path to align with
SRIOV RLCG logic

background:
we want to clean up the code path for WREG32_RLC, to make it covered
and handled only by the amdgpu_mm_wreg() routine; this way we can let
RLCG serve the register access even through UMR (via the debugfs
interface). the current implementation cannot achieve that goal because
it is hardcoded everywhere, but UMR only passes "offset" as a variable
to the driver

tested-by: Monk Liu 
tested-by: Zhou pengju 
Signed-off-by: Zhou pengju 
Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h |   2 +
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  |  80 ++-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 177 +++-
 drivers/gpu/drm/amd/amdgpu/soc15.h  |   7 ++
 4 files changed, 264 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
index 52509c2..60bb3e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
@@ -127,6 +127,8 @@ struct amdgpu_rlc_funcs {
void (*reset)(struct amdgpu_device *adev);
void (*start)(struct amdgpu_device *adev);
void (*update_spm_vmid)(struct amdgpu_device *adev, unsigned vmid);
+   void (*rlcg_wreg)(struct amdgpu_device *adev, u32 offset, u32 v);
+   bool (*is_rlcg_access_range)(struct amdgpu_device *adev, uint32_t reg);
 };
 
 struct amdgpu_rlc {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 82ef08d..3222cd3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -224,6 +224,56 @@ static const struct soc15_reg_golden 
golden_settings_gc_10_1_2[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x, 0x0080)
 };
 
+static const struct soc15_reg_rlcg rlcg_access_gc_10_0[] = {
+   {SOC15_REG_ENTRY(GC, 0, mmRLC_CSIB_ADDR_HI)},
+   {SOC15_REG_ENTRY(GC, 0, mmRLC_CSIB_ADDR_LO)},
+   {SOC15_REG_ENTRY(GC, 0, mmRLC_CSIB_LENGTH)},
+   {SOC15_REG_ENTRY(GC, 0, mmCP_ME_CNTL)},
+};
+
+static void gfx_v10_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v)
+{
+   static void *scratch_reg0;
+   static void *scratch_reg1;
+   static void *scratch_reg2;
+   static void *scratch_reg3;
+   static void *spare_int;
+   static uint32_t grbm_cntl;
+   static uint32_t grbm_idx;
+   uint32_t i = 0;
+   uint32_t retries = 5;
+
+   scratch_reg0 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
+   scratch_reg1 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
+   scratch_reg2 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4;
+   scratch_reg3 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4;
+   spare_int = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;
+
+   grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + 
mmGRBM_GFX_CNTL;
+   grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + 
mmGRBM_GFX_INDEX;
+
+   if (amdgpu_sriov_runtime(adev)) {
+   pr_err("shoudn't call rlcg write register during runtime\n");
+   return;
+   }
+
+   writel(v, scratch_reg0);
+   writel(offset | 0x8000, scratch_reg1);
+   writel(1, spare_int);
+   for (i = 0; i < retries; i++) {
+   u32 tmp;
+
+   tmp = readl(scratch_reg1);
+   if (!(tmp & 0x8000))
+   break;
+
+   udelay(10);
+   }
+
+   if (i >= retries)
+   pr_err("timeout: rlcg program reg:0x%05x failed !\n", offset);
+}
+
 static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] =
 {
/* Pending on emulation bring up */
@@ -4247,6 +4297,32 @@ static void gfx_v10_0_update_spm_vmid(struct 
amdgpu_device *adev, unsigned vmid)
WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
 }
 
+static bool gfx_v10_0_check_rlcg_range(struct amdgpu_device *adev,
+   uint32_t offset,
+   struct soc15_reg_rlcg *entries, int 
arr_size)
+{
+   int i;
+   uint32_t reg;
+
+   for (i = 0; i < arr_size; i++) {
+   const struct soc15_reg_rlcg *entry;
+
+   entry = [i];
+   reg = 
adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
+   if (offset == reg)
+   return true;
+   }
+
+   return false;
+}
+
+static bool gfx_v10_0_is_rlcg_access_range(stru

[PATCH 2/2] drm/amdgpu: refactor RLCG access path part 2

2020-03-10 Thread Monk Liu
switch to new RLCG access path, and drop the legacy
WREG32_RLC macros

tested-by: Monk Liu 
tested-by: Zhou pengju 
Signed-off-by: Zhou pengju 
Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  30 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c|   5 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c|   8 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 104 +++---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c |   2 +-
 drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c  |  28 +++---
 drivers/gpu/drm/amd/amdgpu/soc15.c|  11 +--
 drivers/gpu/drm/amd/amdgpu/soc15_common.h |  57 
 8 files changed, 93 insertions(+), 152 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index df841c2..a21f005 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -105,8 +105,8 @@ void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev 
*kgd, uint32_t vmid,
 
lock_srbm(kgd, 0, 0, 0, vmid);
 
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
/* APE1 no longer exists on GFX9 */
 
unlock_srbm(kgd);
@@ -242,13 +242,13 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, 
uint32_t pipe_id,
 
for (reg = hqd_base;
 reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
-   WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
+   WREG32(reg, mqd_hqd[reg - hqd_base]);
 
 
/* Activate doorbell logic before triggering WPTR poll. */
data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
 CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
 
if (wptr) {
/* Don't read wptr with get_user because the user
@@ -277,25 +277,25 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, 
uint32_t pipe_id,
guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
 
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
   lower_32_bits(guessed_wptr));
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
   upper_32_bits(guessed_wptr));
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
   lower_32_bits((uintptr_t)wptr));
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, 
mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
   upper_32_bits((uintptr_t)wptr));
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
-  (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
+  get_queue_mask(adev, pipe_id, queue_id));
}
 
/* Start the EOP fetcher */
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
   REG_SET_FIELD(m->cp_hqd_eop_rptr,
 CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
 
data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
 
release_queue(kgd);
 
@@ -547,7 +547,7 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd,
acquire_queue(kgd, pipe_id, queue_id);
 
if (m->cp_hqd_vmid == 0)
-   WREG32_FIELD15_RLC(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
+   WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
 
switch (reset_type) {
case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
@@ -561,7 +561,7 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd,
break;
}
 
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
 
end_jiffies = (utimeout * HZ / 1000) + jiffies;
while (true) {
@@ -656,7 +656,7 @@ int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd,
 
mutex_lock(>grbm_idx_mutex);
 
-   WREG32_SOC15_

[enable VCN2.0 for NV12 SRIOV 5/6] drm/amdgpu: disable clock/power gating for SRIOV

2020-03-05 Thread Monk Liu
and disable MC resume in VCN 2.0 as well

those are of no concern to the VF driver

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 23 +++
 1 file changed, 23 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
index dd500d1..f2745fd 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
@@ -320,6 +320,9 @@ static void vcn_v2_0_mc_resume(struct amdgpu_device *adev)
uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
uint32_t offset;
 
+   if (amdgpu_sriov_vf(adev))
+   return;
+
/* cache window 0: fw */
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
@@ -464,6 +467,9 @@ static void vcn_v2_0_disable_clock_gating(struct 
amdgpu_device *adev)
 {
uint32_t data;
 
+   if (amdgpu_sriov_vf(adev))
+   return;
+
/* UVD disable CGC */
data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL);
if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
@@ -622,6 +628,9 @@ static void vcn_v2_0_enable_clock_gating(struct 
amdgpu_device *adev)
 {
uint32_t data = 0;
 
+   if (amdgpu_sriov_vf(adev))
+   return;
+
/* enable UVD CGC */
data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL);
if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
@@ -674,6 +683,9 @@ static void vcn_v2_0_disable_static_power_gating(struct 
amdgpu_device *adev)
uint32_t data = 0;
int ret;
 
+   if (amdgpu_sriov_vf(adev))
+   return;
+
if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
| 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
@@ -721,6 +733,9 @@ static void vcn_v2_0_enable_static_power_gating(struct 
amdgpu_device *adev)
uint32_t data = 0;
int ret;
 
+   if (amdgpu_sriov_vf(adev))
+   return;
+
if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
/* Before power off, this indicator has to be turned on */
data = RREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS);
@@ -1231,6 +1246,9 @@ static int vcn_v2_0_set_clockgating_state(void *handle,
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
bool enable = (state == AMD_CG_STATE_GATE);
 
+   if (amdgpu_sriov_vf(adev))
+   return 0;
+
if (enable) {
/* wait for STATUS to clear */
if (vcn_v2_0_is_idle(handle))
@@ -1686,6 +1704,11 @@ static int vcn_v2_0_set_powergating_state(void *handle,
int ret;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+   if (amdgpu_sriov_vf(adev)) {
+   adev->vcn.cur_state = AMD_PG_STATE_UNGATE;
+   return 0;
+   }
+
if (state == adev->vcn.cur_state)
return 0;
 
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[enable VCN2.0 for NV12 SRIOV 3/6] drm/amdgpu: implement initialization part on VCN2.0 for SRIOV

2020-03-05 Thread Monk Liu
one dec ring and one enc ring

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 231 +-
 1 file changed, 228 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
index c387c81..421e5bf 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
@@ -29,6 +29,7 @@
 #include "soc15d.h"
 #include "amdgpu_pm.h"
 #include "amdgpu_psp.h"
+#include "mmsch_v2_0.h"
 
 #include "vcn/vcn_2_0_0_offset.h"
 #include "vcn/vcn_2_0_0_sh_mask.h"
@@ -54,7 +55,7 @@ static int vcn_v2_0_set_powergating_state(void *handle,
enum amd_powergating_state state);
 static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
int inst_idx, struct dpg_pause_state 
*new_state);
-
+static int vcn_v2_0_start_sriov(struct amdgpu_device *adev);
 /**
  * vcn_v2_0_early_init - set function pointers
  *
@@ -67,7 +68,10 @@ static int vcn_v2_0_early_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
adev->vcn.num_vcn_inst = 1;
-   adev->vcn.num_enc_rings = 2;
+   if (amdgpu_sriov_vf(adev))
+   adev->vcn.num_enc_rings = 1;
+   else
+   adev->vcn.num_enc_rings = 2;
 
vcn_v2_0_set_dec_ring_funcs(adev);
vcn_v2_0_set_enc_ring_funcs(adev);
@@ -154,7 +158,10 @@ static int vcn_v2_0_sw_init(void *handle)
for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
ring = >vcn.inst->ring_enc[i];
ring->use_doorbell = true;
-   ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 
1) + 2 + i;
+   if (!amdgpu_sriov_vf(adev))
+   ring->doorbell_index = 
(adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + i;
+   else
+   ring->doorbell_index = 
(adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + i;
sprintf(ring->name, "vcn_enc%d", i);
r = amdgpu_ring_init(adev, ring, 512, >vcn.inst->irq, 0);
if (r)
@@ -163,6 +170,10 @@ static int vcn_v2_0_sw_init(void *handle)
 
adev->vcn.pause_dpg_mode = vcn_v2_0_pause_dpg_mode;
 
+   r = amdgpu_virt_alloc_mm_table(adev);
+   if (r)
+   return r;
+
return 0;
 }
 
@@ -178,6 +189,8 @@ static int vcn_v2_0_sw_fini(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+   amdgpu_virt_free_mm_table(adev);
+
r = amdgpu_vcn_suspend(adev);
if (r)
return r;
@@ -203,6 +216,9 @@ static int vcn_v2_0_hw_init(void *handle)
adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
 ring->doorbell_index, 0);
 
+   if (amdgpu_sriov_vf(adev))
+   vcn_v2_0_start_sriov(adev);
+
r = amdgpu_ring_test_helper(ring);
if (r)
goto done;
@@ -1680,6 +1696,215 @@ static int vcn_v2_0_set_powergating_state(void *handle,
return ret;
 }
 
+static int vcn_v2_0_start_mmsch(struct amdgpu_device *adev,
+   struct amdgpu_mm_table *table)
+{
+   uint32_t data = 0, loop;
+   uint64_t addr = table->gpu_addr;
+   struct mmsch_v2_0_init_header *header;
+   uint32_t size;
+   int i;
+
+   header = (struct mmsch_v2_0_init_header *)table->cpu_addr;
+   size = header->header_size + header->vcn_table_size;
+
+   /* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr
+* of memory descriptor location
+*/
+   WREG32_SOC15(UVD, 0, mmMMSCH_VF_CTX_ADDR_LO, lower_32_bits(addr));
+   WREG32_SOC15(UVD, 0, mmMMSCH_VF_CTX_ADDR_HI, upper_32_bits(addr));
+
+   /* 2, update vmid of descriptor */
+   data = RREG32_SOC15(UVD, 0, mmMMSCH_VF_VMID);
+   data &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+   /* use domain0 for MM scheduler */
+   data |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+   WREG32_SOC15(UVD, 0, mmMMSCH_VF_VMID, data);
+
+   /* 3, notify mmsch about the size of this descriptor */
+   WREG32_SOC15(UVD, 0, mmMMSCH_VF_CTX_SIZE, size);
+
+   /* 4, set resp to zero */
+   WREG32_SOC15(UVD, 0, mmMMSCH_VF_MAILBOX_RESP, 0);
+
+   adev->vcn.inst->ring_dec.wptr = 0;
+   adev->vcn.inst->ring_dec.wptr_old = 0;
+   vcn_v2_0_dec_ring_set_wptr(>vcn.inst->ring_dec);
+
+   for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+   adev->vcn.inst->ring_enc[i].wptr = 0;
+   adev->vcn.inst->ring_enc[i].wptr_old = 0;
+   vcn_v2_0_enc_ring_set_wptr(>vcn.inst->ring_enc[i]);
+   }
+
+

[PATCH] drm/amdgpu: enable vcn 2.0 for SRIOV on NV12

2020-03-05 Thread Monk Liu
this a patch that port from SRIOV project branch
to fix those IB/RING test fail on VCN 2.0 rings

Signed-off-by: Darlington Opara 
Signed-off-by: Jiange Zhao 
Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c   |   3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c|  14 +-
 drivers/gpu/drm/amd/amdgpu/mmsch_v2_0.h| 141 ++
 drivers/gpu/drm/amd/amdgpu/nv.c|   3 +-
 drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c  | 303 -
 .../amd/include/asic_reg/vcn/vcn_2_0_0_offset.h| 197 ++
 6 files changed, 637 insertions(+), 24 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/mmsch_v2_0.h

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
index 5727f00a..0120130 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
@@ -181,6 +181,9 @@ int amdgpu_jpeg_dec_ring_test_ib(struct amdgpu_ring *ring, 
long timeout)
struct dma_fence *fence = NULL;
long r = 0;
 
+   if (amdgpu_sriov_vf(adev))
+   return 0;
+
r = amdgpu_jpeg_dec_set_reg(ring, 1, );
if (r)
goto error;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index f96464e..ca7c9a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -359,6 +359,9 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
unsigned i;
int r;
 
+   if (amdgpu_sriov_vf(adev))
+   return 0;
+
WREG32(adev->vcn.inst[ring->me].external.scratch9, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r)
@@ -497,10 +500,6 @@ int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, 
long timeout)
struct dma_fence *fence;
long r;
 
-   /* temporarily disable ib test for sriov */
-   if (amdgpu_sriov_vf(adev))
-   return 0;
-
r = amdgpu_vcn_dec_get_create_msg(ring, 1, NULL);
if (r)
goto error;
@@ -527,6 +526,9 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
unsigned i;
int r;
 
+   if (amdgpu_sriov_vf(adev))
+   return 0;
+
r = amdgpu_ring_alloc(ring, 16);
if (r)
return r;
@@ -661,10 +663,6 @@ int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, 
long timeout)
struct amdgpu_bo *bo = NULL;
long r;
 
-   /* temporarily disable ib test for sriov */
-   if (amdgpu_sriov_vf(adev))
-   return 0;
-
r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
  AMDGPU_GEM_DOMAIN_VRAM,
  , NULL, NULL);
diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v2_0.h 
b/drivers/gpu/drm/amd/amdgpu/mmsch_v2_0.h
new file mode 100644
index 000..ad99e92
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v2_0.h
@@ -0,0 +1,141 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MMSCH_V2_0_H__
+#define __MMSCH_V2_0_H__
+
+#define MMSCH_VERSION_MAJOR2
+#define MMSCH_VERSION_MINOR0
+#define MMSCH_VERSION  (MMSCH_VERSION_MAJOR << 16 | MMSCH_VERSION_MINOR)
+
+enum mmsch_v2_0_command_type {
+   MMSCH_COMMAND__DIRECT_REG_WRITE = 0,
+   MMSCH_COMMAND__DIRECT_REG_POLLING = 2,
+   MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE = 3,
+   MMSCH_COMMAND__INDIRECT_REG_WRITE = 8,
+   MMSCH_COMMAND__END = 0xf
+};
+
+struct mmsch_v2_0_init_header {
+   uint32_t version;
+   uint32_t header_size;
+   uint32_t vcn_init_status;
+   uint32_t vcn_table_offset;
+   uint32_t vcn_table_size;
+};
+
+struct mmsch_v2_0_cmd_direct_reg_header {
+   uint32_t reg_offset   : 28

[PATCH 2/3] drm/amdgpu: don't use pipe1 of gfx10

2020-03-02 Thread Monk Liu
what:
we found that IDLE sometimes fails after the VF guest finished the IB
test on GFX ring1 (pipe1)

why:
below is what CP team stated (Manu):
GFX Pipe 1 is there in HW, but as part of optimization all driver
decided not to use pipe 1 at all, otherwise driver has to sacrifice
context so all 7 context will not be able for GFX pipe 0. That’s
why I skip setting of state for gfx pipe 1 as decided by all driver
team

fix:
since the CP team won't help us debug any issues related to gfx pipe1,
based on the above reason let's skip gfx ring 1 (pipe1)
even for both bare-metal and SRIOV

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 29 ++---
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 0555989..afae4cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -1308,7 +1308,7 @@ static int gfx_v10_0_sw_init(void *handle)
case CHIP_NAVI14:
case CHIP_NAVI12:
adev->gfx.me.num_me = 1;
-   adev->gfx.me.num_pipe_per_me = 2;
+   adev->gfx.me.num_pipe_per_me = 1;
adev->gfx.me.num_queue_per_pipe = 1;
adev->gfx.mec.num_mec = 2;
adev->gfx.mec.num_pipe_per_mec = 4;
@@ -2713,18 +2713,21 @@ static int gfx_v10_0_cp_gfx_start(struct amdgpu_device 
*adev)
 
amdgpu_ring_commit(ring);
 
-   /* submit cs packet to copy state 0 to next available state */
-   ring = >gfx.gfx_ring[1];
-   r = amdgpu_ring_alloc(ring, 2);
-   if (r) {
-   DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
-   return r;
-   }
+   if (adev->gfx.me.num_pipe_per_me == 2) {
+   /* submit cs packet to copy state 0 to next available state */
+   ring = >gfx.gfx_ring[1];
 
-   amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
-   amdgpu_ring_write(ring, 0);
+   r = amdgpu_ring_alloc(ring, 2);
+   if (r) {
+   DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
+   return r;
+   }
 
-   amdgpu_ring_commit(ring);
+   amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
+   amdgpu_ring_write(ring, 0);
+
+   amdgpu_ring_commit(ring);
+   }
 
return 0;
 }
@@ -2822,6 +2825,9 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device 
*adev)
mutex_unlock(>srbm_mutex);
 
/* Init gfx ring 1 for pipe 1 */
+   if (adev->gfx.me.num_pipe_per_me == 1)
+   goto do_start;
+
mutex_lock(>srbm_mutex);
gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID1);
ring = >gfx.gfx_ring[1];
@@ -2860,6 +2866,7 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device 
*adev)
gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
mutex_unlock(>srbm_mutex);
 
+do_start:
/* start the ring */
gfx_v10_0_cp_gfx_start(adev);
 
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 3/3] drm/amdgpu: stop using sratch_reg in IB test

2020-03-02 Thread Monk Liu
scratch_reg0 is used by RLCG for register access usage
in SRIOV case.

both CP firmware and the driver can invoke RLCG to do
certain register accesses (through scratch_reg0/1/2/3),
but RLCG currently has no protection against races, so
if two clients do RLCG register access in parallel they
will collide.

The GFX IB test is runtime work, so it is forbidden
to use scratch_reg0/1/2/3 during the IB test period

note:
Although we could limit this change to SRIOV, it doesn't
seem worth the effort to differentiate bare-metal from
SRIOV for the GFX IB test

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 38 +++---
 1 file changed, 17 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index afae4cc..b86a531 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -500,29 +500,28 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring 
*ring, long timeout)
struct amdgpu_device *adev = ring->adev;
struct amdgpu_ib ib;
struct dma_fence *f = NULL;
-   uint32_t scratch;
-   uint32_t tmp = 0;
+   unsigned index;
+   uint64_t gpu_addr;
+   uint32_t tmp;
long r;
 
-   r = amdgpu_gfx_scratch_get(adev, );
-   if (r) {
-   DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
+   r = amdgpu_device_wb_get(adev, );
+   if (r)
return r;
-   }
-
-   WREG32(scratch, 0xCAFEDEAD);
 
+   gpu_addr = adev->wb.gpu_addr + (index * 4);
+   adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
memset(, 0, sizeof(ib));
-   r = amdgpu_ib_get(adev, NULL, 256, );
-   if (r) {
-   DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+   r = amdgpu_ib_get(adev, NULL, 16, );
+   if (r)
goto err1;
-   }
 
-   ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
-   ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
-   ib.ptr[2] = 0xDEADBEEF;
-   ib.length_dw = 3;
+   ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
+   ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
+   ib.ptr[2] = lower_32_bits(gpu_addr);
+   ib.ptr[3] = upper_32_bits(gpu_addr);
+   ib.ptr[4] = 0xDEADBEEF;
+   ib.length_dw = 5;
 
r = amdgpu_ib_schedule(ring, 1, , NULL, );
if (r)
@@ -530,15 +529,13 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring 
*ring, long timeout)
 
r = dma_fence_wait_timeout(f, false, timeout);
if (r == 0) {
-   DRM_ERROR("amdgpu: IB test timed out.\n");
r = -ETIMEDOUT;
goto err2;
} else if (r < 0) {
-   DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
goto err2;
}
 
-   tmp = RREG32(scratch);
+   tmp = adev->wb.wb[index];
if (tmp == 0xDEADBEEF)
r = 0;
else
@@ -547,8 +544,7 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, 
long timeout)
amdgpu_ib_free(adev, , NULL);
dma_fence_put(f);
 err1:
-   amdgpu_gfx_scratch_free(adev, scratch);
-
+   amdgpu_device_wb_free(adev, index);
return r;
 }
 
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/3] drm/amdgpu: fix IB test MCBP bug

2020-03-02 Thread Monk Liu
1)for gfx IB test we shouldn't insert DE meta data

2)we should make sure IB test finished before we
send event 3 to hypervisor otherwise the IDLE from
event 3 will preempt IB test, which is not designed
as a compatible structure for MCBP

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c| 3 ---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 2 +-
 5 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 351096a..572eb6e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3195,6 +3195,12 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
flush_delayed_work(>delayed_init_work);
adev->shutdown = true;
 
+   /* make sure IB test finished before entering exclusive mode
+* to avoid preemption on IB test
+* */
+   if (amdgpu_sriov_vf(adev))
+   amdgpu_virt_request_full_gpu(adev, false);
+
/* disable all interrupts */
amdgpu_irq_disable_all(adev);
if (adev->mode_info.mode_config_initialized){
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 0f35639..0b1511a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -88,9 +88,6 @@ void amdgpu_driver_unload_kms(struct drm_device *dev)
if (adev->rmmio == NULL)
goto done_free;
 
-   if (amdgpu_sriov_vf(adev))
-   amdgpu_virt_request_full_gpu(adev, false);
-
if (adev->runpm) {
pm_runtime_get_sync(dev->dev);
pm_runtime_forbid(dev->dev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 94ca9ff..0555989 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -4432,7 +4432,7 @@ static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring 
*ring,
if (flags & AMDGPU_IB_PREEMPTED)
control |= INDIRECT_BUFFER_PRE_RESUME(1);
 
-   if (!(ib->flags & AMDGPU_IB_FLAG_CE))
+   if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
gfx_v10_0_ring_emit_de_meta(ring,
(!amdgpu_sriov_vf(ring->adev) && flags & 
AMDGPU_IB_PREEMPTED) ? true : false);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 393a132..b14f46a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6116,7 +6116,7 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring 
*ring,
if (amdgpu_sriov_vf(ring->adev) && (ib->flags & 
AMDGPU_IB_FLAG_PREEMPT)) {
control |= INDIRECT_BUFFER_PRE_ENB(1);
 
-   if (!(ib->flags & AMDGPU_IB_FLAG_CE))
+   if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
gfx_v8_0_ring_emit_de_meta(ring);
}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 0156479..d8d256e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -4985,7 +4985,7 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring 
*ring,
if (amdgpu_sriov_vf(ring->adev) && (ib->flags & 
AMDGPU_IB_FLAG_PREEMPT)) {
control |= INDIRECT_BUFFER_PRE_ENB(1);
 
-   if (!(ib->flags & AMDGPU_IB_FLAG_CE))
+   if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
gfx_v9_0_ring_emit_de_meta(ring);
}
 
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: fix psp ucode not loaded in bare-metal

2020-02-20 Thread Monk Liu
for bare-metal we always need to load sys/sos/kdb

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 3494966..51839ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -1081,7 +1081,7 @@ static int psp_hw_start(struct psp_context *psp)
struct amdgpu_device *adev = psp->adev;
int ret;
 
-   if (!amdgpu_sriov_vf(adev) && !adev->in_gpu_reset) {
+   if (!amdgpu_sriov_vf(adev)) {
if (psp->kdb_bin_size &&
(psp->funcs->bootloader_load_kdb != NULL)) {
ret = psp_bootloader_load_kdb(psp);
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 3/3] drm/amdgpu: fix colliding of preemption

2020-02-17 Thread Monk Liu
what:
some os preemption path is messed up with world switch preemption

fix:
clean up those logics so OS preemption is not mixed with world
switch preemption

this patch is a general fix for issues that come from SRIOV MCBP,
but there are still unresolved UMD-side issues, so this patch
cannot fix all world switch bugs.

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 3 ++-
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   | 8 
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index a2ee30b..7854c05 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -70,7 +70,8 @@ uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring,
uint32_t index = 0;
int r;
 
-   if (vmid == 0 || !amdgpu_mcbp)
+   /* don't enable OS preemption on SDMA under SRIOV */
+   if (amdgpu_sriov_vf(adev) || vmid == 0 || !amdgpu_mcbp)
return 0;
 
r = amdgpu_sdma_get_index_from_ring(ring, );
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 5e9fb09..7b61583 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -4413,7 +4413,7 @@ static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring 
*ring,
 
control |= ib->length_dw | (vmid << 24);
 
-   if (amdgpu_mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
+   if ((amdgpu_sriov_vf(ring->adev) || amdgpu_mcbp) && (ib->flags & 
AMDGPU_IB_FLAG_PREEMPT)) {
control |= INDIRECT_BUFFER_PRE_ENB(1);
 
if (flags & AMDGPU_IB_PREEMPTED)
@@ -4421,7 +4421,7 @@ static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring 
*ring,
 
if (!(ib->flags & AMDGPU_IB_FLAG_CE))
gfx_v10_0_ring_emit_de_meta(ring,
-   flags & AMDGPU_IB_PREEMPTED ? true : false);
+   (!amdgpu_sriov_vf(ring->adev) && flags & 
AMDGPU_IB_PREEMPTED) ? true : false);
}
 
amdgpu_ring_write(ring, header);
@@ -4569,9 +4569,9 @@ static void gfx_v10_0_ring_emit_cntxcntl(struct 
amdgpu_ring *ring,
 {
uint32_t dw2 = 0;
 
-   if (amdgpu_mcbp)
+   if (amdgpu_mcbp || amdgpu_sriov_vf(ring->adev))
gfx_v10_0_ring_emit_ce_meta(ring,
-   flags & AMDGPU_IB_PREEMPTED ? true : false);
+   (!amdgpu_sriov_vf(ring->adev) && flags & 
AMDGPU_IB_PREEMPTED) ? true : false);
 
dw2 |= 0x8000; /* set load_enable otherwise this package is just 
NOPs */
if (flags & AMDGPU_HAVE_CTX_SWITCH) {
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/3] drm/amdgpu: cleanup some incorrect reg access for SRIOV

2020-02-17 Thread Monk Liu
SWDEV-220810 - some register access from VF is wrong

1)
we shouldn't load PSP kdb and sys/sos for VF, they are
supposed to be handled by hypervisor

2)
ih reroute doesn't work on VF thus we should avoid calling
it, besides VF should not use those PSP register sets for PF

3)
shouldn't load SMU ucode under SRIOV, otherwise PSP would report
error

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 4 ++--
 drivers/gpu/drm/amd/amdgpu/psp_v11_0.c  | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index a16c810..3494966 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -1081,7 +1081,7 @@ static int psp_hw_start(struct psp_context *psp)
struct amdgpu_device *adev = psp->adev;
int ret;
 
-   if (!amdgpu_sriov_vf(adev) || !adev->in_gpu_reset) {
+   if (!amdgpu_sriov_vf(adev) && !adev->in_gpu_reset) {
if (psp->kdb_bin_size &&
(psp->funcs->bootloader_load_kdb != NULL)) {
ret = psp_bootloader_load_kdb(psp);
@@ -1318,7 +1318,7 @@ static int psp_np_fw_load(struct psp_context *psp)
 
if (psp->autoload_supported) {
ucode = >firmware.ucode[AMDGPU_UCODE_ID_SMC];
-   if (!ucode->fw)
+   if (!ucode->fw || amdgpu_sriov_vf(adev))
goto out;
 
ret = psp_execute_np_fw_load(psp, ucode);
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
index 0829188..8ab3bf3 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -420,7 +420,8 @@ static int psp_v11_0_ring_init(struct psp_context *psp,
struct psp_ring *ring;
struct amdgpu_device *adev = psp->adev;
 
-   psp_v11_0_reroute_ih(psp);
+   if (!amdgpu_sriov_vf(adev))
+   psp_v11_0_reroute_ih(psp);
 
ring = >km_ring;
 
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/3] drm/amdgpu: fix memory leak during TDR test

2020-02-17 Thread Monk Liu
fix system memory leak regression introduced
by this previous change of 201331 - Single VF Mode Test

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/powerplay/smu_v11_0.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c 
b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
index 9d15acf..3363f1c 100644
--- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
@@ -978,8 +978,12 @@ int smu_v11_0_init_max_sustainable_clocks(struct 
smu_context *smu)
struct smu_11_0_max_sustainable_clocks *max_sustainable_clocks;
int ret = 0;
 
-   max_sustainable_clocks = kzalloc(sizeof(struct 
smu_11_0_max_sustainable_clocks),
+   if (smu->smu_table.max_sustainable_clocks == NULL)
+   max_sustainable_clocks = kzalloc(sizeof(struct 
smu_11_0_max_sustainable_clocks),
 GFP_KERNEL);
+   else
+   max_sustainable_clocks = smu->smu_table.max_sustainable_clocks;
+
smu->smu_table.max_sustainable_clocks = (void *)max_sustainable_clocks;
 
max_sustainable_clocks->uclock = smu->smu_table.boot_values.uclk / 100;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] avoid to use NULL pointer

2020-02-14 Thread Monk Liu
Signed-off-by: Monk Liu 
---
 src/lib/umr_read_pm4_stream.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/lib/umr_read_pm4_stream.c b/src/lib/umr_read_pm4_stream.c
index 60bea49..317b638 100644
--- a/src/lib/umr_read_pm4_stream.c
+++ b/src/lib/umr_read_pm4_stream.c
@@ -325,6 +325,9 @@ int umr_pm4_decode_ring_is_halted(struct umr_asic *asic, 
char *ringname)
// since the kernel returned values might be unwrapped.
for (n = 0; n < 100; n++) {
ringdata = umr_read_ring_data(asic, ringname, );
+   if (!ringdata)
+   return 1;
+
ringsize /= 4;
ringdata[0] %= ringsize;
ringdata[1] %= ringsize;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] avoid to use NULL pointer

2020-02-14 Thread Monk Liu
Signed-off-by: Monk Liu 
---
 src/lib/umr_read_pm4_stream.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/lib/umr_read_pm4_stream.c b/src/lib/umr_read_pm4_stream.c
index 60bea49..317b638 100644
--- a/src/lib/umr_read_pm4_stream.c
+++ b/src/lib/umr_read_pm4_stream.c
@@ -325,6 +325,9 @@ int umr_pm4_decode_ring_is_halted(struct umr_asic *asic, 
char *ringname)
// since the kernel returned values might be unwrapped.
for (n = 0; n < 100; n++) {
ringdata = umr_read_ring_data(asic, ringname, );
+   if (!ringdata)
+   return 1;
+
ringsize /= 4;
ringdata[0] %= ringsize;
ringdata[1] %= ringsize;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/2] drm/amdgpu: fix double gpu_recovery for NV of SRIOV

2019-12-17 Thread Monk Liu
issues:
gpu_recover() is re-entered by the mailbox interrupt
handler mxgpu_nv.c

fix:
we need to bypass the gpu_recover() invocation in the mailbox
interrupt as long as the timeout is not infinite (thus the TDR
will be triggered automatically after timeout, with no need to
invoke gpu_recover() through the mailbox interrupt).

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index 0d8767e..1c3a7d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -269,7 +269,11 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct 
*work)
}
 
/* Trigger recovery for world switch failure if no TDR */
-   if (amdgpu_device_should_recover_gpu(adev))
+   if (amdgpu_device_should_recover_gpu(adev)
+   && (adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT ||
+   adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT ||
+   adev->compute_timeout == MAX_SCHEDULE_TIMEOUT ||
+   adev->video_timeout == MAX_SCHEDULE_TIMEOUT))
amdgpu_device_gpu_recover(adev, NULL);
 }
 
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/2] drm/amdgpu: fix KIQ ring test fail in TDR of SRIOV

2019-12-17 Thread Monk Liu
issues:
MEC is ruined by the amdkfd_pre_reset after VF FLR done

fix:
amdkfd_pre_reset() would ruin MEC after hypervisor finished the VF FLR,
the correct sequence is do amdkfd_pre_reset before VF FLR but there is
a limitation to block this sequence:
if we do pre_reset() before VF FLR, it would go KIQ way to do register
access and stuck there, because KIQ probably won't work by that time
(e.g. you already made GFX hang)

so the best way right now is to simply remove it.

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 605cef6..ae962b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3672,8 +3672,6 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device 
*adev,
if (r)
return r;
 
-   amdgpu_amdkfd_pre_reset(adev);
-
/* Resume IP prior to SMC */
r = amdgpu_device_ip_reinit_early_sriov(adev);
if (r)
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: fix KIQ ring test fail in TDR

2019-12-17 Thread Monk Liu
issues:
there are two issue may lead to TDR failure for SRIOV
1) gpu_recover() is re-entered by the mailbox interrupt
handler mxgpu_nv.c
2) MEC is ruined by the amdkfd_pre_reset after VF FLR done

fix:
for 1) we need to bypass the gpu_recover() invocation in the mailbox
interrupt as long as the timeout is not infinite (thus the TDR
will be triggered automatically after timeout, with no need to
invoke gpu_recover() through the mailbox interrupt).

for 2) amdkfd_pre_reset() would ruin MEC after hypervisor finished
the VF FLR, the correct sequence is do amdkfd_pre_reset before VF FLR
but there is a limitation to block this sequence:
if we do pre_reset() before VF FLR, it would go KIQ way to do register
access and stuck there, because KIQ probably won't work by that time
(e.g. you already made GFX hang)

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 --
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c  | 6 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 605cef6..ae962b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3672,8 +3672,6 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device 
*adev,
if (r)
return r;
 
-   amdgpu_amdkfd_pre_reset(adev);
-
/* Resume IP prior to SMC */
r = amdgpu_device_ip_reinit_early_sriov(adev);
if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index 0d8767e..1c3a7d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -269,7 +269,11 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct 
*work)
}
 
/* Trigger recovery for world switch failure if no TDR */
-   if (amdgpu_device_should_recover_gpu(adev))
+   if (amdgpu_device_should_recover_gpu(adev)
+   && (adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT ||
+   adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT ||
+   adev->compute_timeout == MAX_SCHEDULE_TIMEOUT ||
+   adev->video_timeout == MAX_SCHEDULE_TIMEOUT))
amdgpu_device_gpu_recover(adev, NULL);
 }
 
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: fix GFX10 missing CSIB set(v2)

2019-11-29 Thread Monk Liu
still need to init csb even for SRIOV

v2:
drop init_pg() for gfx10 at all since
PG and GFX off feature will be fully controled
by RLC and SMU fw for gfx10

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 38 ++
 1 file changed, 11 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 53d11e9..a8eebc4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -1766,22 +1766,6 @@ static int gfx_v10_0_init_csb(struct amdgpu_device *adev)
return 0;
 }
 
-static int gfx_v10_0_init_pg(struct amdgpu_device *adev)
-{
-   int i;
-   int r;
-
-   r = gfx_v10_0_init_csb(adev);
-   if (r)
-   return r;
-
-   for (i = 0; i < adev->num_vmhubs; i++)
-   amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
-
-   /* TODO: init power gating */
-   return 0;
-}
-
 void gfx_v10_0_rlc_stop(struct amdgpu_device *adev)
 {
u32 tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL);
@@ -1873,22 +1857,21 @@ static int gfx_v10_0_rlc_load_microcode(struct 
amdgpu_device *adev)
 static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev)
 {
int r;
-
-   if (amdgpu_sriov_vf(adev))
-   return 0;
+   int i;
 
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+
r = gfx_v10_0_wait_for_rlc_autoload_complete(adev);
if (r)
return r;
 
-   r = gfx_v10_0_init_pg(adev);
-   if (r)
-   return r;
+   gfx_v10_0_init_csb(adev);
 
-   /* enable RLC SRM */
-   gfx_v10_0_rlc_enable_srm(adev);
+   for (i = 0; i < adev->num_vmhubs; i++)
+   amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
 
+   if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
+   gfx_v10_0_rlc_enable_srm(adev);
} else {
adev->gfx.rlc.funcs->stop(adev);
 
@@ -1910,9 +1893,10 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device 
*adev)
return r;
}
 
-   r = gfx_v10_0_init_pg(adev);
-   if (r)
-   return r;
+   gfx_v10_0_init_csb(adev);
+
+   for (i = 0; i < adev->num_vmhubs; i++)
+   amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
 
adev->gfx.rlc.funcs->start(adev);
 
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdgpu: should stop GFX ring in hw_fini

2019-11-29 Thread Monk Liu
To align with the scheme from gfx9

disabling the GFX ring after VM shutdown could avoid
garbage data being fetched into the GFX RB, which may
lead to unnecessary screw-ups on GFX

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 230e8af..882cf1f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -3809,6 +3809,7 @@ static int gfx_v10_0_hw_fini(void *handle)
if (amdgpu_gfx_disable_kcq(adev))
DRM_ERROR("KCQ disable failed\n");
if (amdgpu_sriov_vf(adev)) {
+   gfx_v10_0_cp_gfx_enable(adev, false);
pr_debug("For SRIOV client, shouldn't do anything.\n");
return 0;
}
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 1/2] drm/amdgpu: fix GFX10 missing CSIB set

2019-11-29 Thread Monk Liu
still need to init csb even for SRIOV

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 74edfd9..230e8af 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -1874,14 +1874,16 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device 
*adev)
 {
int r;
 
-   if (amdgpu_sriov_vf(adev))
-   return 0;
-
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
r = gfx_v10_0_wait_for_rlc_autoload_complete(adev);
if (r)
return r;
 
+   if (amdgpu_sriov_vf(adev)) {
+   gfx_v10_0_init_csb(adev);
+   return 0;
+   }
+
r = gfx_v10_0_init_pg(adev);
if (r)
return r;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 2/2] drm/amdgpu: should stop GFX ring in hw_fini

2019-11-29 Thread Monk Liu
To align with the scheme from gfx9:

without disabling gfx ring in hw_fini we would
hit GFX hang if a guest VM is destroyed suddenly
when running a game

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 230e8af..882cf1f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -3809,6 +3809,7 @@ static int gfx_v10_0_hw_fini(void *handle)
if (amdgpu_gfx_disable_kcq(adev))
DRM_ERROR("KCQ disable failed\n");
if (amdgpu_sriov_vf(adev)) {
+   gfx_v10_0_cp_gfx_enable(adev, false);
pr_debug("For SRIOV client, shouldn't do anything.\n");
return 0;
}
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdgpu: fix calltrace during kmd unload(v2)

2019-11-27 Thread Monk Liu
kernel would report a warning on double unpin
of the CSB BO because we unpin it during hw_fini,
but actually we don't need to pin/unpin it during
hw_init/fini since it is created kernel-pinned

v2:
get_csb in init_rlc so hw_init() will make CSIB content
back even after reset or s3.
take care of gfx7/8 as well

v3:
use bo_create_kernel instead of bo_create_reserved for the CSB;
otherwise the bo_free_kernel() on the CSB is not aligned and
would lead to its internal reservation pending there forever

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c | 10 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 58 +
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c   |  2 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c   | 40 +--
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 40 +--
 5 files changed, 6 insertions(+), 144 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
index c8793e6..6373bfb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
@@ -124,13 +124,12 @@ int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, 
u32 dws)
  */
 int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev)
 {
-   volatile u32 *dst_ptr;
u32 dws;
int r;
 
/* allocate clear state block */
adev->gfx.rlc.clear_state_size = dws = 
adev->gfx.rlc.funcs->get_csb_size(adev);
-   r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
+   r = amdgpu_bo_create_kernel(adev, dws * 4, PAGE_SIZE,
  AMDGPU_GEM_DOMAIN_VRAM,
  >gfx.rlc.clear_state_obj,
  >gfx.rlc.clear_state_gpu_addr,
@@ -141,13 +140,6 @@ int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev)
return r;
}
 
-   /* set up the cs buffer */
-   dst_ptr = adev->gfx.rlc.cs_ptr;
-   adev->gfx.rlc.funcs->get_csb_buffer(adev, dst_ptr);
-   amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
-   amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
-   amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
-
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 7372904..7703b25 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -991,39 +991,6 @@ static int gfx_v10_0_rlc_init(struct amdgpu_device *adev)
return 0;
 }
 
-static int gfx_v10_0_csb_vram_pin(struct amdgpu_device *adev)
-{
-   int r;
-
-   r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
-   if (unlikely(r != 0))
-   return r;
-
-   r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
-   AMDGPU_GEM_DOMAIN_VRAM);
-   if (!r)
-   adev->gfx.rlc.clear_state_gpu_addr =
-   amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
-
-   amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
-
-   return r;
-}
-
-static void gfx_v10_0_csb_vram_unpin(struct amdgpu_device *adev)
-{
-   int r;
-
-   if (!adev->gfx.rlc.clear_state_obj)
-   return;
-
-   r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
-   if (likely(r == 0)) {
-   amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
-   amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
-   }
-}
-
 static void gfx_v10_0_mec_fini(struct amdgpu_device *adev)
 {
amdgpu_bo_free_kernel(>gfx.mec.hpd_eop_obj, NULL, NULL);
@@ -1785,25 +1752,7 @@ static void gfx_v10_0_enable_gui_idle_interrupt(struct 
amdgpu_device *adev,
 
 static int gfx_v10_0_init_csb(struct amdgpu_device *adev)
 {
-   int r;
-
-   if (adev->in_gpu_reset) {
-   r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
-   if (r)
-   return r;
-
-   r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj,
-  (void **)>gfx.rlc.cs_ptr);
-   if (!r) {
-   adev->gfx.rlc.funcs->get_csb_buffer(adev,
-   adev->gfx.rlc.cs_ptr);
-   amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
-   }
-
-   amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
-   if (r)
-   return r;
-   }
+   adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
 
/* csib */
WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_HI,
@@ -3774,10 +3723,6 @@ static int gfx_v10_0_hw_init(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-   r = gfx_v10_0_csb_vram_pin(adev);
-   if (r)
-   return r;
-
if (!amdgpu_

[PATCH] drm/amdgpu: fix calltrace during kmd unload(v2)

2019-11-27 Thread Monk Liu
kernel would report a warning on double unpin
of the CSB BO because we unpin it during hw_fini,
but actually we don't need to pin/unpin it during
hw_init/fini since it is created kernel-pinned

v2:
get_csb in init_rlc so hw_init() will make CSIB content
back even after reset or s3.
take care of gfx7/8 as well

remove all those useless code for gfx9/10

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c |  8 -
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 58 +
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c   |  2 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c   | 40 +--
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 40 +--
 5 files changed, 5 insertions(+), 143 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
index c8793e6..e832b2a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
@@ -124,7 +124,6 @@ int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 
dws)
  */
 int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev)
 {
-   volatile u32 *dst_ptr;
u32 dws;
int r;
 
@@ -141,13 +140,6 @@ int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev)
return r;
}
 
-   /* set up the cs buffer */
-   dst_ptr = adev->gfx.rlc.cs_ptr;
-   adev->gfx.rlc.funcs->get_csb_buffer(adev, dst_ptr);
-   amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
-   amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
-   amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
-
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 7372904..7703b25 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -991,39 +991,6 @@ static int gfx_v10_0_rlc_init(struct amdgpu_device *adev)
return 0;
 }
 
-static int gfx_v10_0_csb_vram_pin(struct amdgpu_device *adev)
-{
-   int r;
-
-   r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
-   if (unlikely(r != 0))
-   return r;
-
-   r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
-   AMDGPU_GEM_DOMAIN_VRAM);
-   if (!r)
-   adev->gfx.rlc.clear_state_gpu_addr =
-   amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
-
-   amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
-
-   return r;
-}
-
-static void gfx_v10_0_csb_vram_unpin(struct amdgpu_device *adev)
-{
-   int r;
-
-   if (!adev->gfx.rlc.clear_state_obj)
-   return;
-
-   r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
-   if (likely(r == 0)) {
-   amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
-   amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
-   }
-}
-
 static void gfx_v10_0_mec_fini(struct amdgpu_device *adev)
 {
amdgpu_bo_free_kernel(>gfx.mec.hpd_eop_obj, NULL, NULL);
@@ -1785,25 +1752,7 @@ static void gfx_v10_0_enable_gui_idle_interrupt(struct 
amdgpu_device *adev,
 
 static int gfx_v10_0_init_csb(struct amdgpu_device *adev)
 {
-   int r;
-
-   if (adev->in_gpu_reset) {
-   r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
-   if (r)
-   return r;
-
-   r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj,
-  (void **)>gfx.rlc.cs_ptr);
-   if (!r) {
-   adev->gfx.rlc.funcs->get_csb_buffer(adev,
-   adev->gfx.rlc.cs_ptr);
-   amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
-   }
-
-   amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
-   if (r)
-   return r;
-   }
+   adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
 
/* csib */
WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_HI,
@@ -3774,10 +3723,6 @@ static int gfx_v10_0_hw_init(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-   r = gfx_v10_0_csb_vram_pin(adev);
-   if (r)
-   return r;
-
if (!amdgpu_emu_mode)
gfx_v10_0_init_golden_registers(adev);
 
@@ -3865,7 +3810,6 @@ static int gfx_v10_0_hw_fini(void *handle)
}
gfx_v10_0_cp_enable(adev, false);
gfx_v10_0_enable_gui_idle_interrupt(adev, false);
-   gfx_v10_0_csb_vram_unpin(adev);
 
return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 43ae8fc..648d767 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -4554,6 +4554,8 @@ static int gfx_v7_0_hw_init(void *handle)
 
gfx_v7_0_constants_init(ad

[PATCH] drm/amdgpu: fix calltrace during kmd unload(v2)

2019-11-27 Thread Monk Liu
kernel would report a warning on double unpin
on the csb BO because we unpin it during hw_fini
but actually we don't need to pin/unpin it during
hw_init/fini since it is created with kernel pinned

v2:
get_csb in init_rlc so hw_init() will make CSIB content
back even after reset or s3.
take care of gfx7/8 as well

remove all those useless code for gfx9/10

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c |  8 -
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 58 +
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c   |  2 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c   | 40 +--
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 40 +--
 5 files changed, 5 insertions(+), 143 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
index c8793e6..e832b2a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
@@ -124,7 +124,6 @@ int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 
dws)
  */
 int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev)
 {
-   volatile u32 *dst_ptr;
u32 dws;
int r;
 
@@ -141,13 +140,6 @@ int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev)
return r;
}
 
-   /* set up the cs buffer */
-   dst_ptr = adev->gfx.rlc.cs_ptr;
-   adev->gfx.rlc.funcs->get_csb_buffer(adev, dst_ptr);
-   amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
-   amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
-   amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
-
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 7372904..7703b25 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -991,39 +991,6 @@ static int gfx_v10_0_rlc_init(struct amdgpu_device *adev)
return 0;
 }
 
-static int gfx_v10_0_csb_vram_pin(struct amdgpu_device *adev)
-{
-   int r;
-
-   r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
-   if (unlikely(r != 0))
-   return r;
-
-   r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
-   AMDGPU_GEM_DOMAIN_VRAM);
-   if (!r)
-   adev->gfx.rlc.clear_state_gpu_addr =
-   amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
-
-   amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
-
-   return r;
-}
-
-static void gfx_v10_0_csb_vram_unpin(struct amdgpu_device *adev)
-{
-   int r;
-
-   if (!adev->gfx.rlc.clear_state_obj)
-   return;
-
-   r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
-   if (likely(r == 0)) {
-   amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
-   amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
-   }
-}
-
 static void gfx_v10_0_mec_fini(struct amdgpu_device *adev)
 {
amdgpu_bo_free_kernel(>gfx.mec.hpd_eop_obj, NULL, NULL);
@@ -1785,25 +1752,7 @@ static void gfx_v10_0_enable_gui_idle_interrupt(struct 
amdgpu_device *adev,
 
 static int gfx_v10_0_init_csb(struct amdgpu_device *adev)
 {
-   int r;
-
-   if (adev->in_gpu_reset) {
-   r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
-   if (r)
-   return r;
-
-   r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj,
-  (void **)>gfx.rlc.cs_ptr);
-   if (!r) {
-   adev->gfx.rlc.funcs->get_csb_buffer(adev,
-   adev->gfx.rlc.cs_ptr);
-   amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
-   }
-
-   amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
-   if (r)
-   return r;
-   }
+   adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
 
/* csib */
WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_HI,
@@ -3774,10 +3723,6 @@ static int gfx_v10_0_hw_init(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-   r = gfx_v10_0_csb_vram_pin(adev);
-   if (r)
-   return r;
-
if (!amdgpu_emu_mode)
gfx_v10_0_init_golden_registers(adev);
 
@@ -3865,7 +3810,6 @@ static int gfx_v10_0_hw_fini(void *handle)
}
gfx_v10_0_cp_enable(adev, false);
gfx_v10_0_enable_gui_idle_interrupt(adev, false);
-   gfx_v10_0_csb_vram_unpin(adev);
 
return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 43ae8fc..648d767 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -4554,6 +4554,8 @@ static int gfx_v7_0_hw_init(void *handle)
 
gfx_v7_0_constants_init(ad

[PATCH 2/5] drm/amdgpu: skip rlc ucode loading for SRIOV gfx10

2019-11-26 Thread Monk Liu
Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 80 +-
 1 file changed, 41 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 879c0a1..a56cba9 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -691,59 +691,61 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device 
*adev)
adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
adev->gfx.ce_feature_version = 
le32_to_cpu(cp_hdr->ucode_feature_version);
 
-   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
-   err = request_firmware(>gfx.rlc_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-   err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
-   rlc_hdr = (const struct rlc_firmware_header_v2_0 
*)adev->gfx.rlc_fw->data;
-   version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
-   version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
-   if (version_major == 2 && version_minor == 1)
-   adev->gfx.rlc.is_rlc_v2_1 = true;
-
-   adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
-   adev->gfx.rlc_feature_version = 
le32_to_cpu(rlc_hdr->ucode_feature_version);
-   adev->gfx.rlc.save_and_restore_offset =
+   if (!amdgpu_sriov_vf(adev)) {
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", 
chip_name);
+   err = request_firmware(>gfx.rlc_fw, fw_name, adev->dev);
+   if (err)
+   goto out;
+   err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
+   rlc_hdr = (const struct rlc_firmware_header_v2_0 
*)adev->gfx.rlc_fw->data;
+   version_major = 
le16_to_cpu(rlc_hdr->header.header_version_major);
+   version_minor = 
le16_to_cpu(rlc_hdr->header.header_version_minor);
+   if (version_major == 2 && version_minor == 1)
+   adev->gfx.rlc.is_rlc_v2_1 = true;
+
+   adev->gfx.rlc_fw_version = 
le32_to_cpu(rlc_hdr->header.ucode_version);
+   adev->gfx.rlc_feature_version = 
le32_to_cpu(rlc_hdr->ucode_feature_version);
+   adev->gfx.rlc.save_and_restore_offset =
le32_to_cpu(rlc_hdr->save_and_restore_offset);
-   adev->gfx.rlc.clear_state_descriptor_offset =
+   adev->gfx.rlc.clear_state_descriptor_offset =
le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
-   adev->gfx.rlc.avail_scratch_ram_locations =
+   adev->gfx.rlc.avail_scratch_ram_locations =
le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
-   adev->gfx.rlc.reg_restore_list_size =
+   adev->gfx.rlc.reg_restore_list_size =
le32_to_cpu(rlc_hdr->reg_restore_list_size);
-   adev->gfx.rlc.reg_list_format_start =
+   adev->gfx.rlc.reg_list_format_start =
le32_to_cpu(rlc_hdr->reg_list_format_start);
-   adev->gfx.rlc.reg_list_format_separate_start =
+   adev->gfx.rlc.reg_list_format_separate_start =
le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
-   adev->gfx.rlc.starting_offsets_start =
+   adev->gfx.rlc.starting_offsets_start =
le32_to_cpu(rlc_hdr->starting_offsets_start);
-   adev->gfx.rlc.reg_list_format_size_bytes =
+   adev->gfx.rlc.reg_list_format_size_bytes =
le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
-   adev->gfx.rlc.reg_list_size_bytes =
+   adev->gfx.rlc.reg_list_size_bytes =
le32_to_cpu(rlc_hdr->reg_list_size_bytes);
-   adev->gfx.rlc.register_list_format =
+   adev->gfx.rlc.register_list_format =
kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
-   adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
-   if (!adev->gfx.rlc.register_list_format) {
-   err = -ENOMEM;
-   goto out;
-   }
+   adev->gfx.rlc.reg_list_size_bytes, 
GFP_KERNEL);
+   if (!adev->gfx.rlc.register_list_format) {
+   err = -ENOMEM;
+   goto out;
+   }
 
-   tmp = (unsigned int *)((uintptr_t)rlc_hdr +
-   
le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
-   for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
-   adev->gfx.rlc.register_list_format[i] = l

[PATCH 5/5] drm/amdgpu: fix calltrace during kmd unload

2019-11-26 Thread Monk Liu
kernel would report a warning on double unpin
on the csb BO because we unpin it during hw_fini
but actually we don't need to pin/unpin it during
hw_init/fini since it is created with kernel pinned

remove all those useless code for gfx9/10

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c |  1 -
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 38 
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 39 -
 3 files changed, 78 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
index c8793e6..289fada 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
@@ -145,7 +145,6 @@ int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev)
dst_ptr = adev->gfx.rlc.cs_ptr;
adev->gfx.rlc.funcs->get_csb_buffer(adev, dst_ptr);
amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
-   amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
 
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index a56cba9..5ee7467 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -996,39 +996,6 @@ static int gfx_v10_0_rlc_init(struct amdgpu_device *adev)
return 0;
 }
 
-static int gfx_v10_0_csb_vram_pin(struct amdgpu_device *adev)
-{
-   int r;
-
-   r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
-   if (unlikely(r != 0))
-   return r;
-
-   r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
-   AMDGPU_GEM_DOMAIN_VRAM);
-   if (!r)
-   adev->gfx.rlc.clear_state_gpu_addr =
-   amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
-
-   amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
-
-   return r;
-}
-
-static void gfx_v10_0_csb_vram_unpin(struct amdgpu_device *adev)
-{
-   int r;
-
-   if (!adev->gfx.rlc.clear_state_obj)
-   return;
-
-   r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
-   if (likely(r == 0)) {
-   amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
-   amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
-   }
-}
-
 static void gfx_v10_0_mec_fini(struct amdgpu_device *adev)
 {
amdgpu_bo_free_kernel(>gfx.mec.hpd_eop_obj, NULL, NULL);
@@ -3780,10 +3747,6 @@ static int gfx_v10_0_hw_init(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-   r = gfx_v10_0_csb_vram_pin(adev);
-   if (r)
-   return r;
-
if (!amdgpu_emu_mode)
gfx_v10_0_init_golden_registers(adev);
 
@@ -3871,7 +3834,6 @@ static int gfx_v10_0_hw_fini(void *handle)
}
gfx_v10_0_cp_enable(adev, false);
gfx_v10_0_enable_gui_idle_interrupt(adev, false);
-   gfx_v10_0_csb_vram_unpin(adev);
 
return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 4cc2e50..524a7ba 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1683,39 +1683,6 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
return 0;
 }
 
-static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
-{
-   int r;
-
-   r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
-   if (unlikely(r != 0))
-   return r;
-
-   r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
-   AMDGPU_GEM_DOMAIN_VRAM);
-   if (!r)
-   adev->gfx.rlc.clear_state_gpu_addr =
-   amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
-
-   amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
-
-   return r;
-}
-
-static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
-{
-   int r;
-
-   if (!adev->gfx.rlc.clear_state_obj)
-   return;
-
-   r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
-   if (likely(r == 0)) {
-   amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
-   amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
-   }
-}
-
 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
 {
amdgpu_bo_free_kernel(>gfx.mec.hpd_eop_obj, NULL, NULL);
@@ -3694,10 +3661,6 @@ static int gfx_v9_0_hw_init(void *handle)
 
gfx_v9_0_constants_init(adev);
 
-   r = gfx_v9_0_csb_vram_pin(adev);
-   if (r)
-   return r;
-
r = adev->gfx.rlc.funcs->resume(adev);
if (r)
return r;
@@ -3779,8 +3742,6 @@ static int gfx_v9_0_hw_fini(void *handle)
gfx_v9_0_cp_enable(adev, false);
adev->gfx.rlc.funcs->stop(adev);
 
-   gfx_v9_0_csb_vram_unpin(adev

[PATCH 4/5] drm/amdgpu: use CPU to flush vmhub if sched stopped

2019-11-26 Thread Monk Liu
otherwise the flush_gpu_tlb will hang if we unload the
KMD because the schedulers already stopped

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 321f8a9..4bb4d27 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -326,7 +326,8 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
 
if (!adev->mman.buffer_funcs_enabled ||
!adev->ib_pool_ready ||
-   adev->in_gpu_reset) {
+   adev->in_gpu_reset ||
+   ring->sched.ready == false) {
gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB_0, 0);
mutex_unlock(>mman.gtt_window_lock);
return;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 1/5] drm/amdgpu: fix GFX10 missing CSIB set

2019-11-26 Thread Monk Liu
still need to init csb even for SRIOV

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 4d6df35..879c0a1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -1926,14 +1926,16 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device 
*adev)
 {
int r;
 
-   if (amdgpu_sriov_vf(adev))
-   return 0;
-
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
r = gfx_v10_0_wait_for_rlc_autoload_complete(adev);
if (r)
return r;
 
+   if (amdgpu_sriov_vf(adev)) {
+   gfx_v10_0_init_csb(adev);
+   return 0;
+   }
+
r = gfx_v10_0_init_pg(adev);
if (r)
return r;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 3/5] drm/amdgpu: do autoload right after MEC loaded for SRIOV VF

2019-11-26 Thread Monk Liu
since we don't have RLCG ucode loading and no SRlist as well

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 96a6b00..b65fda9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -1490,8 +1490,7 @@ static int psp_np_fw_load(struct psp_context *psp)
return ret;
 
/* Start rlc autoload after psp recieved all the gfx firmware */
-   if (psp->autoload_supported && ucode->ucode_id ==
-   AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM) {
+   if (psp->autoload_supported && ucode->ucode_id == 
(amdgpu_sriov_vf(adev) ? AMDGPU_UCODE_ID_CP_MEC2 : 
AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM)) {
ret = psp_rlc_autoload(psp);
if (ret) {
DRM_ERROR("Failed to start rlc autoload\n");
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdgpu: fix an UMC hw arbitrator bug(v2)

2019-09-24 Thread Monk Liu
issue:
the UMC6 h/w bug is that when MCLK is doing the switch
in the middle of a page access being preempted by high
priority client (e.g. DISPLAY) then UMC and the mclk switch
would get stuck there due to a deadlock

how:
fixed by disabling auto PreChg for UMC to avoid high
priority client preempting other client's access on
the same page, thus the deadlock could be avoided

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/Makefile |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h |  1 +
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   |  7 +++
 drivers/gpu/drm/amd/amdgpu/umc_v6_0.c   | 37 +
 drivers/gpu/drm/amd/amdgpu/umc_v6_0.h   | 31 +++
 5 files changed, 77 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/umc_v6_0.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/umc_v6_0.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index c3cd271..508e93c 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -84,7 +84,7 @@ amdgpu-y += \
 
 # add UMC block
 amdgpu-y += \
-   umc_v6_1.o
+   umc_v6_1.o umc_v6_0.o
 
 # add IH block
 amdgpu-y += \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index 3ec36d9..17d3ec1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -63,6 +63,7 @@ struct amdgpu_umc_funcs {
void (*enable_umc_index_mode)(struct amdgpu_device *adev,
uint32_t umc_instance);
void (*disable_umc_index_mode)(struct amdgpu_device *adev);
+   void (*patch_for_umc)(struct amdgpu_device *adev);
 };
 
 struct amdgpu_umc {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 6102dea..7047d8f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -51,6 +51,7 @@
 #include "gfxhub_v1_1.h"
 #include "mmhub_v9_4.h"
 #include "umc_v6_1.h"
+#include "umc_v6_0.h"
 
 #include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
 
@@ -696,6 +697,9 @@ static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device 
*adev)
 static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
 {
switch (adev->asic_type) {
+   case CHIP_VEGA10:
+   adev->umc.funcs = _v6_0_funcs;
+   break;
case CHIP_VEGA20:
adev->umc.max_ras_err_cnt_per_query = 
UMC_V6_1_TOTAL_CHANNEL_NUM;
adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
@@ -1302,6 +1306,9 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device 
*adev)
for (i = 0; i < adev->num_vmhubs; ++i)
gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0);
 
+   if (adev->umc.funcs && adev->umc.funcs->patch_for_umc)
+   adev->umc.funcs->patch_for_umc(adev);
+
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
 (unsigned)(adev->gmc.gart_size >> 20),
 (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c 
b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c
new file mode 100644
index 000..ab04420
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "umc_v6_0.h"
+#include "amdgpu.h"
+
+static void umc_v6_0_patch(struct amdgpu_device *adev)
+{
+   unsigned i,j;
+
+   for (i = 0; i < 4; i++)
+   for (j = 0; j < 4; j++)
+   WREG32((i*0x10 + 0x5010c + j*0x2000)/4, 0x1002);
+}
+
+const struct amdgpu_umc_funcs umc_v6_0_funcs = {
+   .patch_for_um

[PATCH] drm/amdgpu: fix an UMC hw arbitrator bug

2019-09-23 Thread Monk Liu
issue:
the UMC h/w bug is that when MCLK is doing the switch
in the middle of a page access being preempted by high
priority client (e.g. DISPLAY) then UMC and the mclk switch
would get stuck there due to a deadlock

how:
fixed by disabling auto PreChg for UMC to avoid high
priority client preempting other client's access on
the same page, thus the deadlock could be avoided

Signed-off-by: Monk Liu 
Change-Id: Iaf6eb2a20a4785ec8440e64d5e0cae67aa0603da
---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 6102dea..8271b0c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1306,6 +1306,24 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device 
*adev)
 (unsigned)(adev->gmc.gart_size >> 20),
 (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
adev->gart.ready = true;
+
+   /* disable auto Pchg is a w/a for the vega10 UMC hardware bug */
+   WREG32(0x5010c/4, 0x1002);
+   WREG32(0x5210c/4, 0x1002);
+   WREG32(0x5410c/4, 0x1002);
+   WREG32(0x5610c/4, 0x1002);
+   WREG32(0x15010c/4, 0x1002);
+   WREG32(0x15210c/4, 0x1002);
+   WREG32(0x15410c/4, 0x1002);
+   WREG32(0x15610c/4, 0x1002);
+   WREG32(0x25010c/4, 0x1002);
+   WREG32(0x25210c/4, 0x1002);
+   WREG32(0x25410c/4, 0x1002);
+   WREG32(0x25610c/4, 0x1002);
+   WREG32(0x35010c/4, 0x1002);
+   WREG32(0x35210c/4, 0x1002);
+   WREG32(0x35410c/4, 0x1002);
+   WREG32(0x35610c/4, 0x1002);
return 0;
 }
 
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdgpu: introduce vram lost for reset

2019-08-27 Thread Monk Liu
for SOC15/vega10 the BACO reset & mode1 would introduce vram lost
in high end address range, current kmd's vram lost checking cannot
catch it since it only checks the very beginning of the visible frame buffer

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h| 1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++--
 drivers/gpu/drm/amd/amdgpu/soc15.c | 2 ++
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index f6ae565..11e0fc0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1155,6 +1155,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
 #define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) 
((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1)))
 #define amdgpu_asic_need_reset_on_init(adev) 
(adev)->asic_funcs->need_reset_on_init((adev))
 #define amdgpu_asic_get_pcie_replay_count(adev) 
((adev)->asic_funcs->get_pcie_replay_count((adev)))
+#define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter));
 
 /* Common functions */
 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 02b3e7d..31690e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3482,7 +3482,7 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device 
*adev,
amdgpu_virt_init_data_exchange(adev);
amdgpu_virt_release_full_gpu(adev, true);
if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
-   atomic_inc(>vram_lost_counter);
+   amdgpu_inc_vram_lost(adev);
r = amdgpu_device_recover_vram(adev);
}
 
@@ -3648,7 +3648,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info 
*hive,
vram_lost = 
amdgpu_device_check_vram_lost(tmp_adev);
if (vram_lost) {
DRM_INFO("VRAM is lost due to GPU 
reset!\n");
-   
atomic_inc(_adev->vram_lost_counter);
+   amdgpu_inc_vram_lost(tmp_adev);
}
 
r = amdgpu_gtt_mgr_recover(
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c 
b/drivers/gpu/drm/amd/amdgpu/soc15.c
index fe2212df..8af7501 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -557,10 +557,12 @@ static int soc15_asic_reset(struct amdgpu_device *adev)
 {
switch (soc15_asic_reset_method(adev)) {
case AMD_RESET_METHOD_BACO:
+   amdgpu_inc_vram_lost(adev);
return soc15_asic_baco_reset(adev);
case AMD_RESET_METHOD_MODE2:
return soc15_mode2_reset(adev);
default:
+   amdgpu_inc_vram_lost(adev);
return soc15_asic_mode1_reset(adev);
}
 }
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdgpu: introduce vram lost paramter for reset function

2019-08-22 Thread Monk Liu
for SOC15/vega10 the BACO reset would introduce vram lost in
the high end address range and current kmd's vram lost
checking cannot catch it since it only checks the visible frame buffer

TODO:
to confirm if mode1/2 reset would introduce vram lost

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 +++-
 drivers/gpu/drm/amd/amdgpu/cik.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/nv.c| 10 +++---
 drivers/gpu/drm/amd/amdgpu/si.c|  2 +-
 drivers/gpu/drm/amd/amdgpu/soc15.c |  4 +++-
 drivers/gpu/drm/amd/amdgpu/vi.c|  2 +-
 7 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index f6ae565..1fe3756 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -552,7 +552,7 @@ struct amdgpu_asic_funcs {
int (*read_register)(struct amdgpu_device *adev, u32 se_num,
 u32 sh_num, u32 reg_offset, u32 *value);
void (*set_vga_state)(struct amdgpu_device *adev, bool state);
-   int (*reset)(struct amdgpu_device *adev);
+   int (*reset)(struct amdgpu_device *adev, bool *lost);
enum amd_reset_method (*reset_method)(struct amdgpu_device *adev);
/* get the reference clock */
u32 (*get_xclk)(struct amdgpu_device *adev);
@@ -1136,7 +1136,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
  * ASICs macro.
  */
 #define amdgpu_asic_set_vga_state(adev, state) 
(adev)->asic_funcs->set_vga_state((adev), (state))
-#define amdgpu_asic_reset(adev) (adev)->asic_funcs->reset((adev))
+#define amdgpu_asic_reset(adev, lost) (adev)->asic_funcs->reset((adev), (lost))
 #define amdgpu_asic_reset_method(adev) (adev)->asic_funcs->reset_method((adev))
 #define amdgpu_asic_get_xclk(adev) (adev)->asic_funcs->get_xclk((adev))
 #define amdgpu_asic_set_uvd_clocks(adev, v, d) 
(adev)->asic_funcs->set_uvd_clocks((adev), (v), (d))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 02b3e7d..8668cb8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2546,7 +2546,7 @@ static void amdgpu_device_xgmi_reset_func(struct 
work_struct *__work)
struct amdgpu_device *adev =
container_of(__work, struct amdgpu_device, xgmi_reset_work);
 
-   adev->asic_reset_res =  amdgpu_asic_reset(adev);
+   adev->asic_reset_res =  amdgpu_asic_reset(adev, NULL);
if (adev->asic_reset_res)
DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
 adev->asic_reset_res, adev->ddev->unique);
@@ -2751,7 +2751,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 *  E.g., driver was not cleanly unloaded previously, etc.
 */
if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
-   r = amdgpu_asic_reset(adev);
+   r = amdgpu_asic_reset(adev, NULL);
if (r) {
dev_err(adev->dev, "asic reset on init failed\n");
goto failed;
@@ -3084,7 +3084,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool 
suspend, bool fbcon)
pci_disable_device(dev->pdev);
pci_set_power_state(dev->pdev, PCI_D3hot);
} else {
-   r = amdgpu_asic_reset(adev);
+   r = amdgpu_asic_reset(adev, NULL);
if (r)
DRM_ERROR("amdgpu asic reset failed\n");
}
@@ -3604,7 +3604,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info 
*hive,
if (!queue_work(system_highpri_wq, 
_adev->xgmi_reset_work))
r = -EALREADY;
} else
-   r = amdgpu_asic_reset(tmp_adev);
+   r = amdgpu_asic_reset(tmp_adev, _lost);
 
if (r) {
DRM_ERROR("ASIC reset failed with error, %d for 
drm dev, %s",
@@ -3645,7 +3645,9 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info 
*hive,
if (r)
goto out;
 
-   vram_lost = 
amdgpu_device_check_vram_lost(tmp_adev);
+   if (!vram_lost)
+   vram_lost = 
amdgpu_device_check_vram_lost(tmp_adev);
+
if (vram_lost) {
DRM_INFO("VRAM is lost due to GPU 
reset!\n");

atomic_inc(_adev->vram_lost_counter);
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/

[PATCH] drm/amdgpu: fix double ucode load by PSP(v3)

2019-07-31 Thread Monk Liu
previously the ucode loading of PSP was repeated: one load executed in
phase_1 init/re-init/resume and the other in the fw_loading routine

Avoid this double loading by clearing ip_blocks.status.hw in suspend or reset
prior to the FW loading and any block's hw_init/resume

v2:
still do the smu fw loading since it is needed by bare-metal

v3:
drop the change in reinit_early_sriov, just clear all block's status.hw
in the head place and set the status.hw after hw_init done is enough

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 59 +++---
 1 file changed, 38 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 6cb358c..30436ba 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1673,28 +1673,34 @@ static int amdgpu_device_fw_loading(struct 
amdgpu_device *adev)
 
if (adev->asic_type >= CHIP_VEGA10) {
for (i = 0; i < adev->num_ip_blocks; i++) {
-   if (adev->ip_blocks[i].version->type == 
AMD_IP_BLOCK_TYPE_PSP) {
-   if (adev->in_gpu_reset || adev->in_suspend) {
-   if (amdgpu_sriov_vf(adev) && 
adev->in_gpu_reset)
-   break; /* sriov gpu reset, psp 
need to do hw_init before IH because of hw limit */
-   r = 
adev->ip_blocks[i].version->funcs->resume(adev);
-   if (r) {
-   DRM_ERROR("resume of IP block 
<%s> failed %d\n",
+   if (adev->ip_blocks[i].version->type != 
AMD_IP_BLOCK_TYPE_PSP)
+   continue;
+
+   /* no need to do the fw loading again if already done*/
+   if (adev->ip_blocks[i].status.hw == true)
+   break;
+
+   if (adev->in_gpu_reset || adev->in_suspend) {
+   r = 
adev->ip_blocks[i].version->funcs->resume(adev);
+   if (r) {
+   DRM_ERROR("resume of IP block <%s> 
failed %d\n",
  
adev->ip_blocks[i].version->funcs->name, r);
-   return r;
-   }
-   } else {
-   r = 
adev->ip_blocks[i].version->funcs->hw_init(adev);
-   if (r) {
-   DRM_ERROR("hw_init of IP block 
<%s> failed %d\n",
- 
adev->ip_blocks[i].version->funcs->name, r);
-   return r;
-   }
+   return r;
+   }
+   } else {
+   r = 
adev->ip_blocks[i].version->funcs->hw_init(adev);
+   if (r) {
+   DRM_ERROR("hw_init of IP block <%s> 
failed %d\n",
+ 
adev->ip_blocks[i].version->funcs->name, r);
+   return r;
}
-   adev->ip_blocks[i].status.hw = true;
}
+
+   adev->ip_blocks[i].status.hw = true;
+   break;
}
}
+
r = amdgpu_pm_load_smu_firmware(adev, _version);
 
return r;
@@ -2136,7 +2142,9 @@ static int amdgpu_device_ip_suspend_phase1(struct 
amdgpu_device *adev)
if (r) {
DRM_ERROR("suspend of IP block <%s> failed 
%d\n",
  
adev->ip_blocks[i].version->funcs->name, r);
+   return r;
}
+   adev->ip_blocks[i].status.hw = false;
}
}
 
@@ -2176,14 +2184,16 @@ static int amdgpu_device_ip_suspend_phase2(struct 
amdgpu_device *adev)
if (is_support_sw_smu(adev)) {
/* todo */
} else if (adev->powerplay.pp_funcs &&
-  adev->powerplay.pp_funcs->set_mp1_state) {
+  
adev->powerplay.pp_funcs->set_mp1_state) {
r = adev->powerplay.pp_funcs->set_mp1_s

[PATCH] drm/amdgpu: fix double ucode load by PSP(v2)

2019-07-31 Thread Monk Liu
previously the ucode loading of PSP was repeated: one load executed in
phase_1 init/re-init/resume and the other in the fw_loading routine

Avoid this double loading by clearing ip_blocks.status.hw in suspend or reset
prior to the FW loading and any block's hw_init/resume

v2:
still do the smu fw loading since it is needed by bare-metal

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 77 +++---
 1 file changed, 48 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 6cb358c..38b14ba 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1673,28 +1673,34 @@ static int amdgpu_device_fw_loading(struct 
amdgpu_device *adev)
 
if (adev->asic_type >= CHIP_VEGA10) {
for (i = 0; i < adev->num_ip_blocks; i++) {
-   if (adev->ip_blocks[i].version->type == 
AMD_IP_BLOCK_TYPE_PSP) {
-   if (adev->in_gpu_reset || adev->in_suspend) {
-   if (amdgpu_sriov_vf(adev) && 
adev->in_gpu_reset)
-   break; /* sriov gpu reset, psp 
need to do hw_init before IH because of hw limit */
-   r = 
adev->ip_blocks[i].version->funcs->resume(adev);
-   if (r) {
-   DRM_ERROR("resume of IP block 
<%s> failed %d\n",
+   if (adev->ip_blocks[i].version->type != 
AMD_IP_BLOCK_TYPE_PSP)
+   continue;
+
+   /* no need to do the fw loading again if already done*/
+   if (adev->ip_blocks[i].status.hw == true)
+   break;
+
+   if (adev->in_gpu_reset || adev->in_suspend) {
+   r = 
adev->ip_blocks[i].version->funcs->resume(adev);
+   if (r) {
+   DRM_ERROR("resume of IP block <%s> 
failed %d\n",
  
adev->ip_blocks[i].version->funcs->name, r);
-   return r;
-   }
-   } else {
-   r = 
adev->ip_blocks[i].version->funcs->hw_init(adev);
-   if (r) {
-   DRM_ERROR("hw_init of IP block 
<%s> failed %d\n",
- 
adev->ip_blocks[i].version->funcs->name, r);
-   return r;
-   }
+   return r;
+   }
+   } else {
+   r = 
adev->ip_blocks[i].version->funcs->hw_init(adev);
+   if (r) {
+   DRM_ERROR("hw_init of IP block <%s> 
failed %d\n",
+ 
adev->ip_blocks[i].version->funcs->name, r);
+   return r;
}
-   adev->ip_blocks[i].status.hw = true;
}
+
+   adev->ip_blocks[i].status.hw = true;
+   break;
}
}
+
r = amdgpu_pm_load_smu_firmware(adev, _version);
 
return r;
@@ -2128,6 +2134,7 @@ static int amdgpu_device_ip_suspend_phase1(struct 
amdgpu_device *adev)
for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
if (!adev->ip_blocks[i].status.valid)
continue;
+
/* displays are handled separately */
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
/* XXX handle errors */
@@ -2136,7 +2143,9 @@ static int amdgpu_device_ip_suspend_phase1(struct 
amdgpu_device *adev)
if (r) {
DRM_ERROR("suspend of IP block <%s> failed 
%d\n",
  
adev->ip_blocks[i].version->funcs->name, r);
+   return r;
}
+   adev->ip_blocks[i].status.hw = false;
}
}
 
@@ -2176,14 +2185,16 @@ static int amdgpu_device_ip_suspend_phase2(struct 
amdgpu_device *adev)
if (is_support_sw_smu(adev)) {
/* todo */
} else if (

[PATCH 3/3] drm/amdgpu: fix double ucode load by PSP

2019-07-30 Thread Monk Liu
previously the ucode loading of PSP was repeated: one load executed in
phase_1 init/re-init/resume and the other in the fw_loading routine

Avoid this double loading by clearing ip_blocks.status.hw in suspend or reset
prior to the FW loading and any block's hw_init/resume

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 79 ++
 1 file changed, 49 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 6cb358c..25e721d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1673,29 +1673,35 @@ static int amdgpu_device_fw_loading(struct 
amdgpu_device *adev)
 
if (adev->asic_type >= CHIP_VEGA10) {
for (i = 0; i < adev->num_ip_blocks; i++) {
-   if (adev->ip_blocks[i].version->type == 
AMD_IP_BLOCK_TYPE_PSP) {
-   if (adev->in_gpu_reset || adev->in_suspend) {
-   if (amdgpu_sriov_vf(adev) && 
adev->in_gpu_reset)
-   break; /* sriov gpu reset, psp 
need to do hw_init before IH because of hw limit */
-   r = 
adev->ip_blocks[i].version->funcs->resume(adev);
-   if (r) {
-   DRM_ERROR("resume of IP block 
<%s> failed %d\n",
+   if (adev->ip_blocks[i].version->type != 
AMD_IP_BLOCK_TYPE_PSP)
+   continue;
+
+   /* no need to do the fw loading again if already done*/
+   if (adev->ip_blocks[i].status.hw == true)
+   break;
+
+   if (adev->in_gpu_reset || adev->in_suspend) {
+   r = 
adev->ip_blocks[i].version->funcs->resume(adev);
+   if (r) {
+   DRM_ERROR("resume of IP block <%s> 
failed %d\n",
  
adev->ip_blocks[i].version->funcs->name, r);
-   return r;
-   }
-   } else {
-   r = 
adev->ip_blocks[i].version->funcs->hw_init(adev);
-   if (r) {
-   DRM_ERROR("hw_init of IP block 
<%s> failed %d\n",
- 
adev->ip_blocks[i].version->funcs->name, r);
-   return r;
-   }
+   return r;
+   }
+   } else {
+   r = 
adev->ip_blocks[i].version->funcs->hw_init(adev);
+   if (r) {
+   DRM_ERROR("hw_init of IP block <%s> 
failed %d\n",
+ 
adev->ip_blocks[i].version->funcs->name, r);
+   return r;
}
-   adev->ip_blocks[i].status.hw = true;
}
+
+   adev->ip_blocks[i].status.hw = true;
+   break;
}
+   } else {
+   r = amdgpu_pm_load_smu_firmware(adev, _version);
}
-   r = amdgpu_pm_load_smu_firmware(adev, _version);
 
return r;
 }
@@ -2128,6 +2134,7 @@ static int amdgpu_device_ip_suspend_phase1(struct 
amdgpu_device *adev)
for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
if (!adev->ip_blocks[i].status.valid)
continue;
+
/* displays are handled separately */
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
/* XXX handle errors */
@@ -2136,7 +2143,9 @@ static int amdgpu_device_ip_suspend_phase1(struct 
amdgpu_device *adev)
if (r) {
DRM_ERROR("suspend of IP block <%s> failed 
%d\n",
  
adev->ip_blocks[i].version->funcs->name, r);
+   return r;
}
+   adev->ip_blocks[i].status.hw = false;
}
}
 
@@ -2176,14 +2185,16 @@ static int amdgpu_device_ip_suspend_phase2(struct 
amdgpu_device *adev)
if (is_support_sw_smu(adev)) {
/* todo *

[PATCH 1/3] drm/amdgpu: cleanup vega10 SRIOV code path

2019-07-30 Thread Monk Liu
we can simplify all those unnecessary functions under
SRIOV for vega10 since:
1) PSP L1 policy is by force enabled in SRIOV
2) the original logic always sets all flags, which makes it
   a dummy step

besides,
1) the ih_doorbell_range set should also be skipped
for VEGA10 SRIOV.
2) the gfx_common registers should also be skipped
for VEGA10 SRIOV.

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  3 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c   | 45 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h   | 13 -
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 17 ++-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c| 10 +++
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c  | 15 --
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 17 ++-
 drivers/gpu/drm/amd/amdgpu/soc15.c | 11 +++-
 drivers/gpu/drm/amd/amdgpu/soc15_common.h  |  5 ++--
 drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 18 ++--
 11 files changed, 38 insertions(+), 118 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 127ed01..6cb358c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1571,9 +1571,6 @@ static int amdgpu_device_ip_early_init(struct 
amdgpu_device *adev)
r = amdgpu_virt_request_full_gpu(adev, true);
if (r)
return -EAGAIN;
-
-   /* query the reg access mode at the very beginning */
-   amdgpu_virt_init_reg_access_mode(adev);
}
 
adev->pm.pp_feature = amdgpu_pp_feature_mask;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 1d68729..f04eb1a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -426,48 +426,3 @@ uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, 
bool lowest)
 
return clk;
 }
-
-void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev)
-{
-   struct amdgpu_virt *virt = >virt;
-
-   if (virt->ops && virt->ops->init_reg_access_mode)
-   virt->ops->init_reg_access_mode(adev);
-}
-
-bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev)
-{
-   bool ret = false;
-   struct amdgpu_virt *virt = >virt;
-
-   if (amdgpu_sriov_vf(adev)
-   && (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH))
-   ret = true;
-
-   return ret;
-}
-
-bool amdgpu_virt_support_rlc_prg_reg(struct amdgpu_device *adev)
-{
-   bool ret = false;
-   struct amdgpu_virt *virt = >virt;
-
-   if (amdgpu_sriov_vf(adev)
-   && (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_RLC)
-   && !(amdgpu_sriov_runtime(adev)))
-   ret = true;
-
-   return ret;
-}
-
-bool amdgpu_virt_support_skip_setting(struct amdgpu_device *adev)
-{
-   bool ret = false;
-   struct amdgpu_virt *virt = >virt;
-
-   if (amdgpu_sriov_vf(adev)
-   && (virt->reg_access_mode & AMDGPU_VIRT_REG_SKIP_SEETING))
-   ret = true;
-
-   return ret;
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index f510773..b0b2bdc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -48,12 +48,6 @@ struct amdgpu_vf_error_buffer {
uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE];
 };
 
-/* According to the fw feature, some new reg access modes are supported */
-#define AMDGPU_VIRT_REG_ACCESS_LEGACY  (1 << 0) /* directly mmio */
-#define AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH  (1 << 1) /* by PSP */
-#define AMDGPU_VIRT_REG_ACCESS_RLC (1 << 2) /* by RLC */
-#define AMDGPU_VIRT_REG_SKIP_SEETING   (1 << 3) /* Skip setting reg */
-
 /**
  * struct amdgpu_virt_ops - amdgpu device virt operations
  */
@@ -65,7 +59,6 @@ struct amdgpu_virt_ops {
void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 
data2, u32 data3);
int (*get_pp_clk)(struct amdgpu_device *adev, u32 type, char *buf);
int (*force_dpm_level)(struct amdgpu_device *adev, u32 level);
-   void (*init_reg_access_mode)(struct amdgpu_device *adev);
 };
 
 /*
@@ -315,10 +308,4 @@ int amdgpu_virt_fw_reserve_get_checksum(void *obj, 
unsigned long obj_size,
 void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
 uint32_t amdgpu_virt_get_sclk(struct amdgpu_device *adev, bool lowest);
 uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest);
-
-void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev);
-bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev);
-bool amdgpu_virt_support_rlc_prg_

[PATCH 2/3] drm/amdgpu: fix incorrect judge on sos fw version

2019-07-30 Thread Monk Liu
for SRIOV the SOS fw of PSP is loaded in the hypervisor, thus
the guest cannot tell its version, and judging feature support by
reading the sos fw version on the guest side is completely wrong

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/psp_v3_1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c 
b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
index ec3a056..ba32758 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
@@ -634,7 +634,7 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp)
 
 static bool psp_v3_1_support_vmr_ring(struct psp_context *psp)
 {
-   if (amdgpu_sriov_vf(psp->adev) && psp->sos_fw_version >= 0x80455)
+   if (amdgpu_sriov_vf(psp->adev))
return true;
 
return false;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] SWDEV-196010 Calltrace caught in amdgpu_vm_sdma.c file

2019-07-15 Thread Monk Liu
don't commit sdma vm job if no updates needed and free
the ib

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index aeba9e6..ff6d37e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1304,9 +1304,15 @@ int amdgpu_vm_update_directories(struct amdgpu_device 
*adev,
goto error;
}
 
-   r = vm->update_funcs->commit(, >last_update);
-   if (r)
-   goto error;
+
+   if (params.ib->length_dw > 0) {
+   r = vm->update_funcs->commit(, >last_update);
+   if (r)
+   goto error;
+   } else {
+   amdgpu_sa_bo_free(adev, >sa_bo, NULL);
+   }
+
return 0;
 
 error:
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdgpu: drop the incorrect soft_reset for SRIOV

2019-06-10 Thread Monk Liu
It's incorrect to do soft reset for SRIOV: when GFX
hangs, the WREG would get stuck there because it goes the KIQ way.

Also the GPU reset counter is incorrect: it always increases twice
for each timeout

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 8f5026c..ff6976e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -399,7 +399,7 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, 
unsigned int vmid,
 {
ktime_t deadline = ktime_add_us(ktime_get(), 1);
 
-   if (!ring->funcs->soft_recovery || !fence)
+   if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || 
!fence)
return false;
 
atomic_inc(>adev->gpu_reset_counter);
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/ttm: fix ttm client driver (e.g. amdgpu) reload issue

2019-06-05 Thread Monk Liu
need to clear bo glob and mem glob during their release
otherwise their member value would be wrongly used in the
next glob init stage and lead to wild pointer access problems:

1) kobj.state_initialized is 1
2) ttm_bo_glob.bo_count isn't cleared, and referencing the
   "swap_lru" member through it would hit an out-of-bounds
   array access bug

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/ttm/ttm_bo.c | 2 ++
 drivers/gpu/drm/ttm/ttm_memory.c | 8 
 2 files changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index c7de667..6434eac 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1604,6 +1604,8 @@ static void ttm_bo_global_kobj_release(struct kobject 
*kobj)
container_of(kobj, struct ttm_bo_global, kobj);
 
__free_page(glob->dummy_read_page);
+
+   memset(glob, 0, sizeof(*glob));
 }
 
 static void ttm_bo_global_release(void)
diff --git a/drivers/gpu/drm/ttm/ttm_memory.c b/drivers/gpu/drm/ttm/ttm_memory.c
index 8617958..7128bbf 100644
--- a/drivers/gpu/drm/ttm/ttm_memory.c
+++ b/drivers/gpu/drm/ttm/ttm_memory.c
@@ -229,9 +229,17 @@ static const struct sysfs_ops ttm_mem_global_ops = {
.store = _mem_global_store,
 };
 
+void ttm_mem_glob_kobj_release(struct kobject *kobj)
+{
+   struct ttm_mem_global *glob = container_of(kobj, struct ttm_mem_global, 
kobj);
+
+   memset(glob, 0, sizeof(*glob));
+}
+
 static struct kobj_type ttm_mem_glob_kobj_type = {
.sysfs_ops = _mem_global_ops,
.default_attrs = ttm_mem_global_attrs,
+   .release = ttm_mem_glob_kobj_release,
 };
 
 static bool ttm_zones_above_swap_target(struct ttm_mem_global *glob,
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdgpu: suppress repeating tmo report

2019-05-13 Thread Monk Liu
only report once per timed-out job; the timer will
be restarted when the job finishes if it is just slow.

Suggested-by: Christian König 
Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 1397942..1500bb9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -51,6 +51,8 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
 
if (amdgpu_device_should_recover_gpu(ring->adev))
amdgpu_device_gpu_recover(ring->adev, job);
+   else
+   drm_sched_suspend_timeout(>sched);
 }
 
 int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdgpu: avoid duplicated tmo report on same job

2019-05-10 Thread Monk Liu
Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  3 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c| 12 +++-
 include/drm/gpu_scheduler.h|  1 +
 3 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index d6286ed..f1dc0ba 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3356,8 +3356,7 @@ bool amdgpu_device_should_recover_gpu(struct 
amdgpu_device *adev)
return true;
 
 disabled:
-   DRM_INFO("GPU recovery disabled.\n");
-   return false;
+   return false;
 }
 
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 1397942..ca62253 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -33,6 +33,7 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
struct amdgpu_job *job = to_amdgpu_job(s_job);
struct amdgpu_task_info ti;
+   bool recover;
 
memset(, 0, sizeof(struct amdgpu_task_info));
 
@@ -42,6 +43,11 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
return;
}
 
+   recover = amdgpu_device_should_recover_gpu(ring->adev);
+   if (s_job->sched->last_tmo_id == s_job->id)
+   goto skip_report;
+
+   s_job->sched->last_tmo_id = s_job->id;
amdgpu_vm_get_task_info(ring->adev, job->pasid, );
DRM_ERROR("ring %s timeout, signaled seq=%u, emitted seq=%u\n",
  job->base.sched->name, atomic_read(>fence_drv.last_seq),
@@ -49,7 +55,11 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n",
  ti.process_name, ti.tgid, ti.task_name, ti.pid);
 
-   if (amdgpu_device_should_recover_gpu(ring->adev))
+   if (!recover)
+   DRM_INFO("GPU recovery disabled.\n");
+
+skip_report:
+   if (recover)
amdgpu_device_gpu_recover(ring->adev, job);
 }
 
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 9c2a957..1944d27 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -282,6 +282,7 @@ struct drm_gpu_scheduler {
int hang_limit;
atomic_tnum_jobs;
boolready;
+   uint64_t last_tmo_id;
 };
 
 int drm_sched_init(struct drm_gpu_scheduler *sched,
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/sched: fix the duplicated TMO message for one IB

2019-05-08 Thread Monk Liu
we don't need a duplicated timeout error message for one IB reported endlessly;
just one report per timed-out IB is enough

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/scheduler/sched_main.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index c1aaf85..d6c17f1 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -308,7 +308,6 @@ static void drm_sched_job_timedout(struct work_struct *work)
 {
struct drm_gpu_scheduler *sched;
struct drm_sched_job *job;
-   unsigned long flags;
 
sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
job = list_first_entry_or_null(>ring_mirror_list,
@@ -316,10 +315,6 @@ static void drm_sched_job_timedout(struct work_struct 
*work)
 
if (job)
job->sched->ops->timedout_job(job);
-
-   spin_lock_irqsave(>job_list_lock, flags);
-   drm_sched_start_timeout(sched);
-   spin_unlock_irqrestore(>job_list_lock, flags);
 }
 
  /**
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdgpu: fix vm_cpu_update hit NULL pointer

2019-03-21 Thread Monk Liu
should use amdgpu_bo_kmap, otherwise you'll hit a NULL
pointer bug when using amdgpu_bo_kptr

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
index 9d53982..1fb6295a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
@@ -76,8 +76,10 @@ static int amdgpu_vm_cpu_update(struct 
amdgpu_vm_update_params *p,
 {
unsigned int i;
uint64_t value;
+   void *ptr;
 
-   pe += (unsigned long)amdgpu_bo_kptr(bo);
+   amdgpu_bo_kmap(bo, );
+   pe += (unsigned long)ptr;
 
trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
 
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm: should break if already get the best size

2018-11-22 Thread Monk Liu
Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/drm_mm.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 3cc5fbd..369fd9b 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -318,6 +318,8 @@ static struct drm_mm_node *best_hole(struct drm_mm *mm, u64 
size)
if (size <= node->hole_size) {
best = node;
rb = rb->rb_right;
+   if (size == node->hole_size)
+   break;
} else {
rb = rb->rb_left;
}
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/3] drm/amdgpu: drop the sched_sync

2018-11-02 Thread Monk Liu
Reasons to drop it:

1) simplify the code: just introducing the field member "need_pipe_sync"
for the job is good enough to tell whether the explicit dependency fence
needs to be followed by a pipeline sync.

2) after GPU recovery the explicit fence from sched_sync will not
come back, so the required pipeline_sync following it is missed;
consider the scenario below:
>now on ring buffer:
Job-A -> pipe_sync -> Job-B
>TDR occurred on Job-A, and after GPU recovery:
>now on ring buffer:
Job-A -> Job-B

because the fence from sched_sync is used and freed after ib_schedule
in first time, it will never come back, with this patch this issue
could be avoided.

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c  | 16 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 14 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.h |  3 +--
 3 files changed, 10 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index c48207b3..ac7d2da 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -122,7 +122,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
 {
struct amdgpu_device *adev = ring->adev;
struct amdgpu_ib *ib = [0];
-   struct dma_fence *tmp = NULL;
bool skip_preamble, need_ctx_switch;
unsigned patch_offset = ~0;
struct amdgpu_vm *vm;
@@ -166,16 +165,13 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
}
 
need_ctx_switch = ring->current_ctx != fence_ctx;
-   if (ring->funcs->emit_pipeline_sync && job &&
-   ((tmp = amdgpu_sync_get_fence(>sched_sync, NULL)) ||
-(amdgpu_sriov_vf(adev) && need_ctx_switch) ||
-amdgpu_vm_need_pipeline_sync(ring, job))) {
-   need_pipe_sync = true;
 
-   if (tmp)
-   trace_amdgpu_ib_pipe_sync(job, tmp);
-
-   dma_fence_put(tmp);
+   if (ring->funcs->emit_pipeline_sync && job) {
+   if ((need_ctx_switch && amdgpu_sriov_vf(adev)) ||
+   amdgpu_vm_need_pipeline_sync(ring, job))
+   need_pipe_sync = true;
+   else if (job->need_pipe_sync)
+   need_pipe_sync = true;
}
 
if (ring->funcs->insert_start)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 1d71f8c..dae997d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -71,7 +71,6 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned 
num_ibs,
(*job)->num_ibs = num_ibs;
 
amdgpu_sync_create(&(*job)->sync);
-   amdgpu_sync_create(&(*job)->sched_sync);
(*job)->vram_lost_counter = atomic_read(>vram_lost_counter);
(*job)->vm_pd_addr = AMDGPU_BO_INVALID_OFFSET;
 
@@ -117,7 +116,6 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
amdgpu_ring_priority_put(ring, s_job->s_priority);
dma_fence_put(job->fence);
amdgpu_sync_free(>sync);
-   amdgpu_sync_free(>sched_sync);
kfree(job);
 }
 
@@ -127,7 +125,6 @@ void amdgpu_job_free(struct amdgpu_job *job)
 
dma_fence_put(job->fence);
amdgpu_sync_free(>sync);
-   amdgpu_sync_free(>sched_sync);
kfree(job);
 }
 
@@ -182,14 +179,9 @@ static struct dma_fence *amdgpu_job_dependency(struct 
drm_sched_job *sched_job,
bool need_pipe_sync = false;
int r;
 
-   fence = amdgpu_sync_get_fence(>sync, _pipe_sync);
-   if (fence && need_pipe_sync) {
-   if (drm_sched_dependency_optimized(fence, s_entity)) {
-   r = amdgpu_sync_fence(ring->adev, >sched_sync,
- fence, false);
-   if (r)
-   DRM_ERROR("Error adding fence (%d)\n", r);
-   }
+   if (fence && need_pipe_sync && drm_sched_dependency_optimized(fence, 
s_entity)) {
+   trace_amdgpu_ib_pipe_sync(job, fence);
+   job->need_pipe_sync = true;
}
 
while (fence == NULL && vm && !job->vmid) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index e1b46a6..c1d00f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -41,7 +41,6 @@ struct amdgpu_job {
struct drm_sched_jobbase;
struct amdgpu_vm*vm;
struct amdgpu_sync  sync;
-   struct amdgpu_sync  sched_sync;
struct amdgpu_ib*ibs;
struct dma_fence*fence; /* the hw fence */
uint32_tpreamble_status;

[PATCH 3/3] drm/amdgpu: drop need_vm_flush/need_pipe_sync

2018-11-02 Thread Monk Liu
use a flag to hold need_flush and need_pipe_sync

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c|  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c   | 14 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.h   |  6 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c|  4 ++--
 7 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index ac7d2da..3231a3b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -170,7 +170,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
if ((need_ctx_switch && amdgpu_sriov_vf(adev)) ||
amdgpu_vm_need_pipeline_sync(ring, job))
need_pipe_sync = true;
-   else if (job->need_pipe_sync)
+   else if (job->flags & JOB_NEED_PIPE_SYNC)
need_pipe_sync = true;
}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index df9b173..ed9b6c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -311,7 +311,9 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
dma_fence_put((*id)->flushed_updates);
(*id)->flushed_updates = dma_fence_get(updates);
}
-   job->vm_needs_flush = needs_flush;
+
+   if (needs_flush)
+   job->flags |= JOB_NEED_VM_FLUSH;
return 0;
 }
 
@@ -341,7 +343,8 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
struct dma_fence *updates = sync->last_vm_update;
int r;
 
-   job->vm_needs_flush = vm->use_cpu_for_update;
+   if (vm->use_cpu_for_update)
+   job->flags |= JOB_NEED_VM_FLUSH;
 
/* Check if we can use a VMID already assigned to this VM */
list_for_each_entry_reverse((*id), _mgr->ids_lru, list) {
@@ -380,7 +383,8 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
(*id)->flushed_updates = dma_fence_get(updates);
}
 
-   job->vm_needs_flush |= needs_flush;
+   if (needs_flush)
+   job->flags |= JOB_NEED_VM_FLUSH;
return 0;
}
 
@@ -438,7 +442,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct 
amdgpu_ring *ring,
 
dma_fence_put(id->flushed_updates);
id->flushed_updates = dma_fence_get(updates);
-   job->vm_needs_flush = true;
+   job->flags |= JOB_NEED_VM_FLUSH;
}
 
list_move_tail(>list, _mgr->ids_lru);
@@ -447,7 +451,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct 
amdgpu_ring *ring,
id->pd_gpu_addr = job->vm_pd_addr;
id->owner = vm->entity.fence_context;
 
-   if (job->vm_needs_flush) {
+   if (job->flags & JOB_NEED_VM_FLUSH) {
dma_fence_put(id->last_flush);
id->last_flush = NULL;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index dae997d..82e190d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -181,7 +181,7 @@ static struct dma_fence *amdgpu_job_dependency(struct 
drm_sched_job *sched_job,
 
if (fence && need_pipe_sync && drm_sched_dependency_optimized(fence, 
s_entity)) {
trace_amdgpu_ib_pipe_sync(job, fence);
-   job->need_pipe_sync = true;
+   job->flags |= JOB_NEED_PIPE_SYNC;
}
 
while (fence == NULL && vm && !job->vmid) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index c1d00f0..f9e8ecd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -37,6 +37,9 @@
 
 struct amdgpu_fence;
 
+#define JOB_NEED_VM_FLUSH   1   /* require a vm flush for this job */
+#define JOB_NEED_PIPE_SYNC  2   /* require a pipeline sync for this job */
+
 struct amdgpu_job {
struct drm_sched_jobbase;
struct amdgpu_vm*vm;
@@ -46,7 +49,6 @@ struct amdgpu_job {
uint32_tpreamble_status;
uint32_tnum_ibs;
void*owner;
-   boolvm_needs_flush;
uint64_tvm_pd_addr;
unsignedvmid;
unsignedpasid;
@@ -58,7 +60,7 @@ struct amdgpu_job {
/* user fence handling */
uint64_tuf_addr;
 

[PATCH 1/3] drm/amdgpu: rename explicit to need_pipe_sync for better understanding

2018-11-02 Thread Monk Liu
and differentiate it with explicit_bo flag

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c  | 6 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 7 +--
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index e0af44f..1d71f8c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -179,11 +179,11 @@ static struct dma_fence *amdgpu_job_dependency(struct 
drm_sched_job *sched_job,
struct amdgpu_job *job = to_amdgpu_job(sched_job);
struct amdgpu_vm *vm = job->vm;
struct dma_fence *fence;
-   bool explicit = false;
+   bool need_pipe_sync = false;
int r;
 
-   fence = amdgpu_sync_get_fence(>sync, );
-   if (fence && explicit) {
+   fence = amdgpu_sync_get_fence(>sync, _pipe_sync);
+   if (fence && need_pipe_sync) {
if (drm_sched_dependency_optimized(fence, s_entity)) {
r = amdgpu_sync_fence(ring->adev, >sched_sync,
  fence, false);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index 2d6f5ec..a7e1ea8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -36,7 +36,7 @@
 struct amdgpu_sync_entry {
struct hlist_node   node;
struct dma_fence*fence;
-   boolexplicit;
+   bool   explicit;
 };
 
 static struct kmem_cache *amdgpu_sync_slab;
@@ -126,6 +126,7 @@ static void amdgpu_sync_keep_later(struct dma_fence **keep,
  *
  * @sync: sync object to add the fence to
  * @f: fence to add
+ * @explicit: this fence is an explicit dependency
  *
  * Tries to add the fence to an existing hash entry. Returns true when an entry
  * was found, false otherwise.
@@ -153,6 +154,8 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, 
struct dma_fence *f,
  *
  * @sync: sync object to add fence to
  * @fence: fence to sync to
+ * @explicit: True to indicate the given @f need a pipeline sync upon the case
+ *that the job of @sync runs on the same ring with it
  *
  */
 int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
@@ -295,7 +298,7 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync 
*sync,
  * amdgpu_sync_get_fence - get the next fence from the sync object
  *
  * @sync: sync object to use
- * @explicit: true if the next fence is explicit
+ * @explicit: true if the next fence is explicitly defined., e.g. dependency, 
syncobj, etc...
  *
  * Get and removes the next fence from the sync object not signaled yet.
  */
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: refactoring mailbox to fix TDR handshake bugs(v2)

2018-03-07 Thread Monk Liu
this patch actually refactors the mailbox implementation, and
all of the changes below are needed together to fix all those mailbox
handshake issues exposed by heavy TDR testing.

1) refactor all mailbox functions based on byte accesses for mb_control;
the reason is to avoid touching non-related bits when writing the trn/rcv part of
mailbox_control; this way some incorrect INTRs sent to the hypervisor
side can be avoided, and it fixes a couple of handshake bugs.

2) trans_msg function re-implemented: put an invalidation
step before transmitting a message to make sure the ACK bit is in
a cleared state, otherwise there is a chance that ACK is already asserted
before transmitting the message, leading to fake ACK polling.
(the hypervisor side has some tricks to work around the ACK bit being corrupted
by VF FLR, which has a side effect that may make the guest-side ACK bit
asserted wrongly), and clear the TRANS_MSG words after the message is transferred.

3) mailbox_flr_work is also re-worked: it takes the mutex lock
first if invoked, to block gpu recovery from participating too early while
the hypervisor side is doing VF FLR. (the hypervisor sends FLR_NOTIFY to the guest
before doing VF FLR and sends FLR_COMPLETE after VF FLR is done, and
the FLR_NOTIFY will trigger an interrupt to the guest which leads to
mailbox_flr_work being invoked)

This can avoid the issue that mailbox trans msg being cleared by its VF FLR.

4) the mailbox_rcv_irq IRQ routine should only peek at the msg and schedule
mailbox_flr_work, instead of ACKing to the hypervisor itself, because the FLR_NOTIFY
msg sent from the hypervisor side doesn't need the VF's ACK (this is because
the VF's ACK would lead the hypervisor to clear its trans_valid/msg, and this
would cause a handshake bug if trans_valid/msg is cleared not due to a
correct VF ACK but by a wrong VF ACK like this "FLR_NOTIFY" one)

This fixes a handshake bug where sometimes the GUEST could never receive
the "READY_TO_ACCESS_GPU" msg from the hypervisor.

5) separate the polling time limits accordingly:
POLL ACK costs no more than 500ms
POLL MSG costs no more than 12000ms
POLL FLR finish costs no more than 500ms

6) we still need to set adev into in_gpu_reset mode after we receive
FLR_NOTIFY from the host side; this can prevent an innocent app from wrongly
succeeding in opening the amdgpu dri device.

FLR_NOTIFY is received due to an IDLE hang detected on the hypervisor side,
which indicates the GPU has already died in this VF.

v2:
use a MACRO as the offset of the mailbox_control register
don't test for the NOTIFY_CMPL event in rcv_msg since it won't
receive that message anymore

Change-Id: I17df8b4490a5b53a1cc2bd6c8f9bc3ee14c23f1a
Signed-off-by: Monk Liu <monk@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 196 ++
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h |   7 +-
 2 files changed, 109 insertions(+), 94 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 271452d..8b47484 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -33,56 +33,34 @@
 
 static void xgpu_ai_mailbox_send_ack(struct amdgpu_device *adev)
 {
-   u32 reg;
-   int timeout = AI_MAILBOX_TIMEDOUT;
-   u32 mask = REG_FIELD_MASK(BIF_BX_PF0_MAILBOX_CONTROL, RCV_MSG_VALID);
-
-   reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
-mmBIF_BX_PF0_MAILBOX_CONTROL));
-   reg = REG_SET_FIELD(reg, BIF_BX_PF0_MAILBOX_CONTROL, RCV_MSG_ACK, 1);
-   WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
-  mmBIF_BX_PF0_MAILBOX_CONTROL), reg);
-
-   /*Wait for RCV_MSG_VALID to be 0*/
-   reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
-mmBIF_BX_PF0_MAILBOX_CONTROL));
-   while (reg & mask) {
-   if (timeout <= 0) {
-   pr_err("RCV_MSG_VALID is not cleared\n");
-   break;
-   }
-   mdelay(1);
-   timeout -=1;
-
-   reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
-
mmBIF_BX_PF0_MAILBOX_CONTROL));
-   }
+   WREG8(AI_MAIBOX_CONTROL_RCV_OFFSET_BYTE, 2);
 }
 
 static void xgpu_ai_mailbox_set_valid(struct amdgpu_device *adev, bool val)
 {
-   u32 reg;
+   WREG8(AI_MAIBOX_CONTROL_TRN_OFFSET_BYTE, val ? 1 : 0);
+}
 
-   reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
-mmBIF_BX_PF0_MAILBOX_CONTROL));
-   reg = REG_SET_FIELD(reg, BIF_BX_PF0_MAILBOX_CONTROL,
-   TRN_MSG_VALID, val ? 1 : 0);
-   WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL),
- reg);
+/*
+ * this peek_msg could *only* be called in IRQ routine becuase in IRQ routine
+ * RCV_MSG_VALID filed of BIF_BX_PF0_MAILBOX_CONTROL must already be set to 1
+ * by host.
+ *
+ * if called no in IRQ routine, this peek_msg cannot guaranteed to return the
+ * correct value since i

[PATCH 2/2] drm/amdgpu: refactoring mailbox to fix TDR handshake bugs

2018-03-06 Thread Monk Liu
this patch actually refactors the mailbox implementation, and
all of the changes below are needed together to fix all those mailbox
handshake issues exposed by heavy TDR testing.

1)refactor all mailbox functions based on byte accessing for mb_control
reason is to avoid touching non-related bits when writing trn/rcv part of
mailbox_control, this way some incorrect INTR sent to hypervisor
side could be avoided, and it fixes couple handshake bug.

2)trans_msg function re-impled: put a invalid
logic before transmitting message to make sure the ACK bit is in
a clear status, otherwise there is chance that ACK asserted already
before transmitting message and lead to fake ACK polling.
(hypervisor side have some tricks to workaround ACK bit being corrupted
by VF FLR which hase an side effects that may make guest side ACK bit
asserted wrongly), and clear TRANS_MSG words after message transferred.

3)for mailbox_flr_work, it is also re-worked: it takes the mutex lock
first if invoked, to block gpu recover's participate too early while
hypervisor side is doing VF FLR. (hypervisor sends FLR_NOTIFY to guest
before doing VF FLR and sentds FLR_COMPLETE after VF FLR done, and
the FLR_NOTIFY will trigger interrupt to guest which lead to
mailbox_flr_work being invoked)

This can avoid the issue that mailbox trans msg being cleared by its VF FLR.

4)for mailbox_rcv_irq IRQ routine, it should only peek msg and schedule
mailbox_flr_work, instead of ACK to hypervisor itself, because FLR_NOTIFY
msg sent from hypervisor side doesn't need VF's ACK (this is because
VF's ACK would lead to hypervisor clear its trans_valid/msg, and this
would cause handshake bug if trans_valid/msg is cleared not due to
correct VF ACK but from a wrong VF ACK like this "FLR_NOTIFY" one)

This fixed handshake bug that sometimes GUEST always couldn't receive
"READY_TO_ACCESS_GPU" msg from hypervisor.

5) separate the polling time limits accordingly:
POLL ACK cost no more than 500ms
POLL MSG cost no more than 12000ms
POLL FLR finish cost no more than 500ms

6) we still need to set adev into in_gpu_reset mode after we receive
FLR_NOTIFY from the host side; this can prevent an innocent app from wrongly
succeeding in opening the amdgpu dri device.

FLR_NOTIFY is received due to an IDLE hang detected on the hypervisor side,
which indicates the GPU has already died in this VF.

Change-Id: I17df8b4490a5b53a1cc2bd6c8f9bc3ee14c23f1a
Signed-off-by: Monk Liu <monk@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 200 ++
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h |   4 +-
 2 files changed, 111 insertions(+), 93 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 271452d..8d09380 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -33,56 +33,42 @@
 
 static void xgpu_ai_mailbox_send_ack(struct amdgpu_device *adev)
 {
-   u32 reg;
-   int timeout = AI_MAILBOX_TIMEDOUT;
-   u32 mask = REG_FIELD_MASK(BIF_BX_PF0_MAILBOX_CONTROL, RCV_MSG_VALID);
-
-   reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
-mmBIF_BX_PF0_MAILBOX_CONTROL));
-   reg = REG_SET_FIELD(reg, BIF_BX_PF0_MAILBOX_CONTROL, RCV_MSG_ACK, 1);
-   WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
-  mmBIF_BX_PF0_MAILBOX_CONTROL), reg);
-
-   /*Wait for RCV_MSG_VALID to be 0*/
-   reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
-mmBIF_BX_PF0_MAILBOX_CONTROL));
-   while (reg & mask) {
-   if (timeout <= 0) {
-   pr_err("RCV_MSG_VALID is not cleared\n");
-   break;
-   }
-   mdelay(1);
-   timeout -=1;
-
-   reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
-
mmBIF_BX_PF0_MAILBOX_CONTROL));
-   }
+   const u32 offset = SOC15_REG_OFFSET(NBIO, 0, 
mmBIF_BX_PF0_MAILBOX_CONTROL) * 4 + 1;
+   WREG8(offset, 2);
 }
 
 static void xgpu_ai_mailbox_set_valid(struct amdgpu_device *adev, bool val)
 {
-   u32 reg;
+   const u32 offset = SOC15_REG_OFFSET(NBIO, 0, 
mmBIF_BX_PF0_MAILBOX_CONTROL) * 4;
+   WREG8(offset, val ? 1 : 0);
+}
 
-   reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
-mmBIF_BX_PF0_MAILBOX_CONTROL));
-   reg = REG_SET_FIELD(reg, BIF_BX_PF0_MAILBOX_CONTROL,
-   TRN_MSG_VALID, val ? 1 : 0);
-   WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL),
- reg);
+/*
+ * this peek_msg could *only* be called in IRQ routine becuase in IRQ routine
+ * RCV_MSG_VALID filed of BIF_BX_PF0_MAILBOX_CONTROL must already be set to 1
+ * by host.
+ *
+ * if called no in IRQ routine, this peek_msg cannot guaranteed to return the
+ * correct value since i

[PATCH 1/2] drm/amdgpu: implement mmio byte access helpers

2018-03-06 Thread Monk Liu
mailbox registers can be accessed on a byte boundary according
to the BIF team, so this patch prepares register byte access,
which will be used by the following patches

Change-Id: I1e84f1c6e8e75dc42eb5be09c492fa5e7eb7502a
Signed-off-by: Monk Liu <monk@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  6 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 26 ++
 2 files changed, 32 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 292c7e7..72385bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1635,6 +1635,9 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, 
uint32_t reg,
uint32_t acc_flags);
 void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
uint32_t acc_flags);
+void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t 
value);
+uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset);
+
 u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg);
 void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v);
 
@@ -1658,6 +1661,9 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
 #define RREG32_NO_KIQ(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ)
 #define WREG32_NO_KIQ(reg, v) amdgpu_mm_wreg(adev, (reg), (v), 
AMDGPU_REGS_NO_KIQ)
 
+#define RREG8(reg) amdgpu_mm_rreg8(adev, (reg))
+#define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v))
+
 #define RREG32(reg) amdgpu_mm_rreg(adev, (reg), 0)
 #define RREG32_IDX(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_IDX)
 #define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", 
amdgpu_mm_rreg(adev, (reg), 0))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 65584f6..c8e1940 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -121,6 +121,32 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, 
uint32_t reg,
return ret;
 }
 
+/*
+ * MMIO register read with bytes helper functions
+ * @offset:bytes offset from MMIO start
+ *
+*/
+
+uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
+   if (offset < adev->rmmio_size)
+   return (readb(adev->rmmio + offset));
+   BUG();
+}
+
+/*
+ * MMIO register write with bytes helper functions
+ * @offset:bytes offset from MMIO start
+ * @value: the value want to be written to the register
+ *
+*/
+void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t 
value) {
+   if (offset < adev->rmmio_size)
+   writeb(value, adev->rmmio + offset);
+   else
+   BUG();
+}
+
+
 void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
uint32_t acc_flags)
 {
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/4] drm/amdgpu: refactoring mailbox to fix TDR handshake bugs

2018-03-05 Thread Monk Liu
this patch actually refactors the mailbox implementation, and
all of the changes below are needed together to fix all those mailbox
handshake issues exposed by heavy TDR testing.

1)refactor all mailbox functions based on byte accessing for mb_control
reason is to avoid touching non-related bits when writing trn/rcv part of
mailbox_control, this way some incorrect INTR sent to hypervisor
side could be avoided, and it fixes couple handshake bug.

2)trans_msg function re-impled: put a invalid
logic before transmitting message to make sure the ACK bit is in
a clear status, otherwise there is chance that ACK asserted already
before transmitting message and lead to fake ACK polling.
(hypervisor side have some tricks to workaround ACK bit being corrupted
by VF FLR which hase an side effects that may make guest side ACK bit
asserted wrongly), and clear TRANS_MSG words after message transferred.

3)for mailbox_flr_work, it is also re-worked: it takes the mutex lock
first if invoked, to block gpu recover's participate too early while
hypervisor side is doing VF FLR. (hypervisor sends FLR_NOTIFY to guest
before doing VF FLR and sentds FLR_COMPLETE after VF FLR done, and
the FLR_NOTIFY will trigger interrupt to guest which lead to
mailbox_flr_work being invoked)

This can avoid the issue that mailbox trans msg being cleared by its VF FLR.

4)for mailbox_rcv_irq IRQ routine, it should only peek msg and schedule
mailbox_flr_work, instead of ACK to hypervisor itself, because FLR_NOTIFY
msg sent from hypervisor side doesn't need VF's ACK (this is because
VF's ACK would lead to hypervisor clear its trans_valid/msg, and this
would cause handshake bug if trans_valid/msg is cleared not due to
correct VF ACK but from a wrong VF ACK like this "FLR_NOTIFY" one)

This fixed handshake bug that sometimes GUEST always couldn't receive
"READY_TO_ACCESS_GPU" msg from hypervisor.

5) separate the polling time limits accordingly:
POLL ACK cost no more than 500ms
POLL MSG cost no more than 12000ms
POLL FLR finish cost no more than 500ms

6) we still need to set adev into in_gpu_reset mode after we receive
FLR_NOTIFY from the host side; this can prevent an innocent app from wrongly
succeeding in opening the amdgpu dri device.

FLR_NOTIFY is received due to an IDLE hang detected on the hypervisor side,
which indicates the GPU has already died in this VF.

Change-Id: I17df8b4490a5b53a1cc2bd6c8f9bc3ee14c23f1a
Signed-off-by: Monk Liu <monk@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 200 ++
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h |   4 +-
 2 files changed, 111 insertions(+), 93 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 271452d..8d09380 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -33,56 +33,42 @@
 
 static void xgpu_ai_mailbox_send_ack(struct amdgpu_device *adev)
 {
-   u32 reg;
-   int timeout = AI_MAILBOX_TIMEDOUT;
-   u32 mask = REG_FIELD_MASK(BIF_BX_PF0_MAILBOX_CONTROL, RCV_MSG_VALID);
-
-   reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
-mmBIF_BX_PF0_MAILBOX_CONTROL));
-   reg = REG_SET_FIELD(reg, BIF_BX_PF0_MAILBOX_CONTROL, RCV_MSG_ACK, 1);
-   WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
-  mmBIF_BX_PF0_MAILBOX_CONTROL), reg);
-
-   /*Wait for RCV_MSG_VALID to be 0*/
-   reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
-mmBIF_BX_PF0_MAILBOX_CONTROL));
-   while (reg & mask) {
-   if (timeout <= 0) {
-   pr_err("RCV_MSG_VALID is not cleared\n");
-   break;
-   }
-   mdelay(1);
-   timeout -=1;
-
-   reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
-
mmBIF_BX_PF0_MAILBOX_CONTROL));
-   }
+   const u32 offset = SOC15_REG_OFFSET(NBIO, 0, 
mmBIF_BX_PF0_MAILBOX_CONTROL) * 4 + 1;
+   WREG8(offset, 2);
 }
 
 static void xgpu_ai_mailbox_set_valid(struct amdgpu_device *adev, bool val)
 {
-   u32 reg;
+   const u32 offset = SOC15_REG_OFFSET(NBIO, 0, 
mmBIF_BX_PF0_MAILBOX_CONTROL) * 4;
+   WREG8(offset, val ? 1 : 0);
+}
 
-   reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
-mmBIF_BX_PF0_MAILBOX_CONTROL));
-   reg = REG_SET_FIELD(reg, BIF_BX_PF0_MAILBOX_CONTROL,
-   TRN_MSG_VALID, val ? 1 : 0);
-   WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL),
- reg);
+/*
+ * this peek_msg could *only* be called in IRQ routine becuase in IRQ routine
+ * RCV_MSG_VALID filed of BIF_BX_PF0_MAILBOX_CONTROL must already be set to 1
+ * by host.
+ *
+ * if called no in IRQ routine, this peek_msg cannot guaranteed to return the
+ * correct value since i

[PATCH 4/4] dma-buf/reservation: should keep the new fence in add_shared_inplace

2018-03-05 Thread Monk Liu
Change-Id: If6a979ba9fd6c923b82212f35f07a9ff31c86767
Signed-off-by: Monk Liu <monk@amd.com>
---
 drivers/dma-buf/reservation.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c
index 314eb10..29b7e45 100644
--- a/drivers/dma-buf/reservation.c
+++ b/drivers/dma-buf/reservation.c
@@ -118,7 +118,7 @@ reservation_object_add_shared_inplace(struct 
reservation_object *obj,
old_fence = rcu_dereference_protected(fobj->shared[i],
reservation_object_held(obj));
 
-   if (old_fence->context == fence->context) {
+   if (dma_fence_is_later(fence, old_fence)) {
/* memory barrier is added by write_seqcount_begin */
RCU_INIT_POINTER(fobj->shared[i], fence);
write_seqcount_end(>seq);
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 3/4] drm/amdgpu: give warning before sleep in kiq_r/wreg

2018-03-05 Thread Monk Liu
to catch, early on, errors where we may schedule in atomic context

Change-Id: I49dec7c55470011729b7fa7d3e1ecfe1f38ed89f
Signed-off-by: Monk Liu <monk@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 42c1401..21adb1b6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -167,6 +167,9 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, 
uint32_t reg)
if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
goto failed_kiq_read;
 
+   if (in_interrupt())
+   might_sleep();
+
while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
@@ -212,7 +215,11 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, 
uint32_t reg, uint32_t v)
if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
goto failed_kiq_write;
 
+   if (in_interrupt())
+   might_sleep();
+
while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+
msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
}
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/4] drm/amdgpu: implement mmio byte access helpers

2018-03-05 Thread Monk Liu
mailbox registers can be accessed on a byte boundary according
to the BIF team, so this patch prepares register byte access,
which will be used by the following patches

Change-Id: I1e84f1c6e8e75dc42eb5be09c492fa5e7eb7502a
Signed-off-by: Monk Liu <monk@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  6 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 26 ++
 2 files changed, 32 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 292c7e7..72385bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1635,6 +1635,9 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, 
uint32_t reg,
uint32_t acc_flags);
 void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
uint32_t acc_flags);
+void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t 
value);
+uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset);
+
 u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg);
 void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v);
 
@@ -1658,6 +1661,9 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
 #define RREG32_NO_KIQ(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ)
 #define WREG32_NO_KIQ(reg, v) amdgpu_mm_wreg(adev, (reg), (v), 
AMDGPU_REGS_NO_KIQ)
 
+#define RREG8(reg) amdgpu_mm_rreg8(adev, (reg))
+#define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v))
+
 #define RREG32(reg) amdgpu_mm_rreg(adev, (reg), 0)
 #define RREG32_IDX(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_IDX)
 #define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", 
amdgpu_mm_rreg(adev, (reg), 0))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 65584f6..c8e1940 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -121,6 +121,32 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, 
uint32_t reg,
return ret;
 }
 
+/*
+ * MMIO register read with bytes helper functions
+ * @offset:bytes offset from MMIO start
+ *
+*/
+
+uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
+   if (offset < adev->rmmio_size)
+   return (readb(adev->rmmio + offset));
+   BUG();
+}
+
+/*
+ * MMIO register write with bytes helper functions
+ * @offset:bytes offset from MMIO start
+ * @value: the value want to be written to the register
+ *
+*/
+void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t 
value) {
+   if (offset < adev->rmmio_size)
+   writeb(value, adev->rmmio + offset);
+   else
+   BUG();
+}
+
+
 void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
uint32_t acc_flags)
 {
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] dma-buf/reservation: should keep the new fence in add_shared_inplace

2018-03-05 Thread Monk Liu
Change-Id: If6a979ba9fd6c923b82212f35f07a9ff31c86767
Signed-off-by: Monk Liu <monk@amd.com>
---
 drivers/dma-buf/reservation.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c
index 375de41..9b875267 100644
--- a/drivers/dma-buf/reservation.c
+++ b/drivers/dma-buf/reservation.c
@@ -118,7 +118,7 @@ reservation_object_add_shared_inplace(struct 
reservation_object *obj,
old_fence = rcu_dereference_protected(fobj->shared[i],
reservation_object_held(obj));
 
-   if (old_fence->context == fence->context) {
+   if (dma_fence_is_later(fence, old_fence)) {
/* memory barrier is added by write_seqcount_begin */
RCU_INIT_POINTER(fobj->shared[i], fence);
write_seqcount_end(>seq);
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/4] drm/amdgpu: fix object unreserved issue

2018-03-04 Thread Monk Liu
bo_do_create will unreserve at the end if @resv
is NULL, which causes the following bo_create_shadow
to run without the lock held on @resv

Change-Id: Iaad24b8aea60522f25188874ab9d5c5f13f1a941
Signed-off-by: Monk Liu <monk@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 216799cc..d8a818d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -507,9 +507,12 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
return r;
 
if ((flags & AMDGPU_GEM_CREATE_SHADOW) && amdgpu_need_backup(adev)) {
-   if (!resv)
-   WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv,
-   NULL));
+   if (!resv) {
+   WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv, 
NULL));
+   r = amdgpu_bo_reserve(*bo_ptr, kernel?true:false);
+   if (unlikely(r))
+   return r;
+   }
 
r = amdgpu_bo_create_shadow(adev, size, byte_align, (*bo_ptr));
 
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


  1   2   3   4   5   >