[PATCH] drm/amdgpu/psp: add psp memory training implementation(v3)

2019-10-15 Thread Tianci Yin
From: "Tianci.Yin" 

add memory training implementation code to save resume time.

Change-Id: I625794a780b11d824ab57ef39cc33b872c6dc6c9
Reviewed-by: Alex Deucher 
Signed-off-by: Tianci.Yin 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |   9 ++
 drivers/gpu/drm/amd/amdgpu/psp_v11_0.c  | 161 
 3 files changed, 171 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 8704f93cabf2..c2b776fd82b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -151,6 +151,7 @@ extern uint amdgpu_sdma_phase_quantum;
 extern char *amdgpu_disable_cu;
 extern char *amdgpu_virtual_display;
 extern uint amdgpu_pp_feature_mask;
+extern uint amdgpu_force_long_training;
 extern int amdgpu_job_hang_limit;
 extern int amdgpu_lbpw;
 extern int amdgpu_compute_multipipe;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index da7cbee25c61..c7d086569acb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -127,6 +127,7 @@ char *amdgpu_disable_cu = NULL;
 char *amdgpu_virtual_display = NULL;
 /* OverDrive(bit 14) disabled by default*/
 uint amdgpu_pp_feature_mask = 0xbfff;
+uint amdgpu_force_long_training = 0;
 int amdgpu_job_hang_limit = 0;
 int amdgpu_lbpw = -1;
 int amdgpu_compute_multipipe = -1;
@@ -390,6 +391,14 @@ module_param_named(sched_hw_submission, 
amdgpu_sched_hw_submission, int, 0444);
 MODULE_PARM_DESC(ppfeaturemask, "all power features enabled (default))");
 module_param_named(ppfeaturemask, amdgpu_pp_feature_mask, uint, 0444);
 
+/**
+ * DOC: forcelongtraining (uint)
+ * Force long memory training in resume.
+ * The default is zero, indicates short training in resume.
+ */
+MODULE_PARM_DESC(forcelongtraining, "force memory long training");
+module_param_named(forcelongtraining, amdgpu_force_long_training, uint, 0444);
+
 /**
  * DOC: pcie_gen_cap (uint)
  * Override PCIE gen speed capabilities. See the CAIL flags in 
drivers/gpu/drm/amd/include/amd_pcie.h.
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
index 2ba0f68ced10..19339de0cf12 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -58,6 +58,8 @@ MODULE_FIRMWARE("amdgpu/arcturus_ta.bin");
 #define mmRLC_GPM_UCODE_DATA_NV10  0x5b62
 #define mmSDMA0_UCODE_ADDR_NV100x5880
 #define mmSDMA0_UCODE_DATA_NV100x5881
+/* memory training timeout define */
+#define MEM_TRAIN_SEND_MSG_TIMEOUT_US  300
 
 static int psp_v11_0_init_microcode(struct psp_context *psp)
 {
@@ -902,6 +904,162 @@ static int psp_v11_0_rlc_autoload_start(struct 
psp_context *psp)
return psp_rlc_autoload_start(psp);
 }
 
+static int psp_v11_0_memory_training_send_msg(struct psp_context *psp, int msg)
+{
+   int ret;
+   int i;
+   uint32_t data_32;
+   int max_wait;
+   struct amdgpu_device *adev = psp->adev;
+
+   data_32 = (psp->mem_train_ctx.c2p_train_data_offset >> 20);
+   WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, data_32);
+   WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, msg);
+
+   max_wait = MEM_TRAIN_SEND_MSG_TIMEOUT_US / adev->usec_timeout;
+   for (i = 0; i < max_wait; i++) {
+   ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, 
mmMP0_SMN_C2PMSG_35),
+  0x8000, 0x8000, false);
+   if (ret == 0)
+   break;
+   }
+   if (i < max_wait)
+   ret = 0;
+   else
+   ret = -ETIME;
+
+   DRM_DEBUG("%s training %s, cost %d * %dms.\n",
+ (msg == PSP_BL__DRAM_SHORT_TRAIN) ? "short" : "long",
+ (ret == 0) ? "succeed" : "failed",
+ i, adev->usec_timeout/1000);
+   return ret;
+}
+
+static void psp_v11_0_memory_training_fini(struct psp_context *psp)
+{
+   struct psp_memory_training_context *ctx = &psp->mem_train_ctx;
+
+   ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT;
+   kfree(ctx->sys_cache);
+   ctx->sys_cache = NULL;
+}
+
+static int psp_v11_0_memory_training_init(struct psp_context *psp)
+{
+   int ret;
+   struct psp_memory_training_context *ctx = &psp->mem_train_ctx;
+
+   if (ctx->init != PSP_MEM_TRAIN_RESERVE_SUCCESS) {
+   DRM_DEBUG("memory training is not supported!\n");
+   return 0;
+   }
+
+   ctx->sys_cache = kzalloc(ctx->train_data_size, GFP_KERNEL);
+   if (ctx->sys_cache == NULL) {
+   DRM_ERROR("alloc mem_train_ctx.sys_cache failed!\n");
+   ret = -ENOMEM;
+   goto Err_out;
+   }
+
+   
DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n",
+ ctx->train_data_size,
+ ctx->p2c_train_data_offset,
+ 

[PATCH] drm/amdgpu: fix amdgpu trace event print string format error

2019-10-15 Thread Wang, Kevin(Yang)
the trace event printed the string in the wrong format
(it used an integer/pointer type to handle the string).

before:
amdgpu_test_kev-1556  [002]   138.508781: amdgpu_cs_ioctl:
sched_job=8, timeline=gfx_0.0.0, context=177, seqno=1,
ring_name=94d01c207bf0, num_ibs=2

after:
amdgpu_test_kev-1506  [004]   370.703783: amdgpu_cs_ioctl:
sched_job=12, timeline=gfx_0.0.0, context=234, seqno=2,
ring_name=gfx_0.0.0, num_ibs=1

change trace event list:
1.amdgpu_cs_ioctl
2.amdgpu_sched_run_job
3.amdgpu_ib_pipe_sync

Signed-off-by: Kevin Wang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 8227ebd0f511..f940526c5889 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -170,7 +170,7 @@ TRACE_EVENT(amdgpu_cs_ioctl,
 __field(unsigned int, context)
 __field(unsigned int, seqno)
 __field(struct dma_fence *, fence)
-__field(char *, ring_name)
+__string(ring, 
to_amdgpu_ring(job->base.sched)->name)
 __field(u32, num_ibs)
 ),
 
@@ -179,12 +179,12 @@ TRACE_EVENT(amdgpu_cs_ioctl,
   __assign_str(timeline, 
AMDGPU_JOB_GET_TIMELINE_NAME(job))
   __entry->context = 
job->base.s_fence->finished.context;
   __entry->seqno = job->base.s_fence->finished.seqno;
-  __entry->ring_name = 
to_amdgpu_ring(job->base.sched)->name;
+  __assign_str(ring, 
to_amdgpu_ring(job->base.sched)->name)
   __entry->num_ibs = job->num_ibs;
   ),
TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, 
ring_name=%s, num_ibs=%u",
  __entry->sched_job_id, __get_str(timeline), 
__entry->context,
- __entry->seqno, __entry->ring_name, __entry->num_ibs)
+ __entry->seqno, __get_str(ring), __entry->num_ibs)
 );
 
 TRACE_EVENT(amdgpu_sched_run_job,
@@ -195,7 +195,7 @@ TRACE_EVENT(amdgpu_sched_run_job,
 __string(timeline, 
AMDGPU_JOB_GET_TIMELINE_NAME(job))
 __field(unsigned int, context)
 __field(unsigned int, seqno)
-__field(char *, ring_name)
+__string(ring, 
to_amdgpu_ring(job->base.sched)->name)
 __field(u32, num_ibs)
 ),
 
@@ -204,12 +204,12 @@ TRACE_EVENT(amdgpu_sched_run_job,
   __assign_str(timeline, 
AMDGPU_JOB_GET_TIMELINE_NAME(job))
   __entry->context = 
job->base.s_fence->finished.context;
   __entry->seqno = job->base.s_fence->finished.seqno;
-  __entry->ring_name = 
to_amdgpu_ring(job->base.sched)->name;
+  __assign_str(ring, 
to_amdgpu_ring(job->base.sched)->name)
   __entry->num_ibs = job->num_ibs;
   ),
TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, 
ring_name=%s, num_ibs=%u",
  __entry->sched_job_id, __get_str(timeline), 
__entry->context,
- __entry->seqno, __entry->ring_name, __entry->num_ibs)
+ __entry->seqno, __get_str(ring), __entry->num_ibs)
 );
 
 
@@ -473,7 +473,7 @@ TRACE_EVENT(amdgpu_ib_pipe_sync,
TP_PROTO(struct amdgpu_job *sched_job, struct dma_fence *fence),
TP_ARGS(sched_job, fence),
TP_STRUCT__entry(
-__field(const char *,name)
+__string(ring, sched_job->base.sched->name);
 __field(uint64_t, id)
 __field(struct dma_fence *, fence)
 __field(uint64_t, ctx)
@@ -481,14 +481,14 @@ TRACE_EVENT(amdgpu_ib_pipe_sync,
 ),
 
TP_fast_assign(
-  __entry->name = sched_job->base.sched->name;
+  __assign_str(ring, sched_job->base.sched->name)
   __entry->id = sched_job->base.id;
   __entry->fence = fence;
   __entry->ctx = fence->context;
   __entry->seqno = fence->seqno;
   ),
TP_printk("job ring=%s, id=%llu, need pipe sync to fence=%p, 
context=%llu, seq=%u",
- __entry->name, __entry->id,
+ __get_str(ring), __entry->id,
  __entry->fence, __entry->ctx,
  __entry->seqno)
 );
-- 
2.17.1


[PATCH] drm/amdgpu/psp: declare PSP TA firmware

2019-10-15 Thread chen gong
Add PSP TA firmware declarations for raven, raven2 and picasso.

Signed-off-by: chen gong 
---
 drivers/gpu/drm/amd/amdgpu/psp_v10_0.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
index b96484a..b345e69 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
@@ -40,6 +40,9 @@
 MODULE_FIRMWARE("amdgpu/raven_asd.bin");
 MODULE_FIRMWARE("amdgpu/picasso_asd.bin");
 MODULE_FIRMWARE("amdgpu/raven2_asd.bin");
+MODULE_FIRMWARE("amdgpu/picasso_ta.bin");
+MODULE_FIRMWARE("amdgpu/raven2_ta.bin");
+MODULE_FIRMWARE("amdgpu/raven_ta.bin");
 
 static int psp_v10_0_init_microcode(struct psp_context *psp)
 {
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

RE: [PATCH] drm/amdgpu/powerplay: add renoir funcs to support dc

2019-10-15 Thread Liang, Prike
Ok, I see that in your new patch DCFCLK_DPM_LEVELS has been aligned well.

Thanks,
 Prike
> -Original Message-
> From: amd-gfx  On Behalf Of Liang,
> Prike
> Sent: Wednesday, October 16, 2019 10:22 AM
> To: Wu, Hersen ; amd-gfx@lists.freedesktop.org
> Cc: Wentland, Harry ; Wang, Kevin(Yang)
> ; Wu, Hersen 
> Subject: RE: [PATCH] drm/amdgpu/powerplay: add renoir funcs to support
> dc
> 
> Regards the comment inline, saw you have fixed the not enable
> CONFIG_DRM_AMD_DC_DCN2_1 potential compile issue.
> 
> BTW, would you help clarify why PP_SMU_NUM_DCFCLK_DPM_LEVELS is
> different from the smu12_driver_if.h define NUM_DCFCLK_DPM_LEVELS .
> Is there can track the macro definition update ?
> 
> Thanks,
> Prike
> > -Original Message-
> > From: Liang, Prike
> > Sent: Friday, October 11, 2019 10:34 PM
> > To: Hersen Wu ; amd-gfx@lists.freedesktop.org
> > Cc: Wu, Hersen ; Wang, Kevin(Yang)
> > ; Wentland, Harry 
> > Subject: RE: [PATCH] drm/amdgpu/powerplay: add renoir funcs to support
> > dc
> >
> >
> >
> > > -Original Message-
> > > From: amd-gfx  On Behalf Of
> > > Hersen Wu
> > > Sent: Thursday, October 10, 2019 10:58 PM
> > > To: amd-gfx@lists.freedesktop.org
> > > Cc: Wu, Hersen ; Wang, Kevin(Yang)
> > > ; Wentland, Harry
> 
> > > Subject: [PATCH] drm/amdgpu/powerplay: add renoir funcs to support
> > > dc
> > >
> > > there are two paths for renoir dc access smu.
> > > one dc access smu directly using bios smc
> > > interface: set disply, dprefclk, etc.
> > > another goes through pplib for get dpm clock table and set watermmark.
> > >
> > > Signed-off-by: Hersen Wu 
> > > ---
> > >  .../amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c  | 16 +---
> > >  drivers/gpu/drm/amd/powerplay/amdgpu_smu.c| 35 +++
> > >  .../gpu/drm/amd/powerplay/inc/amdgpu_smu.h| 16 ++--
> > >  drivers/gpu/drm/amd/powerplay/renoir_ppt.c| 96
> > > +++
> > >  drivers/gpu/drm/amd/powerplay/smu_v11_0.c | 39 
> > >  5 files changed, 141 insertions(+), 61 deletions(-)
> > >
> > > diff --git
> > > a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
> > > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
> > > index f4cfa0caeba8..95564b8de3ce 100644
> > > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
> > > +++
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
> > > @@ -589,10 +589,9 @@ void pp_rv_set_wm_ranges(struct pp_smu *pp,
> > >   if (pp_funcs && pp_funcs->set_watermarks_for_clocks_ranges)
> > >   pp_funcs->set_watermarks_for_clocks_ranges(pp_handle,
> > >
> > > _with_clock_ranges);
> > > - else if (adev->smu.funcs &&
> > > -  adev->smu.funcs->set_watermarks_for_clock_ranges)
> > > + else
> > >   smu_set_watermarks_for_clock_ranges(&adev->smu,
> > > - &wm_with_clock_ranges);
> > > + &wm_with_clock_ranges);
> > >  }
> > >
> > >  void pp_rv_set_pme_wa_enable(struct pp_smu *pp) @@ -665,7 +664,6
> > @@
> > > enum pp_smu_status pp_nv_set_wm_ranges(struct pp_smu *pp,  {
> > >   const struct dc_context *ctx = pp->dm;
> > >   struct amdgpu_device *adev = ctx->driver_context;
> > > - struct smu_context *smu = >smu;
> > >   struct dm_pp_wm_sets_with_clock_ranges_soc15
> > > wm_with_clock_ranges;
> > >   struct dm_pp_clock_range_for_dmif_wm_set_soc15
> > > *wm_dce_clocks =
> > >   wm_with_clock_ranges.wm_dmif_clocks_ranges;
> > > @@ -708,15 +706,7 @@ enum pp_smu_status
> > pp_nv_set_wm_ranges(struct
> > > pp_smu *pp,
> > >   ranges->writer_wm_sets[i].min_drain_clk_mhz *
> > 1000;
> > >   }
> > >
> > > - if (!smu->funcs)
> > > - return PP_SMU_RESULT_UNSUPPORTED;
> > > -
> > > - /* 0: successful or smu.funcs->set_watermarks_for_clock_ranges =
> > > NULL;
> > > -  * 1: fail
> > > -  */
> > > - if (smu_set_watermarks_for_clock_ranges(>smu,
> > > - _with_clock_ranges))
> > > - return PP_SMU_RESULT_UNSUPPORTED;
> > > + smu_set_watermarks_for_clock_ranges(>smu,
> > >   _with_clock_ranges);
> > >
> > >   return PP_SMU_RESULT_OK;
> > >  }
> > > diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> > > b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> > > index c9266ea70331..1b71c38cdf96 100644
> > > --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> > > +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> > > @@ -1834,6 +1834,41 @@ int smu_set_mp1_state(struct smu_context
> > *smu,
> > >   return ret;
> > >  }
> > >
> > > +int smu_write_watermarks_table(struct smu_context *smu) {
> > > + int ret = 0;
> > > + struct smu_table_context *smu_table = >smu_table;
> > > + struct smu_table *table = NULL;
> > > +
> > > + table = _table->tables[SMU_TABLE_WATERMARKS];
> > > +
> > > + if (!table->cpu_addr)
> > > + return -EINVAL;
> > > +
> > > + ret = smu_update_table(smu, SMU_TABLE_WATERMARKS, 0, table-
> > > >cpu_addr,
> > > + true);
> > > +
> > > + return ret;
> > > +}
> > > +
> > > +int 

Re: [PATCH 1/8] drm/amdgpu: update amdgpu_discovery to handle revision

2019-10-15 Thread Yin, Tianci (Rico)
Thanks Luben!
Patch 8 v2 has been sent out, please review it again.

From: Tuikov, Luben 
Sent: Wednesday, October 16, 2019 2:01
To: Yin, Tianci (Rico) ; amd-gfx@lists.freedesktop.org 

Cc: Deucher, Alexander ; Koenig, Christian 

Subject: Re: [PATCH 1/8] drm/amdgpu: update amdgpu_discovery to handle revision

Patches 1-7: Looks good.
Reviewed-by: Luben Tuikov 

Patch 8: NAK! for the same exact reason as the previous review. No changes to 
NAK reasoning from previous review.

Regards,
Luben

On 2019-10-13 11:21 p.m., Tianci Yin wrote:
> From: "Tianci.Yin" 
>
> update amdgpu_discovery to get IP revision.
>
> Change-Id: If8152103d03b58e1dc0f32db63625e290f5f08a0
> Reviewed-by: Alex Deucher 
> Signed-off-by: Tianci.Yin 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 4 +++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h | 2 +-
>  2 files changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> index 71198c5318e1..ddd8364102a2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> @@ -333,7 +333,7 @@ int amdgpu_discovery_reg_base_init(struct amdgpu_device 
> *adev)
>  }
>
>  int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id,
> - int *major, int *minor)
> + int *major, int *minor, int *revision)
>  {
>struct binary_header *bhdr;
>struct ip_discovery_header *ihdr;
> @@ -369,6 +369,8 @@ int amdgpu_discovery_get_ip_version(struct amdgpu_device 
> *adev, int hw_id,
>*major = ip->major;
>if (minor)
>*minor = ip->minor;
> + if (revision)
> + *revision = ip->revision;
>return 0;
>}
>ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 
> 1);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
> index 5a6693d7d269..ba78e15d9b05 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
> @@ -30,7 +30,7 @@ int amdgpu_discovery_init(struct amdgpu_device *adev);
>  void amdgpu_discovery_fini(struct amdgpu_device *adev);
>  int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev);
>  int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id,
> -int *major, int *minor);
> +int *major, int *minor, int *revision);
>  int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev);
>
>  #endif /* __AMDGPU_DISCOVERY__ */
>

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

RE: [PATCH] drm/amdgpu/powerplay: add renoir funcs to support dc

2019-10-15 Thread Liang, Prike
Regarding the comment inline, I saw you have fixed the potential compile issue
when CONFIG_DRM_AMD_DC_DCN2_1 is not enabled.

BTW, would you help clarify why PP_SMU_NUM_DCFCLK_DPM_LEVELS is different from 
the smu12_driver_if.h define NUM_DCFCLK_DPM_LEVELS .
Is there a way to track the macro definition update?

Thanks,
Prike
> -Original Message-
> From: Liang, Prike
> Sent: Friday, October 11, 2019 10:34 PM
> To: Hersen Wu ; amd-gfx@lists.freedesktop.org
> Cc: Wu, Hersen ; Wang, Kevin(Yang)
> ; Wentland, Harry 
> Subject: RE: [PATCH] drm/amdgpu/powerplay: add renoir funcs to support
> dc
> 
> 
> 
> > -Original Message-
> > From: amd-gfx  On Behalf Of
> > Hersen Wu
> > Sent: Thursday, October 10, 2019 10:58 PM
> > To: amd-gfx@lists.freedesktop.org
> > Cc: Wu, Hersen ; Wang, Kevin(Yang)
> > ; Wentland, Harry 
> > Subject: [PATCH] drm/amdgpu/powerplay: add renoir funcs to support dc
> >
> > there are two paths for renoir dc access smu.
> > one dc access smu directly using bios smc
> > interface: set disply, dprefclk, etc.
> > another goes through pplib for get dpm clock table and set watermmark.
> >
> > Signed-off-by: Hersen Wu 
> > ---
> >  .../amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c  | 16 +---
> >  drivers/gpu/drm/amd/powerplay/amdgpu_smu.c| 35 +++
> >  .../gpu/drm/amd/powerplay/inc/amdgpu_smu.h| 16 ++--
> >  drivers/gpu/drm/amd/powerplay/renoir_ppt.c| 96
> > +++
> >  drivers/gpu/drm/amd/powerplay/smu_v11_0.c | 39 
> >  5 files changed, 141 insertions(+), 61 deletions(-)
> >
> > diff --git
> > a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
> > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
> > index f4cfa0caeba8..95564b8de3ce 100644
> > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
> > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
> > @@ -589,10 +589,9 @@ void pp_rv_set_wm_ranges(struct pp_smu *pp,
> > if (pp_funcs && pp_funcs->set_watermarks_for_clocks_ranges)
> > pp_funcs->set_watermarks_for_clocks_ranges(pp_handle,
> >
> > _with_clock_ranges);
> > -   else if (adev->smu.funcs &&
> > -adev->smu.funcs->set_watermarks_for_clock_ranges)
> > +   else
> > smu_set_watermarks_for_clock_ranges(>smu,
> > -   _with_clock_ranges);
> > +   _with_clock_ranges);
> >  }
> >
> >  void pp_rv_set_pme_wa_enable(struct pp_smu *pp) @@ -665,7 +664,6
> @@
> > enum pp_smu_status pp_nv_set_wm_ranges(struct pp_smu *pp,  {
> > const struct dc_context *ctx = pp->dm;
> > struct amdgpu_device *adev = ctx->driver_context;
> > -   struct smu_context *smu = >smu;
> > struct dm_pp_wm_sets_with_clock_ranges_soc15
> > wm_with_clock_ranges;
> > struct dm_pp_clock_range_for_dmif_wm_set_soc15
> > *wm_dce_clocks =
> > wm_with_clock_ranges.wm_dmif_clocks_ranges;
> > @@ -708,15 +706,7 @@ enum pp_smu_status
> pp_nv_set_wm_ranges(struct
> > pp_smu *pp,
> > ranges->writer_wm_sets[i].min_drain_clk_mhz *
> 1000;
> > }
> >
> > -   if (!smu->funcs)
> > -   return PP_SMU_RESULT_UNSUPPORTED;
> > -
> > -   /* 0: successful or smu.funcs->set_watermarks_for_clock_ranges =
> > NULL;
> > -* 1: fail
> > -*/
> > -   if (smu_set_watermarks_for_clock_ranges(>smu,
> > -   _with_clock_ranges))
> > -   return PP_SMU_RESULT_UNSUPPORTED;
> > +   smu_set_watermarks_for_clock_ranges(>smu,
> > _with_clock_ranges);
> >
> > return PP_SMU_RESULT_OK;
> >  }
> > diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> > b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> > index c9266ea70331..1b71c38cdf96 100644
> > --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> > +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> > @@ -1834,6 +1834,41 @@ int smu_set_mp1_state(struct smu_context
> *smu,
> > return ret;
> >  }
> >
> > +int smu_write_watermarks_table(struct smu_context *smu) {
> > +   int ret = 0;
> > +   struct smu_table_context *smu_table = >smu_table;
> > +   struct smu_table *table = NULL;
> > +
> > +   table = _table->tables[SMU_TABLE_WATERMARKS];
> > +
> > +   if (!table->cpu_addr)
> > +   return -EINVAL;
> > +
> > +   ret = smu_update_table(smu, SMU_TABLE_WATERMARKS, 0, table-
> > >cpu_addr,
> > +   true);
> > +
> > +   return ret;
> > +}
> > +
> > +int smu_set_watermarks_for_clock_ranges(struct smu_context *smu,
> > +   struct dm_pp_wm_sets_with_clock_ranges_soc15
> > *clock_ranges) {
> > +   int ret = 0;
> > +   struct smu_table *watermarks = 
> > >smu_table.tables[SMU_TABLE_WATERMARKS];
> > +   void *table = watermarks->cpu_addr;
> > +
> > +   if (!smu->disable_watermark &&
> > +   smu_feature_is_enabled(smu,
> > SMU_FEATURE_DPM_DCEFCLK_BIT) &&
> > +   smu_feature_is_enabled(smu,
> > SMU_FEATURE_DPM_SOCCLK_BIT)) {
> > +   smu_set_watermarks_table(smu, table, clock_ranges);

Re: [PATCH 8/8] drm/amdgpu/psp: add psp memory training implementation

2019-10-15 Thread Yin, Tianci (Rico)
Thanks very much! Please review again.

Rico

From: Tuikov, Luben 
Sent: Wednesday, October 16, 2019 1:59
To: Yin, Tianci (Rico) ; amd-gfx@lists.freedesktop.org 

Cc: Koenig, Christian ; Deucher, Alexander 

Subject: Re: [PATCH 8/8] drm/amdgpu/psp: add psp memory training implementation

On 2019-10-13 11:21 p.m., Tianci Yin wrote:
> From: "Tianci.Yin" 
>
> add memory training implementation code to save resume time.
>
> Change-Id: I625794a780b11d824ab57ef39cc33b872c6dc6c9
> Reviewed-by: Alex Deucher 
> Signed-off-by: Tianci.Yin 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h |   1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |   9 ++
>  drivers/gpu/drm/amd/amdgpu/psp_v11_0.c  | 159 
>  3 files changed, 169 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 8704f93cabf2..c2b776fd82b5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -151,6 +151,7 @@ extern uint amdgpu_sdma_phase_quantum;
>  extern char *amdgpu_disable_cu;
>  extern char *amdgpu_virtual_display;
>  extern uint amdgpu_pp_feature_mask;
> +extern uint amdgpu_force_long_training;
>  extern int amdgpu_job_hang_limit;
>  extern int amdgpu_lbpw;
>  extern int amdgpu_compute_multipipe;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index da7cbee25c61..c7d086569acb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -127,6 +127,7 @@ char *amdgpu_disable_cu = NULL;
>  char *amdgpu_virtual_display = NULL;
>  /* OverDrive(bit 14) disabled by default*/
>  uint amdgpu_pp_feature_mask = 0xbfff;
> +uint amdgpu_force_long_training = 0;
>  int amdgpu_job_hang_limit = 0;
>  int amdgpu_lbpw = -1;
>  int amdgpu_compute_multipipe = -1;
> @@ -390,6 +391,14 @@ module_param_named(sched_hw_submission, 
> amdgpu_sched_hw_submission, int, 0444);
>  MODULE_PARM_DESC(ppfeaturemask, "all power features enabled (default))");
>  module_param_named(ppfeaturemask, amdgpu_pp_feature_mask, uint, 0444);
>
> +/**
> + * DOC: forcelongtraining (uint)
> + * Force long memory training in resume.
> + * The default is zero, indicates short training in resume.
> + */
> +MODULE_PARM_DESC(forcelongtraining, "force memory long training");
> +module_param_named(forcelongtraining, amdgpu_force_long_training, uint, 
> 0444);
> +
>  /**
>   * DOC: pcie_gen_cap (uint)
>   * Override PCIE gen speed capabilities. See the CAIL flags in 
> drivers/gpu/drm/amd/include/amd_pcie.h.
> diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c 
> b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
> index 2ba0f68ced10..b7efaa3e913c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
> @@ -902,6 +902,162 @@ static int psp_v11_0_rlc_autoload_start(struct 
> psp_context *psp)
>return psp_rlc_autoload_start(psp);
>  }
>
> +static int psp_v11_0_memory_training_send_msg(struct psp_context *psp, int 
> msg)
> +{
> + int ret = 0;
> + int i = 0;
> + uint32_t data_32 = 0;

NAK!

Leave all of those integer variables uninitialized.

> + struct amdgpu_device *adev = psp->adev;
> +
> + data_32 = (psp->mem_train_ctx.c2p_train_data_offset >> 20);
> + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, data_32);
> + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, msg);
> +
> + /*max 5s*/
> + while (i < 50) {
> + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, 
> mmMP0_SMN_C2PMSG_35),
> +0x8000, 0x8000, false);
> + if (ret == 0)
> + break;
> + i++;
> + }

NAK!

For-loop please:

for (i = 0; i < 50; i++) {
ret = ...;
}

Regards,
Luben

> + DRM_DEBUG("%s training %s, cost %d * %dms.\n",
> +   (msg == PSP_BL__DRAM_SHORT_TRAIN) ? "short" : "long",
> +   (ret == 0) ? "succeed" : "failed",
> +   i, adev->usec_timeout/1000);
> + return ret;
> +}
> +
> +static int psp_v11_0_memory_training_fini(struct psp_context *psp)
> +{
> + int ret = 0;
> + struct psp_memory_training_context *ctx = >mem_train_ctx;
> +
> + ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT;
> + if(ctx->sys_cache) {
> + kfree(ctx->sys_cache);
> + ctx->sys_cache = NULL;
> + }
> +
> + return ret;
> +}
> +
> +static int psp_v11_0_memory_training_init(struct psp_context *psp)
> +{
> + int ret = 0;
> + struct psp_memory_training_context *ctx = >mem_train_ctx;
> +
> + if(ctx->init != PSP_MEM_TRAIN_RESERVE_SUCCESS) {
> + DRM_DEBUG("memory training does not support!\n");
> + return 0;
> + }
> +
> + ctx->sys_cache = kzalloc(ctx->train_data_size, GFP_KERNEL);
> + if(ctx->sys_cache == NULL) {
> + DRM_ERROR("alloc mem_train_ctx.sys_cache failed(%d)!\n", ret);
> + ret = 

RE: [PATCH] drm/amdgpu/display: fix build error caused by CONFIG_DRM_AMD_DC_DCN2_1

2019-10-15 Thread Liang, Prike
Reviewed-by: Prike Liang 

BTW, would you help clarify why PP_SMU_NUM_DCFCLK_DPM_LEVELS is different from 
the smu12_driver_if.h define 
NUM_DCFCLK_DPM_LEVELS in you other patch about drm/amdgpu/powerplay: add renoir 
funcs to support dc.

Is there a way to track the macro definition update?

Thanks,
Prike
> -Original Message-
> From: amd-gfx  On Behalf Of
> Hersen Wu
> Sent: Wednesday, October 16, 2019 12:51 AM
> To: dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org
> Cc: Li, Sun peng (Leo) ; Lakha, Bhawanpreet
> ; Wentland, Harry
> ; Wu, Hersen 
> Subject: [PATCH] drm/amdgpu/display: fix build error caused by
> CONFIG_DRM_AMD_DC_DCN2_1
> 
> when CONFIG_DRM_AMD_DC_DCN2_1 is not enabled in .config, there is a build
> error. struct dpm_clocks should not be guarded.
> 
> Signed-off-by: Hersen Wu 
> ---
>  drivers/gpu/drm/amd/display/dc/dm_pp_smu.h | 3 ---
>  1 file changed, 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
> b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
> index 24d65dbbd749..ef7df9ef6d7e 100644
> --- a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
> +++ b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
> @@ -249,8 +249,6 @@ struct pp_smu_funcs_nv {  };  #endif
> 
> -#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
> -
>  #define PP_SMU_NUM_SOCCLK_DPM_LEVELS  8  #define
> PP_SMU_NUM_DCFCLK_DPM_LEVELS  8
>  #define PP_SMU_NUM_FCLK_DPM_LEVELS4
> @@ -288,7 +286,6 @@ struct pp_smu_funcs_rn {
>   enum pp_smu_status (*get_dpm_clock_table) (struct pp_smu *pp,
>   struct dpm_clocks *clock_table);
>  };
> -#endif
> 
>  struct pp_smu_funcs {
>   struct pp_smu ctx;
> --
> 2.17.1
> 
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: AMDGPU and 16B stack alignment

2019-10-15 Thread Nick Desaulniers
On Tue, Oct 15, 2019 at 1:26 PM Arvind Sankar  wrote:
>
> On Tue, Oct 15, 2019 at 11:05:56AM -0700, Nick Desaulniers wrote:
> > Hmmm...I would have liked to remove it outright, as it is an ABI
> > mismatch that is likely to result in instability and non-fun-to-debug
> > runtime issues in the future.  I suspect my patch does work for GCC
> > 7.1+.  The question is: Do we want to either:
> > 1. mark AMDGPU broken for GCC < 7.1, or
> > 2. continue supporting it via stack alignment mismatch?
> >
> > 2 is brittle, and may break at any point in the future, but if it's
> > working for someone it does make me feel bad to outright disable it.
> > What I'd image 2 looks like is (psuedo code in a Makefile):
> >
> > if CC_IS_GCC && GCC_VERSION < 7.1:
> >   set stack alignment to 16B and hope for the best
> >
> > So my diff would be amended to keep the stack alignment flags, but
> > only to support GCC < 7.1.  And that assumes my change compiles with
> > GCC 7.1+. (Looks like it does for me locally with GCC 8.3, but I would
> > feel even more confident if someone with hardware to test on and GCC
> > 7.1+ could boot test).
> > --
> > Thanks,
> > ~Nick Desaulniers
>
> If we do keep it, would adding -mstackrealign make it more robust?
> That's simple and will only add the alignment to functions that require
> 16-byte alignment (at least on gcc).

I think there's also `-mincoming-stack-boundary=`.
https://github.com/ClangBuiltLinux/linux/issues/735#issuecomment-540038017

>
> Alternative is to use
> __attribute__((force_align_arg_pointer)) on functions that might be
> called from 8-byte-aligned code.

Which is hard to automate and easy to forget.  Likely a large diff to fix today.

>
> It looks like -mstackrealign should work from gcc 5.3 onwards.

The kernel would generally like to support GCC 4.9+.

There's plenty of different ways to keep layering on duct tape and
bailing wire to support differing ABIs, but that's just adding
technical debt that will have to be repaid one day.  That's why the
cleanest solution IMO is mark the driver broken for old toolchains,
and use a code-base-consistent stack alignment.  Bending over
backwards to support old toolchains means accepting stack alignment
mismatches, which is in the "unspecified behavior" ring of the
"undefined behavior" Venn diagram.  I have the same opinion on relying
on explicitly undefined behavior.

I'll send patches for fixing up Clang, but please consider my strong
advice to generally avoid stack alignment mismatches, regardless of
compiler.
--
Thanks,
~Nick Desaulniers


Re: [PATCH 2/3] drm/amdgpu/uvd:Allocate enc session bo for uvd6.0 ring IB test

2019-10-15 Thread Alex Deucher
On Tue, Oct 15, 2019 at 6:08 PM Zhu, James  wrote:
>
> Allocate 256K enc session bo for uvd6.0 ring IB test to fix S3 resume
> corruption issue.
>
> Signed-off-by: James Zhu 
> ---
>  drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 16 ++--
>  1 file changed, 14 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c 
> b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> index 670784a..c79ce73 100644
> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> @@ -220,7 +220,7 @@ static int uvd_v6_0_enc_get_create_msg(struct amdgpu_ring 
> *ring, uint32_t handle
> return r;
>
> ib = >ibs[0];
> -   dummy = ib->gpu_addr + 1024;
> +   dummy = ring->adev->vcn.enc_session_gpu_addr;
>
> ib->length_dw = 0;
> ib->ptr[ib->length_dw++] = 0x0018;
> @@ -282,7 +282,7 @@ static int uvd_v6_0_enc_get_destroy_msg(struct 
> amdgpu_ring *ring,
> return r;
>
> ib = >ibs[0];
> -   dummy = ib->gpu_addr + 1024;
> +   dummy = ring->adev->vcn.enc_session_gpu_addr + 128 * PAGE_SIZE;
>
> ib->length_dw = 0;
> ib->ptr[ib->length_dw++] = 0x0018;
> @@ -326,9 +326,16 @@ static int uvd_v6_0_enc_get_destroy_msg(struct 
> amdgpu_ring *ring,
>   */
>  static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>  {
> +   struct amdgpu_device *adev = ring->adev;
> struct dma_fence *fence = NULL;
> long r;
>
> +   r = amdgpu_bo_create_kernel(adev, 2 * 128, PAGE_SIZE,

I just sent a similar patch set.  Note that the amdgpu_bo functions
takes the size in bytes, so this is only 256 bytes (well, probably 4K
due to page alignment).  I think VCN also needs this fix.  Do the
create and destroy need to reference the same session info?

Alex

> +   AMDGPU_GEM_DOMAIN_VRAM, >vcn.enc_session_bo,
> +   >vcn.enc_session_gpu_addr, 
> >vcn.enc_session_cpu_addr);
> +   if (r)
> +   return r;
> +
> r = uvd_v6_0_enc_get_create_msg(ring, 1, NULL);
> if (r)
> goto error;
> @@ -345,6 +352,11 @@ static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring 
> *ring, long timeout)
>
>  error:
> dma_fence_put(fence);
> +
> +   amdgpu_bo_free_kernel(>vcn.enc_session_bo,
> + >vcn.enc_session_gpu_addr,
> + (void **)>vcn.enc_session_cpu_addr);
> +
> return r;
>  }
>
> --
> 2.7.4
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 3/3] drm/amdgpu/vcn: fix allocation size in enc ring test

2019-10-15 Thread Alex Deucher
We need to allocate a large enough buffer for the
session info, otherwise the IB test can overwrite
other memory.

- Session info is 128K according to mesa
- Use the same session info for create and destroy

Bug: https://bugzilla.kernel.org/show_bug.cgi?id=204241
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 35 -
 1 file changed, 23 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 7a6beb2e7c4e..3199e4a5ff12 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -569,13 +569,14 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring 
*ring)
 }
 
 static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t 
handle,
- struct dma_fence **fence)
+struct amdgpu_bo *bo,
+struct dma_fence **fence)
 {
const unsigned ib_size_dw = 16;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
-   uint64_t dummy;
+   uint64_t addr;
int i, r;
 
r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, );
@@ -583,14 +584,14 @@ static int amdgpu_vcn_enc_get_create_msg(struct 
amdgpu_ring *ring, uint32_t hand
return r;
 
ib = >ibs[0];
-   dummy = ib->gpu_addr + 1024;
+   addr = amdgpu_bo_gpu_offset(bo);
 
ib->length_dw = 0;
ib->ptr[ib->length_dw++] = 0x0018;
ib->ptr[ib->length_dw++] = 0x0001; /* session info */
ib->ptr[ib->length_dw++] = handle;
-   ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
-   ib->ptr[ib->length_dw++] = dummy;
+   ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+   ib->ptr[ib->length_dw++] = addr;
ib->ptr[ib->length_dw++] = 0x000b;
 
ib->ptr[ib->length_dw++] = 0x0014;
@@ -621,13 +622,14 @@ static int amdgpu_vcn_enc_get_create_msg(struct 
amdgpu_ring *ring, uint32_t hand
 }
 
 static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t 
handle,
-   struct dma_fence **fence)
+ struct amdgpu_bo *bo,
+ struct dma_fence **fence)
 {
const unsigned ib_size_dw = 16;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
-   uint64_t dummy;
+   uint64_t addr;
int i, r;
 
r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, );
@@ -635,14 +637,14 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct 
amdgpu_ring *ring, uint32_t han
return r;
 
ib = >ibs[0];
-   dummy = ib->gpu_addr + 1024;
+   addr = amdgpu_bo_gpu_offset(bo);
 
ib->length_dw = 0;
ib->ptr[ib->length_dw++] = 0x0018;
ib->ptr[ib->length_dw++] = 0x0001;
ib->ptr[ib->length_dw++] = handle;
-   ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
-   ib->ptr[ib->length_dw++] = dummy;
+   ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+   ib->ptr[ib->length_dw++] = addr;
ib->ptr[ib->length_dw++] = 0x000b;
 
ib->ptr[ib->length_dw++] = 0x0014;
@@ -675,13 +677,20 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct 
amdgpu_ring *ring, uint32_t han
 int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
struct dma_fence *fence = NULL;
+   struct amdgpu_bo *bo = NULL;
long r;
 
-   r = amdgpu_vcn_enc_get_create_msg(ring, 1, NULL);
+   r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ , NULL, NULL);
+   if (r)
+   return r;
+
+   r = amdgpu_vcn_enc_get_create_msg(ring, 1, bo, NULL);
if (r)
goto error;
 
-   r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, );
+   r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, bo, );
if (r)
goto error;
 
@@ -693,6 +702,8 @@ int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, 
long timeout)
 
 error:
dma_fence_put(fence);
+   amdgpu_bo_unreserve(bo);
+   amdgpu_bo_unref();
return r;
 }
 
-- 
2.23.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 2/3] drm/amdgpu/uvd7: fix allocation size in enc ring test (v2)

2019-10-15 Thread Alex Deucher
We need to allocate a large enough buffer for the
session info, otherwise the IB test can overwrite
other memory.

v2: - session info is 128K according to mesa
- use the same session info for create and destroy

Bug: https://bugzilla.kernel.org/show_bug.cgi?id=204241
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 33 ++-
 1 file changed, 22 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index 01f658fa72c6..0995378d8263 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -214,13 +214,14 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring 
*ring)
  * Open up a stream for HW test
  */
 static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t 
handle,
+  struct amdgpu_bo *bo,
   struct dma_fence **fence)
 {
const unsigned ib_size_dw = 16;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
-   uint64_t dummy;
+   uint64_t addr;
int i, r;
 
r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, );
@@ -228,15 +229,15 @@ static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring 
*ring, uint32_t handle
return r;
 
ib = >ibs[0];
-   dummy = ib->gpu_addr + 1024;
+   addr = amdgpu_bo_gpu_offset(bo);
 
ib->length_dw = 0;
ib->ptr[ib->length_dw++] = 0x0018;
ib->ptr[ib->length_dw++] = 0x0001; /* session info */
ib->ptr[ib->length_dw++] = handle;
ib->ptr[ib->length_dw++] = 0x;
-   ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
-   ib->ptr[ib->length_dw++] = dummy;
+   ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+   ib->ptr[ib->length_dw++] = addr;
 
ib->ptr[ib->length_dw++] = 0x0014;
ib->ptr[ib->length_dw++] = 0x0002; /* task info */
@@ -275,13 +276,14 @@ static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring 
*ring, uint32_t handle
  * Close up a stream for HW test or if userspace failed to do so
  */
 static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t 
handle,
-   struct dma_fence **fence)
+   struct amdgpu_bo *bo,
+   struct dma_fence **fence)
 {
const unsigned ib_size_dw = 16;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
-   uint64_t dummy;
+   uint64_t addr;
int i, r;
 
r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, );
@@ -289,15 +291,15 @@ static int uvd_v7_0_enc_get_destroy_msg(struct 
amdgpu_ring *ring, uint32_t handl
return r;
 
ib = >ibs[0];
-   dummy = ib->gpu_addr + 1024;
+   addr = amdgpu_bo_gpu_offset(bo);
 
ib->length_dw = 0;
ib->ptr[ib->length_dw++] = 0x0018;
ib->ptr[ib->length_dw++] = 0x0001;
ib->ptr[ib->length_dw++] = handle;
ib->ptr[ib->length_dw++] = 0x;
-   ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
-   ib->ptr[ib->length_dw++] = dummy;
+   ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+   ib->ptr[ib->length_dw++] = addr;
 
ib->ptr[ib->length_dw++] = 0x0014;
ib->ptr[ib->length_dw++] = 0x0002;
@@ -334,13 +336,20 @@ static int uvd_v7_0_enc_get_destroy_msg(struct 
amdgpu_ring *ring, uint32_t handl
 static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
struct dma_fence *fence = NULL;
+   struct amdgpu_bo *bo = NULL;
long r;
 
-   r = uvd_v7_0_enc_get_create_msg(ring, 1, NULL);
+   r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ , NULL, NULL);
+   if (r)
+   return r;
+
+   r = uvd_v7_0_enc_get_create_msg(ring, 1, bo, NULL);
if (r)
goto error;
 
-   r = uvd_v7_0_enc_get_destroy_msg(ring, 1, );
+   r = uvd_v7_0_enc_get_destroy_msg(ring, 1, bo, );
if (r)
goto error;
 
@@ -352,6 +361,8 @@ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring 
*ring, long timeout)
 
 error:
dma_fence_put(fence);
+   amdgpu_bo_unreserve(bo);
+   amdgpu_bo_unref();
return r;
 }
 
-- 
2.23.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 1/3] drm/amdgpu/uvd6: fix allocation size in enc ring test (v2)

2019-10-15 Thread Alex Deucher
We need to allocate a large enough buffer for the
session info, otherwise the IB test can overwrite
other memory.

v2: - session info is 128K according to mesa
- use the same session info for create and destroy

Bug: https://bugzilla.kernel.org/show_bug.cgi?id=204241
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 31 ++-
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c 
b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index 670784a78512..217084d56ab8 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -206,13 +206,14 @@ static int uvd_v6_0_enc_ring_test_ring(struct amdgpu_ring 
*ring)
  * Open up a stream for HW test
  */
 static int uvd_v6_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t 
handle,
+  struct amdgpu_bo *bo,
   struct dma_fence **fence)
 {
const unsigned ib_size_dw = 16;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
-   uint64_t dummy;
+   uint64_t addr;
int i, r;
 
r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, );
@@ -220,15 +221,15 @@ static int uvd_v6_0_enc_get_create_msg(struct amdgpu_ring 
*ring, uint32_t handle
return r;
 
ib = >ibs[0];
-   dummy = ib->gpu_addr + 1024;
+   addr = amdgpu_bo_gpu_offset(bo);
 
ib->length_dw = 0;
ib->ptr[ib->length_dw++] = 0x0018;
ib->ptr[ib->length_dw++] = 0x0001; /* session info */
ib->ptr[ib->length_dw++] = handle;
ib->ptr[ib->length_dw++] = 0x0001;
-   ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
-   ib->ptr[ib->length_dw++] = dummy;
+   ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+   ib->ptr[ib->length_dw++] = addr;
 
ib->ptr[ib->length_dw++] = 0x0014;
ib->ptr[ib->length_dw++] = 0x0002; /* task info */
@@ -268,13 +269,14 @@ static int uvd_v6_0_enc_get_create_msg(struct amdgpu_ring 
*ring, uint32_t handle
  */
 static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring,
uint32_t handle,
+   struct amdgpu_bo *bo,
struct dma_fence **fence)
 {
const unsigned ib_size_dw = 16;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
-   uint64_t dummy;
+   uint64_t addr;
int i, r;
 
r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, );
@@ -282,15 +284,15 @@ static int uvd_v6_0_enc_get_destroy_msg(struct 
amdgpu_ring *ring,
return r;
 
ib = >ibs[0];
-   dummy = ib->gpu_addr + 1024;
+   addr = amdgpu_bo_gpu_offset(bo);
 
ib->length_dw = 0;
ib->ptr[ib->length_dw++] = 0x0018;
ib->ptr[ib->length_dw++] = 0x0001; /* session info */
ib->ptr[ib->length_dw++] = handle;
ib->ptr[ib->length_dw++] = 0x0001;
-   ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
-   ib->ptr[ib->length_dw++] = dummy;
+   ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+   ib->ptr[ib->length_dw++] = addr;
 
ib->ptr[ib->length_dw++] = 0x0014;
ib->ptr[ib->length_dw++] = 0x0002; /* task info */
@@ -327,13 +329,20 @@ static int uvd_v6_0_enc_get_destroy_msg(struct 
amdgpu_ring *ring,
 static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
struct dma_fence *fence = NULL;
+   struct amdgpu_bo *bo = NULL;
long r;
 
-   r = uvd_v6_0_enc_get_create_msg(ring, 1, NULL);
+   r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ , NULL, NULL);
+   if (r)
+   return r;
+
+   r = uvd_v6_0_enc_get_create_msg(ring, 1, bo, NULL);
if (r)
goto error;
 
-   r = uvd_v6_0_enc_get_destroy_msg(ring, 1, );
+   r = uvd_v6_0_enc_get_destroy_msg(ring, 1, bo, );
if (r)
goto error;
 
@@ -345,6 +354,8 @@ static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring 
*ring, long timeout)
 
 error:
dma_fence_put(fence);
+   amdgpu_bo_unreserve(bo);
+   amdgpu_bo_unref();
return r;
 }
 
-- 
2.23.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 2/3] drm/amdgpu/uvd:Allocate enc session bo for uvd6.0 ring IB test

2019-10-15 Thread Zhu, James
Allocate 256K enc session bo for uvd6.0 ring IB test to fix S3 resume
corruption issue.

Signed-off-by: James Zhu 
---
 drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 16 ++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c 
b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index 670784a..c79ce73 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -220,7 +220,7 @@ static int uvd_v6_0_enc_get_create_msg(struct amdgpu_ring 
*ring, uint32_t handle
return r;
 
ib = >ibs[0];
-   dummy = ib->gpu_addr + 1024;
+   dummy = ring->adev->vcn.enc_session_gpu_addr;
 
ib->length_dw = 0;
ib->ptr[ib->length_dw++] = 0x0018;
@@ -282,7 +282,7 @@ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring 
*ring,
return r;
 
ib = >ibs[0];
-   dummy = ib->gpu_addr + 1024;
+   dummy = ring->adev->vcn.enc_session_gpu_addr + 128 * PAGE_SIZE;
 
ib->length_dw = 0;
ib->ptr[ib->length_dw++] = 0x0018;
@@ -326,9 +326,16 @@ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring 
*ring,
  */
 static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
+   struct amdgpu_device *adev = ring->adev;
struct dma_fence *fence = NULL;
long r;
 
+   r = amdgpu_bo_create_kernel(adev, 2 * 128, PAGE_SIZE,
+   AMDGPU_GEM_DOMAIN_VRAM, >vcn.enc_session_bo,
+   >vcn.enc_session_gpu_addr, 
>vcn.enc_session_cpu_addr);
+   if (r)
+   return r;
+
r = uvd_v6_0_enc_get_create_msg(ring, 1, NULL);
if (r)
goto error;
@@ -345,6 +352,11 @@ static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring 
*ring, long timeout)
 
 error:
dma_fence_put(fence);
+
+   amdgpu_bo_free_kernel(>vcn.enc_session_bo,
+ >vcn.enc_session_gpu_addr,
+ (void **)>vcn.enc_session_cpu_addr);
+
return r;
 }
 
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 0/3] Fixed amdgpu resume failure from suspend

2019-10-15 Thread Zhu, James
UVD session info size can be up to 128K, so we need to allocate a large
enough buffer for the session info; otherwise the IB test can overwrite
other memory.

Bug: https://bugzilla.kernel.org/show_bug.cgi?id=204241

James Zhu (3):
  drm/amdgpu/uvd:Add uvd enc session bo
  drm/amdgpu/uvd:Allocate enc session bo for uvd6.0 ring IB test
  drm/amdgpu/uvd:Allocate enc session bo for uvd7.0 ring IB test

 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h |  4 
 drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c   | 16 ++--
 drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c   | 16 ++--
 3 files changed, 32 insertions(+), 4 deletions(-)

-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 1/3] drm/amdgpu/uvd:Add uvd enc session bo

2019-10-15 Thread Zhu, James
Add uvd enc session bo for uvd encode IB test.

Signed-off-by: James Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
index 5eb6328..1e39c8a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
@@ -67,6 +67,10 @@ struct amdgpu_uvd {
unsignedharvest_config;
/* store image width to adjust nb memory state */
unsigneddecode_image_width;
+
+   struct amdgpu_bo *enc_session_bo;
+   void *enc_session_cpu_addr;
+   uint64_t  enc_session_gpu_addr;
 };
 
 int amdgpu_uvd_sw_init(struct amdgpu_device *adev);
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 3/3] drm/amdgpu/uvd:Allocate enc session bo for uvd7.0 ring IB test

2019-10-15 Thread Zhu, James
Allocate 256K enc session bo for uvd7.0 ring IB test to fix S3 resume
corruption issue.

Signed-off-by: James Zhu 
---
 drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 16 ++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index 01f658f..1b17fc9 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -228,7 +228,7 @@ static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring 
*ring, uint32_t handle
return r;
 
ib = >ibs[0];
-   dummy = ib->gpu_addr + 1024;
+   dummy = ring->adev->vcn.enc_session_gpu_addr;
 
ib->length_dw = 0;
ib->ptr[ib->length_dw++] = 0x0018;
@@ -289,7 +289,7 @@ static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring 
*ring, uint32_t handl
return r;
 
ib = >ibs[0];
-   dummy = ib->gpu_addr + 1024;
+   dummy = ring->adev->vcn.enc_session_gpu_addr + 128 * PAGE_SIZE;
 
ib->length_dw = 0;
ib->ptr[ib->length_dw++] = 0x0018;
@@ -333,9 +333,16 @@ static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring 
*ring, uint32_t handl
  */
 static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
+   struct amdgpu_device *adev = ring->adev;
struct dma_fence *fence = NULL;
long r;
 
+   r = amdgpu_bo_create_kernel(adev, 2 * 128, PAGE_SIZE,
+   AMDGPU_GEM_DOMAIN_VRAM, >vcn.enc_session_bo,
+   >vcn.enc_session_gpu_addr, 
>vcn.enc_session_cpu_addr);
+   if (r)
+   return r;
+
r = uvd_v7_0_enc_get_create_msg(ring, 1, NULL);
if (r)
goto error;
@@ -352,6 +359,11 @@ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring 
*ring, long timeout)
 
 error:
dma_fence_put(fence);
+
+   amdgpu_bo_free_kernel(>vcn.enc_session_bo,
+ >vcn.enc_session_gpu_addr,
+ (void **)>vcn.enc_session_cpu_addr);
+
return r;
 }
 
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amd/display: Use pixel encoding 444 for dongle usb-c to hdmi

2019-10-15 Thread Julien Isorce
Hi,

Gentle ping ?

Thx
Julien

On Fri, Oct 11, 2019 at 12:31 PM Julien Isorce 
wrote:

> Hi Harry,
>
> Do you need more information ?
>
> Thx
> Julien
>
> On Tue, Oct 8, 2019 at 11:15 AM Julien Isorce 
> wrote:
>
>> Hi Harry,
>>
>> I can reproduce on LG, Samsung and NEC monitors.
>>
>> "Have you checked whether the driver picks RGB or YCBCR420 without your
>> patch?" -> it was selecting RGB .
>>
>> For example on https://commons.wikimedia.org/wiki/File:Gray_scale.jpg ,
>> the second band from the left, will be entirely pinkish.
>> Since the issue also happens without dongle, so with a direct cable from
>> the miniDP from the graphic card to DisplayPort on the screen I think there
>> is more serious issue with RGB output in amdgpu. But it is not easy to
>> reproduce, you should try on above image.
>>
>> In any case, the goal with the patch is just to get the same output when
>> using 2 screens at the same time, one connected to hdmi output of the
>> graphic card and one connected  to usb-c to graphic card (hdmi cable with
>> dongle). So prior this patch, the first one would use YCbCr 444 and the
>> second would use RGB.
>> After this patch, both will use YCbCr 444 (both are hdmi).
>> The patch does not change the case for miniDP to DisplayPort, the driver
>> will still use RGB. Because maybe the RGB issue is also specific to that
>> graphic card which
>> is VEGA"M". So that is why the patch only tries to match hdmi cases
>> together, whether it is direct connection or through usb-c.
>>
>> -
>> Julien
>>
>>
>>
>> On Tue, Oct 8, 2019 at 10:44 AM Harry Wentland  wrote:
>>
>>> Hi Julien,
>>>
>>> curious which monitor you're using.
>>>
>>> Have you checked whether the driver picks RGB or YCBCR420 without your
>>> patch?
>>>
>>> I'm not sure I understand how the pinkish color issue looks. Do you see
>>> a pinkish color at the transition from grey to another color? Or is the
>>> entire grey area pinkish?
>>>
>>> Thanks,
>>> Harry
>>>
>>> On 2019-10-08 12:06 p.m., Julien Isorce wrote:
>>> > Hi,
>>> >
>>> > Gentle ping ?
>>> >
>>> > Thx
>>> > Julien
>>> >
>>> > On Tue, Oct 1, 2019 at 3:21 PM Julien Isorce >> > > wrote:
>>> >
>>> > Fix pinkish color issue around grey areas. This also happens
>>> > when not using any dongle so directly with a usb-c to Display
>>> > Port cable. Meaning there is something wrong when using pixel
>>> > encoding RGB with amd driver in the general case. In the meantime
>>> > just use the same pixel encoding as when using HDMI without dongle.
>>> > This way users will see the same thing on 2 identical screens when
>>> > one is connected with hdmi-to-hdmi and the other is connected with
>>> > usb-c-to-hdmi.
>>> >
>>> > Signed-off-by: Julien Isorce >> > >
>>> > ---
>>> >  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 +
>>> >  1 file changed, 5 insertions(+)
>>> >
>>> > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>> > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>> > index d3f404f097eb..8139dcc0bfba 100644
>>> > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>> > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>> > @@ -3313,6 +3313,7 @@ static void
>>> > fill_stream_properties_from_drm_display_mode(
>>> >  {
>>> > struct dc_crtc_timing *timing_out = >timing;
>>> > const struct drm_display_info *info =
>>> >display_info;
>>> > +   const struct dc_link *link = stream->sink->link;
>>> >
>>> > memset(timing_out, 0, sizeof(struct dc_crtc_timing));
>>> >
>>> > @@ -3327,6 +3328,10 @@ static void
>>> > fill_stream_properties_from_drm_display_mode(
>>> > else if ((connector->display_info.color_formats &
>>> > DRM_COLOR_FORMAT_YCRCB444)
>>> > && stream->signal ==
>>> SIGNAL_TYPE_HDMI_TYPE_A)
>>> > timing_out->pixel_encoding =
>>> PIXEL_ENCODING_YCBCR444;
>>> > +   else if ((connector->display_info.color_formats &
>>> > DRM_COLOR_FORMAT_YCRCB444)
>>> > +   && stream->sink->sink_signal ==
>>> > SIGNAL_TYPE_DISPLAY_PORT
>>> > +   && link->dpcd_caps.dongle_type ==
>>> > DISPLAY_DONGLE_DP_HDMI_CONVERTER)
>>> > +   timing_out->pixel_encoding =
>>> PIXEL_ENCODING_YCBCR444;
>>> > else
>>> > timing_out->pixel_encoding = PIXEL_ENCODING_RGB;
>>> >
>>> > --
>>> > 2.17.1
>>> >
>>> >
>>> > ___
>>> > amd-gfx mailing list
>>> > amd-gfx@lists.freedesktop.org
>>> > https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>> >
>>>
>>
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: AMDGPU and 16B stack alignment

2019-10-15 Thread Arvind Sankar
On Tue, Oct 15, 2019 at 11:05:56AM -0700, Nick Desaulniers wrote:
> Hmmm...I would have liked to remove it outright, as it is an ABI
> mismatch that is likely to result in instability and non-fun-to-debug
> runtime issues in the future.  I suspect my patch does work for GCC
> 7.1+.  The question is: Do we want to either:
> 1. mark AMDGPU broken for GCC < 7.1, or
> 2. continue supporting it via stack alignment mismatch?
> 
> 2 is brittle, and may break at any point in the future, but if it's
> working for someone it does make me feel bad to outright disable it.
> What I'd imagine 2 looks like is (pseudo-code in a Makefile):
> 
> if CC_IS_GCC && GCC_VERSION < 7.1:
>   set stack alignment to 16B and hope for the best
> 
> So my diff would be amended to keep the stack alignment flags, but
> only to support GCC < 7.1.  And that assumes my change compiles with
> GCC 7.1+. (Looks like it does for me locally with GCC 8.3, but I would
> feel even more confident if someone with hardware to test on and GCC
> 7.1+ could boot test).
> -- 
> Thanks,
> ~Nick Desaulniers

If we do keep it, would adding -mstackrealign make it more robust?
That's simple and will only add the alignment to functions that require
16-byte alignment (at least on gcc).

Alternative is to use
__attribute__((force_align_arg_pointer)) on functions that might be
called from 8-byte-aligned code.

It looks like -mstackrealign should work from gcc 5.3 onwards.


Re: AMDGPU and 16B stack alignment

2019-10-15 Thread Nick Desaulniers
On Tue, Oct 15, 2019 at 11:30 AM Alex Deucher  wrote:
>
> On Tue, Oct 15, 2019 at 2:07 PM Nick Desaulniers
>  wrote:
> >
> > On Tue, Oct 15, 2019 at 12:19 AM Arnd Bergmann  wrote:
> > >
> > > On Tue, Oct 15, 2019 at 9:08 AM S, Shirish  wrote:
> > > > On 10/15/2019 3:52 AM, Nick Desaulniers wrote:
> > >
> > > > My gcc build fails with below errors:
> > > >
> > > > dcn_calcs.c:1:0: error: -mpreferred-stack-boundary=3 is not between 4 
> > > > and 12
> > > >
> > > > dcn_calc_math.c:1:0: error: -mpreferred-stack-boundary=3 is not between 
> > > > 4 and 12
> >
> > I was able to reproduce this failure on pre-7.1 versions of GCC.  It
> > seems that when:
> > 1. code is using doubles
> > 2. setting -mpreferred-stack-boundary=3 -mno-sse2, ie. 8B stack alignment
> > than GCC produces that error:
> > https://godbolt.org/z/7T8nbH
> >
> > That's already a tall order of constraints, so it's understandable
> > that the compiler would just error likely during instruction
> > selection, but was eventually taught how to solve such constraints.
> >
> > > >
> > > > While GPF observed on clang builds seem to be fixed.
> >
> > Thanks for the report.  Your testing these patches is invaluable, Shirish!
> >
> > >
> > > Ok, so it seems that gcc insists on having at least 2^4 bytes stack
> > > alignment when
> > > SSE is enabled on x86-64, but does not actually rely on that for
> > > correct operation
> > > unless it's using sse2. So -msse always has to be paired with
> > >  -mpreferred-stack-boundary=3.
> >
> > Seemingly only for older versions of GCC, pre 7.1.
> >
> > >
> > > For clang, it sounds like the opposite is true: when passing 16 byte
> > > stack alignment
> > > and having sse/sse2 enabled, it requires the incoming stack to be 16
> > > byte aligned,
> >
> > I don't think it requires the incoming stack to be 16B aligned for
> > sse2, I think it requires the incoming and current stack alignment to
> > match. Today it does not, which is why we observe GPFs.
> >
> > > but passing 8 byte alignment makes it do the right thing.
> > >
> > > So, should we just always pass $(call cc-option, 
> > > -mpreferred-stack-boundary=4)
> > > to get the desired outcome on both?
> >
> > Hmmm...I would have liked to remove it outright, as it is an ABI
> > mismatch that is likely to result in instability and non-fun-to-debug
> > runtime issues in the future.  I suspect my patch does work for GCC
> > 7.1+.  The question is: Do we want to either:
> > 1. mark AMDGPU broken for GCC < 7.1, or
> > 2. continue supporting it via stack alignment mismatch?
> >
> > 2 is brittle, and may break at any point in the future, but if it's
> > working for someone it does make me feel bad to outright disable it.
> > What I'd imagine 2 looks like is (pseudo-code in a Makefile):
>
> Well, it's been working as is for years now, at least with gcc, so I'd
> hate to break that.

Ok, I'm happy to leave that as is for GCC, then.  Would you prefer I
modify it for GCC >7.1 or just leave it alone (maybe I'll add a
comment about *why* it's done for GCC)? Would you prefer 1 patch or 4?

>
> Alex
>
> >
> > if CC_IS_GCC && GCC_VERSION < 7.1:
> >   set stack alignment to 16B and hope for the best

Ie, this ^

> >
> > So my diff would be amended to keep the stack alignment flags, but
> > only to support GCC < 7.1.  And that assumes my change compiles with
> > GCC 7.1+. (Looks like it does for me locally with GCC 8.3, but I would
> > feel even more confident if someone with hardware to test on and GCC
> > 7.1+ could boot test).

-- 
Thanks,
~Nick Desaulniers


[RFC PATCH RESEND] drm/amd/display: Add back missing backlight level rounding

2019-10-15 Thread Lukáš Krejčí
[Why]
Having the rounding of the backlight value restored to the way it was
seemingly gets rid of backlight flickering on certain Stoney Ridge
laptops.

[How]
Rescale the backlight level between min and max input signal value and
round it to a number between 0x0 and 0xFF. Then, use the rounding mode
that was previously in driver_set_backlight_level() and
dmcu_set_backlight_level(), i.e. rescale the backlight level between 0x0
and 0x1000 by multiplying it by 0x101 and use the most significant bit
of the fraction (or in this case the 8th bit of the value because it's
the same thing, e.g. C3 * 0x101 = 0xC3C3 and C3 * 0x10101 = 0xC3C3C3) to
round it.

Fixes: 262485a50fd4 ("drm/amd/display: Expand dc to use 16.16 bit backlight")
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=204957
Signed-off-by: Lukáš Krejčí 
---

Notes:
Bug:
- Can be reproduced on HP 15-rb020nc (Stoney Ridge R2 E2-9000e APU) and
  Acer Aspire 3 A315-21G-91JL (Stoney Ridge R3 A4-9120 APU).

- For me, the bug is inconsistent - it does not happen on every boot,
  but if it does happen, it's usually within three minutes after bootup.

- It concerns the backlight. That means it can be reproduced on the
  framebuffer console.

- Reproduces on Arch Linux (custom built) live CD, linux kernel v5.3.2
  and Ubuntu 19.04, kernel-ppa/mainline v5.0.0-rc1, v5.0.0, v5.3.2, v5.3.4,
  v5.4-rc2.

- Can not be reproduced on kernel v5.3.4 with this patch applied or on
  v4.20.0, 4.20.17, 4.19.75 (this bug is a regression).

- The other person that reproduced the issue (see the Bugzilla link
  above) confirmed that the patch works for them too.

Patch:
- Is the comment modified by this commit correct? Both
  driver_set_backlight_level() and dmcu_set_backlight_level() check the
  17th bit of `brightness` aka `backlight_pwm_u16_16`, but
  262485a50fd4532a8d71165190adc7a0a19bcc9e ("drm/amd/display: Expand dc
  to use 16.16 bit backlight") specifically mentions 0xFFFF as the max
  backlight value.

- use_smooth_brightness is false (no DMCU firmware available) on my
  laptop, so the other code path (dmcu_set_backlight_level()) is
  untested.

- I'm not sure why the rounding fixes the issue and whether this
  function is the right place to add back the rounding (and whether it
  even is the right way to solve the issue), so that's why this patch is
  RFC.

 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c   | 17 +++--
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index a52f0b13a2c8..af9a5f46b671 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -2083,17 +2083,22 @@ static int amdgpu_dm_backlight_update_status(struct 
backlight_device *bd)
 * The brightness input is in the range 0-255
 * It needs to be rescaled to be between the
 * requested min and max input signal
-*
-* It also needs to be scaled up by 0x101 to
-* match the DC interface which has a range of
-* 0 to 0xFFFF
 */
brightness =
brightness
-   * 0x101
+   * 0x100
* (caps.max_input_signal - caps.min_input_signal)
/ AMDGPU_MAX_BL_LEVEL
-   + caps.min_input_signal * 0x101;
+   + caps.min_input_signal * 0x100;
+
+   brightness = (brightness >> 8) + ((brightness >> 7) & 1);
+   /*
+* It also needs to be scaled up by 0x101 and
+* rounded off to match the DC interface which
* has a range of 0 to 0xFFFF
+*/
+   brightness *= 0x101;
+   brightness += (brightness >> 7) & 1;
 
if (dc_link_set_backlight_level(dm->backlight_link,
brightness, 0))
-- 
2.23.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdgpu: always reset asic when going into suspend

2019-10-15 Thread Alex Deucher
On Tue, Oct 15, 2019 at 2:50 AM Daniel Drake  wrote:
>
> On Asus UX434DA (Ryzen7 3700U), upon resume from s2idle, the screen
> turns on again and shows the pre-suspend image, but the display remains
> frozen from that point onwards.
>
> The kernel logs show errors:
>
>  [drm] psp command failed and response status is (0x7)
>  [drm] Fence fallback timer expired on ring sdma0
>  [drm] Fence fallback timer expired on ring gfx
>  amdgpu :03:00.0: [drm:amdgpu_ib_ring_tests] *ERROR* IB test failed on 
> gfx (-22).
>  [drm:process_one_work] *ERROR* ib ring test failed (-22).
>
> This can also be reproduced with pm_test:
>  # echo devices > /sys/power/pm_test
>  # echo freeze > /sys/power/mem
>
> The same reproducer causes the same problem on Asus X512DK (Ryzen5 3500U)
> even though that model is normally able to suspend and resume OK via S3.
>
> Experimenting, I observed that this error condition can be invoked on
> any amdgpu product by executing in succession:
>
>   amdgpu_device_suspend(drm_dev, true, true);
>   amdgpu_device_resume(drm_dev, true, true);
>
> i.e. it appears that the resume routine is unable to get the device out
> of suspended state, except for the S3 suspend case where it presumably has
> a bit of extra help from the firmware or hardware.
>
> However, I also observed that the runtime suspend/resume routines work
> OK when tested like this, which lead me to the key difference in these
> two cases: the ASIC reset, which only happens in the runtime suspend path.
>
> Since it takes less than 1ms, we should do the ASIC reset in all
> suspend paths, fixing resume from s2idle on these products.
>

Is s2idle actually powering down the GPU?  Do you see a difference in
power usage?  I think you are just working around the fact that the
GPU never actually gets powered down.  Leaving the GPU in the reset
state probably uses more power than not suspending it in the first
place.

Alex

> Link: https://bugs.freedesktop.org/show_bug.cgi?id=111811
> Signed-off-by: Daniel Drake 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 9 +
>  1 file changed, 5 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 5a1939dbd4e3..7f4870e974fb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -3082,15 +3082,16 @@ int amdgpu_device_suspend(struct drm_device *dev, 
> bool suspend, bool fbcon)
>  */
> amdgpu_bo_evict_vram(adev);
>
> +   amdgpu_asic_reset(adev);
> +   r = amdgpu_asic_reset(adev);
> +   if (r)
> +   DRM_ERROR("amdgpu asic reset failed\n");
> +
> pci_save_state(dev->pdev);
> if (suspend) {
> /* Shut down the device */
> pci_disable_device(dev->pdev);
> pci_set_power_state(dev->pdev, PCI_D3hot);
> -   } else {
> -   r = amdgpu_asic_reset(adev);
> -   if (r)
> -   DRM_ERROR("amdgpu asic reset failed\n");
> }
>
> return 0;
> --
> 2.20.1
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: AMDGPU and 16B stack alignment

2019-10-15 Thread Alex Deucher
On Tue, Oct 15, 2019 at 2:07 PM Nick Desaulniers
 wrote:
>
> On Tue, Oct 15, 2019 at 12:19 AM Arnd Bergmann  wrote:
> >
> > On Tue, Oct 15, 2019 at 9:08 AM S, Shirish  wrote:
> > > On 10/15/2019 3:52 AM, Nick Desaulniers wrote:
> >
> > > My gcc build fails with below errors:
> > >
> > > dcn_calcs.c:1:0: error: -mpreferred-stack-boundary=3 is not between 4 and 
> > > 12
> > >
> > > dcn_calc_math.c:1:0: error: -mpreferred-stack-boundary=3 is not between 4 
> > > and 12
>
> I was able to reproduce this failure on pre-7.1 versions of GCC.  It
> seems that when:
> 1. code is using doubles
> 2. setting -mpreferred-stack-boundary=3 -mno-sse2, ie. 8B stack alignment
> than GCC produces that error:
> https://godbolt.org/z/7T8nbH
>
> That's already a tall order of constraints, so it's understandable
> that the compiler would just error likely during instruction
> selection, but was eventually taught how to solve such constraints.
>
> > >
> > > While GPF observed on clang builds seem to be fixed.
>
> Thanks for the report.  Your testing these patches is invaluable, Shirish!
>
> >
> > Ok, so it seems that gcc insists on having at least 2^4 bytes stack
> > alignment when
> > SSE is enabled on x86-64, but does not actually rely on that for
> > correct operation
> > unless it's using sse2. So -msse always has to be paired with
> >  -mpreferred-stack-boundary=3.
>
> Seemingly only for older versions of GCC, pre 7.1.
>
> >
> > For clang, it sounds like the opposite is true: when passing 16 byte
> > stack alignment
> > and having sse/sse2 enabled, it requires the incoming stack to be 16
> > byte aligned,
>
> I don't think it requires the incoming stack to be 16B aligned for
> sse2, I think it requires the incoming and current stack alignment to
> match. Today it does not, which is why we observe GPFs.
>
> > but passing 8 byte alignment makes it do the right thing.
> >
> > So, should we just always pass $(call cc-option, 
> > -mpreferred-stack-boundary=4)
> > to get the desired outcome on both?
>
> Hmmm...I would have liked to remove it outright, as it is an ABI
> mismatch that is likely to result in instability and non-fun-to-debug
> runtime issues in the future.  I suspect my patch does work for GCC
> 7.1+.  The question is: Do we want to either:
> 1. mark AMDGPU broken for GCC < 7.1, or
> 2. continue supporting it via stack alignment mismatch?
>
> 2 is brittle, and may break at any point in the future, but if it's
> working for someone it does make me feel bad to outright disable it.
> What I'd image 2 looks like is (psuedo code in a Makefile):

Well, it's been working as is for years now, at least with gcc, so I'd
hate to break that.

Alex

>
> if CC_IS_GCC && GCC_VERSION < 7.1:
>   set stack alignment to 16B and hope for the best
>
> So my diff would be amended to keep the stack alignment flags, but
> only to support GCC < 7.1.  And that assumes my change compiles with
> GCC 7.1+. (Looks like it does for me locally with GCC 8.3, but I would
> feel even more confident if someone with hardware to test on and GCC
> 7.1+ could boot test).
> --
> Thanks,
> ~Nick Desaulniers
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH hmm 15/15] mm/hmm: remove hmm_mirror and related

2019-10-15 Thread Jason Gunthorpe
From: Jason Gunthorpe 

The only two users of this are now converted to use mmu_range_notifier,
delete all the code and update hmm.rst.

Signed-off-by: Jason Gunthorpe 
---
 Documentation/vm/hmm.rst | 105 ---
 include/linux/hmm.h  | 183 +
 mm/Kconfig   |   1 -
 mm/hmm.c | 284 +--
 4 files changed, 33 insertions(+), 540 deletions(-)

diff --git a/Documentation/vm/hmm.rst b/Documentation/vm/hmm.rst
index 0a5960beccf76d..a247643035c4e2 100644
--- a/Documentation/vm/hmm.rst
+++ b/Documentation/vm/hmm.rst
@@ -147,49 +147,16 @@ Address space mirroring implementation and API
 Address space mirroring's main objective is to allow duplication of a range of
 CPU page table into a device page table; HMM helps keep both synchronized. A
 device driver that wants to mirror a process address space must start with the
-registration of an hmm_mirror struct::
-
- int hmm_mirror_register(struct hmm_mirror *mirror,
- struct mm_struct *mm);
-
-The mirror struct has a set of callbacks that are used
-to propagate CPU page tables::
-
- struct hmm_mirror_ops {
- /* release() - release hmm_mirror
-  *
-  * @mirror: pointer to struct hmm_mirror
-  *
-  * This is called when the mm_struct is being released.  The callback
-  * must ensure that all access to any pages obtained from this mirror
-  * is halted before the callback returns. All future access should
-  * fault.
-  */
- void (*release)(struct hmm_mirror *mirror);
-
- /* sync_cpu_device_pagetables() - synchronize page tables
-  *
-  * @mirror: pointer to struct hmm_mirror
-  * @update: update information (see struct mmu_notifier_range)
-  * Return: -EAGAIN if update.blockable false and callback need to
-  * block, 0 otherwise.
-  *
-  * This callback ultimately originates from mmu_notifiers when the CPU
-  * page table is updated. The device driver must update its page table
-  * in response to this callback. The update argument tells what action
-  * to perform.
-  *
-  * The device driver must not return from this callback until the device
-  * page tables are completely updated (TLBs flushed, etc); this is a
-  * synchronous call.
-  */
- int (*sync_cpu_device_pagetables)(struct hmm_mirror *mirror,
-   const struct hmm_update *update);
- };
-
-The device driver must perform the update action to the range (mark range
-read only, or fully unmap, etc.). The device must complete the update before
-the driver callback returns.
+registration of a mmu_range_notifier::
+
 mrn->ops = &driver_ops;
+ int mmu_range_notifier_insert(struct mmu_range_notifier *mrn,
+ unsigned long start, unsigned long length,
+ struct mm_struct *mm);
+
+During the driver_ops->invalidate() callback the device driver must perform
+the update action to the range (mark range read only, or fully unmap,
+etc.). The device must complete the update before the driver callback returns.
 
 When the device driver wants to populate a range of virtual addresses, it can
 use::
@@ -216,70 +183,46 @@ The usage pattern is::
   struct hmm_range range;
   ...
 
+  range.notifier = &mrn;
   range.start = ...;
   range.end = ...;
   range.pfns = ...;
   range.flags = ...;
   range.values = ...;
   range.pfn_shift = ...;
-  hmm_range_register(, mirror);
 
-  /*
-   * Just wait for range to be valid, safe to ignore return value as we
-   * will use the return value of hmm_range_fault() below under the
-   * mmap_sem to ascertain the validity of the range.
-   */
-  hmm_range_wait_until_valid(, TIMEOUT_IN_MSEC);
+  if (!mmget_not_zero(mrn->notifier.mm))
+  return -EFAULT;
 
  again:
+  range.notifier_seq = mmu_range_read_begin(&mrn);
   down_read(>mmap_sem);
   ret = hmm_range_fault(, HMM_RANGE_SNAPSHOT);
   if (ret) {
   up_read(>mmap_sem);
-  if (ret == -EBUSY) {
-/*
- * No need to check hmm_range_wait_until_valid() return value
- * on retry we will get proper error with hmm_range_fault()
- */
-hmm_range_wait_until_valid(, TIMEOUT_IN_MSEC);
-goto again;
-  }
-  hmm_range_unregister();
+  if (ret == -EBUSY)
+ goto again;
   return ret;
   }
+  up_read(>mmap_sem);
+
   take_lock(driver->update);
-  if (!hmm_range_valid()) {
+  if (mmu_range_read_retry(&range.notifier, range.notifier_seq)) {
   release_lock(driver->update);
-  up_read(>mmap_sem);
   goto again;
   }
 
-  // Use pfns array content to update device page table
+  /* Use pfns array content to update device page table,
+   * under the update lock */
 
-  hmm_range_unregister();
 

[PATCH hmm 13/15] drm/amdgpu: Use mmu_range_insert instead of hmm_mirror

2019-10-15 Thread Jason Gunthorpe
From: Jason Gunthorpe 

Remove the interval tree in the driver and rely on the tree maintained by
the mmu_notifier for delivering mmu_notifier invalidation callbacks.

For some reason amdgpu has a very complicated arrangement where it tries
to prevent duplicate entries in the interval_tree, this is not necessary,
each amdgpu_bo can be its own stand alone entry. interval_tree already
allows duplicates and overlaps in the tree.

Also, there is no need to remove entries upon a release callback, the
mmu_range API safely allows objects to remain registered beyond the
lifetime of the mm. The driver only has to stop touching the pages during
release.

Cc: Alex Deucher 
Cc: Christian König 
Cc: David (ChunMing) Zhou 
Cc: amd-gfx@lists.freedesktop.org
Signed-off-by: Jason Gunthorpe 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   |   2 +
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |   5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c|   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c| 341 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h|   4 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h|  13 +-
 6 files changed, 84 insertions(+), 282 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index bd37df5dd6d048..60591a5d420021 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1006,6 +1006,8 @@ struct amdgpu_device {
struct mutex  lock_reset;
struct amdgpu_doorbell_index doorbell_index;
 
+   struct mutexnotifier_lock;
+
int asic_reset_res;
struct work_struct  xgmi_reset_work;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 6d021ecc8d598f..47700302a08b7f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -481,8 +481,7 @@ static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem 
*mem,
  *
  * Returns 0 for success, negative errno for errors.
  */
-static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
-  uint64_t user_addr)
+static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr)
 {
struct amdkfd_process_info *process_info = mem->process_info;
struct amdgpu_bo *bo = mem->bo;
@@ -1195,7 +1194,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
 
if (user_addr) {
-   ret = init_user_pages(*mem, current->mm, user_addr);
+   ret = init_user_pages(*mem, user_addr);
if (ret)
goto allocate_init_user_pages_failed;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 5a1939dbd4e3e6..38f97998aaddb2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2633,6 +2633,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(>virt.vf_errors.lock);
hash_init(adev->mn_hash);
mutex_init(>lock_reset);
+   mutex_init(>notifier_lock);
mutex_init(>virt.dpm_mutex);
mutex_init(>psp.mutex);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 31d4deb5d29484..4ffd7b90f4d907 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -50,66 +50,6 @@
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
 
-/**
- * struct amdgpu_mn_node
- *
- * @it: interval node defining start-last of the affected address range
- * @bos: list of all BOs in the affected address range
- *
- * Manages all BOs which are affected of a certain range of address space.
- */
-struct amdgpu_mn_node {
-   struct interval_tree_node   it;
-   struct list_headbos;
-};
-
-/**
- * amdgpu_mn_destroy - destroy the HMM mirror
- *
- * @work: previously sheduled work item
- *
- * Lazy destroys the notifier from a work item
- */
-static void amdgpu_mn_destroy(struct work_struct *work)
-{
-   struct amdgpu_mn *amn = container_of(work, struct amdgpu_mn, work);
-   struct amdgpu_device *adev = amn->adev;
-   struct amdgpu_mn_node *node, *next_node;
-   struct amdgpu_bo *bo, *next_bo;
-
-   mutex_lock(>mn_lock);
-   down_write(>lock);
-   hash_del(>node);
-   rbtree_postorder_for_each_entry_safe(node, next_node,
->objects.rb_root, it.rb) {
-   list_for_each_entry_safe(bo, next_bo, >bos, mn_list) {
-   bo->mn = NULL;
-   list_del_init(>mn_list);
-   }
-   kfree(node);
-   }
-   up_write(>lock);
-   mutex_unlock(>mn_lock);
-
-   hmm_mirror_unregister(>mirror);
-   kfree(amn);
-}
-
-/**
- * 

[PATCH hmm 14/15] drm/amdgpu: Use mmu_range_notifier instead of hmm_mirror

2019-10-15 Thread Jason Gunthorpe
From: Jason Gunthorpe 

Convert the collision-retry lock around hmm_range_fault to use the one now
provided by the mmu_range notifier.

Although this driver does not seem to use the collision retry lock that
hmm provides correctly, it can still be converted over to use the
mmu_range_notifier api instead of hmm_mirror without too much trouble.

This also deletes another place where a driver is associating additional
data (struct amdgpu_mn) with a mmu_struct.

Cc: Alex Deucher 
Cc: Christian König 
Cc: David (ChunMing) Zhou 
Cc: amd-gfx@lists.freedesktop.org
Signed-off-by: Jason Gunthorpe 
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |   4 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c|  14 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c| 130 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h|  49 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   |  76 +-
 5 files changed, 50 insertions(+), 223 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 47700302a08b7f..1bcedb9b477dce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1738,6 +1738,10 @@ static int update_invalid_user_pages(struct 
amdkfd_process_info *process_info,
return ret;
}
 
+   /*
+* FIXME: Cannot ignore the return code, must hold
+* notifier_lock
+*/
amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
 
/* Mark the BO as valid unless it was invalidated
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 2e53feed40e230..76771f5f0b60ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -607,8 +607,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
e->tv.num_shared = 2;
 
amdgpu_bo_list_get_list(p->bo_list, >validated);
-   if (p->bo_list->first_userptr != p->bo_list->num_entries)
-   p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
 
INIT_LIST_HEAD();
amdgpu_vm_get_pd_bo(>vm, >validated, >vm_pd);
@@ -1291,11 +1289,11 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
if (r)
goto error_unlock;
 
-   /* No memory allocation is allowed while holding the mn lock.
-* p->mn is hold until amdgpu_cs_submit is finished and fence is added
-* to BOs.
+   /* No memory allocation is allowed while holding the notifier lock.
+* The lock is held until amdgpu_cs_submit is finished and fence is
+* added to BOs.
 */
-   amdgpu_mn_lock(p->mn);
+   mutex_lock(>adev->notifier_lock);
 
/* If userptr are invalidated after amdgpu_cs_parser_bos(), return
 * -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl.
@@ -1338,13 +1336,13 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
amdgpu_vm_move_to_lru_tail(p->adev, >vm);
 
ttm_eu_fence_buffer_objects(>ticket, >validated, p->fence);
-   amdgpu_mn_unlock(p->mn);
+   mutex_unlock(>adev->notifier_lock);
 
return 0;
 
 error_abort:
drm_sched_job_cleanup(>base);
-   amdgpu_mn_unlock(p->mn);
+   mutex_unlock(>adev->notifier_lock);
 
 error_unlock:
amdgpu_job_free(job);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 4ffd7b90f4d907..cf1efe7ffc3a83 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -50,28 +50,6 @@
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
 
-/**
- * amdgpu_mn_lock - take the write side lock for this notifier
- *
- * @mn: our notifier
- */
-void amdgpu_mn_lock(struct amdgpu_mn *mn)
-{
-   if (mn)
-   down_write(>lock);
-}
-
-/**
- * amdgpu_mn_unlock - drop the write side lock for this notifier
- *
- * @mn: our notifier
- */
-void amdgpu_mn_unlock(struct amdgpu_mn *mn)
-{
-   if (mn)
-   up_write(>lock);
-}
-
 /**
  * amdgpu_mn_invalidate_gfx - callback to notify about mm change
  *
@@ -143,92 +121,6 @@ static const struct mmu_range_notifier_ops 
amdgpu_mn_hsa_ops = {
.invalidate = amdgpu_mn_invalidate_hsa,
 };
 
-static int amdgpu_mn_sync_pagetables(struct hmm_mirror *mirror,
-const struct mmu_notifier_range *update)
-{
-   struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
-
-   if (!mmu_notifier_range_blockable(update))
-   return false;
-
-   down_read(>lock);
-   up_read(>lock);
-   return 0;
-}
-
-/* Low bits of any reasonable mm pointer will be unused due to struct
- * alignment. Use these bits to make a unique key from the mm pointer
- * and notifier type.
- */
-#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + 

[PATCH hmm 06/15] RDMA/hfi1: Use mmu_range_notifier_inset for user_exp_rcv

2019-10-15 Thread Jason Gunthorpe
From: Jason Gunthorpe 

This converts one of the two users of mmu_notifiers to use the new API.
The conversion is fairly straightforward, however the existing use of
notifiers here seems to be racey.

Cc: Mike Marciniszyn 
Cc: Dennis Dalessandro 
Signed-off-by: Jason Gunthorpe 
---
 drivers/infiniband/hw/hfi1/file_ops.c |   2 +-
 drivers/infiniband/hw/hfi1/hfi.h  |   2 +-
 drivers/infiniband/hw/hfi1/user_exp_rcv.c | 144 --
 drivers/infiniband/hw/hfi1/user_exp_rcv.h |   3 +-
 4 files changed, 58 insertions(+), 93 deletions(-)

diff --git a/drivers/infiniband/hw/hfi1/file_ops.c 
b/drivers/infiniband/hw/hfi1/file_ops.c
index f9a7e9d29c8ba2..7c5e3fb224139a 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -1138,7 +1138,7 @@ static int get_ctxt_info(struct hfi1_filedata *fd, 
unsigned long arg, u32 len)
HFI1_CAP_UGET_MASK(uctxt->flags, MASK) |
HFI1_CAP_KGET_MASK(uctxt->flags, K2U);
/* adjust flag if this fd is not able to cache */
-   if (!fd->handler)
+   if (!fd->use_mn)
cinfo.runtime_flags |= HFI1_CAP_TID_UNMAP; /* no caching */
 
cinfo.num_active = hfi1_count_active_units();
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index fa45350a9a1d32..fc10d65fc3e13c 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1444,7 +1444,7 @@ struct hfi1_filedata {
/* for cpu affinity; -1 if none */
int rec_cpu_num;
u32 tid_n_pinned;
-   struct mmu_rb_handler *handler;
+   bool use_mn;
struct tid_rb_node **entry_to_rb;
spinlock_t tid_lock; /* protect tid_[limit,used] counters */
u32 tid_limit;
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c 
b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
index 3592a9ec155e85..2aca28a0b3ca47 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
@@ -59,11 +59,10 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd,
  struct tid_user_buf *tbuf,
  u32 rcventry, struct tid_group *grp,
  u16 pageidx, unsigned int npages);
-static int tid_rb_insert(void *arg, struct mmu_rb_node *node);
 static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
struct tid_rb_node *tnode);
-static void tid_rb_remove(void *arg, struct mmu_rb_node *node);
-static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode);
+static bool tid_rb_invalidate(struct mmu_range_notifier *mrn,
+ const struct mmu_notifier_range *range);
 static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *,
struct tid_group *grp,
unsigned int start, u16 count,
@@ -73,10 +72,8 @@ static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 
tidinfo,
  struct tid_group **grp);
 static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node);
 
-static struct mmu_rb_ops tid_rb_ops = {
-   .insert = tid_rb_insert,
-   .remove = tid_rb_remove,
-   .invalidate = tid_rb_invalidate
+static const struct mmu_range_notifier_ops tid_mn_ops = {
+   .invalidate = tid_rb_invalidate,
 };
 
 /*
@@ -87,7 +84,6 @@ static struct mmu_rb_ops tid_rb_ops = {
 int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd,
   struct hfi1_ctxtdata *uctxt)
 {
-   struct hfi1_devdata *dd = uctxt->dd;
int ret = 0;
 
spin_lock_init(>tid_lock);
@@ -109,20 +105,7 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd,
fd->entry_to_rb = NULL;
return -ENOMEM;
}
-
-   /*
-* Register MMU notifier callbacks. If the registration
-* fails, continue without TID caching for this context.
-*/
-   ret = hfi1_mmu_rb_register(fd, fd->mm, _rb_ops,
-  dd->pport->hfi1_wq,
-  >handler);
-   if (ret) {
-   dd_dev_info(dd,
-   "Failed MMU notifier registration %d\n",
-   ret);
-   ret = 0;
-   }
+   fd->use_mn = true;
}
 
/*
@@ -139,7 +122,7 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd,
 * init.
 */
spin_lock(>tid_lock);
-   if (uctxt->subctxt_cnt && fd->handler) {
+   if (uctxt->subctxt_cnt && fd->use_mn) {
u16 remainder;
 
fd->tid_limit = uctxt->expected_count / uctxt->subctxt_cnt;
@@ -158,18 +141,10 @@ void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
 {
struct hfi1_ctxtdata 

[PATCH hmm 07/15] drm/radeon: use mmu_range_notifier_insert

2019-10-15 Thread Jason Gunthorpe
From: Jason Gunthorpe 

The new API is an exact match for the needs of radeon.

For some reason radeon tries to remove overlapping ranges from the
interval tree, but interval trees (and mmu_range_notifier_insert)
support overlapping ranges directly. Simply delete all this code.

Since this driver is missing a invalidate_range_end callback, but
still calls get_user_pages(), it cannot be correct against all races.

Cc: Alex Deucher 
Cc: Christian König 
Cc: David (ChunMing) Zhou 
Cc: amd-gfx@lists.freedesktop.org
Cc: Petr Cvek 
Signed-off-by: Jason Gunthorpe 
---
 drivers/gpu/drm/radeon/radeon.h|   9 +-
 drivers/gpu/drm/radeon/radeon_mn.c | 218 ++---
 2 files changed, 51 insertions(+), 176 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index d59b004f669583..27959f3ace1152 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -68,6 +68,10 @@
 #include 
 #include 
 
+#ifdef CONFIG_MMU_NOTIFIER
+#include 
+#endif
+
 #include 
 #include 
 #include 
@@ -509,8 +513,9 @@ struct radeon_bo {
struct ttm_bo_kmap_obj  dma_buf_vmap;
pid_t   pid;
 
-   struct radeon_mn*mn;
-   struct list_headmn_list;
+#ifdef CONFIG_MMU_NOTIFIER
+   struct mmu_range_notifier   notifier;
+#endif
 };
 #define gem_to_radeon_bo(gobj) container_of((gobj), struct radeon_bo, tbo.base)
 
diff --git a/drivers/gpu/drm/radeon/radeon_mn.c 
b/drivers/gpu/drm/radeon/radeon_mn.c
index dbab9a3a969b9e..2ea90874c535f5 100644
--- a/drivers/gpu/drm/radeon/radeon_mn.c
+++ b/drivers/gpu/drm/radeon/radeon_mn.c
@@ -36,131 +36,50 @@
 
 #include "radeon.h"
 
-struct radeon_mn {
-   struct mmu_notifier mn;
-
-   /* objects protected by lock */
-   struct mutexlock;
-   struct rb_root_cached   objects;
-};
-
-struct radeon_mn_node {
-   struct interval_tree_node   it;
-   struct list_headbos;
-};
-
 /**
- * radeon_mn_invalidate_range_start - callback to notify about mm change
+ * radeon_mn_invalidate - callback to notify about mm change
  *
  * @mn: our notifier
- * @mn: the mm this callback is about
- * @start: start of updated range
- * @end: end of updated range
+ * @range: the VMA under invalidation
  *
  * We block for all BOs between start and end to be idle and
  * unmap them by move them into system domain again.
  */
-static int radeon_mn_invalidate_range_start(struct mmu_notifier *mn,
-   const struct mmu_notifier_range *range)
+static bool radeon_mn_invalidate(struct mmu_range_notifier *mn,
+const struct mmu_notifier_range *range)
 {
-   struct radeon_mn *rmn = container_of(mn, struct radeon_mn, mn);
+   struct radeon_bo *bo = container_of(mn, struct radeon_bo, notifier);
struct ttm_operation_ctx ctx = { false, false };
-   struct interval_tree_node *it;
-   unsigned long end;
-   int ret = 0;
-
-   /* notification is exclusive, but interval is inclusive */
-   end = range->end - 1;
-
-   /* TODO we should be able to split locking for interval tree and
-* the tear down.
-*/
-   if (mmu_notifier_range_blockable(range))
-   mutex_lock(>lock);
-   else if (!mutex_trylock(>lock))
-   return -EAGAIN;
-
-   it = interval_tree_iter_first(>objects, range->start, end);
-   while (it) {
-   struct radeon_mn_node *node;
-   struct radeon_bo *bo;
-   long r;
-
-   if (!mmu_notifier_range_blockable(range)) {
-   ret = -EAGAIN;
-   goto out_unlock;
-   }
-
-   node = container_of(it, struct radeon_mn_node, it);
-   it = interval_tree_iter_next(it, range->start, end);
+   long r;
 
-   list_for_each_entry(bo, >bos, mn_list) {
+   if (!bo->tbo.ttm || bo->tbo.ttm->state != tt_bound)
+   return true;
 
-   if (!bo->tbo.ttm || bo->tbo.ttm->state != tt_bound)
-   continue;
+   if (!mmu_notifier_range_blockable(range))
+   return false;
 
-   r = radeon_bo_reserve(bo, true);
-   if (r) {
-   DRM_ERROR("(%ld) failed to reserve user bo\n", 
r);
-   continue;
-   }
-
-   r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv,
-   true, false, MAX_SCHEDULE_TIMEOUT);
-   if (r <= 0)
-   DRM_ERROR("(%ld) failed to wait for user bo\n", 
r);
-
-   radeon_ttm_placement_from_domain(bo, 
RADEON_GEM_DOMAIN_CPU);
-   r = ttm_bo_validate(>tbo, >placement, );
-   if (r)
-   

[PATCH hmm 09/15] xen/gntdev: use mmu_range_notifier_insert

2019-10-15 Thread Jason Gunthorpe
From: Jason Gunthorpe 

gntdev simply wants to monitor a specific VMA for any notifier events,
this can be done straightforwardly using mmu_range_notifier_insert() over
the VMA's VA range.

The notifier should be attached until the original VMA is destroyed.

It is unclear if any of this is even sane, but at least a lot of duplicate
code is removed.

Cc: Oleksandr Andrushchenko 
Cc: Boris Ostrovsky 
Cc: xen-de...@lists.xenproject.org
Cc: Juergen Gross 
Cc: Stefano Stabellini 
Signed-off-by: Jason Gunthorpe 
---
 drivers/xen/gntdev-common.h |   8 +-
 drivers/xen/gntdev.c| 179 ++--
 2 files changed, 48 insertions(+), 139 deletions(-)

diff --git a/drivers/xen/gntdev-common.h b/drivers/xen/gntdev-common.h
index 2f8b949c3eeb14..b201fdd20b667b 100644
--- a/drivers/xen/gntdev-common.h
+++ b/drivers/xen/gntdev-common.h
@@ -21,15 +21,8 @@ struct gntdev_dmabuf_priv;
 struct gntdev_priv {
/* Maps with visible offsets in the file descriptor. */
struct list_head maps;
-   /*
-* Maps that are not visible; will be freed on munmap.
-* Only populated if populate_freeable_maps == 1
-*/
-   struct list_head freeable_maps;
/* lock protects maps and freeable_maps. */
struct mutex lock;
-   struct mm_struct *mm;
-   struct mmu_notifier mn;
 
 #ifdef CONFIG_XEN_GRANT_DMA_ALLOC
/* Device for which DMA memory is allocated. */
@@ -49,6 +42,7 @@ struct gntdev_unmap_notify {
 };
 
 struct gntdev_grant_map {
+   struct mmu_range_notifier notifier;
struct list_head next;
struct vm_area_struct *vma;
int index;
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index a446a7221e13e9..234153f556afd6 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -65,7 +65,6 @@ MODULE_PARM_DESC(limit, "Maximum number of grants that may be 
mapped by "
 static atomic_t pages_mapped = ATOMIC_INIT(0);
 
 static int use_ptemod;
-#define populate_freeable_maps use_ptemod
 
 static int unmap_grant_pages(struct gntdev_grant_map *map,
 int offset, int pages);
@@ -251,12 +250,6 @@ void gntdev_put_map(struct gntdev_priv *priv, struct 
gntdev_grant_map *map)
evtchn_put(map->notify.event);
}
 
-   if (populate_freeable_maps && priv) {
-   mutex_lock(>lock);
-   list_del(>next);
-   mutex_unlock(>lock);
-   }
-
if (map->pages && !use_ptemod)
unmap_grant_pages(map, 0, map->count);
gntdev_free_map(map);
@@ -445,17 +438,9 @@ static void gntdev_vma_close(struct vm_area_struct *vma)
struct gntdev_priv *priv = file->private_data;
 
pr_debug("gntdev_vma_close %p\n", vma);
-   if (use_ptemod) {
-   /* It is possible that an mmu notifier could be running
-* concurrently, so take priv->lock to ensure that the vma won't
-* vanishing during the unmap_grant_pages call, since we will
-* spin here until that completes. Such a concurrent call will
-* not do any unmapping, since that has been done prior to
-* closing the vma, but it may still iterate the unmap_ops list.
-*/
-   mutex_lock(>lock);
+   if (use_ptemod && map->vma == vma) {
+   mmu_range_notifier_remove(>notifier);
map->vma = NULL;
-   mutex_unlock(>lock);
}
vma->vm_private_data = NULL;
gntdev_put_map(priv, map);
@@ -477,109 +462,43 @@ static const struct vm_operations_struct gntdev_vmops = {
 
 /* -- */
 
-static bool in_range(struct gntdev_grant_map *map,
- unsigned long start, unsigned long end)
-{
-   if (!map->vma)
-   return false;
-   if (map->vma->vm_start >= end)
-   return false;
-   if (map->vma->vm_end <= start)
-   return false;
-
-   return true;
-}
-
-static int unmap_if_in_range(struct gntdev_grant_map *map,
- unsigned long start, unsigned long end,
- bool blockable)
+static bool gntdev_invalidate(struct mmu_range_notifier *mn,
+ const struct mmu_notifier_range *range)
 {
+   struct gntdev_grant_map *map =
+   container_of(mn, struct gntdev_grant_map, notifier);
unsigned long mstart, mend;
int err;
 
-   if (!in_range(map, start, end))
-   return 0;
+   if (!mmu_notifier_range_blockable(range))
+   return false;
 
-   if (!blockable)
-   return -EAGAIN;
+   /*
+* If the VMA is split or otherwise changed the notifier is not
+* updated, but we don't want to process VA's outside the modified
+* VMA. FIXME: It would be much more understandable to just prevent
+* modifying the 

[PATCH hmm 00/15] Consolidate the mmu notifier interval_tree and locking

2019-10-15 Thread Jason Gunthorpe
From: Jason Gunthorpe 

8 of the mmu_notifier using drivers (i915_gem, radeon_mn, umem_odp, hfi1,
scif_dma, vhost, gntdev, hmm) are using a common pattern where
they only use invalidate_range_start/end and immediately check the
invalidating range against some driver data structure to tell if the
driver is interested. Half of them use an interval_tree, the others are
simple linear search lists.

Of the ones I checked they largely seem to have various kinds of races,
bugs and poor implementation. This is a result of the complexity in how
the notifier interacts with get_user_pages(). It is extremely difficult to
use it correctly.

Consolidate all of this code together into the core mmu_notifier and
provide a locking scheme similar to hmm_mirror that allows the user to
safely use get_user_pages() and reliably know if the page list still
matches the mm.

This new arrangement plays nicely with the !blockable mode for
OOM. Scanning the interval tree is done such that the intersection test
will always succeed, and since there is no invalidate_range_end exposed to
drivers the scheme safely allows multiple drivers to be subscribed.

Four places are converted as an example of how the new API is used.
Four are left for future patches:
 - i915_gem has complex locking around destruction of a registration,
   needs more study
 - hfi1 (2nd user) needs access to the rbtree
 - scif_dma has a complicated logic flow
 - vhost's mmu notifiers are already being rewritten

This is still being tested, but I figured to send it to start getting help
from the xen, amd and hfi drivers which I cannot test here.

It would be intended for the hmm tree.

Jason Gunthorpe (15):
  mm/mmu_notifier: define the header pre-processor parts even if
disabled
  mm/mmu_notifier: add an interval tree notifier
  mm/hmm: allow hmm_range to be used with a mmu_range_notifier or
hmm_mirror
  mm/hmm: define the pre-processor related parts of hmm.h even if
disabled
  RDMA/odp: Use mmu_range_notifier_insert()
  RDMA/hfi1: Use mmu_range_notifier_inset for user_exp_rcv
  drm/radeon: use mmu_range_notifier_insert
  xen/gntdev: Use select for DMA_SHARED_BUFFER
  xen/gntdev: use mmu_range_notifier_insert
  nouveau: use mmu_notifier directly for invalidate_range_start
  nouveau: use mmu_range_notifier instead of hmm_mirror
  drm/amdgpu: Call find_vma under mmap_sem
  drm/amdgpu: Use mmu_range_insert instead of hmm_mirror
  drm/amdgpu: Use mmu_range_notifier instead of hmm_mirror
  mm/hmm: remove hmm_mirror and related

 Documentation/vm/hmm.rst  | 105 +---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   |   2 +
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |   9 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c|  14 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c|   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c| 445 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h|  53 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h|  13 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   | 111 ++--
 drivers/gpu/drm/nouveau/nouveau_svm.c | 229 +---
 drivers/gpu/drm/radeon/radeon.h   |   9 +-
 drivers/gpu/drm/radeon/radeon_mn.c| 218 ++-
 drivers/infiniband/core/device.c  |   1 -
 drivers/infiniband/core/umem_odp.c| 288 +-
 drivers/infiniband/hw/hfi1/file_ops.c |   2 +-
 drivers/infiniband/hw/hfi1/hfi.h  |   2 +-
 drivers/infiniband/hw/hfi1/user_exp_rcv.c | 144 ++---
 drivers/infiniband/hw/hfi1/user_exp_rcv.h |   3 +-
 drivers/infiniband/hw/mlx5/mlx5_ib.h  |   7 +-
 drivers/infiniband/hw/mlx5/mr.c   |   3 +-
 drivers/infiniband/hw/mlx5/odp.c  |  48 +-
 drivers/xen/Kconfig   |   3 +-
 drivers/xen/gntdev-common.h   |   8 +-
 drivers/xen/gntdev.c  | 179 ++
 include/linux/hmm.h   | 195 +--
 include/linux/mmu_notifier.h  | 124 +++-
 include/rdma/ib_umem_odp.h|  65 +--
 include/rdma/ib_verbs.h   |   2 -
 kernel/fork.c |   1 -
 mm/Kconfig|   2 +-
 mm/hmm.c  | 275 +
 mm/mmu_notifier.c | 542 +-
 32 files changed, 1180 insertions(+), 1923 deletions(-)

-- 
2.23.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH hmm 04/15] mm/hmm: define the pre-processor related parts of hmm.h even if disabled

2019-10-15 Thread Jason Gunthorpe
From: Jason Gunthorpe 

Only the function calls are stubbed out with static inlines that always
fail. This is the standard way to write a header for an optional component
and makes it easier for drivers that only optionally need HMM_MIRROR.

Signed-off-by: Jason Gunthorpe 
---
 include/linux/hmm.h | 59 -
 kernel/fork.c   |  1 -
 2 files changed, 47 insertions(+), 13 deletions(-)

diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 8ac1fd6a81af8f..2666eb08a40615 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -62,8 +62,6 @@
 #include 
 #include 
 
-#ifdef CONFIG_HMM_MIRROR
-
 #include 
 #include 
 #include 
@@ -374,6 +372,15 @@ struct hmm_mirror {
struct list_headlist;
 };
 
+/*
+ * Retry fault if non-blocking, drop mmap_sem and return -EAGAIN in that case.
+ */
+#define HMM_FAULT_ALLOW_RETRY  (1 << 0)
+
+/* Don't fault in missing PTEs, just snapshot the current state. */
+#define HMM_FAULT_SNAPSHOT (1 << 1)
+
+#ifdef CONFIG_HMM_MIRROR
 int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm);
 void hmm_mirror_unregister(struct hmm_mirror *mirror);
 
@@ -383,14 +390,6 @@ void hmm_mirror_unregister(struct hmm_mirror *mirror);
 int hmm_range_register(struct hmm_range *range, struct hmm_mirror *mirror);
 void hmm_range_unregister(struct hmm_range *range);
 
-/*
- * Retry fault if non-blocking, drop mmap_sem and return -EAGAIN in that case.
- */
-#define HMM_FAULT_ALLOW_RETRY  (1 << 0)
-
-/* Don't fault in missing PTEs, just snapshot the current state. */
-#define HMM_FAULT_SNAPSHOT (1 << 1)
-
 long hmm_range_fault(struct hmm_range *range, unsigned int flags);
 
 long hmm_range_dma_map(struct hmm_range *range,
@@ -401,6 +400,44 @@ long hmm_range_dma_unmap(struct hmm_range *range,
 struct device *device,
 dma_addr_t *daddrs,
 bool dirty);
+#else
+int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm)
+{
+   return -EOPNOTSUPP;
+}
+
+void hmm_mirror_unregister(struct hmm_mirror *mirror)
+{
+}
+
+int hmm_range_register(struct hmm_range *range, struct hmm_mirror *mirror)
+{
+   return -EOPNOTSUPP;
+}
+
+void hmm_range_unregister(struct hmm_range *range)
+{
+}
+
+static inline long hmm_range_fault(struct hmm_range *range, unsigned int flags)
+{
+   return -EOPNOTSUPP;
+}
+
+static inline long hmm_range_dma_map(struct hmm_range *range,
+struct device *device, dma_addr_t *daddrs,
+unsigned int flags)
+{
+   return -EOPNOTSUPP;
+}
+
+static inline long hmm_range_dma_unmap(struct hmm_range *range,
+  struct device *device,
+  dma_addr_t *daddrs, bool dirty)
+{
+   return -EOPNOTSUPP;
+}
+#endif
 
 /*
  * HMM_RANGE_DEFAULT_TIMEOUT - default timeout (ms) when waiting for a range
@@ -411,6 +448,4 @@ long hmm_range_dma_unmap(struct hmm_range *range,
  */
 #define HMM_RANGE_DEFAULT_TIMEOUT 1000
 
-#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */
-
 #endif /* LINUX_HMM_H */
diff --git a/kernel/fork.c b/kernel/fork.c
index f9572f41612628..4561a65d19db88 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -40,7 +40,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
-- 
2.23.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH hmm 10/15] nouveau: use mmu_notifier directly for invalidate_range_start

2019-10-15 Thread Jason Gunthorpe
From: Jason Gunthorpe 

There is no reason to get the invalidate_range_start() callback via an
indirection through hmm_mirror, just register a normal notifier directly.

Cc: Ben Skeggs 
Cc: dri-de...@lists.freedesktop.org
Cc: nouv...@lists.freedesktop.org
Cc: Ralph Campbell 
Signed-off-by: Jason Gunthorpe 
---
 drivers/gpu/drm/nouveau/nouveau_svm.c | 95 ++-
 1 file changed, 63 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c 
b/drivers/gpu/drm/nouveau/nouveau_svm.c
index 668d4bd0c118f1..577f8811925a59 100644
--- a/drivers/gpu/drm/nouveau/nouveau_svm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_svm.c
@@ -88,6 +88,7 @@ nouveau_ivmm_find(struct nouveau_svm *svm, u64 inst)
 }
 
 struct nouveau_svmm {
+   struct mmu_notifier notifier;
struct nouveau_vmm *vmm;
struct {
unsigned long start;
@@ -96,7 +97,6 @@ struct nouveau_svmm {
 
struct mutex mutex;
 
-   struct mm_struct *mm;
struct hmm_mirror mirror;
 };
 
@@ -251,10 +251,11 @@ nouveau_svmm_invalidate(struct nouveau_svmm *svmm, u64 
start, u64 limit)
 }
 
 static int
-nouveau_svmm_sync_cpu_device_pagetables(struct hmm_mirror *mirror,
-   const struct mmu_notifier_range *update)
+nouveau_svmm_invalidate_range_start(struct mmu_notifier *mn,
+   const struct mmu_notifier_range *update)
 {
-   struct nouveau_svmm *svmm = container_of(mirror, typeof(*svmm), mirror);
+   struct nouveau_svmm *svmm =
+   container_of(mn, struct nouveau_svmm, notifier);
unsigned long start = update->start;
unsigned long limit = update->end;
 
@@ -264,6 +265,9 @@ nouveau_svmm_sync_cpu_device_pagetables(struct hmm_mirror 
*mirror,
SVMM_DBG(svmm, "invalidate %016lx-%016lx", start, limit);
 
mutex_lock(>mutex);
+   if (unlikely(!svmm->vmm))
+   goto out;
+
if (limit > svmm->unmanaged.start && start < svmm->unmanaged.limit) {
if (start < svmm->unmanaged.start) {
nouveau_svmm_invalidate(svmm, start,
@@ -273,19 +277,31 @@ nouveau_svmm_sync_cpu_device_pagetables(struct hmm_mirror 
*mirror,
}
 
nouveau_svmm_invalidate(svmm, start, limit);
+
+out:
mutex_unlock(>mutex);
return 0;
 }
 
-static void
-nouveau_svmm_release(struct hmm_mirror *mirror)
+static void nouveau_svmm_free_notifier(struct mmu_notifier *mn)
+{
+   kfree(container_of(mn, struct nouveau_svmm, notifier));
+}
+
+static const struct mmu_notifier_ops nouveau_mn_ops = {
+   .invalidate_range_start = nouveau_svmm_invalidate_range_start,
+   .free_notifier = nouveau_svmm_free_notifier,
+};
+
+static int
+nouveau_svmm_sync_cpu_device_pagetables(struct hmm_mirror *mirror,
+   const struct mmu_notifier_range *update)
 {
+   return 0;
 }
 
-static const struct hmm_mirror_ops
-nouveau_svmm = {
+static const struct hmm_mirror_ops nouveau_svmm = {
.sync_cpu_device_pagetables = nouveau_svmm_sync_cpu_device_pagetables,
-   .release = nouveau_svmm_release,
 };
 
 void
@@ -294,7 +310,10 @@ nouveau_svmm_fini(struct nouveau_svmm **psvmm)
struct nouveau_svmm *svmm = *psvmm;
if (svmm) {
hmm_mirror_unregister(>mirror);
-   kfree(*psvmm);
+   mutex_lock(>mutex);
+   svmm->vmm = NULL;
+   mutex_unlock(>mutex);
+   mmu_notifier_put(>notifier);
*psvmm = NULL;
}
 }
@@ -320,7 +339,7 @@ nouveau_svmm_init(struct drm_device *dev, void *data,
mutex_lock(>mutex);
if (cli->svm.cli) {
ret = -EBUSY;
-   goto done;
+   goto out_free;
}
 
/* Allocate a new GPU VMM that can support SVM (managed by the
@@ -335,24 +354,33 @@ nouveau_svmm_init(struct drm_device *dev, void *data,
.fault_replay = true,
}, sizeof(struct gp100_vmm_v0), >svm.vmm);
if (ret)
-   goto done;
+   goto out_free;
 
-   /* Enable HMM mirroring of CPU address-space to VMM. */
-   svmm->mm = get_task_mm(current);
-   down_write(>mm->mmap_sem);
+   down_write(>mm->mmap_sem);
svmm->mirror.ops = _svmm;
-   ret = hmm_mirror_register(>mirror, svmm->mm);
-   if (ret == 0) {
-   cli->svm.svmm = svmm;
-   cli->svm.cli = cli;
-   }
-   up_write(>mm->mmap_sem);
-   mmput(svmm->mm);
+   ret = hmm_mirror_register(>mirror, current->mm);
+   if (ret)
+   goto out_mm_unlock;
 
-done:
+   svmm->notifier.ops = _mn_ops;
+   ret = __mmu_notifier_register(>notifier, current->mm);
if (ret)
-   nouveau_svmm_fini();
+   goto out_hmm_unregister;
+   /* Note, ownership of svmm transfers to mmu_notifier */
+
+   cli->svm.svmm = svmm;
+   

[PATCH hmm 12/15] drm/amdgpu: Call find_vma under mmap_sem

2019-10-15 Thread Jason Gunthorpe
From: Jason Gunthorpe 

find_vma() must be called under the mmap_sem, reorganize this code to
do the vma check after entering the lock.

Further, fix the unlocked use of struct task_struct's mm, instead use
the mm from hmm_mirror which has an active mm_grab. Also the mm_grab
must be converted to a mm_get before acquiring mmap_sem or calling
find_vma().

Fixes: 66c45500bfdc ("drm/amdgpu: use new HMM APIs and helpers")
Fixes: 0919195f2b0d ("drm/amdgpu: Enable amdgpu_ttm_tt_get_user_pages in worker 
threads")
Cc: Alex Deucher 
Cc: Christian König 
Cc: David (ChunMing) Zhou 
Cc: amd-gfx@lists.freedesktop.org
Signed-off-by: Jason Gunthorpe 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 37 ++---
 1 file changed, 21 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index dff41d0a85fe96..c0e41f1f0c2365 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -35,6 +35,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -788,7 +789,7 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, 
struct page **pages)
struct hmm_mirror *mirror = bo->mn ? >mn->mirror : NULL;
struct ttm_tt *ttm = bo->tbo.ttm;
struct amdgpu_ttm_tt *gtt = (void *)ttm;
-   struct mm_struct *mm = gtt->usertask->mm;
+   struct mm_struct *mm;
unsigned long start = gtt->userptr;
struct vm_area_struct *vma;
struct hmm_range *range;
@@ -796,25 +797,14 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, 
struct page **pages)
uint64_t *pfns;
int r = 0;
 
-   if (!mm) /* Happens during process shutdown */
-   return -ESRCH;
-
if (unlikely(!mirror)) {
DRM_DEBUG_DRIVER("Failed to get hmm_mirror\n");
-   r = -EFAULT;
-   goto out;
+   return -EFAULT;
}
 
-   vma = find_vma(mm, start);
-   if (unlikely(!vma || start < vma->vm_start)) {
-   r = -EFAULT;
-   goto out;
-   }
-   if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
-   vma->vm_file)) {
-   r = -EPERM;
-   goto out;
-   }
+   mm = mirror->hmm->mmu_notifier.mm;
+   if (!mmget_not_zero(mm)) /* Happens during process shutdown */
+   return -ESRCH;
 
range = kzalloc(sizeof(*range), GFP_KERNEL);
if (unlikely(!range)) {
@@ -847,6 +837,17 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, 
struct page **pages)
hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT);
 
down_read(>mmap_sem);
+   vma = find_vma(mm, start);
+   if (unlikely(!vma || start < vma->vm_start)) {
+   r = -EFAULT;
+   goto out_unlock;
+   }
+   if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
+   vma->vm_file)) {
+   r = -EPERM;
+   goto out_unlock;
+   }
+
r = hmm_range_fault(range, 0);
up_read(>mmap_sem);
 
@@ -865,15 +866,19 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, 
struct page **pages)
}
 
gtt->range = range;
+   mmput(mm);
 
return 0;
 
+out_unlock:
+   up_read(>mmap_sem);
 out_free_pfns:
hmm_range_unregister(range);
kvfree(pfns);
 out_free_ranges:
kfree(range);
 out:
+   mmput(mm);
return r;
 }
 
-- 
2.23.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH hmm 01/15] mm/mmu_notifier: define the header pre-processor parts even if disabled

2019-10-15 Thread Jason Gunthorpe
From: Jason Gunthorpe 

Now that we have KERNEL_HEADER_TEST all headers are generally compile
tested, so relying on makefile tricks to avoid compiling code that depends
on CONFIG_MMU_NOTIFIER is more annoying.

Instead follow the usual pattern and provide most of the header with only
the functions stubbed out when CONFIG_MMU_NOTIFIER is disabled. This
ensures code compiles no matter what the config setting is.

While here, struct mmu_notifier_mm is private to mmu_notifier.c, move it.

Signed-off-by: Jason Gunthorpe 
---
 include/linux/mmu_notifier.h | 46 +---
 mm/mmu_notifier.c| 13 ++
 2 files changed, 30 insertions(+), 29 deletions(-)

diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 1bd8e6a09a3c27..12bd603d318ce7 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -7,8 +7,9 @@
 #include 
 #include 
 
+struct mmu_notifier_mm;
 struct mmu_notifier;
-struct mmu_notifier_ops;
+struct mmu_notifier_range;
 
 /**
  * enum mmu_notifier_event - reason for the mmu notifier callback
@@ -40,36 +41,8 @@ enum mmu_notifier_event {
MMU_NOTIFY_SOFT_DIRTY,
 };
 
-#ifdef CONFIG_MMU_NOTIFIER
-
-#ifdef CONFIG_LOCKDEP
-extern struct lockdep_map __mmu_notifier_invalidate_range_start_map;
-#endif
-
-/*
- * The mmu notifier_mm structure is allocated and installed in
- * mm->mmu_notifier_mm inside the mm_take_all_locks() protected
- * critical section and it's released only when mm_count reaches zero
- * in mmdrop().
- */
-struct mmu_notifier_mm {
-   /* all mmu notifiers registerd in this mm are queued in this list */
-   struct hlist_head list;
-   /* to serialize the list modifications and hlist_unhashed */
-   spinlock_t lock;
-};
-
 #define MMU_NOTIFIER_RANGE_BLOCKABLE (1 << 0)
 
-struct mmu_notifier_range {
-   struct vm_area_struct *vma;
-   struct mm_struct *mm;
-   unsigned long start;
-   unsigned long end;
-   unsigned flags;
-   enum mmu_notifier_event event;
-};
-
 struct mmu_notifier_ops {
/*
 * Called either by mmu_notifier_unregister or when the mm is
@@ -249,6 +222,21 @@ struct mmu_notifier {
unsigned int users;
 };
 
+#ifdef CONFIG_MMU_NOTIFIER
+
+#ifdef CONFIG_LOCKDEP
+extern struct lockdep_map __mmu_notifier_invalidate_range_start_map;
+#endif
+
+struct mmu_notifier_range {
+   struct vm_area_struct *vma;
+   struct mm_struct *mm;
+   unsigned long start;
+   unsigned long end;
+   unsigned flags;
+   enum mmu_notifier_event event;
+};
+
 static inline int mm_has_notifiers(struct mm_struct *mm)
 {
return unlikely(mm->mmu_notifier_mm);
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 7fde88695f35d6..367670cfd02b7b 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -27,6 +27,19 @@ struct lockdep_map __mmu_notifier_invalidate_range_start_map 
= {
 };
 #endif
 
+/*
+ * The mmu notifier_mm structure is allocated and installed in
+ * mm->mmu_notifier_mm inside the mm_take_all_locks() protected
+ * critical section and it's released only when mm_count reaches zero
+ * in mmdrop().
+ */
+struct mmu_notifier_mm {
+   /* all mmu notifiers registered in this mm are queued in this list */
+   struct hlist_head list;
+   /* to serialize the list modifications and hlist_unhashed */
+   spinlock_t lock;
+};
+
 /*
  * This function can't run concurrently against mmu_notifier_register
  * because mm->mm_users > 0 during mmu_notifier_register and exit_mmap
-- 
2.23.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH hmm 11/15] nouveau: use mmu_range_notifier instead of hmm_mirror

2019-10-15 Thread Jason Gunthorpe
From: Jason Gunthorpe 

Remove the hmm_mirror object and use the mmu_range_notifier API instead
for the range, and use the normal mmu_notifier API for the general
invalidation callback.

While here re-organize the pagefault path so the locking pattern is clear.

nouveau is the only driver that uses a temporary range object and instead
forwards nearly every invalidation range directly to the HW. While this is
not how the mmu_range_notifier was intended to be used, the overheads on
the pagefaulting path are similar to the existing hmm_mirror version.
Particularly since the interval tree will be small.

Cc: Ben Skeggs 
Cc: dri-de...@lists.freedesktop.org
Cc: nouv...@lists.freedesktop.org
Cc: Ralph Campbell 
Signed-off-by: Jason Gunthorpe 
---
 drivers/gpu/drm/nouveau/nouveau_svm.c | 178 ++
 1 file changed, 98 insertions(+), 80 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c 
b/drivers/gpu/drm/nouveau/nouveau_svm.c
index 577f8811925a59..712c99918551bc 100644
--- a/drivers/gpu/drm/nouveau/nouveau_svm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_svm.c
@@ -96,8 +96,6 @@ struct nouveau_svmm {
} unmanaged;
 
struct mutex mutex;
-
-   struct hmm_mirror mirror;
 };
 
 #define SVMM_DBG(s,f,a...) 
\
@@ -293,23 +291,11 @@ static const struct mmu_notifier_ops nouveau_mn_ops = {
.free_notifier = nouveau_svmm_free_notifier,
 };
 
-static int
-nouveau_svmm_sync_cpu_device_pagetables(struct hmm_mirror *mirror,
-   const struct mmu_notifier_range *update)
-{
-   return 0;
-}
-
-static const struct hmm_mirror_ops nouveau_svmm = {
-   .sync_cpu_device_pagetables = nouveau_svmm_sync_cpu_device_pagetables,
-};
-
 void
 nouveau_svmm_fini(struct nouveau_svmm **psvmm)
 {
struct nouveau_svmm *svmm = *psvmm;
if (svmm) {
-   hmm_mirror_unregister(>mirror);
mutex_lock(>mutex);
svmm->vmm = NULL;
mutex_unlock(>mutex);
@@ -357,15 +343,10 @@ nouveau_svmm_init(struct drm_device *dev, void *data,
goto out_free;
 
down_write(>mm->mmap_sem);
-   svmm->mirror.ops = _svmm;
-   ret = hmm_mirror_register(>mirror, current->mm);
-   if (ret)
-   goto out_mm_unlock;
-
svmm->notifier.ops = _mn_ops;
ret = __mmu_notifier_register(>notifier, current->mm);
if (ret)
-   goto out_hmm_unregister;
+   goto out_mm_unlock;
/* Note, ownership of svmm transfers to mmu_notifier */
 
cli->svm.svmm = svmm;
@@ -374,8 +355,6 @@ nouveau_svmm_init(struct drm_device *dev, void *data,
mutex_unlock(>mutex);
return 0;
 
-out_hmm_unregister:
-   hmm_mirror_unregister(>mirror);
 out_mm_unlock:
up_write(>mm->mmap_sem);
 out_free:
@@ -503,43 +482,89 @@ nouveau_svm_fault_cache(struct nouveau_svm *svm,
fault->inst, fault->addr, fault->access);
 }
 
-static inline bool
-nouveau_range_done(struct hmm_range *range)
+struct svm_notifier {
+   struct mmu_range_notifier notifier;
+   struct nouveau_svmm *svmm;
+};
+
+static bool nouveau_svm_range_invalidate(struct mmu_range_notifier *mrn,
+const struct mmu_notifier_range *range)
 {
-   bool ret = hmm_range_valid(range);
+   struct svm_notifier *sn =
+   container_of(mrn, struct svm_notifier, notifier);
 
-   hmm_range_unregister(range);
-   return ret;
+   /*
+* serializes the update to mrn->invalidate_seq done by caller and
+* prevents invalidation of the PTE from progressing while HW is being
+* programmed. This is very hacky and only works because the normal
+* notifier that does invalidation is always called after the range
+* notifier.
+*/
+   if (mmu_notifier_range_blockable(range))
+   mutex_lock(>svmm->mutex);
+   else if (!mutex_trylock(>svmm->mutex))
+   return false;
+   mutex_unlock(>svmm->mutex);
+   return true;
 }
 
-static int
-nouveau_range_fault(struct nouveau_svmm *svmm, struct hmm_range *range)
+static const struct mmu_range_notifier_ops nouveau_svm_mrn_ops = {
+   .invalidate = nouveau_svm_range_invalidate,
+};
+
+static int nouveau_range_fault(struct nouveau_svmm *svmm,
+  struct nouveau_drm *drm, void *data, u32 size,
+  u64 *pfns,
+  struct svm_notifier *notifier)
 {
+   unsigned long timeout =
+   jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
+   /* Have HMM fault pages within the fault window to the GPU. */
+   struct hmm_range range = {
+   .notifier = >notifier,
+   .start = notifier->notifier.interval_tree.start,
+   .end = notifier->notifier.interval_tree.last + 1,
+   .pfns = pfns,
+

[PATCH hmm 02/15] mm/mmu_notifier: add an interval tree notifier

2019-10-15 Thread Jason Gunthorpe
From: Jason Gunthorpe 

Of the 13 users of mmu_notifiers, 8 of them use only
invalidate_range_start/end() and immediately intersect the
mmu_notifier_range with some kind of internal list of VAs.  4 use an
interval tree (i915_gem, radeon_mn, umem_odp, hfi1). 4 use a linked list
of some kind (scif_dma, vhost, gntdev, hmm)

And the remaining 5 either don't use invalidate_range_start() or do some
special thing with it.

It turns out that building a correct scheme with an interval tree is
pretty complicated, particularly if the use case is synchronizing against
another thread doing get_user_pages().  Many of these implementations have
various subtle and difficult to fix races.

This approach puts the interval tree as common code at the top of the mmu
notifier call tree and implements a shareable locking scheme.

It includes:
 - An interval tree tracking VA ranges, with per-range callbacks
 - A read/write locking scheme for the interval tree that avoids
   sleeping in the notifier path (for OOM killer)
 - A sequence counter based collision-retry locking scheme to tell
   device page fault that a VA range is being concurrently invalidated.

This is based on various ideas:
- hmm accumulates invalidated VA ranges and releases them when all
  invalidates are done, via active_invalidate_ranges count.
  This approach avoids having to intersect the interval tree twice (as
  umem_odp does) at the potential cost of a longer device page fault.

- kvm/umem_odp use a sequence counter to drive the collision retry,
  via invalidate_seq

- a deferred work todo list on unlock scheme like RTNL, via deferred_list.
  This makes adding/removing interval tree members more deterministic

- seqlock, except this version makes the seqlock idea multi-holder on the
  write side by protecting it with active_invalidate_ranges and a spinlock

To minimize MM overhead when only the interval tree is being used, the
entire SRCU and hlist overheads are dropped using some simple
branches. Similarly the interval tree overhead is dropped when in hlist
mode.

The overhead from the mandatory spinlock is broadly the same as most of
existing users which already had a lock (or two) of some sort on the
invalidation path.

Cc: Andrea Arcangeli 
Cc: Michal Hocko 
Signed-off-by: Jason Gunthorpe 
---
 include/linux/mmu_notifier.h |  78 ++
 mm/Kconfig   |   1 +
 mm/mmu_notifier.c| 529 +--
 3 files changed, 583 insertions(+), 25 deletions(-)

diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 12bd603d318ce7..bc2b12483de127 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -6,10 +6,12 @@
 #include 
 #include 
 #include 
+#include 
 
 struct mmu_notifier_mm;
 struct mmu_notifier;
 struct mmu_notifier_range;
+struct mmu_range_notifier;
 
 /**
  * enum mmu_notifier_event - reason for the mmu notifier callback
@@ -32,6 +34,9 @@ struct mmu_notifier_range;
  * access flags). User should soft dirty the page in the end callback to make
  * sure that anyone relying on soft dirtyness catch pages that might be written
  * through non CPU mappings.
+ *
+ * @MMU_NOTIFY_RELEASE: used during mmu_range_notifier invalidate to signal 
that
+ * the mm refcount is zero and the range is no longer accessible.
  */
 enum mmu_notifier_event {
MMU_NOTIFY_UNMAP = 0,
@@ -39,6 +44,7 @@ enum mmu_notifier_event {
MMU_NOTIFY_PROTECTION_VMA,
MMU_NOTIFY_PROTECTION_PAGE,
MMU_NOTIFY_SOFT_DIRTY,
+   MMU_NOTIFY_RELEASE,
 };
 
 #define MMU_NOTIFIER_RANGE_BLOCKABLE (1 << 0)
@@ -222,6 +228,25 @@ struct mmu_notifier {
unsigned int users;
 };
 
+/**
+ * struct mmu_range_notifier_ops
+ * @invalidate: Upon return the caller must stop using any SPTEs within this
+ *  range, this function can sleep. Return false if blocking was
+ *  required but range is non-blocking
+ */
+struct mmu_range_notifier_ops {
+   bool (*invalidate)(struct mmu_range_notifier *mrn,
+  const struct mmu_notifier_range *range);
+};
+
+struct mmu_range_notifier {
+   struct interval_tree_node interval_tree;
+   const struct mmu_range_notifier_ops *ops;
+   struct hlist_node deferred_item;
+   unsigned long invalidate_seq;
+   struct mm_struct *mm;
+};
+
 #ifdef CONFIG_MMU_NOTIFIER
 
 #ifdef CONFIG_LOCKDEP
@@ -263,6 +288,59 @@ extern int __mmu_notifier_register(struct mmu_notifier *mn,
   struct mm_struct *mm);
 extern void mmu_notifier_unregister(struct mmu_notifier *mn,
struct mm_struct *mm);
+
+unsigned long mmu_range_read_begin(struct mmu_range_notifier *mrn);
+int mmu_range_notifier_insert(struct mmu_range_notifier *mrn,
+ unsigned long start, unsigned long length,
+ struct mm_struct *mm);
+int mmu_range_notifier_insert_locked(struct mmu_range_notifier *mrn,
+

[PATCH hmm 08/15] xen/gntdev: Use select for DMA_SHARED_BUFFER

2019-10-15 Thread Jason Gunthorpe
From: Jason Gunthorpe 

DMA_SHARED_BUFFER can not be enabled by the user (it represents a library
set in the kernel). The kconfig convention is to use select for such
symbols so they are turned on implicitly when the user enables a kconfig
that needs them.

Otherwise the XEN_GNTDEV_DMABUF kconfig is overly difficult to enable.

Fixes: 932d6562179e ("xen/gntdev: Add initial support for dma-buf UAPI")
Cc: Oleksandr Andrushchenko 
Cc: Boris Ostrovsky 
Cc: xen-de...@lists.xenproject.org
Cc: Juergen Gross 
Cc: Stefano Stabellini 
Signed-off-by: Jason Gunthorpe 
---
 drivers/xen/Kconfig | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 79cc75096f4232..a50dadd0109336 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -141,7 +141,8 @@ config XEN_GNTDEV
 
 config XEN_GNTDEV_DMABUF
bool "Add support for dma-buf grant access device driver extension"
-   depends on XEN_GNTDEV && XEN_GRANT_DMA_ALLOC && DMA_SHARED_BUFFER
+   depends on XEN_GNTDEV && XEN_GRANT_DMA_ALLOC
+   select DMA_SHARED_BUFFER
help
  Allows userspace processes and kernel modules to use Xen backed
  dma-buf implementation. With this extension grant references to
-- 
2.23.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH hmm 05/15] RDMA/odp: Use mmu_range_notifier_insert()

2019-10-15 Thread Jason Gunthorpe
From: Jason Gunthorpe 

Replace the internal interval tree based mmu notifier with the new common
mmu_range_notifier_insert() API. This removes a lot of code and fixes a
deadlock that can be triggered in ODP:

 zap_page_range()
  mmu_notifier_invalidate_range_start()
   [..]
ib_umem_notifier_invalidate_range_start()
   down_read(_mm->umem_rwsem)
  unmap_single_vma()
[..]
  __split_huge_page_pmd()
mmu_notifier_invalidate_range_start()
[..]
   ib_umem_notifier_invalidate_range_start()
  down_read(_mm->umem_rwsem)   // DEADLOCK

mmu_notifier_invalidate_range_end()
   up_read(_mm->umem_rwsem)
  mmu_notifier_invalidate_range_end()
 up_read(_mm->umem_rwsem)

The umem_rwsem is held across the range_start/end as the ODP algorithm for
invalidate_range_end cannot tolerate changes to the interval
tree. However, due to the nested invalidation regions the second
down_read() can deadlock if there are competing writers. The new core code
provides an alternative scheme to solve this problem.

Fixes: ca748c39ea3f ("RDMA/umem: Get rid of per_mm->notifier_count")
Signed-off-by: Jason Gunthorpe 
---
 drivers/infiniband/core/device.c |   1 -
 drivers/infiniband/core/umem_odp.c   | 288 +++
 drivers/infiniband/hw/mlx5/mlx5_ib.h |   7 +-
 drivers/infiniband/hw/mlx5/mr.c  |   3 +-
 drivers/infiniband/hw/mlx5/odp.c |  48 ++---
 include/rdma/ib_umem_odp.h   |  65 ++
 include/rdma/ib_verbs.h  |   2 -
 7 files changed, 67 insertions(+), 347 deletions(-)

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 2dd2cfe9b56136..ac7924b3c73abe 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -2617,7 +2617,6 @@ void ib_set_device_ops(struct ib_device *dev, const 
struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, get_vf_config);
SET_DEVICE_OP(dev_ops, get_vf_stats);
SET_DEVICE_OP(dev_ops, init_port);
-   SET_DEVICE_OP(dev_ops, invalidate_range);
SET_DEVICE_OP(dev_ops, iw_accept);
SET_DEVICE_OP(dev_ops, iw_add_ref);
SET_DEVICE_OP(dev_ops, iw_connect);
diff --git a/drivers/infiniband/core/umem_odp.c 
b/drivers/infiniband/core/umem_odp.c
index d7d5fadf0899ad..6132b8127e8435 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -48,197 +48,32 @@
 
 #include "uverbs.h"
 
-static void ib_umem_notifier_start_account(struct ib_umem_odp *umem_odp)
-{
-   mutex_lock(_odp->umem_mutex);
-   if (umem_odp->notifiers_count++ == 0)
-   /*
-* Initialize the completion object for waiting on
-* notifiers. Since notifier_count is zero, no one should be
-* waiting right now.
-*/
-   reinit_completion(_odp->notifier_completion);
-   mutex_unlock(_odp->umem_mutex);
-}
-
-static void ib_umem_notifier_end_account(struct ib_umem_odp *umem_odp)
-{
-   mutex_lock(_odp->umem_mutex);
-   /*
-* This sequence increase will notify the QP page fault that the page
-* that is going to be mapped in the spte could have been freed.
-*/
-   ++umem_odp->notifiers_seq;
-   if (--umem_odp->notifiers_count == 0)
-   complete_all(_odp->notifier_completion);
-   mutex_unlock(_odp->umem_mutex);
-}
-
-static void ib_umem_notifier_release(struct mmu_notifier *mn,
-struct mm_struct *mm)
-{
-   struct ib_ucontext_per_mm *per_mm =
-   container_of(mn, struct ib_ucontext_per_mm, mn);
-   struct rb_node *node;
-
-   down_read(_mm->umem_rwsem);
-   if (!per_mm->mn.users)
-   goto out;
-
-   for (node = rb_first_cached(_mm->umem_tree); node;
-node = rb_next(node)) {
-   struct ib_umem_odp *umem_odp =
-   rb_entry(node, struct ib_umem_odp, interval_tree.rb);
-
-   /*
-* Increase the number of notifiers running, to prevent any
-* further fault handling on this MR.
-*/
-   ib_umem_notifier_start_account(umem_odp);
-   complete_all(_odp->notifier_completion);
-   umem_odp->umem.ibdev->ops.invalidate_range(
-   umem_odp, ib_umem_start(umem_odp),
-   ib_umem_end(umem_odp));
-   }
-
-out:
-   up_read(_mm->umem_rwsem);
-}
-
-static int invalidate_range_start_trampoline(struct ib_umem_odp *item,
-u64 start, u64 end, void *cookie)
-{
-   ib_umem_notifier_start_account(item);
-   item->umem.ibdev->ops.invalidate_range(item, start, end);
-   return 0;
-}
-
-static int ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn,
-   const struct mmu_notifier_range *range)
-{
-   struct ib_ucontext_per_mm *per_mm =

[PATCH hmm 03/15] mm/hmm: allow hmm_range to be used with a mmu_range_notifier or hmm_mirror

2019-10-15 Thread Jason Gunthorpe
From: Jason Gunthorpe 

hmm_mirror's handling of ranges does not use a sequence count which
results in this bug:

 CPU0   CPU1
 hmm_range_wait_until_valid(range)
 valid == true
 hmm_range_fault(range)
hmm_invalidate_range_start()
   range->valid = false
hmm_invalidate_range_end()
   range->valid = true
 hmm_range_valid(range)
  valid == true

Where the hmm_range_valid should not have succeeded.

Adding the required sequence count would make it nearly identical to the
new mmu_range_notifier. Instead replace the hmm_mirror stuff with
mmu_range_notifier.

Co-existence of the two APIs is the first step.

Signed-off-by: Jason Gunthorpe 
---
 include/linux/hmm.h |  5 +
 mm/hmm.c| 25 +++--
 2 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 3fec513b9c00f1..8ac1fd6a81af8f 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -145,6 +145,9 @@ enum hmm_pfn_value_e {
 /*
  * struct hmm_range - track invalidation lock on virtual address range
  *
+ * @notifier: an optional mmu_range_notifier
+ * @notifier_seq: when notifier is used this is the result of
+ *mmu_range_read_begin()
  * @hmm: the core HMM structure this range is active against
  * @vma: the vm area struct for the range
  * @list: all range lock are on a list
@@ -159,6 +162,8 @@ enum hmm_pfn_value_e {
  * @valid: pfns array did not change since it has been fill by an HMM function
  */
 struct hmm_range {
+   struct mmu_range_notifier *notifier;
+   unsigned long   notifier_seq;
struct hmm  *hmm;
struct list_headlist;
unsigned long   start;
diff --git a/mm/hmm.c b/mm/hmm.c
index 902f5fa6bf93ad..22ac3595771feb 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -852,6 +852,14 @@ void hmm_range_unregister(struct hmm_range *range)
 }
 EXPORT_SYMBOL(hmm_range_unregister);
 
+static bool needs_retry(struct hmm_range *range)
+{
+   if (range->notifier)
+   return mmu_range_check_retry(range->notifier,
+range->notifier_seq);
+   return !range->valid;
+}
+
 static const struct mm_walk_ops hmm_walk_ops = {
.pud_entry  = hmm_vma_walk_pud,
.pmd_entry  = hmm_vma_walk_pmd,
@@ -892,18 +900,23 @@ long hmm_range_fault(struct hmm_range *range, unsigned 
int flags)
const unsigned long device_vma = VM_IO | VM_PFNMAP | VM_MIXEDMAP;
unsigned long start = range->start, end;
struct hmm_vma_walk hmm_vma_walk;
-   struct hmm *hmm = range->hmm;
+   struct mm_struct *mm;
struct vm_area_struct *vma;
int ret;
 
-   lockdep_assert_held(>mmu_notifier.mm->mmap_sem);
+   if (range->notifier)
+   mm = range->notifier->mm;
+   else
+   mm = range->hmm->mmu_notifier.mm;
+
+   lockdep_assert_held(>mmap_sem);
 
do {
/* If range is no longer valid force retry. */
-   if (!range->valid)
+   if (needs_retry(range))
return -EBUSY;
 
-   vma = find_vma(hmm->mmu_notifier.mm, start);
+   vma = find_vma(mm, start);
if (vma == NULL || (vma->vm_flags & device_vma))
return -EFAULT;
 
@@ -933,7 +946,7 @@ long hmm_range_fault(struct hmm_range *range, unsigned int 
flags)
start = hmm_vma_walk.last;
 
/* Keep trying while the range is valid. */
-   } while (ret == -EBUSY && range->valid);
+   } while (ret == -EBUSY && !needs_retry(range));
 
if (ret) {
unsigned long i;
@@ -991,7 +1004,7 @@ long hmm_range_dma_map(struct hmm_range *range, struct 
device *device,
continue;
 
/* Check if range is being invalidated */
-   if (!range->valid) {
+   if (needs_retry(range)) {
ret = -EBUSY;
goto unmap;
}
-- 
2.23.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: AMDGPU and 16B stack alignment

2019-10-15 Thread Nick Desaulniers
On Tue, Oct 15, 2019 at 11:05 AM Nick Desaulniers
 wrote:
>
> On Tue, Oct 15, 2019 at 12:19 AM Arnd Bergmann  wrote:
> >
> > On Tue, Oct 15, 2019 at 9:08 AM S, Shirish  wrote:
> > > On 10/15/2019 3:52 AM, Nick Desaulniers wrote:
> >
> > > My gcc build fails with below errors:
> > >
> > > dcn_calcs.c:1:0: error: -mpreferred-stack-boundary=3 is not between 4 and 
> > > 12
> > >
> > > dcn_calc_math.c:1:0: error: -mpreferred-stack-boundary=3 is not between 4 
> > > and 12
>
> I was able to reproduce this failure on pre-7.1 versions of GCC.  It
> seems that when:
> 1. code is using doubles
> 2. setting -mpreferred-stack-boundary=3 -mno-sse2, ie. 8B stack alignment
> then GCC produces that error:
> https://godbolt.org/z/7T8nbH
>

Also, I suspect that very error solves the mystery of "why was 16B
stack alignment ever specified?"
-- 
Thanks,
~Nick Desaulniers


Re: AMDGPU and 16B stack alignment

2019-10-15 Thread Nick Desaulniers
On Tue, Oct 15, 2019 at 12:19 AM Arnd Bergmann  wrote:
>
> On Tue, Oct 15, 2019 at 9:08 AM S, Shirish  wrote:
> > On 10/15/2019 3:52 AM, Nick Desaulniers wrote:
>
> > My gcc build fails with below errors:
> >
> > dcn_calcs.c:1:0: error: -mpreferred-stack-boundary=3 is not between 4 and 12
> >
> > dcn_calc_math.c:1:0: error: -mpreferred-stack-boundary=3 is not between 4 
> > and 12

I was able to reproduce this failure on pre-7.1 versions of GCC.  It
seems that when:
1. code is using doubles
2. setting -mpreferred-stack-boundary=3 -mno-sse2, ie. 8B stack alignment
then GCC produces that error:
https://godbolt.org/z/7T8nbH

That's already a tall order of constraints, so it's understandable
that the compiler would just error likely during instruction
selection, but was eventually taught how to solve such constraints.

> >
> > While GPF observed on clang builds seem to be fixed.

Thanks for the report.  Your testing these patches is invaluable, Shirish!

>
> Ok, so it seems that gcc insists on having at least 2^4 bytes stack
> alignment when
> SSE is enabled on x86-64, but does not actually rely on that for
> correct operation
> unless it's using sse2. So -msse always has to be paired with
>  -mpreferred-stack-boundary=3.

Seemingly only for older versions of GCC, pre 7.1.

>
> For clang, it sounds like the opposite is true: when passing 16 byte
> stack alignment
> and having sse/sse2 enabled, it requires the incoming stack to be 16
> byte aligned,

I don't think it requires the incoming stack to be 16B aligned for
sse2, I think it requires the incoming and current stack alignment to
match. Today it does not, which is why we observe GPFs.

> but passing 8 byte alignment makes it do the right thing.
>
> So, should we just always pass $(call cc-option, -mpreferred-stack-boundary=4)
> to get the desired outcome on both?

Hmmm...I would have liked to remove it outright, as it is an ABI
mismatch that is likely to result in instability and non-fun-to-debug
runtime issues in the future.  I suspect my patch does work for GCC
7.1+.  The question is: Do we want to either:
1. mark AMDGPU broken for GCC < 7.1, or
2. continue supporting it via stack alignment mismatch?

2 is brittle, and may break at any point in the future, but if it's
working for someone it does make me feel bad to outright disable it.
What I'd imagine 2 looks like is (pseudo code in a Makefile):

if CC_IS_GCC && GCC_VERSION < 7.1:
  set stack alignment to 16B and hope for the best

So my diff would be amended to keep the stack alignment flags, but
only to support GCC < 7.1.  And that assumes my change compiles with
GCC 7.1+. (Looks like it does for me locally with GCC 8.3, but I would
feel even more confident if someone with hardware to test on and GCC
7.1+ could boot test).
-- 
Thanks,
~Nick Desaulniers
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 1/8] drm/amdgpu: update amdgpu_discovery to handle revision

2019-10-15 Thread Tuikov, Luben
Patches 1-7: Looks good.
Reviewed-by: Luben Tuikov 

Patch 8: NAK! for the same exact reason as the previous review. No changes to 
NAK reasoning from previous review.

Regards,
Luben

On 2019-10-13 11:21 p.m., Tianci Yin wrote:
> From: "Tianci.Yin" 
> 
> update amdgpu_discovery to get IP revision.
> 
> Change-Id: If8152103d03b58e1dc0f32db63625e290f5f08a0
> Reviewed-by: Alex Deucher 
> Signed-off-by: Tianci.Yin 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 4 +++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h | 2 +-
>  2 files changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> index 71198c5318e1..ddd8364102a2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> @@ -333,7 +333,7 @@ int amdgpu_discovery_reg_base_init(struct amdgpu_device 
> *adev)
>  }
>  
>  int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id,
> - int *major, int *minor)
> + int *major, int *minor, int *revision)
>  {
>   struct binary_header *bhdr;
>   struct ip_discovery_header *ihdr;
> @@ -369,6 +369,8 @@ int amdgpu_discovery_get_ip_version(struct amdgpu_device 
> *adev, int hw_id,
>   *major = ip->major;
>   if (minor)
>   *minor = ip->minor;
> + if (revision)
> + *revision = ip->revision;
>   return 0;
>   }
>   ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 
> 1);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
> index 5a6693d7d269..ba78e15d9b05 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
> @@ -30,7 +30,7 @@ int amdgpu_discovery_init(struct amdgpu_device *adev);
>  void amdgpu_discovery_fini(struct amdgpu_device *adev);
>  int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev);
>  int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id,
> -int *major, int *minor);
> +int *major, int *minor, int *revision);
>  int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev);
>  
>  #endif /* __AMDGPU_DISCOVERY__ */
> 

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 8/8] drm/amdgpu/psp: add psp memory training implementation

2019-10-15 Thread Tuikov, Luben
On 2019-10-13 11:21 p.m., Tianci Yin wrote:
> From: "Tianci.Yin" 
> 
> add memory training implementation code to save resume time.
> 
> Change-Id: I625794a780b11d824ab57ef39cc33b872c6dc6c9
> Reviewed-by: Alex Deucher 
> Signed-off-by: Tianci.Yin 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h |   1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |   9 ++
>  drivers/gpu/drm/amd/amdgpu/psp_v11_0.c  | 159 
>  3 files changed, 169 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 8704f93cabf2..c2b776fd82b5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -151,6 +151,7 @@ extern uint amdgpu_sdma_phase_quantum;
>  extern char *amdgpu_disable_cu;
>  extern char *amdgpu_virtual_display;
>  extern uint amdgpu_pp_feature_mask;
> +extern uint amdgpu_force_long_training;
>  extern int amdgpu_job_hang_limit;
>  extern int amdgpu_lbpw;
>  extern int amdgpu_compute_multipipe;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index da7cbee25c61..c7d086569acb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -127,6 +127,7 @@ char *amdgpu_disable_cu = NULL;
>  char *amdgpu_virtual_display = NULL;
>  /* OverDrive(bit 14) disabled by default*/
>  uint amdgpu_pp_feature_mask = 0xbfff;
> +uint amdgpu_force_long_training = 0;
>  int amdgpu_job_hang_limit = 0;
>  int amdgpu_lbpw = -1;
>  int amdgpu_compute_multipipe = -1;
> @@ -390,6 +391,14 @@ module_param_named(sched_hw_submission, 
> amdgpu_sched_hw_submission, int, 0444);
>  MODULE_PARM_DESC(ppfeaturemask, "all power features enabled (default))");
>  module_param_named(ppfeaturemask, amdgpu_pp_feature_mask, uint, 0444);
>  
> +/**
> + * DOC: forcelongtraining (uint)
> + * Force long memory training in resume.
> + * The default is zero, indicates short training in resume.
> + */
> +MODULE_PARM_DESC(forcelongtraining, "force memory long training");
> +module_param_named(forcelongtraining, amdgpu_force_long_training, uint, 
> 0444);
> +
>  /**
>   * DOC: pcie_gen_cap (uint)
>   * Override PCIE gen speed capabilities. See the CAIL flags in 
> drivers/gpu/drm/amd/include/amd_pcie.h.
> diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c 
> b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
> index 2ba0f68ced10..b7efaa3e913c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
> @@ -902,6 +902,162 @@ static int psp_v11_0_rlc_autoload_start(struct 
> psp_context *psp)
>   return psp_rlc_autoload_start(psp);
>  }
>  
> +static int psp_v11_0_memory_training_send_msg(struct psp_context *psp, int 
> msg)
> +{
> + int ret = 0;
> + int i = 0;
> + uint32_t data_32 = 0;

NAK!

Leave all of those integer variables uninitialized.

> + struct amdgpu_device *adev = psp->adev;
> +
> + data_32 = (psp->mem_train_ctx.c2p_train_data_offset >> 20);
> + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, data_32);
> + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, msg);
> +
> + /*max 5s*/
> + while (i < 50) {
> + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, 
> mmMP0_SMN_C2PMSG_35),
> +0x8000, 0x8000, false);
> + if (ret == 0)
> + break;
> + i++;
> + }

NAK! 

For-loop please:

for (i = 0; i < 50; i++) {
ret = ...;
}

Regards,
Luben

> + DRM_DEBUG("%s training %s, cost %d * %dms.\n",
> +   (msg == PSP_BL__DRAM_SHORT_TRAIN) ? "short" : "long",
> +   (ret == 0) ? "succeed" : "failed",
> +   i, adev->usec_timeout/1000);
> + return ret;
> +}
> +
> +static int psp_v11_0_memory_training_fini(struct psp_context *psp)
> +{
> + int ret = 0;
> + struct psp_memory_training_context *ctx = >mem_train_ctx;
> +
> + ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT;
> + if(ctx->sys_cache) {
> + kfree(ctx->sys_cache);
> + ctx->sys_cache = NULL;
> + }
> +
> + return ret;
> +}
> +
> +static int psp_v11_0_memory_training_init(struct psp_context *psp)
> +{
> + int ret = 0;
> + struct psp_memory_training_context *ctx = >mem_train_ctx;
> +
> + if(ctx->init != PSP_MEM_TRAIN_RESERVE_SUCCESS) {
> + DRM_DEBUG("memory training does not support!\n");
> + return 0;
> + }
> +
> + ctx->sys_cache = kzalloc(ctx->train_data_size, GFP_KERNEL);
> + if(ctx->sys_cache == NULL) {
> + DRM_ERROR("alloc mem_train_ctx.sys_cache failed(%d)!\n", ret);
> + ret = -ENOMEM;
> + goto Err_out;
> + }
> +
> + 
> DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n",
> +   ctx->train_data_size,
> +   ctx->p2c_train_data_offset,
> +   ctx->c2p_train_data_offset);
> + ctx->init = 

Re: [PATCH] drm/amdgpu/display: fix build error caused by CONFIG_DRM_AMD_DC_DCN2_1

2019-10-15 Thread Lakha, Bhawanpreet
Reviewed-by: Bhawanpreet Lakha 

On 2019-10-15 12:51 p.m., Hersen Wu wrote:
> when CONFIG_DRM_AMD_DC_DCN2_1 is not enabled in .config,
> there is a build error. struct dpm_clocks should not be
> guarded.
>
> Signed-off-by: Hersen Wu 
> ---
>   drivers/gpu/drm/amd/display/dc/dm_pp_smu.h | 3 ---
>   1 file changed, 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h 
> b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
> index 24d65dbbd749..ef7df9ef6d7e 100644
> --- a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
> +++ b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
> @@ -249,8 +249,6 @@ struct pp_smu_funcs_nv {
>   };
>   #endif
>   
> -#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
> -
>   #define PP_SMU_NUM_SOCCLK_DPM_LEVELS  8
>   #define PP_SMU_NUM_DCFCLK_DPM_LEVELS  8
>   #define PP_SMU_NUM_FCLK_DPM_LEVELS4
> @@ -288,7 +286,6 @@ struct pp_smu_funcs_rn {
>   enum pp_smu_status (*get_dpm_clock_table) (struct pp_smu *pp,
>   struct dpm_clocks *clock_table);
>   };
> -#endif
>   
>   struct pp_smu_funcs {
>   struct pp_smu ctx;
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdgpu/display: fix build error caused by CONFIG_DRM_AMD_DC_DCN2_1

2019-10-15 Thread Hersen Wu
when CONFIG_DRM_AMD_DC_DCN2_1 is not enabled in .config,
there is a build error. struct dpm_clocks should not be
guarded.

Signed-off-by: Hersen Wu 
---
 drivers/gpu/drm/amd/display/dc/dm_pp_smu.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h 
b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
index 24d65dbbd749..ef7df9ef6d7e 100644
--- a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
+++ b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
@@ -249,8 +249,6 @@ struct pp_smu_funcs_nv {
 };
 #endif
 
-#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
-
 #define PP_SMU_NUM_SOCCLK_DPM_LEVELS  8
 #define PP_SMU_NUM_DCFCLK_DPM_LEVELS  8
 #define PP_SMU_NUM_FCLK_DPM_LEVELS4
@@ -288,7 +286,6 @@ struct pp_smu_funcs_rn {
enum pp_smu_status (*get_dpm_clock_table) (struct pp_smu *pp,
struct dpm_clocks *clock_table);
 };
-#endif
 
 struct pp_smu_funcs {
struct pp_smu ctx;
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[ANNOUNCE] xf86-video-ati 19.1.0

2019-10-15 Thread Michel Dänzer

I'm pleased to announce the 19.1.0 release of xf86-video-ati, the Xorg
driver for ATI/AMD Radeon GPUs supported by the radeon kernel driver.
This release supports xserver versions 1.13-1.20.

There are no big changes in this release, just fixes and other minor
improvements.

Thanks to everybody who contributed to this release in any way!


NOTE:

As of September, I'm no longer working for AMD but for Red Hat's
graphics/input infrastructure team. I'm hoping that someone from my
former team at AMD will step up to maintain this driver.


Flora Cui (1):
  dri2: reply to client for WaitMSC request in any case

Michel Dänzer (9):
  Retry get_fb_ptr in get_fb
  dri3: Always flush glamor before sharing pixmap storage with clients
  dri2: Re-use previous CRTC when possible if pick_best_crtc returns NULL
  Remove dri2_drawable_crtc parameter consider_disabled
  present: Check that we can get a KMS FB for flipping
  Don't disable page flipping completely with SW cursor
  Don't set up black scanout buffer if LeaveVT is called from CloseScreen
  Don't unreference FBs of pixmaps from different screens in LeaveVT
  Bump version for 19.1.0 release

git tag: xf86-video-ati-19.1.0

https://xorg.freedesktop.org/archive/individual/driver/xf86-video-ati-19.1.0.tar.bz2
MD5:  6e49d3c2839582af415ceded76e626e6  xf86-video-ati-19.1.0.tar.bz2
SHA1: aea1d11c05531b03f2eb67c6785cddf6d7f30e5f  xf86-video-ati-19.1.0.tar.bz2
SHA256: 659f5a1629eea5f5334d9b39b18e6807a63aa1efa33c1236d9cc53acbb223c49  
xf86-video-ati-19.1.0.tar.bz2
SHA512: 
73a81f6c492daf2e89067fb52b3033dc0fe6841f109627ddca1aee54a45a738c8c134443753a2a2aaa2c131e1d560057ebc76351ff2304c16407df3ff568fcd6
  xf86-video-ati-19.1.0.tar.bz2
PGP:  
https://xorg.freedesktop.org/archive/individual/driver/xf86-video-ati-19.1.0.tar.bz2.sig

https://xorg.freedesktop.org/archive/individual/driver/xf86-video-ati-19.1.0.tar.gz
MD5:  c4172d2b6883a6e8e57c737248b3c9c8  xf86-video-ati-19.1.0.tar.gz
SHA1: 6c5c06555c2aed695b28b5b5d617a994e9926410  xf86-video-ati-19.1.0.tar.gz
SHA256: c05c6e0c396a0148113f1836cfab7f2e43f784c9b7041f11e9cab40a4bc0c90f  
xf86-video-ati-19.1.0.tar.gz
SHA512: 
c382c68ed5f3a690b293e3b56dfdf71f5b279f52da6db925cb5302e595003f69451670fe1ec9546ad4d91cb328b36f547757a69d70ed8b4ade75e613e302
  xf86-video-ati-19.1.0.tar.gz
PGP:  
https://xorg.freedesktop.org/archive/individual/driver/xf86-video-ati-19.1.0.tar.gz.sig


-- 
Earthling Michel Dänzer   |   https://redhat.com
Libre software enthusiast | Mesa and X developer


signature.asc
Description: This is a digitally signed message part
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdgpu/display: hook renoir dc to pplib funcs

2019-10-15 Thread Lakha, Bhawanpreet
Reviewed-by: Bhawanpreet Lakha 

On 2019-10-15 11:04 a.m., Hersen Wu wrote:
> enable dc get dpm clock table and set dcn watermarks
> via pplib.
>
> Signed-off-by: Hersen Wu 
> ---
>   .../amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c  | 93 +++
>   drivers/gpu/drm/amd/display/dc/dm_pp_smu.h|  2 +-
>   2 files changed, 94 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
> index 95564b8de3ce..7add40dea9b7 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
> @@ -891,6 +891,90 @@ enum pp_smu_status pp_nv_get_uclk_dpm_states(struct 
> pp_smu *pp,
>   return PP_SMU_RESULT_FAIL;
>   }
>   
> +#ifdef CONFIG_DRM_AMD_DC_DCN2_1
> +enum pp_smu_status pp_rn_get_dpm_clock_table(
> + struct pp_smu *pp, struct dpm_clocks *clock_table)
> +{
> + const struct dc_context *ctx = pp->dm;
> + struct amdgpu_device *adev = ctx->driver_context;
> + struct smu_context *smu = >smu;
> +
> + if (!smu->ppt_funcs)
> + return PP_SMU_RESULT_UNSUPPORTED;
> +
> + if (!smu->ppt_funcs->get_dpm_clock_table)
> + return PP_SMU_RESULT_UNSUPPORTED;
> +
> + if (!smu->ppt_funcs->get_dpm_clock_table(smu, clock_table))
> + return PP_SMU_RESULT_OK;
> +
> + return PP_SMU_RESULT_FAIL;
> +}
> +
> +enum pp_smu_status pp_rn_set_wm_ranges(struct pp_smu *pp,
> + struct pp_smu_wm_range_sets *ranges)
> +{
> + const struct dc_context *ctx = pp->dm;
> + struct amdgpu_device *adev = ctx->driver_context;
> + struct smu_context *smu = >smu;
> + struct dm_pp_wm_sets_with_clock_ranges_soc15 wm_with_clock_ranges;
> + struct dm_pp_clock_range_for_dmif_wm_set_soc15 *wm_dce_clocks =
> + wm_with_clock_ranges.wm_dmif_clocks_ranges;
> + struct dm_pp_clock_range_for_mcif_wm_set_soc15 *wm_soc_clocks =
> + wm_with_clock_ranges.wm_mcif_clocks_ranges;
> + int32_t i;
> +
> + if (!smu->funcs)
> + return PP_SMU_RESULT_UNSUPPORTED;
> +
> + wm_with_clock_ranges.num_wm_dmif_sets = ranges->num_reader_wm_sets;
> + wm_with_clock_ranges.num_wm_mcif_sets = ranges->num_writer_wm_sets;
> +
> + for (i = 0; i < wm_with_clock_ranges.num_wm_dmif_sets; i++) {
> + if (ranges->reader_wm_sets[i].wm_inst > 3)
> + wm_dce_clocks[i].wm_set_id = WM_SET_A;
> + else
> + wm_dce_clocks[i].wm_set_id =
> + ranges->reader_wm_sets[i].wm_inst;
> +
> + wm_dce_clocks[i].wm_min_dcfclk_clk_in_khz =
> + ranges->reader_wm_sets[i].min_drain_clk_mhz;
> +
> + wm_dce_clocks[i].wm_max_dcfclk_clk_in_khz =
> + ranges->reader_wm_sets[i].max_drain_clk_mhz;
> +
> + wm_dce_clocks[i].wm_min_mem_clk_in_khz =
> + ranges->reader_wm_sets[i].min_fill_clk_mhz;
> +
> + wm_dce_clocks[i].wm_max_mem_clk_in_khz =
> + ranges->reader_wm_sets[i].max_fill_clk_mhz;
> + }
> +
> + for (i = 0; i < wm_with_clock_ranges.num_wm_mcif_sets; i++) {
> + if (ranges->writer_wm_sets[i].wm_inst > 3)
> + wm_soc_clocks[i].wm_set_id = WM_SET_A;
> + else
> + wm_soc_clocks[i].wm_set_id =
> + ranges->writer_wm_sets[i].wm_inst;
> + wm_soc_clocks[i].wm_min_socclk_clk_in_khz =
> + ranges->writer_wm_sets[i].min_fill_clk_mhz;
> +
> + wm_soc_clocks[i].wm_max_socclk_clk_in_khz =
> + ranges->writer_wm_sets[i].max_fill_clk_mhz;
> +
> + wm_soc_clocks[i].wm_min_mem_clk_in_khz =
> + ranges->writer_wm_sets[i].min_drain_clk_mhz;
> +
> + wm_soc_clocks[i].wm_max_mem_clk_in_khz =
> + ranges->writer_wm_sets[i].max_drain_clk_mhz;
> + }
> +
> + smu_set_watermarks_for_clock_ranges(>smu, _with_clock_ranges);
> +
> + return PP_SMU_RESULT_OK;
> +}
> +#endif
> +
>   void dm_pp_get_funcs(
>   struct dc_context *ctx,
>   struct pp_smu_funcs *funcs)
> @@ -935,6 +1019,15 @@ void dm_pp_get_funcs(
>   funcs->nv_funcs.set_pstate_handshake_support = 
> pp_nv_set_pstate_handshake_support;
>   break;
>   #endif
> +
> +#ifdef CONFIG_DRM_AMD_DC_DCN2_1
> + case DCN_VERSION_2_1:
> + funcs->ctx.ver = PP_SMU_VER_RN;
> + funcs->rn_funcs.pp_smu.dm = ctx;
> + funcs->rn_funcs.set_wm_ranges = pp_rn_set_wm_ranges;
> + funcs->rn_funcs.get_dpm_clock_table = pp_rn_get_dpm_clock_table;
> + break;
> +#endif
>   default:
>   DRM_ERROR("smu version is not supported !\n");
>   break;
> diff --git 

Compilation breaks on drm-next by commit 002c988

2019-10-15 Thread Grodzovsky, Andrey
Hersen - the change 002c988 'drm/amdgpu/powerplay: add renoir funcs to 
support dc' seems to break compilation on drm-next because 
renoir_get_dpm_clock_table uses defines and structs from 
amd/display/dc/dm_pp_smu.h which are under CONFIG_DRM_AMD_DC_DCN2_1

Andrey

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdgpu/display: hook renoir dc to pplib funcs

2019-10-15 Thread Hersen Wu
enable dc get dpm clock table and set dcn watermarks
via pplib.

Signed-off-by: Hersen Wu 
---
 .../amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c  | 93 +++
 drivers/gpu/drm/amd/display/dc/dm_pp_smu.h|  2 +-
 2 files changed, 94 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
index 95564b8de3ce..7add40dea9b7 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
@@ -891,6 +891,90 @@ enum pp_smu_status pp_nv_get_uclk_dpm_states(struct pp_smu 
*pp,
return PP_SMU_RESULT_FAIL;
 }
 
+#ifdef CONFIG_DRM_AMD_DC_DCN2_1
+enum pp_smu_status pp_rn_get_dpm_clock_table(
+   struct pp_smu *pp, struct dpm_clocks *clock_table)
+{
+   const struct dc_context *ctx = pp->dm;
+   struct amdgpu_device *adev = ctx->driver_context;
+   struct smu_context *smu = >smu;
+
+   if (!smu->ppt_funcs)
+   return PP_SMU_RESULT_UNSUPPORTED;
+
+   if (!smu->ppt_funcs->get_dpm_clock_table)
+   return PP_SMU_RESULT_UNSUPPORTED;
+
+   if (!smu->ppt_funcs->get_dpm_clock_table(smu, clock_table))
+   return PP_SMU_RESULT_OK;
+
+   return PP_SMU_RESULT_FAIL;
+}
+
+enum pp_smu_status pp_rn_set_wm_ranges(struct pp_smu *pp,
+   struct pp_smu_wm_range_sets *ranges)
+{
+   const struct dc_context *ctx = pp->dm;
+   struct amdgpu_device *adev = ctx->driver_context;
+   struct smu_context *smu = >smu;
+   struct dm_pp_wm_sets_with_clock_ranges_soc15 wm_with_clock_ranges;
+   struct dm_pp_clock_range_for_dmif_wm_set_soc15 *wm_dce_clocks =
+   wm_with_clock_ranges.wm_dmif_clocks_ranges;
+   struct dm_pp_clock_range_for_mcif_wm_set_soc15 *wm_soc_clocks =
+   wm_with_clock_ranges.wm_mcif_clocks_ranges;
+   int32_t i;
+
+   if (!smu->funcs)
+   return PP_SMU_RESULT_UNSUPPORTED;
+
+   wm_with_clock_ranges.num_wm_dmif_sets = ranges->num_reader_wm_sets;
+   wm_with_clock_ranges.num_wm_mcif_sets = ranges->num_writer_wm_sets;
+
+   for (i = 0; i < wm_with_clock_ranges.num_wm_dmif_sets; i++) {
+   if (ranges->reader_wm_sets[i].wm_inst > 3)
+   wm_dce_clocks[i].wm_set_id = WM_SET_A;
+   else
+   wm_dce_clocks[i].wm_set_id =
+   ranges->reader_wm_sets[i].wm_inst;
+
+   wm_dce_clocks[i].wm_min_dcfclk_clk_in_khz =
+   ranges->reader_wm_sets[i].min_drain_clk_mhz;
+
+   wm_dce_clocks[i].wm_max_dcfclk_clk_in_khz =
+   ranges->reader_wm_sets[i].max_drain_clk_mhz;
+
+   wm_dce_clocks[i].wm_min_mem_clk_in_khz =
+   ranges->reader_wm_sets[i].min_fill_clk_mhz;
+
+   wm_dce_clocks[i].wm_max_mem_clk_in_khz =
+   ranges->reader_wm_sets[i].max_fill_clk_mhz;
+   }
+
+   for (i = 0; i < wm_with_clock_ranges.num_wm_mcif_sets; i++) {
+   if (ranges->writer_wm_sets[i].wm_inst > 3)
+   wm_soc_clocks[i].wm_set_id = WM_SET_A;
+   else
+   wm_soc_clocks[i].wm_set_id =
+   ranges->writer_wm_sets[i].wm_inst;
+   wm_soc_clocks[i].wm_min_socclk_clk_in_khz =
+   ranges->writer_wm_sets[i].min_fill_clk_mhz;
+
+   wm_soc_clocks[i].wm_max_socclk_clk_in_khz =
+   ranges->writer_wm_sets[i].max_fill_clk_mhz;
+
+   wm_soc_clocks[i].wm_min_mem_clk_in_khz =
+   ranges->writer_wm_sets[i].min_drain_clk_mhz;
+
+   wm_soc_clocks[i].wm_max_mem_clk_in_khz =
+   ranges->writer_wm_sets[i].max_drain_clk_mhz;
+   }
+
+   smu_set_watermarks_for_clock_ranges(>smu, _with_clock_ranges);
+
+   return PP_SMU_RESULT_OK;
+}
+#endif
+
 void dm_pp_get_funcs(
struct dc_context *ctx,
struct pp_smu_funcs *funcs)
@@ -935,6 +1019,15 @@ void dm_pp_get_funcs(
funcs->nv_funcs.set_pstate_handshake_support = 
pp_nv_set_pstate_handshake_support;
break;
 #endif
+
+#ifdef CONFIG_DRM_AMD_DC_DCN2_1
+   case DCN_VERSION_2_1:
+   funcs->ctx.ver = PP_SMU_VER_RN;
+   funcs->rn_funcs.pp_smu.dm = ctx;
+   funcs->rn_funcs.set_wm_ranges = pp_rn_set_wm_ranges;
+   funcs->rn_funcs.get_dpm_clock_table = pp_rn_get_dpm_clock_table;
+   break;
+#endif
default:
DRM_ERROR("smu version is not supported !\n");
break;
diff --git a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h 
b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
index c03a441ee638..24d65dbbd749 100644
--- a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
+++ 

Re: drm/amdgpu: Fix crash on SRIOV for ERREVENT_ATHUB_INTERRUPT interrupt.

2019-10-15 Thread Alex Deucher
On Tue, Oct 15, 2019 at 10:27 AM Andrey Grodzovsky
 wrote:
>
> Ignore the ERREVENT_ATHUB_INTERRUPT for systems without RAS.
>
> Signed-off-by: Andrey Grodzovsky 
> Reviewed-and-tested-by: Jack Zhang 

Acked-by: Alex Deucher 

>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 6 ++
>  1 file changed, 6 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index 84d8c33..cd84332 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -1844,6 +1844,12 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
>
>  void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
>  {
> +   uint32_t hw_supported, supported;
> +
> +   amdgpu_ras_check_supported(adev, _supported, );
> +   if (!hw_supported)
> +   return;
> +
> if (atomic_cmpxchg(_ras_in_intr, 0, 1) == 0) {
> DRM_WARN("RAS event of type ERREVENT_ATHUB_INTERRUPT 
> detected!\n");
>
> --
> 2.7.4
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

drm/amdgpu: Fix crash on SRIOV for ERREVENT_ATHUB_INTERRUPT interrupt.

2019-10-15 Thread Andrey Grodzovsky
Ignore the ERREVENT_ATHUB_INTERRUPT for systems without RAS.

Signed-off-by: Andrey Grodzovsky 
Reviewed-and-tested-by: Jack Zhang 

---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 84d8c33..cd84332 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1844,6 +1844,12 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
 
 void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
 {
+   uint32_t hw_supported, supported;
+
+   amdgpu_ras_check_supported(adev, &hw_supported, &supported);
+   if (!hw_supported)
+   return;
+
if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) {
DRM_WARN("RAS event of type ERREVENT_ATHUB_INTERRUPT 
detected!\n");
 
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 1/2] drm/amdgpu/uvd6: fix allocation size in enc ring test

2019-10-15 Thread James Zhu

On 2019-10-14 9:04 a.m., Koenig, Christian wrote:
> Am 14.10.19 um 15:01 schrieb Alex Deucher:
>> On Mon, Oct 14, 2019 at 5:06 AM Christian König
>>  wrote:
>>> Am 11.10.19 um 22:50 schrieb Alex Deucher:
 We need to allocate a large enough buffer for the
 session info, otherwise the IB test can overwrite
 other memory.

 Bug: https://bugzilla.kernel.org/show_bug.cgi?id=204241
 Signed-off-by: Alex Deucher 
>>> Acked-by: Christian König  for the series.
>> + Leo, James
>>
>> Seems like we still overwrite the buffer.  Do you know how big the
>> session buffer needs to be?  Is it different for UVD and VCN?

I will check with Firmware team.

James

> At least originally we allocated a separate 4KB BO in VRAM for this. The
> message was quite large IIRC.
>
> Christian.
>
>> Alex
>>
 ---
 drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

 diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c 
 b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
 index 670784a78512..909bc2ce791f 100644
 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
 +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
 @@ -215,12 +215,12 @@ static int uvd_v6_0_enc_get_create_msg(struct 
 amdgpu_ring *ring, uint32_t handle
 uint64_t dummy;
 int i, r;

 - r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, );
 + r = amdgpu_job_alloc_with_ib(ring->adev, (ib_size_dw * 4) + 1024, 
 );
 if (r)
 return r;

 ib = >ibs[0];
 - dummy = ib->gpu_addr + 1024;
 + dummy = ib->gpu_addr + (ib_size_dw * 4);

 ib->length_dw = 0;
 ib->ptr[ib->length_dw++] = 0x0018;
 @@ -277,12 +277,12 @@ static int uvd_v6_0_enc_get_destroy_msg(struct 
 amdgpu_ring *ring,
 uint64_t dummy;
 int i, r;

 - r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, );
 + r = amdgpu_job_alloc_with_ib(ring->adev, (ib_size_dw * 4) + 1024, 
 );
 if (r)
 return r;

 ib = >ibs[0];
 - dummy = ib->gpu_addr + 1024;
 + dummy = ib->gpu_addr + (ib_size_dw * 4);

 ib->length_dw = 0;
 ib->ptr[ib->length_dw++] = 0x0018;
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amd/display: add NULL checks for clock manager pointer

2019-10-15 Thread Harry Wentland
On 2019-10-11 4:51 p.m., Alex Deucher wrote:
> From: Ahzo 
> 
> This fixes kernel NULL pointer dereferences on shutdown:
> RIP: 0010:build_audio_output.isra.0+0x97/0x110 [amdgpu]
> RIP: 0010:enable_link_dp+0x186/0x300 [amdgpu]
> 
> Signed-off-by: Ahzo 
> Signed-off-by: Alex Deucher 

Reviewed-by: Harry Wentland 

Harry

> ---
>  drivers/gpu/drm/amd/display/dc/core/dc_link.c   | 2 +-
>  drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c | 5 +++--
>  2 files changed, 4 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c 
> b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
> index 152c564a8344..8b58cfa3e98e 100644
> --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
> +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
> @@ -1510,7 +1510,7 @@ static enum dc_status enable_link_dp(
>  
>   pipe_ctx->stream_res.pix_clk_params.requested_sym_clk =
>   link_settings.link_rate * LINK_RATE_REF_FREQ_IN_KHZ;
> - if (!apply_seamless_boot_optimization)
> + if (state->clk_mgr && !apply_seamless_boot_optimization)
>   state->clk_mgr->funcs->update_clocks(state->clk_mgr, state, 
> false);
>  
>   dp_enable_link_phy(
> diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c 
> b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
> index f8c1b4f1b987..8d8fa10b5d86 100644
> --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
> +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
> @@ -1161,8 +1161,9 @@ static void build_audio_output(
>   }
>   }
>  
> - if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT ||
> - pipe_ctx->stream->signal == 
> SIGNAL_TYPE_DISPLAY_PORT_MST) {
> + if (state->clk_mgr &&
> + (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT ||
> + pipe_ctx->stream->signal == 
> SIGNAL_TYPE_DISPLAY_PORT_MST)) {
>   audio_output->pll_info.dp_dto_source_clock_in_khz =
>   state->clk_mgr->funcs->get_dp_ref_clk_frequency(
>   state->clk_mgr);
> 
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

RE: AMDGPU and 16B stack alignment

2019-10-15 Thread David Laight
From: Arnd Bergmann
> Sent: 15 October 2019 08:19
> 
> On Tue, Oct 15, 2019 at 9:08 AM S, Shirish  wrote:
> > On 10/15/2019 3:52 AM, Nick Desaulniers wrote:
> 
> > My gcc build fails with below errors:
> >
> > dcn_calcs.c:1:0: error: -mpreferred-stack-boundary=3 is not between 4 and 12
> >
> > dcn_calc_math.c:1:0: error: -mpreferred-stack-boundary=3 is not between 4 
> > and 12
> >
> > While GPF observed on clang builds seem to be fixed.
> 
> Ok, so it seems that gcc insists on having at least 2^4 bytes stack
> alignment when
> SSE is enabled on x86-64, but does not actually rely on that for
> correct operation
> unless it's using sse2. So -msse always has to be paired with
>  -mpreferred-stack-boundary=3.
> 
> For clang, it sounds like the opposite is true: when passing 16 byte
> stack alignment
> and having sse/sse2 enabled, it requires the incoming stack to be 16
> byte aligned,
> but passing 8 byte alignment makes it do the right thing.
> 
> So, should we just always pass $(call cc-option, -mpreferred-stack-boundary=4)
> to get the desired outcome on both?

It probably won't solve the problem.
You need to find all the asm blocks that call back into C and ensure they
maintain the required stack alignment.
This might be possible in the kernel, but is almost impossible in userspace.

ISTR that gcc arbitrarily changed the stack alignment for i386 a few years ago.
While it helped code generation it broke a lot of things.
I can't remember the correct set of options to get the stack alignment
code added only where it was needed (generates a double %bp frame).

David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, 
UK
Registration No: 1397386 (Wales)
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

RE: [PATCH 1/2] drm/amdgpu: add GFX_PIPELINE capacity check for updating gfx cgpg

2019-10-15 Thread Huang, Ray
Series are Reviewed-by: Huang Rui 

-Original Message-
From: Liang, Prike  
Sent: Tuesday, October 15, 2019 5:50 PM
To: amd-gfx@lists.freedesktop.org
Cc: Quan, Evan ; Feng, Kenneth ; 
Huang, Ray ; Liang, Prike 
Subject: [PATCH 1/2] drm/amdgpu: add GFX_PIPELINE capacity check for updating 
gfx cgpg

Before disable gfx pipeline power gating need check the flag 
AMD_PG_SUPPORT_GFX_PIPELINE.

Signed-off-by: Prike Liang 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index b577b69..de8f9d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -4296,7 +4296,8 @@ static void gfx_v9_0_update_gfx_cg_power_gating(struct 
amdgpu_device *adev,
gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
} else {
gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
-   gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
+   if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
+   gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
}
 
amdgpu_gfx_rlc_exit_safe_mode(adev);
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 2/2] drm/amdgpu: fix S3 failed as RLC safe mode entry stuck in polling gfx acq

2019-10-15 Thread Liang, Prike
Fix gfx cgpg setting sequence for RLC deadlock at safe mode entry in polling 
gfx response.
The patch can fix VCN IB test failed and DAL get display count failed issue.

Signed-off-by: Prike Liang 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 5 -
 drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 4 
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index de8f9d6..dd345fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -4287,9 +4287,6 @@ static void gfx_v9_0_update_gfx_cg_power_gating(struct 
amdgpu_device *adev,
 {
amdgpu_gfx_rlc_enter_safe_mode(adev);
 
-   if (is_support_sw_smu(adev) && !enable)
-   smu_set_gfx_cgpg(&adev->smu, enable);
-
if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
@@ -4566,8 +4563,6 @@ static int gfx_v9_0_set_powergating_state(void *handle,
gfx_v9_0_enable_cp_power_gating(adev, false);
 
/* update gfx cgpg state */
-   if (is_support_sw_smu(adev) && enable)
-   smu_set_gfx_cgpg(&adev->smu, enable);
gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
 
/* update mgcg state */
diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c 
b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
index 6cb5288..84d8aa2 100644
--- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
@@ -1188,6 +1188,7 @@ static int smu_hw_init(void *handle)
if (adev->flags & AMD_IS_APU) {
smu_powergate_sdma(&adev->smu, false);
smu_powergate_vcn(&adev->smu, false);
+   smu_set_gfx_cgpg(&adev->smu, true);
}
 
if (!smu->pm_enabled)
@@ -1350,6 +1351,9 @@ static int smu_resume(void *handle)
if (ret)
goto failed;
 
+   if (smu->is_apu)
+   smu_set_gfx_cgpg(&adev->smu, true);
+
mutex_unlock(>mutex);
 
pr_info("SMU is resumed successfully!\n");
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 1/2] drm/amdgpu: add GFX_PIPELINE capacity check for updating gfx cgpg

2019-10-15 Thread Liang, Prike
Before disable gfx pipeline power gating need check the flag 
AMD_PG_SUPPORT_GFX_PIPELINE.

Signed-off-by: Prike Liang 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index b577b69..de8f9d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -4296,7 +4296,8 @@ static void gfx_v9_0_update_gfx_cg_power_gating(struct 
amdgpu_device *adev,
gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
} else {
gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
-   gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
+   if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
+   gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
}
 
amdgpu_gfx_rlc_exit_safe_mode(adev);
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdgpu: No need to check gfxoff status after enable gfxoff feature

2019-10-15 Thread Wang, Kevin(Yang)
comment inline.

From: amd-gfx  on behalf of chen gong 

Sent: Monday, October 14, 2019 10:57 AM
To: amd-gfx@lists.freedesktop.org 
Cc: Gong, Curry 
Subject: [PATCH] drm/amdgpu: No need to check gfxoff status after enable gfxoff 
feature

smu_send_smc_msg(smu, SMU_MSG_AllowGfxOff) Just turn on a switch.

As to when GPU get into "GFXoff" will be up to drawing load.

So we can not sure which state GPU should be in after enable gfxoff
feature.

Signed-off-by: chen gong 
---
 drivers/gpu/drm/amd/powerplay/smu_v12_0.c | 9 -
 1 file changed, 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/smu_v12_0.c 
b/drivers/gpu/drm/amd/powerplay/smu_v12_0.c
index c9691d0..cac4269 100644
--- a/drivers/gpu/drm/amd/powerplay/smu_v12_0.c
+++ b/drivers/gpu/drm/amd/powerplay/smu_v12_0.c
@@ -244,15 +244,6 @@ static int smu_v12_0_gfx_off_control(struct smu_context 
*smu, bool enable)
 if (enable) {
 ret = smu_send_smc_msg(smu, SMU_MSG_AllowGfxOff);

-   /* confirm gfx is back to "off" state, timeout is 5 seconds */
-   while (!(smu_v12_0_get_gfxoff_status(smu) == 0)) {
-   msleep(10);
-   timeout--;
[kevin]:
the varible of "timeout" is not used after this patch, please remove it 
together.
after fixed.
Reviewed-by: Kevin Wang 

-   if (timeout == 0) {
-   DRM_ERROR("enable gfxoff timeout and 
failed!\n");
-   break;
-   }
-   }
 } else {
 ret = smu_send_smc_msg(smu, SMU_MSG_DisallowGfxOff);

--
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: AMDGPU and 16B stack alignment

2019-10-15 Thread Arnd Bergmann
On Tue, Oct 15, 2019 at 9:08 AM S, Shirish  wrote:
> On 10/15/2019 3:52 AM, Nick Desaulniers wrote:

> My gcc build fails with below errors:
>
> dcn_calcs.c:1:0: error: -mpreferred-stack-boundary=3 is not between 4 and 12
>
> dcn_calc_math.c:1:0: error: -mpreferred-stack-boundary=3 is not between 4 and 
> 12
>
> While GPF observed on clang builds seem to be fixed.

Ok, so it seems that gcc insists on having at least 2^4 bytes stack
alignment when
SSE is enabled on x86-64, but does not actually rely on that for
correct operation
unless it's using sse2. So -msse always has to be paired with
 -mpreferred-stack-boundary=3.

For clang, it sounds like the opposite is true: when passing 16 byte
stack alignment
and having sse/sse2 enabled, it requires the incoming stack to be 16
byte aligned,
but passing 8 byte alignment makes it do the right thing.

So, should we just always pass $(call cc-option, -mpreferred-stack-boundary=4)
to get the desired outcome on both?

   Arnd


Re: AMDGPU and 16B stack alignment

2019-10-15 Thread S, Shirish
Hi Nick,

On 10/15/2019 3:52 AM, Nick Desaulniers wrote:

Hello!

The x86 kernel is compiled with an 8B stack alignment via
`-mpreferred-stack-boundary=3` for GCC since 3.6-rc1 via
commit d9b0cde91c60 ("x86-64, gcc: Use
-mpreferred-stack-boundary=3 if supported")
or `-mstack-alignment=8` for Clang. Parts of the AMDGPU driver are
compiled with 16B stack alignment.

Generally, the stack alignment is part of the ABI. Linking together two
different translation units with differing stack alignment is dangerous,
particularly when the translation unit with the smaller stack alignment
makes calls into the translation unit with the larger stack alignment.
While 8B aligned stacks are sometimes also 16B aligned, they are not
always.

Multiple users have reported General Protection Faults (GPF) when using
the AMDGPU driver compiled with Clang. Clang is placing objects in stack
slots assuming the stack is 16B aligned, and selecting instructions that
require 16B aligned memory operands. At runtime, syscalls handling 8B
stack aligned code calls into code that assumes 16B stack alignment.
When the stack is a multiple of 8B but not 16B, these instructions
result in a GPF.

GCC doesn't select instructions with alignment requirements, so the GPFs
aren't observed, but it is still considered an ABI breakage to mix and
match stack alignment.

I have patches that basically remove -mpreferred-stack-boundary=4 and
-mstack-alignment=16 from AMDGPU:
https://github.com/ClangBuiltLinux/linux/issues/735#issuecomment-541247601
Yuxuan has tested with Clang and GCC and reported it fixes the GPF's observed.

My gcc build fails with below errors:

dcn_calcs.c:1:0: error: -mpreferred-stack-boundary=3 is not between 4 and 12

dcn_calc_math.c:1:0: error: -mpreferred-stack-boundary=3 is not between 4 and 12

While GPF observed on clang builds seem to be fixed.

--
Regards,
Shirish S



I've split the patch into 4; same commit message but different Fixes
tags so that they backport to stable on finer granularity. 2 questions
BEFORE I send the series:

1. Would you prefer 4 patches with unique `fixes` tags, or 1 patch?
2. Was there or is there still a good reason for the stack alignment mismatch?

(Further, I think we can use -msse2 for BOTH clang+gcc after my patch,
but I don't have hardware to test on. I'm happy to write/send the
follow up patch, but I'd need help testing).




___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdgpu: always reset asic when going into suspend

2019-10-15 Thread Daniel Drake
On Asus UX434DA (Ryzen7 3700U), upon resume from s2idle, the screen
turns on again and shows the pre-suspend image, but the display remains
frozen from that point onwards.

The kernel logs show errors:

 [drm] psp command failed and response status is (0x7)
 [drm] Fence fallback timer expired on ring sdma0
 [drm] Fence fallback timer expired on ring gfx
 amdgpu :03:00.0: [drm:amdgpu_ib_ring_tests] *ERROR* IB test failed on gfx 
(-22).
 [drm:process_one_work] *ERROR* ib ring test failed (-22).

This can also be reproduced with pm_test:
 # echo devices > /sys/power/pm_test
 # echo freeze > /sys/power/mem

The same reproducer causes the same problem on Asus X512DK (Ryzen5 3500U)
even though that model is normally able to suspend and resume OK via S3.

Experimenting, I observed that this error condition can be invoked on
any amdgpu product by executing in succession:

  amdgpu_device_suspend(drm_dev, true, true);
  amdgpu_device_resume(drm_dev, true, true);

i.e. it appears that the resume routine is unable to get the device out
of suspended state, except for the S3 suspend case where it presumably has
a bit of extra help from the firmware or hardware.

However, I also observed that the runtime suspend/resume routines work
OK when tested like this, which lead me to the key difference in these
two cases: the ASIC reset, which only happens in the runtime suspend path.

Since it takes less than 1ms, we should do the ASIC reset in all
suspend paths, fixing resume from s2idle on these products.

Link: https://bugs.freedesktop.org/show_bug.cgi?id=111811
Signed-off-by: Daniel Drake 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 5a1939dbd4e3..7f4870e974fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3082,15 +3082,16 @@ int amdgpu_device_suspend(struct drm_device *dev, bool 
suspend, bool fbcon)
 */
amdgpu_bo_evict_vram(adev);
 
+   amdgpu_asic_reset(adev);
+   r = amdgpu_asic_reset(adev);
+   if (r)
+   DRM_ERROR("amdgpu asic reset failed\n");
+
pci_save_state(dev->pdev);
if (suspend) {
/* Shut down the device */
pci_disable_device(dev->pdev);
pci_set_power_state(dev->pdev, PCI_D3hot);
-   } else {
-   r = amdgpu_asic_reset(adev);
-   if (r)
-   DRM_ERROR("amdgpu asic reset failed\n");
}
 
return 0;
-- 
2.20.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx