Re: [PATCH 6/8] drm/amdkfd: New IOCTL to allocate queue GWS

2019-05-30 Thread Dave Airlie
On Sat, 25 May 2019 at 05:48, Kuehling, Felix  wrote:
>
> On 2019-05-23 6:41 p.m., Zeng, Oak wrote:
> > Add a new kfd ioctl to allocate queue GWS. Queue
> > GWS is released on queue destroy.
> >
> > Change-Id: I60153c26a577992ad873e4292e759e5c3d5bbd15
> > Signed-off-by: Oak Zeng 
>
> Reviewed-by: Felix Kuehling 
>

btw just noticed in passing we are adding new features to kfd, is
there an open source developed userspace to go along with this, there
needs to a dev branch in public before new ioctls/uapi surfaces should
be added to the kernel.

The commits should probably have pointers to that branch.

Dave.
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdgpu: use new HMM APIs and helpers

2019-05-30 Thread Kuehling, Felix
This is a nice simplification. See a few comments inline.

On 2019-05-30 10:41 a.m., Yang, Philip wrote:
> HMM provides new APIs and helps in kernel 5.2-rc1 to simplify driver
> path. The old hmm APIs are deprecated and will be removed in future.
>
> Below are changes in driver:
>
> 1. Change hmm_vma_fault to hmm_range_register and hmm_range_fault which
> supports range with multiple vmas, remove the multiple vmas handle path
> and data structure.
> 2. Change hmm_vma_range_done to hmm_range_unregister.
> 3. Use default flags to avoid pre-fill pfn arrays.
> 4. Use new hmm_device_ helpers.
>
> Change-Id: I1a00f88459f3b96c9536933e9a17eb1ebaa78113
> Signed-off-by: Philip Yang 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c  |   1 -
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 140 +---
>   2 files changed, 53 insertions(+), 88 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> index 41ccee49a224..e0bb47ce570b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> @@ -519,7 +519,6 @@ void amdgpu_hmm_init_range(struct hmm_range *range)
>   range->flags = hmm_range_flags;
>   range->values = hmm_range_values;
>   range->pfn_shift = PAGE_SHIFT;
> - range->pfns = NULL;
>   INIT_LIST_HEAD(>list);
>   }
>   }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 7138dc1dd1f4..25a9fcb409c6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -711,8 +711,7 @@ struct amdgpu_ttm_tt {
>   struct task_struct  *usertask;
>   uint32_tuserflags;
>   #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
> - struct hmm_range*ranges;
> - int nr_ranges;
> + struct hmm_range*range;
>   #endif
>   };
>   
> @@ -725,57 +724,33 @@ struct amdgpu_ttm_tt {
>*/
>   #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
>   
> -/* Support Userptr pages cross max 16 vmas */
> -#define MAX_NR_VMAS  (16)
> -
>   int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
>   {
>   struct amdgpu_ttm_tt *gtt = (void *)ttm;
>   struct mm_struct *mm = gtt->usertask->mm;
>   unsigned long start = gtt->userptr;
> - unsigned long end = start + ttm->num_pages * PAGE_SIZE;
> - struct vm_area_struct *vma = NULL, *vmas[MAX_NR_VMAS];
> - struct hmm_range *ranges;
> - unsigned long nr_pages, i;
> - uint64_t *pfns, f;
> + struct vm_area_struct *vma;
> + struct hmm_range *range;
> + unsigned long i;
> + uint64_t *pfns;
>   int r = 0;
>   
>   if (!mm) /* Happens during process shutdown */
>   return -ESRCH;
>   
> - down_read(>mmap_sem);
> -
> - /* user pages may cross multiple VMAs */
> - gtt->nr_ranges = 0;
> - do {
> - unsigned long vm_start;
> -
> - if (gtt->nr_ranges >= MAX_NR_VMAS) {
> - DRM_ERROR("Too many VMAs in userptr range\n");
> - r = -EFAULT;
> - goto out;
> - }
> -
> - vm_start = vma ? vma->vm_end : start;
> - vma = find_vma(mm, vm_start);
> - if (unlikely(!vma || vm_start < vma->vm_start)) {
> - r = -EFAULT;
> - goto out;
> - }
> - vmas[gtt->nr_ranges++] = vma;
> - } while (end > vma->vm_end);
> -
> - DRM_DEBUG_DRIVER("0x%lx nr_ranges %d pages 0x%lx\n",
> - start, gtt->nr_ranges, ttm->num_pages);
> -
> + vma = find_vma(mm, start);
> + if (unlikely(!vma || start < vma->vm_start)) {
> + r = -EFAULT;
> + goto out;
> + }
>   if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
> - vmas[0]->vm_file)) {
> + vma->vm_file)) {
>   r = -EPERM;
>   goto out;
>   }
>   
> - ranges = kvmalloc_array(gtt->nr_ranges, sizeof(*ranges), GFP_KERNEL);
> - if (unlikely(!ranges)) {
> + range = kvmalloc(sizeof(*range), GFP_KERNEL | __GFP_ZERO);

A single range is pretty small. You can probably allocate that with 
kmalloc now (or kcalloc/kzalloc since you specified GFP_ZERO).


> + if (unlikely(!range)) {
>   r = -ENOMEM;
>   goto out;
>   }
> @@ -786,61 +761,52 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, 
> struct page **pages)
>   goto out_free_ranges;
>   }
>   
> - for (i = 0; i < gtt->nr_ranges; i++)
> - amdgpu_hmm_init_range([i]);
> + amdgpu_hmm_init_range(range);
> + range->default_flags = range->flags[HMM_PFN_VALID];
> + range->default_flags |= amdgpu_ttm_tt_is_readonly(ttm) ?
> + 0 : range->flags[HMM_PFN_WRITE];
> + range->pfn_flags_mask = 0;
> + range->pfns 

[PATCH] drm/amd/display: Don't set mode_changed=false if the stream was removed

2019-05-30 Thread Nicholas Kazlauskas
[Why]
When switching from vt to desktop with EDID emulation we can receive
an atomic commit such that we have a crtc where mode_changed = true.

During the dm_update_crtc_state disable pass we remove the stream from
the context and free it on the dm_new_crtc_state.

During the enable pass we compare the new provisional stream to the
dm_old_crtc_state->stream and determine that the stream is unchanged
and no scaling has been changed.

Following this, new_crtc_state->mode_changed is then set to false.
The connectors haven't changed and the CRTC active state hasn't changed
so drm_atomic_crtc_needs_modeset returns false, so we jump to
skip_modeset and we hit:

BUG_ON(dm_new_crtc_state->stream == NULL);

...since the old stream is gone from the context and the new stream is
also still NULL.

[How]
Ensure that we still a stream to reuse before checking if we can reuse
the old stream without a full modeset.

Cc: Roman Li 
Cc: Leo Li 
Cc: Harry Wentland 
Signed-off-by: Nicholas Kazlauskas 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 57359037ed7c..796f83ca7a4c 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -6248,7 +6248,17 @@ static int dm_update_crtc_state(struct 
amdgpu_display_manager *dm,
 
dm_new_crtc_state->abm_level = dm_new_conn_state->abm_level;
 
-   if (dc_is_stream_unchanged(new_stream, 
dm_old_crtc_state->stream) &&
+   /*
+* If we already removed the old stream from the context
+* (and set the new stream to NULL) then we can't reuse
+* the old stream even if the stream and scaling are unchanged.
+* We'll hit the BUG_ON and black screen.
+*
+* TODO: Refactor this function to allow this check to work
+* in all conditions.
+*/
+   if (dm_new_crtc_state->stream &&
+   dc_is_stream_unchanged(new_stream, 
dm_old_crtc_state->stream) &&
dc_is_stream_scaling_unchanged(new_stream, 
dm_old_crtc_state->stream)) {
new_crtc_state->mode_changed = false;
DRM_DEBUG_DRIVER("Mode change not required, setting 
mode_changed to %d",
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[bug report] drm/amd/display: Move link functions from dc to dc_link

2019-05-30 Thread Dan Carpenter
Hello Chris Park,

This is a semi-automatic email about new static checker warnings.

The patch f5692f69f673: "drm/amd/display: Move link functions from dc 
to dc_link" from May 10, 2019, leads to the following Smatch 
complaint:

drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link.c:2929 
dc_link_set_preferred_link_settings()
warn: variable dereferenced before check 'link_stream' (see line 2926)

drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link.c
  2911  for (i = 0; i < MAX_PIPES; i++) {
  2912  pipe = >current_state->res_ctx.pipe_ctx[i];
  2913  if (pipe->stream && pipe->stream->link) {
  2914  if (pipe->stream->link == link)
  2915  break;
  2916  }
  2917  }
  2918  
  2919  /* Stream not found */
  2920  if (i == MAX_PIPES)
  2921  return;
  2922  
  2923  link_stream = 
link->dc->current_state->res_ctx.pipe_ctx[i].stream;
  2924  
  2925  /* Cannot retrain link if backend is off */
  2926  if (link_stream->dpms_off)
^
Can link_stream be NULL?

  2927  return;
  2928  
  2929  if (link_stream)
^^^
We check here so maybe?

  2930  decide_link_settings(link_stream, _settings);
  2931  

regards,
dan carpenter
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdgpu: Remove call to memset after dma_alloc_coherent

2019-05-30 Thread Hariprasad Kelam
This patch fixes below warning reported by coccicheck

./drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c:63:13-31: WARNING:
dma_alloc_coherent use in ih -> ring already zeroes out memory,  so
memset is not needed

Signed-off-by: Hariprasad Kelam 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index 934dfdc..d922187 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -65,7 +65,6 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct 
amdgpu_ih_ring *ih,
if (ih->ring == NULL)
return -ENOMEM;
 
-   memset((void *)ih->ring, 0, ih->ring_size + 8);
ih->gpu_addr = dma_addr;
ih->wptr_addr = dma_addr + ih->ring_size;
ih->wptr_cpu = >ring[ih->ring_size / 4];
-- 
2.7.4



Re: [PATCH 2/7] drm/dp_mst: Register AUX devices for MST ports

2019-05-30 Thread Li, Sun peng (Leo)

Hey, sorry for my late response!

On 2019-05-16 5:40 p.m., Lyude Paul wrote:

>>if (old_pdt != port->pdt && !port->input) {
>> @@ -1220,6 +1268,8 @@ static void drm_dp_add_port(struct drm_dp_mst_branch
>> *mstb,
>>drm_connector_set_tile_property(port->connector);
>>}
>>(*mstb->mgr->cbs->register_connector)(port->connector);
> This is wrong: we always want to setup everything in the connector first
> before trying to register it, not after. Otherwise, things explode like so:

**snip**

> [  452.424461] [ cut here ]
> [  452.424464] sysfs group 'power' not found for kobject 'drm_dp_aux5'
> [  452.424471] WARNING: CPU: 3 PID: 1887 at fs/sysfs/group.c:256 
> sysfs_remove_group+0x76/0x80
> [  452.424473] Modules linked in: vfat fat elan_i2c i915(O) intel_rapl 
> x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel iTCO_wdt kvm mei_wdt 
> irqbypass iTCO_vendor_support crct10dif_pclmul wmi_bmof intel_wmi_thunderbolt 
> crc32_pclmul i2c_algo_bit ghash_clmulni_intel intel_cstate drm_kms_helper(O) 
> intel_uncore intel_rapl_perf btusb btrtl btbcm syscopyarea btintel 
> sysfillrect sysimgblt fb_sys_fops bluetooth drm(O) joydev mei_me idma64 
> ucsi_acpi thunderbolt ecdh_generic i2c_i801 intel_xhci_usb_role_switch 
> processor_thermal_device typec_ucsi intel_lpss_pci intel_soc_dts_iosf mei 
> roles intel_lpss typec intel_pch_thermal wmi thinkpad_acpi ledtrig_audio 
> rfkill int3403_thermal int340x_thermal_zone int3400_thermal acpi_thermal_rel 
> acpi_pad video pcc_cpufreq crc32c_intel nvme serio_raw uas e1000e nvme_core 
> usb_storage i2c_dev
> [  452.424492] CPU: 3 PID: 1887 Comm: unloadgpumod Tainted: G   O 
>  5.1.0Lyude-Test+ #1
> [  452.424494] Hardware name: LENOVO 20L8S2N800/20L8S2N800, BIOS N22ET35W 
> (1.12 ) 04/09/2018
> [  452.424496] RIP: 0010:sysfs_remove_group+0x76/0x80
> [  452.424498] Code: 48 89 df 5b 5d 41 5c e9 f8 bc ff ff 48 89 df e8 d0 bc ff 
> ff eb cb 49 8b 14 24 48 8b 75 00 48 c7 c7 08 a5 0c aa e8 44 00 d6 ff <0f> 0b 
> 5b 5d 41 5c c3 0f 1f 00 0f 1f 44 00 00 48 85 f6 74 31 41 54
> [  452.424500] RSP: 0018:a8bb81b5fb28 EFLAGS: 00010282
> [  452.424501] RAX:  RBX:  RCX: 
> 0006
> [  452.424502] RDX: 0007 RSI: 0086 RDI: 
> 981fde2d5a00
> [  452.424503] RBP: a9ea12e0 R08: 0792 R09: 
> 0046
> [  452.424504] R10: 0727 R11: a8bb81b5f9d0 R12: 
> 981fd5f77010
> [  452.424505] R13: 981fd6ebbedc R14: dead0200 R15: 
> dead0100
> [  452.424507] FS:  7f8cc1d8c740() GS:981fde2c() 
> knlGS:
> [  452.424508] CS:  0010 DS:  ES:  CR0: 80050033
> [  452.424509] CR2: 55b19d079a08 CR3: 00043b2a0002 CR4: 
> 003606e0
> [  452.424510] DR0:  DR1:  DR2: 
> 
> [  452.424511] DR3:  DR6: fffe0ff0 DR7: 
> 0400
> [  452.424512] Call Trace:
> [  452.424516]  device_del+0x75/0x360
> [  452.424518]  ? class_find_device+0x96/0xf0
> [  452.424520]  device_unregister+0x16/0x60
> [  452.424521]  device_destroy+0x3a/0x40
> [  452.424525]  drm_dp_aux_unregister_devnode+0xea/0x180 [drm_kms_helper]
> [  452.424534]  ? drm_dbg+0x87/0x90 [drm]
> [  452.424538]  drm_dp_mst_topology_put_port+0x5b/0x110 [drm_kms_helper]
> [  452.424543]  drm_dp_mst_topology_put_mstb+0xb6/0x180 [drm_kms_helper]
> [  452.424547]  drm_dp_mst_topology_mgr_set_mst+0x233/0x2b0 [drm_kms_helper]
> [  452.424551]  drm_dp_mst_topology_mgr_destroy+0x18/0xa0 [drm_kms_helper]
> [  452.424571]  intel_dp_encoder_flush_work+0x32/0xb0 [i915]
> [  452.424592]  intel_ddi_encoder_destroy+0x32/0x70 [i915]
> [  452.424600]  drm_mode_config_cleanup+0x51/0x2e0 [drm]
> [  452.424621]  intel_modeset_cleanup+0xc8/0x140 [i915]
> [  452.424633]  i915_driver_unload+0xa8/0x130 [i915]
> [  452.424645]  i915_pci_remove+0x1e/0x40 [i915]
> [  452.424647]  pci_device_remove+0x3b/0xc0
> [  452.424649]  device_release_driver_internal+0xe4/0x1d0
> [  452.424651]  pci_stop_bus_device+0x69/0x90
> [  452.424653]  pci_stop_and_remove_bus_device_locked+0x16/0x30
> [  452.424655]  remove_store+0x75/0x90
> [  452.424656]  kernfs_fop_write+0x116/0x190
> [  452.424658]  vfs_write+0xa5/0x1a0
> [  452.424660]  ksys_write+0x57/0xd0
> [  452.424663]  do_syscall_64+0x55/0x150
> [  452.424665]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
> [  452.424667] RIP: 0033:0x7f8cc1e7d038
> [  452.424668] Code: 89 02 48 c7 c0 ff ff ff ff eb b3 0f 1f 80 00 00 00 00 f3 
> 0f 1e fa 48 8d 05 e5 76 0d 00 8b 00 85 c0 75 17 b8 01 00 00 00 0f 05 <48> 3d 
> 00 f0 ff ff 77 58 c3 0f 1f 80 00 00 00 00 41 54 49 89 d4 55
> [  452.424670] RSP: 002b:7ffce4321218 EFLAGS: 0246 ORIG_RAX: 
> 0001
> [  452.424672] RAX: ffda RBX: 0002 RCX: 
> 7f8cc1e7d038
> [  452.424673] RDX: 0002 RSI: 

Re: [PATCH v15 00/17] arm64: untag user pointers passed to the kernel

2019-05-30 Thread Catalin Marinas
On Tue, May 28, 2019 at 04:14:45PM +0200, Andrey Konovalov wrote:
> Thanks for a lot of valuable input! I've read through all the replies
> and got somewhat lost. What are the changes I need to do to this
> series?
> 
> 1. Should I move untagging for memory syscalls back to the generic
> code so other arches would make use of it as well, or should I keep
> the arm64 specific memory syscalls wrappers and address the comments
> on that patch?

Keep them generic again but make sure we get agreement with Khalid on
the actual ABI implications for sparc.

> 2. Should I make untagging opt-in and controlled by a command line argument?

Opt-in, yes, but per task rather than kernel command line option.
prctl() is a possibility of opting in.

> 3. Should I "add Documentation/core-api/user-addresses.rst to describe
> proper care and handling of user space pointers with untagged_addr(),
> with examples based on all the cases seen so far in this series"?
> Which examples specifically should it cover?

I think we can leave 3 for now as not too urgent. What I'd like is for
Vincenzo's TBI user ABI document to go into a more common place since we
can expand it to cover both sparc and arm64. We'd need an arm64-specific
doc as well for things like prctl() and later MTE that sparc may support
differently.

-- 
Catalin


Re: [PATCH v15 05/17] arms64: untag user pointers passed to memory syscalls

2019-05-30 Thread Catalin Marinas
On Thu, May 30, 2019 at 10:05:55AM -0600, Khalid Aziz wrote:
> On 5/30/19 9:11 AM, Catalin Marinas wrote:
> > So if a database program is doing an anonymous mmap(PROT_TBI) of 100GB,
> > IIUC for sparc the faulted-in pages will have random colours (on 64-byte
> > granularity). Ignoring the information leak from prior uses of such
> > pages, it would be the responsibility of the db program to issue the
> > stxa. On arm64, since we also want to do this via malloc(), any large
> > allocation would require all pages to be faulted in so that malloc() can
> > set the write colour before being handed over to the user. That's what
> > we want to avoid and the user is free to repaint the memory as it likes.
> 
> On sparc, any newly allocated page is cleared along with any old tags on
> it. Since clearing tag happens automatically when page is cleared on
> sparc, clear_user_page() will need to execute additional stxa
> instructions to set a new tag. It is doable. In a way it is done already
> if page is being pre-colored with tag 0 always ;)

Ah, good to know. On arm64 we'd have to use different instructions,
although the same loop.

> Where would the pre-defined tag be stored - as part of address stored
> in vm_start or a new field in vm_area_struct?

I think we can discuss the details when we post the actual MTE patches.
In our internal hack we overloaded the VM_HIGH_ARCH_* flags and selected
CONFIG_ARCH_USES_HIGH_VMA_FLAGS (used for pkeys on x86).

For the time being, I'd rather restrict tagged addresses passed to
mmap() until we agreed that they have any meaning. If we allowed them
now but get ignored (though probably no-one would be doing this), I feel
it's slightly harder to change the semantics afterwards.

Thanks.

-- 
Catalin


[PATCH][next] drm/amd/display: remove redundant assignment to status

2019-05-30 Thread Colin King
From: Colin Ian King 

The variable status is initialized with a value that is never read
and status is reassigned several statements later. This initialization
is redundant and can be removed.

Addresses-Coverity: ("Unused value")
Signed-off-by: Colin Ian King 
---
 drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index 65d6caedbd82..cf6166a1be53 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -2367,7 +2367,7 @@ static bool retrieve_link_cap(struct dc_link *link)
union down_stream_port_count down_strm_port_count;
union edp_configuration_cap edp_config_cap;
union dp_downstream_port_present ds_port = { 0 };
-   enum dc_status status = DC_ERROR_UNEXPECTED;
+   enum dc_status status;
uint32_t read_dpcd_retry_cnt = 3;
int i;
struct dp_sink_hw_fw_revision dp_hw_fw_revision;
-- 
2.20.1



Re: [PATCH v15 05/17] arms64: untag user pointers passed to memory syscalls

2019-05-30 Thread Khalid Aziz
On 5/30/19 9:11 AM, Catalin Marinas wrote:
> On Wed, May 29, 2019 at 01:16:37PM -0600, Khalid Aziz wrote:
>> mmap() can return the same tagged address but I am uneasy about kernel
>> pre-coloring the pages. Database can mmap 100's of GB of memory. That is
>> lot of work being offloaded to the kernel to pre-color the page even if
>> done in batches as pages are faulted in.
> 
> For anonymous mmap() for example, the kernel would have to zero the
> faulted in pages anyway. We can handle the colouring at the same time in
> clear_user_page() (as I said below, we have to clear the colour anyway
> from previous uses, so it's simply extending this to support something
> other than tag/colour 0 by default with no additional overhead).
> 

On sparc M7, clear_user_page() ends up in M7clear_user_page defined in
arch/sparc/lib/M7memset.S. M7 code use Block Init Store (BIS) to clear
the page. BIS on M7 clears the memory tags as well and no separate
instructions are needed to clear the tags. As a result when kernel
clears a page before returning it to user, the page is not only zeroed
out, its tags are also cleared to 0.

>>> Since we already need such loop in the kernel, we might as well allow
>>> user space to require a certain colour. This comes in handy for large
>>> malloc() and another advantage is that the C library won't be stuck
>>> trying to paint the whole range (think GB).
>>
>> If kernel is going to pre-color all pages in a vma, we will need to
>> store the default tag in the vma. It will add more time to page fault
>> handling code. On sparc M7, kernel will need to execute additional 128
>> stxa instructions to set the tags on a normal page.
> 
> As I said, since the user can retrieve an old colour using ldxa, the
> kernel should perform this operation anyway on any newly allocated page
> (unless you clear the existing colour on page freeing).>

Tags are not cleared on sparc on freeing. They get cleared when the page
is allocated again.

 We can try to store tags for an entire region in vma but that is
 expensive, plus on sparc tags are set in userspace with no
 participation from kernel and now we need a way for userspace to
 communicate the tags to kernel.
>>>
>>> We can't support finer granularity through the mmap() syscall and, as
>>> you said, the vma is not the right thing to store the individual tags.
>>> With the above extension to mmap(), we'd have to store a colour per vma
>>> and prevent merging if different colours (we could as well use the
>>> pkeys mechanism we already have in the kernel but use a colour per vma
>>> instead of a key).
>>
>> Since tags can change on any part of mmap region on sparc at any time
>> without kernel being involved, I am not sure I see much reason for
>> kernel to enforce any tag related restrictions.
> 
> It's not enforcing a tag, more like the default colour for a faulted in
> page. Anyway, if sparc is going with default 0/untagged, that's fine as
> well. We may add this mmap() option to arm64 only.
> 
 From sparc point of view, making kernel responsible for assigning tags
 to a page on page fault is full of pitfalls.
>>>
>>> This could be just some arm64-specific but if you plan to deploy it more
>>> generically for sparc (at the C library level), you may find this
>>> useful.
>>
>> Common semantics from app developer point of view will be very useful to
>> maintain. If arm64 says mmap with MAP_FIXED and a tagged address will
>> return a pre-colored page, I would rather have it be the same on any
>> architecture. Is there a use case that justifies kernel doing this extra
>> work?
> 
> So if a database program is doing an anonymous mmap(PROT_TBI) of 100GB,
> IIUC for sparc the faulted-in pages will have random colours (on 64-byte
> granularity). Ignoring the information leak from prior uses of such
> pages, it would be the responsibility of the db program to issue the
> stxa. On arm64, since we also want to do this via malloc(), any large
> allocation would require all pages to be faulted in so that malloc() can
> set the write colour before being handed over to the user. That's what
> we want to avoid and the user is free to repaint the memory as it likes.
> 

On sparc, any newly allocated page is cleared along with any old tags on
it. Since clearing tag happens automatically when page is cleared on
sparc, clear_user_page() will need to execute additional stxa
instructions to set a new tag. It is doable. In a way it is done already
if page is being pre-colored with tag 0 always ;) Where would the
pre-defined tag be stored - as part of address stored in vm_start or a
new field in vm_area_struct?

--
Khalid



Re: RX 580 and 5K displays, bandwidth validation failed whith multiple monitors

2019-05-30 Thread Harry Wentland
On 2019-05-27 10:58 a.m., Gaël HERMET wrote:
> Hi,
> 
> I have been facing an issue with my 5K display (iiyama ProLite
> XB2779QQS-S1).
> 
> It works fine as long as it is the only active monitor, as soon as I
> activate another monitor the main one (5k) can't display more than 4k.
> 
> Debug using "echo 0x4 > /sys/module/drm/parameters/debug" show this :
> mai 23 09:01:22 bureau-gael /usr/lib/gdm3/gdm-x-session[3465]: (EE)
> AMDGPU(0): failed to set mode: Invalid argument
> mai 23 09:01:22 bureau-gael kernel: [drm:dce112_validate_bandwidth
> [amdgpu]] dce112_validate_bandwidth: Bandwidth validation failed!
> 
> I disabled the check by forcing is_display_configuration_supported to
> return true in dce_calcs.c and it works fine.
> 
> Anything I can do to correct this bandwidth calculation ?
> 

The bandwidth formulas come from our HW teams and usually leave a good
margin of error. Changing the formulas to allow for your case isn't a
correction as it might cause issues in certain scenarious, i.e. there's
no more guarantee that things work as expected.

Examples are 4k video playback (especially multiple videos), gaming.
Anything that does a lot of GPU memory access.

Issues that might appear are underflow, i.e. white lines on the screen.
In extreme cases underflow might even hang the entire display pipe.

Harry

> 
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amd/doc: add rough outline of tracepoint documentation

2019-05-30 Thread Tom St Denis
Fixed with "is".

Thanks.  I'll wait for more feedback before submitting a v2.

Tom

On Thu, May 30, 2019 at 11:45 AM Abramov, Slava 
wrote:

> Comments inline (marked with [slava a]).
>
>
> General comment - word capitalisation in the lists is inconsistent
>
>
> --
> *From:* amd-gfx  on behalf of
> StDenis, Tom 
> *Sent:* Thursday, May 30, 2019 10:56 AM
> *To:* amd-gfx@lists.freedesktop.org
> *Cc:* StDenis, Tom
> *Subject:* [PATCH] drm/amd/doc: add rough outline of tracepoint
> documentation
>
> Signed-off-by: Tom St Denis 
> ---
>  Documentation/gpu/amdgpu.rst  |  10 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 221 ++
>  2 files changed, 231 insertions(+)
>
> diff --git a/Documentation/gpu/amdgpu.rst b/Documentation/gpu/amdgpu.rst
> index 86138798128f..3564765110e5 100644
> --- a/Documentation/gpu/amdgpu.rst
> +++ b/Documentation/gpu/amdgpu.rst
> @@ -89,6 +89,16 @@ AMDGPU RAS debugfs control interface
>  .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> :internal:
>
> +AMDGPU Tracing Support
> +==
> +
> +.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> +   :doc: AMDGPU Tracing Support
> +
> +
> +.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> +   :internal:
> +
>
>  GPU Power/Thermal Controls and Monitoring
>  =
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> index d3ca2424b5fe..71febb90d3e3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> @@ -37,6 +37,227 @@
>  #define AMDGPU_JOB_GET_TIMELINE_NAME(job) \
>
> job->base.s_fence->finished.ops->get_timeline_name(>base.s_fence->finished)
>
> +/**
> + * DOC: AMDGPU Tracing Support
> + *
> + * The AMDGPU driver provides numerous trace points that can aid
> + * in debugging.  They are globally enabled by the file:
> + *
> + * /sys/kernel/debug/tracing/events/amdgpu/enable
> + *
> + * or individually by the enable files in the sub-directories
> + * of that directory.
> + *
> + * amdgpu_mm_rreg, amdgpu_mm_wreg
> + * --
> + *
> + * These trace points track reads and writes to MMIO registers by
> + * the kernel driver (activity inside ring/indirect buffers are not
> + * traced) which can be used to diagnose IP block activity and
> + * responses.
>
> [slava a] Either 'activities are not traced' or 'activity is not traced'
> [slava a] Double usage of word 'activity' sounds weird.
>
> [snap]
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amd/display: Don't load DMCU for Raven 1 (v2)

2019-05-30 Thread Alex Deucher
On Thu, May 30, 2019 at 10:48 AM Samantha McVey  wrote:
>
> Alex,
>
>
>
> Are any of these non-booting systems laptops? If all the laptops don't have 
> this issue, is there a way we can detect we are on mobile and load the DMCU? 
> Seeing as ABM and PSR are both practically only used on mobile, maybe we can 
> check for that. This way we only enable it on systems that actually need the 
> features. I am guessing the number of Raven Ridge laptops is much much 
> smaller than the number of motherboards which can potentially support Raven 
> Ridge.
>
>

Mostly if not exclusively laptops.  DMCU doesn't really have much use
on desktops since there is no eDP panel in most cases.  We don't
currently support PSR so it's really only ABM.

Alex

>
> On vrijdag 24 mei 2019 18:49:27 CEST you wrote:
>
> > On Fri, May 24, 2019 at 12:32 PM Mike Lothian  
> > wrote:
>
> > > I realise you don't want to enable this as it's breaking some people's
>
> > > systems, but could we add a new boot parameter to force it for working
>
> > > systems? Or check against a black list maybe?
>
> >
>
> > We could probably add a whitelist. I'm not sure what the best way to
>
> > id the working systems are though.
>
> >
>
> > Alex
>
> >
>
> > > On Fri, 24 May 2019 at 17:20, Alex Deucher  
> > > wrote:
>
> > > > On Fri, May 24, 2019 at 12:09 PM Mike Lothian  
> > > > wrote:
>
> > > > > Hi
>
> > > > >
>
> > > > > Curious to know what this means for folk that have newer Raven1 boards
>
> > > > > that didn't have issues loading the firmware
>
> > > >
>
> > > > You won't get ABM I think. ABM is the automatic backlight management.
>
> > > >
>
> > > > Alex
>
> > > >
>
> > > > > Cheers
>
> > > > >
>
> > > > > Mike
>
> > > > >
>
> > > > > On Fri, 24 May 2019 at 16:34, Alex Deucher  
> > > > > wrote:
>
> > > > > > From: Harry Wentland 
>
> > > > > >
>
> > > > > > [WHY]
>
> > > > > > Some early Raven boards had a bad SBIOS that doesn't play nicely
>
> > > > > > with
>
> > > > > > the DMCU FW. We thought the issues were fixed by ignoring errors on
>
> > > > > > DMCU
>
> > > > > > load but that doesn't seem to be the case. We've still seen reports
>
> > > > > > of
>
> > > > > > users unable to boot their systems at all.
>
> > > > > >
>
> > > > > > [HOW]
>
> > > > > > Disable DMCU load on Raven 1. Only load it for Raven 2 and Picasso.
>
> > > > > >
>
> > > > > > v2: Fix ifdef (Alex)
>
> > > > > >
>
> > > > > > Signed-off-by: Harry Wentland 
>
> > > > > > Reviewed-by: Nicholas Kazlauskas 
>
> > > > > > Signed-off-by: Alex Deucher 
>
> > > > > > Cc: stable at vger.kernel.org
>
> > > > > > ---
>
> > > > > >
>
> > > > > > drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 12 ++--
>
> > > > > > 1 file changed, 10 insertions(+), 2 deletions(-)
>
> > > > > >
>
> > > > > > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>
> > > > > > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index
>
> > > > > > 995f9df66142..bcb1a93c0b4c 100644
>
> > > > > > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>
> > > > > > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>
> > > > > > @@ -29,6 +29,7 @@
>
> > > > > >
>
> > > > > > #include "dm_services_types.h"
>
> > > > > > #include "dc.h"
>
> > > > > > #include "dc/inc/core_types.h"
>
> > > > > >
>
> > > > > > +#include "dal_asic_id.h"
>
> > > > > >
>
> > > > > > #include "vid.h"
>
> > > > > > #include "amdgpu.h"
>
> > > > > >
>
> > > > > > @@ -640,7 +641,7 @@ static void amdgpu_dm_fini(struct amdgpu_device
>
> > > > > > *adev)
>
> > > > > >
>
> > > > > > static int load_dmcu_fw(struct amdgpu_device *adev)
>
> > > > > > {
>
> > > > > >
>
> > > > > > - const char *fw_name_dmcu;
>
> > > > > > + const char *fw_name_dmcu = NULL;
>
> > > > > >
>
> > > > > > int r;
>
> > > > > > const struct dmcu_firmware_header_v1_0 *hdr;
>
> > > > > >
>
> > > > > > @@ -663,7 +664,14 @@ static int load_dmcu_fw(struct amdgpu_device
>
> > > > > > *adev)
>
> > > > > >
>
> > > > > > case CHIP_VEGA20:
>
> > > > > > return 0;
>
> > > > > >
>
> > > > > > case CHIP_RAVEN:
>
> > > > > > - fw_name_dmcu = FIRMWARE_RAVEN_DMCU;
>
> > > > > > +#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
>
> > > > > > + if (ASICREV_IS_PICASSO(adev->external_rev_id))
>
> > > > > > + fw_name_dmcu = FIRMWARE_RAVEN_DMCU;
>
> > > > > > + else if (ASICREV_IS_RAVEN2(adev->external_rev_id))
>
> > > > > > + fw_name_dmcu = FIRMWARE_RAVEN_DMCU;
>
> > > > > > + else
>
> > > > > > +#endif
>
> > > > > > + return 0;
>
> > > > > >
>
> > > > > > break;
>
> > > > > >
>
> > > > > > default:
>
> > > > > > DRM_ERROR("Unsupported ASIC type: 0x%X\n",
>
> > > > > > adev->asic_type);
>
> > > > > >
>
> > > > > > --
>
> > > > > > 2.20.1
>
> > > > > >
>
> > > > > > ___
>
> > > > > > amd-gfx mailing list
>
> > > > > > amd-gfx at lists.freedesktop.org
>
> > > > > > https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> 

Re: [PATCH] drm/amd/doc: add rough outline of tracepoint documentation

2019-05-30 Thread Abramov, Slava
Comments inline (marked with [slava a]).


General comment - word capitalisation in the lists is inconsistent



From: amd-gfx  on behalf of StDenis, Tom 

Sent: Thursday, May 30, 2019 10:56 AM
To: amd-gfx@lists.freedesktop.org
Cc: StDenis, Tom
Subject: [PATCH] drm/amd/doc: add rough outline of tracepoint documentation

Signed-off-by: Tom St Denis 
---
 Documentation/gpu/amdgpu.rst  |  10 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 221 ++
 2 files changed, 231 insertions(+)

diff --git a/Documentation/gpu/amdgpu.rst b/Documentation/gpu/amdgpu.rst
index 86138798128f..3564765110e5 100644
--- a/Documentation/gpu/amdgpu.rst
+++ b/Documentation/gpu/amdgpu.rst
@@ -89,6 +89,16 @@ AMDGPU RAS debugfs control interface
 .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
:internal:

+AMDGPU Tracing Support
+==
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+   :doc: AMDGPU Tracing Support
+
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+   :internal:
+

 GPU Power/Thermal Controls and Monitoring
 =
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index d3ca2424b5fe..71febb90d3e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -37,6 +37,227 @@
 #define AMDGPU_JOB_GET_TIMELINE_NAME(job) \
  
job->base.s_fence->finished.ops->get_timeline_name(>base.s_fence->finished)

+/**
+ * DOC: AMDGPU Tracing Support
+ *
+ * The AMDGPU driver provides numerous trace points that can aid
+ * in debugging.  They are globally enabled by the file:
+ *
+ * /sys/kernel/debug/tracing/events/amdgpu/enable
+ *
+ * or individually by the enable files in the sub-directories
+ * of that directory.
+ *
+ * amdgpu_mm_rreg, amdgpu_mm_wreg
+ * --
+ *
+ * These trace points track reads and writes to MMIO registers by
+ * the kernel driver (activity inside ring/indirect buffers are not
+ * traced) which can be used to diagnose IP block activity and
+ * responses.

[slava a] Either 'activities are not traced' or 'activity is not traced'
[slava a] Double usage of word 'activity' sounds weird.

[snap]
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH v15 05/17] arms64: untag user pointers passed to memory syscalls

2019-05-30 Thread Catalin Marinas
On Wed, May 29, 2019 at 01:16:37PM -0600, Khalid Aziz wrote:
> On 5/29/19 8:20 AM, Catalin Marinas wrote:
> > On Tue, May 28, 2019 at 05:33:04PM -0600, Khalid Aziz wrote:
> >> Steps 1 and 2 are accomplished by userspace by calling mprotect() with
> >> PROT_ADI. Tags are set by storing tags in a loop, for example:
> >>
> >> version = 10;
> >> tmp_addr = shmaddr;
> >> end = shmaddr + BUFFER_SIZE;
> >> while (tmp_addr < end) {
> >> asm volatile(
> >> "stxa %1, [%0]0x90\n\t"
> >> :
> >> : "r" (tmp_addr), "r" (version));
> >> tmp_addr += adi_blksz;
> >> }
> > 
> > On arm64, a sequence similar to the above would live in the libc. So a
> > malloc() call will tag the memory and return the tagged address to 
> > thePre-coloring could easily be done by 
> > user.
> > 
> > We were not planning for a PROT_ADI/MTE but rather have MTE enabled for
> > all user memory ranges. We may revisit this before we upstream the MTE
> > support (probably some marginal benefit for the hardware not fetching
> > the tags from memory if we don't need to, e.g. code sections).
> > 
> > Given that we already have the TBI feature and with MTE enabled the top
> > byte is no longer ignored, we are planning for an explicit opt-in by the
> > user via prctl() to enable MTE.
> 
> OK. I had initially proposed enabling ADI for a process using prctl().
> Feedback I got was prctl was not a desirable interface and I ended up
> making mprotect() with PROT_ADI enable ADI on the process instead. Just
> something to keep in mind.

Thanks for the feedback. We'll keep this in mind when adding MTE
support. In the way we plan to deploy this, it would be a libc decision
to invoke the mmap() with the right flag.

This could actually simplify the automatic page faulting below brk(),
basically no tagged/coloured memory allowed implicitly. It needs
feedback from the bionic/glibc folk.

> >> With these semantics, giving mmap() or shamat() a tagged address is
> >> meaningless since no tags have been stored at the addresses mmap() will
> >> allocate and one can not store tags before memory range has been
> >> allocated. If we choose to allow tagged addresses to come into mmap()
> >> and shmat(), sparc code can strip the tags unconditionally and that may
> >> help simplify ABI and/or code.
> > 
> > We could say that with TBI (pre-MTE support), the top byte is actually
> > ignored on mmap(). Now, if you pass a MAP_FIXED with a tagged address,
> > should the user expect the same tagged address back or stripping the tag
> > is acceptable? If we want to keep the current mmap() semantics, I'd say
> > the same tag is returned. However, with MTE this also implies that the
> > memory was coloured.
> 
> Is assigning a tag aprivileged operationon ARM64? I am thinking not
> since you mentioned libc could do it in a loop for malloc'd memory.

Indeed it's not, the user can do it.

> mmap() can return the same tagged address but I am uneasy about kernel
> pre-coloring the pages. Database can mmap 100's of GB of memory. That is
> lot of work being offloaded to the kernel to pre-color the page even if
> done in batches as pages are faulted in.

For anonymous mmap() for example, the kernel would have to zero the
faulted in pages anyway. We can handle the colouring at the same time in
clear_user_page() (as I said below, we have to clear the colour anyway
from previous uses, so it's simply extending this to support something
other than tag/colour 0 by default with no additional overhead).

> > Since the user can probe the pre-existing colour in a faulted-in page
> > (either with some 'ldxa' instruction or by performing a tag-checked
> > access), the kernel should always pre-colour (even if colour 0) any
> > allocated page. There might not be an obvious security risk but I feel
> > uneasy about letting colours leak between address spaces (different user
> > processes or between kernel and user).
> 
> On sparc, tags 0 and 15 are special in that 0 means untagged memory and
> 15 means match any tag in the address. Colour 0 is the default for any
> newly faulted in page on sparc.

With MTE we don't have match-all/any tag in memory, only in the virtual
address/pointer. So if we turn on MTE for all pages and the user
accesses an address with a 0 tag, the underlying memory needs to be
coloured with the same 0 value.

> > Since we already need such loop in the kernel, we might as well allow
> > user space to require a certain colour. This comes in handy for large
> > malloc() and another advantage is that the C library won't be stuck
> > trying to paint the whole range (think GB).
> 
> If kernel is going to pre-color all pages in a vma, we will need to
> store the default tag in the vma. It will add more time to page fault
> handling code. On sparc M7, kernel will need to execute additional 128
> stxa instructions to set the tags on a normal page.

As I 

Re: [PATCH] drm/sched: Fix make htmldocs warnings.

2019-05-30 Thread Christian König

Am 29.05.19 um 21:36 schrieb Daniel Vetter:

On Wed, May 29, 2019 at 04:43:45PM +, Grodzovsky, Andrey wrote:

I don't, sorry.

Should we fix that? Seems like you do plenty of scheduler stuff, so would
make sense I guess ...


Reviewed-by: Christian König  for the patch.

And +1 for giving Andrey commit rights to drm-misc-next.

Christian.


-Daniel


Andrey

On 5/29/19 12:42 PM, Alex Deucher wrote:

On Wed, May 29, 2019 at 10:29 AM Andrey Grodzovsky
 wrote:

Signed-off-by: Andrey Grodzovsky 

Reviewed-by: Alex Deucher 

I'll push it to drm-misc in a minute unless you have commit rights.

Alex


---
   drivers/gpu/drm/scheduler/sched_main.c | 2 ++
   1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index 49e7d07..c1058ee 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -353,6 +353,7 @@ EXPORT_SYMBOL(drm_sched_increase_karma);
* drm_sched_stop - stop the scheduler
*
* @sched: scheduler instance
+ * @bad: job which caused the time out
*
* Stop the scheduler and also removes and frees all completed jobs.
* Note: bad job will not be freed as it might be used later and so it's
@@ -422,6 +423,7 @@ EXPORT_SYMBOL(drm_sched_stop);
* drm_sched_job_recovery - recover jobs after a reset
*
* @sched: scheduler instance
+ * @full_recovery: proceed with complete sched restart
*
*/
   void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
--
2.7.4

___
dri-devel mailing list
dri-de...@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amd/doc: add rough outline of tracepoint documentation

2019-05-30 Thread StDenis, Tom
Signed-off-by: Tom St Denis 
---
 Documentation/gpu/amdgpu.rst  |  10 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 221 ++
 2 files changed, 231 insertions(+)

diff --git a/Documentation/gpu/amdgpu.rst b/Documentation/gpu/amdgpu.rst
index 86138798128f..3564765110e5 100644
--- a/Documentation/gpu/amdgpu.rst
+++ b/Documentation/gpu/amdgpu.rst
@@ -89,6 +89,16 @@ AMDGPU RAS debugfs control interface
 .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
:internal:
 
+AMDGPU Tracing Support
+==
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+   :doc: AMDGPU Tracing Support
+
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+   :internal:
+
 
 GPU Power/Thermal Controls and Monitoring
 =
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index d3ca2424b5fe..71febb90d3e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -37,6 +37,227 @@
 #define AMDGPU_JOB_GET_TIMELINE_NAME(job) \
 
job->base.s_fence->finished.ops->get_timeline_name(>base.s_fence->finished)
 
+/**
+ * DOC: AMDGPU Tracing Support
+ *
+ * The AMDGPU driver provides numerous trace points that can aid
+ * in debugging.  They are globally enabled by the file:
+ *
+ * /sys/kernel/debug/tracing/events/amdgpu/enable
+ *
+ * or individually by the enable files in the sub-directories
+ * of that directory.
+ *
+ * amdgpu_mm_rreg, amdgpu_mm_wreg
+ * --
+ *
+ * These trace points track reads and writes to MMIO registers by
+ * the kernel driver (activity inside ring/indirect buffers are not
+ * traced) which can be used to diagnose IP block activity and
+ * responses.
+ *
+ * The trace captures the following information:
+ *
+ * - DID of the device being used
+ * - Register address
+ * - Value being read or written
+ *
+ * It does not differentiate between multiple instances of the same
+ * ASIC. The register address is the DWORD address of the register
+ * being used.
+ *
+ * amdgpu_iv
+ * -
+ *
+ * This tracepoint captures data from an IRQ event before dispatching
+ * control to the respective IP block IRQ handler.  The trace
+ * captures the following information:
+ *
+ *- client ID
+ *- source ID
+ *- ring ID
+ *- VM ID
+ *- VM ID source
+ *- Timestamp
+ *- Timestamp source
+ *- PAS ID
+ *- 4 SRC data words
+ *
+ * amdgpu_bo_create
+ * 
+ *
+ * This tracepoint captures the state of a successfully created
+ * buffer object (BO).  The trace captures the following information:
+ *
+ *- kernel address of the BO
+ *- Number of (GPU) pages
+ *- Memory type
+ *- Preferred domain
+ *- Allowed domains
+ *- Buffer Object Flags
+ *
+ * amdgpu_cs
+ * -
+ *
+ * This tracepoint captures data about a command submission prior
+ * to being submitted to a ring (or queue).  The trace captures all
+ * indirect buffers (ibs) at once before issuing any commands.  The
+ * trace captures the following information:
+ *
+ *- kernel address of the buffer object (BO) list
+ *- The ring (index) being submitted to
+ *- The number of words
+ *- The number of fences emitted
+ *
+ * amdgpu_cs_ioctl
+ * ---
+ *
+ * This tracepoint captures information from the IOCTL query that
+ * submits commands to a ring.  It is part of the DRM userspace
+ * infrastructure.  The trace captures the following information:
+ *
+ *- Schedule job ID
+ *- Timeline (name of ring the fence is emitted to)
+ *- Fence context
+ *- Fence sequence number
+ *- Ring name being submitted to
+ *- Number of indirect buffers (ibs)
+ *
+ * amdgpu_sched_run_job
+ * 
+ *
+ * This tracepoint captures information about a job submission
+ * at the point of scheduling for submission to a ring.  The trace
+ * captures the following information:
+ *
+ *- Job ID
+ *- Timeline (name of ring the fence is emitted to)
+ *- Context ID
+ *- Sequence Number
+ *- Ring name being submitted to
+ *- Number of indirect buffers (ibs)
+ *
+ * amdgpu_vm_grab_id
+ * -
+ *
+ * This tracepoint captures information about a VM allocation.
+ * The trace captures the following information:
+ *
+ *- PAS ID
+ *- Ring name
+ *- Ring index
+ *- VM ID that is assigned
+ *- VM hub assigned (depends on ring)
+ *- Page Directory base address
+ *- Flag indicating whether it needs to be flushed or not
+ *
+ * amdgpu_vm_bo_map
+ * 
+ *
+ * This tracepoint captures information when a new mapping is inserted
+ * into a VM space.  The trace captures the following information:
+ *
+ *- Kernel address of the buffer object (bo)
+ *- Start page address of mapping
+ *- Last page address of mapping
+ *- Offset into mapping
+ *- Flags 

Re: [PATCH] drm/amd/display: Don't load DMCU for Raven 1 (v2)

2019-05-30 Thread Samantha McVey
Alex,

Are any of these non-booting systems laptops? If all the laptops don't have 
this issue, is 
there a way we can detect we are on mobile and load the DMCU? Seeing as ABM and 
PSR 
are both practically only used on mobile, maybe we can check for that. This way 
we only 
enable it on systems that actually need the features. I am guessing the number 
of Raven 
Ridge laptops is much much smaller than the number of motherboards which can 
potentially support Raven Ridge.

On vrijdag 24 mei 2019 18:49:27 CEST you wrote:
> On Fri, May 24, 2019 at 12:32 PM Mike Lothian  wrote:
> > I realise you don't want to enable this as it's breaking some people's
> > systems, but could we add a new boot parameter to force it for working
> > systems? Or check against a black list maybe?
> 
> We could probably add a whitelist.  I'm not sure what the best way to
> id the working systems are though.
> 
> Alex
> 
> > On Fri, 24 May 2019 at 17:20, Alex Deucher  wrote:
> > > On Fri, May 24, 2019 at 12:09 PM Mike Lothian  
> > > wrote:
> > > > Hi
> > > > 
> > > > Curious to know what this means for folk that have newer Raven1 boards
> > > > that didn't have issues loading the firmware
> > > 
> > > You won't get ABM I think.  ABM is the automatic backlight management.
> > > 
> > > Alex
> > > 
> > > > Cheers
> > > > 
> > > > Mike
> > > > 
> > > > On Fri, 24 May 2019 at 16:34, Alex Deucher  
> > > > wrote:
> > > > > From: Harry Wentland 
> > > > > 
> > > > > [WHY]
> > > > > Some early Raven boards had a bad SBIOS that doesn't play nicely
> > > > > with
> > > > > the DMCU FW. We thought the issues were fixed by ignoring errors on
> > > > > DMCU
> > > > > load but that doesn't seem to be the case. We've still seen reports
> > > > > of
> > > > > users unable to boot their systems at all.
> > > > > 
> > > > > [HOW]
> > > > > Disable DMCU load on Raven 1. Only load it for Raven 2 and Picasso.
> > > > > 
> > > > > v2: Fix ifdef (Alex)
> > > > > 
> > > > > Signed-off-by: Harry Wentland 
> > > > > Reviewed-by: Nicholas Kazlauskas 
> > > > > Signed-off-by: Alex Deucher 
> > > > > Cc: stable at vger.kernel.org
> > > > > ---
> > > > > 
> > > > >  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 12 ++--
> > > > >  1 file changed, 10 insertions(+), 2 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > > > > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index
> > > > > 995f9df66142..bcb1a93c0b4c 100644
> > > > > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > > > > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > > > > @@ -29,6 +29,7 @@
> > > > > 
> > > > >  #include "dm_services_types.h"
> > > > >  #include "dc.h"
> > > > >  #include "dc/inc/core_types.h"
> > > > > 
> > > > > +#include "dal_asic_id.h"
> > > > > 
> > > > >  #include "vid.h"
> > > > >  #include "amdgpu.h"
> > > > > 
> > > > > @@ -640,7 +641,7 @@ static void amdgpu_dm_fini(struct amdgpu_device
> > > > > *adev)
> > > > > 
> > > > >  static int load_dmcu_fw(struct amdgpu_device *adev)
> > > > >  {
> > > > > 
> > > > > -   const char *fw_name_dmcu;
> > > > > +   const char *fw_name_dmcu = NULL;
> > > > > 
> > > > > int r;
> > > > > const struct dmcu_firmware_header_v1_0 *hdr;
> > > > > 
> > > > > @@ -663,7 +664,14 @@ static int load_dmcu_fw(struct amdgpu_device
> > > > > *adev)
> > > > > 
> > > > > case CHIP_VEGA20:
> > > > > return 0;
> > > > > 
> > > > > case CHIP_RAVEN:
> > > > > -   fw_name_dmcu = FIRMWARE_RAVEN_DMCU;
> > > > > +#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
> > > > > +   if (ASICREV_IS_PICASSO(adev->external_rev_id))
> > > > > +   fw_name_dmcu = FIRMWARE_RAVEN_DMCU;
> > > > > +   else if (ASICREV_IS_RAVEN2(adev->external_rev_id))
> > > > > +   fw_name_dmcu = FIRMWARE_RAVEN_DMCU;
> > > > > +   else
> > > > > +#endif
> > > > > +   return 0;
> > > > > 
> > > > > break;
> > > > > 
> > > > > default:
> > > > > DRM_ERROR("Unsupported ASIC type: 0x%X\n",
> > > > > adev->asic_type);
> > > > > ___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdgpu: use new HMM APIs and helpers

2019-05-30 Thread Yang, Philip
HMM provides new APIs and helps in kernel 5.2-rc1 to simplify driver
path. The old hmm APIs are deprecated and will be removed in future.

Below are changes in driver:

1. Change hmm_vma_fault to hmm_range_register and hmm_range_fault which
supports range with multiple vmas, remove the multiple vmas handle path
and data structure.
2. Change hmm_vma_range_done to hmm_range_unregister.
3. Use default flags to avoid pre-fill pfn arrays.
4. Use new hmm_device_ helpers.

Change-Id: I1a00f88459f3b96c9536933e9a17eb1ebaa78113
Signed-off-by: Philip Yang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c  |   1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 140 +---
 2 files changed, 53 insertions(+), 88 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 41ccee49a224..e0bb47ce570b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -519,7 +519,6 @@ void amdgpu_hmm_init_range(struct hmm_range *range)
range->flags = hmm_range_flags;
range->values = hmm_range_values;
range->pfn_shift = PAGE_SHIFT;
-   range->pfns = NULL;
INIT_LIST_HEAD(>list);
}
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 7138dc1dd1f4..25a9fcb409c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -711,8 +711,7 @@ struct amdgpu_ttm_tt {
struct task_struct  *usertask;
uint32_tuserflags;
 #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
-   struct hmm_range*ranges;
-   int nr_ranges;
+   struct hmm_range*range;
 #endif
 };
 
@@ -725,57 +724,33 @@ struct amdgpu_ttm_tt {
  */
 #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
 
-/* Support Userptr pages cross max 16 vmas */
-#define MAX_NR_VMAS(16)
-
 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
 {
struct amdgpu_ttm_tt *gtt = (void *)ttm;
struct mm_struct *mm = gtt->usertask->mm;
unsigned long start = gtt->userptr;
-   unsigned long end = start + ttm->num_pages * PAGE_SIZE;
-   struct vm_area_struct *vma = NULL, *vmas[MAX_NR_VMAS];
-   struct hmm_range *ranges;
-   unsigned long nr_pages, i;
-   uint64_t *pfns, f;
+   struct vm_area_struct *vma;
+   struct hmm_range *range;
+   unsigned long i;
+   uint64_t *pfns;
int r = 0;
 
if (!mm) /* Happens during process shutdown */
return -ESRCH;
 
-   down_read(>mmap_sem);
-
-   /* user pages may cross multiple VMAs */
-   gtt->nr_ranges = 0;
-   do {
-   unsigned long vm_start;
-
-   if (gtt->nr_ranges >= MAX_NR_VMAS) {
-   DRM_ERROR("Too many VMAs in userptr range\n");
-   r = -EFAULT;
-   goto out;
-   }
-
-   vm_start = vma ? vma->vm_end : start;
-   vma = find_vma(mm, vm_start);
-   if (unlikely(!vma || vm_start < vma->vm_start)) {
-   r = -EFAULT;
-   goto out;
-   }
-   vmas[gtt->nr_ranges++] = vma;
-   } while (end > vma->vm_end);
-
-   DRM_DEBUG_DRIVER("0x%lx nr_ranges %d pages 0x%lx\n",
-   start, gtt->nr_ranges, ttm->num_pages);
-
+   vma = find_vma(mm, start);
+   if (unlikely(!vma || start < vma->vm_start)) {
+   r = -EFAULT;
+   goto out;
+   }
if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
-   vmas[0]->vm_file)) {
+   vma->vm_file)) {
r = -EPERM;
goto out;
}
 
-   ranges = kvmalloc_array(gtt->nr_ranges, sizeof(*ranges), GFP_KERNEL);
-   if (unlikely(!ranges)) {
+   range = kvmalloc(sizeof(*range), GFP_KERNEL | __GFP_ZERO);
+   if (unlikely(!range)) {
r = -ENOMEM;
goto out;
}
@@ -786,61 +761,52 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, 
struct page **pages)
goto out_free_ranges;
}
 
-   for (i = 0; i < gtt->nr_ranges; i++)
-   amdgpu_hmm_init_range([i]);
+   amdgpu_hmm_init_range(range);
+   range->default_flags = range->flags[HMM_PFN_VALID];
+   range->default_flags |= amdgpu_ttm_tt_is_readonly(ttm) ?
+   0 : range->flags[HMM_PFN_WRITE];
+   range->pfn_flags_mask = 0;
+   range->pfns = pfns;
+   hmm_range_register(range, mm, start,
+  start + ttm->num_pages * PAGE_SIZE, PAGE_SHIFT);
 
-   f = ranges[0].flags[HMM_PFN_VALID];
-   f |= amdgpu_ttm_tt_is_readonly(ttm) ?
-   0 : ranges[0].flags[HMM_PFN_WRITE];
-   memset64(pfns, f, ttm->num_pages);
-
-   for (nr_pages = 0, i 

Re: [PATCH] drm/amdgpu: add pmu counters

2019-05-30 Thread Alex Deucher
On Wed, May 29, 2019 at 11:02 AM Kim, Jonathan  wrote:
>
> add pmu counters to monitor amdgpu device performance.
> each pmu registered recorded per pmu type per asic type.
>
> Change-Id: I8449f4ea824c411ee24a5b783ac066189b9de08e
> Signed-off-by: Jonathan Kim 
> ---
>  drivers/gpu/drm/amd/amdgpu/Makefile|   2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |   5 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c| 394 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.h|  37 ++
>  4 files changed, 437 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
>  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.h
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
> b/drivers/gpu/drm/amd/amdgpu/Makefile
> index 11a651ff7f0d..90d4c5d299dd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> @@ -54,7 +54,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
> amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o 
> amdgpu_atomfirmware.o \
> amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \
> amdgpu_gmc.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \
> -   amdgpu_vm_sdma.o
> +   amdgpu_vm_sdma.o amdgpu_pmu.o
>
>  # add asic specific block
>  amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 582f5635fcb2..51f479b357a1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -61,6 +61,7 @@
>
>  #include "amdgpu_xgmi.h"
>  #include "amdgpu_ras.h"
> +#include "amdgpu_pmu.h"
>
>  MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
>  MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
> @@ -2748,6 +2749,10 @@ int amdgpu_device_init(struct amdgpu_device *adev,
> goto failed;
> }
>
> +   r = amdgpu_pmu_init(adev);
> +   if (r)
> +   dev_err(adev->dev, "amdgpu_pmu_init failed\n");
> +
> /* must succeed. */
> amdgpu_ras_resume(adev);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
> new file mode 100644
> index ..39cff772dd9e
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
> @@ -0,0 +1,394 @@
> +/*
> + * Copyright 2019 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + * Author: Jonathan Kim 
> + *
> + */
> +
> +#define pr_fmt(fmt)"perf/amdgpu_pmu: " fmt
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include "amdgpu.h"
> +#include "amdgpu_pmu.h"
> +#include "df_v3_6.h"
> +
> +#define PMU_NAME_SIZE 32
> +
> +/* record to keep track of pmu entry per pmu type per device */
> +struct amdgpu_pmu_entry {
> +   struct amdgpu_device *adev;
> +   struct pmu pmu;
> +   unsigned int pmu_perf_type;
> +};
> +
> +PMU_FORMAT_ATTR(df_event,  "config:0-7");
> +PMU_FORMAT_ATTR(df_instance,   "config:8-15");
> +PMU_FORMAT_ATTR(df_unitmask,   "config:16-23");
> +
> +/* df format attributes  */
> +static struct attribute *amdgpu_df_format_attrs[] = {
> +   _attr_df_event.attr,
> +   _attr_df_instance.attr,
> +   _attr_df_unitmask.attr,
> +   NULL
> +};
> +
> +/* df format attribute group */
> +static struct attribute_group amdgpu_df_format_attr_group = {
> +   .name = "format",
> +   .attrs = amdgpu_df_format_attrs,
> +};
> +
> +/* df event attribute group */
> +static struct attribute_group amdgpu_df_events_attr_group = {
> +   .name = "events",
> +};
> +
> +struct AMDGPU_PMU_EVENT_DESC {
> +   struct kobj_attribute attr;
> +   const char *event;
> +};
> +
> +static ssize_t _pmu_event_show(struct kobject *kobj,
> +  struct kobj_attribute *attr, char 

Re: [PATCH v2] drm/amdgpu/display: Fix reload driver error

2019-05-30 Thread Kazlauskas, Nicholas
On 5/28/19 11:12 PM, Emily Deng wrote:
> Issue:
> Will have follow error when reload driver:
> [ 3986.567739] sysfs: cannot create duplicate filename 
> '/devices/pci:00/:00:07.0/drm_dp_aux_dev'
> [ 3986.567743] CPU: 6 PID: 1767 Comm: modprobe Tainted: G   OE 
> 5.0.0-rc1-custom #1
> [ 3986.567745] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
> Ubuntu-1.8.2-1ubuntu1 04/01/2014
> [ 3986.567746] Call Trace:
> ..
> [ 3986.567808]  drm_dp_aux_register_devnode+0xdc/0x140 [drm_kms_helper]
> ..
> [ 3986.569081] kobject_add_internal failed for drm_dp_aux_dev with -EEXIST, 
> don't try to register things with the same name in the same directory.
> 
> Reproduce sequences:
> 1.modprobe amdgpu
> 2.modprobe -r amdgpu
> 3.modprobe amdgpu
> 
> Root cause:
> When unload driver, it doesn't unregister aux.
> 
> v2: Don't use has_aux
> 
> Signed-off-by: Emily Deng 
Reviewed-by: Nicholas Kazlauskas 

Only a minor nitpick about not mentioning that you're also removing the 
i2c that we added on the connector, which looks correct to me, but isn't 
related to the aux that was registered.

Looks fine to me other than that.

Nicholas Kazlauskas

> ---
>   drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 15 ++-
>   1 file changed, 14 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index 8fe1685..941313b 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -3760,6 +3760,13 @@ int amdgpu_dm_connector_atomic_get_property(struct 
> drm_connector *connector,
>   return ret;
>   }
>   
> +static void amdgpu_dm_connector_unregister(struct drm_connector *connector)
> +{
> + struct amdgpu_dm_connector *amdgpu_dm_connector = 
> to_amdgpu_dm_connector(connector);
> +
> + drm_dp_aux_unregister(_dm_connector->dm_dp_aux.aux);
> +}
> +
>   static void amdgpu_dm_connector_destroy(struct drm_connector *connector)
>   {
>   struct amdgpu_dm_connector *aconnector = 
> to_amdgpu_dm_connector(connector);
> @@ -3788,6 +3795,11 @@ static void amdgpu_dm_connector_destroy(struct 
> drm_connector *connector)
>   drm_dp_cec_unregister_connector(>dm_dp_aux.aux);
>   drm_connector_unregister(connector);
>   drm_connector_cleanup(connector);
> + if (aconnector->i2c) {
> + i2c_del_adapter(>i2c->base);
> + kfree(aconnector->i2c);
> + }
> +
>   kfree(connector);
>   }
>   
> @@ -3846,7 +3858,8 @@ static const struct drm_connector_funcs 
> amdgpu_dm_connector_funcs = {
>   .atomic_duplicate_state = amdgpu_dm_connector_atomic_duplicate_state,
>   .atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
>   .atomic_set_property = amdgpu_dm_connector_atomic_set_property,
> - .atomic_get_property = amdgpu_dm_connector_atomic_get_property
> + .atomic_get_property = amdgpu_dm_connector_atomic_get_property,
> + .early_unregister = amdgpu_dm_connector_unregister
>   };
>   
>   static int get_modes(struct drm_connector *connector)
> 

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

drop drmP.h usage? [Was: [pull] amdgpu, amdkfd drm-next-5.3]

2019-05-30 Thread Sam Ravnborg
Hi Alex et all.

> - Various cleanups

Any chance to persuade one of you guys to sweep through the amd / raedeon
tree and drop use of the deprecated header drmP.h?

Status at the moment (drm-misc-next):
$git grep drmP | cut -d '/' -f 1  | uniq -c | sort -n | tail -n 10
 11 meson
 12 mediatek
 13 vmwgfx
 15 rockchip
 16 sti
 18 sun4i
 24 exynos
 27 nouveau
103 radeon
110 amd

nouveau is already taken care of, the cleanup is in their tree.
amd and radeon are the two "worst" drivers in this respect at the
moment.

Thanks,
Sam
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx