Re: [PATCH 21/26] drm/amdgpu: convert srbm lock to a spinlock v2

2017-04-12 Thread Christian König

On 12.04.2017 at 00:19, Alex Deucher wrote:

On Thu, Apr 6, 2017 at 2:21 AM, Andres Rodriguez  wrote:

Replace adev->srbm_mutex with a spinlock adev->srbm_lock

v2: rebased on 4.12 and included gfx9
Signed-off-by: Andres Rodriguez 

Maybe move this one up to the front of the series so it can be applied now?


Actually I'm not sure if that patch is still a good idea.

It turned out we need to sleep way more often when the KIQ is in use.

Christian.
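
[A minimal sketch of the conflict being described, not actual amdgpu code:
when register access is routed through the KIQ (e.g. under SR-IOV), a plain
register write becomes a ring submission that has to wait for a fence and
may sleep, which is illegal under a spinlock. The kiq_wreg() helper name is
hypothetical.]

	spin_lock(&adev->srbm_lock);     /* atomic context from here on */
	WREG32(mmSRBM_GFX_CNTL, value);  /* with KIQ this turns into something
					  * like kiq_wreg(): submit to the KIQ
					  * ring, wait for the fence -> may
					  * sleep -> scheduling while atomic */
	spin_unlock(&adev->srbm_lock);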



Alex



---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h   |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c |  4 +--
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c |  4 +--
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c|  2 +-
  drivers/gpu/drm/amd/amdgpu/cik_sdma.c |  4 +--
  drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 20 ++---
  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 34 +++
  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 24 
  drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c|  4 +--
  drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c|  4 +--
  10 files changed, 51 insertions(+), 51 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 4f54846..b9a4161 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1455,21 +1455,21 @@ struct amdgpu_device {
 struct work_struct  reset_work;
 struct notifier_block   acpi_nb;
 struct amdgpu_i2c_chan  *i2c_bus[AMDGPU_MAX_I2C_BUS];
 struct amdgpu_debugfs   debugfs[AMDGPU_DEBUGFS_MAX_COMPONENTS];
 unsigneddebugfs_count;
  #if defined(CONFIG_DEBUG_FS)
 struct dentry   
*debugfs_regs[AMDGPU_DEBUGFS_MAX_COMPONENTS];
  #endif
 struct amdgpu_atif  atif;
 struct amdgpu_atcs  atcs;
-   struct mutexsrbm_mutex;
+   spinlock_t  srbm_lock;
 /* GRBM index mutex. Protects concurrent access to GRBM index */
 struct mutexgrbm_idx_mutex;
 struct dev_pm_domainvga_pm_domain;
 boolhave_disp_power_ref;

 /* BIOS */
 boolis_atom_fw;
 uint8_t *bios;
 uint32_tbios_size;
 struct amdgpu_bo*stollen_vga_memory;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 5254562..a009990 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -162,30 +162,30 @@ static inline struct amdgpu_device 
*get_amdgpu_device(struct kgd_dev *kgd)
  {
 return (struct amdgpu_device *)kgd;
  }

  static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
 uint32_t queue, uint32_t vmid)
  {
 struct amdgpu_device *adev = get_amdgpu_device(kgd);
 uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | 
QUEUEID(queue);

-   mutex_lock(&adev->srbm_mutex);
+   spin_lock(&adev->srbm_lock);
 WREG32(mmSRBM_GFX_CNTL, value);
  }

  static void unlock_srbm(struct kgd_dev *kgd)
  {
 struct amdgpu_device *adev = get_amdgpu_device(kgd);

 WREG32(mmSRBM_GFX_CNTL, 0);
-   mutex_unlock(&adev->srbm_mutex);
+   spin_unlock(&adev->srbm_lock);
  }

  static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
 uint32_t queue_id)
  {
 struct amdgpu_device *adev = get_amdgpu_device(kgd);

 uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
 uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index db7410a..6b93a5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -123,30 +123,30 @@ static inline struct amdgpu_device 
*get_amdgpu_device(struct kgd_dev *kgd)
  {
 return (struct amdgpu_device *)kgd;
  }

  static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
 uint32_t queue, uint32_t vmid)
  {
 struct amdgpu_device *adev = get_amdgpu_device(kgd);
 uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | 
QUEUEID(queue);

-   mutex_lock(&adev->srbm_mutex);
+   spin_lock(&adev->srbm_lock);
 WREG32(mmSRBM_GFX_CNTL, value);
  }

  static void unlock_srbm(struct kgd_dev *kgd)
  {
 struct amdgpu_device *adev = get_amdgpu_device(kgd);

 WREG32(mmSRBM_GFX_CNTL, 0);
-   mutex_unlock(&adev->srbm_mutex);
+   spin_unlock(&adev->srbm_lock);
  }

  static void acquire_queue(struct 

Re: [PATCH 8/8] amdgpu: use sync file for shared semaphores (v2.1)

2017-04-12 Thread Christian König

On 12.04.2017 at 05:58, Dave Airlie wrote:

On 12 April 2017 at 13:34, Mao, David  wrote:

My point is that it is reasonable to split the semaphore signal/wait from the 
command submission.
For the signal ioctl, we could just pick the last fence in the same schedule 
context, and we don't need to ask for an explicit flush or a dummy submission 
trick.
The spec guarantees the signal always comes before the wait, which means we 
could always get a valid fence for the kernel sem object.

I'm a bit vague on the schedule contexts stuff, but does anything
guarantee that the X server present operation will be in the same
schedule context?


Not even remotely. Since X and the application are separate processes 
they can't access each other's schedule contexts.



This might be something for Christian to chime in on. We could, I
suppose, add ioctls to avoid the dummy CS submission. We could also
make dummy CS submission simpler: if we submit no IBs, we could just
have it deal with the semaphores for those cases and avoid any
explicit flushes, which saves reproducing the logic to wait and sync.


Sorry, I'm not following the whole discussion. Why do we want a dummy 
submission in the first place? Just to have the semaphore in the 
signaled state?



But at least for the wait case, we need to send something to the
scheduler to wait on, and that looks pretty much like the CS ioctl we
have now.
For the signal case there might be a better argument that an explicit
signal with the last fence on this ctx could be used. However, at
least with the way radv works now, we definitely know the X server is
finished with the present buffer, as it tells us via its own sync
logic; at that point radv submits an empty CS with the signal
semaphores. We'd really want to pass a semaphore between the X server
and client to do this perfectly.


Yes, agree. We should fix this on the user space level, not add any 
kernel workarounds.


Regards,
Christian.



Dave.


Re: [PATCH 8/8] amdgpu: use sync file for shared semaphores (v3)

2017-04-12 Thread Christian König

On 12.04.2017 at 06:57, Dave Airlie wrote:

From: Dave Airlie 

This creates a new command submission chunk for amdgpu
to add wait and signal sync objects around the submission.

Sync objects are managed via the drm syncobj ioctls.

The command submission interface is enhanced with two new
chunks, one for semaphore waiting, one for semaphore signalling
and just takes a list of handles for each.

This is based on work originally done by David Zhou at AMD,
with input from Christian Konig on what things should look like.

NOTE: this interface addition needs a version bump to expose
it to userspace.

v1.1: keep file reference on import.
v2: move to using syncobjs
v2.1: change some APIs to just use p pointer.
v3: make more robust against CS failures, we now add the
wait sems but only remove them once the CS job has been
submitted.

Signed-off-by: Dave Airlie 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 120 +++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |   2 +-
  include/uapi/drm/amdgpu_drm.h   |   6 ++
  3 files changed, 126 insertions(+), 2 deletions(-)
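
[A hedged sketch of how userspace might attach the new chunks to a CS
ioctl. The drm_amdgpu_cs_chunk fields exist in the current uapi; the
layout of drm_amdgpu_cs_chunk_sem (a single handle member) is assumed
from the deps[i].handle usage in the patch below.]

	struct drm_amdgpu_cs_chunk_sem wait_sem = { .handle = syncobj_handle };
	struct drm_amdgpu_cs_chunk chunks[2];

	chunks[0].chunk_id   = AMDGPU_CHUNK_ID_IB;       /* the actual work */
	chunks[0].length_dw  = sizeof(ib_info) / 4;
	chunks[0].chunk_data = (uintptr_t)&ib_info;

	chunks[1].chunk_id   = AMDGPU_CHUNK_ID_SEM_WAIT; /* new in this patch */
	chunks[1].length_dw  = sizeof(wait_sem) / 4;     /* kernel derives
							  * num_deps from this */
	chunks[1].chunk_data = (uintptr_t)&wait_sem;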

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index df25b32..e9b5796 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -27,6 +27,7 @@
  #include 
  #include 
  #include 
+#include 
  #include "amdgpu.h"
  #include "amdgpu_trace.h"
  
@@ -217,6 +218,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)

break;
  
  		case AMDGPU_CHUNK_ID_DEPENDENCIES:

+   case AMDGPU_CHUNK_ID_SEM_WAIT:
+   case AMDGPU_CHUNK_ID_SEM_SIGNAL:
break;
  
  		default:

@@ -1008,6 +1011,73 @@ static int amdgpu_process_fence_dep(struct 
amdgpu_cs_parser *p,
return 0;
  }
  
+static int amdgpu_sem_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,

+uint32_t handle)
+{
+   int r;
+   struct dma_fence *fence;


Missing new line between declaration and code.


+   r = drm_syncobj_fence_get(p->filp, handle, &fence);
+   if (r)
+   return r;
+
+   r = amdgpu_sync_fence(p->adev, &p->job->sync, fence);
+   dma_fence_put(fence);
+
+   return r;
+}
+
+static int amdgpu_sem_lookup_and_remove(struct amdgpu_cs_parser *p,
+ uint32_t handle)
+{
+   int r;
+   struct dma_fence *old_fence;
+
+   r = drm_syncobj_replace_fence(p->filp, handle, NULL);
+   if (r)
+   return r;
+   dma_fence_put(old_fence);


Am I wrong or is old_fence not initialized here?

Additionally, what happens when the fence in the sync object is 
changed while we do the CS? Or even worse, the handle gets assigned to a 
new sync object?


Regards,
Christian.


+
+   return r;
+}
+
+static int amdgpu_process_sem_wait_dep(struct amdgpu_cs_parser *p,
+  struct amdgpu_cs_chunk *chunk)
+{
+   unsigned num_deps;
+   int i, r;
+   struct drm_amdgpu_cs_chunk_sem *deps;
+
+   deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
+   num_deps = chunk->length_dw * 4 /
+   sizeof(struct drm_amdgpu_cs_chunk_sem);
+
+   for (i = 0; i < num_deps; ++i) {
+   r = amdgpu_sem_lookup_and_add_to_sync(p, deps[i].handle);
+   if (r)
+   return r;
+   }
+   return 0;
+}
+
+static int amdgpu_process_sem_wait_post(struct amdgpu_cs_parser *p,
+   struct amdgpu_cs_chunk *chunk)
+{
+   unsigned num_deps;
+   int i, r;
+   struct drm_amdgpu_cs_chunk_sem *deps;
+
+   deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
+   num_deps = chunk->length_dw * 4 /
+   sizeof(struct drm_amdgpu_cs_chunk_sem);
+
+   for (i = 0; i < num_deps; ++i) {
+   r = amdgpu_sem_lookup_and_remove(p, deps[i].handle);
+   if (r)
+   return r;
+   }
+   return 0;
+}
+
  static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
  struct amdgpu_cs_parser *p)
  {
@@ -1022,12 +1092,60 @@ static int amdgpu_cs_dependencies(struct amdgpu_device 
*adev,
r = amdgpu_process_fence_dep(p, chunk);
if (r)
return r;
+   } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SEM_WAIT) {
+   r = amdgpu_process_sem_wait_dep(p, chunk);
+   if (r)
+   return r;
}
}
  
  	return 0;

  }
  
+static int amdgpu_process_sem_signal_dep(struct amdgpu_cs_parser *p,

+struct amdgpu_cs_chunk *chunk)
+{
+   unsigned num_deps;
+   int i, r;
+   struct drm_amdgpu_cs_chunk_sem *deps;
+
+   

Re: [PATCH 8/8] amdgpu: use sync file for shared semaphores (v3)

2017-04-12 Thread Chris Wilson
On Wed, Apr 12, 2017 at 11:55:24AM +0200, Christian König wrote:
> On 12.04.2017 at 06:57, Dave Airlie wrote:
> >+static int amdgpu_sem_lookup_and_remove(struct amdgpu_cs_parser *p,
> >+  uint32_t handle)
> >+{
> >+int r;
> >+struct dma_fence *old_fence;
> >+
> >+r = drm_syncobj_replace_fence(p->filp, handle, NULL);
> >+if (r)
> >+return r;
> >+dma_fence_put(old_fence);
> 
> Am I wrong or is old_fence not initialized here?
> 
> Additionally, what happens when the fence in the sync object is
> changed while we do the CS? Or even worse, the handle gets
> assigned to a new sync object?

We either ww_mutex the lot, or regard that as a userspace race where the
order between the two concurrent CS emits is undefined and who gets the
in-semaphore is happenstance.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


Re: [PATCH 5/8] sync_file: add support for a semaphore object

2017-04-12 Thread Chris Wilson
On Wed, Apr 12, 2017 at 12:36:37PM +1000, Dave Airlie wrote:
> On 11 April 2017 at 17:50, Chris Wilson  wrote:
> > On Tue, Apr 11, 2017 at 01:22:17PM +1000, Dave Airlie wrote:
> >> From: Dave Airlie 
> >>
> >> This object can be used to implement the Vulkan semaphores.
> >>
> >> The object behaviour differs from fence, in that you can
> >> replace the underlying fence, and you cannot merge semaphores.
> >>
> >> Signed-off-by: Dave Airlie 
> >> ---
> >> +/**
> >> + * sync_file_replace_fence - replace the fence related to the sync_file
> >> + * @sync_file:sync file to replace fence in
> >> + * @fence: fence to replace with (or NULL for no fence).
> >> + * Returns previous fence.
> >> + */
> >> +struct dma_fence *sync_file_replace_fence(struct sync_file *sync_file,
> >> +   struct dma_fence *fence)
> >> +{
> >> + struct dma_fence *ret_fence = NULL;
> >> +
> >> + if (sync_file->type != SYNC_FILE_TYPE_SEMAPHORE)
> >> + return NULL;
> >> +
> >> + if (fence)
> >> + dma_fence_get(fence);
> >> +
> >> + mutex_lock(&sync_file->lock);
> >> +
> >> + ret_fence = sync_file_get_fence_locked(sync_file);
> >> + if (ret_fence) {
> >> + if (test_bit(POLL_ENABLED, _fence->flags))
> >> + dma_fence_remove_callback(ret_fence, &sync_file->cb);
> >> + }
> >
> > Fails when sync_file_replace_fence is passed sync_file->fence.
> 
> Not sure what the best semantics are there, any opinions on barring
> wakeups/polling on semaphore sync_files, and just punting this
> until we need it.

I think getting it right now will make writing sw_sync-esque (i.e. cpu)
tests easier and more complete.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


Re: [PATCH 8/8] amdgpu: use sync file for shared semaphores (v3)

2017-04-12 Thread Christian König

On 12.04.2017 at 12:18, Chris Wilson wrote:

On Wed, Apr 12, 2017 at 11:55:24AM +0200, Christian König wrote:

On 12.04.2017 at 06:57, Dave Airlie wrote:

+static int amdgpu_sem_lookup_and_remove(struct amdgpu_cs_parser *p,
+ uint32_t handle)
+{
+   int r;
+   struct dma_fence *old_fence;
+
+   r = drm_syncobj_replace_fence(p->filp, handle, NULL);
+   if (r)
+   return r;
+   dma_fence_put(old_fence);

Am I wrong or is old_fence not initialized here?

Additionally, what happens when the fence in the sync object is
changed while we do the CS? Or even worse, the handle gets
assigned to a new sync object?

We either ww_mutex the lot, or regard that as a userspace race where the
order between the two concurrent CS emits is undefined and who gets the
in-semaphore is happenstance.


My thinking to solve this was rather a) resolve the handle to an object 
only once and b) replace the old fence with NULL only if it didn't change.


Letting userspace race here is also an option, but I would prefer 
another solution.


Christian.
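
[A rough sketch of (a) and (b); every helper named here is hypothetical,
the point is only the compare-and-swap semantics of the final replace.]

	/* (a) resolve the handle to an object exactly once, taking a ref */
	struct drm_syncobj *obj = syncobj_find(p->filp, handle);
	struct dma_fence *fence = syncobj_fence_get(obj);

	r = amdgpu_sync_fence(p->adev, &p->job->sync, fence);
	/* ... CS runs ... */

	/* (b) after submission, clear the fence only if it is still the
	 * one we synced on, so a concurrent replace is never lost */
	syncobj_replace_fence_if(obj, fence, NULL);
	dma_fence_put(fence);
	syncobj_put(obj);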


-Chris





RE: [PATCH 2/3] drm/radeon: Make SI support in Radeon conditional

2017-04-12 Thread Li, Samuel
My understanding is that SI support in AMDGPU is limited.
So maybe here it can be the other way around: by default, SI support in
AMDGPU should be disabled.

Sam


-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of Felix 
Kuehling
Sent: Friday, April 07, 2017 4:16 PM
To: amd-gfx@lists.freedesktop.org
Cc: Kuehling, Felix 
Subject: [PATCH 2/3] drm/radeon: Make SI support in Radeon conditional

Advertise SI PCI IDs only when they are not supported by amdgpu.
Use the CONFIG_DRM_AMDGPU_SI option to check, so that a single option in the
kernel config keeps both drivers in sync.

This is the simplest possible change. A more complete solution may want to 
conditionally disable more SI-specific code in the Radeon driver.

Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/radeon/Kconfig  |  12 +++
 drivers/gpu/drm/radeon/radeon_drv.c |   3 +
 include/drm/drm_pciids.h| 146 ++--
 3 files changed, 89 insertions(+), 72 deletions(-)

diff --git a/drivers/gpu/drm/radeon/Kconfig b/drivers/gpu/drm/radeon/Kconfig 
index 86bbac8..2e66198 100644
--- a/drivers/gpu/drm/radeon/Kconfig
+++ b/drivers/gpu/drm/radeon/Kconfig
@@ -1,3 +1,15 @@
+config DRM_RADEON_FORCE_SI
+   bool "Enable radeon support for SI parts even when amdgpu supports it"
+   depends on DRM_RADEON && DRM_AMDGPU_SI
+   default n
+   help
+ Choose this option if you want to enable support for SI ASICs
+ in the radeon driver even when CONFIG_DRM_AMDGPU_SI is enabled.
+
+ This option is meant for experimentation and testing. In a
+ production system only one driver should claim support for the
+ same ASIC.
+
 config DRM_RADEON_FORCE_CIK
bool "Enable radeon support for CIK parts even when amdgpu supports it"
	depends on DRM_RADEON && DRM_AMDGPU_CIK

diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index fda6411..235c324 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -305,6 +305,9 @@ static inline void radeon_unregister_atpx_handler(void) {}  
#if !defined(CONFIG_DRM_AMDGPU_CIK) || defined(CONFIG_DRM_RADEON_FORCE_CIK)
radeon_CIK_PCI_IDS,
 #endif
+#if !defined(CONFIG_DRM_AMDGPU_SI) || defined(CONFIG_DRM_RADEON_FORCE_SI)
+   radeon_SI_PCI_IDS,
+#endif
radeon_PCI_IDS
 };
 
diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h
index cf17901..b42179f 100644
--- a/include/drm/drm_pciids.h
+++ b/include/drm/drm_pciids.h
@@ -1,3 +1,77 @@
+#define radeon_SI_PCI_IDS \
+   {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 
CHIP_TAHITI|RADEON_NEW_MEMMAP}, \
+   {0x1002, 0x6784, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 
CHIP_TAHITI|RADEON_NEW_MEMMAP}, \
+   {0x1002, 0x6788, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 
CHIP_TAHITI|RADEON_NEW_MEMMAP}, \
+   {0x1002, 0x678A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 
CHIP_TAHITI|RADEON_NEW_MEMMAP}, \
+   {0x1002, 0x6790, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 
CHIP_TAHITI|RADEON_NEW_MEMMAP}, \
+   {0x1002, 0x6791, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 
CHIP_TAHITI|RADEON_NEW_MEMMAP}, \
+   {0x1002, 0x6792, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 
CHIP_TAHITI|RADEON_NEW_MEMMAP}, \
+   {0x1002, 0x6798, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 
CHIP_TAHITI|RADEON_NEW_MEMMAP}, \
+   {0x1002, 0x6799, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 
CHIP_TAHITI|RADEON_NEW_MEMMAP}, \
+   {0x1002, 0x679A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 
CHIP_TAHITI|RADEON_NEW_MEMMAP}, \
+   {0x1002, 0x679B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 
CHIP_TAHITI|RADEON_NEW_MEMMAP}, \
+   {0x1002, 0x679E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 
CHIP_TAHITI|RADEON_NEW_MEMMAP}, \
+   {0x1002, 0x679F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 
CHIP_TAHITI|RADEON_NEW_MEMMAP}, \
+   {0x1002, 0x6800, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 
CHIP_PITCAIRN|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+   {0x1002, 0x6801, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 
CHIP_PITCAIRN|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+   {0x1002, 0x6802, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 
CHIP_PITCAIRN|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+   {0x1002, 0x6806, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 
CHIP_PITCAIRN|RADEON_NEW_MEMMAP}, \
+   {0x1002, 0x6808, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 
CHIP_PITCAIRN|RADEON_NEW_MEMMAP}, \
+   {0x1002, 0x6809, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 
CHIP_PITCAIRN|RADEON_NEW_MEMMAP}, \
+   {0x1002, 0x6810, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 
CHIP_PITCAIRN|RADEON_NEW_MEMMAP}, \
+   {0x1002, 0x6811, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 
CHIP_PITCAIRN|RADEON_NEW_MEMMAP}, \
+   {0x1002, 0x6816, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 
CHIP_PITCAIRN|RADEON_NEW_MEMMAP}, \
+   {0x1002, 0x6817, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 
CHIP_PITCAIRN|RADEON_NEW_MEMMAP}, \
+   {0x1002, 0x6818, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 
CHIP_PITCAIRN|RADEON_NEW_MEMMAP}, \
+   {0x1002, 0x6819, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 

Re: [PATCH umr] Prevent reading sensors far too quickly.

2017-04-12 Thread Edward O'Callaghan
Reviewed-by: Edward O'Callaghan 

On 04/12/2017 11:10 PM, Tom St Denis wrote:
> On platforms without GPU_POWER sensors the thread reading sensors would 
> proceed
> far too quickly.  So it is now rate limited to 50Hz to be consistent.
> 
> Signed-off-by: Tom St Denis 
> ---
>  src/app/top.c | 12 +++-
>  1 file changed, 11 insertions(+), 1 deletion(-)
> 
> diff --git a/src/app/top.c b/src/app/top.c
> index 96e33ff2e1da..364180eb70f6 100644
> --- a/src/app/top.c
> +++ b/src/app/top.c
> @@ -293,18 +293,24 @@ static volatile struct umr_bitfield *sensor_bits = NULL;
>  static void *gpu_sensor_thread(void *data)
>  {
>   struct umr_asic asic = *((struct umr_asic*)data);
> - int size, rem, off, x;
> + int size, rem, off, x, power;
>   char fname[128];
> + struct timespec ts;
> +
> + ts.tv_sec = 0;
> + ts.tv_nsec = 1000000000UL / 50; // limit to 50Hz
>  
>   snprintf(fname, sizeof(fname)-1, 
> "/sys/kernel/debug/dri/%d/amdgpu_sensors", asic.instance);
>   asic.fd.sensors = open(fname, O_RDWR);
>   while (!sensor_thread_quit) {
>   rem = sizeof gpu_power_data;
>   off = 0;
> + power = 0;
>   for (x = 0; sensor_bits[x].regname; ) {
>   switch (sensor_bits[x].start) {
>   case AMDGPU_PP_SENSOR_GPU_POWER:
>   size = 16;
> + power = 1;
>   break;
>   default:
>   size = 4;
> @@ -316,6 +322,10 @@ static void *gpu_sensor_thread(void *data)
>   rem -= size;
>   x   += size / 4;
>   }
> +
> + // sleep for 20ms if no GPU power sensor to rate limit things a 
> bit
> + if (!power)
> + nanosleep(&ts, NULL);
>   }
>   close(asic.fd.sensors);
>   return NULL;
> 





Re: [PATCH 8/8] amdgpu: use sync file for shared semaphores (v3)

2017-04-12 Thread Alex Deucher
On Wed, Apr 12, 2017 at 12:57 AM, Dave Airlie  wrote:
> From: Dave Airlie 
>
> This creates a new command submission chunk for amdgpu
> to add wait and signal sync objects around the submission.
>
> Sync objects are managed via the drm syncobj ioctls.
>
> The command submission interface is enhanced with two new
> chunks, one for semaphore waiting, one for semaphore signalling
> and just takes a list of handles for each.
>
> This is based on work originally done by David Zhou at AMD,
> with input from Christian Konig on what things should look like.
>
> NOTE: this interface addition needs a version bump to expose
> it to userspace.
>
> v1.1: keep file reference on import.
> v2: move to using syncobjs
> v2.1: change some APIs to just use p pointer.
> v3: make more robust against CS failures, we now add the
> wait sems but only remove them once the CS job has been
> submitted.
>
> Signed-off-by: Dave Airlie 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 120 
> +++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |   2 +-
>  include/uapi/drm/amdgpu_drm.h   |   6 ++
>  3 files changed, 126 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index df25b32..e9b5796 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -27,6 +27,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include "amdgpu.h"
>  #include "amdgpu_trace.h"
>
> @@ -217,6 +218,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, 
> void *data)
> break;
>
> case AMDGPU_CHUNK_ID_DEPENDENCIES:
> +   case AMDGPU_CHUNK_ID_SEM_WAIT:
> +   case AMDGPU_CHUNK_ID_SEM_SIGNAL:
> break;
>
> default:
> @@ -1008,6 +1011,73 @@ static int amdgpu_process_fence_dep(struct 
> amdgpu_cs_parser *p,
> return 0;
>  }
>
> +static int amdgpu_sem_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
> +uint32_t handle)
> +{
> +   int r;
> +   struct dma_fence *fence;
> +   r = drm_syncobj_fence_get(p->filp, handle, &fence);
> +   if (r)
> +   return r;
> +
> +   r = amdgpu_sync_fence(p->adev, &p->job->sync, fence);
> +   dma_fence_put(fence);
> +
> +   return r;
> +}
> +
> +static int amdgpu_sem_lookup_and_remove(struct amdgpu_cs_parser *p,
> + uint32_t handle)
> +{
> +   int r;
> +   struct dma_fence *old_fence;
> +
> +   r = drm_syncobj_replace_fence(p->filp, handle, NULL);
> +   if (r)
> +   return r;
> +   dma_fence_put(old_fence);
> +
> +   return r;
> +}
> +
> +static int amdgpu_process_sem_wait_dep(struct amdgpu_cs_parser *p,
> +  struct amdgpu_cs_chunk *chunk)
> +{
> +   unsigned num_deps;
> +   int i, r;
> +   struct drm_amdgpu_cs_chunk_sem *deps;
> +
> +   deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
> +   num_deps = chunk->length_dw * 4 /
> +   sizeof(struct drm_amdgpu_cs_chunk_sem);
> +
> +   for (i = 0; i < num_deps; ++i) {
> +   r = amdgpu_sem_lookup_and_add_to_sync(p, deps[i].handle);
> +   if (r)
> +   return r;
> +   }
> +   return 0;
> +}
> +
> +static int amdgpu_process_sem_wait_post(struct amdgpu_cs_parser *p,
> +   struct amdgpu_cs_chunk *chunk)
> +{
> +   unsigned num_deps;
> +   int i, r;
> +   struct drm_amdgpu_cs_chunk_sem *deps;
> +
> +   deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
> +   num_deps = chunk->length_dw * 4 /
> +   sizeof(struct drm_amdgpu_cs_chunk_sem);
> +
> +   for (i = 0; i < num_deps; ++i) {
> +   r = amdgpu_sem_lookup_and_remove(p, deps[i].handle);
> +   if (r)
> +   return r;
> +   }
> +   return 0;
> +}
> +
>  static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
>   struct amdgpu_cs_parser *p)
>  {
> @@ -1022,12 +1092,60 @@ static int amdgpu_cs_dependencies(struct 
> amdgpu_device *adev,
> r = amdgpu_process_fence_dep(p, chunk);
> if (r)
> return r;
> +   } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SEM_WAIT) {
> +   r = amdgpu_process_sem_wait_dep(p, chunk);
> +   if (r)
> +   return r;
> }
> }
>
> return 0;
>  }
>
> +static int amdgpu_process_sem_signal_dep(struct amdgpu_cs_parser *p,
> +struct amdgpu_cs_chunk *chunk)
> +{
> +   unsigned num_deps;
> +   int i, r;
> +   struct 

[PATCH 2/2] drm/amdgpu: drop support for per ASIC read registers

2017-04-12 Thread Christian König
From: Christian König 

Only per-family registers are still used.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/soc15.c | 29 +-
 drivers/gpu/drm/amd/amdgpu/vi.c| 42 +-
 2 files changed, 2 insertions(+), 69 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c 
b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 8917bde..2c05dab 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -280,10 +280,6 @@ static bool soc15_read_bios_from_rom(struct amdgpu_device 
*adev,
return true;
 }
 
-static struct amdgpu_allowed_register_entry vega10_allowed_read_registers[] = {
-   /* todo */
-};
-
 static struct amdgpu_allowed_register_entry soc15_allowed_read_registers[] = {
{ SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS)},
{ SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS2)},
@@ -341,32 +337,9 @@ static uint32_t soc15_get_register_value(struct 
amdgpu_device *adev,
 static int soc15_read_register(struct amdgpu_device *adev, u32 se_num,
u32 sh_num, u32 reg_offset, u32 *value)
 {
-   struct amdgpu_allowed_register_entry *asic_register_table = NULL;
-   struct amdgpu_allowed_register_entry *asic_register_entry;
-   uint32_t size, i;
+   uint32_t i;
 
*value = 0;
-   switch (adev->asic_type) {
-   case CHIP_VEGA10:
-   asic_register_table = vega10_allowed_read_registers;
-   size = ARRAY_SIZE(vega10_allowed_read_registers);
-   break;
-   default:
-   return -EINVAL;
-   }
-
-   if (asic_register_table) {
-   for (i = 0; i < size; i++) {
-   asic_register_entry = asic_register_table + i;
-   if (reg_offset != asic_register_entry->reg_offset)
-   continue;
-   *value = soc15_get_register_value(adev,
- 
asic_register_entry->grbm_indexed,
- se_num, sh_num, 
reg_offset);
-   return 0;
-   }
-   }
-
for (i = 0; i < ARRAY_SIZE(soc15_allowed_read_registers); i++) {
if (reg_offset != soc15_allowed_read_registers[i].reg_offset)
continue;
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 5be8e94..505c17a 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -464,12 +464,6 @@ static void vi_detect_hw_virtualization(struct 
amdgpu_device *adev)
}
 }
 
-static const struct amdgpu_allowed_register_entry 
tonga_allowed_read_registers[] = {
-};
-
-static const struct amdgpu_allowed_register_entry cz_allowed_read_registers[] 
= {
-};
-
 static const struct amdgpu_allowed_register_entry vi_allowed_read_registers[] 
= {
{mmGRBM_STATUS},
{mmGRBM_STATUS2},
@@ -648,43 +642,9 @@ static uint32_t vi_get_register_value(struct amdgpu_device 
*adev,
 static int vi_read_register(struct amdgpu_device *adev, u32 se_num,
u32 sh_num, u32 reg_offset, u32 *value)
 {
-   const struct amdgpu_allowed_register_entry *asic_register_table = NULL;
-   const struct amdgpu_allowed_register_entry *asic_register_entry;
-   uint32_t size, i;
+   uint32_t i;
 
*value = 0;
-   switch (adev->asic_type) {
-   case CHIP_TOPAZ:
-   asic_register_table = tonga_allowed_read_registers;
-   size = ARRAY_SIZE(tonga_allowed_read_registers);
-   break;
-   case CHIP_FIJI:
-   case CHIP_TONGA:
-   case CHIP_POLARIS11:
-   case CHIP_POLARIS10:
-   case CHIP_POLARIS12:
-   case CHIP_CARRIZO:
-   case CHIP_STONEY:
-   asic_register_table = cz_allowed_read_registers;
-   size = ARRAY_SIZE(cz_allowed_read_registers);
-   break;
-   default:
-   return -EINVAL;
-   }
-
-   if (asic_register_table) {
-   for (i = 0; i < size; i++) {
-   bool indexed = asic_register_entry->grbm_indexed;
-
-   asic_register_entry = asic_register_table + i;
-   if (reg_offset != asic_register_entry->reg_offset)
-   continue;
-   *value = vi_get_register_value(adev, indexed, se_num,
-  sh_num, reg_offset);
-   return 0;
-   }
-   }
-
for (i = 0; i < ARRAY_SIZE(vi_allowed_read_registers); i++) {
bool indexed = vi_allowed_read_registers[i].grbm_indexed;
 
-- 
2.5.0



[PATCH 1/2] drm/amdgpu: drop support for untouched registers

2017-04-12 Thread Christian König
From: Christian König 

I couldn't figure out what this was originally good for, but we
don't use it any more.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h |   1 -
 drivers/gpu/drm/amd/amdgpu/cik.c| 121 +-
 drivers/gpu/drm/amd/amdgpu/si.c |  84 +-
 drivers/gpu/drm/amd/amdgpu/soc15.c  |  50 ++-
 drivers/gpu/drm/amd/amdgpu/vi.c | 168 ++--
 5 files changed, 210 insertions(+), 214 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index b7e7156..71364f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1296,7 +1296,6 @@ struct amdgpu_smumgr {
  */
 struct amdgpu_allowed_register_entry {
uint32_t reg_offset;
-   bool untouched;
bool grbm_indexed;
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 1451594..6ce9f80 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -965,62 +965,62 @@ static bool cik_read_bios_from_rom(struct amdgpu_device 
*adev,
 }
 
 static const struct amdgpu_allowed_register_entry cik_allowed_read_registers[] 
= {
-   {mmGRBM_STATUS, false},
-   {mmGB_ADDR_CONFIG, false},
-   {mmMC_ARB_RAMCFG, false},
-   {mmGB_TILE_MODE0, false},
-   {mmGB_TILE_MODE1, false},
-   {mmGB_TILE_MODE2, false},
-   {mmGB_TILE_MODE3, false},
-   {mmGB_TILE_MODE4, false},
-   {mmGB_TILE_MODE5, false},
-   {mmGB_TILE_MODE6, false},
-   {mmGB_TILE_MODE7, false},
-   {mmGB_TILE_MODE8, false},
-   {mmGB_TILE_MODE9, false},
-   {mmGB_TILE_MODE10, false},
-   {mmGB_TILE_MODE11, false},
-   {mmGB_TILE_MODE12, false},
-   {mmGB_TILE_MODE13, false},
-   {mmGB_TILE_MODE14, false},
-   {mmGB_TILE_MODE15, false},
-   {mmGB_TILE_MODE16, false},
-   {mmGB_TILE_MODE17, false},
-   {mmGB_TILE_MODE18, false},
-   {mmGB_TILE_MODE19, false},
-   {mmGB_TILE_MODE20, false},
-   {mmGB_TILE_MODE21, false},
-   {mmGB_TILE_MODE22, false},
-   {mmGB_TILE_MODE23, false},
-   {mmGB_TILE_MODE24, false},
-   {mmGB_TILE_MODE25, false},
-   {mmGB_TILE_MODE26, false},
-   {mmGB_TILE_MODE27, false},
-   {mmGB_TILE_MODE28, false},
-   {mmGB_TILE_MODE29, false},
-   {mmGB_TILE_MODE30, false},
-   {mmGB_TILE_MODE31, false},
-   {mmGB_MACROTILE_MODE0, false},
-   {mmGB_MACROTILE_MODE1, false},
-   {mmGB_MACROTILE_MODE2, false},
-   {mmGB_MACROTILE_MODE3, false},
-   {mmGB_MACROTILE_MODE4, false},
-   {mmGB_MACROTILE_MODE5, false},
-   {mmGB_MACROTILE_MODE6, false},
-   {mmGB_MACROTILE_MODE7, false},
-   {mmGB_MACROTILE_MODE8, false},
-   {mmGB_MACROTILE_MODE9, false},
-   {mmGB_MACROTILE_MODE10, false},
-   {mmGB_MACROTILE_MODE11, false},
-   {mmGB_MACROTILE_MODE12, false},
-   {mmGB_MACROTILE_MODE13, false},
-   {mmGB_MACROTILE_MODE14, false},
-   {mmGB_MACROTILE_MODE15, false},
-   {mmCC_RB_BACKEND_DISABLE, false, true},
-   {mmGC_USER_RB_BACKEND_DISABLE, false, true},
-   {mmGB_BACKEND_MAP, false, false},
-   {mmPA_SC_RASTER_CONFIG, false, true},
-   {mmPA_SC_RASTER_CONFIG_1, false, true},
+   {mmGRBM_STATUS},
+   {mmGB_ADDR_CONFIG},
+   {mmMC_ARB_RAMCFG},
+   {mmGB_TILE_MODE0},
+   {mmGB_TILE_MODE1},
+   {mmGB_TILE_MODE2},
+   {mmGB_TILE_MODE3},
+   {mmGB_TILE_MODE4},
+   {mmGB_TILE_MODE5},
+   {mmGB_TILE_MODE6},
+   {mmGB_TILE_MODE7},
+   {mmGB_TILE_MODE8},
+   {mmGB_TILE_MODE9},
+   {mmGB_TILE_MODE10},
+   {mmGB_TILE_MODE11},
+   {mmGB_TILE_MODE12},
+   {mmGB_TILE_MODE13},
+   {mmGB_TILE_MODE14},
+   {mmGB_TILE_MODE15},
+   {mmGB_TILE_MODE16},
+   {mmGB_TILE_MODE17},
+   {mmGB_TILE_MODE18},
+   {mmGB_TILE_MODE19},
+   {mmGB_TILE_MODE20},
+   {mmGB_TILE_MODE21},
+   {mmGB_TILE_MODE22},
+   {mmGB_TILE_MODE23},
+   {mmGB_TILE_MODE24},
+   {mmGB_TILE_MODE25},
+   {mmGB_TILE_MODE26},
+   {mmGB_TILE_MODE27},
+   {mmGB_TILE_MODE28},
+   {mmGB_TILE_MODE29},
+   {mmGB_TILE_MODE30},
+   {mmGB_TILE_MODE31},
+   {mmGB_MACROTILE_MODE0},
+   {mmGB_MACROTILE_MODE1},
+   {mmGB_MACROTILE_MODE2},
+   {mmGB_MACROTILE_MODE3},
+   {mmGB_MACROTILE_MODE4},
+   {mmGB_MACROTILE_MODE5},
+   {mmGB_MACROTILE_MODE6},
+   {mmGB_MACROTILE_MODE7},
+   {mmGB_MACROTILE_MODE8},
+   {mmGB_MACROTILE_MODE9},
+   {mmGB_MACROTILE_MODE10},
+   {mmGB_MACROTILE_MODE11},
+   {mmGB_MACROTILE_MODE12},
+   {mmGB_MACROTILE_MODE13},
+   {mmGB_MACROTILE_MODE14},
+   {mmGB_MACROTILE_MODE15},
+   {mmCC_RB_BACKEND_DISABLE, true},
+   {mmGC_USER_RB_BACKEND_DISABLE, true},
+   {mmGB_BACKEND_MAP, false},
+  

Re: [PATCH 5/8] sync_file: add support for a semaphore object

2017-04-12 Thread Dave Airlie
>>
>> Not sure what the best semantics are there, any opinions on barring
>> wakeups/polling on semaphore sync_files, and just punting this
>> until we need it.
>
> I think getting it right now will make writing sw_sync-esque (i.e. cpu)
> tests easier and more complete.

I just don't have any use case for it, so we would be writing code to
write tests for it.

That doesn't seem smart.

If there is a future non-testing use case, the API is expressive
enough for someone
to add a flag or new sync obj to allow polling and to add support in a
nice easily
digestible patch.

Dave.


Re: [PATCH 16/26] drm/amdgpu: new queue policy, take first 2 queues of each pipe

2017-04-12 Thread Andres Rodriguez



On 2017-04-11 06:30 PM, Felix Kuehling wrote:

What about GFX9?


I don't have a gfx9 card to test with yet.

When changing the queue policy for gfx7/8 I found a lot of places in the 
code that had the assumption that amdgpu would only be using a single 
pipe. I'm pretty sure there is probably something in the gfx9 code that 
I've missed and will blow up with the queue policy change. So I don't 
really want to send out this change without testing it.


If someone wants to create a similar patch for gfx9 and test it that 
should be simple.


Or... if you could send me a vega board... ;)

Regards,
Andres



See one more comment inline [FK].


On 17-04-06 02:21 AM, Andres Rodriguez wrote:

Instead of taking the first pipe and giving the rest to kfd, take the
first 2 queues of each pipe.

Effectively, amdgpu and amdkfd own the same number of queues. But
because the queues are spread over multiple pipes the hardware will be
able to better handle concurrent compute workloads.

amdgpu goes from 1 pipe to 4 pipes, i.e. from 1 compute thread to 4
amdkfd goes from 3 pipes to 4 pipes, i.e. from 3 compute threads to 4

Reviewed-by: Edward O'Callaghan 
Acked-by: Christian König 
Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 5a8ebae..c0cfcb9 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -2833,21 +2833,21 @@ static void gfx_v7_0_compute_queue_acquire(struct 
amdgpu_device *adev)
pipe = (i / adev->gfx.mec.num_queue_per_pipe)
% adev->gfx.mec.num_pipe_per_mec;
mec = (i / adev->gfx.mec.num_queue_per_pipe)
/ adev->gfx.mec.num_pipe_per_mec;

/* we've run out of HW */
if (mec > adev->gfx.mec.num_mec)
break;

/* policy: amdgpu owns all queues in the first pipe */


[FK] The comments needs to be updated (same for GFX8 below).


-   if (mec == 0 && pipe == 0)
+   if (mec == 0 && queue < 2)
set_bit(i, adev->gfx.mec.queue_bitmap);
}

/* update the number of active compute rings */
adev->gfx.num_compute_rings =
bitmap_weight(adev->gfx.mec.queue_bitmap, 
AMDGPU_MAX_COMPUTE_QUEUES);

/* If you hit this case and edited the policy, you probably just
 * need to increase AMDGPU_MAX_COMPUTE_RINGS */
WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 70119c5..f0c1a3f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -1453,21 +1453,21 @@ static void gfx_v8_0_compute_queue_acquire(struct 
amdgpu_device *adev)
pipe = (i / adev->gfx.mec.num_queue_per_pipe)
% adev->gfx.mec.num_pipe_per_mec;
mec = (i / adev->gfx.mec.num_queue_per_pipe)
/ adev->gfx.mec.num_pipe_per_mec;

/* we've run out of HW */
if (mec > adev->gfx.mec.num_mec)
break;

/* policy: amdgpu owns all queues in the first pipe */
-   if (mec == 0 && pipe == 0)
+   if (mec == 0 && queue < 2)
set_bit(i, adev->gfx.mec.queue_bitmap);
}

/* update the number of active compute rings */
adev->gfx.num_compute_rings =
bitmap_weight(adev->gfx.mec.queue_bitmap, 
AMDGPU_MAX_COMPUTE_QUEUES);

/* If you hit this case and edited the policy, you probably just
 * need to increase AMDGPU_MAX_COMPUTE_RINGS */
if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))





RE: [PATCH] drm/amdgpu: Add kernel parameter to manage memory error handling.

2017-04-12 Thread Panariti, David


> -Original Message-
> From: Michel Dänzer [mailto:mic...@daenzer.net]
> Sent: Tuesday, April 11, 2017 10:12 PM
> To: Panariti, David 
> Cc: amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/amdgpu: Add kernel parameter to manage
> memory error handling.
> 
> 
> Hi Dave,
> 
> 
> Please send patches inline instead of as attachments, ideally using git send-
> email.
> 
> 
> > @@ -212,6 +213,9 @@ module_param_named(cg_mask,
> amdgpu_cg_mask, uint,
> > 0444);  MODULE_PARM_DESC(pg_mask, "Powergating flags mask (0 =
> disable
> > power gating)");  module_param_named(pg_mask, amdgpu_pg_mask,
> uint,
> > 0444);
> >
> > +MODULE_PARM_DESC(ecc_mask, "ECC/EDC flags mask (0 = disable
> > +ECC/EDC)");
> 
> "0 = disable ECC/EDC" implies that they're enabled by default? Was that
> already the case before this patch?

[davep] Yes it was, and there was actually a problem in some cases where the CZ 
would hang, which is why I added the param.
I was wondering if it would be better to default to them being off, but I 
wasn't sure how important maintaining the original behavior is considered.
Actually, there are some bugs in the workaround function as it is, so it really 
should default to off.
I did some work implementing EDC for Vilas in research, but it was side-tracked 
for a while.
I fixed up the workaround, but it really makes no sense to use it at all until 
the rest of the EDC code is in place, which isn't currently scheduled to happen.
Given that, I actually think it would be best to remove the workaround and 
leave EDC disabled until (if) it is scheduled to be completed.

> 
> 
> > @@ -1664,6 +1664,24 @@ static int
> gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
> > if (adev->asic_type != CHIP_CARRIZO)
> > return 0;
> >
> > +   DRM_INFO("gfx_v8_0_do_edc_gpr_workarounds(): ecc_flags:
> 0x%08x\n",
> > +adev->ecc_flags);
> > +
> > +   /*
> > +* Check if EDC has been requested.
> > +* For Carrizo, EDC is the best/safest mode WRT error handling.
> > +*/
> > +   if (!(adev->ecc_flags
> > + & (AMD_ECC_SUPPORT_BEST | AMD_ECC_SUPPORT_EDC))) {
> > +   DRM_INFO("gfx_v8_0_do_edc_gpr_workarounds(): "
> > +"skipping workarounds and not enabling EDC.\n");
> > +
> > +   return 0;
> > +   }
> > +
> > +   DRM_INFO("gfx_v8_0_do_edc_gpr_workarounds(): "
> > +"running workarounds and enabling EDC.\n");
> 
> These DRM_INFOs are too chatty, maybe make them e.g.
> DRM_DEBUG_DRIVER.
> 
> 
> --
> Earthling Michel Dänzer   |   http://www.amd.com
> Libre software enthusiast | Mesa and X developer


Re: [PATCH 25/26] drm/amdgpu: guarantee bijective mapping of ring ids for LRU v3

2017-04-12 Thread Felix Kuehling
I haven't reviewed this in detail. But would it make sense to squash
that into the commit that introduced the LRU policy for the queue
manager (patch 18 in this series)?

Regards,
  Felix


On 17-04-06 02:21 AM, Andres Rodriguez wrote:
> Depending on usage patterns, the current LRU policy may create a
> non-injective mapping between userspace ring ids and kernel rings.
>
> This behaviour is undesired as apps that attempt to fill all HW blocks
> would be unable to reach some of them.
>
> This change forces the LRU policy to create bijective mappings only.
>
> v2: compress ring_blacklist
> v3: simplify amdgpu_ring_is_blacklisted() logic
>
> Signed-off-by: Andres Rodriguez 
> Reviewed-by: Nicolai Hähnle 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c | 16 +++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c  | 33 
> +--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h  |  4 ++--
>  3 files changed, 42 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
> index 926da09..adf59ef 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
> @@ -109,24 +109,36 @@ static enum amdgpu_ring_type 
> amdgpu_hw_ip_to_ring_type(int hw_ip)
>   DRM_ERROR("Invalid HW IP specified %d\n", hw_ip);
>   return -1;
>   }
>  }
>  
>  static int amdgpu_lru_map(struct amdgpu_device *adev,
> struct amdgpu_queue_mapper *mapper,
> int user_ring,
> struct amdgpu_ring **out_ring)
>  {
> - int r;
> + int r, i, j;
>   int ring_type = amdgpu_hw_ip_to_ring_type(mapper->hw_ip);
> + int ring_blacklist[AMDGPU_MAX_RINGS];
> + struct amdgpu_ring *ring;
>  
> - r = amdgpu_ring_lru_get(adev, ring_type, out_ring);
> + /* 0 is a valid ring index, so initialize to -1 */
> + memset(ring_blacklist, 0xff, sizeof(ring_blacklist));
> +
> + for (i = 0, j = 0; i < AMDGPU_MAX_RINGS; i++) {
> + ring = mapper->queue_map[i];
> + if (ring)
> + ring_blacklist[j++] = ring->idx;
> + }
> +
> + r = amdgpu_ring_lru_get(adev, ring_type, ring_blacklist,
> + j, out_ring);
>   if (r)
>   return r;
>  
>   return amdgpu_update_cached_map(mapper, user_ring, *out_ring);
>  }
>  
>  /**
>   * amdgpu_queue_mgr_init - init an amdgpu_queue_mgr struct
>   *
>   * @adev: amdgpu_device pointer
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> index b1af952..09fa8f7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> @@ -419,46 +419,65 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
>   ring->adev->rings[ring->idx] = NULL;
>  }
>  
>  static void amdgpu_ring_lru_touch_locked(struct amdgpu_device *adev,
>struct amdgpu_ring *ring)
>  {
>   /* list_move_tail handles the case where ring isn't part of the list */
> + list_move_tail(&ring->lru_list, &adev->ring_lru_list);
>  }
>  
> +static bool amdgpu_ring_is_blacklisted(struct amdgpu_ring *ring,
> +int *blacklist, int num_blacklist)
> +{
> + int i;
> +
> + for (i = 0; i < num_blacklist; i++) {
> + if (ring->idx == blacklist[i])
> + return true;
> + }
> +
> + return false;
> +}
> +
>  /**
>   * amdgpu_ring_lru_get - get the least recently used ring for a HW IP block
>   *
>   * @adev: amdgpu_device pointer
>   * @type: amdgpu_ring_type enum
> + * @blacklist: blacklisted ring ids array
> + * @num_blacklist: number of entries in @blacklist
>   * @ring: output ring
>   *
>   * Retrieve the amdgpu_ring structure for the least recently used ring of
>   * a specific IP block (all asics).
>   * Returns 0 on success, error on failure.
>   */
> -int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type,
> - struct amdgpu_ring **ring)
> +int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type, int *blacklist,
> + int num_blacklist, struct amdgpu_ring **ring)
>  {
>   struct amdgpu_ring *entry;
>  
>   /* List is sorted in LRU order, find first entry corresponding
>* to the desired HW IP */
>   *ring = NULL;
> + spin_lock(&adev->ring_lru_list_lock);
> + list_for_each_entry(entry, &adev->ring_lru_list, lru_list) {
> - if (entry->funcs->type == type) {
> - *ring = entry;
> - amdgpu_ring_lru_touch_locked(adev, *ring);
> - break;
> - }
> + if (entry->funcs->type != type)
> + continue;
> +
> + if (amdgpu_ring_is_blacklisted(entry, blacklist, num_blacklist))
> +  

Re: [PATCH] Add support for high priority scheduling in amdgpu v8

2017-04-12 Thread Andres Rodriguez
Thanks for the comments. I'll get these and Alex's comments addressed 
and re-send the series out.


Regards,
Andres

On 2017-04-11 06:31 PM, Felix Kuehling wrote:

I replied to patch 5 with an observation. But I think that code was
broken before your change.

The order of patches 7 and 8 should be reversed. Otherwise you break
radeon before you fix it.

See my comments on patch 9, 15 and 16 in separate emails. With those
comments addressed:

1-4, 10, 13-14 are Acked-by: Felix Kuehling 
5-9, 11-12, 15-16 are Reviewed-by: Felix Kuehling 

Regards,
  Felix

On 17-04-06 02:21 AM, Andres Rodriguez wrote:

Includes fixes for Alex's comments on v7.

Also includes a new patch to deal with an issue we discussed on #radeon:
[PATCH 15/26] drm/amdgpu: avoid KIQ clashing with compute or KFD

Patches missing review/ack:
10
14
15
18
20
21
23
24



Re: [PATCH 5/8] sync_file: add support for a semaphore object

2017-04-12 Thread Chris Wilson
On Thu, Apr 13, 2017 at 05:05:27AM +1000, Dave Airlie wrote:
> >>
> >> Not sure what the best semantics are there, any opinions on barring
> >> wakeups/polling on semaphore sync_files, and just punting this
> >> until we need it.
> >
> > I think getting it right now will make writing sw_sync-esque (i.e. cpu)
> > tests easier and more complete.
> 
> I just don't have any use case for it, so we would be writing code to
> write tests for it.
> 
> That doesn't seem smart.
> 
> If there is a future non-testing use case, the API is expressive
> enough for someone
> to add a flag or new sync obj to allow polling and to add support in a
> nice easily
> digestible patch.

My first thought for checking the signaled status was to use
poll(0), but that can be retrieved from the sync_file_status ioctl. But
to get that we still need to acquire an fd from the syncobj. And if
we wanted to check the flag on a driver syncobj, we would need to be
able to export one. That doesn't look very promising...

The testing chain would look like

create syncobj
execbuf -> signal syncobj
syncobj wait -> dummy/nop execbuf -> fence out

then use the fence as a proxy for testing the status of the syncobj.
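
[Roughly, in pseudocode; the wrapper names here are made up, since the
syncobj ioctls are still in flux in this series:]

	uint32_t sem = syncobj_create(drm_fd);        /* hypothetical wrappers */
	execbuf_signal(drm_fd, batch, sem);           /* CS that signals sem */
	int fence_fd = execbuf_nop_wait(drm_fd, sem); /* nop CS waits on sem,
						       * exports an out-fence */
	/* poll()/sync_file status on fence_fd now proxies the syncobj state */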

For generic tests, we could add an interface for vgem to use syncobjs?

If there's no way to export an fd for the syncobj, then we don't need
to handle it in the fops. Just be sure to leave a breadcrumb behind so
that the first person who does try to pass back a syncobj fd is reminded
that they need to fill in the fops.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


Re: [PATCH 5/8] sync_file: add support for a semaphore object

2017-04-12 Thread Dave Airlie
On 13 April 2017 at 08:34, Chris Wilson  wrote:
> On Thu, Apr 13, 2017 at 07:41:28AM +1000, Dave Airlie wrote:
>> >
>> > The problem, as I see it, is that you are taking functionality away from
>> > sync_file. If you are wrapping them up inside a sync_file, we have a
>> > fair expectation that our code to handle sync_files will continue to
>> > work.
>>
>> What code? there is no code existing that should be sharing sync objects
>> with semaphores backing them, and trying to use them as sync_files.
>
> By masquerading semaphores as sync_files, you are inviting them to be
> used with the existing code to handle sync_file.

But what existing code is going to get a sync object from another process
and blindly use it without knowing where it came from?

I'm not buying the argument that just because something is possible,
we have to support it.

>
> And quite different from the existing CPU visible sync_files. Why try
> stuffing them through the same interface? At the moment the only thing
> they have in common is the single fence pointer, and really you want
> them just for their handle/fd to a GPU channel.
>
> After you strip out the fops from the semaphore code, doesn't it boil
> down to:
>
> static int semaphore_file_release(struct inode *inode, struct file *file)
> {
> struct semaphore_file *s = file->private_data;
>
> dma_fence_put(s->fence);
> kfree(s);
> return 0;
> }
>
> static const struct file_operations semaphore_file_fops = {
> .release = semaphore_file_release,
> };
>
> struct semaphore_file *semaphore_file_create(void)
> {
> struct semaphore_file *s;
>
> s = kzalloc(sizeof(*s), GFP_KERNEL);
> if (!s)
> return NULL;
>
> s->file = anon_inode_getfile("semaphore_file",
>  &semaphore_file_fops, s, 0);
>
> return s;
> }
>
> Plus your new routines to manipulate the semaphore.
>
> The commonality with the current sync_file {
> struct file *file;
> struct dma_fence *fence;
> };
> could be extracted and sync_file become fence_file. Would it not help to
> avoid any further confusion by treating them as two very distinct classes
> of fd?
>
> And for me to stop calling the user interface sync_file.

That's a better argument, but the consideration is that there are probably
use-cases for doing something with these in the future, and we'd be limiting
those by doing this. The current code adds the API we need without constraining
future use cases overly much, and avoids any pitfalls.

I'll consider whether we should just split sync_file, but you still have the
same problems you mention: you get an fd and do something you shouldn't with
it. It doesn't fix any of the problems you are raising.

If you have an opaque fd you don't know the provenance of, and expect some
operations to just work on it, you are going to be disappointed.

Dave.


Re: [PATCH 5/8] sync_file: add support for a semaphore object

2017-04-12 Thread Dave Airlie
>
> The problem, as I see it, is that you are taking functionality away from
> sync_file. If you are wrapping them up inside a sync_file, we have a
> fair expectation that our code to handle sync_files will continue to
> work.

What code? there is no code existing that should be sharing sync objects
with semaphores backing them, and trying to use them as sync_files.

This is parallel functionality.

Dave.


Re: [PATCH 01/26] drm/amdgpu: refactor MQD/HQD initialization v2

2017-04-12 Thread Andres Rodriguez



On 2017-04-11 06:08 PM, Alex Deucher wrote:

On Thu, Apr 6, 2017 at 2:21 AM, Andres Rodriguez  wrote:

The MQD programming sequence currently exists in 3 different places.
Refactor it to absorb all the duplicates.

The success path remains mostly identical except for a slightly
different order in the non-kiq case. This shouldn't matter if the HQD
is disabled.

The error handling paths have been updated to deal with the new code
structure.

v2: the non-kiq path for gfxv8 was dropped in the rebase

Reviewed-by: Edward O'Callaghan 
Acked-by: Christian König 
Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 447 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 110 +
 2 files changed, 309 insertions(+), 248 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 185cb31..f67ef58 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -42,20 +42,22 @@
 #include "gca/gfx_7_2_sh_mask.h"

 #include "gmc/gmc_7_0_d.h"
 #include "gmc/gmc_7_0_sh_mask.h"

 #include "oss/oss_2_0_d.h"
 #include "oss/oss_2_0_sh_mask.h"

 #define GFX7_NUM_GFX_RINGS 1
 #define GFX7_NUM_COMPUTE_RINGS 8
+#define GFX7_MEC_HPD_SIZE  2048
+


Might want to split out the rename of this define into a separate
patch so it can be applied early.  Could probably also split the gfx7
and gfx8 changes into two patches so they can be applied separately,
so gfx7 doesn't have to be beholden to the flux in gfx8 at the
moment.



Done



 static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev);
 static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev);
 static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev);

 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
 MODULE_FIRMWARE("radeon/bonaire_me.bin");
 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
@@ -2814,40 +2816,38 @@ static void gfx_v7_0_mec_fini(struct amdgpu_device 
*adev)
if (unlikely(r != 0))
dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", 
r);
amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
adev->gfx.mec.hpd_eop_obj = NULL;
}
 }

-#define MEC_HPD_SIZE 2048
-
 static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
 {
int r;
u32 *hpd;

/*
 * KV:2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
 * Nonetheless, we assign only 1 pipe because all other pipes will
 * be handled by KFD
 */
adev->gfx.mec.num_mec = 1;
adev->gfx.mec.num_pipe = 1;
adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * 
adev->gfx.mec.num_pipe * 8;

if (adev->gfx.mec.hpd_eop_obj == NULL) {
r = amdgpu_bo_create(adev,
-adev->gfx.mec.num_mec 
*adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
+adev->gfx.mec.num_mec * 
adev->gfx.mec.num_pipe * GFX7_MEC_HPD_SIZE * 2,
 PAGE_SIZE, true,
 AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
 &adev->gfx.mec.hpd_eop_obj);
if (r) {
dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", 
r);
return r;
}
}

r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
@@ -2863,21 +2863,21 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
return r;
}
r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
if (r) {
dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
gfx_v7_0_mec_fini(adev);
return r;
}

/* clear memory.  Not sure if this is required or not */
-   memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * 
MEC_HPD_SIZE * 2);
+   memset(hpd, 0, adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 
GFX7_MEC_HPD_SIZE * 2);

amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

return 0;
 }

 struct hqd_registers
 {
u32 cp_mqd_base_addr;
@@ -2938,261 +2938,296 @@ struct bonaire_mqd
u32 restart[3];
u32 thread_trace_enable;
u32 reserved1;
u32 user_data[16];
u32 vgtcs_invoke_count[2];
struct hqd_registers queue_state;
u32 dequeue_cntr;
u32 interrupt_queue[64];
 };

-/**
- * gfx_v7_0_cp_compute_resume - setup the compute queue registers
- *
- * 

Re: [PATCH 5/8] sync_file: add support for a semaphore object

2017-04-12 Thread Chris Wilson
On Wed, Apr 12, 2017 at 09:01:32PM +0100, Chris Wilson wrote:
> On Thu, Apr 13, 2017 at 05:05:27AM +1000, Dave Airlie wrote:
> > >>
> > >> Not sure what the best semantics are there, any opinions on barring
> > >> wakeups/polling on semaphore sync_files, and just punting this
> > >> until we need it.
> > >
> > > I think getting it right now will make writing sw_sync-esque (i.e. cpu)
> > > tests easier and more complete.
> > 
> > I just don't have any use case for it, so we would be writing code to
> > write tests for it.
> > 
> > That doesn't seem smart.
> > 
> > If there is a future non-testing use case, the API is expressive
> > enough for someone
> > to add a flag or new sync obj to allow polling and to add support in a
> > nice easily
> > digestible patch.
> 
> My first thought for checking the signaled status was to use
> poll(0), but that can be retrieved from the sync_file_status ioctl. But
> to get that we still need to acquire an fd from the syncobj. And if
> we wanted to check the flag on a driver syncobj, we would need to be
> able to export one. That doesn't look very promising...

Hmm, you do export fd to pass syncobj between processes. Let's not start
with syncobj being a second class sync_file.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


Re: [PATCH 5/8] sync_file: add support for a semaphore object

2017-04-12 Thread Chris Wilson
On Thu, Apr 13, 2017 at 06:51:17AM +1000, Dave Airlie wrote:
> On 13 April 2017 at 06:39, Chris Wilson  wrote:
> > On Wed, Apr 12, 2017 at 09:01:32PM +0100, Chris Wilson wrote:
> >> On Thu, Apr 13, 2017 at 05:05:27AM +1000, Dave Airlie wrote:
> >> > >>
> >> > >> Not sure what the best semantics are there, any opinions on barring
> >> > >> wakeups/polling on semaphore sync_files, and just punting this
> >> > >> until we need it.
> >> > >
> >> > > I think getting it right now will make writing sw_sync-esque (i.e. cpu)
> >> > > tests easier and more complete.
> >> >
> >> > I just don't have any use case for it, so we would be writing code to
> >> > write tests for it.
> >> >
> >> > That doesn't seem smart.
> >> >
> >> > If there is a future non-testing use case, the API is expressive
> >> > enough for someone
> >> > to add a flag or new sync obj to allow polling and to add support in a
> >> > nice easily
> >> > digestible patch.
> >>
> >> My first thought for checking the signaled status was to use
> >> poll(0), but that can be retrieved from the sync_file_status ioctl. But
> >> to get that we still need to acquire an fd from the syncobj. And if
> >> we wanted to check the flag on a driver syncobj, we would need to be
> >> able to export one. That doesn't look very promising...
> >
> > Hmm, you do export fd to pass syncobj between processes. Let's not start
> > with syncobj being a second class sync_file.
> 
> It's not the same semantics as a sync_file. Userspace should never be
> polling on semaphores.
> 
> Semaphores are one process signals, one process waits; no semantics for
> sniffing or other use cases. Unless you have a practical use case for a
> new or proposed API, I don't think we should start with adding
> functionality.

The problem, as I see it, is that you are taking functionality away from
sync_file. If you are wrapping them up inside a sync_file, we have a
fair expectation that our code to handle sync_files will continue to
work.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


Re: [PATCH 01/26] drm/amdgpu: refactor MQD/HQD initialization v2

2017-04-12 Thread Andres Rodriguez



On 2017-04-11 06:18 PM, Alex Deucher wrote:

On Thu, Apr 6, 2017 at 2:21 AM, Andres Rodriguez  wrote:

The MQD programming sequence currently exists in 3 different places.
Refactor it to absorb all the duplicates.

The success path remains mostly identical except for a slightly
different order in the non-kiq case. This shouldn't matter if the HQD
is disabled.

The error handling paths have been updated to deal with the new code
structure.

v2: the non-kiq path for gfxv8 was dropped in the rebase

Reviewed-by: Edward O'Callaghan 
Acked-by: Christian König 
Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 447 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 110 +
 2 files changed, 309 insertions(+), 248 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 185cb31..f67ef58 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -42,20 +42,22 @@
 #include "gca/gfx_7_2_sh_mask.h"

 #include "gmc/gmc_7_0_d.h"
 #include "gmc/gmc_7_0_sh_mask.h"

 #include "oss/oss_2_0_d.h"
 #include "oss/oss_2_0_sh_mask.h"

 #define GFX7_NUM_GFX_RINGS 1
 #define GFX7_NUM_COMPUTE_RINGS 8
+#define GFX7_MEC_HPD_SIZE  2048
+

 static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev);
 static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev);
 static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev);

 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
 MODULE_FIRMWARE("radeon/bonaire_me.bin");
 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
@@ -2814,40 +2816,38 @@ static void gfx_v7_0_mec_fini(struct amdgpu_device *adev)
	if (unlikely(r != 0))
		dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
	amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
adev->gfx.mec.hpd_eop_obj = NULL;
}
 }

-#define MEC_HPD_SIZE 2048
-
 static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
 {
int r;
u32 *hpd;

/*
 * KV:2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
 * Nonetheless, we assign only 1 pipe because all other pipes will
 * be handled by KFD
 */
adev->gfx.mec.num_mec = 1;
adev->gfx.mec.num_pipe = 1;
	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;

if (adev->gfx.mec.hpd_eop_obj == NULL) {
r = amdgpu_bo_create(adev,
-				     adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
+				     adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * GFX7_MEC_HPD_SIZE * 2,
 PAGE_SIZE, true,
 AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &adev->gfx.mec.hpd_eop_obj);
		if (r) {
			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
return r;
}
}

r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
@@ -2863,21 +2863,21 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
return r;
}
	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
if (r) {
dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
gfx_v7_0_mec_fini(adev);
return r;
}

/* clear memory.  Not sure if this is required or not */
-	memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
+	memset(hpd, 0, adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * GFX7_MEC_HPD_SIZE * 2);

amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

return 0;
 }

 struct hqd_registers
 {
u32 cp_mqd_base_addr;
@@ -2938,261 +2938,296 @@ struct bonaire_mqd
u32 restart[3];
u32 thread_trace_enable;
u32 reserved1;
u32 user_data[16];
u32 vgtcs_invoke_count[2];
struct hqd_registers queue_state;
u32 dequeue_cntr;
u32 interrupt_queue[64];
 };

-/**
- * gfx_v7_0_cp_compute_resume - setup the compute queue registers
- *
- * @adev: amdgpu_device pointer
- *
- * Program the compute queues and test them to make sure they
- * are working.
- * Returns 0 for success, error for failure.
- */
-static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
+static void gfx_v7_0_compute_pipe_init(struct amdgpu_device *adev, int 

Re: [PATCH 5/8] sync_file: add support for a semaphore object

2017-04-12 Thread Chris Wilson
On Thu, Apr 13, 2017 at 07:41:28AM +1000, Dave Airlie wrote:
> >
> > The problem, as I see it, is that you are taking functionality away from
> > sync_file. If you are wrapping them up inside a sync_file, we have a
> > fair expectation that our code to handle sync_files will continue to
> > work.
> 
> What code? there is no code existing that should be sharing sync objects
> with semaphores backing them, and trying to use them as sync_files.

By masquerading semaphores as sync_files, you are inviting them to be
used with the existing code to handle sync_file.
 
> This is parallel functionality.

And quite different from the existing CPU visible sync_files. Why try
stuffing them through the same interface? At the moment the only thing
they have in common is the single fence pointer, and really you want
them just for their handle/fd to a GPU channel.

After you strip out the fops from the semaphore code, doesn't it boil
down to:

static void semaphore_file_release(struct inode *inode, struct file *file)
{
struct semaphore_file *s = file->private_data;

dma_fence_put(s->fence);
kfree(s);
}

static const struct file_operations semaphore_file_fops = {
.release = semaphore_file_release,
};

struct semaphore_file *semaphore_file_create(void)
{
struct semaphore_file *s;

s = kzalloc(sizeof(*s), GFP_KERNEL);
if (!s)
return NULL;

	s->file = anon_inode_getfile("semaphore_file",
				     &semaphore_file_fops, s, 0);

return s;
}

Plus your new routines to manipulate the semaphore.

The commonality with the current sync_file {
struct file *file;
struct dma_fence *fence;
};
could be extracted and sync_file become fence_file. Would it not help to
avoid any further confusion by treating them as two very distinct classes
of fd?

And for me to stop calling the user interface sync_file.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[PATCH 7/7] amdgpu: use sync file for shared semaphores (v3)

2017-04-12 Thread Dave Airlie
From: Dave Airlie 

This creates a new command submission chunk for amdgpu
to add wait and signal sync objects around the submission.

Sync objects are managed via the drm syncobj ioctls.

The command submission interface is enhanced with two new
chunks, one for semaphore waiting, one for semaphore signalling
and just takes a list of handles for each.

This is based on work originally done by David Zhou at AMD,
with input from Christian König on what things should look like.

NOTE: this interface addition needs a version bump to expose
it to userspace.

v1.1: keep file reference on import.
v2: move to using syncobjs
v2.1: change some APIs to just use p pointer.
v3: make more robust against CS failures, we now add the
wait sems but only remove them once the CS job has been
submitted.
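
For reference, a hedged sketch of how userspace might feed one of the
new chunks (struct layouts from include/uapi/drm/amdgpu_drm.h; the
handle value and surrounding ioctl plumbing are illustrative, not part
of this patch):

	/* one wait dependency on an existing syncobj handle */
	struct drm_amdgpu_cs_chunk_sem wait_sem = { .handle = syncobj_handle };
	struct drm_amdgpu_cs_chunk chunk = {
		.chunk_id   = AMDGPU_CHUNK_ID_SEM_WAIT,
		.length_dw  = sizeof(wait_sem) / 4,
		.chunk_data = (__u64)(uintptr_t)&wait_sem,
	};
	/* chunk then goes into the chunks array passed to DRM_IOCTL_AMDGPU_CS */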

Signed-off-by: Dave Airlie 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 112 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |   2 +-
 include/uapi/drm/amdgpu_drm.h   |   6 ++
 3 files changed, 118 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index df25b32..ed114c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -27,6 +27,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 
@@ -217,6 +218,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void 
*data)
break;
 
case AMDGPU_CHUNK_ID_DEPENDENCIES:
+   case AMDGPU_CHUNK_ID_SEM_WAIT:
+   case AMDGPU_CHUNK_ID_SEM_SIGNAL:
break;
 
default:
@@ -1008,6 +1011,65 @@ static int amdgpu_process_fence_dep(struct 
amdgpu_cs_parser *p,
return 0;
 }
 
+static int amdgpu_sem_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
+uint32_t handle)
+{
+   int r;
+   struct dma_fence *fence;
+	r = drm_syncobj_fence_get(p->filp, handle, &fence);
+   if (r)
+   return r;
+
+	r = amdgpu_sync_fence(p->adev, &p->job->sync, fence);
+   dma_fence_put(fence);
+
+   return r;
+}
+
+static int amdgpu_sem_lookup_and_remove(struct amdgpu_cs_parser *p,
+ uint32_t handle)
+{
+   return drm_syncobj_replace_fence(p->filp, handle, NULL);
+}
+
+static int amdgpu_process_sem_wait_dep(struct amdgpu_cs_parser *p,
+  struct amdgpu_cs_chunk *chunk)
+{
+   unsigned num_deps;
+   int i, r;
+   struct drm_amdgpu_cs_chunk_sem *deps;
+
+   deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
+   num_deps = chunk->length_dw * 4 /
+   sizeof(struct drm_amdgpu_cs_chunk_sem);
+
+   for (i = 0; i < num_deps; ++i) {
+   r = amdgpu_sem_lookup_and_add_to_sync(p, deps[i].handle);
+   if (r)
+   return r;
+   }
+   return 0;
+}
+
+static int amdgpu_process_sem_wait_post(struct amdgpu_cs_parser *p,
+   struct amdgpu_cs_chunk *chunk)
+{
+   unsigned num_deps;
+   int i, r;
+   struct drm_amdgpu_cs_chunk_sem *deps;
+
+   deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
+   num_deps = chunk->length_dw * 4 /
+   sizeof(struct drm_amdgpu_cs_chunk_sem);
+
+   for (i = 0; i < num_deps; ++i) {
+   r = amdgpu_sem_lookup_and_remove(p, deps[i].handle);
+   if (r)
+   return r;
+   }
+   return 0;
+}
+
 static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
  struct amdgpu_cs_parser *p)
 {
@@ -1022,12 +1084,60 @@ static int amdgpu_cs_dependencies(struct amdgpu_device 
*adev,
r = amdgpu_process_fence_dep(p, chunk);
if (r)
return r;
+   } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SEM_WAIT) {
+   r = amdgpu_process_sem_wait_dep(p, chunk);
+   if (r)
+   return r;
}
}
 
return 0;
 }
 
+static int amdgpu_process_sem_signal_dep(struct amdgpu_cs_parser *p,
+struct amdgpu_cs_chunk *chunk)
+{
+   unsigned num_deps;
+   int i, r;
+   struct drm_amdgpu_cs_chunk_sem *deps;
+
+   deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
+   num_deps = chunk->length_dw * 4 /
+   sizeof(struct drm_amdgpu_cs_chunk_sem);
+
+   for (i = 0; i < num_deps; ++i) {
+   r = drm_syncobj_replace_fence(p->filp, deps[i].handle,
+ p->fence);
+   if (r)
+   return r;
+   }
+   return 0;
+}
+
+static int amdgpu_cs_post_dependencies(struct 

RE: [PATCH] drm/amdgpu: Add kernel parameter to manage memory error handling.

2017-04-12 Thread Deucher, Alexander
> -Original Message-
> From: Michel Dänzer [mailto:mic...@daenzer.net]
> Sent: Wednesday, April 12, 2017 9:17 PM
> To: Panariti, David
> Cc: Deucher, Alexander; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/amdgpu: Add kernel parameter to manage
> memory error handling.
> 
> On 13/04/17 02:38 AM, Panariti, David wrote:
> >> From: Michel Dänzer [mailto:mic...@daenzer.net]
> >>
> >>> @@ -212,6 +213,9 @@ module_param_named(cg_mask, amdgpu_cg_mask, uint, 0444);
> >>>  MODULE_PARM_DESC(pg_mask, "Powergating flags mask (0 = disable power gating)");
> >>>  module_param_named(pg_mask, amdgpu_pg_mask, uint, 0444);
> >>>
> >>> +MODULE_PARM_DESC(ecc_mask, "ECC/EDC flags mask (0 = disable ECC/EDC)");
> >>
> >> "0 = disable ECC/EDC" implies that they're enabled by default? Was
> >> that already the case before this patch?
> >
> > [davep] Yes it was, and there was actually a problem in some cases
> > where the CZ would hang which is why I added the param. I was
> > wondering if it would be better to default to them being off, but I
> > wasn't sure how important maintaining original behavior is
> > considered. Actually, there are some bugs in the workaround function
> > as it is, so it really should default to off.
> 
> I agree. There have been some bug reports about Carrizo hangs, I wonder
> if any of those might be related to this.

Only the embedded SKUs support EDC.  If they are embedded parts, it could be 
related.

Alex



Re: [PATCH] drm/amdgpu: Add kernel parameter to manage memory error handling.

2017-04-12 Thread Michel Dänzer
On 13/04/17 02:38 AM, Panariti, David wrote:
>> From: Michel Dänzer [mailto:mic...@daenzer.net]
>> 
>>> @@ -212,6 +213,9 @@ module_param_named(cg_mask, amdgpu_cg_mask, uint, 0444);
>>>  MODULE_PARM_DESC(pg_mask, "Powergating flags mask (0 = disable power gating)");
>>>  module_param_named(pg_mask, amdgpu_pg_mask, uint, 0444);
>>>
>>> +MODULE_PARM_DESC(ecc_mask, "ECC/EDC flags mask (0 = disable ECC/EDC)");
>> 
>> "0 = disable ECC/EDC" implies that they're enabled by default? Was
>> that already the case before this patch?
> 
> [davep] Yes it was, and there was actually a problem in some cases
> where the CZ would hang which is why I added the param. I was
> wondering if it would be better to default to them being off, but I
> wasn't sure how important maintaining original behavior is
> considered. Actually, there are some bugs in the workaround function
> as it is, so it really should default to off.

I agree. There have been some bug reports about Carrizo hangs, I wonder
if any of those might be related to this.
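
If the default does flip to off as suggested, a minimal sketch of the
param block (the amdgpu_ecc_mask variable name is assumed from the
cg_mask/pg_mask pattern in the quoted hunk, not taken from the patch):

	static uint amdgpu_ecc_mask; /* 0 by default: ECC/EDC stays disabled unless requested */
	MODULE_PARM_DESC(ecc_mask, "ECC/EDC flags mask (0 = disable ECC/EDC, default 0)");
	module_param_named(ecc_mask, amdgpu_ecc_mask, uint, 0444);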


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer


Re: [PATCH 4/7] sync_file: add support for sem_file

2017-04-12 Thread Chris Wilson
On Thu, Apr 13, 2017 at 11:41:41AM +1000, Dave Airlie wrote:
> From: Dave Airlie 
> 
> This adds support for a file that has semaphore semantics
> (for Vulkan shared semaphores).
> 
> These objects are persistent objects that can have a
> fence that changes. When the object is signaled, a fence
> is attached to it, and when an object is waited on, the
> fence is removed. All interactions with these objects
> should be via command submission routines in the drm
> drivers. The sem_file is just for passing the sems between
> processes.
> 
> Signed-off-by: Dave Airlie 
> ---
>  drivers/dma-buf/sync_file.c | 101 
> 
>  include/linux/sync_file.h   |  16 +++
>  2 files changed, 117 insertions(+)
> 
> diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
> index 2342d8b..a88d786 100644
> --- a/drivers/dma-buf/sync_file.c
> +++ b/drivers/dma-buf/sync_file.c
> @@ -468,3 +468,104 @@ static const struct file_operations sync_file_fops = {
>   .unlocked_ioctl = sync_file_ioctl,
>   .compat_ioctl = sync_file_ioctl,
>  };
> +
> +static int sem_file_release(struct inode *inode, struct file *file)
> +{
> + struct sem_file *sem_file = file->private_data;
> + struct dma_fence *fence;
> +
> + fence = rcu_dereference_protected(sem_file->base.fence, 1);
> + dma_fence_put(fence);
> + kfree(sem_file);
> +
> + return 0;
> +}
> +
> +static const struct file_operations sem_file_fops = {
> + .release = sem_file_release,
> +};
> +
> +struct sem_file *sem_file_alloc(void)
> +{
> + struct sem_file *sem_file;
> + int ret;
> +
> + sem_file = kzalloc(sizeof(*sem_file), GFP_KERNEL);
> + if (!sem_file)
> + return NULL;
> +
> +	ret = fence_file_init(&sem_file->base,
> +			      &sem_file_fops);
> + if (ret)
> + goto err;
> +
> + RCU_INIT_POINTER(sem_file->base.fence, NULL);
> +	mutex_init(&sem_file->lock);
> +
> + return sem_file;
> +
> +err:
> + kfree(sem_file);
> + return NULL;
> +}
> +EXPORT_SYMBOL(sem_file_alloc);
> +
> +struct sem_file *sem_file_fdget(int fd)
> +{
> + struct file *file = fget(fd);
> +
> + if (!file)
> + return NULL;
> +
> +	if (file->f_op != &sem_file_fops)
> + goto err;
> +
> + return file->private_data;
> +
> +err:
> + fput(file);
> + return NULL;
> +}
> +EXPORT_SYMBOL(sem_file_fdget);
> +
> +#define sem_file_held(obj) lockdep_is_held(&(obj)->lock)
> +
> +struct dma_fence *sem_file_get_fence(struct sem_file *sem_file)
> +{
> + struct dma_fence *fence;
> +
> + if (!rcu_access_pointer(sem_file->base.fence)) {
> + return NULL;
> + }
> +
> + rcu_read_lock();
> +	fence = dma_fence_get_rcu_safe(&sem_file->base.fence);
> + rcu_read_unlock();
> + return fence;
> +}
> +EXPORT_SYMBOL(sem_file_get_fence);
> +
> +static inline struct dma_fence *
> +sem_file_get_fence_locked(struct sem_file *sem_file)
> +{
> + return rcu_dereference_protected(sem_file->base.fence,
> +  sem_file_held(sem_file));
> +}
> +
> +int sem_file_replace_fence(struct sem_file *sem_file,
> +struct dma_fence *fence,
> +struct dma_fence **old_fence)
> +{
> + struct dma_fence *ret_fence = NULL;
> +
> + if (fence)
> + dma_fence_get(fence);
> +
> +	mutex_lock(&sem_file->lock);
> +	ret_fence = sem_file_get_fence_locked(sem_file);
> +	RCU_INIT_POINTER(sem_file->base.fence, fence);
> +	mutex_unlock(&sem_file->lock);

Is xchg() universal?

struct dma_fence *sem_file_replace_fence(struct sem_file *sem_file,
 struct dma_fence *fence)
{
	return xchg(&sem_file->base.fence, dma_fence_get(fence));
}

safe against the rcu read and kills off the mutex.

I think this is the cleaner approach, precisely because it stops me
having the delusion that the semaphores and sync_file are
interchangeable, and I won't ask if I can merge semaphores together, or
if I can inspect the state with the CPU.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[PATCH 2/7] sync_file: mark the fence pointer as rcu.

2017-04-12 Thread Dave Airlie
From: Dave Airlie 

The current code never updates this pointer after init,
and it should always be initialised with a valid value.

However, in the future we want to share some code with
semaphore files, and those will want to use rcu to update
this pointer. None of the places sync_file accesses the
pointer from should correspond to semaphore files,
so this just wraps things with init and dereference wrappers.
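
The pattern in question, in short (illustrative, matching the hunks
below; the "1" in the protected dereference asserts there are no
concurrent updaters yet):

	RCU_INIT_POINTER(sync_file->fence, fence);		/* publish at init time */
	fence = rcu_dereference_protected(sync_file->fence, 1);	/* plain read */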

Signed-off-by: Dave Airlie 
---
 drivers/dma-buf/sync_file.c | 32 +++-
 include/linux/sync_file.h   |  2 +-
 2 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index dc89b1d..9b7ad7e 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -78,7 +78,7 @@ struct sync_file *sync_file_create(struct dma_fence *fence)
if (!sync_file)
return NULL;
 
-   sync_file->fence = dma_fence_get(fence);
+   RCU_INIT_POINTER(sync_file->fence, dma_fence_get(fence));
 
snprintf(sync_file->name, sizeof(sync_file->name), "%s-%s%llu-%d",
 fence->ops->get_driver_name(fence),
@@ -122,7 +122,7 @@ struct dma_fence *sync_file_get_fence(int fd)
if (!sync_file)
return NULL;
 
-   fence = dma_fence_get(sync_file->fence);
+   fence = dma_fence_get(rcu_dereference_protected(sync_file->fence, 1));
fput(sync_file->file);
 
return fence;
@@ -141,7 +141,7 @@ static int sync_file_set_fence(struct sync_file *sync_file,
 * we own the reference of the dma_fence_array creation.
 */
if (num_fences == 1) {
-   sync_file->fence = fences[0];
+   RCU_INIT_POINTER(sync_file->fence, fences[0]);
kfree(fences);
} else {
array = dma_fence_array_create(num_fences, fences,
@@ -150,7 +150,7 @@ static int sync_file_set_fence(struct sync_file *sync_file,
if (!array)
return -ENOMEM;
 
-	sync_file->fence = &array->base;
+	RCU_INIT_POINTER(sync_file->fence, &array->base);
}
 
return 0;
@@ -159,8 +159,9 @@ static int sync_file_set_fence(struct sync_file *sync_file,
 static struct dma_fence **get_fences(struct sync_file *sync_file,
 int *num_fences)
 {
-	if (dma_fence_is_array(sync_file->fence)) {
-		struct dma_fence_array *array = to_dma_fence_array(sync_file->fence);
+	struct dma_fence *fence = rcu_dereference_protected(sync_file->fence, 1);
+   if (dma_fence_is_array(fence)) {
+   struct dma_fence_array *array = to_dma_fence_array(fence);
 
*num_fences = array->num_fences;
return array->fences;
@@ -278,10 +279,12 @@ static struct sync_file *sync_file_merge(const char 
*name, struct sync_file *a,
 static int sync_file_release(struct inode *inode, struct file *file)
 {
struct sync_file *sync_file = file->private_data;
+   struct dma_fence *fence;
 
-	if (test_bit(POLL_ENABLED, &sync_file->fence->flags))
-		dma_fence_remove_callback(sync_file->fence, &sync_file->cb);
-	dma_fence_put(sync_file->fence);
+	fence = rcu_dereference_protected(sync_file->fence, 1);
+	if (test_bit(POLL_ENABLED, &fence->flags))
+		dma_fence_remove_callback(fence, &sync_file->cb);
+	dma_fence_put(fence);
kfree(sync_file);
 
return 0;
@@ -290,16 +293,19 @@ static int sync_file_release(struct inode *inode, struct 
file *file)
 static unsigned int sync_file_poll(struct file *file, poll_table *wait)
 {
struct sync_file *sync_file = file->private_data;
+   struct dma_fence *fence;
+
+   fence = rcu_dereference_protected(sync_file->fence, 1);
 
	poll_wait(file, &sync_file->wq, wait);

-	if (!test_and_set_bit(POLL_ENABLED, &sync_file->fence->flags)) {
-		if (dma_fence_add_callback(sync_file->fence, &sync_file->cb,
+	if (!test_and_set_bit(POLL_ENABLED, &fence->flags)) {
+		if (dma_fence_add_callback(fence, &sync_file->cb,
					   fence_check_cb_func) < 0)
			wake_up_all(&sync_file->wq);
}
 
-   return dma_fence_is_signaled(sync_file->fence) ? POLLIN : 0;
+   return dma_fence_is_signaled(fence) ? POLLIN : 0;
 }
 
 static long sync_file_ioctl_merge(struct sync_file *sync_file,
@@ -414,7 +420,7 @@ static long sync_file_ioctl_fence_info(struct sync_file 
*sync_file,
 
 no_fences:
strlcpy(info.name, sync_file->name, sizeof(info.name));
-	info.status = dma_fence_is_signaled(sync_file->fence);
+	info.status = dma_fence_is_signaled(rcu_dereference_protected(sync_file->fence, 1));
info.num_fences = num_fences;
 
	if (copy_to_user((void __user *)arg, &info, sizeof(info)))
diff --git a/include/linux/sync_file.h b/include/linux/sync_file.h
index d37beef..adbc0b9 100644
--- 

[PATCH 1/7] sync_file: get rid of internal reference count.

2017-04-12 Thread Dave Airlie
From: Dave Airlie 

sync_file uses the reference count of the file, the internal
kref was never getting moved past 1.

We can reintroduce this if we decide we need it later.
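
In other words, after this patch the sync_file lives exactly as long as
its struct file; a hedged illustration of the resulting lifetime rule:

	/* each fd/reference pins the object via the file refcount */
	struct file *f = sync_file->file;
	get_file(f);	/* take a reference */
	fput(f);	/* drop it; the final fput() ends in sync_file_release() */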

[airlied: fix buildbot warnings]

Reviewed-by: Chris Wilson 
Signed-off-by: Dave Airlie 
---
 drivers/dma-buf/sync_file.c | 13 ++---
 include/linux/sync_file.h   |  3 ---
 2 files changed, 2 insertions(+), 14 deletions(-)

diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index 2321035..dc89b1d 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -41,8 +41,6 @@ static struct sync_file *sync_file_alloc(void)
if (IS_ERR(sync_file->file))
goto err;
 
-	kref_init(&sync_file->kref);
-
	init_waitqueue_head(&sync_file->wq);
 
	INIT_LIST_HEAD(&sync_file->cb.node);
@@ -277,22 +275,15 @@ static struct sync_file *sync_file_merge(const char 
*name, struct sync_file *a,
 
 }
 
-static void sync_file_free(struct kref *kref)
+static int sync_file_release(struct inode *inode, struct file *file)
 {
-	struct sync_file *sync_file = container_of(kref, struct sync_file,
-						   kref);
+   struct sync_file *sync_file = file->private_data;
 
	if (test_bit(POLL_ENABLED, &sync_file->fence->flags))
		dma_fence_remove_callback(sync_file->fence, &sync_file->cb);
dma_fence_put(sync_file->fence);
kfree(sync_file);
-}
-
-static int sync_file_release(struct inode *inode, struct file *file)
-{
-   struct sync_file *sync_file = file->private_data;
 
-	kref_put(&sync_file->kref, sync_file_free);
return 0;
 }
 
diff --git a/include/linux/sync_file.h b/include/linux/sync_file.h
index 3e3ab84..d37beef 100644
--- a/include/linux/sync_file.h
+++ b/include/linux/sync_file.h
@@ -14,7 +14,6 @@
 #define _LINUX_SYNC_FILE_H
 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -24,7 +23,6 @@
 /**
  * struct sync_file - sync file to export to the userspace
  * @file:  file representing this fence
- * @kref:  reference count on fence.
  * @name:  name of sync_file.  Useful for debugging
  * @sync_file_list:membership in global file list
  * @wq:wait queue for fence signaling
@@ -33,7 +31,6 @@
  */
 struct sync_file {
struct file *file;
-   struct kref kref;
charname[32];
 #ifdef CONFIG_DEBUG_FS
struct list_headsync_file_list;
-- 
2.9.3



[PATCH 3/7] sync_file: split out fence_file base class from sync_file.

2017-04-12 Thread Dave Airlie
From: Dave Airlie 

This just splits out a common base object to be shared
between sync_file and sem_files.

Signed-off-by: Dave Airlie 
---
 drivers/dma-buf/sync_file.c  | 49 +++-
 drivers/gpu/drm/drm_atomic.c |  4 ++--
 include/linux/sync_file.h| 17 ++-
 3 files changed, 44 insertions(+), 26 deletions(-)

diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index 9b7ad7e..2342d8b 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -28,17 +28,28 @@
 
 static const struct file_operations sync_file_fops;
 
+static int fence_file_init(struct fence_file *fence_file,
+  const struct file_operations *fops)
+{
+	fence_file->file = anon_inode_getfile("fence_file", fops,
+					      fence_file, 0);
+   if (IS_ERR(fence_file->file))
+   return PTR_ERR(fence_file->file);
+   return 0;
+}
+
 static struct sync_file *sync_file_alloc(void)
 {
struct sync_file *sync_file;
+   int ret;
 
sync_file = kzalloc(sizeof(*sync_file), GFP_KERNEL);
if (!sync_file)
return NULL;
 
-	sync_file->file = anon_inode_getfile("sync_file", &sync_file_fops,
-					     sync_file, 0);
-	if (IS_ERR(sync_file->file))
+	ret = fence_file_init(&sync_file->base,
+			      &sync_file_fops);
+	if (ret)
goto err;
 
	init_waitqueue_head(&sync_file->wq);
@@ -67,7 +78,7 @@ static void fence_check_cb_func(struct dma_fence *f, struct 
dma_fence_cb *cb)
  *
 * Creates a sync_file containing @fence. This function acquires an additional
 * reference of @fence for the newly-created &sync_file, if it succeeds. The
- * sync_file can be released with fput(sync_file->file). Returns the
+ * sync_file can be released with fput(sync_file->base.file). Returns the
  * sync_file or NULL in case of error.
  */
 struct sync_file *sync_file_create(struct dma_fence *fence)
@@ -78,7 +89,7 @@ struct sync_file *sync_file_create(struct dma_fence *fence)
if (!sync_file)
return NULL;
 
-   RCU_INIT_POINTER(sync_file->fence, dma_fence_get(fence));
+   RCU_INIT_POINTER(sync_file->base.fence, dma_fence_get(fence));
 
snprintf(sync_file->name, sizeof(sync_file->name), "%s-%s%llu-%d",
 fence->ops->get_driver_name(fence),
@@ -122,8 +133,8 @@ struct dma_fence *sync_file_get_fence(int fd)
if (!sync_file)
return NULL;
 
-   fence = dma_fence_get(rcu_dereference_protected(sync_file->fence, 1));
-   fput(sync_file->file);
+	fence = dma_fence_get(rcu_dereference_protected(sync_file->base.fence, 1));
+   fput(sync_file->base.file);
 
return fence;
 }
@@ -141,7 +152,7 @@ static int sync_file_set_fence(struct sync_file *sync_file,
 * we own the reference of the dma_fence_array creation.
 */
if (num_fences == 1) {
-   RCU_INIT_POINTER(sync_file->fence, fences[0]);
+   RCU_INIT_POINTER(sync_file->base.fence, fences[0]);
kfree(fences);
} else {
array = dma_fence_array_create(num_fences, fences,
@@ -150,7 +161,7 @@ static int sync_file_set_fence(struct sync_file *sync_file,
if (!array)
return -ENOMEM;
 
-	RCU_INIT_POINTER(sync_file->fence, &array->base);
+	RCU_INIT_POINTER(sync_file->base.fence, &array->base);
}
 
return 0;
@@ -159,7 +170,7 @@ static int sync_file_set_fence(struct sync_file *sync_file,
 static struct dma_fence **get_fences(struct sync_file *sync_file,
 int *num_fences)
 {
-	struct dma_fence *fence = rcu_dereference_protected(sync_file->fence, 1);
+	struct dma_fence *fence = rcu_dereference_protected(sync_file->base.fence, 1);
if (dma_fence_is_array(fence)) {
struct dma_fence_array *array = to_dma_fence_array(fence);
 
@@ -168,7 +179,7 @@ static struct dma_fence **get_fences(struct sync_file 
*sync_file,
}
 
*num_fences = 1;
-	return &sync_file->fence;
+	return &sync_file->base.fence;
 }
 
 static void add_fence(struct dma_fence **fences,
@@ -271,7 +282,7 @@ static struct sync_file *sync_file_merge(const char *name, 
struct sync_file *a,
return sync_file;
 
 err:
-   fput(sync_file->file);
+   fput(sync_file->base.file);
return NULL;
 
 }
@@ -281,7 +292,7 @@ static int sync_file_release(struct inode *inode, struct 
file *file)
struct sync_file *sync_file = file->private_data;
struct dma_fence *fence;
 
-   fence = rcu_dereference_protected(sync_file->fence, 1);
+   fence = rcu_dereference_protected(sync_file->base.fence, 1);
	if (test_bit(POLL_ENABLED, &fence->flags))
		dma_fence_remove_callback(fence, &sync_file->cb);

[rfc repost] drm sync objects - a new beginning (make ickle happier?)

2017-04-12 Thread Dave Airlie
Okay I've taken Chris's suggestions to heart and reworked things
around a sem_file to see how they might look.

This means the drm_syncobj are currently only useful for semaphores,
the flags field could be used in future to use it for other things,
and we can reintroduce some of the API then if needed.

This refactors sync_file first to add some basic rcu wrappers
around the fence pointer; as this pointer never updates at this
point, everything should be fine unlocked.

It then creates the sem_file with a mutex, and uses that to
track the semaphores with reduced fops and the replace and
get APIs.

Then it reworks the drm stuff on top, and fixes an amdgpu bug
with old_fence.

Let's see if anyone prefers one approach over the other.

Dave.


[PATCH 6/7] amdgpu/cs: split out fence dependency checking

2017-04-12 Thread Dave Airlie
From: Dave Airlie 

This just splits out the fence dependency checking into its
own function to make it easier to add semaphore dependencies.

Reviewed-by: Christian König 
Signed-off-by: Dave Airlie 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 85 +++---
 1 file changed, 47 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 99424cb..df25b32 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -963,56 +963,65 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
return 0;
 }
 
-static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
- struct amdgpu_cs_parser *p)
+static int amdgpu_process_fence_dep(struct amdgpu_cs_parser *p,
+   struct amdgpu_cs_chunk *chunk)
 {
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
-   int i, j, r;
-
-   for (i = 0; i < p->nchunks; ++i) {
-   struct drm_amdgpu_cs_chunk_dep *deps;
-   struct amdgpu_cs_chunk *chunk;
-   unsigned num_deps;
+   unsigned num_deps;
+   int i, r;
+   struct drm_amdgpu_cs_chunk_dep *deps;
 
-		chunk = &p->chunks[i];
+   deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
+   num_deps = chunk->length_dw * 4 /
+   sizeof(struct drm_amdgpu_cs_chunk_dep);
 
-   if (chunk->chunk_id != AMDGPU_CHUNK_ID_DEPENDENCIES)
-   continue;
+   for (i = 0; i < num_deps; ++i) {
+   struct amdgpu_ring *ring;
+   struct amdgpu_ctx *ctx;
+   struct dma_fence *fence;
 
-   deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
-   num_deps = chunk->length_dw * 4 /
-   sizeof(struct drm_amdgpu_cs_chunk_dep);
+   r = amdgpu_cs_get_ring(p->adev, deps[i].ip_type,
+  deps[i].ip_instance,
+				       deps[i].ring, &ring);
+   if (r)
+   return r;
 
-   for (j = 0; j < num_deps; ++j) {
-   struct amdgpu_ring *ring;
-   struct amdgpu_ctx *ctx;
-   struct dma_fence *fence;
+   ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
+   if (ctx == NULL)
+   return -EINVAL;
 
-   r = amdgpu_cs_get_ring(adev, deps[j].ip_type,
-  deps[j].ip_instance,
-					       deps[j].ring, &ring);
+   fence = amdgpu_ctx_get_fence(ctx, ring,
+deps[i].handle);
+   if (IS_ERR(fence)) {
+   r = PTR_ERR(fence);
+   amdgpu_ctx_put(ctx);
+   return r;
+   } else if (fence) {
+			r = amdgpu_sync_fence(p->adev, &p->job->sync,
+ fence);
+   dma_fence_put(fence);
+   amdgpu_ctx_put(ctx);
if (r)
return r;
+   }
+   }
+   return 0;
+}
 
-   ctx = amdgpu_ctx_get(fpriv, deps[j].ctx_id);
-   if (ctx == NULL)
-   return -EINVAL;
+static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
+ struct amdgpu_cs_parser *p)
+{
+   int i, r;
 
-   fence = amdgpu_ctx_get_fence(ctx, ring,
-deps[j].handle);
-   if (IS_ERR(fence)) {
-   r = PTR_ERR(fence);
-   amdgpu_ctx_put(ctx);
-   return r;
+   for (i = 0; i < p->nchunks; ++i) {
+   struct amdgpu_cs_chunk *chunk;
 
-   } else if (fence) {
-			r = amdgpu_sync_fence(adev, &p->job->sync,
- fence);
-   dma_fence_put(fence);
-   amdgpu_ctx_put(ctx);
-   if (r)
-   return r;
-   }
+		chunk = &p->chunks[i];
+
+   if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES) {
+   r = amdgpu_process_fence_dep(p, chunk);
+   if (r)
+   return r;
}
}
 
-- 
2.9.3



[PATCH 4/7] sync_file: add support for sem_file

2017-04-12 Thread Dave Airlie
From: Dave Airlie 

This adds support for a file that has semaphore semantics
(for Vulkan shared semaphores).

These objects are persistent objects that can have a
fence that changes. When the object is signaled, a fence
is attached to it, and when an object is waited on, the
fence is removed. All interactions with these objects
should be via command submission routines in the drm
drivers. The sem_file is just for passing the sems between
processes.
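
A hedged sketch of the intended driver-side flow (helper names from
this patch; the driver-side sync call and error handling are
illustrative assumptions):

	/* signal: attach the job's fence to the semaphore */
	struct dma_fence *old;
	sem_file_replace_fence(sem, job_fence, &old);
	dma_fence_put(old);

	/* wait: take the fence, sync the job on it, then clear it */
	struct dma_fence *fence = sem_file_get_fence(sem);
	if (fence) {
		driver_sync_job_on_fence(job, fence);	/* hypothetical hook */
		dma_fence_put(fence);
		sem_file_replace_fence(sem, NULL, &old);
		dma_fence_put(old);
	}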

Signed-off-by: Dave Airlie 
---
 drivers/dma-buf/sync_file.c | 101 
 include/linux/sync_file.h   |  16 +++
 2 files changed, 117 insertions(+)

diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index 2342d8b..a88d786 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -468,3 +468,104 @@ static const struct file_operations sync_file_fops = {
.unlocked_ioctl = sync_file_ioctl,
.compat_ioctl = sync_file_ioctl,
 };
+
+static int sem_file_release(struct inode *inode, struct file *file)
+{
+   struct sem_file *sem_file = file->private_data;
+   struct dma_fence *fence;
+
+   fence = rcu_dereference_protected(sem_file->base.fence, 1);
+   dma_fence_put(fence);
+   kfree(sem_file);
+
+   return 0;
+}
+
+static const struct file_operations sem_file_fops = {
+   .release = sem_file_release,
+};
+
+struct sem_file *sem_file_alloc(void)
+{
+   struct sem_file *sem_file;
+   int ret;
+
+   sem_file = kzalloc(sizeof(*sem_file), GFP_KERNEL);
+   if (!sem_file)
+   return NULL;
+
+	ret = fence_file_init(&sem_file->base,
+			      &sem_file_fops);
+   if (ret)
+   goto err;
+
+   RCU_INIT_POINTER(sem_file->base.fence, NULL);
+	mutex_init(&sem_file->lock);
+
+   return sem_file;
+
+err:
+   kfree(sem_file);
+   return NULL;
+}
+EXPORT_SYMBOL(sem_file_alloc);
+
+struct sem_file *sem_file_fdget(int fd)
+{
+   struct file *file = fget(fd);
+
+   if (!file)
+   return NULL;
+
+	if (file->f_op != &sem_file_fops)
+   goto err;
+
+   return file->private_data;
+
+err:
+   fput(file);
+   return NULL;
+}
+EXPORT_SYMBOL(sem_file_fdget);
+
+#define sem_file_held(obj) lockdep_is_held(&(obj)->lock)
+
+struct dma_fence *sem_file_get_fence(struct sem_file *sem_file)
+{
+   struct dma_fence *fence;
+
+   if (!rcu_access_pointer(sem_file->base.fence)) {
+   return NULL;
+   }
+
+   rcu_read_lock();
+	fence = dma_fence_get_rcu_safe(&sem_file->base.fence);
+   rcu_read_unlock();
+   return fence;
+}
+EXPORT_SYMBOL(sem_file_get_fence);
+
+static inline struct dma_fence *
+sem_file_get_fence_locked(struct sem_file *sem_file)
+{
+   return rcu_dereference_protected(sem_file->base.fence,
+sem_file_held(sem_file));
+}
+
+int sem_file_replace_fence(struct sem_file *sem_file,
+  struct dma_fence *fence,
+  struct dma_fence **old_fence)
+{
+   struct dma_fence *ret_fence = NULL;
+
+   if (fence)
+   dma_fence_get(fence);
+
+	mutex_lock(&sem_file->lock);
+	ret_fence = sem_file_get_fence_locked(sem_file);
+	RCU_INIT_POINTER(sem_file->base.fence, fence);
+	mutex_unlock(&sem_file->lock);
+   *old_fence = ret_fence;
+   return 0;
+}
+EXPORT_SYMBOL(sem_file_replace_fence);
diff --git a/include/linux/sync_file.h b/include/linux/sync_file.h
index b0ae1cf..49735c8 100644
--- a/include/linux/sync_file.h
+++ b/include/linux/sync_file.h
@@ -54,4 +54,20 @@ struct sync_file {
 struct sync_file *sync_file_create(struct dma_fence *fence);
 struct dma_fence *sync_file_get_fence(int fd);
 
+
+/**
+ * struct sem_file - shared semaphore file for userspace.
+ * @base: base fence file.
+ * @lock: mutex to lock the fence_file fence ptr.
+ */
+struct sem_file {
+   struct fence_file base;
+   struct mutex lock;
+};
+struct sem_file *sem_file_create(void);
+struct sem_file *sem_file_fdget(int fd);
+struct dma_fence *sem_file_get_fence(struct sem_file *sem_file);
+int sem_file_replace_fence(struct sem_file *sem_file,
+  struct dma_fence *fence,
+  struct dma_fence **old_fence);
 #endif /* _LINUX_SYNC_H */
-- 
2.9.3



[PATCH 5/7] drm: introduce sync objects as wrappers for sem files

2017-04-12 Thread Dave Airlie
From: Dave Airlie 

Sync objects are a new top-level drm object with the same
semantics as sem_file objects, but without requiring an fd
to be consumed to support them.

This patch just enables the DRM interface to create these
objects; it doesn't actually provide any semaphore objects
or new features.
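
Under the feature-flag scheme this patch extends, a driver opting in
would look roughly like this (driver name and other flags are
hypothetical):

	static struct drm_driver foo_driver = {
		.driver_features = DRIVER_GEM | DRIVER_PRIME | DRIVER_SYNCOBJ,
		/* ... */
	};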

Signed-off-by: Dave Airlie 
---
 Documentation/gpu/drm-internals.rst |   5 +
 Documentation/gpu/drm-mm.rst|   6 +
 drivers/dma-buf/sync_file.c |   4 +-
 drivers/gpu/drm/Makefile|   2 +-
 drivers/gpu/drm/drm_fops.c  |   8 +
 drivers/gpu/drm/drm_internal.h  |  13 ++
 drivers/gpu/drm/drm_ioctl.c |  12 ++
 drivers/gpu/drm/drm_syncobj.c   | 285 
 include/drm/drmP.h  |   5 +
 include/drm/drm_drv.h   |   1 +
 include/drm/drm_syncobj.h   |  36 +
 include/uapi/drm/drm.h  |  25 
 12 files changed, 399 insertions(+), 3 deletions(-)
 create mode 100644 drivers/gpu/drm/drm_syncobj.c
 create mode 100644 include/drm/drm_syncobj.h

diff --git a/Documentation/gpu/drm-internals.rst 
b/Documentation/gpu/drm-internals.rst
index e35920d..743b751 100644
--- a/Documentation/gpu/drm-internals.rst
+++ b/Documentation/gpu/drm-internals.rst
@@ -98,6 +98,11 @@ DRIVER_ATOMIC
 implement appropriate obj->atomic_get_property() vfuncs for any
 modeset objects with driver specific properties.
 
+DRIVER_SYNCOBJ
+Driver supports sync objects. These are just sync files that don't
+use a file descriptor. They can be used to implement Vulkan shared
+semaphores.
+
 Major, Minor and Patchlevel
 ~~~
 
diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst
index f5760b1..28aebe8 100644
--- a/Documentation/gpu/drm-mm.rst
+++ b/Documentation/gpu/drm-mm.rst
@@ -483,3 +483,9 @@ DRM Cache Handling
 
 .. kernel-doc:: drivers/gpu/drm/drm_cache.c
:export:
+
+DRM Sync Objects
+================
+
+.. kernel-doc:: drivers/gpu/drm/drm_syncobj.c
+   :export:
diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index a88d786..734d61f 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -485,7 +485,7 @@ static const struct file_operations sem_file_fops = {
.release = sem_file_release,
 };
 
-struct sem_file *sem_file_alloc(void)
+struct sem_file *sem_file_create(void)
 {
struct sem_file *sem_file;
int ret;
@@ -508,7 +508,7 @@ struct sem_file *sem_file_alloc(void)
kfree(sem_file);
return NULL;
 }
-EXPORT_SYMBOL(sem_file_alloc);
+EXPORT_SYMBOL(sem_file_create);
 
 struct sem_file *sem_file_fdget(int fd)
 {
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 3ee9579..b5e565c 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -16,7 +16,7 @@ drm-y   :=drm_auth.o drm_bufs.o drm_cache.o \
drm_framebuffer.o drm_connector.o drm_blend.o \
drm_encoder.o drm_mode_object.o drm_property.o \
drm_plane.o drm_color_mgmt.o drm_print.o \
-   drm_dumb_buffers.o drm_mode_config.o
+   drm_dumb_buffers.o drm_mode_config.o drm_syncobj.o
 
 drm-$(CONFIG_DRM_LIB_RANDOM) += lib/drm_random.o
 drm-$(CONFIG_DRM_VM) += drm_vm.o
diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c
index afdf5b1..9a61df2 100644
--- a/drivers/gpu/drm/drm_fops.c
+++ b/drivers/gpu/drm/drm_fops.c
@@ -219,6 +219,9 @@ static int drm_open_helper(struct file *filp, struct 
drm_minor *minor)
if (drm_core_check_feature(dev, DRIVER_GEM))
drm_gem_open(dev, priv);
 
+   if (drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+   drm_syncobj_open(priv);
+
if (drm_core_check_feature(dev, DRIVER_PRIME))
drm_prime_init_file_private(>prime);
 
@@ -266,6 +269,8 @@ static int drm_open_helper(struct file *filp, struct 
drm_minor *minor)
 out_prime_destroy:
if (drm_core_check_feature(dev, DRIVER_PRIME))
drm_prime_destroy_file_private(>prime);
+   if (drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+   drm_syncobj_release(priv);
if (drm_core_check_feature(dev, DRIVER_GEM))
drm_gem_release(dev, priv);
put_pid(priv->pid);
@@ -400,6 +405,9 @@ int drm_release(struct inode *inode, struct file *filp)
drm_property_destroy_user_blobs(dev, file_priv);
}
 
+   if (drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+   drm_syncobj_release(file_priv);
+
if (drm_core_check_feature(dev, DRIVER_GEM))
drm_gem_release(dev, file_priv);
 
diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index f37388c..44ef903 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -142,4 +142,17 @@ static inline int drm_debugfs_crtc_crc_add(struct drm_crtc 

Re: [PATCH 2/4] PCI: add functionality for resizing resources v2

2017-04-12 Thread Bjorn Helgaas
On Tue, Apr 11, 2017 at 05:37:04PM +0200, Christian König wrote:

> >>+int pci_resize_resource(struct pci_dev *dev, int resno, int size)
> >>+{
> >>+   struct resource *res = dev->resource + resno;
> >>+   u32 sizes = pci_rbar_get_possible_sizes(dev, resno);
> >>+   int old = pci_rbar_get_current_size(dev, resno);
> >>+   u64 bytes = 1ULL << (size + 20);
> >>+   int ret = 0;
> >I think we should fail the request if the device is enabled, i.e., if
> >the PCI_COMMAND_MEMORY bit is set.  We can't safely change the BAR
> >while memory decoding is enabled.
> >
> >I know there's code in pci_std_update_resource() that turns off
> >PCI_COMMAND_MEMORY, but I think that's a mistake: I think it should
> >fail when asked to update an enabled BAR the same way
> >pci_iov_update_resource() does.
> >
> >I'm not sure why you call pci_reenable_device() below, but I think I
> >would rather have the driver do something like this:
> >
> >   pci_disable_device(dev);
> >   pci_resize_resource(dev, 0, size);
> >   pci_enable_device(dev);
> >
> >That way it's very clear to the driver that it must re-read all BARs
> >after resizing this one.
> 
> I've tried it, but this actually doesn't seem to work.
> 
> At least on the board I've tried pci_disable_device() doesn't clear
> the PCI_COMMAND_MEMORY bit, it just clears the master bit.

Yeah, you're right.  We generally turn *on* PCI_COMMAND_MEMORY in the
pci_enable_device() path, but the pci_disable_device() path doesn't
turn it off.

> In addition to that, the power management reference counting of
> pci_disable_device() and pci_enable_device() doesn't look like what
> I want for this functionality.
> 
> How about the driver needs to disable memory decoding itself before
> trying to resize anything?

There are a few places that do that, so maybe that's the best option.
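
A hedged sketch of that driver-side sequence (PCI_COMMAND handling is
standard; pci_resize_resource() and its log2-megabyte size argument are
from patch 2/4 of this series):

	u16 cmd;

	pci_read_config_word(pdev, PCI_COMMAND, &cmd);
	pci_write_config_word(pdev, PCI_COMMAND, cmd & ~PCI_COMMAND_MEMORY);

	r = pci_resize_resource(pdev, 0, size);	/* BAR0; size is log2(MB) */

	pci_write_config_word(pdev, PCI_COMMAND, cmd);	/* restore decoding */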

Bjorn


Re: [PATCH 3/4] x86/PCI: Enable a 64bit BAR on AMD Family 15h (Models 30h-3fh) Processors

2017-04-12 Thread Bjorn Helgaas
On Tue, Apr 11, 2017 at 05:48:25PM +0200, Christian König wrote:
> Am 24.03.2017 um 16:47 schrieb Bjorn Helgaas:
> >On Mon, Mar 13, 2017 at 01:41:35PM +0100, Christian König wrote:
> >>From: Christian König 
> >>
> >>Most BIOS don't enable this because of compatibility reasons.
> >Can you give any more details here?  Without more hints, it's hard to
> >know whether any of the compatibility reasons might apply to Linux as
> >well.
> 
> Unfortunately not, I could try to ask a few more people at AMD if
> they know the background.
> 
> I was told that there are a few boards which offers that as a BIOS
> option, but so far I haven't found any (and I have quite a few
> here).
> 
> My best guess is that older windows versions have a problem with that.
> 
> >>Manually enable a 64bit BAR of 64GB size so that we have
> >>enough room for PCI devices.
> > From the context, I'm guessing this is enabling a new 64GB window
> >through the PCI host bridge?
> 
> Yes, exactly. Sorry for the confusion.
> 
> >That might be documented as a "BAR", but
> >it's not anything the Linux PCI core would recognize as a BAR.
> 
> At least the AMD NB documentation calls this the host BARs. But I'm
> perfectly fine with any terminology.
> 
> How about calling it host bridge window instead?

That works for me.

> >I think the specs would envision this being done via an ACPI _SRS
> >method on the PNP0A03 host bridge device.  That would be a more
> >generic path that would work on any host bridge.  Did you explore that
> >possibility?  I would prefer to avoid adding device-specific code if
> >that's possible.
> 
> I've checked quite a few boards, but none of them actually
> implements it this way.
> 
> M$ is working on a new ACPI table to enable this in a vendor-neutral
> way, but I guess that will still take a while.
> 
> I want to support this for all AMD CPU released in the past 5 years
> or so, so we are going to deal with a bunch of older boards as well.

I've never seen _SRS for host bridges either.  I'm curious about what
sort of new table will be proposed.  It seems like the existing ACPI
resource framework could manage it, but I certainly don't know all the
issues.

> >>+   pci_bus_add_resource(dev->bus, res, 0);
> >We would need some sort of printk here to explain how this new window
> >magically appeared.
> 
> Good point, consider this done.
> 
> But is this actually the right place of doing it? Or would you
> prefer something to be added to the probing code?
> 
> I think those fixups are applied a bit later, aren't they?

Logically, this should be done before we enumerate the PCI devices
below the host bridge, so a PCI device fixup is not the ideal place
for it, but it might be the most practical.

I could imagine some sort of quirk like the ones in
drivers/pnp/quirks.c that could add the window to the host bridge _CRS
and program the bridge to open it.  But the PCI host bridges aren't
handled through the path that applies those fixups, and it would be
messy to identify your bridges (you currently use PCI vendor/device
IDs, which are only available after enumerating the device).  So this
doesn't seem like a viable strategy.

Bjorn


Re: [PATCH 01/26] drm/amdgpu: refactor MQD/HQD initialization v2

2017-04-12 Thread Andres Rodriguez

Your proposed patch looks good to me. You can add a:

Reviewed-by: Andres Rodriguez 

On 2017-04-11 06:18 PM, Alex Deucher wrote:

On Thu, Apr 6, 2017 at 2:21 AM, Andres Rodriguez  wrote:

The MQD programming sequence currently exists in 3 different places.
Refactor it to absorb all the duplicates.

The success path remains mostly identical except for a slightly
different order in the non-kiq case. This shouldn't matter if the HQD
is disabled.

The error handling paths have been updated to deal with the new code
structure.

v2: the non-kiq path for gfxv8 was dropped in the rebase

Reviewed-by: Edward O'Callaghan 
Acked-by: Christian König 
Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 447 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 110 +
 2 files changed, 309 insertions(+), 248 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 185cb31..f67ef58 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -42,20 +42,22 @@
 #include "gca/gfx_7_2_sh_mask.h"

 #include "gmc/gmc_7_0_d.h"
 #include "gmc/gmc_7_0_sh_mask.h"

 #include "oss/oss_2_0_d.h"
 #include "oss/oss_2_0_sh_mask.h"

 #define GFX7_NUM_GFX_RINGS 1
 #define GFX7_NUM_COMPUTE_RINGS 8
+#define GFX7_MEC_HPD_SIZE  2048
+

 static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev);
 static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev);
 static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev);

 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
 MODULE_FIRMWARE("radeon/bonaire_me.bin");
 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
@@ -2814,40 +2816,38 @@ static void gfx_v7_0_mec_fini(struct amdgpu_device *adev)
	if (unlikely(r != 0))
		dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
	amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
adev->gfx.mec.hpd_eop_obj = NULL;
}
 }

-#define MEC_HPD_SIZE 2048
-
 static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
 {
int r;
u32 *hpd;

/*
 * KV:2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
 * Nonetheless, we assign only 1 pipe because all other pipes will
 * be handled by KFD
 */
adev->gfx.mec.num_mec = 1;
adev->gfx.mec.num_pipe = 1;
	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;

if (adev->gfx.mec.hpd_eop_obj == NULL) {
r = amdgpu_bo_create(adev,
-				     adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
+				     adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * GFX7_MEC_HPD_SIZE * 2,
 PAGE_SIZE, true,
 AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &adev->gfx.mec.hpd_eop_obj);
		if (r) {
			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
return r;
}
}

r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
@@ -2863,21 +2863,21 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
return r;
}
	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
if (r) {
dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
gfx_v7_0_mec_fini(adev);
return r;
}

/* clear memory.  Not sure if this is required or not */
-	memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
+	memset(hpd, 0, adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * GFX7_MEC_HPD_SIZE * 2);

amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

return 0;
 }

 struct hqd_registers
 {
u32 cp_mqd_base_addr;
@@ -2938,261 +2938,296 @@ struct bonaire_mqd
u32 restart[3];
u32 thread_trace_enable;
u32 reserved1;
u32 user_data[16];
u32 vgtcs_invoke_count[2];
struct hqd_registers queue_state;
u32 dequeue_cntr;
u32 interrupt_queue[64];
 };

-/**
- * gfx_v7_0_cp_compute_resume - setup the compute queue registers
- *
- * @adev: amdgpu_device pointer
- *
- * Program the compute queues and test them to make sure they
- * are working.
- * Returns 0 for success, error for failure.
- */
-static int 

Re: [PATCH 2/2] drm/amdgpu: drop support for per ASIC read registers

2017-04-12 Thread Alex Deucher
On Wed, Apr 12, 2017 at 8:13 AM, Christian König
 wrote:
> From: Christian König 
>
> Only per family registers are still used.
>
> Signed-off-by: Christian König 

Series is:
Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/soc15.c | 29 +-
>  drivers/gpu/drm/amd/amdgpu/vi.c| 42 
> +-
>  2 files changed, 2 insertions(+), 69 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c 
> b/drivers/gpu/drm/amd/amdgpu/soc15.c
> index 8917bde..2c05dab 100644
> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
> @@ -280,10 +280,6 @@ static bool soc15_read_bios_from_rom(struct 
> amdgpu_device *adev,
> return true;
>  }
>
> -static struct amdgpu_allowed_register_entry vega10_allowed_read_registers[] 
> = {
> -   /* todo */
> -};
> -
>  static struct amdgpu_allowed_register_entry soc15_allowed_read_registers[] = 
> {
> { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS)},
> { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS2)},
> @@ -341,32 +337,9 @@ static uint32_t soc15_get_register_value(struct 
> amdgpu_device *adev,
>  static int soc15_read_register(struct amdgpu_device *adev, u32 se_num,
> u32 sh_num, u32 reg_offset, u32 *value)
>  {
> -   struct amdgpu_allowed_register_entry *asic_register_table = NULL;
> -   struct amdgpu_allowed_register_entry *asic_register_entry;
> -   uint32_t size, i;
> +   uint32_t i;
>
> *value = 0;
> -   switch (adev->asic_type) {
> -   case CHIP_VEGA10:
> -   asic_register_table = vega10_allowed_read_registers;
> -   size = ARRAY_SIZE(vega10_allowed_read_registers);
> -   break;
> -   default:
> -   return -EINVAL;
> -   }
> -
> -   if (asic_register_table) {
> -   for (i = 0; i < size; i++) {
> -   asic_register_entry = asic_register_table + i;
> -   if (reg_offset != asic_register_entry->reg_offset)
> -   continue;
> -   *value = soc15_get_register_value(adev,
> -                                     asic_register_entry->grbm_indexed,
> -                                     se_num, sh_num, reg_offset);
> -   return 0;
> -   }
> -   }
> -
> for (i = 0; i < ARRAY_SIZE(soc15_allowed_read_registers); i++) {
> if (reg_offset != soc15_allowed_read_registers[i].reg_offset)
> continue;
> diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
> index 5be8e94..505c17a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vi.c
> @@ -464,12 +464,6 @@ static void vi_detect_hw_virtualization(struct 
> amdgpu_device *adev)
> }
>  }
>
> -static const struct amdgpu_allowed_register_entry 
> tonga_allowed_read_registers[] = {
> -};
> -
> -static const struct amdgpu_allowed_register_entry 
> cz_allowed_read_registers[] = {
> -};
> -
>  static const struct amdgpu_allowed_register_entry 
> vi_allowed_read_registers[] = {
> {mmGRBM_STATUS},
> {mmGRBM_STATUS2},
> @@ -648,43 +642,9 @@ static uint32_t vi_get_register_value(struct 
> amdgpu_device *adev,
>  static int vi_read_register(struct amdgpu_device *adev, u32 se_num,
> u32 sh_num, u32 reg_offset, u32 *value)
>  {
> -   const struct amdgpu_allowed_register_entry *asic_register_table = 
> NULL;
> -   const struct amdgpu_allowed_register_entry *asic_register_entry;
> -   uint32_t size, i;
> +   uint32_t i;
>
> *value = 0;
> -   switch (adev->asic_type) {
> -   case CHIP_TOPAZ:
> -   asic_register_table = tonga_allowed_read_registers;
> -   size = ARRAY_SIZE(tonga_allowed_read_registers);
> -   break;
> -   case CHIP_FIJI:
> -   case CHIP_TONGA:
> -   case CHIP_POLARIS11:
> -   case CHIP_POLARIS10:
> -   case CHIP_POLARIS12:
> -   case CHIP_CARRIZO:
> -   case CHIP_STONEY:
> -   asic_register_table = cz_allowed_read_registers;
> -   size = ARRAY_SIZE(cz_allowed_read_registers);
> -   break;
> -   default:
> -   return -EINVAL;
> -   }
> -
> -   if (asic_register_table) {
> -   for (i = 0; i < size; i++) {
> -   bool indexed = asic_register_entry->grbm_indexed;
> -
> -   asic_register_entry = asic_register_table + i;
> -   if (reg_offset != asic_register_entry->reg_offset)
> -   continue;
> -   *value = vi_get_register_value(adev, indexed, se_num,
> -  sh_num,