Re: [PATCH 06/25] drm/amdgpu: Add KFD eviction fence

2018-02-12 Thread Felix Kuehling
On 2018-02-11 07:42 AM, Oded Gabbay wrote:
> Hi Felix,
> Do you object to me changing amd_kfd_ to amdkfd_ in the various
> structures and functions ?
> So far, we don't have anything with prefix of amd_kfd_ so I would like
> to keep on consistency.

We use the prefix amdgpu_amdkfd_ throughout the KFD-related amdgpu code.
Not sure why we did something different here. I'm OK with changing it
for consistency.

Thanks,
  Felix

>
> Other then that, this patch is:
> Reviewed-by: Oded Gabbay 
>
>
> Oded

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 06/25] drm/amdgpu: Add KFD eviction fence

2018-02-11 Thread Oded Gabbay
On Wed, Feb 7, 2018 at 3:32 AM, Felix Kuehling  wrote:
> This fence is used by KFD to keep memory resident while user mode
> queues are enabled. Trying to evict memory will trigger the
> enable_signaling callback, which starts a KFD eviction, which
> involves preempting user mode queues before signaling the fence.
> There is one such fence per process.
>
> v2:
> * Grab a reference to mm_struct
> * Dereference fence after NULL check
> * Simplify fence release, no need to signal without anyone waiting
> * Added signed-off-by Harish, who is the original author of this code
>
> Signed-off-by: Harish Kasiviswanathan 
> Signed-off-by: Felix Kuehling 
> ---
>  drivers/gpu/drm/amd/amdgpu/Makefile  |   1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h   |  15 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c | 179 
> +++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   5 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c |  21 ++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  |  18 +++
>  drivers/gpu/drm/amd/include/kgd_kfd_interface.h  |   6 +
>  7 files changed, 241 insertions(+), 4 deletions(-)
>  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
> b/drivers/gpu/drm/amd/amdgpu/Makefile
> index d6e5b72..43dc3f9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> @@ -130,6 +130,7 @@ amdgpu-y += \
>  # add amdkfd interfaces
>  amdgpu-y += \
>  amdgpu_amdkfd.o \
> +amdgpu_amdkfd_fence.o \
>  amdgpu_amdkfd_gfx_v8.o
>
>  # add cgs
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 2a519f9..492c7af 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -29,6 +29,8 @@
>  #include 
>  #include 
>
> +extern const struct kgd2kfd_calls *kgd2kfd;
> +
>  struct amdgpu_device;
>
>  struct kgd_mem {
> @@ -37,6 +39,19 @@ struct kgd_mem {
> void *cpu_ptr;
>  };
>
> +/* KFD Memory Eviction */
> +struct amdgpu_amdkfd_fence {
> +   struct dma_fence base;
> +   struct mm_struct *mm;
> +   spinlock_t lock;
> +   char timeline_name[TASK_COMM_LEN];
> +};
> +
> +struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
> +  struct mm_struct *mm);
> +bool amd_kfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
> +struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
> +
>  int amdgpu_amdkfd_init(void);
>  void amdgpu_amdkfd_fini(void);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
> new file mode 100644
> index 000..cf2f1e9
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
> @@ -0,0 +1,179 @@
> +/*
> + * Copyright 2016-2018 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include "amdgpu_amdkfd.h"
> +
> +const struct dma_fence_ops amd_kfd_fence_ops;
> +static atomic_t fence_seq = ATOMIC_INIT(0);
> +
> +/* Eviction Fence
> + * Fence helper functions to deal with KFD memory eviction.
> + * Big Idea - Since KFD submissions are done by user queues, a BO cannot be
> + *  evicted unless all the user queues for that process are evicted.
> + *
> + * All the BOs in a process share an eviction fence. When process X wants
> + * to map VRAM memory but TTM can't find enough space, TTM will attempt to
> + * evict BOs from its LRU list. TTM checks if the BO is valuable to evict
> + * by calling ttm_bo_driver->eviction_valuable().
> + *
> + * 

Re: [PATCH 06/25] drm/amdgpu: Add KFD eviction fence

2018-01-31 Thread Christian König

Am 31.01.2018 um 00:21 schrieb Felix Kuehling:

On 2018-01-30 10:35 AM, Christian König wrote:

Am 30.01.2018 um 16:28 schrieb Kasiviswanathan, Harish:

[+Harish, forgot to acknowledge him in the commit description, will
fix
that in v2]

Harish, please see Christian's question below in amd_kfd_fence_signal.
Did I understand this correctly?

[HK]: Yes the lifetime of eviction fences is tied to the lifetime of
the process associated with it. When the process terminates the fence
is signaled and released. For all the BOs that belong to this process
the eviction should be detached from it when the BO is released.
However, this eviction fence could be still attached to shared BOs.
So signaling it frees those BOs.


On 2018-01-29 08:43 AM, Christian König wrote:

Hi Felix & Harish,

maybe explain why I found that odd: dma_fence_add_callback() sets the
DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT flag before adding the callback.

So the flag should always be set when there are callbacks.
Did I miss anything?

I don't think we add any callbacks to our eviction fences.

[HK] Setting DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT is not required. It was
my oversight. Since, dma_fence_signal() function called cb_list
functions only if DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT is set, I thought
it was safe to set it. However, the cb_list would be empty if no
callbacks are added. So setting DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT is
redundant.

Ok in this case let's just remove that and also use the
dma_fence_signal() function (not the _locked variant) for signaling
the DMA fence.

Sure. Though it makes me wonder why we need to signal the fence at all.
This is when the reference count of the fence is 0. Doesn't that imply
that no one is left waiting for the fence?


Good point as well, yeah when the fences reference count becomes zero 
there is no point in signaling it.


So the whole function can be removed.

Regards,
Christian.



Regards,
   Felix



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 06/25] drm/amdgpu: Add KFD eviction fence

2018-01-30 Thread Felix Kuehling
On 2018-01-30 10:35 AM, Christian König wrote:
> Am 30.01.2018 um 16:28 schrieb Kasiviswanathan, Harish:
 [+Harish, forgot to acknowledge him in the commit description, will
 fix
 that in v2]

 Harish, please see Christian's question below in amd_kfd_fence_signal.
 Did I understand this correctly?
>> [HK]: Yes the lifetime of eviction fences is tied to the lifetime of
>> the process associated with it. When the process terminates the fence
>> is signaled and released. For all the BOs that belong to this process
>> the eviction should be detached from it when the BO is released.
>> However, this eviction fence could be still attached to shared BOs.
>> So signaling it frees those BOs.
>>
>>
>> On 2018-01-29 08:43 AM, Christian König wrote:
>>> Hi Felix & Harish,
>>>
>>> maybe explain why I found that odd: dma_fence_add_callback() sets the
>>> DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT flag before adding the callback.
>>>
>>> So the flag should always be set when there are callbacks.
>>> Did I miss anything?
>> I don't think we add any callbacks to our eviction fences.
>>
>> [HK] Setting DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT is not required. It was
>> my oversight. Since, dma_fence_signal() function called cb_list
>> functions only if DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT is set, I thought
>> it was safe to set it. However, the cb_list would be empty if no
>> callbacks are added. So setting DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT is
>> redundant.
>
> Ok in this case let's just remove that and also use the
> dma_fence_signal() function (not the _locked variant) for signaling
> the DMA fence.

Sure. Though it makes me wonder why we need to signal the fence at all.
This is when the reference count of the fence is 0. Doesn't that imply
that no one is left waiting for the fence?

Regards,
  Felix

>
> Thanks,
> Christian.
>
>>
>> Best Regards,
>> Harish
>>
>>  
>> Regards,
>>    Felix
>>
>>> Regards,
>>> Christian.
>>>
>>> Am 29.01.2018 um 00:55 schrieb Felix Kuehling:
 [+Harish, forgot to acknowledge him in the commit description, will
 fix
 that in v2]

 Harish, please see Christian's question below in amd_kfd_fence_signal.
 Did I understand this correctly?

 Regards,
     Felix

 On 2018-01-28 06:42 PM, Felix Kuehling wrote:
> On 2018-01-27 04:16 AM, Christian König wrote:
>> Am 27.01.2018 um 02:09 schrieb Felix Kuehling:
> [snip]
>>> +struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64
>>> context,
>>> +   void *mm)
>>> +{
>>> +    struct amdgpu_amdkfd_fence *fence = NULL;
>>> +
>>> +    fence = kzalloc(sizeof(*fence), GFP_KERNEL);
>>> +    if (fence == NULL)
>>> +    return NULL;
>>> +
>>> +    /* mm_struct mm is used as void pointer to identify the parent
>>> + * KFD process. Don't dereference it. Fence and any threads
>>> using
>>> + * mm is guranteed to be released before process termination.
>>> + */
>>> +    fence->mm = mm;
>> That won't work. Fences can live much longer than any process who
>> created them.
>>
>> I've already found a fence in a BO still living hours after the
>> process was killed and the pid long recycled.
>>
>> I suggest to make fence->mm a real mm_struct pointer with reference
>> counting and then set it to NULL and drop the reference in
>> enable_signaling.
> I agree. But enable_signaling may be too early to drop the reference.
> amd_kfd_fence_check_mm could still be called later from
> amdgpu_ttm_bo_eviction_valuable, as long as the fence hasn't
> signaled yet.
>
> The safe place is problably in amd_kfd_fence_release.
>
>>> +    get_task_comm(fence->timeline_name, current);
>>> +    spin_lock_init(>lock);
>>> +
>>> +    dma_fence_init(>base, _kfd_fence_ops, >lock,
>>> +   context, atomic_inc_return(_seq));
>>> +
>>> +    return fence;
>>> +}
>>> +
>>> +struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct
>>> dma_fence *f)
>>> +{
>>> +    struct amdgpu_amdkfd_fence *fence;
>>> +
>>> +    if (!f)
>>> +    return NULL;
>>> +
>>> +    fence = container_of(f, struct amdgpu_amdkfd_fence, base);
>>> +    if (fence && f->ops == _kfd_fence_ops)
>>> +    return fence;
>>> +
>>> +    return NULL;
>>> +}
>>> +
>>> +static const char *amd_kfd_fence_get_driver_name(struct dma_fence
>>> *f)
>>> +{
>>> +    return "amdgpu_amdkfd_fence";
>>> +}
>>> +
>>> +static const char *amd_kfd_fence_get_timeline_name(struct
>>> dma_fence *f)
>>> +{
>>> +    struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
>>> +
>>> +    return fence->timeline_name;
>>> +}
>>> +
>>> +/**
>>> + * amd_kfd_fence_enable_signaling - This gets called when TTM
>>> wants
>>> to evict
>>> + *  a KFD BO 

Re: [PATCH 06/25] drm/amdgpu: Add KFD eviction fence

2018-01-30 Thread Christian König

Am 30.01.2018 um 16:28 schrieb Kasiviswanathan, Harish:

[+Harish, forgot to acknowledge him in the commit description, will fix
that in v2]

Harish, please see Christian's question below in amd_kfd_fence_signal.
Did I understand this correctly?

[HK]: Yes the lifetime of eviction fences is tied to the lifetime of the 
process associated with it. When the process terminates the fence is signaled 
and released. For all the BOs that belong to this process the eviction should 
be detached from it when the BO is released. However, this eviction fence could 
be still attached to shared BOs. So signaling it frees those BOs.


On 2018-01-29 08:43 AM, Christian König wrote:

Hi Felix & Harish,

maybe explain why I found that odd: dma_fence_add_callback() sets the
DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT flag before adding the callback.

So the flag should always be set when there are callbacks.
Did I miss anything?

I don't think we add any callbacks to our eviction fences.

[HK] Setting DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT is not required. It was my 
oversight. Since, dma_fence_signal() function called cb_list functions only if 
DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT is set, I thought it was safe to set it. 
However, the cb_list would be empty if no callbacks are added. So setting 
DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT is redundant.


Ok in this case let's just remove that and also use the 
dma_fence_signal() function (not the _locked variant) for signaling the 
DMA fence.


Thanks,
Christian.



Best Regards,
Harish

  


Regards,
   Felix


Regards,
Christian.

Am 29.01.2018 um 00:55 schrieb Felix Kuehling:

[+Harish, forgot to acknowledge him in the commit description, will fix
that in v2]

Harish, please see Christian's question below in amd_kfd_fence_signal.
Did I understand this correctly?

Regards,
    Felix

On 2018-01-28 06:42 PM, Felix Kuehling wrote:

On 2018-01-27 04:16 AM, Christian König wrote:

Am 27.01.2018 um 02:09 schrieb Felix Kuehling:

[snip]

+struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
+   void *mm)
+{
+    struct amdgpu_amdkfd_fence *fence = NULL;
+
+    fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+    if (fence == NULL)
+    return NULL;
+
+    /* mm_struct mm is used as void pointer to identify the parent
+ * KFD process. Don't dereference it. Fence and any threads
using
+ * mm is guranteed to be released before process termination.
+ */
+    fence->mm = mm;

That won't work. Fences can live much longer than any process who
created them.

I've already found a fence in a BO still living hours after the
process was killed and the pid long recycled.

I suggest to make fence->mm a real mm_struct pointer with reference
counting and then set it to NULL and drop the reference in
enable_signaling.

I agree. But enable_signaling may be too early to drop the reference.
amd_kfd_fence_check_mm could still be called later from
amdgpu_ttm_bo_eviction_valuable, as long as the fence hasn't
signaled yet.

The safe place is problably in amd_kfd_fence_release.


+    get_task_comm(fence->timeline_name, current);
+    spin_lock_init(>lock);
+
+    dma_fence_init(>base, _kfd_fence_ops, >lock,
+   context, atomic_inc_return(_seq));
+
+    return fence;
+}
+
+struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct
dma_fence *f)
+{
+    struct amdgpu_amdkfd_fence *fence;
+
+    if (!f)
+    return NULL;
+
+    fence = container_of(f, struct amdgpu_amdkfd_fence, base);
+    if (fence && f->ops == _kfd_fence_ops)
+    return fence;
+
+    return NULL;
+}
+
+static const char *amd_kfd_fence_get_driver_name(struct dma_fence
*f)
+{
+    return "amdgpu_amdkfd_fence";
+}
+
+static const char *amd_kfd_fence_get_timeline_name(struct
dma_fence *f)
+{
+    struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
+
+    return fence->timeline_name;
+}
+
+/**
+ * amd_kfd_fence_enable_signaling - This gets called when TTM wants
to evict
+ *  a KFD BO and schedules a job to move the BO.
+ *  If fence is already signaled return true.
+ *  If fence is not signaled schedule a evict KFD process work item.
+ */
+static bool amd_kfd_fence_enable_signaling(struct dma_fence *f)
+{
+    struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
+
+    if (!fence)
+    return false;
+
+    if (dma_fence_is_signaled(f))
+    return true;
+
+    if (!kgd2kfd->schedule_evict_and_restore_process(
+    (struct mm_struct *)fence->mm, f))
+    return true;
+
+    return false;
+}
+
+static int amd_kfd_fence_signal(struct dma_fence *f)
+{
+    unsigned long flags;
+    int ret;
+
+    spin_lock_irqsave(f->lock, flags);
+    /* Set enabled bit so cb will called */
+    set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, >flags);

Mhm, why is that necessary?

This only gets called from fence_release below. I think this is to
avoid
needlessly scheduling an eviction/restore cycle when an eviction fence
gets destroyed that hasn't been triggered before, 

RE: [PATCH 06/25] drm/amdgpu: Add KFD eviction fence

2018-01-30 Thread Kasiviswanathan, Harish
>> [+Harish, forgot to acknowledge him in the commit description, will fix
>> that in v2]
>>
>> Harish, please see Christian's question below in amd_kfd_fence_signal.
>> Did I understand this correctly?

[HK]: Yes the lifetime of eviction fences is tied to the lifetime of the 
process associated with it. When the process terminates the fence is signaled 
and released. For all the BOs that belong to this process the eviction should 
be detached from it when the BO is released. However, this eviction fence could 
be still attached to shared BOs. So signaling it frees those BOs.


On 2018-01-29 08:43 AM, Christian König wrote:
> Hi Felix & Harish,
>
> maybe explain why I found that odd: dma_fence_add_callback() sets the
> DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT flag before adding the callback.
>
> So the flag should always be set when there are callbacks.
> Did I miss anything?

I don't think we add any callbacks to our eviction fences.

[HK] Setting DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT is not required. It was my 
oversight. Since, dma_fence_signal() function called cb_list functions only if 
DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT is set, I thought it was safe to set it. 
However, the cb_list would be empty if no callbacks are added. So setting 
DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT is redundant.

Best Regards,
Harish

 

Regards,
  Felix

>
> Regards,
> Christian.
>
> Am 29.01.2018 um 00:55 schrieb Felix Kuehling:
>> [+Harish, forgot to acknowledge him in the commit description, will fix
>> that in v2]
>>
>> Harish, please see Christian's question below in amd_kfd_fence_signal.
>> Did I understand this correctly?
>>
>> Regards,
>>    Felix
>>
>> On 2018-01-28 06:42 PM, Felix Kuehling wrote:
>>> On 2018-01-27 04:16 AM, Christian König wrote:
 Am 27.01.2018 um 02:09 schrieb Felix Kuehling:
>>> [snip]
> +struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
> +   void *mm)
> +{
> +    struct amdgpu_amdkfd_fence *fence = NULL;
> +
> +    fence = kzalloc(sizeof(*fence), GFP_KERNEL);
> +    if (fence == NULL)
> +    return NULL;
> +
> +    /* mm_struct mm is used as void pointer to identify the parent
> + * KFD process. Don't dereference it. Fence and any threads
> using
> + * mm is guranteed to be released before process termination.
> + */
> +    fence->mm = mm;
 That won't work. Fences can live much longer than any process who
 created them.

 I've already found a fence in a BO still living hours after the
 process was killed and the pid long recycled.

 I suggest to make fence->mm a real mm_struct pointer with reference
 counting and then set it to NULL and drop the reference in
 enable_signaling.
>>> I agree. But enable_signaling may be too early to drop the reference.
>>> amd_kfd_fence_check_mm could still be called later from
>>> amdgpu_ttm_bo_eviction_valuable, as long as the fence hasn't
>>> signaled yet.
>>>
>>> The safe place is problably in amd_kfd_fence_release.
>>>
> +    get_task_comm(fence->timeline_name, current);
> +    spin_lock_init(>lock);
> +
> +    dma_fence_init(>base, _kfd_fence_ops, >lock,
> +   context, atomic_inc_return(_seq));
> +
> +    return fence;
> +}
> +
> +struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct
> dma_fence *f)
> +{
> +    struct amdgpu_amdkfd_fence *fence;
> +
> +    if (!f)
> +    return NULL;
> +
> +    fence = container_of(f, struct amdgpu_amdkfd_fence, base);
> +    if (fence && f->ops == _kfd_fence_ops)
> +    return fence;
> +
> +    return NULL;
> +}
> +
> +static const char *amd_kfd_fence_get_driver_name(struct dma_fence
> *f)
> +{
> +    return "amdgpu_amdkfd_fence";
> +}
> +
> +static const char *amd_kfd_fence_get_timeline_name(struct
> dma_fence *f)
> +{
> +    struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
> +
> +    return fence->timeline_name;
> +}
> +
> +/**
> + * amd_kfd_fence_enable_signaling - This gets called when TTM wants
> to evict
> + *  a KFD BO and schedules a job to move the BO.
> + *  If fence is already signaled return true.
> + *  If fence is not signaled schedule a evict KFD process work item.
> + */
> +static bool amd_kfd_fence_enable_signaling(struct dma_fence *f)
> +{
> +    struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
> +
> +    if (!fence)
> +    return false;
> +
> +    if (dma_fence_is_signaled(f))
> +    return true;
> +
> +    if (!kgd2kfd->schedule_evict_and_restore_process(
> +    (struct mm_struct *)fence->mm, f))
> +    return true;
> +
> +    return false;
> +}
> +
> +static int amd_kfd_fence_signal(struct dma_fence *f)
> +{
> +    unsigned long flags;

Re: [PATCH 06/25] drm/amdgpu: Add KFD eviction fence

2018-01-29 Thread Felix Kuehling
On 2018-01-29 08:43 AM, Christian König wrote:
> Hi Felix & Harish,
>
> maybe explain why I found that odd: dma_fence_add_callback() sets the
> DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT flag before adding the callback.
>
> So the flag should always be set when there are callbacks.
> Did I miss anything?

I don't think we add any callbacks to our eviction fences.

Regards,
  Felix

>
> Regards,
> Christian.
>
> Am 29.01.2018 um 00:55 schrieb Felix Kuehling:
>> [+Harish, forgot to acknowledge him in the commit description, will fix
>> that in v2]
>>
>> Harish, please see Christian's question below in amd_kfd_fence_signal.
>> Did I understand this correctly?
>>
>> Regards,
>>    Felix
>>
>> On 2018-01-28 06:42 PM, Felix Kuehling wrote:
>>> On 2018-01-27 04:16 AM, Christian König wrote:
 Am 27.01.2018 um 02:09 schrieb Felix Kuehling:
>>> [snip]
> +struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
> +   void *mm)
> +{
> +    struct amdgpu_amdkfd_fence *fence = NULL;
> +
> +    fence = kzalloc(sizeof(*fence), GFP_KERNEL);
> +    if (fence == NULL)
> +    return NULL;
> +
> +    /* mm_struct mm is used as void pointer to identify the parent
> + * KFD process. Don't dereference it. Fence and any threads
> using
> + * mm is guranteed to be released before process termination.
> + */
> +    fence->mm = mm;
 That won't work. Fences can live much longer than any process who
 created them.

 I've already found a fence in a BO still living hours after the
 process was killed and the pid long recycled.

 I suggest to make fence->mm a real mm_struct pointer with reference
 counting and then set it to NULL and drop the reference in
 enable_signaling.
>>> I agree. But enable_signaling may be too early to drop the reference.
>>> amd_kfd_fence_check_mm could still be called later from
>>> amdgpu_ttm_bo_eviction_valuable, as long as the fence hasn't
>>> signaled yet.
>>>
>>> The safe place is problably in amd_kfd_fence_release.
>>>
> +    get_task_comm(fence->timeline_name, current);
> +    spin_lock_init(>lock);
> +
> +    dma_fence_init(>base, _kfd_fence_ops, >lock,
> +   context, atomic_inc_return(_seq));
> +
> +    return fence;
> +}
> +
> +struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct
> dma_fence *f)
> +{
> +    struct amdgpu_amdkfd_fence *fence;
> +
> +    if (!f)
> +    return NULL;
> +
> +    fence = container_of(f, struct amdgpu_amdkfd_fence, base);
> +    if (fence && f->ops == _kfd_fence_ops)
> +    return fence;
> +
> +    return NULL;
> +}
> +
> +static const char *amd_kfd_fence_get_driver_name(struct dma_fence
> *f)
> +{
> +    return "amdgpu_amdkfd_fence";
> +}
> +
> +static const char *amd_kfd_fence_get_timeline_name(struct
> dma_fence *f)
> +{
> +    struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
> +
> +    return fence->timeline_name;
> +}
> +
> +/**
> + * amd_kfd_fence_enable_signaling - This gets called when TTM wants
> to evict
> + *  a KFD BO and schedules a job to move the BO.
> + *  If fence is already signaled return true.
> + *  If fence is not signaled schedule a evict KFD process work item.
> + */
> +static bool amd_kfd_fence_enable_signaling(struct dma_fence *f)
> +{
> +    struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
> +
> +    if (!fence)
> +    return false;
> +
> +    if (dma_fence_is_signaled(f))
> +    return true;
> +
> +    if (!kgd2kfd->schedule_evict_and_restore_process(
> +    (struct mm_struct *)fence->mm, f))
> +    return true;
> +
> +    return false;
> +}
> +
> +static int amd_kfd_fence_signal(struct dma_fence *f)
> +{
> +    unsigned long flags;
> +    int ret;
> +
> +    spin_lock_irqsave(f->lock, flags);
> +    /* Set enabled bit so cb will called */
> +    set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, >flags);
 Mhm, why is that necessary?
>>> This only gets called from fence_release below. I think this is to
>>> avoid
>>> needlessly scheduling an eviction/restore cycle when an eviction fence
>>> gets destroyed that hasn't been triggered before, probably during
>>> process termination.
>>>
>>> Harish, do you remember any other reason for this?
>>>
> +    ret = dma_fence_signal_locked(f);
> +    spin_unlock_irqrestore(f->lock, flags);
> +
> +    return ret;
> +}
> +
> +/**
> + * amd_kfd_fence_release - callback that fence can be freed
> + *
> + * @fence: fence
> + *
> + * This function is called when the reference count becomes zero.
> + * It just RCU schedules freeing up the fence.
> + */
> +static void 

Re: [PATCH 06/25] drm/amdgpu: Add KFD eviction fence

2018-01-29 Thread Christian König

Hi Felix & Harish,

maybe explain why I found that odd: dma_fence_add_callback() sets the 
DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT flag before adding the callback.


So the flag should always be set when there are callbacks.

Did I miss anything?

Regards,
Christian.

Am 29.01.2018 um 00:55 schrieb Felix Kuehling:

[+Harish, forgot to acknowledge him in the commit description, will fix
that in v2]

Harish, please see Christian's question below in amd_kfd_fence_signal.
Did I understand this correctly?

Regards,
   Felix

On 2018-01-28 06:42 PM, Felix Kuehling wrote:

On 2018-01-27 04:16 AM, Christian König wrote:

Am 27.01.2018 um 02:09 schrieb Felix Kuehling:

[snip]

+struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
+   void *mm)
+{
+    struct amdgpu_amdkfd_fence *fence = NULL;
+
+    fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+    if (fence == NULL)
+    return NULL;
+
+    /* mm_struct mm is used as void pointer to identify the parent
+ * KFD process. Don't dereference it. Fence and any threads using
+ * mm is guranteed to be released before process termination.
+ */
+    fence->mm = mm;

That won't work. Fences can live much longer than any process who
created them.

I've already found a fence in a BO still living hours after the
process was killed and the pid long recycled.

I suggest to make fence->mm a real mm_struct pointer with reference
counting and then set it to NULL and drop the reference in
enable_signaling.

I agree. But enable_signaling may be too early to drop the reference.
amd_kfd_fence_check_mm could still be called later from
amdgpu_ttm_bo_eviction_valuable, as long as the fence hasn't signaled yet.

The safe place is problably in amd_kfd_fence_release.


+    get_task_comm(fence->timeline_name, current);
+    spin_lock_init(>lock);
+
+    dma_fence_init(>base, _kfd_fence_ops, >lock,
+   context, atomic_inc_return(_seq));
+
+    return fence;
+}
+
+struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
+{
+    struct amdgpu_amdkfd_fence *fence;
+
+    if (!f)
+    return NULL;
+
+    fence = container_of(f, struct amdgpu_amdkfd_fence, base);
+    if (fence && f->ops == _kfd_fence_ops)
+    return fence;
+
+    return NULL;
+}
+
+static const char *amd_kfd_fence_get_driver_name(struct dma_fence *f)
+{
+    return "amdgpu_amdkfd_fence";
+}
+
+static const char *amd_kfd_fence_get_timeline_name(struct dma_fence *f)
+{
+    struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
+
+    return fence->timeline_name;
+}
+
+/**
+ * amd_kfd_fence_enable_signaling - This gets called when TTM wants
to evict
+ *  a KFD BO and schedules a job to move the BO.
+ *  If fence is already signaled return true.
+ *  If fence is not signaled schedule a evict KFD process work item.
+ */
+static bool amd_kfd_fence_enable_signaling(struct dma_fence *f)
+{
+    struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
+
+    if (!fence)
+    return false;
+
+    if (dma_fence_is_signaled(f))
+    return true;
+
+    if (!kgd2kfd->schedule_evict_and_restore_process(
+    (struct mm_struct *)fence->mm, f))
+    return true;
+
+    return false;
+}
+
+static int amd_kfd_fence_signal(struct dma_fence *f)
+{
+    unsigned long flags;
+    int ret;
+
+    spin_lock_irqsave(f->lock, flags);
+    /* Set enabled bit so cb will called */
+    set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, >flags);

Mhm, why is that necessary?

This only gets called from fence_release below. I think this is to avoid
needlessly scheduling an eviction/restore cycle when an eviction fence
gets destroyed that hasn't been triggered before, probably during
process termination.

Harish, do you remember any other reason for this?


+    ret = dma_fence_signal_locked(f);
+    spin_unlock_irqrestore(f->lock, flags);
+
+    return ret;
+}
+
+/**
+ * amd_kfd_fence_release - callback that fence can be freed
+ *
+ * @fence: fence
+ *
+ * This function is called when the reference count becomes zero.
+ * It just RCU schedules freeing up the fence.
+ */
+static void amd_kfd_fence_release(struct dma_fence *f)
+{
+    struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
+    /* Unconditionally signal the fence. The process is getting
+ * terminated.
+ */
+    if (WARN_ON(!fence))
+    return; /* Not an amdgpu_amdkfd_fence */
+
+    amd_kfd_fence_signal(f);
+    kfree_rcu(f, rcu);
+}
+
+/**
+ * amd_kfd_fence_check_mm - Check if @mm is same as that of the
fence @f
+ *  if same return TRUE else return FALSE.
+ *
+ * @f: [IN] fence
+ * @mm: [IN] mm that needs to be verified
+ */
+bool amd_kfd_fence_check_mm(struct dma_fence *f, void *mm)
+{
+    struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
+
+    if (!fence)
+    return false;
+    else if (fence->mm == mm)
+    return true;
+
+    return false;
+}
+
+const struct dma_fence_ops amd_kfd_fence_ops = {
+    .get_driver_name = 

Re: [PATCH 06/25] drm/amdgpu: Add KFD eviction fence

2018-01-28 Thread Felix Kuehling
[+Harish, forgot to acknowledge him in the commit description, will fix
that in v2]

Harish, please see Christian's question below in amd_kfd_fence_signal.
Did I understand this correctly?

Regards,
  Felix

On 2018-01-28 06:42 PM, Felix Kuehling wrote:
> On 2018-01-27 04:16 AM, Christian König wrote:
>> Am 27.01.2018 um 02:09 schrieb Felix Kuehling:
> [snip]
>>> +struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
>>> +   void *mm)
>>> +{
>>> +    struct amdgpu_amdkfd_fence *fence = NULL;
>>> +
>>> +    fence = kzalloc(sizeof(*fence), GFP_KERNEL);
>>> +    if (fence == NULL)
>>> +    return NULL;
>>> +
>>> +    /* mm_struct mm is used as void pointer to identify the parent
>>> + * KFD process. Don't dereference it. Fence and any threads using
>>> + * mm is guranteed to be released before process termination.
>>> + */
>>> +    fence->mm = mm;
>> That won't work. Fences can live much longer than any process who
>> created them.
>>
>> I've already found a fence in a BO still living hours after the
>> process was killed and the pid long recycled.
>>
>> I suggest to make fence->mm a real mm_struct pointer with reference
>> counting and then set it to NULL and drop the reference in
>> enable_signaling.
> I agree. But enable_signaling may be too early to drop the reference.
> amd_kfd_fence_check_mm could still be called later from
> amdgpu_ttm_bo_eviction_valuable, as long as the fence hasn't signaled yet.
>
> The safe place is problably in amd_kfd_fence_release.
>
>>> +    get_task_comm(fence->timeline_name, current);
>>> +    spin_lock_init(>lock);
>>> +
>>> +    dma_fence_init(>base, _kfd_fence_ops, >lock,
>>> +   context, atomic_inc_return(_seq));
>>> +
>>> +    return fence;
>>> +}
>>> +
>>> +struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
>>> +{
>>> +    struct amdgpu_amdkfd_fence *fence;
>>> +
>>> +    if (!f)
>>> +    return NULL;
>>> +
>>> +    fence = container_of(f, struct amdgpu_amdkfd_fence, base);
>>> +    if (fence && f->ops == _kfd_fence_ops)
>>> +    return fence;
>>> +
>>> +    return NULL;
>>> +}
>>> +
>>> +static const char *amd_kfd_fence_get_driver_name(struct dma_fence *f)
>>> +{
>>> +    return "amdgpu_amdkfd_fence";
>>> +}
>>> +
>>> +static const char *amd_kfd_fence_get_timeline_name(struct dma_fence *f)
>>> +{
>>> +    struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
>>> +
>>> +    return fence->timeline_name;
>>> +}
>>> +
>>> +/**
>>> + * amd_kfd_fence_enable_signaling - This gets called when TTM wants
>>> to evict
>>> + *  a KFD BO and schedules a job to move the BO.
>>> + *  If fence is already signaled return true.
>>> + *  If fence is not signaled schedule a evict KFD process work item.
>>> + */
>>> +static bool amd_kfd_fence_enable_signaling(struct dma_fence *f)
>>> +{
>>> +    struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
>>> +
>>> +    if (!fence)
>>> +    return false;
>>> +
>>> +    if (dma_fence_is_signaled(f))
>>> +    return true;
>>> +
>>> +    if (!kgd2kfd->schedule_evict_and_restore_process(
>>> +    (struct mm_struct *)fence->mm, f))
>>> +    return true;
>>> +
>>> +    return false;
>>> +}
>>> +
>>> +static int amd_kfd_fence_signal(struct dma_fence *f)
>>> +{
>>> +    unsigned long flags;
>>> +    int ret;
>>> +
>>> +    spin_lock_irqsave(f->lock, flags);
>>> +    /* Set enabled bit so cb will called */
>>> +    set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, >flags);
>> Mhm, why is that necessary?
> This only gets called from fence_release below. I think this is to avoid
> needlessly scheduling an eviction/restore cycle when an eviction fence
> gets destroyed that hasn't been triggered before, probably during
> process termination.
>
> Harish, do you remember any other reason for this?
>
>>> +    ret = dma_fence_signal_locked(f);
>>> +    spin_unlock_irqrestore(f->lock, flags);
>>> +
>>> +    return ret;
>>> +}
>>> +
>>> +/**
>>> + * amd_kfd_fence_release - callback that fence can be freed
>>> + *
>>> + * @fence: fence
>>> + *
>>> + * This function is called when the reference count becomes zero.
>>> + * It just RCU schedules freeing up the fence.
>>> + */
>>> +static void amd_kfd_fence_release(struct dma_fence *f)
>>> +{
>>> +    struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
>>> +    /* Unconditionally signal the fence. The process is getting
>>> + * terminated.
>>> + */
>>> +    if (WARN_ON(!fence))
>>> +    return; /* Not an amdgpu_amdkfd_fence */
>>> +
>>> +    amd_kfd_fence_signal(f);
>>> +    kfree_rcu(f, rcu);
>>> +}
>>> +
>>> +/**
>>> + * amd_kfd_fence_check_mm - Check if @mm is same as that of the
>>> fence @f
>>> + *  if same return TRUE else return FALSE.
>>> + *
>>> + * @f: [IN] fence
>>> + * @mm: [IN] mm that needs to be verified
>>> + */
>>> +bool amd_kfd_fence_check_mm(struct dma_fence *f, void *mm)
>>> +{
>>> +    struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);

Re: [PATCH 06/25] drm/amdgpu: Add KFD eviction fence

2018-01-28 Thread Felix Kuehling

On 2018-01-27 04:16 AM, Christian König wrote:
> Am 27.01.2018 um 02:09 schrieb Felix Kuehling:
[snip]
>> +struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
>> +   void *mm)
>> +{
>> +    struct amdgpu_amdkfd_fence *fence = NULL;
>> +
>> +    fence = kzalloc(sizeof(*fence), GFP_KERNEL);
>> +    if (fence == NULL)
>> +    return NULL;
>> +
>> +    /* mm_struct mm is used as void pointer to identify the parent
>> + * KFD process. Don't dereference it. Fence and any threads using
>> + * mm is guranteed to be released before process termination.
>> + */
>> +    fence->mm = mm;
>
> That won't work. Fences can live much longer than any process who
> created them.
>
> I've already found a fence in a BO still living hours after the
> process was killed and the pid long recycled.
>
> I suggest to make fence->mm a real mm_struct pointer with reference
> counting and then set it to NULL and drop the reference in
> enable_signaling.

I agree. But enable_signaling may be too early to drop the reference.
amd_kfd_fence_check_mm could still be called later from
amdgpu_ttm_bo_eviction_valuable, as long as the fence hasn't signaled yet.

The safe place is problably in amd_kfd_fence_release.

>
>> +    get_task_comm(fence->timeline_name, current);
>> +    spin_lock_init(>lock);
>> +
>> +    dma_fence_init(>base, _kfd_fence_ops, >lock,
>> +   context, atomic_inc_return(_seq));
>> +
>> +    return fence;
>> +}
>> +
>> +struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
>> +{
>> +    struct amdgpu_amdkfd_fence *fence;
>> +
>> +    if (!f)
>> +    return NULL;
>> +
>> +    fence = container_of(f, struct amdgpu_amdkfd_fence, base);
>> +    if (fence && f->ops == _kfd_fence_ops)
>> +    return fence;
>> +
>> +    return NULL;
>> +}
>> +
>> +static const char *amd_kfd_fence_get_driver_name(struct dma_fence *f)
>> +{
>> +    return "amdgpu_amdkfd_fence";
>> +}
>> +
>> +static const char *amd_kfd_fence_get_timeline_name(struct dma_fence *f)
>> +{
>> +    struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
>> +
>> +    return fence->timeline_name;
>> +}
>> +
>> +/**
>> + * amd_kfd_fence_enable_signaling - This gets called when TTM wants
>> to evict
>> + *  a KFD BO and schedules a job to move the BO.
>> + *  If fence is already signaled return true.
>> + *  If fence is not signaled schedule a evict KFD process work item.
>> + */
>> +static bool amd_kfd_fence_enable_signaling(struct dma_fence *f)
>> +{
>> +    struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
>> +
>> +    if (!fence)
>> +    return false;
>> +
>> +    if (dma_fence_is_signaled(f))
>> +    return true;
>> +
>> +    if (!kgd2kfd->schedule_evict_and_restore_process(
>> +    (struct mm_struct *)fence->mm, f))
>> +    return true;
>> +
>> +    return false;
>> +}
>> +
>> +static int amd_kfd_fence_signal(struct dma_fence *f)
>> +{
>> +    unsigned long flags;
>> +    int ret;
>> +
>> +    spin_lock_irqsave(f->lock, flags);
>> +    /* Set enabled bit so cb will called */
>> +    set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, >flags);
>
> Mhm, why is that necessary?

This only gets called from fence_release below. I think this is to avoid
needlessly scheduling an eviction/restore cycle when an eviction fence
gets destroyed that hasn't been triggered before, probably during
process termination.

Harish, do you remember any other reason for this?

>
>> +    ret = dma_fence_signal_locked(f);
>> +    spin_unlock_irqrestore(f->lock, flags);
>> +
>> +    return ret;
>> +}
>> +
>> +/**
>> + * amd_kfd_fence_release - callback that fence can be freed
>> + *
>> + * @fence: fence
>> + *
>> + * This function is called when the reference count becomes zero.
>> + * It just RCU schedules freeing up the fence.
>> + */
>> +static void amd_kfd_fence_release(struct dma_fence *f)
>> +{
>> +    struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
>> +    /* Unconditionally signal the fence. The process is getting
>> + * terminated.
>> + */
>> +    if (WARN_ON(!fence))
>> +    return; /* Not an amdgpu_amdkfd_fence */
>> +
>> +    amd_kfd_fence_signal(f);
>> +    kfree_rcu(f, rcu);
>> +}
>> +
>> +/**
>> + * amd_kfd_fence_check_mm - Check if @mm is same as that of the
>> fence @f
>> + *  if same return TRUE else return FALSE.
>> + *
>> + * @f: [IN] fence
>> + * @mm: [IN] mm that needs to be verified
>> + */
>> +bool amd_kfd_fence_check_mm(struct dma_fence *f, void *mm)
>> +{
>> +    struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
>> +
>> +    if (!fence)
>> +    return false;
>> +    else if (fence->mm == mm)
>> +    return true;
>> +
>> +    return false;
>> +}
>> +
>> +const struct dma_fence_ops amd_kfd_fence_ops = {
>> +    .get_driver_name = amd_kfd_fence_get_driver_name,
>> +    .get_timeline_name = amd_kfd_fence_get_timeline_name,
>> +    .enable_signaling = amd_kfd_fence_enable_signaling,
>> +    .signaled = 

Re: [PATCH 06/25] drm/amdgpu: Add KFD eviction fence

2018-01-27 Thread Christian König

Am 27.01.2018 um 02:09 schrieb Felix Kuehling:

This fence is used by KFD to keep memory resident while user mode
queues are enabled. Trying to evict memory will trigger the
enable_signaling callback, which starts a KFD eviction, which
involves preempting user mode queues before signaling the fence.
There is one such fence per process.

Signed-off-by: Felix Kuehling 
---
  drivers/gpu/drm/amd/amdgpu/Makefile  |   1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h   |  15 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c | 196 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   5 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c |  18 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  |  18 +++
  drivers/gpu/drm/amd/include/kgd_kfd_interface.h  |   6 +
  7 files changed, 256 insertions(+), 3 deletions(-)
  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index d6e5b72..43dc3f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -130,6 +130,7 @@ amdgpu-y += \
  # add amdkfd interfaces
  amdgpu-y += \
 amdgpu_amdkfd.o \
+amdgpu_amdkfd_fence.o \
 amdgpu_amdkfd_gfx_v8.o
  
  # add cgs

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 2a519f9..8d92f5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -29,6 +29,8 @@
  #include 
  #include 
  
+extern const struct kgd2kfd_calls *kgd2kfd;

+
  struct amdgpu_device;
  
  struct kgd_mem {

@@ -37,6 +39,19 @@ struct kgd_mem {
void *cpu_ptr;
  };
  
+/* KFD Memory Eviction */

+struct amdgpu_amdkfd_fence {
+   struct dma_fence base;
+   void *mm;
+   spinlock_t lock;
+   char timeline_name[TASK_COMM_LEN];
+};
+
+struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
+  void *mm);
+bool amd_kfd_fence_check_mm(struct dma_fence *f, void *mm);
+struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
+
  int amdgpu_amdkfd_init(void);
  void amdgpu_amdkfd_fini(void);
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c

new file mode 100644
index 000..252e44e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
@@ -0,0 +1,196 @@
+/*
+ * Copyright 2016-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "amdgpu_amdkfd.h"
+
+const struct dma_fence_ops amd_kfd_fence_ops;
+static atomic_t fence_seq = ATOMIC_INIT(0);
+
+static int amd_kfd_fence_signal(struct dma_fence *f);
+
+/* Eviction Fence
+ * Fence helper functions to deal with KFD memory eviction.
+ * Big Idea - Since KFD submissions are done by user queues, a BO cannot be
+ *  evicted unless all the user queues for that process are evicted.
+ *
+ * All the BOs in a process share an eviction fence. When process X wants
+ * to map VRAM memory but TTM can't find enough space, TTM will attempt to
+ * evict BOs from its LRU list. TTM checks if the BO is valuable to evict
+ * by calling ttm_bo_driver->eviction_valuable().
+ *
+ * ttm_bo_driver->eviction_valuable() - will return false if the BO belongs
+ *  to process X. Otherwise, it will return true to indicate BO can be
+ *  evicted by TTM.
+ *
+ * If ttm_bo_driver->eviction_valuable returns true, then TTM will continue
+ * the evcition process for that BO by calling ttm_bo_evict --> amdgpu_bo_move
+ * --> amdgpu_copy_buffer(). This sets up job in GPU scheduler.
+ *
+ * GPU Scheduler (amd_sched_main) - sets up a cb (fence_add_callback) to
+ *  nofity when the BO is free to move.