Re: [PATCH] drm/amdkfd: option to disable system mem limit

2020-08-04 Thread Felix Kuehling
Am 2020-08-04 um 1:29 p.m. schrieb Alex Deucher:
> On Tue, Aug 4, 2020 at 1:27 PM Felix Kuehling  wrote:
>> Am 2020-07-27 um 9:24 a.m. schrieb Philip Yang:
>>> If multiple process share system memory through /dev/shm, KFD allocate
>>> memory should not fail if it reachs the system memory limit because
>>> one copy of physical system memory are shared by multiple process.
>>>
>>> Add module parameter to provide user option to disable system memory
>>> limit check, to run multiple process share memory application. By
>>> default the system memory limit is on.
>>>
>>> Print out debug message to warn user if KFD allocate memory failed
>>> because of system memory limit.
>>>
>>> Signed-off-by: Philip Yang 
>>> ---
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu.h  | 2 ++
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 9 -
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c  | 9 +
>>>  3 files changed, 19 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> index e97c088d03b3..3c0d5ecfe0d5 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> @@ -187,9 +187,11 @@ extern int amdgpu_force_asic_type;
>>>  #ifdef CONFIG_HSA_AMD
>>>  extern int sched_policy;
>>>  extern bool debug_evictions;
>>> +extern bool no_system_mem_limit;
>>>  #else
>>>  static const int sched_policy = KFD_SCHED_POLICY_HWS;
>>>  static const bool debug_evictions; /* = false */
>>> +static const bool no_system_mem_limit;
>>>  #endif
>>>
>>>  extern int amdgpu_tmz;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> index 8703aa1fe4a5..502e8204c012 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> @@ -99,7 +99,10 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
>>>   mem *= si.mem_unit;
>>>
>>>   spin_lock_init(_mem_limit.mem_limit_lock);
>>> - kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4);
>>> + if (no_system_mem_limit)
>>> + kfd_mem_limit.max_system_mem_limit = U64_MAX;
>>> + else
>>> + kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4);
>>>   kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3);
>>>   pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
>>>   (kfd_mem_limit.max_system_mem_limit >> 20),
>>> @@ -148,6 +151,10 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct 
>>> amdgpu_device *adev,
>>>
>>>   spin_lock(_mem_limit.mem_limit_lock);
>>>
>>> + if (kfd_mem_limit.system_mem_used + system_mem_needed >
>>> + kfd_mem_limit.max_system_mem_limit)
>>> + pr_debug("Set no_system_mem_limit if using shared memory\n");
>>> +
>>>   if ((kfd_mem_limit.system_mem_used + system_mem_needed >
>>>kfd_mem_limit.max_system_mem_limit) ||
>>>   (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> index 6291f5f0d223..e9acd0a9f327 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> @@ -715,6 +715,15 @@ MODULE_PARM_DESC(queue_preemption_timeout_ms, "queue 
>>> preemption timeout in ms (1
>>>  bool debug_evictions;
>>>  module_param(debug_evictions, bool, 0644);
>>>  MODULE_PARM_DESC(debug_evictions, "enable eviction debug messages (false = 
>>> default)");
>>> +
>>> +/**
>>> + * DOC: no_system_mem_limit(bool)
>>> + * Disable system memory limit, to support multiple process shared memory
>>> + */
>>> +bool no_system_mem_limit;
>>> +module_param(no_system_mem_limit, bool, 0644);
>> The permissions suggest that this parameter is writable at runtime using
>> sysfs. However, the parameter is only read once during module init. So
>> any runtime changes to this parameter will not take effect.
>>
>> You can fix this in two ways:
>>
>>  1. Make the parameter read only
>>  2. Change the implementation of amdgpu_amdkfd_reserve_mem_limit to
>> check the parameter every time and only apply the system memory
>> limit check if necessary
>>
>> I think the second option is preferable, because it allows user to
>> experiment with this without rebooting.
> Agreed.  If we go with that approach, maybe just drop the module
> parameter altogether and just let the user set it manually per device
> at runtime.

The KFD system memory limit is global. There is no useful way to apply
this limit per device.

Regards,
  Felix


>
> Alex
>
>> Regards,
>>   Felix
>>
>>
>>> +MODULE_PARM_DESC(no_system_mem_limit, "disable system memory limit (false 
>>> = default)");
>>> +
>>>  #endif
>>>
>>>  /**
>> ___
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdkfd: option to disable system mem limit

2020-08-04 Thread Alex Deucher
On Tue, Aug 4, 2020 at 1:27 PM Felix Kuehling  wrote:
>
> Am 2020-07-27 um 9:24 a.m. schrieb Philip Yang:
> > If multiple process share system memory through /dev/shm, KFD allocate
> > memory should not fail if it reachs the system memory limit because
> > one copy of physical system memory are shared by multiple process.
> >
> > Add module parameter to provide user option to disable system memory
> > limit check, to run multiple process share memory application. By
> > default the system memory limit is on.
> >
> > Print out debug message to warn user if KFD allocate memory failed
> > because of system memory limit.
> >
> > Signed-off-by: Philip Yang 
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu.h  | 2 ++
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 9 -
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c  | 9 +
> >  3 files changed, 19 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > index e97c088d03b3..3c0d5ecfe0d5 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > @@ -187,9 +187,11 @@ extern int amdgpu_force_asic_type;
> >  #ifdef CONFIG_HSA_AMD
> >  extern int sched_policy;
> >  extern bool debug_evictions;
> > +extern bool no_system_mem_limit;
> >  #else
> >  static const int sched_policy = KFD_SCHED_POLICY_HWS;
> >  static const bool debug_evictions; /* = false */
> > +static const bool no_system_mem_limit;
> >  #endif
> >
> >  extern int amdgpu_tmz;
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > index 8703aa1fe4a5..502e8204c012 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > @@ -99,7 +99,10 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
> >   mem *= si.mem_unit;
> >
> >   spin_lock_init(_mem_limit.mem_limit_lock);
> > - kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4);
> > + if (no_system_mem_limit)
> > + kfd_mem_limit.max_system_mem_limit = U64_MAX;
> > + else
> > + kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4);
> >   kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3);
> >   pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
> >   (kfd_mem_limit.max_system_mem_limit >> 20),
> > @@ -148,6 +151,10 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct 
> > amdgpu_device *adev,
> >
> >   spin_lock(_mem_limit.mem_limit_lock);
> >
> > + if (kfd_mem_limit.system_mem_used + system_mem_needed >
> > + kfd_mem_limit.max_system_mem_limit)
> > + pr_debug("Set no_system_mem_limit if using shared memory\n");
> > +
> >   if ((kfd_mem_limit.system_mem_used + system_mem_needed >
> >kfd_mem_limit.max_system_mem_limit) ||
> >   (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > index 6291f5f0d223..e9acd0a9f327 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > @@ -715,6 +715,15 @@ MODULE_PARM_DESC(queue_preemption_timeout_ms, "queue 
> > preemption timeout in ms (1
> >  bool debug_evictions;
> >  module_param(debug_evictions, bool, 0644);
> >  MODULE_PARM_DESC(debug_evictions, "enable eviction debug messages (false = 
> > default)");
> > +
> > +/**
> > + * DOC: no_system_mem_limit(bool)
> > + * Disable system memory limit, to support multiple process shared memory
> > + */
> > +bool no_system_mem_limit;
> > +module_param(no_system_mem_limit, bool, 0644);
>
> The permissions suggest that this parameter is writable at runtime using
> sysfs. However, the parameter is only read once during module init. So
> any runtime changes to this parameter will not take effect.
>
> You can fix this in two ways:
>
>  1. Make the parameter read only
>  2. Change the implementation of amdgpu_amdkfd_reserve_mem_limit to
> check the parameter every time and only apply the system memory
> limit check if necessary
>
> I think the second option is preferable, because it allows user to
> experiment with this without rebooting.

Agreed.  If we go with that approach, maybe just drop the module
parameter altogether and just let the user set it manually per device
at runtime.

Alex

>
> Regards,
>   Felix
>
>
> > +MODULE_PARM_DESC(no_system_mem_limit, "disable system memory limit (false 
> > = default)");
> > +
> >  #endif
> >
> >  /**
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdkfd: option to disable system mem limit

2020-08-04 Thread Felix Kuehling
Am 2020-07-27 um 9:24 a.m. schrieb Philip Yang:
> If multiple process share system memory through /dev/shm, KFD allocate
> memory should not fail if it reachs the system memory limit because
> one copy of physical system memory are shared by multiple process.
>
> Add module parameter to provide user option to disable system memory
> limit check, to run multiple process share memory application. By
> default the system memory limit is on.
>
> Print out debug message to warn user if KFD allocate memory failed
> because of system memory limit.
>
> Signed-off-by: Philip Yang 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h  | 2 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 9 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c  | 9 +
>  3 files changed, 19 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index e97c088d03b3..3c0d5ecfe0d5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -187,9 +187,11 @@ extern int amdgpu_force_asic_type;
>  #ifdef CONFIG_HSA_AMD
>  extern int sched_policy;
>  extern bool debug_evictions;
> +extern bool no_system_mem_limit;
>  #else
>  static const int sched_policy = KFD_SCHED_POLICY_HWS;
>  static const bool debug_evictions; /* = false */
> +static const bool no_system_mem_limit;
>  #endif
>  
>  extern int amdgpu_tmz;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 8703aa1fe4a5..502e8204c012 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -99,7 +99,10 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
>   mem *= si.mem_unit;
>  
>   spin_lock_init(_mem_limit.mem_limit_lock);
> - kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4);
> + if (no_system_mem_limit)
> + kfd_mem_limit.max_system_mem_limit = U64_MAX;
> + else
> + kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4);
>   kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3);
>   pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
>   (kfd_mem_limit.max_system_mem_limit >> 20),
> @@ -148,6 +151,10 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct 
> amdgpu_device *adev,
>  
>   spin_lock(_mem_limit.mem_limit_lock);
>  
> + if (kfd_mem_limit.system_mem_used + system_mem_needed >
> + kfd_mem_limit.max_system_mem_limit)
> + pr_debug("Set no_system_mem_limit if using shared memory\n");
> +
>   if ((kfd_mem_limit.system_mem_used + system_mem_needed >
>kfd_mem_limit.max_system_mem_limit) ||
>   (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 6291f5f0d223..e9acd0a9f327 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -715,6 +715,15 @@ MODULE_PARM_DESC(queue_preemption_timeout_ms, "queue 
> preemption timeout in ms (1
>  bool debug_evictions;
>  module_param(debug_evictions, bool, 0644);
>  MODULE_PARM_DESC(debug_evictions, "enable eviction debug messages (false = 
> default)");
> +
> +/**
> + * DOC: no_system_mem_limit(bool)
> + * Disable system memory limit, to support multiple process shared memory
> + */
> +bool no_system_mem_limit;
> +module_param(no_system_mem_limit, bool, 0644);

The permissions suggest that this parameter is writable at runtime using
sysfs. However, the parameter is only read once during module init. So
any runtime changes to this parameter will not take effect.

You can fix this in two ways:

 1. Make the parameter read only
 2. Change the implementation of amdgpu_amdkfd_reserve_mem_limit to
check the parameter every time and only apply the system memory
limit check if necessary

I think the second option is preferable, because it allows the user to
experiment with this without rebooting.

Regards,
  Felix


> +MODULE_PARM_DESC(no_system_mem_limit, "disable system memory limit (false = 
> default)");
> +
>  #endif
>  
>  /**
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdkfd: option to disable system mem limit

2020-08-04 Thread philip yang

Ping.

On 2020-07-27 9:24 a.m., Philip Yang wrote:

If multiple process share system memory through /dev/shm, KFD allocate
memory should not fail if it reachs the system memory limit because
one copy of physical system memory are shared by multiple process.

Add module parameter to provide user option to disable system memory
limit check, to run multiple process share memory application. By
default the system memory limit is on.

Print out debug message to warn user if KFD allocate memory failed
because of system memory limit.

Signed-off-by: Philip Yang 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h  | 2 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 9 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c  | 9 +
  3 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index e97c088d03b3..3c0d5ecfe0d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -187,9 +187,11 @@ extern int amdgpu_force_asic_type;
  #ifdef CONFIG_HSA_AMD
  extern int sched_policy;
  extern bool debug_evictions;
+extern bool no_system_mem_limit;
  #else
  static const int sched_policy = KFD_SCHED_POLICY_HWS;
  static const bool debug_evictions; /* = false */
+static const bool no_system_mem_limit;
  #endif
  
  extern int amdgpu_tmz;

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 8703aa1fe4a5..502e8204c012 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -99,7 +99,10 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
mem *= si.mem_unit;
  
  	spin_lock_init(_mem_limit.mem_limit_lock);

-   kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4);
+   if (no_system_mem_limit)
+   kfd_mem_limit.max_system_mem_limit = U64_MAX;
+   else
+   kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4);
kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3);
pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
(kfd_mem_limit.max_system_mem_limit >> 20),
@@ -148,6 +151,10 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct 
amdgpu_device *adev,
  
  	spin_lock(_mem_limit.mem_limit_lock);
  
+	if (kfd_mem_limit.system_mem_used + system_mem_needed >

+   kfd_mem_limit.max_system_mem_limit)
+   pr_debug("Set no_system_mem_limit if using shared memory\n");
+
if ((kfd_mem_limit.system_mem_used + system_mem_needed >
 kfd_mem_limit.max_system_mem_limit) ||
(kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 6291f5f0d223..e9acd0a9f327 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -715,6 +715,15 @@ MODULE_PARM_DESC(queue_preemption_timeout_ms, "queue 
preemption timeout in ms (1
  bool debug_evictions;
  module_param(debug_evictions, bool, 0644);
  MODULE_PARM_DESC(debug_evictions, "enable eviction debug messages (false = 
default)");
+
+/**
+ * DOC: no_system_mem_limit(bool)
+ * Disable system memory limit, to support multiple process shared memory
+ */
+bool no_system_mem_limit;
+module_param(no_system_mem_limit, bool, 0644);
+MODULE_PARM_DESC(no_system_mem_limit, "disable system memory limit (false = 
default)");
+
  #endif
  
  /**

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdkfd: option to disable system mem limit

2020-07-27 Thread Philip Yang
If multiple processes share system memory through /dev/shm, a KFD memory
allocation should not fail when it reaches the system memory limit, because
one copy of physical system memory is shared by multiple processes.

Add a module parameter that gives the user the option to disable the
system memory limit check, in order to run applications that share memory
between multiple processes. By default the system memory limit is on.

Print a debug message to warn the user if a KFD memory allocation failed
because of the system memory limit.

Signed-off-by: Philip Yang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h              | 2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 9 ++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c          | 9 +++++++++
 3 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index e97c088d03b3..3c0d5ecfe0d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -187,9 +187,11 @@ extern int amdgpu_force_asic_type;
 #ifdef CONFIG_HSA_AMD
 extern int sched_policy;
 extern bool debug_evictions;
+extern bool no_system_mem_limit;
 #else
 static const int sched_policy = KFD_SCHED_POLICY_HWS;
 static const bool debug_evictions; /* = false */
+static const bool no_system_mem_limit;
 #endif
 
 extern int amdgpu_tmz;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 8703aa1fe4a5..502e8204c012 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -99,7 +99,10 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
mem *= si.mem_unit;
 
spin_lock_init(&kfd_mem_limit.mem_limit_lock);
-   kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4);
+   if (no_system_mem_limit)
+   kfd_mem_limit.max_system_mem_limit = U64_MAX;
+   else
+   kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4);
kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3);
pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
(kfd_mem_limit.max_system_mem_limit >> 20),
@@ -148,6 +151,10 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct 
amdgpu_device *adev,
 
spin_lock(&kfd_mem_limit.mem_limit_lock);
 
+   if (kfd_mem_limit.system_mem_used + system_mem_needed >
+   kfd_mem_limit.max_system_mem_limit)
+   pr_debug("Set no_system_mem_limit if using shared memory\n");
+
if ((kfd_mem_limit.system_mem_used + system_mem_needed >
 kfd_mem_limit.max_system_mem_limit) ||
(kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 6291f5f0d223..e9acd0a9f327 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -715,6 +715,15 @@ MODULE_PARM_DESC(queue_preemption_timeout_ms, "queue 
preemption timeout in ms (1
 bool debug_evictions;
 module_param(debug_evictions, bool, 0644);
 MODULE_PARM_DESC(debug_evictions, "enable eviction debug messages (false = 
default)");
+
+/**
+ * DOC: no_system_mem_limit(bool)
+ * Disable system memory limit, to support multiple process shared memory
+ */
+bool no_system_mem_limit;
+module_param(no_system_mem_limit, bool, 0644);
+MODULE_PARM_DESC(no_system_mem_limit, "disable system memory limit (false = 
default)");
+
 #endif
 
 /**
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx