[PATCH libdrm] amdgpu: When couldn't find bo, need to return error.

2018-08-31 Thread Emily Deng
startx will hit a segmentation fault if success is returned when the bo cannot be found.

SWDEV-163962

Change-Id: I56b189fa26efdcd1d96e5100af3f3e0b1208b0c3
Signed-off-by: Emily Deng 
---
 amdgpu/amdgpu_bo.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/amdgpu/amdgpu_bo.c b/amdgpu/amdgpu_bo.c
index f25cacc..7e297fa 100644
--- a/amdgpu/amdgpu_bo.c
+++ b/amdgpu/amdgpu_bo.c
@@ -760,6 +760,7 @@ int amdgpu_find_bo_by_cpu_mapping(amdgpu_device_handle dev,
  uint64_t *offset_in_bo)
 {
uint32_t i;
+   int r = 0;
struct amdgpu_bo *bo;
 
if (cpu == NULL || size == 0)
@@ -787,10 +788,11 @@ int amdgpu_find_bo_by_cpu_mapping(amdgpu_device_handle 
dev,
} else {
*buf_handle = NULL;
*offset_in_bo = 0;
+   r = -errno;
}
pthread_mutex_unlock(&dev->bo_table_mutex);
 
-   return 0;
+   return r;
 }
 
 int amdgpu_create_bo_from_user_mem(amdgpu_device_handle dev,
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/2] drm/amdgpu: improve VM state machine documentation

2018-08-31 Thread Felix Kuehling
Thanks for this. A few comments and a question inline.

On 2018-08-31 09:27 AM, Christian König wrote:
> Since we have a lot of FAQ on the VM state machine try to improve the
> documentation by adding functions for each state move.
>
> Signed-off-by: Christian König 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 107 
> -
>  1 file changed, 79 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index a9275a99d793..40c22635fefd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -204,6 +204,69 @@ static unsigned amdgpu_vm_bo_size(struct amdgpu_device 
> *adev, unsigned level)
>   return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level) * 8);
>  }
>  
> +/**
> + * amdgpu_vm_bo_evicted - vm_bo is evicted
> + *
> + * @vm_bo: vm_bo which is evicted
> + *
> + * State for PDs/PTs and per VM BOs which are not at the location they should
> + * be.
> + */
> +static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
> +{
> + struct amdgpu_vm *vm = vm_bo->vm;
> + struct amdgpu_bo *bo = vm_bo->bo;
> +
> + vm_bo->moved = true;
> + if (bo->tbo.type == ttm_bo_type_kernel)
> + list_move(_bo->vm_status, >evicted);
> + else
> + list_move_tail(_bo->vm_status, >evicted);
> +}
> +
> +/**
> + * amdgpu_vm_bo_relocated - vm_bo is relocated
> + *
> + * @vm_bo: vm_bo which is relocated
> + *
> + * State for PDs/PTs which need to update their parent PD.
> + */
> +static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
> +{
> + list_move(_bo->vm_status, _bo->vm->relocated);
> +}
> +
> +/**
> + * amdgpu_vm_bo_moved - vm_bo is moved
> + *
> + * @vm_bo: vm_bo which is moved
> + *
> + * State for per VM and normal BOs which are moved, but that change is not 
> yet
> + * reflected in the page tables.

I have a question here. Why does amdgpu_cs_vm_handling call
amdgpu_vm_bo_update manually for its BO list entries? Wouldn't it be
enough to just call amdgpu_vm_handle_moved?

> + */
> +static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
> +{
> + struct amdgpu_vm *vm = vm_bo->vm;
> +
> + spin_lock(&vm->moved_lock);
> + list_move(&vm_bo->vm_status, &vm->moved);
> + spin_unlock(&vm->moved_lock);

If vm->moved_lock protects the moved list, do we also need to take it
whenever something is moved from that list? That could potentially be
any list_move operation that uses vm_bo->vm_status. I found one case
below where that may not be handled correctly.

> +}
> +
> +/**
> + * amdgpu_vm_bo_idle - vm_bo is idle
> + *
> + * @vm_bo: vm_bo which is now idle
> + *
> + * State for PDs/PTs and per VM BOs which have gone through the state machine
> + * and are now idle.
> + */
> +static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo)
> +{
> + list_move(_bo->vm_status, _bo->vm->idle);
> + vm_bo->moved = false;
> +}
> +
>  /**
>   * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
>   *
> @@ -232,9 +295,9 @@ static void amdgpu_vm_bo_base_init(struct 
> amdgpu_vm_bo_base *base,
>  
>   vm->bulk_moveable = false;
>   if (bo->tbo.type == ttm_bo_type_kernel)
> - list_move(>vm_status, >relocated);
> + amdgpu_vm_bo_relocated(base);
>   else
> - list_move(>vm_status, >idle);
> + amdgpu_vm_bo_idle(base);
>  
>   if (bo->preferred_domains &
>   amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
> @@ -245,8 +308,7 @@ static void amdgpu_vm_bo_base_init(struct 
> amdgpu_vm_bo_base *base,
>* is currently evicted. add the bo to the evicted list to make sure it
>* is validated on next vm use to avoid fault.
>* */
> - list_move_tail(>vm_status, >evicted);
> - base->moved = true;
> + amdgpu_vm_bo_evicted(base);
>  }
>  
>  /**
> @@ -342,9 +404,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, 
> struct amdgpu_vm *vm,
>   break;
>  
>   if (bo->tbo.type != ttm_bo_type_kernel) {
> - spin_lock(>moved_lock);
> - list_move(_base->vm_status, >moved);
> - spin_unlock(>moved_lock);
> + amdgpu_vm_bo_moved(bo_base);
>   } else {
>   if (vm->use_cpu_for_update)
>   r = amdgpu_bo_kmap(bo, NULL);
> @@ -352,7 +412,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, 
> struct amdgpu_vm *vm,
>   r = amdgpu_ttm_alloc_gart(>tbo);
>   if (r)
>   break;
> - list_move(_base->vm_status, >relocated);
> + amdgpu_vm_bo_relocated(bo_base);
>   }
>   }
>  
> @@ -1123,8 +1183,7 @@ int amdgpu_vm_update_directories(struct amdgpu_device 
> *adev,
>   bo_base = 

[PATCH 1/1] drm/amdgpu: Clean up KFD init and fini

2018-08-31 Thread Felix Kuehling
Only initialize KFD once by moving amdgpu_amdkfd_init from
amdgpu_pci_probe to amdgpu_init. This fixes kernel oopses and hangs
when booting multi-GPU systems.

Also removed some vestiges of KFD being its own module.

Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c |  5 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c| 12 
 2 files changed, 5 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 8bee9a0..a79df2a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -28,7 +28,6 @@
 #include 
 
 const struct kgd2kfd_calls *kgd2kfd;
-bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**);
 
 static const unsigned int compute_vmid_bitmap = 0xFF00;
 
@@ -51,10 +50,8 @@ int amdgpu_amdkfd_init(void)
 
 void amdgpu_amdkfd_fini(void)
 {
-   if (kgd2kfd) {
+   if (kgd2kfd)
kgd2kfd->exit();
-   symbol_put(kgd2kfd_init);
-   }
 }
 
 void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index a96ceff..b5c2ccb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -922,14 +922,6 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
return -ENODEV;
}
 
-   /*
-* Initialize amdkfd before starting radeon. If it was not loaded yet,
-* defer radeon probing
-*/
-   ret = amdgpu_amdkfd_init();
-   if (ret == -EPROBE_DEFER)
-   return ret;
-
/* Get rid of things like offb */
ret = amdgpu_kick_out_firmware_fb(pdev);
if (ret)
@@ -1274,6 +1266,10 @@ static int __init amdgpu_init(void)
pdriver = _kms_pci_driver;
driver->num_ioctls = amdgpu_max_kms_ioctl;
amdgpu_register_atpx_handler();
+
+   /* Ignore KFD init failures. Normal when CONFIG_HSA_AMD is not set. */
+   amdgpu_amdkfd_init();
+
/* let modprobe override vga console setting */
return pci_register_driver(pdriver);
 
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/powerplay: added vega20 overdrive support V3

2018-08-31 Thread Deucher, Alexander
Reviewed-by: Alex Deucher 


From: amd-gfx  on behalf of Evan Quan 

Sent: Friday, August 31, 2018 3:50:30 AM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander; Quan, Evan; Zhu, Rex
Subject: [PATCH] drm/amd/powerplay: added vega20 overdrive support V3

Added vega20 overdrive support based on the existing OD sysfs
APIs. However, the OD logic is simplified on vega20, so
the behavior is a little different and works only on
some limited levels.

V2: fix typo
fix commit description
revise error logs
add support for clock OD

V3: separate clock from voltage OD settings

Change-Id: I403cb38a95863db664cf06d030ac42a19bff6b33
Signed-off-by: Evan Quan 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c|  45 +++
 .../gpu/drm/amd/include/kgd_pp_interface.h|   2 +
 .../drm/amd/powerplay/hwmgr/vega20_hwmgr.c| 289 +-
 3 files changed, 335 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index e2577518b9c6..262c0ffc9d5d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -474,6 +474,8 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
  * in each power level within a power state.  The pp_od_clk_voltage is used for
  * this.
  *
+ * < For Vega10 and previous ASICs >
+ *
  * Reading the file will display:
  *
  * - a list of engine clock levels and voltages labeled OD_SCLK
@@ -491,6 +493,44 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
  * "c" (commit) to the file to commit your changes.  If you want to reset to 
the
  * default power levels, write "r" (reset) to the file to reset them.
  *
+ *
+ * < For Vega20 >
+ *
+ * Reading the file will display:
+ *
+ * - minimum and maximum engine clock labeled OD_SCLK
+ *
+ * - maximum memory clock labeled OD_MCLK
+ *
+ * - three <frequency, voltage offset> points labeled OD_VDDC_CURVE.
+ *   They can be used to calibrate the sclk voltage curve.
+ *
+ * - a list of valid ranges for sclk, mclk, and voltage curve points
+ *   labeled OD_RANGE
+ *
+ * To manually adjust these settings:
+ *
+ * - First select manual using power_dpm_force_performance_level
+ *
+ * - For clock frequency setting, enter a new value by writing a
+ *   string that contains "s/m index clock" to the file. The index
+ *   should be 0 to set the minimum clock and 1 to set the maximum
+ *   clock. E.g., "s 0 500" will update minimum sclk to be 500 MHz.
+ *   "m 1 800" will update maximum mclk to be 800Mhz.
+ *
+ *   For sclk voltage curve, enter the new values by writing a
+ *   string that contains "vc point clock voff" to the file. The
+ *   points are indexed by 0, 1 and 2. E.g., "vc 0 300 10" will
+ *   update point1 with clock set as 300Mhz and voltage increased
+ *   by 10mV. "vc 2 1000 -10" will update point3 with clock set
+ *   as 1000Mhz and voltage drop by 10mV.
+ *
+ * - When you have edited all of the states as needed, write "c" (commit)
+ *   to the file to commit your changes
+ *
+ * - If you want to reset to the default power levels, write "r" (reset)
+ *   to the file to reset them
+ *
  */

 static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
@@ -520,6 +560,8 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device 
*dev,
 type = PP_OD_RESTORE_DEFAULT_TABLE;
 else if (*buf == 'c')
 type = PP_OD_COMMIT_DPM_TABLE;
+   else if (!strncmp(buf, "vc", 2))
+   type = PP_OD_EDIT_VDDC_CURVE;
 else
 return -EINVAL;

@@ -527,6 +569,8 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device 
*dev,

 tmp_str = buf_cpy;

+   if (type == PP_OD_EDIT_VDDC_CURVE)
+   tmp_str++;
 while (isspace(*++tmp_str));

 while (tmp_str[0]) {
@@ -570,6 +614,7 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device 
*dev,
 if (adev->powerplay.pp_funcs->print_clock_levels) {
 size = amdgpu_dpm_print_clock_levels(adev, OD_SCLK, buf);
 size += amdgpu_dpm_print_clock_levels(adev, OD_MCLK, buf+size);
+   size += amdgpu_dpm_print_clock_levels(adev, OD_VDDC_CURVE, 
buf+size);
 size += amdgpu_dpm_print_clock_levels(adev, OD_RANGE, 
buf+size);
 return size;
 } else {
diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h 
b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index e23746ba53bf..92780f3fb0b3 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -94,6 +94,7 @@ enum pp_clock_type {
 PP_PCIE,
 OD_SCLK,
 OD_MCLK,
+   OD_VDDC_CURVE,
 OD_RANGE,
 };

@@ -141,6 +142,7 @@ enum {
 enum PP_OD_DPM_TABLE_COMMAND {
 PP_OD_EDIT_SCLK_VDDC_TABLE,
 PP_OD_EDIT_MCLK_VDDC_TABLE,
+   PP_OD_EDIT_VDDC_CURVE,
 PP_OD_RESTORE_DEFAULT_TABLE,
 PP_OD_COMMIT_DPM_TABLE
 };
diff --git 

Re: [PATCH] drm/amdgpu: Use drm_dev_unplug in PCI .remove

2018-08-31 Thread Christian König

Am 31.08.2018 um 18:13 schrieb Andrey Grodzovsky:

This at least allows to fail any subsequent IOCTLs with -ENODEV
after the device is gone.
Still this operation is not supported yet in graphic mode
and will lead at least to page faults and other issues.

Signed-off-by: Andrey Grodzovsky 


Well it is a start, Reviewed-by: Christian König .


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index d7d9a9d..66d14d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -978,8 +978,8 @@ amdgpu_pci_remove(struct pci_dev *pdev)
  {
struct drm_device *dev = pci_get_drvdata(pdev);
  
-	drm_dev_unregister(dev);

-   drm_dev_put(dev);
+   DRM_INFO("Device removal is currently not supported outside of 
fbcon\n");
+   drm_dev_unplug(dev);
pci_disable_device(pdev);
pci_set_drvdata(pdev, NULL);
  }


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: Use drm_dev_unplug in PCI .remove

2018-08-31 Thread Andrey Grodzovsky
This at least allows to fail any subsequent IOCTLs with -ENODEV
after the device is gone.
Still this operation is not supported yet in graphic mode
and will lead at least to page faults and other issues.

Signed-off-by: Andrey Grodzovsky 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index d7d9a9d..66d14d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -978,8 +978,8 @@ amdgpu_pci_remove(struct pci_dev *pdev)
 {
struct drm_device *dev = pci_get_drvdata(pdev);
 
-   drm_dev_unregister(dev);
-   drm_dev_put(dev);
+   DRM_INFO("Device removal is currently not supported outside of 
fbcon\n");
+   drm_dev_unplug(dev);
pci_disable_device(pdev);
pci_set_drvdata(pdev, NULL);
 }
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH libdrm] libdrm: Allow dynamic drm majors on linux

2018-08-31 Thread Thomas Hellstrom

On 08/31/2018 05:27 PM, Emil Velikov wrote:

On 31 August 2018 at 15:38, Michel Dänzer  wrote:

[ Adding the amd-gfx list ]

On 2018-08-31 3:05 p.m., Thomas Hellstrom wrote:

On 08/31/2018 02:30 PM, Emil Velikov wrote:

On 31 August 2018 at 12:54, Thomas Hellstrom 
wrote:

To determine whether a device node is a drm device node or not, the code
currently compares the node's major number to the static drm major
device
number.

This breaks the standalone vmwgfx driver on XWayland dri clients,


Any particular reason why the code doesn't use a fixed node there?
It will make the diff vs the in-kernel driver a bit smaller.

Because then it won't be able to interoperate with other in-tree
drivers, like virtual drm drivers or passthrough usb drm drivers.
There is no clean way to share the minor number allocation with in-tree
drm, so standalone vmwgfx is using dynamic major allocation.

I wonder why I haven't heard of any of these issues with the standalone
version of amdgpu shipped in packaged AMD releases. Does that also use a
different major number? If yes, maybe it's just that nobody has tried
Xwayland clients with that driver. If no, how does it avoid the other
issues described above?


AFAICT, the difference is that the standalone vmwgfx uses an internal
copy of drm core.
It doesn't reuse the in-kernel drm, hence it cannot know which minor it can use.

-Emil


Actually, standalone vmwgfx could perhaps also try to allocate minors 
from 63 and downwards. That might work, but needs some verification.


/Thomas

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH libdrm] libdrm: Allow dynamic drm majors on linux

2018-08-31 Thread Christian König

Am 31.08.2018 um 17:27 schrieb Emil Velikov:

On 31 August 2018 at 15:38, Michel Dänzer  wrote:

[ Adding the amd-gfx list ]

On 2018-08-31 3:05 p.m., Thomas Hellstrom wrote:

On 08/31/2018 02:30 PM, Emil Velikov wrote:

On 31 August 2018 at 12:54, Thomas Hellstrom 
wrote:

To determine whether a device node is a drm device node or not, the code
currently compares the node's major number to the static drm major
device
number.

This breaks the standalone vmwgfx driver on XWayland dri clients,


Any particular reason why the code doesn't use a fixed node there?
It will make the diff vs the in-kernel driver a bit smaller.

Because then it won't be able to interoperate with other in-tree
drivers, like virtual drm drivers or passthrough usb drm drivers.
There is no clean way to share the minor number allocation with in-tree
drm, so standalone vmwgfx is using dynamic major allocation.

I wonder why I haven't heard of any of these issues with the standalone
version of amdgpu shipped in packaged AMD releases. Does that also use a
different major number? If yes, maybe it's just that nobody has tried
Xwayland clients with that driver. If no, how does it avoid the other
issues described above?


AFAICT, the difference is that the standalone vmwgfx uses an internal
copy of drm core.
It doesn't reuse the in-kernel drm, hence it cannot know which minor it can use.


The amdgpu pro package has its own drm core copy as well and there it 
still works.


Not sure how our back-porting guys handle that.

Christian.



-Emil
___
dri-devel mailing list
dri-de...@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH libdrm] libdrm: Allow dynamic drm majors on linux

2018-08-31 Thread Emil Velikov
On 31 August 2018 at 15:38, Michel Dänzer  wrote:
>
> [ Adding the amd-gfx list ]
>
> On 2018-08-31 3:05 p.m., Thomas Hellstrom wrote:
>> On 08/31/2018 02:30 PM, Emil Velikov wrote:
>>> On 31 August 2018 at 12:54, Thomas Hellstrom 
>>> wrote:
 To determine whether a device node is a drm device node or not, the code
 currently compares the node's major number to the static drm major
 device
 number.

 This breaks the standalone vmwgfx driver on XWayland dri clients,

>>> Any particular reason why the code doesn't use a fixed node there?
>>> It will make the diff vs the in-kernel driver a bit smaller.
>> Because then it won't be able to interoperate with other in-tree
>> drivers, like virtual drm drivers or passthrough usb drm drivers.
>> There is no clean way to share the minor number allocation with in-tree
>> drm, so standalone vmwgfx is using dynamic major allocation.
>
> I wonder why I haven't heard of any of these issues with the standalone
> version of amdgpu shipped in packaged AMD releases. Does that also use a
> different major number? If yes, maybe it's just that nobody has tried
> Xwayland clients with that driver. If no, how does it avoid the other
> issues described above?
>
AFAICT, the difference is that the standalone vmwgfx uses an internal
copy of drm core.
It doesn't reuse the in-kernel drm, hence it cannot know which minor it can use.

-Emil
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 3/3] drm/amdgpu: fix idle state and bulk_moveavle flag

2018-08-31 Thread Michel Dänzer
On 2018-08-31 3:10 p.m., Christian König wrote:
> Add BOs to the idle state again and correctly clear the flag when
> new BOs are added.
> 
> Signed-off-by: Christian König 

Typo in the shortlog: bulk_moveavle -> bulk_moveable


The series is

Tested-by: Michel Dänzer 

Thanks Christian!


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/3] drm/ttm: fix ttm_bo_bulk_move_helper

2018-08-31 Thread Christian König

Am 31.08.2018 um 17:15 schrieb Michel Dänzer:

On 2018-08-31 3:10 p.m., Christian König wrote:

Staring at the function for six hours, just to essentially move one line
of code.

That sucks, but the commit log should describe what the problem was and
how this patch solves it.



Signed-off-by: Christian König 
---
  drivers/gpu/drm/ttm/ttm_bo.c | 13 -
  1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 35d53d81f486..138c98902033 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -250,15 +250,18 @@ EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
  static void ttm_bo_bulk_move_helper(struct ttm_lru_bulk_move_pos *pos,
struct list_head *lru, bool is_swap)
  {
+   struct list_head *list;
LIST_HEAD(entries);
LIST_HEAD(before);
-   struct list_head *list1, *list2;
  
-	list1 = is_swap ? >last->swap : >last->lru;

-   list2 = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
+   reservation_object_assert_held(pos->last->resv);
+   list = is_swap ? >last->swap : >last->lru;
+   list_cut_position(, lru, list);
+
+   reservation_object_assert_held(pos->first->resv);
+   list = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
+   list_cut_position(, , list);

So the problem was that the first list_cut_position call could result in
list2 pointing to la-la-land? Good catch!


Yes, exactly. Thought that would be obvious, but going to add that to 
the commit log.


Can I get a tested-by? You were much better at reproducing that than I am.

Christian.
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/3] drm/ttm: fix ttm_bo_bulk_move_helper

2018-08-31 Thread Michel Dänzer
On 2018-08-31 3:10 p.m., Christian König wrote:
> Staring at the function for six hours, just to essentially move one line
> of code.

That sucks, but the commit log should describe what the problem was and
how this patch solves it.


> Signed-off-by: Christian König 
> ---
>  drivers/gpu/drm/ttm/ttm_bo.c | 13 -
>  1 file changed, 8 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 35d53d81f486..138c98902033 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -250,15 +250,18 @@ EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
>  static void ttm_bo_bulk_move_helper(struct ttm_lru_bulk_move_pos *pos,
>   struct list_head *lru, bool is_swap)
>  {
> + struct list_head *list;
>   LIST_HEAD(entries);
>   LIST_HEAD(before);
> - struct list_head *list1, *list2;
>  
> - list1 = is_swap ? &pos->last->swap : &pos->last->lru;
> - list2 = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
> + reservation_object_assert_held(pos->last->resv);
> + list = is_swap ? &pos->last->swap : &pos->last->lru;
> + list_cut_position(&entries, lru, list);
> +
> + reservation_object_assert_held(pos->first->resv);
> + list = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
> + list_cut_position(&before, &entries, list);

So the problem was that the first list_cut_position call could result in
list2 pointing to la-la-land? Good catch!


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH libdrm] libdrm: Allow dynamic drm majors on linux

2018-08-31 Thread Thomas Hellstrom

On 08/31/2018 04:49 PM, Michel Dänzer wrote:

On 2018-08-31 4:46 p.m., Thomas Hellstrom wrote:

On 08/31/2018 04:38 PM, Michel Dänzer wrote:

[ Adding the amd-gfx list ]

On 2018-08-31 3:05 p.m., Thomas Hellstrom wrote:

On 08/31/2018 02:30 PM, Emil Velikov wrote:

On 31 August 2018 at 12:54, Thomas Hellstrom 
wrote:

To determine whether a device node is a drm device node or not, the
code
currently compares the node's major number to the static drm major
device
number.

This breaks the standalone vmwgfx driver on XWayland dri clients,


Any particular reason why the code doesn't use a fixed node there?
It will make the diff vs the in-kernel driver a bit smaller.

Because then it won't be able to interoperate with other in-tree
drivers, like virtual drm drivers or passthrough usb drm drivers.
There is no clean way to share the minor number allocation with in-tree
drm, so standalone vmwgfx is using dynamic major allocation.

I wonder why I haven't heard of any of these issues with the standalone
version of amdgpu shipped in packaged AMD releases. Does that also use a
different major number? If yes, maybe it's just that nobody has tried
Xwayland clients with that driver. If no, how does it avoid the other
issues described above?



Is standalone AMD supposed to be able to coexist with in-tree drm drivers?

Yes, it does, it's working e.g. on laptops with an Intel integrated and
an AMD discrete GPU.




Hmm. The symptoms with xf86-video-vmware are that when mesa initializes, 
we get:

MESA-LOADER: failed to retrieve device information
MESA-LOADER: failed to retrieve device information
MESA-LOADER: failed to retrieve device information

but then vmwgfx_dri.so loads anyway.

With XWayland, mesa just silently tries swrast instead of vmwgfx.

Not sure this has always been the case though. It might be due to a 
recent XWayland change. In any case, the change to libdrm silences the 
warnings on Xorg and makes mesa try vmwgfx on XWayland.


/Thomas




___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH libdrm] libdrm: Allow dynamic drm majors on linux

2018-08-31 Thread Michel Dänzer
On 2018-08-31 4:46 p.m., Thomas Hellstrom wrote:
> On 08/31/2018 04:38 PM, Michel Dänzer wrote:
>> [ Adding the amd-gfx list ]
>>
>> On 2018-08-31 3:05 p.m., Thomas Hellstrom wrote:
>>> On 08/31/2018 02:30 PM, Emil Velikov wrote:
 On 31 August 2018 at 12:54, Thomas Hellstrom 
 wrote:
> To determine whether a device node is a drm device node or not, the
> code
> currently compares the node's major number to the static drm major
> device
> number.
>
> This breaks the standalone vmwgfx driver on XWayland dri clients,
>
 Any particular reason why the code doesn't use a fixed node there?
 It will make the diff vs the in-kernel driver a bit smaller.
>>> Because then it won't be able to interoperate with other in-tree
>>> drivers, like virtual drm drivers or passthrough usb drm drivers.
>>> There is no clean way to share the minor number allocation with in-tree
>>> drm, so standalone vmwgfx is using dynamic major allocation.
>> I wonder why I haven't heard of any of these issues with the standalone
>> version of amdgpu shipped in packaged AMD releases. Does that also use a
>> different major number? If yes, maybe it's just that nobody has tried
>> Xwayland clients with that driver. If no, how does it avoid the other
>> issues described above?
>>
>>
> Is standalone AMD supposed to be able to coexist with in-tree drm drivers?

Yes, it does, it's working e.g. on laptops with an Intel integrated and
an AMD discrete GPU.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH libdrm] libdrm: Allow dynamic drm majors on linux

2018-08-31 Thread Thomas Hellstrom

On 08/31/2018 04:38 PM, Michel Dänzer wrote:

[ Adding the amd-gfx list ]

On 2018-08-31 3:05 p.m., Thomas Hellstrom wrote:

On 08/31/2018 02:30 PM, Emil Velikov wrote:

On 31 August 2018 at 12:54, Thomas Hellstrom 
wrote:

To determine whether a device node is a drm device node or not, the code
currently compares the node's major number to the static drm major
device
number.

This breaks the standalone vmwgfx driver on XWayland dri clients,


Any particular reason why the code doesn't use a fixed node there?
It will make the diff vs the in-kernel driver a bit smaller.

Because then it won't be able to interoperate with other in-tree
drivers, like virtual drm drivers or passthrough usb drm drivers.
There is no clean way to share the minor number allocation with in-tree
drm, so standalone vmwgfx is using dynamic major allocation.

I wonder why I haven't heard of any of these issues with the standalone
version of amdgpu shipped in packaged AMD releases. Does that also use a
different major number? If yes, maybe it's just that nobody has tried
Xwayland clients with that driver. If no, how does it avoid the other
issues described above?



Is standalone AMD supposed to be able to coexist with in-tree drm drivers?

/Thomas


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH libdrm] libdrm: Allow dynamic drm majors on linux

2018-08-31 Thread Michel Dänzer

[ Adding the amd-gfx list ]

On 2018-08-31 3:05 p.m., Thomas Hellstrom wrote:
> On 08/31/2018 02:30 PM, Emil Velikov wrote:
>> On 31 August 2018 at 12:54, Thomas Hellstrom 
>> wrote:
>>> To determine whether a device node is a drm device node or not, the code
>>> currently compares the node's major number to the static drm major
>>> device
>>> number.
>>>
>>> This breaks the standalone vmwgfx driver on XWayland dri clients,
>>>
>> Any particular reason why the code doesn't use a fixed node there?
>> It will make the diff vs the in-kernel driver a bit smaller.
> Because then it won't be able to interoperate with other in-tree
> drivers, like virtual drm drivers or passthrough usb drm drivers.
> There is no clean way to share the minor number allocation with in-tree
> drm, so standalone vmwgfx is using dynamic major allocation.

I wonder why I haven't heard of any of these issues with the standalone
version of amdgpu shipped in packaged AMD releases. Does that also use a
different major number? If yes, maybe it's just that nobody has tried
Xwayland clients with that driver. If no, how does it avoid the other
issues described above?


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/2] drm/amdgpu: move size calculations to the front of the file again

2018-08-31 Thread Christian König
amdgpu_vm_bo_* functions should come much later.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 90 +-
 1 file changed, 45 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index d59222fb5931..a9275a99d793 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -133,51 +133,6 @@ struct amdgpu_prt_cb {
struct dma_fence_cb cb;
 };
 
-/**
- * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
- *
- * @base: base structure for tracking BO usage in a VM
- * @vm: vm to which bo is to be added
- * @bo: amdgpu buffer object
- *
- * Initialize a bo_va_base structure and add it to the appropriate lists
- *
- */
-static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
-  struct amdgpu_vm *vm,
-  struct amdgpu_bo *bo)
-{
-   base->vm = vm;
-   base->bo = bo;
-   INIT_LIST_HEAD(>bo_list);
-   INIT_LIST_HEAD(>vm_status);
-
-   if (!bo)
-   return;
-   list_add_tail(>bo_list, >va);
-
-   if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
-   return;
-
-   vm->bulk_moveable = false;
-   if (bo->tbo.type == ttm_bo_type_kernel)
-   list_move(>vm_status, >relocated);
-   else
-   list_move(>vm_status, >idle);
-
-   if (bo->preferred_domains &
-   amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
-   return;
-
-   /*
-* we checked all the prerequisites, but it looks like this per vm bo
-* is currently evicted. add the bo to the evicted list to make sure it
-* is validated on next vm use to avoid fault.
-* */
-   list_move_tail(>vm_status, >evicted);
-   base->moved = true;
-}
-
 /**
  * amdgpu_vm_level_shift - return the addr shift for each level
  *
@@ -249,6 +204,51 @@ static unsigned amdgpu_vm_bo_size(struct amdgpu_device 
*adev, unsigned level)
return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level) * 8);
 }
 
+/**
+ * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
+ *
+ * @base: base structure for tracking BO usage in a VM
+ * @vm: vm to which bo is to be added
+ * @bo: amdgpu buffer object
+ *
+ * Initialize a bo_va_base structure and add it to the appropriate lists
+ *
+ */
+static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
+  struct amdgpu_vm *vm,
+  struct amdgpu_bo *bo)
+{
+   base->vm = vm;
+   base->bo = bo;
+   INIT_LIST_HEAD(>bo_list);
+   INIT_LIST_HEAD(>vm_status);
+
+   if (!bo)
+   return;
+   list_add_tail(>bo_list, >va);
+
+   if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
+   return;
+
+   vm->bulk_moveable = false;
+   if (bo->tbo.type == ttm_bo_type_kernel)
+   list_move(>vm_status, >relocated);
+   else
+   list_move(>vm_status, >idle);
+
+   if (bo->preferred_domains &
+   amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
+   return;
+
+   /*
+* we checked all the prerequisites, but it looks like this per vm bo
+* is currently evicted. add the bo to the evicted list to make sure it
+* is validated on next vm use to avoid fault.
+* */
+   list_move_tail(>vm_status, >evicted);
+   base->moved = true;
+}
+
 /**
  * amdgpu_vm_get_pd_bo - add the VM PD to a validation list
  *
-- 
2.14.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/2] drm/amdgpu: improve VM state machine documentation

2018-08-31 Thread Christian König
Since we have a lot of FAQs on the VM state machine, try to improve the
documentation by adding functions for each state move.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 107 -
 1 file changed, 79 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index a9275a99d793..40c22635fefd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -204,6 +204,69 @@ static unsigned amdgpu_vm_bo_size(struct amdgpu_device 
*adev, unsigned level)
return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level) * 8);
 }
 
+/**
+ * amdgpu_vm_bo_evicted - vm_bo is evicted
+ *
+ * @vm_bo: vm_bo which is evicted
+ *
+ * State for PDs/PTs and per VM BOs which are not at the location they should
+ * be.
+ */
+static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
+{
+   struct amdgpu_vm *vm = vm_bo->vm;
+   struct amdgpu_bo *bo = vm_bo->bo;
+
+   vm_bo->moved = true;
+   if (bo->tbo.type == ttm_bo_type_kernel)
+   list_move(_bo->vm_status, >evicted);
+   else
+   list_move_tail(_bo->vm_status, >evicted);
+}
+
+/**
+ * amdgpu_vm_bo_relocated - vm_bo is relocated
+ *
+ * @vm_bo: vm_bo which is relocated
+ *
+ * State for PDs/PTs which needs to update their parent PD.
+ */
+static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
+{
+   list_move(_bo->vm_status, _bo->vm->relocated);
+}
+
+/**
+ * amdgpu_vm_bo_moved - vm_bo is moved
+ *
+ * @vm_bo: vm_bo which is moved
+ *
+ * State for per VM and normal BOs which are moved, but that change is not yet
+ * reflected in the page tables.
+ */
+static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
+{
+   struct amdgpu_vm *vm = vm_bo->vm;
+
+   spin_lock(>moved_lock);
+   list_move(_bo->vm_status, >moved);
+   spin_unlock(>moved_lock);
+}
+
+/**
+ * amdgpu_vm_bo_idle - vm_bo is idle
+ *
+ * @vm_bo: vm_bo which is now idle
+ *
+ * State for PDs/PTs and per VM BOs which have gone through the state machine
+ * and are now idle.
+ */
+static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo)
+{
+   list_move(_bo->vm_status, _bo->vm->idle);
+   vm_bo->moved = false;
+}
+
 /**
  * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
  *
@@ -232,9 +295,9 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base 
*base,
 
vm->bulk_moveable = false;
if (bo->tbo.type == ttm_bo_type_kernel)
-   list_move(>vm_status, >relocated);
+   amdgpu_vm_bo_relocated(base);
else
-   list_move(>vm_status, >idle);
+   amdgpu_vm_bo_idle(base);
 
if (bo->preferred_domains &
amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
@@ -245,8 +308,7 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base 
*base,
 * is currently evicted. add the bo to the evicted list to make sure it
 * is validated on next vm use to avoid fault.
 * */
-   list_move_tail(>vm_status, >evicted);
-   base->moved = true;
+   amdgpu_vm_bo_evicted(base);
 }
 
 /**
@@ -342,9 +404,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
break;
 
if (bo->tbo.type != ttm_bo_type_kernel) {
-   spin_lock(>moved_lock);
-   list_move(_base->vm_status, >moved);
-   spin_unlock(>moved_lock);
+   amdgpu_vm_bo_moved(bo_base);
} else {
if (vm->use_cpu_for_update)
r = amdgpu_bo_kmap(bo, NULL);
@@ -352,7 +412,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
r = amdgpu_ttm_alloc_gart(>tbo);
if (r)
break;
-   list_move(_base->vm_status, >relocated);
+   amdgpu_vm_bo_relocated(bo_base);
}
}
 
@@ -1123,8 +1183,7 @@ int amdgpu_vm_update_directories(struct amdgpu_device 
*adev,
bo_base = list_first_entry(>relocated,
   struct amdgpu_vm_bo_base,
   vm_status);
-   bo_base->moved = false;
-   list_move(_base->vm_status, >idle);
+   amdgpu_vm_bo_idle(bo_base);
 
bo = bo_base->bo->parent;
if (!bo)
@@ -1243,7 +1302,7 @@ static void amdgpu_vm_handle_huge_pages(struct 
amdgpu_pte_update_params *p,
if (entry->huge) {
/* Add the entry to the relocated list to update it. */
entry->huge = false;
-   list_move(>base.vm_status, >vm->relocated);
+   

[PATCH 2/3] drm/amdgpu: fix "use bulk moves for efficient VM LRU handling" v2

2018-08-31 Thread Christian König
First step to fix the LRU corruption: we accidentally tried to move things
on the LRU after dropping the lock.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index dd734970e167..349dcc37ee64 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1237,6 +1237,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
ring = to_amdgpu_ring(entity->rq->sched);
amdgpu_ring_priority_get(ring, priority);
 
+   amdgpu_vm_move_to_lru_tail(p->adev, >vm);
+
ttm_eu_fence_buffer_objects(>ticket, >validated, p->fence);
amdgpu_mn_unlock(p->mn);
 
@@ -1258,7 +1260,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
union drm_amdgpu_cs *cs = data;
struct amdgpu_cs_parser parser = {};
bool reserved_buffers = false;
-   struct amdgpu_fpriv *fpriv;
int i, r;
 
if (!adev->accel_working)
@@ -1303,8 +1304,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
 
r = amdgpu_cs_submit(, cs);
 
-   fpriv = filp->driver_priv;
-   amdgpu_vm_move_to_lru_tail(adev, >vm);
 out:
amdgpu_cs_parser_fini(, r, reserved_buffers);
return r;
-- 
2.14.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 3/3] drm/amdgpu: fix idle state and bulk_moveable flag

2018-08-31 Thread Christian König
Add BOs to the idle state again and correctly clear the flag when
new BOs are added.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 12 +++-
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index f31fa351caba..d59222fb5931 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -156,12 +156,15 @@ static void amdgpu_vm_bo_base_init(struct 
amdgpu_vm_bo_base *base,
return;
list_add_tail(>bo_list, >va);
 
-   if (bo->tbo.type == ttm_bo_type_kernel)
-   list_move(>vm_status, >relocated);
-
if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
return;
 
+   vm->bulk_moveable = false;
+   if (bo->tbo.type == ttm_bo_type_kernel)
+   list_move(>vm_status, >relocated);
+   else
+   list_move(>vm_status, >idle);
+
if (bo->preferred_domains &
amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
return;
@@ -1121,7 +1124,7 @@ int amdgpu_vm_update_directories(struct amdgpu_device 
*adev,
   struct amdgpu_vm_bo_base,
   vm_status);
bo_base->moved = false;
-   list_del_init(_base->vm_status);
+   list_move(_base->vm_status, >idle);
 
bo = bo_base->bo->parent;
if (!bo)
@@ -2646,7 +2649,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
return r;
 
vm->pte_support_ats = false;
-   vm->bulk_moveable = true;
 
if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) {
vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
-- 
2.14.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/3] drm/ttm: fix ttm_bo_bulk_move_helper

2018-08-31 Thread Christian König
Staring at the function for six hours, just to essentially move one line
of code.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/ttm/ttm_bo.c | 13 -
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 35d53d81f486..138c98902033 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -250,15 +250,18 @@ EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
 static void ttm_bo_bulk_move_helper(struct ttm_lru_bulk_move_pos *pos,
struct list_head *lru, bool is_swap)
 {
+   struct list_head *list;
LIST_HEAD(entries);
LIST_HEAD(before);
-   struct list_head *list1, *list2;
 
-   list1 = is_swap ? >last->swap : >last->lru;
-   list2 = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
+   reservation_object_assert_held(pos->last->resv);
+   list = is_swap ? >last->swap : >last->lru;
+   list_cut_position(, lru, list);
+
+   reservation_object_assert_held(pos->first->resv);
+   list = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
+   list_cut_position(, , list);
 
-   list_cut_position(, lru, list1);
-   list_cut_position(, , list2);
list_splice(, lru);
list_splice_tail(, lru);
 }
-- 
2.14.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH xf86-video-amdgpu] Do not push the CM_GAMMA_LUT property values in drmmode_crtc_cm_init

2018-08-31 Thread Michel Dänzer
From: Michel Dänzer 

The crtc->gamma_lut values aren't initialized yet at this point, and
the property values are pushed again from drmmode_setup_colormap
anyway.

Fixes intermittent flicker due to random gamma LUT values during server
startup.

Signed-off-by: Michel Dänzer 
---
 src/drmmode_display.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/drmmode_display.c b/src/drmmode_display.c
index 46be29d07..6ef6a98e2 100644
--- a/src/drmmode_display.c
+++ b/src/drmmode_display.c
@@ -1863,7 +1863,7 @@ static void drmmode_crtc_cm_init(int drm_fd, xf86CrtcPtr 
crtc)
drmmode_crtc->ctm->matrix[8] = (uint64_t)1 << 32;
 
/* Push properties to reset properties currently in hardware */
-   for (i = 0; i < CM_DEGAMMA_LUT_SIZE; i++) {
+   for (i = 0; i < CM_GAMMA_LUT; i++) {
if (drmmode_crtc_push_cm_prop(crtc, i))
xf86DrvMsg(crtc->scrn->scrnIndex, X_ERROR,
   "Failed to initialize color management "
-- 
2.18.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v2 00/12] remove_conflicting_framebuffers() cleanup

2018-08-31 Thread Chris Wilson
Quoting Daniel Vetter (2018-08-31 10:04:39)
> On Thu, Aug 30, 2018 at 11:00:01PM +0200, Michał Mirosław wrote:
> > This series cleans up duplicated code for replacing firmware FB
> > driver with proper DRI driver and adds handover support to
> > Tegra driver.
> > 
> > This is a slightly updated version of a series sent on 24 Nov 2017.
> > 
> > v2:
> >  - rebased on current drm-next
> >  - dropped staging/sm750fb changes
> >  - added kernel docs for DRM helpers
> > 
> > Michał Mirosław (12):
> >   fbdev: show fbdev number for debugging
> >   fbdev: allow apertures == NULL in remove_conflicting_framebuffers()
> >   fbdev: add remove_conflicting_pci_framebuffers()
> >   drm/amdgpu: use simpler remove_conflicting_pci_framebuffers()
> >   drm/bochs: use simpler remove_conflicting_pci_framebuffers()
> >   drm/cirrus: use simpler remove_conflicting_pci_framebuffers()
> >   drm/mgag200: use simpler remove_conflicting_pci_framebuffers()
> >   drm/radeon: use simpler remove_conflicting_pci_framebuffers()
> >   drm/virtio: use simpler remove_conflicting_pci_framebuffers()
> >   drm/vc4: use simpler remove_conflicting_framebuffers(NULL)
> >   drm/sun4i: use simpler remove_conflicting_framebuffers(NULL)
> >   drm/tegra: kick out simplefb
> 
> Looks very neat. A bit confused about the drm changes in the fbdev-titled
> patches 1&3, but I guess we can merge as-is. Up to you whether you want to
> split or not I'd say.

Ahah, someone is looking at remove_conflicting_framebuffers(). May I
interest you in a use-after-free?

[  378.423513] stack segment:  [#1] PREEMPT SMP PTI
[  378.423530] CPU: 1 PID: 4338 Comm: pm_rpm Tainted: G U
4.19.0-rc1-CI-CI_DRM_4746+ #1
[  378.423548] Hardware name: To Be Filled By O.E.M. To Be Filled By 
O.E.M./J4205-ITX, BIOS P1.10 09/29/2016
[  378.423570] RIP: 0010:do_remove_conflicting_framebuffers+0x56/0x170
[  378.423587] Code: 49 8b 45 00 48 85 c0 74 50 f6 40 0a 08 74 4a 4d 85 e4 48 
8b a8 78 04 00 00 74 1f 48 85 ed 74 1a 41 8b 0c 24 31 db 85 c9 74 10 <8b> 55 00 
85 d2 75 42 83 c3 01 41 39 1c 24 77 f0 48 85 ed 74 1a 45
[  378.423620] RSP: 0018:c91dfa88 EFLAGS: 00010202
[  378.423632] RAX: 880274470008 RBX:  RCX: 0001
[  378.423646] RDX: 0001 RSI: a025c634 RDI: 88025cc3b428
[  378.423660] RBP: 6b6b6b6b6b6b6b6b R08: 1edaddfa R09: a025c634
[  378.423673] R10: c91dfae8 R11: 820de938 R12: 88025cc3b428
[  378.423687] R13: 8234ca20 R14: 8234cb20 R15: 0001
[  378.423701] FS:  7fcf03d0a980() GS:880277e8() 
knlGS:
[  378.423717] CS:  0010 DS:  ES:  CR0: 80050033
[  378.423729] CR2: 7fffece1fdb8 CR3: 0001fe32e000 CR4: 003406e0
[  378.423742] Call Trace:
[  378.423756]  remove_conflicting_framebuffers+0x28/0x40
[  378.423856]  i915_driver_load+0x7f5/0x10c0 [i915]
[  378.423873]  ? _raw_spin_unlock_irqrestore+0x4c/0x60
[  378.423887]  ? lockdep_hardirqs_on+0xe0/0x1b0
[  378.423962]  i915_pci_probe+0x29/0xa0 [i915]
[  378.423977]  pci_device_probe+0xa1/0x130
[  378.423990]  really_probe+0x25d/0x3c0
[  378.424002]  driver_probe_device+0x10a/0x120
[  378.424013]  __driver_attach+0xdb/0x100
[  378.424025]  ? driver_probe_device+0x120/0x120
[  378.424037]  bus_for_each_dev+0x74/0xc0
[  378.424048]  bus_add_driver+0x15f/0x250
[  378.424060]  ? 0xa069d000
[  378.424070]  driver_register+0x56/0xe0
[  378.424080]  ? 0xa069d000
[  378.424090]  do_one_initcall+0x58/0x2e0
[  378.424101]  ? rcu_lockdep_current_cpu_online+0x8f/0xd0
[  378.424116]  ? do_init_module+0x1d/0x1ea
[  378.424127]  ? rcu_read_lock_sched_held+0x6f/0x80
[  378.424141]  ? kmem_cache_alloc_trace+0x264/0x290
[  378.424154]  do_init_module+0x56/0x1ea
[  378.424167]  load_module+0x26ba/0x29a0
[  378.424182]  ? vfs_read+0x122/0x140
[  378.424199]  ? __se_sys_finit_module+0xd3/0xf0
[  378.424210]  __se_sys_finit_module+0xd3/0xf0
[  378.424226]  do_syscall_64+0x55/0x190
[  378.424237]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
[  378.424249] RIP: 0033:0x7fcf02f9b839
[  378.424258] Code: 00 f3 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 
f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 
f0 ff ff 73 01 c3 48 8b 0d 1f f6 2c 00 f7 d8 64 89 01 48
[  378.424290] RSP: 002b:7fffece21f58 EFLAGS: 0246 ORIG_RAX: 
0139
[  378.424307] RAX: ffda RBX: 56344e1a4d80 RCX: 7fcf02f9b839
[  378.424321] RDX:  RSI: 7fcf026470e5 RDI: 0003
[  378.424336] RBP: 7fcf026470e5 R08:  R09: 
[  378.424349] R10: 0003 R11: 0246 R12: 
[  378.424363] R13: 56344e1a R14:  R15: 56344e1a4d80

https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4613/fi-bxt-j4205/dmesg0.log
-Chris
___
amd-gfx mailing list

Re: [PATCH v2 00/12] remove_conflicting_framebuffers() cleanup

2018-08-31 Thread Daniel Vetter
On Thu, Aug 30, 2018 at 11:00:01PM +0200, Michał Mirosław wrote:
> This series cleans up duplicated code for replacing firmware FB
> driver with proper DRI driver and adds handover support to
> Tegra driver.
> 
> This is a slightly updated version of a series sent on 24 Nov 2017.
> 
> v2:
>  - rebased on current drm-next
>  - dropped staging/sm750fb changes
>  - added kernel docs for DRM helpers
> 
> Michał Mirosław (12):
>   fbdev: show fbdev number for debugging
>   fbdev: allow apertures == NULL in remove_conflicting_framebuffers()
>   fbdev: add remove_conflicting_pci_framebuffers()
>   drm/amdgpu: use simpler remove_conflicting_pci_framebuffers()
>   drm/bochs: use simpler remove_conflicting_pci_framebuffers()
>   drm/cirrus: use simpler remove_conflicting_pci_framebuffers()
>   drm/mgag200: use simpler remove_conflicting_pci_framebuffers()
>   drm/radeon: use simpler remove_conflicting_pci_framebuffers()
>   drm/virtio: use simpler remove_conflicting_pci_framebuffers()
>   drm/vc4: use simpler remove_conflicting_framebuffers(NULL)
>   drm/sun4i: use simpler remove_conflicting_framebuffers(NULL)
>   drm/tegra: kick out simplefb

Looks very neat. A bit confused about the drm changes in the fbdev-titled
patches 1&3, but I guess we can merge as-is. Up to you whether you want to
split or not I'd say.

Bartlomiej, ack for pulling in this entire pile through drm-misc?

Thanks, Daniel

> 
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c  | 24 +-
>  drivers/gpu/drm/bochs/bochs_drv.c| 18 +--
>  drivers/gpu/drm/cirrus/cirrus_drv.c  | 23 +-
>  drivers/gpu/drm/mgag200/mgag200_drv.c| 21 +
>  drivers/gpu/drm/mgag200/mgag200_main.c   |  9 --
>  drivers/gpu/drm/radeon/radeon_drv.c  | 23 +-
>  drivers/gpu/drm/sun4i/sun4i_drv.c| 18 +--
>  drivers/gpu/drm/tegra/drm.c  |  4 +++
>  drivers/gpu/drm/vc4/vc4_drv.c| 20 +---
>  drivers/gpu/drm/virtio/virtgpu_drm_bus.c | 24 ++
>  drivers/video/fbdev/core/fbmem.c | 40 ++--
>  include/drm/drm_fb_helper.h  | 34 
>  include/linux/fb.h   |  2 ++
>  13 files changed, 88 insertions(+), 172 deletions(-)
> 
> -- 
> 2.18.0
> 
> ___
> dri-devel mailing list
> dri-de...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v2 02/12] fbdev: allow apertures == NULL in remove_conflicting_framebuffers()

2018-08-31 Thread Daniel Vetter
On Fri, Aug 31, 2018 at 10:56:56AM +0200, Daniel Vetter wrote:
> On Thu, Aug 30, 2018 at 11:00:05PM +0200, Michał Mirosław wrote:
> > Interpret (otherwise-invalid) NULL apertures argument to mean all-memory
> > range. This will allow to remove several duplicates of this code from
> > drivers in following patches.
> > 
> > Signed-off-by: Michał Mirosław 
> > [for v1]
> > Acked-by: Bartlomiej Zolnierkiewicz 
> > 
> > ---
> > v2: added kerneldoc to corresponding DRM helper
> > ---
> >  drivers/video/fbdev/core/fbmem.c | 14 ++
> >  include/drm/drm_fb_helper.h  | 10 ++
> >  2 files changed, 24 insertions(+)
> > 
> > diff --git a/drivers/video/fbdev/core/fbmem.c 
> > b/drivers/video/fbdev/core/fbmem.c
> > index 30a18d4c9de4..0df148eb4699 100644
> > --- a/drivers/video/fbdev/core/fbmem.c
> > +++ b/drivers/video/fbdev/core/fbmem.c
> > @@ -1779,11 +1779,25 @@ int remove_conflicting_framebuffers(struct 
> > apertures_struct *a,
> > const char *name, bool primary)
> >  {
> > int ret;
> > +   bool do_free = false;
> > +
> > +   if (!a) {
> > +   a = alloc_apertures(1);
> > +   if (!a)
> > +   return -ENOMEM;
> > +
> > +   a->ranges[0].base = 0;
> > +   a->ranges[0].size = ~0;
> > +   do_free = true;
> > +   }
> >  
> > mutex_lock(_lock);
> > ret = do_remove_conflicting_framebuffers(a, name, primary);
> > mutex_unlock(_lock);
> >  
> > +   if (do_free)
> > +   kfree(a);
> > +
> > return ret;
> >  }
> >  EXPORT_SYMBOL(remove_conflicting_framebuffers);
> > diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h
> > index b069433e7fc1..1c1e53abb25d 100644
> > --- a/include/drm/drm_fb_helper.h
> > +++ b/include/drm/drm_fb_helper.h
> > @@ -566,6 +566,16 @@ static inline void 
> > drm_fb_helper_output_poll_changed(struct drm_device *dev)
> >  
> >  #endif
> >  
> > +/**
> > + * drm_fb_helper_remove_conflicting_framebuffers - remove firmware 
> > framebuffers
> > + * @a: memory range, users of which are to be removed
> > + * @name: requesting driver name
> > + * @primary: also kick vga16fb if present
> > + *
> > + * This function removes framebuffer devices (eg. initialized by firmware)
> > + * which use memory range described by @a. If @a is NULL all such devices 
> > are
> > + * removed.
> > + */
> 
> This looks like misplaced copypasta. You only need this once I think.

Ah no, just a fixup for the lack of kerneldoc we have. Can you pls split
this out into a separate patch?

Thanks, Daniel

> -Daniel
> 
> >  static inline int
> >  drm_fb_helper_remove_conflicting_framebuffers(struct apertures_struct *a,
> >   const char *name, bool primary)
> > -- 
> > 2.18.0
> > 
> > ___
> > dri-devel mailing list
> > dri-de...@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/dri-devel
> 
> -- 
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH (repost) 5/5] drm/amdgpu: add DisplayPort CEC-Tunneling-over-AUX support

2018-08-31 Thread Hans Verkuil
On 08/24/2018 04:59 PM, Alex Deucher wrote:
> On Fri, Aug 24, 2018 at 3:20 AM Hans Verkuil  wrote:
>>
>> On 08/23/2018 08:38 PM, Harry Wentland wrote:
>>> On 2018-08-17 10:11 AM, Hans Verkuil wrote:
 From: Hans Verkuil 

 Add DisplayPort CEC-Tunneling-over-AUX support to amdgpu.

 Signed-off-by: Hans Verkuil 
 Acked-by: Alex Deucher 
 ---
  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c   | 13 +++--
  .../drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c |  2 ++
  2 files changed, 13 insertions(+), 2 deletions(-)

 diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
 b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
 index 34f34823bab5..77898c95bef6 100644
 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
 +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
 @@ -898,6 +898,7 @@ amdgpu_dm_update_connector_after_detect(struct 
 amdgpu_dm_connector *aconnector)
  aconnector->dc_sink = sink;
  if (sink->dc_edid.length == 0) {
  aconnector->edid = NULL;
 +drm_dp_cec_unset_edid(>dm_dp_aux.aux);
  } else {
  aconnector->edid =
  (struct edid *) sink->dc_edid.raw_edid;
 @@ -905,10 +906,13 @@ amdgpu_dm_update_connector_after_detect(struct 
 amdgpu_dm_connector *aconnector)

  drm_connector_update_edid_property(connector,
  aconnector->edid);
 +drm_dp_cec_set_edid(>dm_dp_aux.aux,
 +aconnector->edid);
  }
  amdgpu_dm_add_sink_to_freesync_module(connector, 
 aconnector->edid);

  } else {
 +drm_dp_cec_unset_edid(>dm_dp_aux.aux);
  amdgpu_dm_remove_sink_from_freesync_module(connector);
  drm_connector_update_edid_property(connector, NULL);
  aconnector->num_modes = 0;
 @@ -1059,12 +1063,16 @@ static void handle_hpd_rx_irq(void *param)
  drm_kms_helper_hotplug_event(dev);
  }
  }
 +
  if ((dc_link->cur_link_settings.lane_count != LANE_COUNT_UNKNOWN) ||
 -(dc_link->type == dc_connection_mst_branch))
 +(dc_link->type == dc_connection_mst_branch)) {
  dm_handle_hpd_rx_irq(aconnector);
 +}
>>>
>>> These lines don't really add anything functional.
>>
>> Oops, a left-over from debugging code. I'll remove this 'change' and post a 
>> v2
>> with all the Acks/reviewed-bys.
>>
>> Any idea who would typically merge a patch series like this?
> 
> I (or anyone else with drm-misc rights) can push them for you, however
> drm-misc is a committer tree so if you'd like access to apply patches
> yourself, you could do that too.  Request access here:
> https://www.freedesktop.org/wiki/AccountRequests/

OK, I pushed this series to drm-next. It's the first time I'm using dim & 
drm-misc
so let me know if I did anything silly.

Regards,

Hans
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/4] drm/amdgpu/gmc9: rework stolen vga memory handling

2018-08-31 Thread Christian König

Am 30.08.2018 um 16:53 schrieb Alex Deucher:

No functional change, just rework it in order to adjust the
behavior on a per asic level.  The problem is that on vega10,
something corrupts the lower 8 MB of vram on the second
resume from S3.


Is that tested with engineering samples or production level hardware? 
Maybe it is just a leftover from BIOS bringup?



This does not seem to affect Raven, other
gmc9 based asics need testing.

Signed-off-by: Alex Deucher 


Acked-by: Christian König  for the series.


---
  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 48 +--
  1 file changed, 29 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 04d50893a6f2..46cff7d8b375 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -692,6 +692,28 @@ static int gmc_v9_0_ecc_available(struct amdgpu_device 
*adev)
return lost_sheep == 0;
  }
  
+static bool gmc_v9_0_keep_stolen_memory(struct amdgpu_device *adev)

+{
+
+   /*
+* TODO:
+* Currently there is a bug where some memory client outside
+* of the driver writes to first 8M of VRAM on S3 resume,
+* this overrides GART which by default gets placed in first 8M and
+* causes VM_FAULTS once GTT is accessed.
+* Keep the stolen memory reservation until the while this is not 
solved.
+* Also check code in gmc_v9_0_get_vbios_fb_size and gmc_v9_0_late_init
+*/
+   switch (adev->asic_type) {
+   case CHIP_RAVEN:
+   case CHIP_VEGA10:
+   case CHIP_VEGA12:
+   case CHIP_VEGA20:
+   default:
+   return true;
+   }
+}
+
  static int gmc_v9_0_late_init(void *handle)
  {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -708,10 +730,8 @@ static int gmc_v9_0_late_init(void *handle)
unsigned i;
int r;
  
-	/*

-* TODO - Uncomment once GART corruption issue is fixed.
-*/
-   /* amdgpu_bo_late_init(adev); */
+   if (!gmc_v9_0_keep_stolen_memory(adev))
+   amdgpu_bo_late_init(adev);
  
  	for(i = 0; i < adev->num_rings; ++i) {

struct amdgpu_ring *ring = adev->rings[i];
@@ -848,18 +868,16 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
  
  static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)

  {
-#if 0
u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
-#endif
unsigned size;
  
  	/*

 * TODO Remove once GART corruption is resolved
 * Check related code in gmc_v9_0_sw_fini
 * */
-   size = 9 * 1024 * 1024;
+   if (gmc_v9_0_keep_stolen_memory(adev))
+   return 9 * 1024 * 1024;
  
-#if 0

if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 
MB for FB */
} else {
@@ -876,6 +894,7 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct 
amdgpu_device *adev)
break;
case CHIP_VEGA10:
case CHIP_VEGA12:
+   case CHIP_VEGA20:
default:
viewport = RREG32_SOC15(DCE, 0, mmSCL0_VIEWPORT_SIZE);
size = (REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, 
VIEWPORT_HEIGHT) *
@@ -888,7 +907,6 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct 
amdgpu_device *adev)
if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))
return 0;
  
-#endif

return size;
  }
  
@@ -1000,16 +1018,8 @@ static int gmc_v9_0_sw_fini(void *handle)

amdgpu_gem_force_release(adev);
amdgpu_vm_manager_fini(adev);
  
-	/*

-   * TODO:
-   * Currently there is a bug where some memory client outside
-   * of the driver writes to first 8M of VRAM on S3 resume,
-   * this overrides GART which by default gets placed in first 8M and
-   * causes VM_FAULTS once GTT is accessed.
-   * Keep the stolen memory reservation until the while this is not solved.
-   * Also check code in gmc_v9_0_get_vbios_fb_size and gmc_v9_0_late_init
-   */
-   amdgpu_bo_free_kernel(>stolen_vga_memory, NULL, NULL);
+   if (gmc_v9_0_keep_stolen_memory(adev))
+   amdgpu_bo_free_kernel(>stolen_vga_memory, NULL, NULL);
  
  	amdgpu_gart_table_vram_free(adev);

amdgpu_bo_fini(adev);


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amd/powerplay: added vega20 overdrive support V3

2018-08-31 Thread Evan Quan
Added vega20 overdrive support based on existing OD sysfs
APIs. However, the OD logic is simplified on vega20, so
the behavior will be a little different and it works only on
some limited levels.

V2: fix typo
fix commit description
revise error logs
add support for clock OD

V3: separate clock from voltage OD settings

Change-Id: I403cb38a95863db664cf06d030ac42a19bff6b33
Signed-off-by: Evan Quan 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c|  45 +++
 .../gpu/drm/amd/include/kgd_pp_interface.h|   2 +
 .../drm/amd/powerplay/hwmgr/vega20_hwmgr.c| 289 +-
 3 files changed, 335 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index e2577518b9c6..262c0ffc9d5d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -474,6 +474,8 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
  * in each power level within a power state.  The pp_od_clk_voltage is used for
  * this.
  *
+ * < For Vega10 and previous ASICs >
+ *
  * Reading the file will display:
  *
  * - a list of engine clock levels and voltages labeled OD_SCLK
@@ -491,6 +493,44 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
  * "c" (commit) to the file to commit your changes.  If you want to reset to 
the
  * default power levels, write "r" (reset) to the file to reset them.
  *
+ *
+ * < For Vega20 >
+ *
+ * Reading the file will display:
+ *
+ * - minimum and maximum engine clock labeled OD_SCLK
+ *
+ * - maximum memory clock labeled OD_MCLK
+ *
+ * - three <frequency, voltage offset> points labeled OD_VDDC_CURVE.
+ *   They can be used to calibrate the sclk voltage curve.
+ *
+ * - a list of valid ranges for sclk, mclk, and voltage curve points
+ *   labeled OD_RANGE
+ *
+ * To manually adjust these settings:
+ *
+ * - First select manual using power_dpm_force_performance_level
+ *
+ * - For clock frequency setting, enter a new value by writing a
+ *   string that contains "s/m index clock" to the file. The index
+ *   should be 0 to set the minimum clock and 1 to set the maximum
+ *   clock. E.g., "s 0 500" will update minimum sclk to be 500 MHz.
+ *   "m 1 800" will update maximum mclk to be 800 MHz.
+ *
+ *   For sclk voltage curve, enter the new values by writing a
+ *   string that contains "vc point clock voff" to the file. The
+ *   points are indexed by 0, 1 and 2. E.g., "vc 0 300 10" will
+ *   update point1 with clock set as 300 MHz and voltage increased
+ *   by 10mV. "vc 2 1000 -10" will update point3 with clock set
+ *   as 1000 MHz and voltage decreased by 10mV.
+ *
+ * - When you have edited all of the states as needed, write "c" (commit)
+ *   to the file to commit your changes
+ *
+ * - If you want to reset to the default power levels, write "r" (reset)
+ *   to the file to reset them
+ *
  */
 
 static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
@@ -520,6 +560,8 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device 
*dev,
type = PP_OD_RESTORE_DEFAULT_TABLE;
else if (*buf == 'c')
type = PP_OD_COMMIT_DPM_TABLE;
+   else if (!strncmp(buf, "vc", 2))
+   type = PP_OD_EDIT_VDDC_CURVE;
else
return -EINVAL;
 
@@ -527,6 +569,8 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device 
*dev,
 
tmp_str = buf_cpy;
 
+   if (type == PP_OD_EDIT_VDDC_CURVE)
+   tmp_str++;
while (isspace(*++tmp_str));
 
while (tmp_str[0]) {
@@ -570,6 +614,7 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device 
*dev,
if (adev->powerplay.pp_funcs->print_clock_levels) {
size = amdgpu_dpm_print_clock_levels(adev, OD_SCLK, buf);
size += amdgpu_dpm_print_clock_levels(adev, OD_MCLK, buf+size);
+   size += amdgpu_dpm_print_clock_levels(adev, OD_VDDC_CURVE, 
buf+size);
size += amdgpu_dpm_print_clock_levels(adev, OD_RANGE, buf+size);
return size;
} else {
diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h 
b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index e23746ba53bf..92780f3fb0b3 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -94,6 +94,7 @@ enum pp_clock_type {
PP_PCIE,
OD_SCLK,
OD_MCLK,
+   OD_VDDC_CURVE,
OD_RANGE,
 };
 
@@ -141,6 +142,7 @@ enum {
 enum PP_OD_DPM_TABLE_COMMAND {
PP_OD_EDIT_SCLK_VDDC_TABLE,
PP_OD_EDIT_MCLK_VDDC_TABLE,
+   PP_OD_EDIT_VDDC_CURVE,
PP_OD_RESTORE_DEFAULT_TABLE,
PP_OD_COMMIT_DPM_TABLE
 };
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c 
b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
index ececa2f7fe5f..546a6170a220 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
@@ -2506,11 +2506,207 @@ static int