Re: [PATCH v4 11/14] drm/amdgpu: Guard against write accesses after device removal

2021-02-05 Thread Andrey Grodzovsky

Daniel, ping. Also, please refer to the other thread with Bjorn from pci-dev
on the same topic, to which I added you.

Andrey

On 1/29/21 2:25 PM, Christian König wrote:

On 29.01.21 at 18:35, Andrey Grodzovsky wrote:


On 1/29/21 10:16 AM, Christian König wrote:

On 28.01.21 at 18:23, Andrey Grodzovsky wrote:


On 1/19/21 1:59 PM, Christian König wrote:

On 19.01.21 at 19:22, Andrey Grodzovsky wrote:


On 1/19/21 1:05 PM, Daniel Vetter wrote:

[SNIP]
So, say, writing in a loop to some harmless scratch register many times, for
both the plugged and unplugged cases, and measuring the total time delta?


I think we should at least measure the following:

1. Writing X times to a scratch reg without your patch.
2. Writing X times to a scratch reg with your patch.
3. Writing X times to a scratch reg with the hardware physically disconnected.

I suggest repeating that once for Polaris (or older) and once for Vega or
Navi.


The SRBM on Polaris is meant to introduce some delay in each access, so it
might react differently than the newer hardware.


Christian.



See attached results and the testing code. Ran on Polaris (gfx8) and
Vega10 (gfx9).


In summary, over 1 million WREG32 calls in a loop, with and without this patch,
you get around 10 ms of accumulated overhead (so a 0.1 millisecond penalty for
each WREG32) for using the drm_dev_enter check when writing registers.


P.S. Bullet 3 I cannot test, as I need an eGPU and currently I don't have one.


Well, if I'm not completely mistaken, that is 100 ms of accumulated overhead,
so around 100 ns per write. And an even bigger problem is that this is a ~67%
increase.



My bad, and 67% of what? How did you calculate that?


My bad, (308501 - 209689) / 209689 = 47% increase.
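For clarity, the arithmetic behind these figures, assuming the attached log
times (209689 and 308501) are total microseconds for 10^6 writes:

  308501 us - 209689 us = 98812 us, i.e. ~100 ms of accumulated overhead
  98812 us / 10^6 writes ~= 99 ns added per write
  98812 / 209689 ~= 0.47, i.e. a ~47% increase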



I'm not sure how many writes we do during normal operation, but that sounds
like a bit much. Ideas?


Well, you suggested moving the drm_dev_enter way up, but as I see it the problem
with this is that it increases the chance of a race where the
device is extracted after we check drm_dev_enter (there is also such a
chance even when it's placed inside WREG32, but it's lower).
Earlier I proposed that instead of doing all those guards scattered all over
the code we simply delay the release of system memory pages and the unreserve of
MMIO ranges until after the device itself is gone, once the last drm device
reference is dropped. But Daniel opposes delaying the MMIO range unreserve to after
the PCI remove code because, according to him, it will upset the PCI subsystem.


Yeah, that's most likely true as well.

Maybe Daniel has another idea when he's back from vacation.

Christian.



Andrey



Christian.







Re: [PATCH v4 11/14] drm/amdgpu: Guard against write accesses after device removal

2021-01-29 Thread Andrey Grodzovsky


On 1/29/21 10:16 AM, Christian König wrote:

On 28.01.21 at 18:23, Andrey Grodzovsky wrote:


On 1/19/21 1:59 PM, Christian König wrote:

On 19.01.21 at 19:22, Andrey Grodzovsky wrote:


On 1/19/21 1:05 PM, Daniel Vetter wrote:

[SNIP]
So, say, writing in a loop to some harmless scratch register many times, for
both the plugged and unplugged cases, and measuring the total time delta?


I think we should at least measure the following:

1. Writing X times to a scratch reg without your patch.
2. Writing X times to a scratch reg with your patch.
3. Writing X times to a scratch reg with the hardware physically disconnected.

I suggest repeating that once for Polaris (or older) and once for Vega or Navi.

The SRBM on Polaris is meant to introduce some delay in each access, so it
might react differently than the newer hardware.


Christian.



See attached results and the testing code. Ran on Polaris (gfx8) and
Vega10 (gfx9).


In summary, over 1 million WREG32 calls in a loop, with and without this patch,
you get around 10 ms of accumulated overhead (so a 0.1 millisecond penalty for
each WREG32) for using the drm_dev_enter check when writing registers.


P.S. Bullet 3 I cannot test, as I need an eGPU and currently I don't have one.


Well, if I'm not completely mistaken, that is 100 ms of accumulated overhead,
so around 100 ns per write. And an even bigger problem is that this is a ~67% increase.



My bad, and 67% of what? How did you calculate that?




I'm not sure how many writes we do during normal operation, but that sounds
like a bit much. Ideas?



Well, you suggested moving the drm_dev_enter way up, but as I see it the problem
with this is that it increases the chance of a race where the
device is extracted after we check drm_dev_enter (there is also such a chance
even when it's placed inside WREG32, but it's lower).
Earlier I proposed that instead of doing all those guards scattered all over the
code we simply delay the release of system memory pages and the unreserve of
MMIO ranges until after the device itself is gone, once the last drm device
reference is dropped. But Daniel opposes delaying the MMIO range unreserve to after
the PCI remove code because, according to him, it will upset the PCI subsystem.

Andrey



Christian.



Re: [PATCH v4 11/14] drm/amdgpu: Guard against write accesses after device removal

2021-01-28 Thread Andrey Grodzovsky


On 1/19/21 1:59 PM, Christian König wrote:

On 19.01.21 at 19:22, Andrey Grodzovsky wrote:


On 1/19/21 1:05 PM, Daniel Vetter wrote:

On Tue, Jan 19, 2021 at 4:35 PM Andrey Grodzovsky
 wrote:

There is really no other way according to this article:
https://lwn.net/Articles/767885/



"A perfect solution seems nearly impossible though; we cannot acquire a 
mutex on

the user
to prevent them from yanking a device and we cannot check for a presence 
change

after every
device access for performance reasons. "

But I assumed srcu_read_lock should be pretty seamless performance-wise, no?

The read side is supposed to be dirt cheap, the write side is where we
just stall for all readers to eventually complete on their own.
Definitely should be much cheaper than mmio read, on the mmio write
side it might actually hurt a bit. Otoh I think those don't stall the
cpu by default when they're timing out, so maybe if the overhead is
too much for those, we could omit them?

Maybe just do a small microbenchmark for these for testing, with a
register that doesn't change hw state. So with and without
drm_dev_enter/exit, and also one with the hw plugged out so that we
have actual timeouts in the transactions.
-Daniel
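For reference, a minimal sketch of the guarded-write pattern under discussion,
using the drm_dev_enter()/drm_dev_exit() API (which is backed by SRCU). The
function name and MMIO details here are illustrative, not the actual amdgpu code:

  #include <linux/io.h>
  #include <drm/drm_drv.h>

  /* Illustrative only: an SRCU-guarded MMIO register write. */
  static void example_guarded_wreg32(struct amdgpu_device *adev, u32 reg, u32 v)
  {
          int idx;

          /* Cheap SRCU read-side section; fails once drm_dev_unplug()
           * has been called for this device. */
          if (!drm_dev_enter(&adev->ddev, &idx))
                  return; /* device gone - drop the write */

          writel(v, adev->rmmio + reg * 4);

          drm_dev_exit(idx);
  }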



So, say, writing in a loop to some harmless scratch register many times, for
both the plugged and unplugged cases, and measuring the total time delta?


I think we should at least measure the following:

1. Writing X times to a scratch reg without your patch.
2. Writing X times to a scratch reg with your patch.
3. Writing X times to a scratch reg with the hardware physically disconnected.

I suggest repeating that once for Polaris (or older) and once for Vega or Navi.

The SRBM on Polaris is meant to introduce some delay in each access, so it
might react differently than the newer hardware.


Christian.



See attached results and the testing code. Ran on Polaris (gfx8) and
Vega10 (gfx9).

In summary, over 1 million WREG32 calls in a loop, with and without this patch,
you get around 10 ms of accumulated overhead (so a 0.1 millisecond penalty for
each WREG32) for using the drm_dev_enter check when writing registers.


P.S. Bullet 3 I cannot test, as I need an eGPU and currently I don't have one.

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c

index 3763921..1650549 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -873,6 +873,11 @@ static int gfx_v8_0_ring_test_ring(struct amdgpu_ring 
*ring)
    if (i >= adev->usec_timeout)
    r = -ETIMEDOUT;

+   DRM_ERROR("Before write 1M times to scratch register");
+   for (i = 0; i < 1000000; i++)
+   WREG32(scratch, 0xDEADBEEF);
+   DRM_ERROR("After write 1M times to scratch register");
+
 error_free_scratch:
    amdgpu_gfx_scratch_free(adev, scratch);
    return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c

index 5f4805e..7ecbfef 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1063,6 +1063,11 @@ static int gfx_v9_0_ring_test_ring(struct amdgpu_ring 
*ring)
    if (i >= adev->usec_timeout)
    r = -ETIMEDOUT;

+   DRM_ERROR("Before write 1M times to scratch register");
+   for (i = 0; i < 1000000; i++)
+   WREG32(scratch, 0xDEADBEEF);
+   DRM_ERROR("After write 1M times to scratch register");
+
 error_free_scratch:
    amdgpu_gfx_scratch_free(adev, scratch);
    return r;


Andrey


Andrey







Andrey





The other solution would be, as I suggested, to keep all the device IO ranges
reserved and system memory pages unfreed until the device is finalized in the
driver, but Daniel said this would upset the PCI layer (the MMIO range
reservation part).

Andrey




On 1/19/21 3:55 AM, Christian König wrote:

On 18.01.21 at 22:01, Andrey Grodzovsky wrote:

This should prevent writing to memory or IO ranges possibly
already allocated for other uses after our device is removed.

Wow, that adds quite some overhead to every register access. I'm not sure we
can do this.

Christian.


Signed-off-by: Andrey Grodzovsky 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 57 
   drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c    |  9 
   drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c    | 53 +-
   drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h    |  3 ++
   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c   | 70 
++

Re: [PATCH v4 01/14] drm/ttm: Remap all page faults to per process dummy page.

2021-01-27 Thread Andrey Grodzovsky

Hey Daniel, just a ping.

Andrey

On 1/25/21 10:28 AM, Andrey Grodzovsky wrote:


On 1/19/21 8:56 AM, Daniel Vetter wrote:

On Mon, Jan 18, 2021 at 04:01:10PM -0500, Andrey Grodzovsky wrote:

On device removal reroute all CPU mappings to dummy page.

v3:
Remove loop to find DRM file and instead access it
by vma->vm_file->private_data. Move dummy page installation
into a separate function.

v4:
Map the entire BO's VA space into an on-demand allocated dummy page
on the first fault for that BO.

Signed-off-by: Andrey Grodzovsky 
---
  drivers/gpu/drm/ttm/ttm_bo_vm.c | 82 
-

  include/drm/ttm/ttm_bo_api.h    |  2 +
  2 files changed, 83 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 6dc96cf..ed89da3 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -34,6 +34,8 @@
  #include 
  #include 
  #include 
+#include 
+#include 
  #include 
  #include 
  #include 
@@ -380,25 +382,103 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault 
*vmf,

  }
  EXPORT_SYMBOL(ttm_bo_vm_fault_reserved);
  +static void ttm_bo_release_dummy_page(struct drm_device *dev, void *res)
+{
+    struct page *dummy_page = (struct page *)res;
+
+    __free_page(dummy_page);
+}
+
+vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot)
+{
+    struct vm_area_struct *vma = vmf->vma;
+    struct ttm_buffer_object *bo = vma->vm_private_data;
+    struct ttm_bo_device *bdev = bo->bdev;
+    struct drm_device *ddev = bo->base.dev;
+    vm_fault_t ret = VM_FAULT_NOPAGE;
+    unsigned long address = vma->vm_start;
+    unsigned long num_prefault = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+    unsigned long pfn;
+    struct page *page;
+    int i;
+
+    /*
+ * Wait for buffer data in transit, due to a pipelined
+ * move.
+ */
+    ret = ttm_bo_vm_fault_idle(bo, vmf);
+    if (unlikely(ret != 0))
+    return ret;
+
+    /* Allocate new dummy page to map all the VA range in this VMA to it*/
+    page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+    if (!page)
+    return VM_FAULT_OOM;
+
+    pfn = page_to_pfn(page);
+
+    /*
+ * Prefault the entire VMA range right away to avoid further faults
+ */
+    for (i = 0; i < num_prefault; ++i) {
+
+    if (unlikely(address >= vma->vm_end))
+    break;
+
+    if (vma->vm_flags & VM_MIXEDMAP)
+    ret = vmf_insert_mixed_prot(vma, address,
+    __pfn_to_pfn_t(pfn, PFN_DEV),
+    prot);
+    else
+    ret = vmf_insert_pfn_prot(vma, address, pfn, prot);
+
+    /* Never error on prefaulted PTEs */
+    if (unlikely((ret & VM_FAULT_ERROR))) {
+    if (i == 0)
+    return VM_FAULT_NOPAGE;
+    else
+    break;
+    }
+
+    address += PAGE_SIZE;
+    }
+
+    /* Set the page to be freed using drmm release action */
+    if (drmm_add_action_or_reset(ddev, ttm_bo_release_dummy_page, page))
+    return VM_FAULT_OOM;
+
+    return ret;
+}
+EXPORT_SYMBOL(ttm_bo_vm_dummy_page);

I think we can lift this entire thing (once the ttm_bo_vm_fault_idle is
gone) to the drm level, since there is nothing ttm specific in here. Probably stuff
it into drm_gem.c (but really it's not even gem specific, it's a fully
generic "replace this vma with dummy pages pls" function).



Once I started with this I noticed that drmm_add_action_or_reset depends
on struct drm_device *ddev = bo->base.dev, and bo is the private data
we embed at the TTM level when setting up the mapping, so this forces
moving drmm_add_action_or_reset out of this function into every client who uses
it, and then you separate the logic of page allocation from its release.

So I suggest we keep it as is.

Andrey




Aside from this nit I think the overall approach you have here is starting
to look good. Lots of work & polish, but imo we're getting there and can
start landing stuff soon.
-Daniel


+
  vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
  {
  struct vm_area_struct *vma = vmf->vma;
  pgprot_t prot;
  struct ttm_buffer_object *bo = vma->vm_private_data;
+    struct drm_device *ddev = bo->base.dev;
  vm_fault_t ret;
+    int idx;
    ret = ttm_bo_vm_reserve(bo, vmf);
  if (ret)
  return ret;
    prot = vma->vm_page_prot;
-    ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT, 1);
+    if (drm_dev_enter(ddev, &idx)) {
+    ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT, 1);
+    drm_dev_exit(idx);
+    } else {
+    ret = ttm_bo_vm_dummy_page(vmf, prot);
+    }
  if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
  return ret;
    dma_resv_unlock(bo->base.resv);
    return ret;
+
+    return ret;
  

Re: [PATCH v4 01/14] drm/ttm: Remap all page faults to per process dummy page.

2021-01-25 Thread Andrey Grodzovsky


On 1/19/21 8:56 AM, Daniel Vetter wrote:

On Mon, Jan 18, 2021 at 04:01:10PM -0500, Andrey Grodzovsky wrote:

On device removal reroute all CPU mappings to dummy page.

v3:
Remove loop to find DRM file and instead access it
by vma->vm_file->private_data. Move dummy page installation
into a separate function.

v4:
Map the entire BO's VA space into an on-demand allocated dummy page
on the first fault for that BO.

Signed-off-by: Andrey Grodzovsky 
---
  drivers/gpu/drm/ttm/ttm_bo_vm.c | 82 -
  include/drm/ttm/ttm_bo_api.h|  2 +
  2 files changed, 83 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 6dc96cf..ed89da3 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -34,6 +34,8 @@
  #include 
  #include 
  #include 
+#include 
+#include 
  #include 
  #include 
  #include 
@@ -380,25 +382,103 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
  }
  EXPORT_SYMBOL(ttm_bo_vm_fault_reserved);
  
+static void ttm_bo_release_dummy_page(struct drm_device *dev, void *res)

+{
+   struct page *dummy_page = (struct page *)res;
+
+   __free_page(dummy_page);
+}
+
+vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot)
+{
+   struct vm_area_struct *vma = vmf->vma;
+   struct ttm_buffer_object *bo = vma->vm_private_data;
+   struct ttm_bo_device *bdev = bo->bdev;
+   struct drm_device *ddev = bo->base.dev;
+   vm_fault_t ret = VM_FAULT_NOPAGE;
+   unsigned long address = vma->vm_start;
+   unsigned long num_prefault = (vma->vm_end - vma->vm_start) >> 
PAGE_SHIFT;
+   unsigned long pfn;
+   struct page *page;
+   int i;
+
+   /*
+* Wait for buffer data in transit, due to a pipelined
+* move.
+*/
+   ret = ttm_bo_vm_fault_idle(bo, vmf);
+   if (unlikely(ret != 0))
+   return ret;
+
+   /* Allocate new dummy page to map all the VA range in this VMA to it*/
+   page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+   if (!page)
+   return VM_FAULT_OOM;
+
+   pfn = page_to_pfn(page);
+
+   /*
+* Prefault the entire VMA range right away to avoid further faults
+*/
+   for (i = 0; i < num_prefault; ++i) {
+
+   if (unlikely(address >= vma->vm_end))
+   break;
+
+   if (vma->vm_flags & VM_MIXEDMAP)
+   ret = vmf_insert_mixed_prot(vma, address,
+   __pfn_to_pfn_t(pfn, 
PFN_DEV),
+   prot);
+   else
+   ret = vmf_insert_pfn_prot(vma, address, pfn, prot);
+
+   /* Never error on prefaulted PTEs */
+   if (unlikely((ret & VM_FAULT_ERROR))) {
+   if (i == 0)
+   return VM_FAULT_NOPAGE;
+   else
+   break;
+   }
+
+   address += PAGE_SIZE;
+   }
+
+   /* Set the page to be freed using drmm release action */
+   if (drmm_add_action_or_reset(ddev, ttm_bo_release_dummy_page, page))
+   return VM_FAULT_OOM;
+
+   return ret;
+}
+EXPORT_SYMBOL(ttm_bo_vm_dummy_page);

I think we can lift this entire thing (once the ttm_bo_vm_fault_idle is
gone) to the drm level, since there is nothing ttm specific in here. Probably stuff
it into drm_gem.c (but really it's not even gem specific, it's a fully
generic "replace this vma with dummy pages pls" function).



Once I started with this I noticed that drmm_add_action_or_reset depends
on struct drm_device *ddev = bo->base.dev, and bo is the private data
we embed at the TTM level when setting up the mapping, so this forces
moving drmm_add_action_or_reset out of this function into every client who uses
it, and then you separate the logic of page allocation from its release.
So I suggest we keep it as is.

Andrey




Aside from this nit I think the overall approach you have here is starting
to look good. Lots of work & polish, but imo we're getting there and can
start landing stuff soon.
-Daniel


+
  vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
  {
struct vm_area_struct *vma = vmf->vma;
pgprot_t prot;
struct ttm_buffer_object *bo = vma->vm_private_data;
+   struct drm_device *ddev = bo->base.dev;
vm_fault_t ret;
+   int idx;
  
  	ret = ttm_bo_vm_reserve(bo, vmf);

if (ret)
return ret;
  
  	prot = vma->vm_page_prot;

-   ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT, 1);
+   if (drm_dev_enter(ddev, &idx)) {
+   ret = ttm_bo_vm_fault_reserved(vmf, prot, 
TTM_BO_VM_NUM_PREFAULT, 1);
+   drm_dev_exit(idx);
+   } else {
+

Re: [PATCH] drm/amdgpu: race issue when jobs on 2 ring timeout

2021-01-21 Thread Andrey Grodzovsky

Looks good to me

Reviewed-by: Andrey Grodzovsky 

Andrey

On 1/21/21 5:21 AM, Horace Chen wrote:

Fix a racing issue when jobs on 2 rings time out simultaneously.

If 2 rings timed out at the same time, the
amdgpu_device_gpu_recover will be reentered. Then the
adev->gmc.xgmi.head will be grabbed by 2 local linked lists,
which may cause a wild pointer issue while iterating.

Lock the device early to prevent the node from being added to 2
different lists.

Also increase karma for the skipped job, since the job also
timed out and should be guilty.

Signed-off-by: Horace Chen 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 69 ++
  1 file changed, 59 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 82fc392e4296..702e577be5e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4459,6 +4459,46 @@ static void amdgpu_device_unlock_adev(struct 
amdgpu_device *adev)
up_write(&adev->reset_sem);
  }
  
+/*
+ * to lock a list of amdgpu devices in a hive safely; if not a hive
+ * with multiple nodes, it will be similar to amdgpu_device_lock_adev.
+ *
+ * unlock won't require rollback.
+ */
+static int amdgpu_device_lock_hive_adev(struct amdgpu_device *adev, struct 
amdgpu_hive_info *hive)
+{
+   struct amdgpu_device *tmp_adev = NULL;
+
+   if (adev->gmc.xgmi.num_physical_nodes > 1) {
+   if (!hive) {
+   dev_err(adev->dev, "Hive is NULL while device has multiple 
xgmi nodes");
+   return -ENODEV;
+   }
+   list_for_each_entry(tmp_adev, &hive->device_list, 
gmc.xgmi.head) {
+   if (!amdgpu_device_lock_adev(tmp_adev, hive))
+   goto roll_back;
+   }
+   } else if (!amdgpu_device_lock_adev(adev, hive))
+   return -EAGAIN;
+
+   return 0;
+roll_back:
+   if (!list_is_first(&tmp_adev->gmc.xgmi.head, &hive->device_list)) {
+   /*
+* if the lock iteration breaks in the middle of a hive,
+* it may mean there is a race issue,
+* or a hive device locked up independently.
+* we may be in trouble or may not, so we will try to roll back
+* the lock and give out a warning.
+*/
+   dev_warn(tmp_adev->dev, "Hive lock iteration broke in the middle. 
Rolling back to unlock");
+   list_for_each_entry_continue_reverse(tmp_adev, 
&hive->device_list, gmc.xgmi.head) {
+   amdgpu_device_unlock_adev(tmp_adev);
+   }
+   }
+   return -EAGAIN;
+}
+
  static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
  {
struct pci_dev *p = NULL;
@@ -4572,11 +4612,29 @@ int amdgpu_device_gpu_recover(struct amdgpu_device 
*adev,
DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as 
another already in progress",
job ? job->base.id : -1, hive->hive_id);
amdgpu_put_xgmi_hive(hive);
+   if (job)
+   drm_sched_increase_karma(&job->base);
return 0;
}
mutex_lock(&hive->hive_lock);
}
  
+	/*

+* lock the device before we try to operate the linked list
+* if didn't get the device lock, don't touch the linked list since
+* others may iterating it.
+*/
+   r = amdgpu_device_lock_hive_adev(adev, hive);
+   if (r) {
+   dev_info(adev->dev, "Bailing on TDR for s_job:%llx, as another 
already in progress",
+   job ? job->base.id : -1);
+
+   /* even we skipped this reset, still need to set the job to 
guilty */
+   if (job)
+   drm_sched_increase_karma(&job->base);
+   goto skip_recovery;
+   }
+
/*
 * Build list of devices to reset.
 * In case we are in XGMI hive mode, resort the device list
@@ -4584,8 +4642,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 */
INIT_LIST_HEAD(&device_list);
if (adev->gmc.xgmi.num_physical_nodes > 1) {
-   if (!hive)
-   return -ENODEV;
if (!list_is_first(&adev->gmc.xgmi.head, &hive->device_list))
list_rotate_to_front(&adev->gmc.xgmi.head, 
&hive->device_list);
device_list_handle = &hive->device_list;
@@ -4596,13 +4652,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
  
  	/* block all schedulers and reset given job's ring *

Re: [PATCH v4 07/14] drm/amdgpu: Register IOMMU topology notifier per device.

2021-01-20 Thread Andrey Grodzovsky

Ping

Andrey

On 1/20/21 12:01 AM, Andrey Grodzovsky wrote:


On 1/19/21 3:48 AM, Christian König wrote:

On 18.01.21 at 22:01, Andrey Grodzovsky wrote:

Handle all DMA IOMMU group related dependencies before the
group is removed.

Signed-off-by: Andrey Grodzovsky 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  5 
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 46 
++

  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c   |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h   |  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  2 ++
  6 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h

index 478a7d8..2953420 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -51,6 +51,7 @@
  #include 
  #include 
  #include 
+#include 
    #include 
  #include 
@@ -1041,6 +1042,10 @@ struct amdgpu_device {
    bool    in_pci_err_recovery;
  struct pci_saved_state  *pci_state;
+
+    struct notifier_block    nb;
+    struct blocking_notifier_head    notifier;
+    struct list_head    device_bo_list;
  };
    static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index 45e23e3..e99f4f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -70,6 +70,8 @@
  #include 
  #include 
  +#include 
+
  MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
  MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
  MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
@@ -3200,6 +3202,39 @@ static const struct attribute 
*amdgpu_dev_attributes[] = {

  };
    +static int amdgpu_iommu_group_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+    struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, nb);
+    struct amdgpu_bo *bo = NULL;
+
+    /*
+ * Following is a set of IOMMU group dependencies taken care of before
+ * device's IOMMU group is removed
+ */
+    if (action == IOMMU_GROUP_NOTIFY_DEL_DEVICE) {
+
+    spin_lock(&ttm_bo_glob.lru_lock);
+    list_for_each_entry(bo, &adev->device_bo_list, bo) {
+    if (bo->tbo.ttm)
+    ttm_tt_unpopulate(bo->tbo.bdev, bo->tbo.ttm);
+    }
+    spin_unlock(&ttm_bo_glob.lru_lock);


That approach won't work. ttm_tt_unpopulate() might sleep on an IOMMU lock.

You need to use a mutex here or even better make sure you can access the 
device_bo_list without a lock in this moment.


Christian.



I can think of switching to an RCU list? Otherwise, elements are added
on BO create and deleted on BO destroy; how can I prevent any of those from
happening while in this section, besides a mutex? Make a copy of the list and
run over it instead?


Andrey
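For reference, a minimal sketch of what the RCU-list idea could look like
(illustrative names; it assumes BO create/destroy switch to list_add_rcu()/
list_del_rcu() plus synchronize_rcu() before the BO is freed). Note that a
plain rcu_read_lock() section cannot sleep either, so on its own this would
not address the concern above about ttm_tt_unpopulate() sleeping:

  /* writer side, on BO destroy (illustrative lock name): */
  spin_lock(&adev->device_bo_lock);
  list_del_rcu(&bo->bo);
  spin_unlock(&adev->device_bo_lock);
  synchronize_rcu();      /* wait out readers before freeing the BO */

  /* reader side, in the notifier: */
  rcu_read_lock();
  list_for_each_entry_rcu(bo, &adev->device_bo_list, bo) {
          if (bo->tbo.ttm)
                  ttm_tt_unpopulate(bo->tbo.bdev, bo->tbo.ttm);
  }
  rcu_read_unlock();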





+
+    if (adev->irq.ih.use_bus_addr)
+    amdgpu_ih_ring_fini(adev, &adev->irq.ih);
+    if (adev->irq.ih1.use_bus_addr)
+    amdgpu_ih_ring_fini(adev, &adev->irq.ih1);
+    if (adev->irq.ih2.use_bus_addr)
+    amdgpu_ih_ring_fini(adev, &adev->irq.ih2);
+
+    amdgpu_gart_dummy_page_fini(adev);
+    }
+
+    return NOTIFY_OK;
+}
+
+
  /**
   * amdgpu_device_init - initialize the driver
   *
@@ -3304,6 +3339,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
    INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
  +    INIT_LIST_HEAD(&adev->device_bo_list);
+
  adev->gfx.gfx_off_req_count = 1;
  adev->pm.ac_power = power_supply_is_system_supplied() > 0;
  @@ -3575,6 +3612,15 @@ int amdgpu_device_init(struct amdgpu_device *adev,
  if (amdgpu_device_cache_pci_state(adev->pdev))
  pci_restore_state(pdev);
  +    BLOCKING_INIT_NOTIFIER_HEAD(&adev->notifier);
+    adev->nb.notifier_call = amdgpu_iommu_group_notifier;
+
+    if (adev->dev->iommu_group) {
+    r = iommu_group_register_notifier(adev->dev->iommu_group, &adev->nb);
+    if (r)
+    goto failed;
+    }
+
  return 0;
    failed:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c

index 0db9330..486ad6d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -92,7 +92,7 @@ static int amdgpu_gart_dummy_page_init(struct 
amdgpu_device *adev)

   *
   * Frees the dummy page used by the driver (all asics).
   */
-static void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
+void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
  {
  if (!adev->dummy_page_addr)
  return;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h 
b

Re: [PATCH v4 11/14] drm/amdgpu: Guard against write accesses after device removal

2021-01-20 Thread Andrey Grodzovsky


On 1/19/21 2:16 PM, Andrey Grodzovsky wrote:


On 1/19/21 1:59 PM, Christian König wrote:

On 19.01.21 at 19:22, Andrey Grodzovsky wrote:


On 1/19/21 1:05 PM, Daniel Vetter wrote:

On Tue, Jan 19, 2021 at 4:35 PM Andrey Grodzovsky
 wrote:

There is really no other way according to this article:
https://lwn.net/Articles/767885/



"A perfect solution seems nearly impossible though; we cannot acquire a 
mutex on

the user
to prevent them from yanking a device and we cannot check for a presence 
change

after every
device access for performance reasons. "

But I assumed srcu_read_lock should be pretty seamless performance-wise, no?

The read side is supposed to be dirt cheap, the write side is where we
just stall for all readers to eventually complete on their own.
Definitely should be much cheaper than mmio read, on the mmio write
side it might actually hurt a bit. Otoh I think those don't stall the
cpu by default when they're timing out, so maybe if the overhead is
too much for those, we could omit them?

Maybe just do a small microbenchmark for these for testing, with a
register that doesn't change hw state. So with and without
drm_dev_enter/exit, and also one with the hw plugged out so that we
have actual timeouts in the transactions.
-Daniel



So, say, writing in a loop to some harmless scratch register many times, for
both the plugged and unplugged cases, and measuring the total time delta?


I think we should at least measure the following:

1. Writing X times to a scratch reg without your patch.
2. Writing X times to a scratch reg with your patch.
3. Writing X times to a scratch reg with the hardware physically disconnected.



Just realized, I can't test this part since I don't have an eGPU to yank out.

Andrey




I suggest repeating that once for Polaris (or older) and once for Vega or Navi.

The SRBM on Polaris is meant to introduce some delay in each access, so it
might react differently than the newer hardware.


Christian.



Will do.

Andrey






Andrey





The other solution would be, as I suggested, to keep all the device IO ranges
reserved and system memory pages unfreed until the device is finalized in the
driver, but Daniel said this would upset the PCI layer (the MMIO range
reservation part).

Andrey




On 1/19/21 3:55 AM, Christian König wrote:

On 18.01.21 at 22:01, Andrey Grodzovsky wrote:

This should prevent writing to memory or IO ranges possibly
already allocated for other uses after our device is removed.

Wow, that adds quite some overhead to every register access. I'm not sure we
can do this.

Christian.


Signed-off-by: Andrey Grodzovsky 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 57 
   drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c    |  9 
   drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c    | 53 +-
   drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h    |  3 ++
   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c   | 70 
++

   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h   | 49 ++---
   drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 16 ++-
   drivers/gpu/drm/amd/amdgpu/psp_v12_0.c |  8 +---
   drivers/gpu/drm/amd/amdgpu/psp_v3_1.c  |  8 +---
   9 files changed, 184 insertions(+), 89 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index e99f4f1..0a9d73c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -72,6 +72,8 @@
 #include 
   +#include 
+
   MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
   MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
   MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
@@ -404,13 +406,21 @@ uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev,
uint32_t offset)
    */
   void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
   {
+    int idx;
+
   if (adev->in_pci_err_recovery)
   return;
   +
+    if (!drm_dev_enter(&adev->ddev, &idx))
+    return;
+
   if (offset < adev->rmmio_size)
   writeb(value, adev->rmmio + offset);
   else
   BUG();
+
+    drm_dev_exit(idx);
   }
 /**
@@ -427,9 +437,14 @@ void amdgpu_device_wreg(struct amdgpu_device *adev,
   uint32_t reg, uint32_t v,
   uint32_t acc_flags)
   {
+    int idx;
+
   if (adev->in_pci_err_recovery)
   return;
   +    if (!drm_dev_enter(&adev->ddev, &idx))
+    return;
+
   if ((reg * 4) < adev->rm

Re: [PATCH] drm/amdgpu: race issue when jobs on 2 ring timeout

2021-01-20 Thread Andrey Grodzovsky



On 1/20/21 9:12 AM, Horace Chen wrote:

Fix a racing issue when jobs on 2 rings time out simultaneously.

If 2 rings timed out at the same time, the
amdgpu_device_gpu_recover will be reentered. Then the
adev->gmc.xgmi.head will be grabbed by 2 local linked lists,
which may cause a wild pointer issue while iterating.

Lock the device early to prevent the node from being added to 2
different lists.

Also increase karma for the skipped job, since the job also
timed out and should be guilty.

Signed-off-by: Horace Chen 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 70 +++---
  1 file changed, 61 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 4d434803fb49..d59d3182ac2d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4460,6 +4460,46 @@ static void amdgpu_device_unlock_adev(struct 
amdgpu_device *adev)
up_write(&adev->reset_sem);
  }
  
+/*
+ * to lock a list of amdgpu devices in a hive safely; if not a hive
+ * with multiple nodes, it will be the same as amdgpu_device_lock_adev.
+ *
+ * unlock won't require rollback.
+ */
+static bool amdgpu_device_lock_hive_adev(struct amdgpu_device *adev, struct 
amdgpu_hive_info *hive)
+{
+   struct amdgpu_device *tmp_adev = NULL;
+
+   if (adev->gmc.xgmi.num_physical_nodes > 1) {
+   if (!hive) {
+   dev_err(adev->dev, "Hive is NULL while device has multiple 
xgmi nodes");
+   return false;
+   }
+   list_for_each_entry(tmp_adev, &hive->device_list, 
gmc.xgmi.head) {
+   if (!amdgpu_device_lock_adev(tmp_adev, hive))
+   goto roll_back;
+   }
+   return true;
+   } else {
+   return amdgpu_device_lock_adev(adev, hive);
+   }
+roll_back:
+   if (!list_is_first(&tmp_adev->gmc.xgmi.head, &hive->device_list)) {
+   /*
+* if the lock iteration breaks in the middle of a hive,
+* it may mean there is a race issue,
+* or a hive device locked up independently.
+* we may be in trouble or may not,
+* so we will try to roll back the lock and give out a warning.
+*/
+   dev_warn(tmp_adev->dev, "Hive lock iteration broke in the middle. 
Rolling back to unlock");
+   list_for_each_entry_continue_reverse(tmp_adev, 
&hive->device_list, gmc.xgmi.head) {
+   amdgpu_device_unlock_adev(tmp_adev);
+   }
+   }
+   return false;
+}
+
  static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
  {
struct pci_dev *p = NULL;
@@ -4573,11 +4613,32 @@ int amdgpu_device_gpu_recover(struct amdgpu_device 
*adev,
DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as 
another already in progress",
job ? job->base.id : -1, hive->hive_id);
amdgpu_put_xgmi_hive(hive);
+   if (job)
+   drm_sched_increase_karma(&job->base);
return 0;
}
mutex_lock(&hive->hive_lock);
}
  
+	/*

+* lock the device before we try to operate the linked list
+* if didn't get the device lock, don't touch the linked list since
+* others may iterating it.
+*/
+   if (!amdgpu_device_lock_hive_adev(adev, hive)) {
+   dev_info(adev->dev, "Bailing on TDR for s_job:%llx, as another 
already in progress",
+   job ? job->base.id : -1);
+
+   if (adev->gmc.xgmi.num_physical_nodes > 1 && !hive)
+   r = -ENODEV;
+   else
+   r = 0;



You can just change the amdgpu_device_lock_hive_adev return type to int instead
of duplicating code, maybe returning -EAGAIN for an actual locking failure.

Andrey



+   /* even we skipped this reset, still need to set the job to 
guilty */
+   if (job)
+   drm_sched_increase_karma(&job->base);
+   goto skip_recovery;
+   }
+
/*
 * Build list of devices to reset.
 * In case we are in XGMI hive mode, resort the device list
@@ -4585,8 +4646,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 */
INIT_LIST_HEAD(&device_list);
if (adev->gmc.xgmi.num_physical_nodes > 1) {
-   if (!hive)
-   return -ENODEV;
if (!list_is_first(&adev->gmc.xgmi.head, &hive->device_list))
list_rotate_to_front(&adev->gmc.xgmi.head, 
&hive->device_list);
device_list_handle = &hive->device_list;
@@ -4597,13 +4656,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,

Re: [PATCH v4 00/14] RFC Support hot device unplug in amdgpu

2021-01-20 Thread Andrey Grodzovsky



On 1/20/21 4:05 AM, Daniel Vetter wrote:

On Tue, Jan 19, 2021 at 01:18:15PM -0500, Andrey Grodzovsky wrote:

On 1/19/21 1:08 PM, Daniel Vetter wrote:

On Tue, Jan 19, 2021 at 6:31 PM Andrey Grodzovsky
 wrote:

On 1/19/21 9:16 AM, Daniel Vetter wrote:

On Mon, Jan 18, 2021 at 04:01:09PM -0500, Andrey Grodzovsky wrote:

Until now, extracting a card either by physical extraction (e.g. an eGPU with a
thunderbolt connection, or by emulation through sysfs -> /sys/bus/pci/devices/device_id/remove)
would cause random crashes in user apps. The random crashes in apps were
mostly due to an app that had mapped a device-backed BO into its address
space still trying to access the BO while the backing device was gone.
To solve this first problem, Christian suggested fixing the handling of mapped
memory in the clients when the device goes away by forcibly unmapping all buffers the
user processes hold, by clearing their respective VMAs mapping the device BOs.
Then, when the VMAs try to fill in the page tables again, we check in the fault
handler if the device has been removed and, if so, return an error. This will generate a
SIGBUS to the application, which can then cleanly terminate. This indeed was done,
but it in turn created a problem of kernel OOPSes, which happened because, while the
app was terminating due to the SIGBUS, it would trigger a use-after-free in the
driver by accessing device structures that had already been
released by the PCI remove sequence. This was handled by introducing a 'flush'
sequence during device removal where we wait for the drm file reference to drop to 0,
meaning all user clients directly using this device have terminated.

v2:
Based on discussions in the mailing list with Daniel and Pekka [1], and based on
the document produced by Pekka from those discussions [2], the whole approach of
returning SIGBUS and waiting for all user clients with CPU mappings of device
BOs to die was dropped. Instead, as per the document's suggestion, the device
structures are kept alive until the last reference to the device is dropped by a
user client, and in the meanwhile all existing and new CPU mappings of the BOs
belonging to the device, directly or by dma-buf import, are rerouted to a
per-user-process dummy rw page. Also, I skipped the 'Requirements for KMS UAPI'
section of [2], since I am trying to get the minimal set of requirements that
still gives a useful solution to work, and this is the 'Requirements for Render
and Cross-Device UAPI' section; so my test case is removing a secondary device,
which is render only and is not involved in KMS.

v3:
More updates following comments on v2, such as removing the loop to find the DRM
file when rerouting page faults to the dummy page, getting rid of unnecessary
sysfs handling refactoring, and moving prevention of GPU recovery post device
unplug from amdgpu to the scheduler layer.
On top of that, added unplug support for IOMMU enabled systems.

v4:
Drop the last sysfs hack and use the sysfs default attribute.
Guard against write accesses after device removal to avoid modifying released
memory.
Update dummy page handling to on-demand allocation and release through the drm
managed framework.
Add a return value to the scheduler job TO handler (by Luben Tuikov) and use
this in amdgpu to prevent GPU recovery post device unplug.
Also rebase on top of drm-misc-next instead of amd-staging-drm-next.

With these patches I am able to gracefully remove the secondary card using the
sysfs remove hook while glxgears is running off of the secondary card
(DRI_PRIME=1), without kernel oopses or hangs, and keep working with the primary
card or soft reset the device without hangs or oopses.

TODOs for followup work:
Convert AMDGPU code to use devm (for hw stuff) and drmm (for sw stuff and 
allocations) (Daniel)
Support plugging the secondary device back after unplug - currently still 
experiencing HW error on plugging back.
Add support for 'Requirements for KMS UAPI' section of [2] - unplugging 
primary, display connected card.

[1] - Discussions during v3 of the patchset:
https://www.spinics.net/lists/amd-gfx/msg55576.html
[2] - drm/doc: device hot-unplug for userspace:
https://www.spinics.net/lists/dri-devel/msg259755.html

Re: [PATCH v4 07/14] drm/amdgpu: Register IOMMU topology notifier per device.

2021-01-19 Thread Andrey Grodzovsky


On 1/19/21 3:48 AM, Christian König wrote:

On 18.01.21 at 22:01, Andrey Grodzovsky wrote:

Handle all DMA IOMMU group related dependencies before the
group is removed.

Signed-off-by: Andrey Grodzovsky 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  5 
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 46 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c   |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h   |  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  2 ++
  6 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h

index 478a7d8..2953420 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -51,6 +51,7 @@
  #include 
  #include 
  #include 
+#include 
    #include 
  #include 
@@ -1041,6 +1042,10 @@ struct amdgpu_device {
    bool    in_pci_err_recovery;
  struct pci_saved_state  *pci_state;
+
+    struct notifier_block    nb;
+    struct blocking_notifier_head    notifier;
+    struct list_head    device_bo_list;
  };
    static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index 45e23e3..e99f4f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -70,6 +70,8 @@
  #include 
  #include 
  +#include 
+
  MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
  MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
  MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
@@ -3200,6 +3202,39 @@ static const struct attribute *amdgpu_dev_attributes[] 
= {

  };
    +static int amdgpu_iommu_group_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+    struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, nb);
+    struct amdgpu_bo *bo = NULL;
+
+    /*
+ * Following is a set of IOMMU group dependencies taken care of before
+ * device's IOMMU group is removed
+ */
+    if (action == IOMMU_GROUP_NOTIFY_DEL_DEVICE) {
+
+    spin_lock(&ttm_bo_glob.lru_lock);
+    list_for_each_entry(bo, &adev->device_bo_list, bo) {
+    if (bo->tbo.ttm)
+    ttm_tt_unpopulate(bo->tbo.bdev, bo->tbo.ttm);
+    }
+    spin_unlock(&ttm_bo_glob.lru_lock);


That approach won't work. ttm_tt_unpopulate() might sleep on an IOMMU lock.

You need to use a mutex here or even better make sure you can access the 
device_bo_list without a lock in this moment.


Christian.



I can think of switching to an RCU list? Otherwise, elements are added
on BO create and deleted on BO destroy; how can I prevent any of those from
happening while in this section, besides a mutex? Make a copy of the list and
run over it instead?


Andrey





+
+    if (adev->irq.ih.use_bus_addr)
+    amdgpu_ih_ring_fini(adev, &adev->irq.ih);
+    if (adev->irq.ih1.use_bus_addr)
+    amdgpu_ih_ring_fini(adev, &adev->irq.ih1);
+    if (adev->irq.ih2.use_bus_addr)
+    amdgpu_ih_ring_fini(adev, &adev->irq.ih2);
+
+    amdgpu_gart_dummy_page_fini(adev);
+    }
+
+    return NOTIFY_OK;
+}
+
+
  /**
   * amdgpu_device_init - initialize the driver
   *
@@ -3304,6 +3339,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
    INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
  +    INIT_LIST_HEAD(&adev->device_bo_list);
+
  adev->gfx.gfx_off_req_count = 1;
  adev->pm.ac_power = power_supply_is_system_supplied() > 0;
  @@ -3575,6 +3612,15 @@ int amdgpu_device_init(struct amdgpu_device *adev,
  if (amdgpu_device_cache_pci_state(adev->pdev))
  pci_restore_state(pdev);
  +    BLOCKING_INIT_NOTIFIER_HEAD(&adev->notifier);
+    adev->nb.notifier_call = amdgpu_iommu_group_notifier;
+
+    if (adev->dev->iommu_group) {
+    r = iommu_group_register_notifier(adev->dev->iommu_group, &adev->nb);
+    if (r)
+    goto failed;
+    }
+
  return 0;
    failed:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c

index 0db9330..486ad6d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -92,7 +92,7 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device 
*adev)

   *
   * Frees the dummy page used by the driver (all asics).
   */
-static void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
+void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
  {
  if (!adev->dummy_page_addr)
  return;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h

index afa2e28..5678d

Re: [PATCH v4 07/14] drm/amdgpu: Register IOMMU topology notifier per device.

2021-01-19 Thread Andrey Grodzovsky


On 1/19/21 5:01 PM, Daniel Vetter wrote:

On Tue, Jan 19, 2021 at 10:22 PM Andrey Grodzovsky
 wrote:


On 1/19/21 8:45 AM, Daniel Vetter wrote:

On Tue, Jan 19, 2021 at 09:48:03AM +0100, Christian König wrote:

On 18.01.21 at 22:01, Andrey Grodzovsky wrote:

Handle all DMA IOMMU group related dependencies before the
group is removed.

Signed-off-by: Andrey Grodzovsky 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu.h|  5 
   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 46 
++
   drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c   |  2 +-
   drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h   |  1 +
   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 +++
   drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  2 ++
   6 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 478a7d8..2953420 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -51,6 +51,7 @@
   #include 
   #include 
   #include 
+#include 
   #include 
   #include 
@@ -1041,6 +1042,10 @@ struct amdgpu_device {
   boolin_pci_err_recovery;
   struct pci_saved_state  *pci_state;
+
+ struct notifier_block nb;
+ struct blocking_notifier_head notifier;
+ struct list_head device_bo_list;
   };
   static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 45e23e3..e99f4f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -70,6 +70,8 @@
   #include 
   #include 
+#include 
+
   MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
   MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
   MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
@@ -3200,6 +3202,39 @@ static const struct attribute *amdgpu_dev_attributes[] = 
{
   };
+static int amdgpu_iommu_group_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, nb);
+ struct amdgpu_bo *bo = NULL;
+
+ /*
+ * Following is a set of IOMMU group dependencies taken care of before
+ * device's IOMMU group is removed
+ */
+ if (action == IOMMU_GROUP_NOTIFY_DEL_DEVICE) {
+
+ spin_lock(&ttm_bo_glob.lru_lock);
+ list_for_each_entry(bo, &adev->device_bo_list, bo) {
+ if (bo->tbo.ttm)
+ ttm_tt_unpopulate(bo->tbo.bdev, bo->tbo.ttm);
+ }
+ spin_unlock(&ttm_bo_glob.lru_lock);

That approach won't work. ttm_tt_unpopulate() might sleep on an IOMMU lock.

You need to use a mutex here or even better make sure you can access the
device_bo_list without a lock in this moment.

I'd also be worried about the notifier mutex getting really badly in the
way.

Plus I'm worried why we even need this, it sounds a bit like papering over
the iommu subsystem. Assuming we clean up all our iommu mappings in our
device hotunplug/unload code, why do we still need to have an additional
iommu notifier on top, with all kinds of additional headaches? The iommu
shouldn't clean up before the devices in its group have cleaned up.

I think we need more info here on what the exact problem is first.
-Daniel


Originally I experienced the crash below on an IOMMU enabled device; it happens
post device removal from PCI topology,
during shutdown of the user client holding the last reference to the drm device
file (X in my case).
The crash happens because by the time I get to this point the struct
device->iommu_group pointer is already NULL, since the IOMMU group for the
device is unset during PCI removal. So this contradicts what you said above,
that the iommu shouldn't clean up before the devices in its group have cleaned
up.
So instead of guessing where the right place is to put all the IOMMU related
cleanups, it makes sense to get a notification from the IOMMU subsystem in the
form of the IOMMU_GROUP_NOTIFY_DEL_DEVICE event, and use that place to do all
the relevant cleanups.

Yeah that goes boom, but you shouldn't need this special iommu cleanup
handler. Making sure that all the dma-api mappings are gone needs to
be done as part of the device hotunplug, you can't delay that to the
last drm_device cleanup.

So most of the patch here, pulling that out (it should be outright
removed from the final release code even), is good, just not yet how
you call that new code. Probably these bits (aside from walking all
buffers and unpopulating the tt) should be done from the early_free
callback you're adding.

Also what I just realized: For normal unload you need to make sure the
hw is actually stopped first, before we unmap buffers. Otherwise
driver unload will likely result in wedged hw, probably not what you
want for debugging.
-Daniel


Since device removal from IOMMU group and this hook in particular
takes place before call to amdgpu_pci_remove essenti

Re: [PATCH 2/2] drm/amdgpu/display: buffer INTERRUPT_LOW_IRQ_CONTEXT interrupt work

2021-01-19 Thread Andrey Grodzovsky


On 1/15/21 2:21 AM, Chen, Xiaogang wrote:

On 1/14/2021 1:24 AM, Grodzovsky, Andrey wrote:


On 1/14/21 12:11 AM, Chen, Xiaogang wrote:

On 1/12/2021 10:54 PM, Grodzovsky, Andrey wrote:

On 1/4/21 1:01 AM, Xiaogang.Chen wrote:

From: Xiaogang Chen 

amdgpu DM handles INTERRUPT_LOW_IRQ_CONTEXT interrupts (hpd, hpd_rx) by
using a work queue with a single work_struct. If a previous interrupt
has not been handled, new interrupts (of the same type) will be discarded and
the driver just sends an "amdgpu_dm_irq_schedule_work FAILED" message out.
If some important hpd, hpd_rx related interrupts are missed by the driver,
hot (un)plugged devices may cause a system hang or instability, such as when
the system resumes from S3 sleep with an mst device connected.

This patch dynamically allocates a new amdgpu_dm_irq_handler_data for
new interrupts if the previous INTERRUPT_LOW_IRQ_CONTEXT interrupt work
has not been handled, so the new interrupt work can be queued to the
same workqueue_struct instead of discarding the new interrupts.
All allocated amdgpu_dm_irq_handler_data are put into a single linked
list and will be reused afterwards.


I believe this creates possible concurrency between an already executing
work item and the new incoming one for which you allocate a new work item
on the fly. While handle_hpd_irq is serialized with aconnector->hpd_lock,
I am seeing that handle_hpd_rx_irq is not locked for the MST use case
(which is the most frequent user of this interrupt). Did you verify that
handle_hpd_rx_irq is reentrant?


handle_hpd_rx_irq is put on a work queue. Its execution is serialized
by the work queue, so there is no reentrancy.


You are using system_highpri_wq, which has the property that it has
a pool of multiple worker threads spread across all the
active CPUs; see all the work queue definitions here:
https://elixir.bootlin.com/linux/v5.11-rc3/source/include/linux/workqueue.h#L358
I believe that what you are saying about no chance of reentrancy would be
correct if it were the same work item dequeued for execution
while the previous instance is still running; see the explanation here:
https://elixir.bootlin.com/linux/v5.11-rc3/source/kernel/workqueue.c#L1435.
Non-reentrancy is guaranteed only for the same work item. If you want
non-reentrancy (full serialization) for different work items, you should
create your own single-threaded workqueue using
create_singlethread_workqueue, as sketched below.
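For reference, a minimal sketch of that suggestion (names are illustrative,
not actual amdgpu_dm code): an ordered single-threaded workqueue executes its
work items one at a time, in queueing order, even when they are distinct
work_struct instances:

  static struct workqueue_struct *hpd_rx_wq;   /* illustrative name */

  /* init: one ordered queue, at most one item executing at a time */
  hpd_rx_wq = create_singlethread_workqueue("amdgpu_dm_hpd_rx");
  if (!hpd_rx_wq)
          return -ENOMEM;

  /* scheduling: queue to the private wq instead of system_highpri_wq */
  queue_work(hpd_rx_wq, &handler_data->work);

  /* teardown: drains pending items, then frees the queue */
  destroy_workqueue(hpd_rx_wq);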



Thank you. I think the easiest way is to use aconnector->hpd_lock in
handle_hpd_rx_irq for the dc_link->type == dc_connection_mst_branch
case, right? I will do that in the next version if you think it is ok.



I am not sure what the consequences are of using the hpd lock there with
regard to other locks acquired in DRM MST code during MST related HPD
transactions, since I haven't dealt with this for a very long time. Maybe
Harry or Nick can advise on this?


Andrey




amdgpu_dm_irq_schedule_work does the queuing of work (it puts
handle_hpd_rx_irq onto the work queue). The first call is
dm_irq_work_func, which then calls handle_hpd_rx_irq.

Signed-off-by: Xiaogang Chen 
---
   drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h  |  14 +--
   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c  | 114
++---
   2 files changed, 80 insertions(+), 48 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index c9d82b9..730e540 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -69,18 +69,6 @@ struct common_irq_params {
   };
     /**
- * struct irq_list_head - Linked-list for low context IRQ handlers.
- *
- * @head: The list_head within &struct handler_data
- * @work: A work_struct containing the deferred handler work
- */
-struct irq_list_head {
-    struct list_head head;
-    /* In case this interrupt needs post-processing, 'work' will
be queued*/
-    struct work_struct work;
-};
-
-/**
    * struct dm_compressor_info - Buffer info used by frame buffer
compression
    * @cpu_addr: MMIO cpu addr
    * @bo_ptr: Pointer to the buffer object
@@ -270,7 +258,7 @@ struct amdgpu_display_manager {
    * Note that handlers are called in the same order as they were
    * registered (FIFO).
    */
-    struct irq_list_head
irq_handler_list_low_tab[DAL_IRQ_SOURCES_NUMBER];
+    struct list_head
irq_handler_list_low_tab[DAL_IRQ_SOURCES_NUMBER];
     /**
    * @irq_handler_list_high_tab:
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
index 3577785..ada344a 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
@@ -82,6 +82,7 @@ struct amdgpu_dm_irq_handler_data {
   struct amdgpu_display_manager *dm;
   /* DAL irq source which registered for this interrupt. */
   enum dc_irq_source irq_source;
+    struct work_struct work;
   };
     #define DM_IRQ_TABLE_LOCK(adev, flags) \
@@ -111,20 +112,10 @@ 

Re: [PATCH v4 07/14] drm/amdgpu: Register IOMMU topology notifier per device.

2021-01-19 Thread Andrey Grodzovsky


On 1/19/21 8:45 AM, Daniel Vetter wrote:

On Tue, Jan 19, 2021 at 09:48:03AM +0100, Christian König wrote:

Am 18.01.21 um 22:01 schrieb Andrey Grodzovsky:

Handle all DMA IOMMU group related dependencies before the
group is removed.

Signed-off-by: Andrey Grodzovsky 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu.h|  5 
   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 46 ++
   drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c   |  2 +-
   drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h   |  1 +
   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 +++
   drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  2 ++
   6 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 478a7d8..2953420 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -51,6 +51,7 @@
   #include 
   #include 
   #include 
+#include <linux/notifier.h>
   #include 
   #include 
@@ -1041,6 +1042,10 @@ struct amdgpu_device {
boolin_pci_err_recovery;
struct pci_saved_state  *pci_state;
+
+   struct notifier_block   nb;
+   struct blocking_notifier_head   notifier;
+   struct list_headdevice_bo_list;
   };
   static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 45e23e3..e99f4f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -70,6 +70,8 @@
   #include 
   #include 
+#include <linux/iommu.h>
+
   MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
   MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
   MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
@@ -3200,6 +3202,39 @@ static const struct attribute *amdgpu_dev_attributes[] = 
{
   };
+static int amdgpu_iommu_group_notifier(struct notifier_block *nb,
+unsigned long action, void *data)
+{
+   struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, nb);
+   struct amdgpu_bo *bo = NULL;
+
+   /*
+* Following is a set of IOMMU group dependencies taken care of before
+* device's IOMMU group is removed
+*/
+   if (action == IOMMU_GROUP_NOTIFY_DEL_DEVICE) {
+
+   spin_lock(&ttm_bo_glob.lru_lock);
+   list_for_each_entry(bo, &adev->device_bo_list, bo) {
+   if (bo->tbo.ttm)
+   ttm_tt_unpopulate(bo->tbo.bdev, bo->tbo.ttm);
+   }
+   spin_unlock(&ttm_bo_glob.lru_lock);

That approach won't work. ttm_tt_unpopulate() might sleep on an IOMMU lock.

You need to use a mutex here, or even better make sure you can access the
device_bo_list without a lock at this point.
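
A sketch of the mutex variant - the spin_lock/unlock pair in the notifier
above replaced by a sleepable lock; 'device_bo_list_mutex' is a hypothetical
new field, not something in the patch:

    /* Sleepable lock, so ttm_tt_unpopulate() taking an IOMMU
     * mutex internally is fine here. */
    mutex_lock(&adev->device_bo_list_mutex);
    list_for_each_entry(bo, &adev->device_bo_list, bo) {
        if (bo->tbo.ttm)
            ttm_tt_unpopulate(bo->tbo.bdev, bo->tbo.ttm);
    }
    mutex_unlock(&adev->device_bo_list_mutex);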

I'd also be worried about the notifier mutex getting really badly in the
way.

Plus I'm worried why we even need this, it sounds a bit like papering over
the iommu subsystem. Assuming we clean up all our iommu mappings in our
device hotunplug/unload code, why do we still need to have an additional
iommu notifier on top, with all kinds of additional headaches? The iommu
shouldn't clean up before the devices in its group have cleaned up.

I think we need more info here on what the exact problem is first.
-Daniel



Originally I experienced the crash below on an IOMMU enabled device; it
happens post device removal from the PCI topology - during shutting down of
the user client holding the last reference to the drm device file (X in my
case). The crash is because by the time I get to this point the struct
device->iommu_group pointer is already NULL, since the IOMMU group for the
device is unset during PCI removal. So this contradicts what you said above,
that the iommu shouldn't clean up before the devices in its group have
cleaned up.
So instead of guessing where the right place is for all the IOMMU related
cleanups, it makes sense to get a notification from the IOMMU subsystem in
the form of the IOMMU_GROUP_NOTIFY_DEL_DEVICE event and use that place to do
all the relevant cleanups.

Andrey


[  123.810074 <   28.126960>] BUG: kernel NULL pointer dereference, address: 00c8
[  123.810080 <    0.06>] #PF: supervisor read access in kernel mode
[  123.810082 <    0.02>] #PF: error_code(0x) - not-present page
[  123.810085 <    0.03>] PGD 0 P4D 0
[  123.810089 <    0.04>] Oops:  [#1] SMP NOPTI
[  123.810094 <    0.05>] CPU: 5 PID: 1418 Comm: Xorg:shlo4 Tainted: G   O  5.9.0-rc2-dev+ #59
[  123.810096 <    0.02>] Hardware name: System manufacturer System Product Name/PRIME X470-PRO, BIOS 4406 02/28/2019
[  123.810105 <    0.09>] RIP: 0010:iommu_get_dma_domain+0x10/0x20
[  123.810108 <    0.03>] Code: b0 48 c7 87 98 
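
For completeness, since the registration hunk is truncated in this archive:
with the iommu_group notifier API of this kernel era, wiring the notifier up
during device init would look roughly like this (a sketch, not the exact
patch):

    /* During device init: subscribe to this device's IOMMU group events. */
    struct iommu_group *group = iommu_group_get(adev->dev);

    if (group) {
        adev->nb.notifier_call = amdgpu_iommu_group_notifier;
        iommu_group_register_notifier(group, &adev->nb);
        iommu_group_put(group);
    }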

Re: [PATCH v4 11/14] drm/amdgpu: Guard against write accesses after device removal

2021-01-19 Thread Andrey Grodzovsky


On 1/19/21 1:59 PM, Christian König wrote:

Am 19.01.21 um 19:22 schrieb Andrey Grodzovsky:


On 1/19/21 1:05 PM, Daniel Vetter wrote:

On Tue, Jan 19, 2021 at 4:35 PM Andrey Grodzovsky
 wrote:

There is really no other way according to this article
https://lwn.net/Articles/767885/

"A perfect solution seems nearly impossible though; we cannot acquire a
mutex on the user to prevent them from yanking a device and we cannot check
for a presence change after every device access for performance reasons."

But I assumed srcu_read_lock should be pretty seamless performance-wise, no ?

The read side is supposed to be dirt cheap, the write side is were we
just stall for all readers to eventually complete on their own.
Definitely should be much cheaper than mmio read, on the mmio write
side it might actually hurt a bit. Otoh I think those don't stall the
cpu by default when they're timing out, so maybe if the overhead is
too much for those, we could omit them?

Maybe just do a small microbenchmark for these for testing, with a
register that doesn't change hw state. So with and without
drm_dev_enter/exit, and also one with the hw plugged out so that we
have actual timeouts in the transactions.
-Daniel



So, say, writing to some harmless scratch register in a loop many times,
both for the plugged and unplugged cases, and measuring the total time
delta?


I think we should at least measure the following:

1. Writing X times to a scratch reg without your patch.
2. Writing X times to a scratch reg with your patch.
3. Writing X times to a scratch reg with the hardware physically disconnected.

I suggest to repeat that once for Polaris (or older) and once for Vega or Navi.

The SRBM on Polaris is meant to introduce some delay in each access, so it 
might react differently than the newer hardware.


Christian.
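
A minimal sketch of such a microbenchmark - the register choice and the loop
count are placeholders; WREG32 is amdgpu's MMIO write macro:

    #include <linux/ktime.h>

    /* Time X scratch-register writes; run once with the patch applied,
     * once without it, and once with the device physically unplugged. */
    static void amdgpu_wreg_microbench(struct amdgpu_device *adev)
    {
        const u32 count = 1000000;      /* "X" writes */
        ktime_t start = ktime_get();
        u32 i;

        for (i = 0; i < count; i++)
            WREG32(mmSCRATCH_REG0, i);  /* harmless scratch register */

        dev_info(adev->dev, "%u writes took %lld ns\n", count,
                 ktime_to_ns(ktime_sub(ktime_get(), start)));
    }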



Will do.

Andrey






Andrey





The other solution would be, as I suggested, to keep all the device IO
ranges reserved and system memory pages unfreed until the device is
finalized in the driver, but Daniel said this would upset the PCI layer (the
MMIO ranges reservation part).

Andrey




On 1/19/21 3:55 AM, Christian König wrote:

Am 18.01.21 um 22:01 schrieb Andrey Grodzovsky:

This should prevent writing to memory or IO ranges possibly
already allocated for other uses after our device is removed.

Wow, that adds quite some overhead to every register access. I'm not sure we
can do this.

Christian.


Signed-off-by: Andrey Grodzovsky 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 57 
   drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c    |  9 
   drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c    | 53 +-
   drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h    |  3 ++
   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c   | 70 ++
   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h   | 49 ++---
   drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 16 ++-
   drivers/gpu/drm/amd/amdgpu/psp_v12_0.c |  8 +---
   drivers/gpu/drm/amd/amdgpu/psp_v3_1.c  |  8 +---
   9 files changed, 184 insertions(+), 89 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index e99f4f1..0a9d73c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -72,6 +72,8 @@
 #include 
+#include <drm/drm_drv.h>
+
   MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
   MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
   MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
@@ -404,13 +406,21 @@ uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev,
uint32_t offset)
    */
   void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t
value)
   {
+    int idx;
+
   if (adev->in_pci_err_recovery)
   return;
   +
+    if (!drm_dev_enter(&adev->ddev, &idx))
+    return;
+
   if (offset < adev->rmmio_size)
   writeb(value, adev->rmmio + offset);
   else
   BUG();
+
+    drm_dev_exit(idx);
   }
 /**
@@ -427,9 +437,14 @@ void amdgpu_device_wreg(struct amdgpu_device *adev,
   uint32_t reg, uint32_t v,
   uint32_t acc_flags)
   {
+    int idx;
+
   if (adev->in_pci_err_recovery)
   return;
   +    if (!drm_dev_enter(&adev->ddev, &idx))
+    return;
+
   if ((reg * 4) < adev->rmmio_size) {
   if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
   amdgpu_sriov_runtime(adev) &&
@@ -444,6 +459,8 @@ voi

Re: [PATCH v4 10/14] dmr/amdgpu: Move some sysfs attrs creation to default_attr

2021-01-19 Thread Andrey Grodzovsky



On 1/19/21 2:04 PM, Alex Deucher wrote:

On Tue, Jan 19, 2021 at 1:26 PM Greg KH  wrote:

On Tue, Jan 19, 2021 at 11:36:01AM -0500, Andrey Grodzovsky wrote:

On 1/19/21 2:34 AM, Greg KH wrote:

On Mon, Jan 18, 2021 at 04:01:19PM -0500, Andrey Grodzovsky wrote:

   static struct pci_driver amdgpu_kms_pci_driver = {
   .name = DRIVER_NAME,
   .id_table = pciidlist,
@@ -1595,6 +1607,7 @@ static struct pci_driver amdgpu_kms_pci_driver = {
   .shutdown = amdgpu_pci_shutdown,
   .driver.pm = &amdgpu_pm_ops,
   .err_handler = &amdgpu_pci_err_handler,
+ .driver.dev_groups = amdgpu_sysfs_groups,

Shouldn't this just be:
 groups = amdgpu_sysfs_groups,

Why go to the "driver root" here?


Because I still didn't get to your suggestion to propose a patch to add groups 
to
pci_driver, it's located in 'base' driver struct.

You are a pci driver, you should never have to mess with the "base"
driver struct.  Look at commit 92d50fc1602e ("PCI/IB: add support for
pci driver attribute groups") which got merged in 4.14, way back in
2017 :)

Per the previous discussion of this patch set:
https://www.mail-archive.com/amd-gfx@lists.freedesktop.org/msg56019.html

Alex



Got it. In the next iteration I will include a patch like the above to
pci-devel as part of the series and will update this patch accordingly.


Andrey
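
For reference, a rough sketch of the two wirings under discussion - the group
definitions follow patch 10/14, while a first-class device-groups field on
pci_driver itself is what the follow-up patch to linux-pci would add; treat
this as illustrative only:

    static struct attribute *amdgpu_sysfs_attrs[] = {
        &dev_attr_vbios_version.attr,   /* example attr from patch 10/14 */
        NULL
    };

    static const struct attribute_group amdgpu_sysfs_group = {
        .attrs = amdgpu_sysfs_attrs
    };

    static const struct attribute_group *amdgpu_sysfs_groups[] = {
        &amdgpu_sysfs_group,
        NULL
    };

    static struct pci_driver amdgpu_kms_pci_driver = {
        .name = DRIVER_NAME,
        /* what the patch does today: reach into the embedded device_driver */
        .driver.dev_groups = amdgpu_sysfs_groups,
        /* what a dev_groups field on pci_driver itself would allow:
         * .dev_groups = amdgpu_sysfs_groups,  (hypothetical, per above) */
    };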





driver.pm also looks odd, but I'm just going to ignore that for now...

thanks,

greg k-h



Re: [PATCH v4 11/14] drm/amdgpu: Guard against write accesses after device removal

2021-01-19 Thread Andrey Grodzovsky


On 1/19/21 1:05 PM, Daniel Vetter wrote:

On Tue, Jan 19, 2021 at 4:35 PM Andrey Grodzovsky
 wrote:

There is really no other way according to this article
https://lwn.net/Articles/767885/

"A perfect solution seems nearly impossible though; we cannot acquire a
mutex on the user to prevent them from yanking a device and we cannot check
for a presence change after every device access for performance reasons."

But I assumed srcu_read_lock should be pretty seamless performance-wise, no ?

The read side is supposed to be dirt cheap, the write side is were we
just stall for all readers to eventually complete on their own.
Definitely should be much cheaper than mmio read, on the mmio write
side it might actually hurt a bit. Otoh I think those don't stall the
cpu by default when they're timing out, so maybe if the overhead is
too much for those, we could omit them?

Maybe just do a small microbenchmark for these for testing, with a
register that doesn't change hw state. So with and without
drm_dev_enter/exit, and also one with the hw plugged out so that we
have actual timeouts in the transactions.
-Daniel
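
For context, drm_dev_enter()/drm_dev_exit() are built on SRCU, so the cost
profile Daniel describes maps onto the standard SRCU pattern; a minimal
sketch:

    #include <linux/srcu.h>

    DEFINE_STATIC_SRCU(unplug_srcu);

    static void reader_path(void)
    {
        /* Read side (what drm_dev_enter() does): a per-CPU counter
         * bump, no atomics or barriers on the fast path. */
        int idx = srcu_read_lock(&unplug_srcu);

        /* ... perform the MMIO access ... */

        srcu_read_unlock(&unplug_srcu, idx);
    }

    static void unplug_path(void)
    {
        /* Write side (what drm_dev_unplug() does): mark the device
         * gone, then stall until every in-flight reader has exited. */
        synchronize_srcu(&unplug_srcu);
    }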



So, say, writing to some harmless scratch register in a loop many times,
both for the plugged and unplugged cases, and measuring the total time
delta?

Andrey





The other solution would be, as I suggested, to keep all the device IO
ranges reserved and system memory pages unfreed until the device is
finalized in the driver, but Daniel said this would upset the PCI layer (the
MMIO ranges reservation part).

Andrey




On 1/19/21 3:55 AM, Christian König wrote:

Am 18.01.21 um 22:01 schrieb Andrey Grodzovsky:

This should prevent writing to memory or IO ranges possibly
already allocated for other uses after our device is removed.

Wow, that adds quite some overhead to every register access. I'm not sure we
can do this.

Christian.


Signed-off-by: Andrey Grodzovsky 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 57 
   drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c|  9 
   drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c| 53 +-
   drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h|  3 ++
   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c   | 70 ++
   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h   | 49 ++---
   drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 16 ++-
   drivers/gpu/drm/amd/amdgpu/psp_v12_0.c |  8 +---
   drivers/gpu/drm/amd/amdgpu/psp_v3_1.c  |  8 +---
   9 files changed, 184 insertions(+), 89 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index e99f4f1..0a9d73c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -72,6 +72,8 @@
 #include 
+#include <drm/drm_drv.h>
+
   MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
   MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
   MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
@@ -404,13 +406,21 @@ uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev,
uint32_t offset)
*/
   void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t
value)
   {
+int idx;
+
   if (adev->in_pci_err_recovery)
   return;
   +
+if (!drm_dev_enter(&adev->ddev, &idx))
+return;
+
   if (offset < adev->rmmio_size)
   writeb(value, adev->rmmio + offset);
   else
   BUG();
+
+drm_dev_exit(idx);
   }
 /**
@@ -427,9 +437,14 @@ void amdgpu_device_wreg(struct amdgpu_device *adev,
   uint32_t reg, uint32_t v,
   uint32_t acc_flags)
   {
+int idx;
+
   if (adev->in_pci_err_recovery)
   return;
   +if (!drm_dev_enter(&adev->ddev, &idx))
+return;
+
   if ((reg * 4) < adev->rmmio_size) {
   if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
   amdgpu_sriov_runtime(adev) &&
@@ -444,6 +459,8 @@ void amdgpu_device_wreg(struct amdgpu_device *adev,
   }
 trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
+
+drm_dev_exit(idx);
   }
 /*
@@ -454,9 +471,14 @@ void amdgpu_device_wreg(struct amdgpu_device *adev,
   void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
uint32_t reg, uint32_t v)
   {
+int idx;
+
   if (adev->in_pci_err_recovery)
   return;
   +if (!drm_dev_enter(&adev->ddev, &idx))
+return;
+
   if (amdgpu_sriov_fullaccess(adev) &&
adev->gfx.rlc.funcs &&

Re: [PATCH v4 00/14] RFC Support hot device unplug in amdgpu

2021-01-19 Thread Andrey Grodzovsky



On 1/19/21 1:08 PM, Daniel Vetter wrote:

On Tue, Jan 19, 2021 at 6:31 PM Andrey Grodzovsky
 wrote:


On 1/19/21 9:16 AM, Daniel Vetter wrote:

On Mon, Jan 18, 2021 at 04:01:09PM -0500, Andrey Grodzovsky wrote:

Until now extracting a card either by physical extraction (e.g. eGPU with
thunderbolt connection or by emulation through sysfs ->
/sys/bus/pci/devices/device_id/remove) would cause random crashes in user
apps. The random crashes in apps were mostly due to the app having mapped a
device backed BO into its address space and still trying to access the BO
while the backing device was gone.
To answer this first problem Christian suggested to fix the handling of
mapped memory in the clients when the device goes away by forcibly unmapping
all buffers the user processes have by clearing their respective VMAs
mapping the device BOs. Then when the VMAs try to fill in the page tables
again we check in the fault handler if the device is removed and if so,
return an error. This will generate a SIGBUS to the application which can
then cleanly terminate. This indeed was done, but this in turn created a
problem of kernel OOPSes, where the OOPSes were due to the fact that while
the app was terminating because of the SIGBUS it would trigger use after
free in the driver by calling to access device structures that were already
released from the pci remove sequence. This was handled by introducing a
'flush' sequence during device removal, where we wait for the drm file
reference to drop to 0, meaning all user clients directly using this device
terminated.

v2:
Based on discussions in the mailing list with Daniel and Pekka [1] and based
on the document produced by Pekka from those discussions [2], the whole
approach with returning SIGBUS and waiting for all user clients having CPU
mapping of device BOs to die was dropped. Instead, as per the document's
suggestion, the device structures are kept alive until the last reference to
the device is dropped by a user client, and in the meanwhile all existing
and new CPU mappings of the BOs belonging to the device, directly or by
dma-buf import, are rerouted to a per user process dummy rw page. Also, I
skipped the 'Requirements for KMS UAPI' section of [2] since I am trying to
get the minimal set of requirements that still gives a useful solution to
work, and this is the 'Requirements for Render and Cross-Device UAPI'
section; so my test case is removing a secondary device, which is render
only and is not involved in KMS.

v3:
More updates following comments from v2, such as removing the loop to find
the DRM file when rerouting page faults to the dummy page, getting rid of
unnecessary sysfs handling refactoring, and moving prevention of GPU
recovery post device unplug from amdgpu to the scheduler layer.
On top of that, added unplug support for IOMMU enabled systems.

v4:
Drop the last sysfs hack and use a sysfs default attribute.
Guard against write accesses after device removal to avoid modifying
released memory.
Update dummy pages handling to on demand allocation and release through the
drm managed framework.
Add a return value to the scheduler job TO handler (by Luben Tuikov) and use
this in amdgpu for prevention of GPU recovery post device unplug.
Also rebase on top of drm-misc-next instead of amd-staging-drm-next.

With these patches I am able to gracefully remove the secondary card using
the sysfs remove hook while glxgears is running off of the secondary card
(DRI_PRIME=1) without kernel oopses or hangs, and keep working with the
primary card or soft reset the device without hangs or oopses.

TODOs for followup work:
Convert AMDGPU code to use devm (for hw stuff) and drmm (for sw stuff and
allocations) (Daniel)
Support plugging the secondary device back after unplug - currently still
experiencing a HW error on plugging back.
Add support for the 'Requirements for KMS UAPI' section of [2] - unplugging
the primary, display connected card.

[1] - Discussions during v3 of the patchset
https://www.spinics.net/lists/amd-gfx/msg55576.html
[2] - drm/doc: device hot-unplug for userspace
https://www.spinics.net/lists/dri-devel/msg259755.html
[3] - Related gitlab ticket
https://gitlab.freedesktop.org/drm/amd/-/issues/1081

Re: [PATCH v4 00/14] RFC Support hot device unplug in amdgpu

2021-01-19 Thread Andrey Grodzovsky


On 1/19/21 9:16 AM, Daniel Vetter wrote:

On Mon, Jan 18, 2021 at 04:01:09PM -0500, Andrey Grodzovsky wrote:

Until now extracting a card either by physical extraction (e.g. eGPU with
thunderbolt connection or by emulation through sysfs ->
/sys/bus/pci/devices/device_id/remove) would cause random crashes in user
apps. The random crashes in apps were mostly due to the app having mapped a
device backed BO into its address space and still trying to access the BO
while the backing device was gone.
To answer this first problem Christian suggested to fix the handling of
mapped memory in the clients when the device goes away by forcibly unmapping
all buffers the user processes have by clearing their respective VMAs
mapping the device BOs. Then when the VMAs try to fill in the page tables
again we check in the fault handler if the device is removed and if so,
return an error. This will generate a SIGBUS to the application which can
then cleanly terminate. This indeed was done, but this in turn created a
problem of kernel OOPSes, where the OOPSes were due to the fact that while
the app was terminating because of the SIGBUS it would trigger use after
free in the driver by calling to access device structures that were already
released from the pci remove sequence. This was handled by introducing a
'flush' sequence during device removal, where we wait for the drm file
reference to drop to 0, meaning all user clients directly using this device
terminated.

v2:
Based on discussions in the mailing list with Daniel and Pekka [1] and based
on the document produced by Pekka from those discussions [2], the whole
approach with returning SIGBUS and waiting for all user clients having CPU
mapping of device BOs to die was dropped. Instead, as per the document's
suggestion, the device structures are kept alive until the last reference to
the device is dropped by a user client, and in the meanwhile all existing
and new CPU mappings of the BOs belonging to the device, directly or by
dma-buf import, are rerouted to a per user process dummy rw page. Also, I
skipped the 'Requirements for KMS UAPI' section of [2] since I am trying to
get the minimal set of requirements that still gives a useful solution to
work, and this is the 'Requirements for Render and Cross-Device UAPI'
section; so my test case is removing a secondary device, which is render
only and is not involved in KMS.

v3:
More updates following comments from v2, such as removing the loop to find
the DRM file when rerouting page faults to the dummy page, getting rid of
unnecessary sysfs handling refactoring, and moving prevention of GPU
recovery post device unplug from amdgpu to the scheduler layer.
On top of that, added unplug support for IOMMU enabled systems.

v4:
Drop the last sysfs hack and use a sysfs default attribute.
Guard against write accesses after device removal to avoid modifying
released memory.
Update dummy pages handling to on demand allocation and release through the
drm managed framework.
Add a return value to the scheduler job TO handler (by Luben Tuikov) and use
this in amdgpu for prevention of GPU recovery post device unplug.
Also rebase on top of drm-misc-next instead of amd-staging-drm-next.

With these patches I am able to gracefully remove the secondary card using
the sysfs remove hook while glxgears is running off of the secondary card
(DRI_PRIME=1) without kernel oopses or hangs, and keep working with the
primary card or soft reset the device without hangs or oopses.

TODOs for followup work:
Convert AMDGPU code to use devm (for hw stuff) and drmm (for sw stuff and
allocations) (Daniel)
Support plugging the secondary device back after unplug - currently still
experiencing a HW error on plugging back.
Add support for the 'Requirements for KMS UAPI' section of [2] - unplugging
the primary, display connected card.

[1] - Discussions during v3 of the patchset
https://www.spinics.net/lists/amd-gfx/msg55576.html
[2] - drm/doc: device hot-unplug for userspace
https://www.spinics.net/lists/dri-devel/msg259755.html
[3] - Related gitlab ticket
https://gitlab.freedesktop.org/drm/amd/-/issues/1081

Re: Re: Re: [PATCH 1/2] drm/amdgpu: race issue when jobs on 2 ring timeout

2021-01-19 Thread Andrey Grodzovsky
Well, it shouldn't happen with the hive locked, as far as I can tell
browsing the code, but then your code should reflect that, and if you do
fail to lock a particular adev AFTER the hive is locked you should not
silently break the iteration but throw an error, WARN_ON or BUG_ON. Or
alternatively bail out, unlocking all the already locked devices.


Andrey


On 1/19/21 12:09 PM, Chen, Horace wrote:




OK, I understand. You mean one device in the hive may be locked up 
independently without locking up the whole hive.


It could happen, I'll change my code.

Thanks & Regards,
Horace.


*From:* Grodzovsky, Andrey 
*Sent:* January 20, 2021 0:58
*To:* Chen, Horace ; amd-gfx@lists.freedesktop.org 
*Cc:* Quan, Evan ; Tuikov, Luben ; 
Koenig, Christian ; Deucher, Alexander 
; Xiao, Jack ; Zhang, Hawking 
; Liu, Monk ; Xu, Feifei 
; Wang, Kevin(Yang) ; Xiaojie Yuan 

*Subject:* Re: Re: [PATCH 1/2] drm/amdgpu: race issue when jobs on 2 ring timeout


On 1/19/21 11:39 AM, Chen, Horace wrote:


[AMD Official Use Only - Internal Distribution Only]


Hi Andrey,

I think the list in the XGMI hive won't be broken in the middle if we lock
the device before we change the list, because if 2 devices in 1 hive go
into the function, they will follow the same sequence to lock the devices,
so one of them will definitely break at the first device. I added iterating
the devices here just to lock all devices in the hive, since we will change
the device sequence in the hive soon after.



I didn't mean break in a sense of breaking the list itself, I just meant the 
literal 'break' instruction

to terminate the iteration once you failed to lock a particular device.




The reason the iteration breaks in the middle is that the list is changed
during the iteration without taking any lock. That is quite bad, and I'm
fixing one of these issues. And for the XGMI hive, there are 2 locks
protecting the list: one is the device lock I changed here; the other one,
in front of my change, is the hive->lock protecting the hive.

Even if the bad thing really happened, I think moving back through the list
is also very dangerous, since we don't know what the list will finally be,
unless we stack the devices we have iterated through on a mirrored list.
That can be a big change.



Not sure we are on the same page. My concern is: let's say your XGMI hive
consists of 2 devices, and you managed to successfully call
amdgpu_device_lock_adev for dev1 but then failed for dev2; in this case you
will bail out without releasing dev1, no?



Andrey





I'm ok to separate the locking in amdgpu_device_lock_adev here, I'll do some
tests and update the code later.


Thanks & Regards,
Horace.

*From:* Grodzovsky, Andrey 
*Sent:* January 19, 2021 22:33
*To:* Chen, Horace ; amd-gfx@lists.freedesktop.org 
*Cc:* Quan, Evan ; Tuikov, Luben ; 
Koenig, Christian ; Deucher, Alexander 
; Xiao, Jack ; Zhang, Hawking 
; Liu, Monk ; Xu, Feifei 
; Wang, Kevin(Yang) ; Xiaojie Yuan 

*Subject:* Re: [PATCH 1/2] drm/amdgpu: race issue when jobs on 2 ring timeout

On 1/19/21 7:22 AM, Horace Chen wrote:
> Fix a racing issue when jobs on 2 rings timeout simultaneously.
>
> If 2 rings time out at the same time, amdgpu_device_gpu_recover
> will be reentered. Then the adev->gmc.xgmi.head will be grabbed
> by 2 local linked lists, which may cause a wild pointer issue when
> iterating.
>
> Lock the device early to prevent the node from being added to 2 different
> lists.
>
> Signed-off-by: Horace Chen  
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 42 +++---
>   1 file changed, 30 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

> index 4d434803fb49..9574da3abc32 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -4540,6 +4540,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device 
*adev,
>    int i, r = 0;
>    bool need_emergency_restart = false;
>    bool audio_suspended = false;
> + bool get_dev_lock = false;
>
>    /*
> * Special case: RAS triggered and full reset isn't supported
> @@ -4582,28 +4583,45 @@ int amdgpu_device_gpu_recover(struct amdgpu_device 
*adev,

> * Build list of devices to reset.
> * In case we are in XGMI hive mode, r

Re: Re: [PATCH 1/2] drm/amdgpu: race issue when jobs on 2 ring timeout

2021-01-19 Thread Andrey Grodzovsky


On 1/19/21 11:39 AM, Chen, Horace wrote:




Hi Andrey,

I think the list in the XGMI hive won't be broken in the middle if we lock
the device before we change the list, because if 2 devices in 1 hive go
into the function, they will follow the same sequence to lock the devices,
so one of them will definitely break at the first device. I added iterating
the devices here just to lock all devices in the hive, since we will change
the device sequence in the hive soon after.



I didn't mean break in a sense of breaking the list itself, I just meant the 
literal 'break' instruction

to terminate the iteration once you failed to lock a particular device.




The reason the iteration breaks in the middle is that the list is changed
during the iteration without taking any lock. That is quite bad, and I'm
fixing one of these issues. And for the XGMI hive, there are 2 locks
protecting the list: one is the device lock I changed here; the other one,
in front of my change, is the hive->lock protecting the hive.

Even if the bad thing really happened, I think moving back through the list
is also very dangerous, since we don't know what the list will finally be,
unless we stack the devices we have iterated through on a mirrored list.
That can be a big change.



Not sure we are on the same page. My concern is: let's say your XGMI hive
consists of 2 devices, and you managed to successfully call
amdgpu_device_lock_adev for dev1 but then failed for dev2; in this case you
will bail out without releasing dev1, no?



Andrey





I'm ok to separate the locking in amdgpu_device_lock_adev here, I'll do some
tests and update the code later.


Thanks & Regards,
Horace.

*From:* Grodzovsky, Andrey 
*Sent:* January 19, 2021 22:33
*To:* Chen, Horace ; amd-gfx@lists.freedesktop.org 
*Cc:* Quan, Evan ; Tuikov, Luben ; 
Koenig, Christian ; Deucher, Alexander 
; Xiao, Jack ; Zhang, Hawking 
; Liu, Monk ; Xu, Feifei 
; Wang, Kevin(Yang) ; Xiaojie Yuan 

*Subject:* Re: [PATCH 1/2] drm/amdgpu: race issue when jobs on 2 ring timeout

On 1/19/21 7:22 AM, Horace Chen wrote:
> Fix a racing issue when jobs on 2 rings timeout simultaneously.
>
> If 2 rings time out at the same time, amdgpu_device_gpu_recover
> will be reentered. Then the adev->gmc.xgmi.head will be grabbed
> by 2 local linked lists, which may cause a wild pointer issue when
> iterating.
>
> Lock the device early to prevent the node from being added to 2 different
> lists.
>
> Signed-off-by: Horace Chen 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 42 +++---
>   1 file changed, 30 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

> index 4d434803fb49..9574da3abc32 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -4540,6 +4540,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device 
*adev,
>    int i, r = 0;
>    bool need_emergency_restart = false;
>    bool audio_suspended = false;
> + bool get_dev_lock = false;
>
>    /*
> * Special case: RAS triggered and full reset isn't supported
> @@ -4582,28 +4583,45 @@ int amdgpu_device_gpu_recover(struct amdgpu_device 
*adev,

> * Build list of devices to reset.
> * In case we are in XGMI hive mode, resort the device list
> * to put adev in the 1st position.
> +  *
> +  * lock the device before we try to operate the linked list
> +  * if didn't get the device lock, don't touch the linked list since
> +  * others may iterating it.
> */
>    INIT_LIST_HEAD(&device_list);
>    if (adev->gmc.xgmi.num_physical_nodes > 1) {
>    if (!hive)
>    return -ENODEV;
> - if (!list_is_first(&adev->gmc.xgmi.head, &hive->device_list))
> - list_rotate_to_front(&adev->gmc.xgmi.head, &hive->device_list);
> - device_list_handle = &hive->device_list;
> +
> + list_for_each_entry(tmp_adev, &hive->device_list, 
gmc.xgmi.head) {
> + get_dev_lock = amdgpu_device_lock_adev(tmp_adev, hive);
> + if (!get_dev_lock)
> + break;


What about unlocking back all the devices you already locked if the break
happens in the middle of the iteration?
Note that at skip_recovery: we don't do it. BTW, I see this issue is already in
the current code.

Also, maybe now it's better to separate the actual locking in
amdgpu_device_lock_adev from the other stuff going on there, since I don't
think you would want to toggle stuff like adev->mp1_state back and forth,
and the function name is not descriptive of the other stuff going on there
anyway.

Andrey


> + }
> + if (get_dev_lock) {
> + if (!list_is_first(&adev->gmc.xgmi.head, 
&hive->d

Re: [PATCH v4 10/14] dmr/amdgpu: Move some sysfs attrs creation to default_attr

2021-01-19 Thread Andrey Grodzovsky



On 1/19/21 2:34 AM, Greg KH wrote:

On Mon, Jan 18, 2021 at 04:01:19PM -0500, Andrey Grodzovsky wrote:

  static struct pci_driver amdgpu_kms_pci_driver = {
.name = DRIVER_NAME,
.id_table = pciidlist,
@@ -1595,6 +1607,7 @@ static struct pci_driver amdgpu_kms_pci_driver = {
.shutdown = amdgpu_pci_shutdown,
.driver.pm = &amdgpu_pm_ops,
.err_handler = &amdgpu_pci_err_handler,
+   .driver.dev_groups = amdgpu_sysfs_groups,

Shouldn't this just be:
groups = amdgpu_sysfs_groups,

Why go to the "driver root" here?



Because I still didn't get to your suggestion to propose a patch to add groups 
to
pci_driver, it's located in 'base' driver struct.

Andrey




Other than that tiny thing, looks good to me, nice cleanup!

greg k-h



Re: [PATCH v4 11/14] drm/amdgpu: Guard against write accesses after device removal

2021-01-19 Thread Andrey Grodzovsky
There is really no other way according to this article 
https://lwn.net/Articles/767885/


"A perfect solution seems nearly impossible though; we cannot acquire a mutex on 
the user
to prevent them from yanking a device and we cannot check for a presence change 
after every

device access for performance reasons. "

But I assumed srcu_read_lock should be pretty seamless performance wise, no ?
The other solution would be, as I suggested, to keep all the device IO
ranges reserved and system memory pages unfreed until the device is
finalized in the driver, but Daniel said this would upset the PCI layer (the
MMIO ranges reservation part).


Andrey




On 1/19/21 3:55 AM, Christian König wrote:

Am 18.01.21 um 22:01 schrieb Andrey Grodzovsky:

This should prevent writing to memory or IO ranges possibly
already allocated for other uses after our device is removed.


Wow, that adds quite some overhead to every register access. I'm not sure we 
can do this.


Christian.



Signed-off-by: Andrey Grodzovsky 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 57 
  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c    |  9 
  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c    | 53 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h    |  3 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c   | 70 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h   | 49 ++---
  drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 16 ++-
  drivers/gpu/drm/amd/amdgpu/psp_v12_0.c |  8 +---
  drivers/gpu/drm/amd/amdgpu/psp_v3_1.c  |  8 +---
  9 files changed, 184 insertions(+), 89 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index e99f4f1..0a9d73c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -72,6 +72,8 @@
    #include 
+#include <drm/drm_drv.h>
+
  MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
  MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
  MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
@@ -404,13 +406,21 @@ uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, 
uint32_t offset)

   */
  void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t 
value)

  {
+    int idx;
+
  if (adev->in_pci_err_recovery)
  return;
  +
+    if (!drm_dev_enter(&adev->ddev, &idx))
+    return;
+
  if (offset < adev->rmmio_size)
  writeb(value, adev->rmmio + offset);
  else
  BUG();
+
+    drm_dev_exit(idx);
  }
    /**
@@ -427,9 +437,14 @@ void amdgpu_device_wreg(struct amdgpu_device *adev,
  uint32_t reg, uint32_t v,
  uint32_t acc_flags)
  {
+    int idx;
+
  if (adev->in_pci_err_recovery)
  return;
  +    if (!drm_dev_enter(&adev->ddev, &idx))
+    return;
+
  if ((reg * 4) < adev->rmmio_size) {
  if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
  amdgpu_sriov_runtime(adev) &&
@@ -444,6 +459,8 @@ void amdgpu_device_wreg(struct amdgpu_device *adev,
  }
    trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
+
+    drm_dev_exit(idx);
  }
    /*
@@ -454,9 +471,14 @@ void amdgpu_device_wreg(struct amdgpu_device *adev,
  void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
   uint32_t reg, uint32_t v)
  {
+    int idx;
+
  if (adev->in_pci_err_recovery)
  return;
  +    if (!drm_dev_enter(&adev->ddev, &idx))
+    return;
+
  if (amdgpu_sriov_fullaccess(adev) &&
  adev->gfx.rlc.funcs &&
  adev->gfx.rlc.funcs->is_rlcg_access_range) {
@@ -465,6 +487,8 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
  } else {
  writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
  }
+
+    drm_dev_exit(idx);
  }
    /**
@@ -499,15 +523,22 @@ u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
   */
  void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
  {
+    int idx;
+
  if (adev->in_pci_err_recovery)
  return;
  +    if (!drm_dev_enter(&adev->ddev, &idx))
+    return;
+
  if ((reg * 4) < adev->rio_mem_size)
  iowrite32(v, adev->rio_mem + (reg * 4));
  else {
  iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
  iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
  }
+
+    drm_dev_exit(idx);
  }
    /**
@@ -544,14 +575,21 @@ u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 
index)

   */
  void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
  {
+    int idx;
+
  if (adev->in_pci_err_recovery)
  return;
  +    if (!drm_dev_enter(&adev->ddev, &idx))
+    return;
+
  if (index < adev->doorbell.num_doorbells) {
  writel(v, adev->doorbell.ptr + index);
  } else {

Re: [PATCH 2/2] drm/amdgpu: set job guilty if reset skipped

2021-01-19 Thread Andrey Grodzovsky

Reviewed-by: Andrey Grodzovsky 

Andrey

On 1/19/21 7:22 AM, Horace Chen wrote:

If 2 jobs on 2 different rings time out within a very short
period, the reset for the second job will be skipped because the
reset is already in progress.

But that doesn't mean the second job is not guilty, since it
also timed out and can be a bad job. So before skipping out
of the reset, we need to increase karma for this job too.

Signed-off-by: Horace Chen 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 
  1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 9574da3abc32..1d6ff9fe37de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4574,6 +4574,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as 
another already in progress",
job ? job->base.id : -1, hive->hive_id);
amdgpu_put_xgmi_hive(hive);
+   if (job)
+   drm_sched_increase_karma(&job->base);
return 0;
}
mutex_lock(&hive->hive_lock);
@@ -4617,6 +4619,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
job ? job->base.id : -1);
r = 0;
/* even we skipped this reset, still need to set the job to 
guilty */
+   if (job)
+   drm_sched_increase_karma(&job->base);
goto skip_recovery;
}
  



Re: [PATCH 1/2] drm/amdgpu: race issue when jobs on 2 ring timeout

2021-01-19 Thread Andrey Grodzovsky



On 1/19/21 7:22 AM, Horace Chen wrote:

Fix a racing issue when jobs on 2 rings time out simultaneously.

If 2 rings time out at the same time, amdgpu_device_gpu_recover
will be reentered. Then the adev->gmc.xgmi.head will be grabbed
by 2 local linked lists, which may cause a wild pointer issue when
iterating.

Lock the device early to prevent the node from being added to 2 different
lists.

Signed-off-by: Horace Chen 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 42 +++---
  1 file changed, 30 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 4d434803fb49..9574da3abc32 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4540,6 +4540,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
int i, r = 0;
bool need_emergency_restart = false;
bool audio_suspended = false;
+   bool get_dev_lock = false;
  
  	/*

 * Special case: RAS triggered and full reset isn't supported
@@ -4582,28 +4583,45 @@ int amdgpu_device_gpu_recover(struct amdgpu_device 
*adev,
 * Build list of devices to reset.
 * In case we are in XGMI hive mode, resort the device list
 * to put adev in the 1st position.
+*
+* lock the device before we try to operate on the linked list;
+* if we didn't get the device lock, don't touch the linked list since
+* others may be iterating over it.
 */
INIT_LIST_HEAD(&device_list);
if (adev->gmc.xgmi.num_physical_nodes > 1) {
if (!hive)
return -ENODEV;
-   if (!list_is_first(&adev->gmc.xgmi.head, &hive->device_list))
-   list_rotate_to_front(&adev->gmc.xgmi.head, 
&hive->device_list);
-   device_list_handle = &hive->device_list;
+
+   list_for_each_entry(tmp_adev, &hive->device_list, 
gmc.xgmi.head) {
+   get_dev_lock = amdgpu_device_lock_adev(tmp_adev, hive);
+   if (!get_dev_lock)
+   break;



What about unlocking back all the devices you already locked if the break
happens in the middle of the iteration?
Note that at skip_recovery: we don't do it. BTW, I see this issue is already in
the current code.


Also, maybe now it's better to separate the actual locking in
amdgpu_device_lock_adev from the other stuff going on there, since I don't
think you would want to toggle stuff like adev->mp1_state back and forth,
and the function name is not descriptive of the other stuff going on there
anyway.

Andrey
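
A sketch of the unlock-on-failure variant being asked about - hypothetical
placement inside amdgpu_device_gpu_recover, using the existing unlock
helper:

    list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
        if (!amdgpu_device_lock_adev(tmp_adev, hive)) {
            /* Roll back: unlock every device locked so far. */
            list_for_each_entry_continue_reverse(tmp_adev,
                            &hive->device_list, gmc.xgmi.head)
                amdgpu_device_unlock_adev(tmp_adev);
            return 0;
        }
    }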



+   }
+   if (get_dev_lock) {
+   if (!list_is_first(&adev->gmc.xgmi.head, 
&hive->device_list))
+   list_rotate_to_front(&adev->gmc.xgmi.head, 
&hive->device_list);
+   device_list_handle = &hive->device_list;
+   }
} else {
-   list_add_tail(&adev->gmc.xgmi.head, &device_list);
-   device_list_handle = &device_list;
+   get_dev_lock = amdgpu_device_lock_adev(adev, hive);
+   tmp_adev = adev;
+   if (get_dev_lock) {
+   list_add_tail(&adev->gmc.xgmi.head, &device_list);
+   device_list_handle = &device_list;
+   }
+   }
+
+   if (!get_dev_lock) {
+   dev_info(tmp_adev->dev, "Bailing on TDR for s_job:%llx, as another 
already in progress",
+   job ? job->base.id : -1);
+   r = 0;
+   /* even we skipped this reset, still need to set the job to 
guilty */
+   goto skip_recovery;
}
  
  	/* block all schedulers and reset given job's ring */

list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
-   if (!amdgpu_device_lock_adev(tmp_adev, hive)) {
-   dev_info(tmp_adev->dev, "Bailing on TDR for s_job:%llx, as 
another already in progress",
- job ? job->base.id : -1);
-   r = 0;
-   goto skip_recovery;
-   }
-
/*
 * Try to put the audio codec into suspend state
 * before gpu reset started.



[PATCH v4 14/14] drm/amdgpu: Prevent any job recoveries after device is unplugged.

2021-01-18 Thread Andrey Grodzovsky
Return DRM_TASK_STATUS_ENODEV back to the scheduler when the device
is not present, so the timeout timer will not be rearmed.

Signed-off-by: Andrey Grodzovsky 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 19 ---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index a111326..e4aa5fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -25,6 +25,8 @@
 #include 
 #include 
 
+#include <drm/drm_drv.h>
+
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 
@@ -34,6 +36,15 @@ static enum drm_task_status amdgpu_job_timedout(struct 
drm_sched_job *s_job)
struct amdgpu_job *job = to_amdgpu_job(s_job);
struct amdgpu_task_info ti;
struct amdgpu_device *adev = ring->adev;
+   int idx;
+
+   if (!drm_dev_enter(&adev->ddev, &idx)) {
+   DRM_INFO("%s - device unplugged skipping recovery on 
scheduler:%s",
+__func__, s_job->sched->name);
+
+   /* Effectively the job is aborted as the device is gone */
+   return DRM_TASK_STATUS_ENODEV;
+   }
 
memset(&ti, 0, sizeof(struct amdgpu_task_info));
 
@@ -41,7 +52,7 @@ static enum drm_task_status amdgpu_job_timedout(struct 
drm_sched_job *s_job)
amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) 
{
DRM_ERROR("ring %s timeout, but soft recovered\n",
  s_job->sched->name);
-   return DRM_TASK_STATUS_ALIVE;
+   goto exit;
}
 
amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti);
@@ -53,13 +64,15 @@ static enum drm_task_status amdgpu_job_timedout(struct 
drm_sched_job *s_job)
 
if (amdgpu_device_should_recover_gpu(ring->adev)) {
amdgpu_device_gpu_recover(ring->adev, job);
-   return DRM_TASK_STATUS_ALIVE;
} else {
drm_sched_suspend_timeout(&ring->sched);
if (amdgpu_sriov_vf(adev))
adev->virt.tdr_debug = true;
-   return DRM_TASK_STATUS_ALIVE;
}
+
+exit:
+   drm_dev_exit(idx);
+   return DRM_TASK_STATUS_ALIVE;
 }
 
 int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
-- 
2.7.4



[PATCH v4 13/14] drm/sched: Make timeout timer rearm conditional.

2021-01-18 Thread Andrey Grodzovsky
We don't want to rearm the timer if the driver hook reports
that the device is gone.

Signed-off-by: Andrey Grodzovsky 
---
 drivers/gpu/drm/scheduler/sched_main.c | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index 73fccc5..9552334 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -314,6 +314,7 @@ static void drm_sched_job_timedout(struct work_struct *work)
 {
struct drm_gpu_scheduler *sched;
struct drm_sched_job *job;
+   enum drm_task_status status = DRM_TASK_STATUS_ALIVE;
 
sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
 
@@ -331,7 +332,7 @@ static void drm_sched_job_timedout(struct work_struct *work)
list_del_init(&job->list);
spin_unlock(&sched->job_list_lock);
 
-   job->sched->ops->timedout_job(job);
+   status = job->sched->ops->timedout_job(job);
 
/*
 * Guilty job did complete and hence needs to be manually 
removed
@@ -345,9 +346,11 @@ static void drm_sched_job_timedout(struct work_struct 
*work)
spin_unlock(&sched->job_list_lock);
}
 
-   spin_lock(&sched->job_list_lock);
-   drm_sched_start_timeout(sched);
-   spin_unlock(&sched->job_list_lock);
+   if (status != DRM_TASK_STATUS_ENODEV) {
+   spin_lock(&sched->job_list_lock);
+   drm_sched_start_timeout(sched);
+   spin_unlock(&sched->job_list_lock);
+   }
 }
 
  /**
-- 
2.7.4



[PATCH v4 12/14] drm/scheduler: Job timeout handler returns status

2021-01-18 Thread Andrey Grodzovsky
From: Luben Tuikov 

This patch does not change current behaviour.

The driver's job timeout handler now returns
status indicating back to the DRM layer whether
the task (job) was successfully aborted or whether
more time should be given to the task to complete.

Default behaviour, as of this patch, is preserved,
except in the obvious-by-comment case in the Panfrost
driver, as documented below.

All drivers which make use of the
drm_sched_backend_ops' .timedout_job() callback
have been accordingly renamed and return the
would've-been default value of
DRM_TASK_STATUS_ALIVE to restart the task's
timeout timer--this is the old behaviour, and
is preserved by this patch.

In the case of the Panfrost driver, its timedout
callback correctly first checks if the job had
completed in due time and if so, it now returns
DRM_TASK_STATUS_COMPLETE to notify the DRM layer
that the task can be moved to the done list, to be
freed later. In the other two subsequent checks,
the value of DRM_TASK_STATUS_ALIVE is returned, as
per the default behaviour.

More involved driver solutions can be had
in subsequent patches.

v2: Use enum as the status of a driver's job
timeout callback method.

v4: (By Andrey Grodzovsky)
Replace DRM_TASK_STATUS_COMPLETE with DRM_TASK_STATUS_ENODEV
to enable a hint to the scheduler for when NOT to rearm the
timeout timer.

Cc: Alexander Deucher 
Cc: Andrey Grodzovsky 
Cc: Christian König 
Cc: Daniel Vetter 
Cc: Lucas Stach 
Cc: Russell King 
Cc: Christian Gmeiner 
Cc: Qiang Yu 
Cc: Rob Herring 
Cc: Tomeu Vizoso 
Cc: Steven Price 
Cc: Alyssa Rosenzweig 
Cc: Eric Anholt 
Reported-by: kernel test robot 
Signed-off-by: Luben Tuikov 
Signed-off-by: Andrey Grodzovsky 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c |  6 --
 drivers/gpu/drm/etnaviv/etnaviv_sched.c | 10 +-
 drivers/gpu/drm/lima/lima_sched.c   |  4 +++-
 drivers/gpu/drm/panfrost/panfrost_job.c |  9 ++---
 drivers/gpu/drm/scheduler/sched_main.c  |  4 +---
 drivers/gpu/drm/v3d/v3d_sched.c | 32 +---
 include/drm/gpu_scheduler.h | 17 ++---
 7 files changed, 54 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index ff48101..a111326 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -28,7 +28,7 @@
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 
-static void amdgpu_job_timedout(struct drm_sched_job *s_job)
+static enum drm_task_status amdgpu_job_timedout(struct drm_sched_job *s_job)
 {
struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
struct amdgpu_job *job = to_amdgpu_job(s_job);
@@ -41,7 +41,7 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) 
{
DRM_ERROR("ring %s timeout, but soft recovered\n",
  s_job->sched->name);
-   return;
+   return DRM_TASK_STATUS_ALIVE;
}
 
amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti);
@@ -53,10 +53,12 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
 
if (amdgpu_device_should_recover_gpu(ring->adev)) {
amdgpu_device_gpu_recover(ring->adev, job);
+   return DRM_TASK_STATUS_ALIVE;
} else {
drm_sched_suspend_timeout(&ring->sched);
if (amdgpu_sriov_vf(adev))
adev->virt.tdr_debug = true;
+   return DRM_TASK_STATUS_ALIVE;
}
 }
 
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c 
b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
index cd46c88..c495169 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
@@ -82,7 +82,8 @@ static struct dma_fence *etnaviv_sched_run_job(struct 
drm_sched_job *sched_job)
return fence;
 }
 
-static void etnaviv_sched_timedout_job(struct drm_sched_job *sched_job)
+static enum drm_task_status etnaviv_sched_timedout_job(struct drm_sched_job
+  *sched_job)
 {
struct etnaviv_gem_submit *submit = to_etnaviv_submit(sched_job);
struct etnaviv_gpu *gpu = submit->gpu;
@@ -120,9 +121,16 @@ static void etnaviv_sched_timedout_job(struct 
drm_sched_job *sched_job)
 
drm_sched_resubmit_jobs(&gpu->sched);
 
+   /* Tell the DRM scheduler that this task needs
+* more time.
+*/
+   drm_sched_start(&gpu->sched, true);
+   return DRM_TASK_STATUS_ALIVE;
+
 out_no_timeout:
/* restart scheduler after GPU is usable again */
drm_sched_start(&gpu->sched, true);
+   return DRM_TASK_STATUS_ALIVE;
 }
 
 static void etnaviv_sched_free_job(struct drm_sched_job *sched_job)
diff --git a

[PATCH v4 11/14] drm/amdgpu: Guard against write accesses after device removal

2021-01-18 Thread Andrey Grodzovsky
This should prevent writing to memory or IO ranges possibly
already allocated for other uses after our device is removed.

Signed-off-by: Andrey Grodzovsky 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 57 
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c|  9 
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c| 53 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h|  3 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c   | 70 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h   | 49 ++---
 drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 16 ++-
 drivers/gpu/drm/amd/amdgpu/psp_v12_0.c |  8 +---
 drivers/gpu/drm/amd/amdgpu/psp_v3_1.c  |  8 +---
 9 files changed, 184 insertions(+), 89 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index e99f4f1..0a9d73c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -72,6 +72,8 @@
 
 #include 
 
+#include <drm/drm_drv.h>
+
 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
@@ -404,13 +406,21 @@ uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, 
uint32_t offset)
  */
 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t 
value)
 {
+   int idx;
+
if (adev->in_pci_err_recovery)
return;
 
+
+   if (!drm_dev_enter(&adev->ddev, &idx))
+   return;
+
if (offset < adev->rmmio_size)
writeb(value, adev->rmmio + offset);
else
BUG();
+
+   drm_dev_exit(idx);
 }
 
 /**
@@ -427,9 +437,14 @@ void amdgpu_device_wreg(struct amdgpu_device *adev,
uint32_t reg, uint32_t v,
uint32_t acc_flags)
 {
+   int idx;
+
if (adev->in_pci_err_recovery)
return;
 
+   if (!drm_dev_enter(&adev->ddev, &idx))
+   return;
+
if ((reg * 4) < adev->rmmio_size) {
if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
amdgpu_sriov_runtime(adev) &&
@@ -444,6 +459,8 @@ void amdgpu_device_wreg(struct amdgpu_device *adev,
}
 
trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
+
+   drm_dev_exit(idx);
 }
 
 /*
@@ -454,9 +471,14 @@ void amdgpu_device_wreg(struct amdgpu_device *adev,
 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
 uint32_t reg, uint32_t v)
 {
+   int idx;
+
if (adev->in_pci_err_recovery)
return;
 
+   if (!drm_dev_enter(&adev->ddev, &idx))
+   return;
+
if (amdgpu_sriov_fullaccess(adev) &&
adev->gfx.rlc.funcs &&
adev->gfx.rlc.funcs->is_rlcg_access_range) {
@@ -465,6 +487,8 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
} else {
writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
}
+
+   drm_dev_exit(idx);
 }
 
 /**
@@ -499,15 +523,22 @@ u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
  */
 void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
 {
+   int idx;
+
if (adev->in_pci_err_recovery)
return;
 
+   if (!drm_dev_enter(&adev->ddev, &idx))
+   return;
+
if ((reg * 4) < adev->rio_mem_size)
iowrite32(v, adev->rio_mem + (reg * 4));
else {
iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
}
+
+   drm_dev_exit(idx);
 }
 
 /**
@@ -544,14 +575,21 @@ u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 
index)
  */
 void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
 {
+   int idx;
+
if (adev->in_pci_err_recovery)
return;
 
+   if (!drm_dev_enter(&adev->ddev, &idx))
+   return;
+
if (index < adev->doorbell.num_doorbells) {
writel(v, adev->doorbell.ptr + index);
} else {
DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
}
+
+   drm_dev_exit(idx);
 }
 
 /**
@@ -588,14 +626,21 @@ u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 
index)
  */
 void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
 {
+   int idx;
+
if (adev->in_pci_err_recovery)
return;
 
+   if (!drm_dev_enter(&adev->ddev, &idx))
+   return;
+
if (index < adev->doorbell.num_doorbells) {
atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
} else {
DRM_ERROR("writ

[PATCH v4 10/14] drm/amdgpu: Move some sysfs attrs creation to default_attr

2021-01-18 Thread Andrey Grodzovsky
This allows us to remove the explicit creation and destruction
of those attrs and thereby avoids warnings during device
finalization after physical device extraction.
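Condensed, the mechanism is: collect the attributes into attribute
groups and hand them to the driver core via the pci_driver's
driver.dev_groups pointer; the core then creates the files on probe and
removes them on device removal (names as in the diff below):

static struct attribute *amdgpu_vbios_version_attrs[] = {
	&dev_attr_vbios_version.attr,
	NULL
};

const struct attribute_group amdgpu_vbios_version_attr_group = {
	.attrs = amdgpu_vbios_version_attrs
};

/* in amdgpu_drv.c:
 *	amdgpu_kms_pci_driver.driver.dev_groups = amdgpu_sysfs_groups;
 * so no device_create_file()/device_remove_file() calls are needed.
 */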

Signed-off-by: Andrey Grodzovsky 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c | 17 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c  | 13 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c  | 25 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 14 +-
 4 files changed, 37 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index 86add0f..0346e12 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -1953,6 +1953,15 @@ static ssize_t amdgpu_atombios_get_vbios_version(struct 
device *dev,
 static DEVICE_ATTR(vbios_version, 0444, amdgpu_atombios_get_vbios_version,
   NULL);
 
+static struct attribute *amdgpu_vbios_version_attrs[] = {
+   &dev_attr_vbios_version.attr,
+   NULL
+};
+
+const struct attribute_group amdgpu_vbios_version_attr_group = {
+   .attrs = amdgpu_vbios_version_attrs
+};
+
 /**
  * amdgpu_atombios_fini - free the driver info and callbacks for atombios
  *
@@ -1972,7 +1981,6 @@ void amdgpu_atombios_fini(struct amdgpu_device *adev)
adev->mode_info.atom_context = NULL;
kfree(adev->mode_info.atom_card_info);
adev->mode_info.atom_card_info = NULL;
-   device_remove_file(adev->dev, &dev_attr_vbios_version);
 }
 
 /**
@@ -1989,7 +1997,6 @@ int amdgpu_atombios_init(struct amdgpu_device *adev)
 {
struct card_info *atom_card_info =
kzalloc(sizeof(struct card_info), GFP_KERNEL);
-   int ret;
 
if (!atom_card_info)
return -ENOMEM;
@@ -2027,12 +2034,6 @@ int amdgpu_atombios_init(struct amdgpu_device *adev)
amdgpu_atombios_allocate_fb_scratch(adev);
}
 
-   ret = device_create_file(adev->dev, &dev_attr_vbios_version);
-   if (ret) {
-   DRM_ERROR("Failed to create device file for VBIOS version\n");
-   return ret;
-   }
-
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 9c0cd00..8fddd74 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1587,6 +1587,18 @@ static struct pci_error_handlers amdgpu_pci_err_handler 
= {
.resume = amdgpu_pci_resume,
 };
 
+extern const struct attribute_group amdgpu_vram_mgr_attr_group;
+extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
+extern const struct attribute_group amdgpu_vbios_version_attr_group;
+
+static const struct attribute_group *amdgpu_sysfs_groups[] = {
+   &amdgpu_vram_mgr_attr_group,
+   &amdgpu_gtt_mgr_attr_group,
+   &amdgpu_vbios_version_attr_group,
+   NULL,
+};
+
+
 static struct pci_driver amdgpu_kms_pci_driver = {
.name = DRIVER_NAME,
.id_table = pciidlist,
@@ -1595,6 +1607,7 @@ static struct pci_driver amdgpu_kms_pci_driver = {
.shutdown = amdgpu_pci_shutdown,
.driver.pm = &amdgpu_pm_ops,
.err_handler = &amdgpu_pci_err_handler,
+   .driver.dev_groups = amdgpu_sysfs_groups,
 };
 
 static int __init amdgpu_init(void)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 8980329..3b7150e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -77,6 +77,16 @@ static DEVICE_ATTR(mem_info_gtt_total, S_IRUGO,
 static DEVICE_ATTR(mem_info_gtt_used, S_IRUGO,
   amdgpu_mem_info_gtt_used_show, NULL);
 
+static struct attribute *amdgpu_gtt_mgr_attributes[] = {
+   &dev_attr_mem_info_gtt_total.attr,
+   &dev_attr_mem_info_gtt_used.attr,
+   NULL
+};
+
+const struct attribute_group amdgpu_gtt_mgr_attr_group = {
+   .attrs = amdgpu_gtt_mgr_attributes
+};
+
 static const struct ttm_resource_manager_func amdgpu_gtt_mgr_func;
 /**
  * amdgpu_gtt_mgr_init - init GTT manager and DRM MM
@@ -91,7 +101,6 @@ int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t 
gtt_size)
struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr;
struct ttm_resource_manager *man = &mgr->manager;
uint64_t start, size;
-   int ret;
 
man->use_tt = true;
man->func = &amdgpu_gtt_mgr_func;
@@ -104,17 +113,6 @@ int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, 
uint64_t gtt_size)
spin_lock_init(&mgr->lock);
atomic64_set(&mgr->available, gtt_size >> PAGE_SHIFT);
 
-   ret = device_create_file(adev->dev, &dev_attr_mem_info_gtt_total);
-   if (ret) {
-   DRM_ERROR("Failed to create device file mem_info_gtt_total\n");
-

[PATCH v4 09/14] drm/amdgpu: Remap all page faults to per process dummy page.

2021-01-18 Thread Andrey Grodzovsky
On device removal, reroute all CPU mappings to a dummy page
per drm_file instance or imported GEM object.
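The reroute is a single branch in the fault handler, condensed from the
diff below (reservation handling omitted):

	if (drm_dev_enter(ddev, &idx)) {
		/* device alive - fault in the real BO pages */
		ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
					       TTM_BO_VM_NUM_PREFAULT, 1);
		drm_dev_exit(idx);
	} else {
		/* device gone - map the dummy page over the whole VMA */
		ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
	}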

v4:
Update for modified ttm_bo_vm_dummy_page

Signed-off-by: Andrey Grodzovsky 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 21 -
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 9fd2157..550dc5e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -49,6 +49,7 @@
 
 #include 
 #include 
+#include 
 
 #include "amdgpu.h"
 #include "amdgpu_object.h"
@@ -1982,18 +1983,28 @@ void amdgpu_ttm_set_buffer_funcs_status(struct 
amdgpu_device *adev, bool enable)
 static vm_fault_t amdgpu_ttm_fault(struct vm_fault *vmf)
 {
struct ttm_buffer_object *bo = vmf->vma->vm_private_data;
+   struct drm_device *ddev = bo->base.dev;
vm_fault_t ret;
+   int idx;
 
ret = ttm_bo_vm_reserve(bo, vmf);
if (ret)
return ret;
 
-   ret = amdgpu_bo_fault_reserve_notify(bo);
-   if (ret)
-   goto unlock;
+   if (drm_dev_enter(ddev, &idx)) {
+   ret = amdgpu_bo_fault_reserve_notify(bo);
+   if (ret) {
+   drm_dev_exit(idx);
+   goto unlock;
+   }
 
-   ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
-  TTM_BO_VM_NUM_PREFAULT, 1);
+		ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
+					       TTM_BO_VM_NUM_PREFAULT, 1);
+
+		drm_dev_exit(idx);
+   } else {
+   ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
+   }
if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
return ret;
 
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v4 08/14] drm/amdgpu: Fix a bunch of sdma code crash post device unplug

2021-01-18 Thread Andrey Grodzovsky
We can't allocate and submit IBs post device unplug.
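The fix, condensed from the diff below, is the usual guard at the top of
the page-table update path that allocates and submits SDMA IBs:

	int r, idx;

	/* no IB allocation or submission once the device is unplugged */
	if (!drm_dev_enter(&adev->ddev, &idx))
		return -ENOENT;

	/* ... build and commit the page table update ... */

	drm_dev_exit(idx);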

Signed-off-by: Andrey Grodzovsky 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index ad91c0c..5096351 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -31,6 +31,7 @@
 #include 
 
 #include 
+#include 
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_amdkfd.h"
@@ -1604,7 +1605,10 @@ static int amdgpu_vm_bo_update_mapping(struct 
amdgpu_device *adev,
struct amdgpu_vm_update_params params;
enum amdgpu_sync_mode sync_mode;
uint64_t pfn;
-   int r;
+   int r, idx;
+
+   if (!drm_dev_enter(&adev->ddev, &idx))
+   return -ENOENT;
 
memset(¶ms, 0, sizeof(params));
params.adev = adev;
@@ -1647,6 +1651,8 @@ static int amdgpu_vm_bo_update_mapping(struct 
amdgpu_device *adev,
if (r)
goto error_unlock;
 
+
+   drm_dev_exit(idx);
do {
uint64_t tmp, num_entries, addr;
 
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v4 07/14] drm/amdgpu: Register IOMMU topology notifier per device.

2021-01-18 Thread Andrey Grodzovsky
Handle all DMA IOMMU group related dependencies before the
group is removed.
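Condensed, the driver registers a notifier on its IOMMU group and, when
IOMMU_GROUP_NOTIFY_DEL_DEVICE fires, drops everything that still holds
DMA mappings through that group (full list in the diff below):

static int amdgpu_iommu_group_notifier(struct notifier_block *nb,
				       unsigned long action, void *data)
{
	struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, nb);

	if (action == IOMMU_GROUP_NOTIFY_DEL_DEVICE) {
		/* unpopulate IOMMU-backed TTM pages, free the bus-address
		 * IH rings and the GART dummy page */
	}

	return NOTIFY_OK;
}

	/* at init time: */
	if (adev->dev->iommu_group)
		r = iommu_group_register_notifier(adev->dev->iommu_group,
						  &adev->nb);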

Signed-off-by: Andrey Grodzovsky 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  5 
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 46 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h   |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  2 ++
 6 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 478a7d8..2953420 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -51,6 +51,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -1041,6 +1042,10 @@ struct amdgpu_device {
 
boolin_pci_err_recovery;
struct pci_saved_state  *pci_state;
+
+   struct notifier_block   nb;
+   struct blocking_notifier_head   notifier;
+   struct list_headdevice_bo_list;
 };
 
 static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 45e23e3..e99f4f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -70,6 +70,8 @@
 #include 
 #include 
 
+#include 
+
 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
@@ -3200,6 +3202,39 @@ static const struct attribute *amdgpu_dev_attributes[] = 
{
 };
 
 
+static int amdgpu_iommu_group_notifier(struct notifier_block *nb,
+unsigned long action, void *data)
+{
+   struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, nb);
+   struct amdgpu_bo *bo = NULL;
+
+   /*
+* Following is a set of IOMMU group dependencies taken care of before
+* device's IOMMU group is removed
+*/
+   if (action == IOMMU_GROUP_NOTIFY_DEL_DEVICE) {
+
+   spin_lock(&ttm_bo_glob.lru_lock);
+   list_for_each_entry(bo, &adev->device_bo_list, bo) {
+   if (bo->tbo.ttm)
+   ttm_tt_unpopulate(bo->tbo.bdev, bo->tbo.ttm);
+   }
+   spin_unlock(&ttm_bo_glob.lru_lock);
+
+   if (adev->irq.ih.use_bus_addr)
+   amdgpu_ih_ring_fini(adev, &adev->irq.ih);
+   if (adev->irq.ih1.use_bus_addr)
+   amdgpu_ih_ring_fini(adev, &adev->irq.ih1);
+   if (adev->irq.ih2.use_bus_addr)
+   amdgpu_ih_ring_fini(adev, &adev->irq.ih2);
+
+   amdgpu_gart_dummy_page_fini(adev);
+   }
+
+   return NOTIFY_OK;
+}
+
+
 /**
  * amdgpu_device_init - initialize the driver
  *
@@ -3304,6 +3339,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 
INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
 
+   INIT_LIST_HEAD(&adev->device_bo_list);
+
adev->gfx.gfx_off_req_count = 1;
adev->pm.ac_power = power_supply_is_system_supplied() > 0;
 
@@ -3575,6 +3612,15 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (amdgpu_device_cache_pci_state(adev->pdev))
pci_restore_state(pdev);
 
+   BLOCKING_INIT_NOTIFIER_HEAD(&adev->notifier);
+   adev->nb.notifier_call = amdgpu_iommu_group_notifier;
+
+   if (adev->dev->iommu_group) {
+   r = iommu_group_register_notifier(adev->dev->iommu_group, 
&adev->nb);
+   if (r)
+   goto failed;
+   }
+
return 0;
 
 failed:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 0db9330..486ad6d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -92,7 +92,7 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device 
*adev)
  *
  * Frees the dummy page used by the driver (all asics).
  */
-static void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
+void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
 {
if (!adev->dummy_page_addr)
return;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
index afa2e28..5678d9c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
@@ -61,6 +61,7 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
 void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
 int amdgpu_gart_init(struct amdgpu_device *adev);
 void amdgpu_gart_fini(struct amdgp

[PATCH v4 06/14] drm/amdgpu: Add early fini callback

2021-01-18 Thread Andrey Grodzovsky
Use it to call display code that depends on device->drv_data
before it's set to NULL on device unplug.
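The hook is iterated before drv_data is cleared, while the rest of the
IP fini work still runs later from the release path (condensed from the
diff below, error logging dropped):

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].version->funcs->early_fini)
			continue;

		/* e.g. amdgpu_dm_early_fini() -> amdgpu_dm_audio_fini() */
		adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
	}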

Signed-off-by: Andrey Grodzovsky 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c| 20 
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 12 ++--
 drivers/gpu/drm/amd/include/amd_shared.h  |  2 ++
 3 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 90c8353..45e23e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2529,6 +2529,24 @@ static int amdgpu_device_ip_late_init(struct 
amdgpu_device *adev)
return 0;
 }
 
+static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
+{
+   int i, r;
+
+   for (i = 0; i < adev->num_ip_blocks; i++) {
+   if (!adev->ip_blocks[i].version->funcs->early_fini)
+   continue;
+
+   r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
+   if (r) {
+   DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+   }
+   }
+
+   return 0;
+}
+
 /**
  * amdgpu_device_ip_fini - run fini for hardware IPs
  *
@@ -3613,6 +3631,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
amdgpu_fbdev_fini(adev);
 
amdgpu_irq_fini_hw(adev);
+
+   amdgpu_device_ip_fini_early(adev);
 }
 
 void amdgpu_device_fini_sw(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 86c2b2c..9b24f3e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1156,6 +1156,15 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
return -EINVAL;
 }
 
+static int amdgpu_dm_early_fini(void *handle)
+{
+   struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+   amdgpu_dm_audio_fini(adev);
+
+   return 0;
+}
+
 static void amdgpu_dm_fini(struct amdgpu_device *adev)
 {
int i;
@@ -1164,8 +1173,6 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
drm_encoder_cleanup(&adev->dm.mst_encoders[i].base);
}
 
-   amdgpu_dm_audio_fini(adev);
-
amdgpu_dm_destroy_drm_device(&adev->dm);
 
 #ifdef CONFIG_DRM_AMD_DC_HDCP
@@ -2175,6 +2182,7 @@ static const struct amd_ip_funcs amdgpu_dm_funcs = {
.late_init = dm_late_init,
.sw_init = dm_sw_init,
.sw_fini = dm_sw_fini,
+   .early_fini = amdgpu_dm_early_fini,
.hw_init = dm_hw_init,
.hw_fini = dm_hw_fini,
.suspend = dm_suspend,
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h 
b/drivers/gpu/drm/amd/include/amd_shared.h
index 9676016..63bb846 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -239,6 +239,7 @@ enum amd_dpm_forced_level;
  * @late_init: sets up late driver/hw state (post hw_init) - Optional
  * @sw_init: sets up driver state, does not configure hw
  * @sw_fini: tears down driver state, does not configure hw
+ * @early_fini: tears down stuff before dev detached from driver
  * @hw_init: sets up the hw state
  * @hw_fini: tears down the hw state
  * @late_fini: final cleanup
@@ -267,6 +268,7 @@ struct amd_ip_funcs {
int (*late_init)(void *handle);
int (*sw_init)(void *handle);
int (*sw_fini)(void *handle);
+   int (*early_fini)(void *handle);
int (*hw_init)(void *handle);
int (*hw_fini)(void *handle);
void (*late_fini)(void *handle);
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v4 05/14] drm/amdgpu: Split amdgpu_device_fini into early and late

2021-01-18 Thread Andrey Grodzovsky
Some of the work in amdgpu_device_fini, such as disabling HW interrupts
and finalizing pending fences, must be done right away in
pci_remove, while most of the work that relates to finalizing and
releasing driver data structures can be deferred until the
drm_driver.release hook is called, i.e. when the last device
reference is dropped.
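Roughly how the two halves are wired up; the pci_remove side is not part
of this excerpt, so treat the first function as a sketch of the intent
rather than the literal patch:

/* PCI remove callback - quiesce the hardware immediately (sketch) */
static void amdgpu_pci_remove_sketch(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	drm_dev_unplug(dev);
	amdgpu_device_fini_hw(drm_to_adev(dev));
	drm_dev_put(dev);	/* not necessarily the last reference */
}

/* drm_driver.release - runs when the last drm_device reference drops */
void amdgpu_driver_release_kms(struct drm_device *dev)
{
	amdgpu_device_fini_sw(drm_to_adev(dev));
}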

v4: Change functions prefix early->hw and late->sw

Signed-off-by: Andrey Grodzovsky 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 26 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c|  7 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  | 15 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c| 26 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h|  3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c| 12 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c|  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h   |  3 ++-
 drivers/gpu/drm/amd/amdgpu/cik_ih.c|  2 +-
 drivers/gpu/drm/amd/amdgpu/cz_ih.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/iceland_ih.c|  2 +-
 drivers/gpu/drm/amd/amdgpu/navi10_ih.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/si_ih.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/tonga_ih.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/vega10_ih.c |  2 +-
 16 files changed, 78 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index f77443c..478a7d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1060,7 +1060,9 @@ static inline struct amdgpu_device 
*amdgpu_ttm_adev(struct ttm_bo_device *bdev)
 
 int amdgpu_device_init(struct amdgpu_device *adev,
   uint32_t flags);
-void amdgpu_device_fini(struct amdgpu_device *adev);
+void amdgpu_device_fini_hw(struct amdgpu_device *adev);
+void amdgpu_device_fini_sw(struct amdgpu_device *adev);
+
 int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev);
 
 void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
@@ -1273,6 +1275,8 @@ void amdgpu_driver_lastclose_kms(struct drm_device *dev);
 int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv);
 void amdgpu_driver_postclose_kms(struct drm_device *dev,
 struct drm_file *file_priv);
+void amdgpu_driver_release_kms(struct drm_device *dev);
+
 int amdgpu_device_ip_suspend(struct amdgpu_device *adev);
 int amdgpu_device_suspend(struct drm_device *dev, bool fbcon);
 int amdgpu_device_resume(struct drm_device *dev, bool fbcon);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 348ac67..90c8353 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3579,14 +3579,12 @@ int amdgpu_device_init(struct amdgpu_device *adev,
  * Tear down the driver info (all asics).
  * Called at driver shutdown.
  */
-void amdgpu_device_fini(struct amdgpu_device *adev)
+void amdgpu_device_fini_hw(struct amdgpu_device *adev)
 {
dev_info(adev->dev, "amdgpu: finishing device.\n");
flush_delayed_work(&adev->delayed_init_work);
adev->shutdown = true;
 
-   kfree(adev->pci_state);
-
/* make sure IB test finished before entering exclusive mode
 * to avoid preemption on IB test
 * */
@@ -3603,11 +3601,24 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
else
drm_atomic_helper_shutdown(adev_to_drm(adev));
}
-   amdgpu_fence_driver_fini(adev);
+   amdgpu_fence_driver_fini_hw(adev);
+
if (adev->pm_sysfs_en)
amdgpu_pm_sysfs_fini(adev);
+   if (adev->ucode_sysfs_en)
+   amdgpu_ucode_sysfs_fini(adev);
+   sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
+
amdgpu_fbdev_fini(adev);
+
+   amdgpu_irq_fini_hw(adev);
+}
+
+void amdgpu_device_fini_sw(struct amdgpu_device *adev)
+{
amdgpu_device_ip_fini(adev);
+   amdgpu_fence_driver_fini_sw(adev);
release_firmware(adev->firmware.gpu_info_fw);
adev->firmware.gpu_info_fw = NULL;
adev->accel_working = false;
@@ -3636,14 +3647,13 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
adev->rmmio = NULL;
amdgpu_device_doorbell_fini(adev);
 
-   if (adev->ucode_sysfs_en)
-   amdgpu_ucode_sysfs_fini(adev);
-
-   sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
if (IS_ENABLED(CONFIG_PERF_EVENTS))
amdgpu_pmu_fini(adev);
if (adev->mman.discovery_bin)
amdgpu_discovery_fini(adev);
+
+   kfree(adev->pci_state);
 }
 
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 72efd57..9c0cd00 100644
--- a/drivers/gpu/drm/am

[PATCH v4 04/14] drm/sched: Cancel and flush all outstanding jobs before finish.

2021-01-18 Thread Andrey Grodzovsky
To avoid any possible use after free.

Signed-off-by: Andrey Grodzovsky 
Reviewed-by: Christian König 
---
 drivers/gpu/drm/scheduler/sched_main.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index 997aa15..92637b7 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -899,6 +899,9 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched)
if (sched->thread)
kthread_stop(sched->thread);
 
+   /* Confirm no work left behind accessing device structures */
+   cancel_delayed_work_sync(&sched->work_tdr);
+
sched->ready = false;
 }
 EXPORT_SYMBOL(drm_sched_fini);
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v4 02/14] drm: Unmap the entire device address space on device unplug

2021-01-18 Thread Andrey Grodzovsky
Invalidate all BOs' CPU mappings once the device is removed.

v3: Move the code from TTM into drm_dev_unplug

Signed-off-by: Andrey Grodzovsky 
Reviewed-by: Christian König 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/drm_drv.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index d384a5b..20d22e4 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -469,6 +469,9 @@ void drm_dev_unplug(struct drm_device *dev)
synchronize_srcu(&drm_unplug_srcu);
 
drm_dev_unregister(dev);
+
+   /* Clear all CPU mappings pointing to this device */
+   unmap_mapping_range(dev->anon_inode->i_mapping, 0, 0, 1);
 }
 EXPORT_SYMBOL(drm_dev_unplug);
 
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v4 03/14] drm/ttm: Expose ttm_tt_unpopulate for driver use

2021-01-18 Thread Andrey Grodzovsky
It's needed to drop IOMMU-backed pages on device unplug,
before the device's IOMMU group is released.

Signed-off-by: Andrey Grodzovsky 
---
 drivers/gpu/drm/ttm/ttm_tt.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 7f75a13..f9e0b0d 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -341,3 +341,4 @@ void ttm_tt_unpopulate(struct ttm_bo_device *bdev,
ttm_pool_free(&bdev->pool, ttm);
ttm->page_flags &= ~TTM_PAGE_FLAG_PRIV_POPULATED;
 }
+EXPORT_SYMBOL(ttm_tt_unpopulate);
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v4 01/14] drm/ttm: Remap all page faults to per process dummy page.

2021-01-18 Thread Andrey Grodzovsky
On device removal, reroute all CPU mappings to a dummy page.

v3:
Remove loop to find DRM file and instead access it
by vma->vm_file->private_data. Move dummy page installation
into a separate function.

v4:
Map the entire BO's VA space onto an on-demand allocated dummy page
on the first fault for that BO.

Signed-off-by: Andrey Grodzovsky 
---
 drivers/gpu/drm/ttm/ttm_bo_vm.c | 82 -
 include/drm/ttm/ttm_bo_api.h|  2 +
 2 files changed, 83 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 6dc96cf..ed89da3 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -34,6 +34,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 #include 
 #include 
@@ -380,25 +382,103 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
 }
 EXPORT_SYMBOL(ttm_bo_vm_fault_reserved);
 
+static void ttm_bo_release_dummy_page(struct drm_device *dev, void *res)
+{
+   struct page *dummy_page = (struct page *)res;
+
+   __free_page(dummy_page);
+}
+
+vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot)
+{
+   struct vm_area_struct *vma = vmf->vma;
+   struct ttm_buffer_object *bo = vma->vm_private_data;
+   struct ttm_bo_device *bdev = bo->bdev;
+   struct drm_device *ddev = bo->base.dev;
+   vm_fault_t ret = VM_FAULT_NOPAGE;
+   unsigned long address = vma->vm_start;
+   unsigned long num_prefault = (vma->vm_end - vma->vm_start) >> 
PAGE_SHIFT;
+   unsigned long pfn;
+   struct page *page;
+   int i;
+
+   /*
+* Wait for buffer data in transit, due to a pipelined
+* move.
+*/
+   ret = ttm_bo_vm_fault_idle(bo, vmf);
+   if (unlikely(ret != 0))
+   return ret;
+
+   /* Allocate new dummy page to map all the VA range in this VMA to it*/
+   page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+   if (!page)
+   return VM_FAULT_OOM;
+
+   pfn = page_to_pfn(page);
+
+   /*
+* Prefault the entire VMA range right away to avoid further faults
+*/
+   for (i = 0; i < num_prefault; ++i) {
+
+   if (unlikely(address >= vma->vm_end))
+   break;
+
+   if (vma->vm_flags & VM_MIXEDMAP)
+   ret = vmf_insert_mixed_prot(vma, address,
+   __pfn_to_pfn_t(pfn, 
PFN_DEV),
+   prot);
+   else
+   ret = vmf_insert_pfn_prot(vma, address, pfn, prot);
+
+   /* Never error on prefaulted PTEs */
+   if (unlikely((ret & VM_FAULT_ERROR))) {
+   if (i == 0)
+   return VM_FAULT_NOPAGE;
+   else
+   break;
+   }
+
+   address += PAGE_SIZE;
+   }
+
+   /* Set the page to be freed using drmm release action */
+   if (drmm_add_action_or_reset(ddev, ttm_bo_release_dummy_page, page))
+   return VM_FAULT_OOM;
+
+   return ret;
+}
+EXPORT_SYMBOL(ttm_bo_vm_dummy_page);
+
 vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
 {
struct vm_area_struct *vma = vmf->vma;
pgprot_t prot;
struct ttm_buffer_object *bo = vma->vm_private_data;
+   struct drm_device *ddev = bo->base.dev;
vm_fault_t ret;
+   int idx;
 
ret = ttm_bo_vm_reserve(bo, vmf);
if (ret)
return ret;
 
prot = vma->vm_page_prot;
-   ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT, 1);
+   if (drm_dev_enter(ddev, &idx)) {
+   ret = ttm_bo_vm_fault_reserved(vmf, prot, 
TTM_BO_VM_NUM_PREFAULT, 1);
+   drm_dev_exit(idx);
+   } else {
+   ret = ttm_bo_vm_dummy_page(vmf, prot);
+   }
if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
return ret;
 
dma_resv_unlock(bo->base.resv);
 
return ret;
 }
 EXPORT_SYMBOL(ttm_bo_vm_fault);
 
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index e17be32..12fb240 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -643,4 +643,6 @@ void ttm_bo_vm_close(struct vm_area_struct *vma);
 int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
 void *buf, int len, int write);
 
+vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot);
+
 #endif
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v4 00/14] RFC Support hot device unplug in amdgpu

2021-01-18 Thread Andrey Grodzovsky
Until now, extracting a card either by physical extraction (e.g. an eGPU with a 
thunderbolt connection, or by emulation through sysfs -> 
/sys/bus/pci/devices/device_id/remove) 
would cause random crashes in user apps. The random crashes in apps were 
mostly due to an app that had mapped a device backed BO into its address 
space still trying to access the BO while the backing device was gone.
To answer this first problem Christian suggested fixing the handling of mapped 
memory in the clients when the device goes away by forcibly unmapping all buffers 
the user processes have, by clearing their respective VMAs mapping the device BOs. 
Then, when the VMAs try to fill in the page tables again, we check in the fault 
handler if the device is removed and, if so, return an error. This generates a 
SIGBUS to the application, which can then cleanly terminate. This indeed was done, 
but it in turn created a problem of kernel OOPSes, which were due to the 
fact that while the app was terminating because of the SIGBUS it would trigger a 
use after free in the driver by calling into device structures that were already 
released by the pci remove sequence. This was handled by introducing a 'flush' 
sequence during device removal where we wait for the drm file reference to drop 
to 0, meaning all user clients directly using this device have terminated.

v2:
Based on discussions in the mailing list with Daniel and Pekka [1], and based on 
the document 
produced by Pekka from those discussions [2], the whole approach of returning 
SIGBUS and 
waiting for all user clients having CPU mappings of device BOs to die was 
dropped. 
Instead, as per the document's suggestion, the device structures are kept alive 
until 
the last reference to the device is dropped by a user client, and in the meanwhile 
all existing and new CPU mappings of the BOs 
belonging to the device, directly or by dma-buf import, are rerouted to a per user 
process dummy rw page. Also, I skipped the 'Requirements for KMS UAPI' section 
of [2] 
since I am trying to get the minimal set of requirements that still gives a useful 
solution 
to work, and this is the 'Requirements for Render and Cross-Device UAPI' section, 
and so my 
test case is removing a secondary device, which is render only and is not 
involved 
in KMS.

v3:
More updates following comments on v2, such as removing the loop to find the DRM 
file when rerouting 
page faults to the dummy page, getting rid of unnecessary sysfs handling 
refactoring, and moving 
prevention of GPU recovery post device unplug from amdgpu to the scheduler layer. 
On top of that, added unplug support for IOMMU enabled systems.

v4:
Drop the last sysfs hack and use a sysfs default attribute.
Guard against write accesses after device removal to avoid modifying released 
memory.
Update dummy page handling to on-demand allocation and release through the drm 
managed framework.
Add a return value to the scheduler job TO handler (by Luben Tuikov) and use this 
in amdgpu for prevention 
of GPU recovery post device unplug.
Also rebase on top of drm-misc-next instead of amd-staging-drm-next.

With these patches I am able to gracefully remove the secondary card using the 
sysfs remove hook while glxgears 
is running off of the secondary card (DRI_PRIME=1), without kernel oopses or 
hangs, and keep working 
with the primary card or soft reset the device without hangs or oopses.

TODOs for followup work:
Convert AMDGPU code to use devm (for hw stuff) and drmm (for sw stuff and 
allocations) (Daniel)
Support plugging the secondary device back after unplug - currently still 
experiencing HW error on plugging back.
Add support for 'Requirements for KMS UAPI' section of [2] - unplugging 
primary, display connected card.

[1] - Discussions during v3 of the patchset 
https://www.spinics.net/lists/amd-gfx/msg55576.html
[2] - drm/doc: device hot-unplug for userspace 
https://www.spinics.net/lists/dri-devel/msg259755.html
[3] - Related gitlab ticket https://gitlab.freedesktop.org/drm/amd/-/issues/1081

Andrey Grodzovsky (13):
  drm/ttm: Remap all page faults to per process dummy page.
  drm: Unmap the entire device address space on device unplug
  drm/ttm: Expose ttm_tt_unpopulate for driver use
  drm/sched: Cancel and flush all outstanding jobs before finish.
  drm/amdgpu: Split amdgpu_device_fini into early and late
  drm/amdgpu: Add early fini callback
  drm/amdgpu: Register IOMMU topology notifier per device.
  drm/amdgpu: Fix a bunch of sdma code crash post device unplug
  drm/amdgpu: Remap all page faults to per process dummy page.
  drm/amdgpu: Move some sysfs attrs creation to default_attr
  drm/amdgpu: Guard against write accesses after device removal
  drm/sched: Make timeout timer rearm conditional.
  drm/amdgpu: Prevent any job recoveries after device is unplugged.

Luben Tuikov (1):
  drm/scheduler: Job timeout handler returns status

 drivers/gpu/drm/amd/amdgpu/amdgpu.h   |  11 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c

Re: [PATCH 2/2] drm/amdgpu: set job guilty if reset skipped

2021-01-14 Thread Andrey Grodzovsky

Reviewed-by: Andrey Grodzovsky 

Andrey

On 1/14/21 8:37 AM, Horace Chen wrote:

If 2 jobs on 2 different rings timed out within a very
short period, the reset for the second job will be skipped
because a reset is already in progress.

But that doesn't mean the second job is not guilty, since it also
timed out and can be a bad job. So before skipping out of the
reset, we need to increase karma for this job too.

Signed-off-by: Horace Chen 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 
  1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index a28e138ac72c..d1112e29c8b4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4572,6 +4572,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
if (atomic_cmpxchg(&hive->in_reset, 0, 1) != 0) {
DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as 
another already in progress",
job ? job->base.id : -1, hive->hive_id);
+   if (job)
+   drm_sched_increase_karma(&job->base);
amdgpu_put_xgmi_hive(hive);
return 0;
}
@@ -4596,6 +4598,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
dev_info(adev->dev, "Bailing on TDR for s_job:%llx, as 
another already in progress",
job ? job->base.id : -1);
r = 0;
+   if (job)
+   drm_sched_increase_karma(&job->base);
goto skip_recovery;
}
  

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/2] drm/amdgpu: race issue when jobs on 2 rings timeout

2021-01-14 Thread Andrey Grodzovsky



On 1/14/21 8:37 AM, Horace Chen wrote:

Fix a racing issue when jobs on 2 rings time out simultaneously.

If 2 rings timed out at the same time,
amdgpu_device_gpu_recover will be re-entered. Then the
adev->gmc.xgmi.head will be grabbed by 2 local linked lists,
which may cause a wild pointer issue while iterating.

Lock the device early to prevent the node from being added to 2
different lists.

For xgmi there is a hive lock which guarantees that no two
devices on the same hive re-enter the iteration, so xgmi doesn't
need to lock the device.



Note that amdgpu_device_lock_adev does a bunch of other things besides taking
the lock, and I don't think we want to skip those for the other devices in the 
case of XGMI.


Andrey




Signed-off-by: Horace Chen 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 15 ---
  1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 4d434803fb49..a28e138ac72c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4591,19 +4591,20 @@ int amdgpu_device_gpu_recover(struct amdgpu_device 
*adev,
list_rotate_to_front(&adev->gmc.xgmi.head, 
&hive->device_list);
device_list_handle = &hive->device_list;
} else {
+   /* if current dev is already in reset, skip adding list to 
prevent race issue */
+   if (!amdgpu_device_lock_adev(adev, hive)) {
+   dev_info(adev->dev, "Bailing on TDR for s_job:%llx, as 
another already in progress",
+   job ? job->base.id : -1);
+   r = 0;
+   goto skip_recovery;
+   }
+
list_add_tail(&adev->gmc.xgmi.head, &device_list);
device_list_handle = &device_list;
}
  
  	/* block all schedulers and reset given job's ring */

list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
-   if (!amdgpu_device_lock_adev(tmp_adev, hive)) {
-   dev_info(tmp_adev->dev, "Bailing on TDR for s_job:%llx, as 
another already in progress",
- job ? job->base.id : -1);
-   r = 0;
-   goto skip_recovery;
-   }
-
/*
 * Try to put the audio codec into suspend state
 * before gpu reset started.

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/2] drm/amdgpu/display: buffer INTERRUPT_LOW_IRQ_CONTEXT interrupt work

2021-01-13 Thread Andrey Grodzovsky


On 1/14/21 12:11 AM, Chen, Xiaogang wrote:

On 1/12/2021 10:54 PM, Grodzovsky, Andrey wrote:


On 1/4/21 1:01 AM, Xiaogang.Chen wrote:

From: Xiaogang Chen 

amdgpu DM handles INTERRUPT_LOW_IRQ_CONTEXT interrupts (hpd, hpd_rx) by
using a work queue with a single work_struct. If a previous interrupt
has not been handled yet, new interrupts (of the same type) are discarded and
the driver just sends an "amdgpu_dm_irq_schedule_work FAILED" message out.
If some important hpd/hpd_rx related interrupts are missed by the driver,
hot (un)plugged devices may cause a system hang or instability, such as when
the system resumes from S3 sleep with an mst device connected.

This patch dynamically allocates a new amdgpu_dm_irq_handler_data for
new interrupts if the previous INTERRUPT_LOW_IRQ_CONTEXT interrupt work
has not been handled yet, so new interrupt work can be queued to the
same workqueue_struct instead of the new interrupts being discarded.
All allocated amdgpu_dm_irq_handler_data are put into a single linked
list and will be reused afterwards.



I believe this creates possible concurrency between an already executing work 
item
and a new incoming one for which you allocate a new work item on the fly. 
While
handle_hpd_irq is serialized with aconnector->hpd_lock, I see that
handle_hpd_rx_irq
is not locked for the MST use case (which is the most frequent user of this 
interrupt).  Did you

verify that handle_hpd_rx_irq is reentrant?

handle_hpd_rx_irq is put on a work queue. Its execution is serialized by the 
work queue, so there is no reentrancy.


You are using system_highpri_wq, which has the property that its worker 
thread pool is spread across all the
active CPUs; see all the work queue definitions here: 
https://elixir.bootlin.com/linux/v5.11-rc3/source/include/linux/workqueue.h#L358
I believe that what you are saying about no chance of reentrancy would be correct 
if it were the same work item being dequeued for execution
while the previous instance is still running, see the explanation here - 
https://elixir.bootlin.com/linux/v5.11-rc3/source/kernel/workqueue.c#L1435.
Non-reentrancy is guaranteed only for the same work item. If you want 
non-reentrancy (full serialization) across different work items, you should create

your own single-threaded work queue using create_singlethread_workqueue

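For illustration, a minimal sketch of that suggestion (the queue name
and the init/teardown placement are made up):

static struct workqueue_struct *dm_irq_wq;

	/* init, e.g. during DM IRQ setup */
	dm_irq_wq = create_singlethread_workqueue("amdgpu-dm-irq");
	if (!dm_irq_wq)
		return -ENOMEM;

	/* queue here instead of on system_highpri_wq, so all handler
	 * work items execute strictly one after another */
	queue_work(dm_irq_wq, &handler_data->work);

	/* teardown */
	destroy_workqueue(dm_irq_wq);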

amdgpu_dm_irq_schedule_work does the queuing of work (it puts handle_hpd_rx_irq 
on the work queue). The first call is to dm_irq_work_func, which then calls 
handle_hpd_rx_irq.




Signed-off-by: Xiaogang Chen 
---
  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h  |  14 +--
  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c  | 114 
++---

  2 files changed, 80 insertions(+), 48 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h

index c9d82b9..730e540 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -69,18 +69,6 @@ struct common_irq_params {
  };
    /**
- * struct irq_list_head - Linked-list for low context IRQ handlers.
- *
- * @head: The list_head within &struct handler_data
- * @work: A work_struct containing the deferred handler work
- */
-struct irq_list_head {
-    struct list_head head;
-    /* In case this interrupt needs post-processing, 'work' will be queued*/
-    struct work_struct work;
-};
-
-/**
   * struct dm_compressor_info - Buffer info used by frame buffer compression
   * @cpu_addr: MMIO cpu addr
   * @bo_ptr: Pointer to the buffer object
@@ -270,7 +258,7 @@ struct amdgpu_display_manager {
   * Note that handlers are called in the same order as they were
   * registered (FIFO).
   */
-    struct irq_list_head irq_handler_list_low_tab[DAL_IRQ_SOURCES_NUMBER];
+    struct list_head irq_handler_list_low_tab[DAL_IRQ_SOURCES_NUMBER];
    /**
   * @irq_handler_list_high_tab:
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c

index 3577785..ada344a 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
@@ -82,6 +82,7 @@ struct amdgpu_dm_irq_handler_data {
  struct amdgpu_display_manager *dm;
  /* DAL irq source which registered for this interrupt. */
  enum dc_irq_source irq_source;
+    struct work_struct work;
  };
    #define DM_IRQ_TABLE_LOCK(adev, flags) \
@@ -111,20 +112,10 @@ static void init_handler_common_data(struct 
amdgpu_dm_irq_handler_data *hcd,

   */
  static void dm_irq_work_func(struct work_struct *work)
  {
-    struct irq_list_head *irq_list_head =
-    container_of(work, struct irq_list_head, work);
-    struct list_head *handler_list = &irq_list_head->head;
-    struct amdgpu_dm_irq_handler_data *handler_data;
-
-    list_for_each_entry(handler_data, handler_list, list) {
-    DRM_DEBUG_KMS("DM_IRQ: work_func: for dal_src=%d\n",
-    handler_data->irq_source);
+    struct amdgpu_dm_irq_handler_d

Re: [PATCH v3 01/12] drm: Add dummy page per device or GEM object

2021-01-13 Thread Andrey Grodzovsky


On 1/13/21 4:14 AM, Christian König wrote:

Am 12.01.21 um 16:59 schrieb Andrey Grodzovsky:


On 1/12/21 7:32 AM, Christian König wrote:

Am 12.01.21 um 10:10 schrieb Daniel Vetter:

On Mon, Jan 11, 2021 at 03:45:10PM -0500, Andrey Grodzovsky wrote:

On 1/11/21 11:15 AM, Daniel Vetter wrote:

On Mon, Jan 11, 2021 at 05:13:56PM +0100, Daniel Vetter wrote:

On Fri, Jan 08, 2021 at 04:49:55PM +, Grodzovsky, Andrey wrote:
Ok then, I guess I will proceed with the dummy pages list 
implementation then.


Andrey


From: Koenig, Christian 
Sent: 08 January 2021 09:52
To: Grodzovsky, Andrey ; Daniel Vetter 

Cc: amd-gfx@lists.freedesktop.org ; 
dri-de...@lists.freedesktop.org ; 
daniel.vet...@ffwll.ch ; r...@kernel.org 
; l.st...@pengutronix.de ; 
yuq...@gmail.com ; e...@anholt.net ; 
Deucher, Alexander ; 
gre...@linuxfoundation.org ; 
ppaala...@gmail.com ; Wentland, Harry 


Subject: Re: [PATCH v3 01/12] drm: Add dummy page per device or GEM object

Mhm, I'm not aware of any left over pointer between TTM and GEM and we
worked quite hard on reducing the size of the amdgpu_bo, so another
extra pointer just for that corner case would suck quite a bit.

We have a ton of other pointers in struct amdgpu_bo (or any of it's lower
things) which are fairly single-use, so I'm really not much seeing the
point in making this a special case. It also means the lifetime management
becomes a bit iffy, since we can't throw away the dummy page then the last
reference to the bo is released (since we don't track it there), but only
when the last pointer to the device is released. Potentially this means a
pile of dangling pages hanging around for too long.

Also if you really, really, really want to have this list, please don't
reinvent it since we have it already. drmm_ is exactly meant for resources
that should be freed when the final drm_device reference disappears.
-Daniel


I maybe was too eager too early; see, I need to explicitly allocate the dummy page
using alloc_page, so
I cannot use drmm_kmalloc for this. So once again, like with the list, I need
to wrap it with a container struct
which I can then allocate using drmm_kmalloc, with the page
pointer inside. But then
on release it needs to free the page, and so I supposedly need to use 
drmm_add_action

to free the page before the container struct is released, but drmm_kmalloc
doesn't allow setting a
release action on struct allocation. So I created a new
drmm_kmalloc_with_action API function,
but then you also need to supply the optional data pointer for the release
action (the struct page in this case),
and so this all becomes a bit overcomplicated (but doable). Is this extra
API worth adding? Maybe it can
be useful in general.

drm_add_action_or_reset (for better control flow) has both a void * data
and a cleanup function (and it internally allocates the tracking structure
for that for you). So should work as-is? Allocating a tracking structure
for our tracking structure for a page would definitely be a bit too much.

Essentially, drmm_add_action is the kcalloc_with_action function you want,
as long as all you need is a single void * pointer (we could do the
kzalloc_with_action though, there's enough space, just no need yet for any
of the current users).


Yeah, but my thinking was that we should use the page LRU for this and not 
another container structure.


Christian.



Which specific list did you mean ?


The struct page * you get from get_free_page() already has an lru member of 
type list_head.


This way you can link pages together for later destruction without the need of 
a container object.


Christian.



I get it now; this is good advice, and it indeed makes the container struct I 
created obsolete, but currently I am going
with Daniel's suggestion to use drm_add_action_or_reset, which makes the list 
itself also unneeded.
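For reference, the page-LRU variant Christian describes would look
roughly like this (hypothetical sketch, locking omitted):

static LIST_HEAD(dummy_pages);

	/* on allocation - chain the page via its built-in lru member */
	list_add(&page->lru, &dummy_pages);

	/* on final device release */
	struct page *p, *tmp;

	list_for_each_entry_safe(p, tmp, &dummy_pages, lru) {
		list_del(&p->lru);
		__free_page(p);
	}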


Andrey






Andrey





-Daniel





___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/2] drm/amdgpu/display: buffer INTERRUPT_LOW_IRQ_CONTEXT interrupt work

2021-01-12 Thread Andrey Grodzovsky


On 1/4/21 1:01 AM, Xiaogang.Chen wrote:

From: Xiaogang Chen 

amdgpu DM handles INTERRUPT_LOW_IRQ_CONTEXT interrupts (hpd, hpd_rx) by
using a work queue with a single work_struct. If a previous interrupt
has not been handled yet, new interrupts (of the same type) are discarded and
the driver just sends an "amdgpu_dm_irq_schedule_work FAILED" message out.
If some important hpd/hpd_rx related interrupts are missed by the driver,
hot (un)plugged devices may cause a system hang or instability, such as when
the system resumes from S3 sleep with an mst device connected.

This patch dynamically allocates a new amdgpu_dm_irq_handler_data for
new interrupts if the previous INTERRUPT_LOW_IRQ_CONTEXT interrupt work
has not been handled yet, so new interrupt work can be queued to the
same workqueue_struct instead of the new interrupts being discarded.
All allocated amdgpu_dm_irq_handler_data are put into a single linked
list and will be reused afterwards.



I believe this creates possible concurrency between an already executing work 
item
and a new incoming one for which you allocate a new work item on the fly. 
While
handle_hpd_irq is serialized with aconnector->hpd_lock, I see that
handle_hpd_rx_irq
is not locked for the MST use case (which is the most frequent user of this 
interrupt).  Did you
verify that handle_hpd_rx_irq is reentrant?
verified that handle_hpd_rx_irq is reentrant ?




Signed-off-by: Xiaogang Chen 
---
  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h  |  14 +--
  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c  | 114 ++---
  2 files changed, 80 insertions(+), 48 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index c9d82b9..730e540 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -69,18 +69,6 @@ struct common_irq_params {
  };
  
  /**

- * struct irq_list_head - Linked-list for low context IRQ handlers.
- *
- * @head: The list_head within &struct handler_data
- * @work: A work_struct containing the deferred handler work
- */
-struct irq_list_head {
-   struct list_head head;
-   /* In case this interrupt needs post-processing, 'work' will be queued*/
-   struct work_struct work;
-};
-
-/**
   * struct dm_compressor_info - Buffer info used by frame buffer compression
   * @cpu_addr: MMIO cpu addr
   * @bo_ptr: Pointer to the buffer object
@@ -270,7 +258,7 @@ struct amdgpu_display_manager {
 * Note that handlers are called in the same order as they were
 * registered (FIFO).
 */
-   struct irq_list_head irq_handler_list_low_tab[DAL_IRQ_SOURCES_NUMBER];
+   struct list_head irq_handler_list_low_tab[DAL_IRQ_SOURCES_NUMBER];
  
  	/**

 * @irq_handler_list_high_tab:
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
index 3577785..ada344a 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
@@ -82,6 +82,7 @@ struct amdgpu_dm_irq_handler_data {
struct amdgpu_display_manager *dm;
/* DAL irq source which registered for this interrupt. */
enum dc_irq_source irq_source;
+   struct work_struct work;
  };
  
  #define DM_IRQ_TABLE_LOCK(adev, flags) \

@@ -111,20 +112,10 @@ static void init_handler_common_data(struct 
amdgpu_dm_irq_handler_data *hcd,
   */
  static void dm_irq_work_func(struct work_struct *work)
  {
-   struct irq_list_head *irq_list_head =
-   container_of(work, struct irq_list_head, work);
-   struct list_head *handler_list = &irq_list_head->head;
-   struct amdgpu_dm_irq_handler_data *handler_data;
-
-   list_for_each_entry(handler_data, handler_list, list) {
-   DRM_DEBUG_KMS("DM_IRQ: work_func: for dal_src=%d\n",
-   handler_data->irq_source);
+   struct amdgpu_dm_irq_handler_data *handler_data =
+container_of(work, struct amdgpu_dm_irq_handler_data, work);
  
-		DRM_DEBUG_KMS("DM_IRQ: schedule_work: for dal_src=%d\n",

-   handler_data->irq_source);
-
-   handler_data->handler(handler_data->handler_arg);
-   }
+   handler_data->handler(handler_data->handler_arg);
  
  	/* Call a DAL subcomponent which registered for interrupt notification

 * at INTERRUPT_LOW_IRQ_CONTEXT.
@@ -156,7 +147,7 @@ static struct list_head *remove_irq_handler(struct 
amdgpu_device *adev,
break;
case INTERRUPT_LOW_IRQ_CONTEXT:
default:
-   hnd_list = &adev->dm.irq_handler_list_low_tab[irq_source].head;
+   hnd_list = &adev->dm.irq_handler_list_low_tab[irq_source];
break;
}
  
@@ -287,7 +278,8 @@ void *amdgpu_dm_irq_register_interrupt(struct amdgpu_device *adev,

break;
case INTERRUPT_LOW_IRQ_CONTEXT:
default:
-   hnd_list = &adev->dm.irq_handler_list_l

Re: [PATCH v3 01/12] drm: Add dummy page per device or GEM object

2021-01-12 Thread Andrey Grodzovsky


On 1/12/21 7:32 AM, Christian König wrote:

Am 12.01.21 um 10:10 schrieb Daniel Vetter:

On Mon, Jan 11, 2021 at 03:45:10PM -0500, Andrey Grodzovsky wrote:

On 1/11/21 11:15 AM, Daniel Vetter wrote:

On Mon, Jan 11, 2021 at 05:13:56PM +0100, Daniel Vetter wrote:

On Fri, Jan 08, 2021 at 04:49:55PM +, Grodzovsky, Andrey wrote:
Ok then, I guess I will proceed with the dummy pages list implementation 
then.


Andrey


From: Koenig, Christian 
Sent: 08 January 2021 09:52
To: Grodzovsky, Andrey ; Daniel Vetter 

Cc: amd-gfx@lists.freedesktop.org ; 
dri-de...@lists.freedesktop.org ; 
daniel.vet...@ffwll.ch ; r...@kernel.org 
; l.st...@pengutronix.de ; 
yuq...@gmail.com ; e...@anholt.net ; 
Deucher, Alexander ; 
gre...@linuxfoundation.org ; 
ppaala...@gmail.com ; Wentland, Harry 


Subject: Re: [PATCH v3 01/12] drm: Add dummy page per device or GEM object

Mhm, I'm not aware of any left over pointer between TTM and GEM and we
worked quite hard on reducing the size of the amdgpu_bo, so another
extra pointer just for that corner case would suck quite a bit.

We have a ton of other pointers in struct amdgpu_bo (or any of it's lower
things) which are fairly single-use, so I'm really not much seeing the
point in making this a special case. It also means the lifetime management
becomes a bit iffy, since we can't throw away the dummy page then the last
reference to the bo is released (since we don't track it there), but only
when the last pointer to the device is released. Potentially this means a
pile of dangling pages hanging around for too long.

Also if you really, really, really want to have this list, please don't
reinvent it since we have it already. drmm_ is exactly meant for resources
that should be freed when the final drm_device reference disappears.
-Daniel


I maybe was too eager too early; see, I need to explicitly allocate the dummy page
using alloc_page, so
I cannot use drmm_kmalloc for this. So once again, like with the list, I need
to wrap it with a container struct
which I can then allocate using drmm_kmalloc, with the page
pointer inside. But then
on release it needs to free the page, and so I supposedly need to use 
drmm_add_action

to free the page before the container struct is released, but drmm_kmalloc
doesn't allow setting a
release action on struct allocation. So I created a new
drmm_kmalloc_with_action API function,
but then you also need to supply the optional data pointer for the release
action (the struct page in this case),
and so this all becomes a bit overcomplicated (but doable). Is this extra
API worth adding? Maybe it can
be useful in general.

drm_add_action_or_reset (for better control flow) has both a void * data
and a cleanup function (and it internally allocates the tracking structure
for that for you). So should work as-is? Allocating a tracking structure
for our tracking structure for a page would definitely be a bit too much.

Essentially, drmm_add_action is the kcalloc_with_action function you want,
as long as all you need is a single void * pointer (we could do the
kzalloc_with_action though, there's enough space, just no need yet for any
of the current users).


Yeah, but my thinking was that we should use the page LRU for this and not 
another container structure.


Christian.



Which specific list did you mean ?

Andrey





-Daniel



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v3 01/12] drm: Add dummy page per device or GEM object

2021-01-12 Thread Andrey Grodzovsky

So - basically allocate the page and pass it as a void* pointer to drmm_add_action
with a release function which will free the page, right?
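I.e. something like this (sketch, assuming the page is allocated with
alloc_page() as in patch 01):

static void ttm_bo_release_dummy_page(struct drm_device *dev, void *res)
{
	__free_page((struct page *)res);
}

	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
	if (!page)
		return VM_FAULT_OOM;

	/* the page is freed automatically when the last drm_device
	 * reference is dropped */
	if (drmm_add_action_or_reset(ddev, ttm_bo_release_dummy_page, page))
		return VM_FAULT_OOM;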

Andrey

On 1/12/21 4:10 AM, Daniel Vetter wrote:

drm_add_action_or_reset (for better control flow) has both a void * data
and a cleanup function (and it internally allocates the tracking structure
for that for you). So should work as-is? Allocating a tracking structure
for our tracking structure for a page would definitely be a bit too much.

Essentially, drmm_add_action is the kcalloc_with_action function you want,
as long as all you need is a single void * pointer (we could do the
kzalloc_with_action though, there's enough space, just no need yet for any
of the current users).
-Daniel

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v3 01/12] drm: Add dummy page per device or GEM object

2021-01-11 Thread Andrey Grodzovsky


On 1/11/21 11:15 AM, Daniel Vetter wrote:

On Mon, Jan 11, 2021 at 05:13:56PM +0100, Daniel Vetter wrote:

On Fri, Jan 08, 2021 at 04:49:55PM +, Grodzovsky, Andrey wrote:

Ok then, I guess I will proceed with the dummy pages list implementation then.

Andrey


From: Koenig, Christian 
Sent: 08 January 2021 09:52
To: Grodzovsky, Andrey ; Daniel Vetter 

Cc: amd-gfx@lists.freedesktop.org ; dri-de...@lists.freedesktop.org ; 
daniel.vet...@ffwll.ch ; r...@kernel.org ; l.st...@pengutronix.de 
; yuq...@gmail.com ; e...@anholt.net ; Deucher, Alexander 
; gre...@linuxfoundation.org ; ppaala...@gmail.com 
; Wentland, Harry 
Subject: Re: [PATCH v3 01/12] drm: Add dummy page per device or GEM object

Mhm, I'm not aware of any leftover pointer between TTM and GEM and we
worked quite hard on reducing the size of the amdgpu_bo, so another
extra pointer just for that corner case would suck quite a bit.

We have a ton of other pointers in struct amdgpu_bo (or any of its lower
things) which are fairly single-use, so I'm really not much seeing the
point in making this a special case. It also means the lifetime management
becomes a bit iffy, since we can't throw away the dummy page when the last
reference to the bo is released (since we don't track it there), but only
when the last pointer to the device is released. Potentially this means a
pile of dangling pages hanging around for too long.

Also if you really, really, really want to have this list, please don't
reinvent it since we have it already. drmm_ is exactly meant for resources
that should be freed when the final drm_device reference disappears.
-Daniel



Maybe I was too eager too early - see, I need to explicitly allocate the dummy
page using alloc_page(), so I cannot use drmm_kmalloc for this. So once again,
like with the list, I need to wrap it with a container struct
which I can then allocate using drmm_kmalloc, with the page
pointer inside. But then

on release it needs to free the page, and so I supposedly need to use
drmm_add_action
to free the page before the container struct is released, but drmm_kmalloc
doesn't allow setting a
release action on struct allocation. So I created a new drmm_kmalloc_with_action
API function,
but then you also need to supply the optional data pointer for the release
action (the struct page in this case),
and so this all becomes a bit overcomplicated (but doable). Is this extra API
worth adding ? Maybe it can

be useful in general.

Andrey



  

If you need some ideas for redundant pointers:
- destroy callback (kinda not cool to not have this const anyway), we
   could refcount it all with the overall gem bo. Quite a bit of work.
- bdev pointer, if we move the device ttm stuff into struct drm_device, or
   create a common struct ttm_device, we can ditch that
- We could probably merge a few of the fields and find 8 bytes somewhere
- we still have 2 krefs, would probably need to fix that before we can
   merge the destroy callbacks

So there's plenty of room still, if the size of a bo struct is really that
critical. Imo it's not.



Christian.

Am 08.01.21 um 15:46 schrieb Andrey Grodzovsky:

Daniel had some objections to this (see below) and so I guess I need
you both to agree on the approach before I proceed.

Andrey

On 1/8/21 9:33 AM, Christian König wrote:

Am 08.01.21 um 15:26 schrieb Andrey Grodzovsky:

Hey Christian, just a ping.

Was there any question for me here?

As far as I can see the best approach would still be to fill the VMA
with a single dummy page and avoid pointers in the GEM object.

Christian.


Andrey

On 1/7/21 11:37 AM, Andrey Grodzovsky wrote:

On 1/7/21 11:30 AM, Daniel Vetter wrote:

On Thu, Jan 07, 2021 at 11:26:52AM -0500, Andrey Grodzovsky wrote:

On 1/7/21 11:21 AM, Daniel Vetter wrote:

On Tue, Jan 05, 2021 at 04:04:16PM -0500, Andrey Grodzovsky wrote:

On 11/23/20 3:01 AM, Christian König wrote:

Am 23.11.20 um 05:54 schrieb Andrey Grodzovsky:

On 11/21/20 9:15 AM, Christian König wrote:

Am 21.11.20 um 06:21 schrieb Andrey Grodzovsky:

Will be used to reroute CPU mapped BO's page faults once
device is removed.

Uff, one page for each exported DMA-buf? That's not
something we can do.

We need to find a different approach here.

Can't we call alloc_page() on each fault and link them together
so they are freed when the device is finally reaped?

For sure better to optimize and allocate on demand when we reach
this corner case, but why the linking ?
Shouldn't drm_prime_gem_destroy be good enough place to free ?

I want to avoid keeping the page in the GEM object.

What we can do is to allocate a page on demand for each fault
and link
them together in the bdev instead.

And when the bdev is then finally destroyed after the last
application
closed we can finally release all of them.

Christian.

Hey, started to implement this and then realized that by
allocating a page
for each fault indiscrimi

Re: [PATCH v3 01/12] drm: Add dummy page per device or GEM object

2021-01-11 Thread Andrey Grodzovsky


On 1/11/21 11:15 AM, Daniel Vetter wrote:

On Mon, Jan 11, 2021 at 05:13:56PM +0100, Daniel Vetter wrote:

On Fri, Jan 08, 2021 at 04:49:55PM +, Grodzovsky, Andrey wrote:

Ok then, I guess I will proceed with the dummy pages list implementation then.

Andrey


From: Koenig, Christian 
Sent: 08 January 2021 09:52
To: Grodzovsky, Andrey ; Daniel Vetter 

Cc: amd-gfx@lists.freedesktop.org ; dri-de...@lists.freedesktop.org ; 
daniel.vet...@ffwll.ch ; r...@kernel.org ; l.st...@pengutronix.de 
; yuq...@gmail.com ; e...@anholt.net ; Deucher, Alexander 
; gre...@linuxfoundation.org ; ppaala...@gmail.com 
; Wentland, Harry 
Subject: Re: [PATCH v3 01/12] drm: Add dummy page per device or GEM object

Mhm, I'm not aware of any leftover pointer between TTM and GEM and we
worked quite hard on reducing the size of the amdgpu_bo, so another
extra pointer just for that corner case would suck quite a bit.

We have a ton of other pointers in struct amdgpu_bo (or any of its lower
things) which are fairly single-use, so I'm really not much seeing the
point in making this a special case. It also means the lifetime management
becomes a bit iffy, since we can't throw away the dummy page when the last
reference to the bo is released (since we don't track it there), but only
when the last pointer to the device is released. Potentially this means a
pile of dangling pages hanging around for too long.

Also if you really, really, really want to have this list, please don't
reinvent it since we have it already. drmm_ is exactly meant for resources
that should be freed when the final drm_device reference disappears.
-Daniel



Can you elaborate ? We still need to actually implement the list, but you want me
to use
drmm_add_action for its destruction instead of explicitly doing it (like I'm
already doing from ttm_bo_device_release) ?


Andrey


  

If you need some ideas for redundant pointers:
- destroy callback (kinda not cool to not have this const anyway), we
   could refcount it all with the overall gem bo. Quite a bit of work.
- bdev pointer, if we move the device ttm stuff into struct drm_device, or
   create a common struct ttm_device, we can ditch that
- We could probably merge a few of the fields and find 8 bytes somewhere
- we still have 2 krefs, would probably need to fix that before we can
   merge the destroy callbacks

So there's plenty of room still, if the size of a bo struct is really that
critical. Imo it's not.



Christian.

Am 08.01.21 um 15:46 schrieb Andrey Grodzovsky:

Daniel had some objections to this (see bellow) and so I guess I need
you both to agree on the approach before I proceed.

Andrey

On 1/8/21 9:33 AM, Christian König wrote:

Am 08.01.21 um 15:26 schrieb Andrey Grodzovsky:

Hey Christian, just a ping.

Was there any question for me here?

As far as I can see the best approach would still be to fill the VMA
with a single dummy page and avoid pointers in the GEM object.

Christian.


Andrey

On 1/7/21 11:37 AM, Andrey Grodzovsky wrote:

On 1/7/21 11:30 AM, Daniel Vetter wrote:

On Thu, Jan 07, 2021 at 11:26:52AM -0500, Andrey Grodzovsky wrote:

On 1/7/21 11:21 AM, Daniel Vetter wrote:

On Tue, Jan 05, 2021 at 04:04:16PM -0500, Andrey Grodzovsky wrote:

On 11/23/20 3:01 AM, Christian König wrote:

Am 23.11.20 um 05:54 schrieb Andrey Grodzovsky:

On 11/21/20 9:15 AM, Christian König wrote:

Am 21.11.20 um 06:21 schrieb Andrey Grodzovsky:

Will be used to reroute CPU mapped BO's page faults once
device is removed.

Uff, one page for each exported DMA-buf? That's not
something we can do.

We need to find a different approach here.

Can't we call alloc_page() on each fault and link them together
so they are freed when the device is finally reaped?

For sure better to optimize and allocate on demand when we reach
this corner case, but why the linking ?
Shouldn't drm_prime_gem_destroy be good enough place to free ?

I want to avoid keeping the page in the GEM object.

What we can do is to allocate a page on demand for each fault
and link
them together in the bdev instead.

And when the bdev is then finally destroyed after the last
application
closed we can finally release all of them.

Christian.

Hey, started to implement this and then realized that by
allocating a page
for each fault indiscriminately
we will be allocating a new page for each faulting virtual
address within a
VA range belonging to the same BO
and this is obviously too much and not the intention. Should I
instead use
let's say a hashtable with the hash
key being faulting BO address to actually keep allocating and
reusing same
dummy zero page per GEM BO
(or for that matter DRM file object address for non imported
BOs) ?

Why do we need a hashtable? All the sw structures to track this
should
still be around:
- if gem_bo->dma_buf is set the buffer is currently exported as
a dma-buf,
 so defensively allocate a per-bo p

Re: [PATCH v3 01/12] drm: Add dummy page per device or GEM object

2021-01-08 Thread Andrey Grodzovsky
Daniel had some objections to this (see below) and so I guess I need you both
to agree on the approach before I proceed.


Andrey

On 1/8/21 9:33 AM, Christian König wrote:

Am 08.01.21 um 15:26 schrieb Andrey Grodzovsky:

Hey Christian, just a ping.


Was there any question for me here?

As far as I can see the best approach would still be to fill the VMA with a 
single dummy page and avoid pointers in the GEM object.


Christian.



Andrey

On 1/7/21 11:37 AM, Andrey Grodzovsky wrote:


On 1/7/21 11:30 AM, Daniel Vetter wrote:

On Thu, Jan 07, 2021 at 11:26:52AM -0500, Andrey Grodzovsky wrote:

On 1/7/21 11:21 AM, Daniel Vetter wrote:

On Tue, Jan 05, 2021 at 04:04:16PM -0500, Andrey Grodzovsky wrote:

On 11/23/20 3:01 AM, Christian König wrote:

Am 23.11.20 um 05:54 schrieb Andrey Grodzovsky:

On 11/21/20 9:15 AM, Christian König wrote:

Am 21.11.20 um 06:21 schrieb Andrey Grodzovsky:

Will be used to reroute CPU mapped BO's page faults once
device is removed.

Uff, one page for each exported DMA-buf? That's not something we can do.

We need to find a different approach here.

Can't we call alloc_page() on each fault and link them together
so they are freed when the device is finally reaped?

For sure better to optimize and allocate on demand when we reach
this corner case, but why the linking ?
Shouldn't drm_prime_gem_destroy be good enough place to free ?

I want to avoid keeping the page in the GEM object.

What we can do is to allocate a page on demand for each fault and link
them together in the bdev instead.

And when the bdev is then finally destroyed after the last application
closed we can finally release all of them.

Christian.

Hey, started to implement this and then realized that by allocating a page
for each fault indiscriminately
we will be allocating a new page for each faulting virtual address within a
VA range belonging to the same BO
and this is obviously too much and not the intention. Should I instead use
let's say a hashtable with the hash
key being faulting BO address to actually keep allocating and reusing same
dummy zero page per GEM BO
(or for that matter DRM file object address for non imported BOs) ?

Why do we need a hashtable? All the sw structures to track this should
still be around:
- if gem_bo->dma_buf is set the buffer is currently exported as a dma-buf,
    so defensively allocate a per-bo page
- otherwise allocate a per-file page


That's exactly what we have in the current implementation



Or is the idea to save the struct page * pointer? That feels a bit like
over-optimizing stuff. Better to have a simple implementation first and
then tune it if (and only if) any part of it becomes a problem for normal
usage.


Exactly - the idea is to avoid adding extra pointer to drm_gem_object,
Christian suggested to instead keep a linked list of dummy pages to be
allocated on demand once we hit a vm_fault. I will then also prefault the 
entire

VA range from vma->vm_end - vma->vm_start to vma->vm_end and map them
to that single dummy page.

This strongly feels like premature optimization. If you're worried about
the overhead on amdgpu, pay down the debt by removing one of the redundant
pointers between gem and ttm bo structs (I think we still have some) :-)

Until we've nuked these easy&obvious ones we shouldn't play "avoid 1
pointer just because" games with hashtables.
-Daniel



Well, if you and Christian can agree on this approach and suggest maybe what 
pointer is
redundant and can be removed from GEM struct so we can use the 'credit' to 
add the dummy page

to GEM I will be happy to follow through.

P.S. The hash table is off the table anyway and we are talking only about a linked
list here, since by prefaulting
the entire VA range for a vmf->vma I will be avoiding redundant page faults
to the same VMA VA range, and so I
don't need to search for and reuse an existing dummy page but can simply create a
new one for each next fault.


Andrey



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v3 01/12] drm: Add dummy page per device or GEM object

2021-01-08 Thread Andrey Grodzovsky

Hey Christian, just a ping.

Andrey

On 1/7/21 11:37 AM, Andrey Grodzovsky wrote:


On 1/7/21 11:30 AM, Daniel Vetter wrote:

On Thu, Jan 07, 2021 at 11:26:52AM -0500, Andrey Grodzovsky wrote:

On 1/7/21 11:21 AM, Daniel Vetter wrote:

On Tue, Jan 05, 2021 at 04:04:16PM -0500, Andrey Grodzovsky wrote:

On 11/23/20 3:01 AM, Christian König wrote:

Am 23.11.20 um 05:54 schrieb Andrey Grodzovsky:

On 11/21/20 9:15 AM, Christian König wrote:

Am 21.11.20 um 06:21 schrieb Andrey Grodzovsky:

Will be used to reroute CPU mapped BO's page faults once
device is removed.

Uff, one page for each exported DMA-buf? That's not something we can do.

We need to find a different approach here.

Can't we call alloc_page() on each fault and link them together
so they are freed when the device is finally reaped?

For sure better to optimize and allocate on demand when we reach
this corner case, but why the linking ?
Shouldn't drm_prime_gem_destroy be good enough place to free ?

I want to avoid keeping the page in the GEM object.

What we can do is to allocate a page on demand for each fault and link
them together in the bdev instead.

And when the bdev is then finally destroyed after the last application
closed we can finally release all of them.

Christian.

Hey, started to implement this and then realized that by allocating a page
for each fault indiscriminately
we will be allocating a new page for each faulting virtual address within a
VA range belonging to the same BO
and this is obviously too much and not the intention. Should I instead use
let's say a hashtable with the hash
key being faulting BO address to actually keep allocating and reusing same
dummy zero page per GEM BO
(or for that matter DRM file object address for non imported BOs) ?

Why do we need a hashtable? All the sw structures to track this should
still be around:
- if gem_bo->dma_buf is set the buffer is currently exported as a dma-buf,
    so defensively allocate a per-bo page
- otherwise allocate a per-file page


That's exactly what we have in the current implementation



Or is the idea to save the struct page * pointer? That feels a bit like
over-optimizing stuff. Better to have a simple implementation first and
then tune it if (and only if) any part of it becomes a problem for normal
usage.


Exactly - the idea is to avoid adding extra pointer to drm_gem_object,
Christian suggested to instead keep a linked list of dummy pages to be
allocated on demand once we hit a vm_fault. I will then also prefault the 
entire

VA range from vma->vm_end - vma->vm_start to vma->vm_end and map them
to that single dummy page.

This strongly feels like premature optimization. If you're worried about
the overhead on amdgpu, pay down the debt by removing one of the redundant
pointers between gem and ttm bo structs (I think we still have some) :-)

Until we've nuked these easy&obvious ones we shouldn't play "avoid 1
pointer just because" games with hashtables.
-Daniel



Well, if you and Christian can agree on this approach and suggest maybe what 
pointer is
redundant and can be removed from GEM struct so we can use the 'credit' to add 
the dummy page

to GEM I will be happy to follow through.

P.S. The hash table is off the table anyway and we are talking only about a linked
list here, since by prefaulting
the entire VA range for a vmf->vma I will be avoiding redundant page faults
to the same VMA VA range, and so I
don't need to search for and reuse an existing dummy page but can simply create a
new one for each next fault.


Andrey





Andrey



-Daniel


Andrey



Andrey



Regards,
Christian.


Signed-off-by: Andrey Grodzovsky 
---
    drivers/gpu/drm/drm_file.c  |  8 ++++++++
    drivers/gpu/drm/drm_prime.c | 10 ++++++++++
    include/drm/drm_file.h  |  2 ++
    include/drm/drm_gem.h   |  2 ++
    4 files changed, 22 insertions(+)

diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c
index 0ac4566..ff3d39f 100644
--- a/drivers/gpu/drm/drm_file.c
+++ b/drivers/gpu/drm/drm_file.c
@@ -193,6 +193,12 @@ struct drm_file *drm_file_alloc(struct drm_minor 
*minor)

    goto out_prime_destroy;
    }
    +    file->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+    if (!file->dummy_page) {
+    ret = -ENOMEM;
+    goto out_prime_destroy;
+    }
+
    return file;
      out_prime_destroy:
@@ -289,6 +295,8 @@ void drm_file_free(struct drm_file *file)
    if (dev->driver->postclose)
    dev->driver->postclose(dev, file);
    +    __free_page(file->dummy_page);
+
drm_prime_destroy_file_private(&file->prime);
WARN_ON(!list_empty(&file->event_list));
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index 1693aa7..987b45c 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -335,6 +335,13 @@ int drm_gem_prime_fd_to_handle(struct drm_device 
*dev,

      ret = drm

Re: [PATCH v3 01/12] drm: Add dummy page per device or GEM object

2021-01-07 Thread Andrey Grodzovsky


On 1/7/21 11:30 AM, Daniel Vetter wrote:

On Thu, Jan 07, 2021 at 11:26:52AM -0500, Andrey Grodzovsky wrote:

On 1/7/21 11:21 AM, Daniel Vetter wrote:

On Tue, Jan 05, 2021 at 04:04:16PM -0500, Andrey Grodzovsky wrote:

On 11/23/20 3:01 AM, Christian König wrote:

Am 23.11.20 um 05:54 schrieb Andrey Grodzovsky:

On 11/21/20 9:15 AM, Christian König wrote:

Am 21.11.20 um 06:21 schrieb Andrey Grodzovsky:

Will be used to reroute CPU mapped BO's page faults once
device is removed.

Uff, one page for each exported DMA-buf? That's not something we can do.

We need to find a different approach here.

Can't we call alloc_page() on each fault and link them together
so they are freed when the device is finally reaped?

For sure better to optimize and allocate on demand when we reach
this corner case, but why the linking ?
Shouldn't drm_prime_gem_destroy be good enough place to free ?

I want to avoid keeping the page in the GEM object.

What we can do is to allocate a page on demand for each fault and link
them together in the bdev instead.

And when the bdev is then finally destroyed after the last application
closed we can finally release all of them.

Christian.

Hey, started to implement this and then realized that by allocating a page
for each fault indiscriminately
we will be allocating a new page for each faulting virtual address within a
VA range belonging to the same BO
and this is obviously too much and not the intention. Should I instead use
let's say a hashtable with the hash
key being faulting BO address to actually keep allocating and reusing same
dummy zero page per GEM BO
(or for that matter DRM file object address for non imported BOs) ?

Why do we need a hashtable? All the sw structures to track this should
still be around:
- if gem_bo->dma_buf is set the buffer is currently exported as a dma-buf,
so defensively allocate a per-bo page
- otherwise allocate a per-file page


That's exactly what we have in the current implementation



Or is the idea to save the struct page * pointer? That feels a bit like
over-optimizing stuff. Better to have a simple implementation first and
then tune it if (and only if) any part of it becomes a problem for normal
usage.


Exactly - the idea is to avoid adding extra pointer to drm_gem_object,
Christian suggested to instead keep a linked list of dummy pages to be
allocated on demand once we hit a vm_fault. I will then also prefault the entire
VA range from vma->vm_end - vma->vm_start to vma->vm_end and map them
to that single dummy page.

This strongly feels like premature optimization. If you're worried about
the overhead on amdgpu, pay down the debt by removing one of the redundant
pointers between gem and ttm bo structs (I think we still have some) :-)

Until we've nuked these easy&obvious ones we shouldn't play "avoid 1
pointer just because" games with hashtables.
-Daniel



Well, if you and Christian can agree on this approach and suggest maybe what 
pointer is
redundant and can be removed from GEM struct so we can use the 'credit' to add 
the dummy page

to GEM I will be happy to follow through.

P.S. The hash table is off the table anyway and we are talking only about a linked
list here, since by prefaulting
the entire VA range for a vmf->vma I will be avoiding redundant page faults to
the same VMA VA range, and so I
don't need to search for and reuse an existing dummy page but can simply create a
new one for each next fault.


Andrey





Andrey



-Daniel


Andrey



Andrey



Regards,
Christian.


Signed-off-by: Andrey Grodzovsky 
---
    drivers/gpu/drm/drm_file.c  |  8 ++++++++
    drivers/gpu/drm/drm_prime.c | 10 ++++++++++
    include/drm/drm_file.h  |  2 ++
    include/drm/drm_gem.h   |  2 ++
    4 files changed, 22 insertions(+)

diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c
index 0ac4566..ff3d39f 100644
--- a/drivers/gpu/drm/drm_file.c
+++ b/drivers/gpu/drm/drm_file.c
@@ -193,6 +193,12 @@ struct drm_file *drm_file_alloc(struct drm_minor *minor)
    goto out_prime_destroy;
    }
    +    file->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+    if (!file->dummy_page) {
+    ret = -ENOMEM;
+    goto out_prime_destroy;
+    }
+
    return file;
      out_prime_destroy:
@@ -289,6 +295,8 @@ void drm_file_free(struct drm_file *file)
    if (dev->driver->postclose)
    dev->driver->postclose(dev, file);
    +    __free_page(file->dummy_page);
+
    drm_prime_destroy_file_private(&file->prime);
      WARN_ON(!list_empty(&file->event_list));
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index 1693aa7..987b45c 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -335,6 +335,13 @@ int drm_gem_prime_fd_to_handle(struct drm_device *dev,
      ret = drm_prime_add_buf_handle(&file_priv->prime,
    dma_b

Re: [PATCH v3 01/12] drm: Add dummy page per device or GEM object

2021-01-07 Thread Andrey Grodzovsky

Typo correction below

On 1/7/21 11:26 AM, Andrey Grodzovsky wrote:


Or is the idea to save the struct page * pointer? That feels a bit like
over-optimizing stuff. Better to have a simple implementation first and
then tune it if (and only if) any part of it becomes a problem for normal
usage.



Exactly - the idea is to avoid adding extra pointer to drm_gem_object,
Christian suggested to instead keep a linked list of dummy pages to be
allocated on demand once we hit a vm_fault. I will then also prefault the entire
VA range from vma->vm_end - vma->vm_start to vma->vm_end and map them
to that single dummy page.



Obviously the range is from  vma->vm_start to vma->vm_end

Andrey
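
For reference, a minimal sketch of that prefaulting idea with the corrected
vm_start..vm_end range - vmf_insert_pfn() is the real helper, the surrounding
function name is made up:

static vm_fault_t map_vma_to_dummy_page(struct vm_fault *vmf,
                                        struct page *dummy_page)
{
        struct vm_area_struct *vma = vmf->vma;
        unsigned long pfn = page_to_pfn(dummy_page);
        unsigned long addr;
        vm_fault_t ret = VM_FAULT_NOPAGE;

        /* map every address in the VMA to the same dummy page, so
         * later faults on this range never reach the driver again */
        for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
                ret = vmf_insert_pfn(vma, addr, pfn);
                if (ret & VM_FAULT_ERROR)
                        break;
        }

        return ret;
}

(vmf_insert_pfn() assumes a VM_PFNMAP mapping, which TTM's BO VMAs are.)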




Andrey 
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v3 01/12] drm: Add dummy page per device or GEM object

2021-01-07 Thread Andrey Grodzovsky


On 1/7/21 11:21 AM, Daniel Vetter wrote:

On Tue, Jan 05, 2021 at 04:04:16PM -0500, Andrey Grodzovsky wrote:

On 11/23/20 3:01 AM, Christian König wrote:

Am 23.11.20 um 05:54 schrieb Andrey Grodzovsky:

On 11/21/20 9:15 AM, Christian König wrote:

Am 21.11.20 um 06:21 schrieb Andrey Grodzovsky:

Will be used to reroute CPU mapped BO's page faults once
device is removed.

Uff, one page for each exported DMA-buf? That's not something we can do.

We need to find a different approach here.

Can't we call alloc_page() on each fault and link them together
so they are freed when the device is finally reaped?


For sure better to optimize and allocate on demand when we reach
this corner case, but why the linking ?
Shouldn't drm_prime_gem_destroy be good enough place to free ?

I want to avoid keeping the page in the GEM object.

What we can do is to allocate a page on demand for each fault and link
them together in the bdev instead.

And when the bdev is then finally destroyed after the last application
closed we can finally release all of them.

Christian.


Hey, started to implement this and then realized that by allocating a page
for each fault indiscriminately
we will be allocating a new page for each faulting virtual address within a
VA range belonging to the same BO
and this is obviously too much and not the intention. Should I instead use
let's say a hashtable with the hash
key being faulting BO address to actually keep allocating and reusing same
dummy zero page per GEM BO
(or for that matter DRM file object address for non imported BOs) ?

Why do we need a hashtable? All the sw structures to track this should
still be around:
- if gem_bo->dma_buf is set the buffer is currently exported as a dma-buf,
   so defensively allocate a per-bo page
- otherwise allocate a per-file page
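
As a sketch, that selection rule using the dummy_page fields the patch below
adds to drm_gem_object and drm_file (the helper name is made up;
gem_bo->dma_buf is set while the buffer is exported):

static struct page *gem_pick_dummy_page(struct drm_gem_object *obj,
                                        struct drm_file *file)
{
        if (obj->dma_buf)
                return obj->dummy_page; /* exported: per-bo page */

        return file->dummy_page;        /* otherwise: per-file page */
}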



That's exactly what we have in the current implementation




Or is the idea to save the struct page * pointer? That feels a bit like
over-optimizing stuff. Better to have a simple implementation first and
then tune it if (and only if) any part of it becomes a problem for normal
usage.



Exactly - the idea is to avoid adding extra pointer to drm_gem_object,
Christian suggested to instead keep a linked list of dummy pages to be
allocated on demand once we hit a vm_fault. I will then also prefault the entire
VA range from vma->vm_end - vma->vm_start to vma->vm_end and map them
to that single dummy page.

Andrey



-Daniel


Andrey



Andrey



Regards,
Christian.


Signed-off-by: Andrey Grodzovsky 
---
   drivers/gpu/drm/drm_file.c  |  8 ++++++++
   drivers/gpu/drm/drm_prime.c | 10 ++++++++++
   include/drm/drm_file.h  |  2 ++
   include/drm/drm_gem.h   |  2 ++
   4 files changed, 22 insertions(+)

diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c
index 0ac4566..ff3d39f 100644
--- a/drivers/gpu/drm/drm_file.c
+++ b/drivers/gpu/drm/drm_file.c
@@ -193,6 +193,12 @@ struct drm_file *drm_file_alloc(struct drm_minor *minor)
   goto out_prime_destroy;
   }
   +    file->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+    if (!file->dummy_page) {
+    ret = -ENOMEM;
+    goto out_prime_destroy;
+    }
+
   return file;
     out_prime_destroy:
@@ -289,6 +295,8 @@ void drm_file_free(struct drm_file *file)
   if (dev->driver->postclose)
   dev->driver->postclose(dev, file);
   +    __free_page(file->dummy_page);
+
   drm_prime_destroy_file_private(&file->prime);
     WARN_ON(!list_empty(&file->event_list));
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index 1693aa7..987b45c 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -335,6 +335,13 @@ int drm_gem_prime_fd_to_handle(struct drm_device *dev,
     ret = drm_prime_add_buf_handle(&file_priv->prime,
   dma_buf, *handle);
+
+    if (!ret) {
+    obj->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+    if (!obj->dummy_page)
+    ret = -ENOMEM;
+    }
+
   mutex_unlock(&file_priv->prime.lock);
   if (ret)
   goto fail;
@@ -1020,6 +1027,9 @@ void drm_prime_gem_destroy(struct
drm_gem_object *obj, struct sg_table *sg)
   dma_buf_unmap_attachment(attach, sg, DMA_BIDIRECTIONAL);
   dma_buf = attach->dmabuf;
   dma_buf_detach(attach->dmabuf, attach);
+
+    __free_page(obj->dummy_page);
+
   /* remove the reference */
   dma_buf_put(dma_buf);
   }
diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h
index 716990b..2a011fc 100644
--- a/include/drm/drm_file.h
+++ b/include/drm/drm_file.h
@@ -346,6 +346,8 @@ struct drm_file {
    */
   struct drm_prime_file_private prime;
   +    struct page *dummy_page;
+
   /* private: */
   #if IS_ENABLED(CONFIG_DRM_LEGACY)
   unsigned long lock_count; /* DRI1 legacy lock count */
diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h
i

Re: [PATCH v3 01/12] drm: Add dummy page per device or GEM object

2021-01-05 Thread Andrey Grodzovsky


On 11/23/20 3:01 AM, Christian König wrote:

Am 23.11.20 um 05:54 schrieb Andrey Grodzovsky:


On 11/21/20 9:15 AM, Christian König wrote:

Am 21.11.20 um 06:21 schrieb Andrey Grodzovsky:

Will be used to reroute CPU mapped BO's page faults once
device is removed.


Uff, one page for each exported DMA-buf? That's not something we can do.

We need to find a different approach here.

Can't we call alloc_page() on each fault and link them together so they are 
freed when the device is finally reaped?



For sure better to optimize and allocate on demand when we reach this corner 
case, but why the linking ?

Shouldn't drm_prime_gem_destroy be good enough place to free ?


I want to avoid keeping the page in the GEM object.

What we can do is to allocate a page on demand for each fault and link them
together in the bdev instead.


And when the bdev is then finally destroyed after the last application closed 
we can finally release all of them.


Christian.
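
A rough sketch of what Christian suggests here - the container struct and the
dummy_pages/dummy_lock fields are made-up names standing in for the real
ttm/drm device structure:

struct dummy_page {
        struct list_head node;
        struct page *page;
};

struct my_bdev {
        spinlock_t dummy_lock;
        struct list_head dummy_pages;   /* all dummy pages handed out */
        /* ... */
};

/* called from the fault handler */
static struct page *bdev_alloc_dummy_page(struct my_bdev *bdev)
{
        struct dummy_page *entry = kzalloc(sizeof(*entry), GFP_KERNEL);

        if (!entry)
                return NULL;

        entry->page = alloc_page(GFP_KERNEL | __GFP_ZERO);
        if (!entry->page) {
                kfree(entry);
                return NULL;
        }

        spin_lock(&bdev->dummy_lock);
        list_add(&entry->node, &bdev->dummy_pages);
        spin_unlock(&bdev->dummy_lock);

        return entry->page;
}

/* called once, when the device is finally torn down */
static void bdev_free_dummy_pages(struct my_bdev *bdev)
{
        struct dummy_page *entry, *tmp;

        list_for_each_entry_safe(entry, tmp, &bdev->dummy_pages, node) {
                __free_page(entry->page);
                kfree(entry);
        }
}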



Hey, started to implement this and then realized that by allocating a page for 
each fault indiscriminately
we will be allocating a new page for each faulting virtual address within a VA 
range belonging to the same BO
and this is obviously too much and not the intention. Should I instead use let's 
say a hashtable with the hash
key being faulting BO address to actually keep allocating and reusing same dummy 
zero page per GEM BO

(or for that matter DRM file object address for non imported BOs) ?

Andrey






Andrey




Regards,
Christian.



Signed-off-by: Andrey Grodzovsky 
---
  drivers/gpu/drm/drm_file.c  |  8 ++++++++
  drivers/gpu/drm/drm_prime.c | 10 ++++++++++
  include/drm/drm_file.h  |  2 ++
  include/drm/drm_gem.h   |  2 ++
  4 files changed, 22 insertions(+)

diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c
index 0ac4566..ff3d39f 100644
--- a/drivers/gpu/drm/drm_file.c
+++ b/drivers/gpu/drm/drm_file.c
@@ -193,6 +193,12 @@ struct drm_file *drm_file_alloc(struct drm_minor *minor)
  goto out_prime_destroy;
  }
  +    file->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+    if (!file->dummy_page) {
+    ret = -ENOMEM;
+    goto out_prime_destroy;
+    }
+
  return file;
    out_prime_destroy:
@@ -289,6 +295,8 @@ void drm_file_free(struct drm_file *file)
  if (dev->driver->postclose)
  dev->driver->postclose(dev, file);
  +    __free_page(file->dummy_page);
+
  drm_prime_destroy_file_private(&file->prime);
    WARN_ON(!list_empty(&file->event_list));
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index 1693aa7..987b45c 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -335,6 +335,13 @@ int drm_gem_prime_fd_to_handle(struct drm_device *dev,
    ret = drm_prime_add_buf_handle(&file_priv->prime,
  dma_buf, *handle);
+
+    if (!ret) {
+    obj->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+    if (!obj->dummy_page)
+    ret = -ENOMEM;
+    }
+
  mutex_unlock(&file_priv->prime.lock);
  if (ret)
  goto fail;
@@ -1020,6 +1027,9 @@ void drm_prime_gem_destroy(struct drm_gem_object 
*obj, struct sg_table *sg)

  dma_buf_unmap_attachment(attach, sg, DMA_BIDIRECTIONAL);
  dma_buf = attach->dmabuf;
  dma_buf_detach(attach->dmabuf, attach);
+
+    __free_page(obj->dummy_page);
+
  /* remove the reference */
  dma_buf_put(dma_buf);
  }
diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h
index 716990b..2a011fc 100644
--- a/include/drm/drm_file.h
+++ b/include/drm/drm_file.h
@@ -346,6 +346,8 @@ struct drm_file {
   */
  struct drm_prime_file_private prime;
  +    struct page *dummy_page;
+
  /* private: */
  #if IS_ENABLED(CONFIG_DRM_LEGACY)
  unsigned long lock_count; /* DRI1 legacy lock count */
diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h
index 337a483..76a97a3 100644
--- a/include/drm/drm_gem.h
+++ b/include/drm/drm_gem.h
@@ -311,6 +311,8 @@ struct drm_gem_object {
   *
   */
  const struct drm_gem_object_funcs *funcs;
+
+    struct page *dummy_page;
  };
    /**



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH v3 05/12] drm/ttm: Expose ttm_tt_unpopulate for driver use

2021-01-04 Thread Andrey Grodzovsky
Hey Daniel, back from vacation and going over our last long thread I think you
didn't reply

to my last question below (or at least I can't find it).

Andrey

On 12/17/20 4:13 PM, Andrey Grodzovsky wrote:

Ok, so I assumed that with vmap_local you were trying to solve the problem of
quick reinsertion
of another device into same MMIO range that my driver still points too but
actually are you trying to solve
the issue of exported dma buffers outliving the device ? For this we have
drm_device refcount in the GEM layer
i think.

That's completely different lifetime problems. Don't mix them up :-)
One problem is the hardware disappearing, and for that we _have_ to
guarantee timeliness, or otherwise the pci subsystem gets pissed
(since like you say, a new device might show up and need its mmio
bars assigned to io ranges). The other is lifetime of the software
objects we use as interfaces, both from userspace and from other
kernel drivers. There we fundamentally can't enforce timely cleanup,
and have to resort to refcounting.



So regarding the second issue, as I mentioned above, don't we already use
drm_dev_get/put
for exported BOs ? Earlier in this discussion you mentioned that we are ok for
dma buffers, since
we already have the refcounting at the GEM layer, and the real life cycle
problem we have is the dma_fences,
for which there is no drm_dev refcounting. Seems to me then that vmap_local is
superfluous, because
of the refcounting we already have for exported dma_bufs, and for dma_fences it
won't help.


Andrey 
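
The GEM-layer refcounting referred to here lives in drm_prime.c:
drm_gem_dmabuf_export() takes a drm_dev_get() and drm_gem_dmabuf_release()
drops it, so an exported dma-buf keeps the drm_device alive. Abbreviated
sketch of those mainline helpers, from memory:

struct dma_buf *drm_gem_dmabuf_export(struct drm_device *dev,
                                      struct dma_buf_export_info *exp_info)
{
        struct dma_buf *dma_buf = dma_buf_export(exp_info);

        if (IS_ERR(dma_buf))
                return dma_buf;

        drm_dev_get(dev);               /* dma-buf holds a device reference */
        drm_gem_object_get(exp_info->priv);

        return dma_buf;
}

void drm_gem_dmabuf_release(struct dma_buf *dma_buf)
{
        struct drm_gem_object *obj = dma_buf->priv;
        struct drm_device *dev = obj->dev;

        drm_gem_object_put(obj);
        drm_dev_put(dev);               /* drops the export-time reference */
}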
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v3 05/12] drm/ttm: Expose ttm_tt_unpopulate for driver use

2020-12-17 Thread Andrey Grodzovsky


On 12/17/20 3:42 PM, Daniel Vetter wrote:

On Thu, Dec 17, 2020 at 8:19 PM Andrey Grodzovsky
 wrote:


On 12/17/20 7:01 AM, Daniel Vetter wrote:

On Wed, Dec 16, 2020 at 07:20:02PM -0500, Andrey Grodzovsky wrote:

On 12/16/20 6:15 PM, Daniel Vetter wrote:

On Wed, Dec 16, 2020 at 7:26 PM Andrey Grodzovsky
 wrote:

On 12/16/20 12:12 PM, Daniel Vetter wrote:

On Wed, Dec 16, 2020 at 5:18 PM Christian König
 wrote:

Am 16.12.20 um 17:13 schrieb Andrey Grodzovsky:

On 12/16/20 9:21 AM, Daniel Vetter wrote:

On Wed, Dec 16, 2020 at 9:04 AM Christian König
 wrote:

Am 15.12.20 um 21:18 schrieb Andrey Grodzovsky:

[SNIP]

While we can't control user application accesses to the mapped
buffers explicitly and hence we use page fault rerouting
I am thinking that in this  case we may be able to sprinkle
drm_dev_enter/exit in any such sensitive place were we might
CPU access a DMA buffer from the kernel ?

Yes, I fear we are going to need that.


Things like CPU page table updates, ring buffer accesses and FW
memcpy ? Is there other places ?

Puh, good question. I have no idea.


Another point is that at this point the driver shouldn't access any
such buffers as we are at the process finishing the device.
AFAIK there is no page fault mechanism for kernel mappings so I
don't think there is anything else to do ?

Well there is a page fault handler for kernel mappings, but that one
just prints the stack trace into the system log and calls BUG(); :)

Long story short we need to avoid any access to released pages after
unplug. No matter if it's from the kernel or userspace.

I was just about to start guarding with drm_dev_enter/exit CPU
accesses from kernel to GTT or VRAM buffers, but then I looked more in
the code
and it seems like ttm_tt_unpopulate just deletes DMA mappings (for the
sake of device to main memory access). The kernel page table is not
touched
until the last bo refcount is dropped and the bo is released
(ttm_bo_release->destroy->amdgpu_bo_destroy->amdgpu_bo_kunmap). This
is both
for GTT BOs mapped to kernel by kmap (or vmap) and for VRAM BOs mapped
by ioremap. So as I see it, nothing bad will happen after we
unpopulate a BO while we still try to use a kernel mapping for it:
system memory pages backing GTT BOs are still mapped and not freed, and
for
VRAM BOs the same holds for the IO physical ranges mapped into the kernel
page table, since iounmap wasn't called yet.

The problem is the system pages would be freed, and if the kernel driver
still happily writes to them we are pretty much busted, because we write
to freed up memory.

OK, I see I missed ttm_tt_unpopulate->..->ttm_pool_free which will
release
the GTT BO pages. But then isn't there a problem in ttm_bo_release, since
ttm_bo_cleanup_memtype_use, which also leads to pages release, comes
before bo->destroy which unmaps the pages from the kernel page table ? Won't
we end up writing to freed memory in this time interval ? Don't we
need to postpone pages freeing to after kernel page table unmapping ?

BOs are only destroyed when there is a guarantee that nobody is
accessing them any more.

The problem here is that the pages as well as the VRAM can be
immediately reused after the hotplug event.


Similar for vram, if this is actual hotunplug and then replug, there's
going to be a different device behind the same mmio bar range most
likely (the higher bridges all this have the same windows assigned),

No idea how this actually works but if we haven't called iounmap yet
doesn't it mean that those physical ranges that are still mapped into
page
table should be reserved and cannot be reused for another
device ? As a guess, maybe another subrange from the higher bridge's
total
range will be allocated.

Nope, the PCIe subsystem doesn't care about any ioremap still active for
a range when it is hotplugged.


and that's bad news if we keep using it for current drivers. So we
really have to point all these cpu ptes to some other place.

We can't just unmap it without syncing against any in kernel accesses
to those buffers
and since page faulting technique we use for user mapped buffers seems
to not be possible
for kernel mapped buffers I am not sure how to do it gracefully...

We could try to replace the kmap with a dummy page under the hood, but
that is extremely tricky.

Especially since BOs which are just 1 page in size could point to the
linear mapping directly.

I think it's just more work. Essentially
- convert as much as possible of the kernel mappings to vmap_local,
which Thomas Zimmermann is rolling out. That way a dma_resv_lock will
serve as a barrier, and ofc any new vmap needs to fail or hand out a
dummy mapping.
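
A very rough sketch of that barrier idea only - this is not the vmap_local
API itself (still under review at the time), just dma_resv_lock used as the
serialization point, with a made-up function name:

static void revoke_kernel_mapping(struct drm_gem_object *obj)
{
        /* blocks until any short-term (vmap_local-style) mapping
         * section, done under the same reservation lock, finishes */
        int ret = dma_resv_lock(obj->resv, NULL);

        if (WARN_ON(ret))       /* cannot fail with a NULL acquire ctx */
                return;

        /* no short-term kernel mapping is live here: safe to tear the
         * mapping down or point it at a dummy page, and any new vmap
         * attempt can now see the device is unplugged and fail */

        dma_resv_unlock(obj->resv);
}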

Read those patches. I am not sure how this helps with protecting
against accesses to released backing pages or IO physical ranges of a BO
which is already mapped during the unplug event ?

By eliminating such users, and replacing them with local maps which
are s

Re: [PATCH v3 05/12] drm/ttm: Expose ttm_tt_unpopulate for driver use

2020-12-17 Thread Andrey Grodzovsky


On 12/17/20 3:48 PM, Daniel Vetter wrote:

On Thu, Dec 17, 2020 at 9:38 PM Andrey Grodzovsky
 wrote:


On 12/17/20 3:10 PM, Christian König wrote:

[SNIP]

By eliminating such users, and replacing them with local maps which

are strictly bound in how long they can exist (and hence we can
serialize against them finishing in our hotunplug code).

Not sure I see how serializing against BO map/unmap helps - our problem as
you described is that once
device is extracted and then something else quickly takes its place in the
PCI topology
and gets assigned same physical IO ranges, then our driver will start
accessing this
new device because our 'zombie' BOs are still pointing to those ranges.

Until your driver's remove callback is finished the ranges stay reserved.


The ranges stay reserved until unmapped which happens in bo->destroy

I'm not sure of that. Why do you think that?


Because of this sequence
ttm_bo_release->destroy->amdgpu_bo_destroy->amdgpu_bo_kunmap->...->iounmap
Is there another place I am missing ?

iounmap is just the mapping, it doesn't reserve anything in the resource tree.

And I don't think we should keep resources reserved past the pci
remove callback, because that would upset the pci subsystem trying to
assign resources to a newly hotplugged pci device.



I assumed we are talking about VA ranges still mapped in the page table. I just
assumed
that part of ioremap is also reservation of the mapped physical ranges. In fact,
if we
can explicitly reserve those ranges (as you mention here), then together with
postponing
system memory pages freeing/releasing back to the page pool until after the BO is
unmapped
from the kernel address space, I believe this could solve the issue of quick HW
reinsertion

and make all the drm_dev_enter/exit guarding obsolete.

Andrey



Also from a quick check amdgpu does not reserve the pci bars it's
using. Somehow most drm drivers don't do that, not exactly sure why,
maybe auto-enumeration of resources just works too good and we don't
need the safety net of kernel/resource.c anymore.
-Daniel



which for most internally allocated buffers is during sw_fini when last drm_put
is called.



If that's not the case, then hotunplug would be fundamentally impossible
ot handle correctly.

Of course all the mmio actions will time out, so it might take some time
to get through it all.


I found that PCI code provides pci_device_is_present function
we can use to avoid timeouts - it reads device vendor and checks if all 1s is
returned
or not. We can call it from within register accessors before trying read/write
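
For reference, a sketch of that check (the wrapper name is made up;
pci_device_is_present() does a config-space vendor ID read, which is the
overhead Christian objects to just below):

static u32 rreg32_checked(struct amdgpu_device *adev, u32 reg)
{
        /* reads the vendor ID; all-1s means the device is gone */
        if (!pci_device_is_present(adev->pdev))
                return 0;

        return readl(adev->rmmio + (reg << 2)); /* dword-indexed regs */
}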

That's way too much overhead! We need to keep that much lower or it will result
in quite a performance drop.

I suggest to rather think about adding drm_dev_enter/exit guards.


Sure, this one is just a bit upstream of the disconnect event. Eventually none
of them is watertight.

Andrey
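
As a sketch, such a guard around an MMIO write - drm_dev_enter/exit and
adev_to_drm() are real, but the wrapper itself is illustrative, not the
posted patch:

static void wreg32_guarded(struct amdgpu_device *adev, u32 reg, u32 v)
{
        int idx;

        /* returns false once drm_dev_unplug() has been called */
        if (!drm_dev_enter(adev_to_drm(adev), &idx))
                return; /* device gone: silently drop the write */

        writel(v, adev->rmmio + (reg << 2));

        drm_dev_exit(idx);
}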



Christian.


Another point regarding serializing - the problem is that some of those BOs are
very long lived, take for example the HW command
ring buffer Christian mentioned before
(amdgpu_ring_init->amdgpu_bo_create_kernel): its life span
is basically the entire time the device exists, and it's destroyed only in
the SW fini stage (when the last drm_dev
reference is dropped). So should I grab its dma_resv_lock from the
amdgpu_pci_remove code and wait
for it to be unmapped before proceeding with the PCI remove code ? This can
take unbounded time, and that's why I don't understand
how serializing will help.

Uh you need to untangle that. After hw cleanup is done no one is allowed
to touch that ringbuffer bo anymore from the kernel.


I would assume we are not allowed to touch it once we identified the device is
gone in order to minimize the chance of accidental writes to some other
device which might now
occupy those IO ranges ?



   That's what
drm_dev_enter/exit guards are for. Like you say we can't wait for all sw
references to disappear.


Yes, it didn't make sense to me why we would use vmap_local for internally
allocated buffers. I think we should also guard register reads/writes for the
same reason as above.



The vmap_local is for mappings done by other drivers, through the dma-buf
interface (where "other drivers" can include fbdev/fbcon, if you use the
generic helpers).
-Daniel


Ok, so I assumed that with vmap_local you were trying to solve the problem of
quick reinsertion
of another device into the same MMIO range that my driver still points to, but
actually are you trying to solve
the issue of exported dma buffers outliving the device ? For this we have
drm_device refcount in the GEM layer
i think.

Andrey



Andrey



It doesn't
solve all your problems, but it's a tool to get there.
-Daniel


Andrey



- handle fbcon somehow. I think shutting it all down should work out.
- worst case keep the system backing storage around for shared dma-buf
until the o

Re: [PATCH v3 05/12] drm/ttm: Expose ttm_tt_unpopulate for driver use

2020-12-17 Thread Andrey Grodzovsky


On 12/17/20 3:10 PM, Christian König wrote:

[SNIP]

By eliminating such users, and replacing them with local maps which

are strictly bound in how long they can exist (and hence we can
serialize against them finishing in our hotunplug code).

Not sure I see how serializing against BO map/unmap helps - our problem as
you described is that once
device is extracted and then something else quickly takes its place in the
PCI topology
and gets assigned same physical IO ranges, then our driver will start 
accessing this

new device because our 'zombie' BOs are still pointing to those ranges.

Until your driver's remove callback is finished the ranges stay reserved.



The ranges stay reserved until unmapped which happens in bo->destroy


I'm not sure of that. Why do you think that?



Because of this sequence 
ttm_bo_release->destroy->amdgpu_bo_destroy->amdgpu_bo_kunmap->...->iounmap

Is there another place I am missing ?





which for most internally allocated buffers is during sw_fini when last drm_put
is called.



If that's not the case, then hotunplug would be fundamentally impossible
ot handle correctly.

Of course all the mmio actions will time out, so it might take some time
to get through it all.



I found that PCI code provides pci_device_is_present function
we can use to avoid timeouts - it reads device vendor and checks if all 1s is 
returned

or not. We can call it from within register accessors before trying read/write


That's way too much overhead! We need to keep that much lower or it will result
in quite a performance drop.


I suggest to rather think about adding drm_dev_enter/exit guards.



Sure, this one is just a bit upstream of the disconnect event. Eventually none
of them is watertight.


Andrey




Christian.




Another point regarding serializing - the problem is that some of those BOs are
very long lived, take for example the HW command
ring buffer Christian mentioned before
(amdgpu_ring_init->amdgpu_bo_create_kernel): its life span
is basically the entire time the device exists, and it's destroyed only in
the SW fini stage (when the last drm_dev
reference is dropped). So should I grab its dma_resv_lock from the
amdgpu_pci_remove code and wait
for it to be unmapped before proceeding with the PCI remove code ? This can
take unbounded time, and that's why I don't understand
how serializing will help.

Uh you need to untangle that. After hw cleanup is done no one is allowed
to touch that ringbuffer bo anymore from the kernel.



I would assume we are not allowed to touch it once we identified the device is
gone in order to minimize the chance of accidental writes to some other 
device which might now

occupy those IO ranges ?



  That's what
drm_dev_enter/exit guards are for. Like you say we can't wait for all sw
references to disappear.



Yes, it didn't make sense to me why we would use vmap_local for internally
allocated buffers. I think we should also guard register reads/writes for the
same reason as above.




The vmap_local is for mappings done by other drivers, through the dma-buf
interface (where "other drivers" can include fbdev/fbcon, if you use the
generic helpers).
-Daniel



Ok, so I assumed that with vmap_local you were trying to solve the problem of 
quick reinsertion
of another device into the same MMIO range that my driver still points to, but
actually are you trying to solve
the issue of exported dma buffers outliving the device ? For this we have 
drm_device refcount in the GEM layer

i think.

Andrey





Andrey



It doesn't
solve all your problems, but it's a tool to get there.
-Daniel


Andrey



- handle fbcon somehow. I think shutting it all down should work out.
- worst case keep the system backing storage around for shared dma-buf
until the other non-dynamic driver releases it. for vram we require
dynamic importers (and maybe it wasn't such a bright idea to allow
pinning of importer buffers, might need to revisit that).

Cheers, Daniel


Christian.


Andrey



-Daniel


Christian.


I loaded the driver with vm_update_mode=3,
meaning all VM updates are done using the CPU, and haven't seen any OOPSes after
removing the device. I guess I can test it more by allocating GTT and
VRAM BOs
and trying to read/write to them after the device is removed.

Andrey



Regards,
Christian.


Andrey

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx








___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v3 05/12] drm/ttm: Expose ttm_tt_unpopulate for driver use

2020-12-17 Thread Andrey Grodzovsky


On 12/17/20 7:01 AM, Daniel Vetter wrote:

On Wed, Dec 16, 2020 at 07:20:02PM -0500, Andrey Grodzovsky wrote:

On 12/16/20 6:15 PM, Daniel Vetter wrote:

On Wed, Dec 16, 2020 at 7:26 PM Andrey Grodzovsky
 wrote:

On 12/16/20 12:12 PM, Daniel Vetter wrote:

On Wed, Dec 16, 2020 at 5:18 PM Christian König
 wrote:

Am 16.12.20 um 17:13 schrieb Andrey Grodzovsky:

On 12/16/20 9:21 AM, Daniel Vetter wrote:

On Wed, Dec 16, 2020 at 9:04 AM Christian König
 wrote:

Am 15.12.20 um 21:18 schrieb Andrey Grodzovsky:

[SNIP]

While we can't control user application accesses to the mapped
buffers explicitly and hence we use page fault rerouting
I am thinking that in this  case we may be able to sprinkle
drm_dev_enter/exit in any such sensitive place were we might
CPU access a DMA buffer from the kernel ?

Yes, I fear we are going to need that.


Things like CPU page table updates, ring buffer accesses and FW
memcpy ? Is there other places ?

Puh, good question. I have no idea.


Another point is that at this point the driver shouldn't access any
such buffers as we are at the process finishing the device.
AFAIK there is no page fault mechanism for kernel mappings so I
don't think there is anything else to do ?

Well there is a page fault handler for kernel mappings, but that one
just prints the stack trace into the system log and calls BUG(); :)

Long story short we need to avoid any access to released pages after
unplug. No matter if it's from the kernel or userspace.

I was just about to start guarding with drm_dev_enter/exit CPU
accesses from kernel to GTT or VRAM buffers, but then I looked more in
the code
and it seems like ttm_tt_unpopulate just deletes DMA mappings (for the
sake of device to main memory access). The kernel page table is not
touched
until the last bo refcount is dropped and the bo is released
(ttm_bo_release->destroy->amdgpu_bo_destroy->amdgpu_bo_kunmap). This
is both
for GTT BOs mapped to kernel by kmap (or vmap) and for VRAM BOs mapped
by ioremap. So as I see it, nothing bad will happen after we
unpopulate a BO while we still try to use a kernel mapping for it:
system memory pages backing GTT BOs are still mapped and not freed, and
for
VRAM BOs the same holds for the IO physical ranges mapped into the kernel
page table, since iounmap wasn't called yet.

The problem is the system pages would be freed, and if the kernel driver
still happily writes to them we are pretty much busted, because we write
to freed up memory.

OK, I see I missed ttm_tt_unpopulate->..->ttm_pool_free which will
release
the GTT BO pages. But then isn't there a problem in ttm_bo_release, since
ttm_bo_cleanup_memtype_use, which also leads to pages release, comes
before bo->destroy which unmaps the pages from the kernel page table ? Won't
we end up writing to freed memory in this time interval ? Don't we
need to postpone pages freeing to after kernel page table unmapping ?

BOs are only destroyed when there is a guarantee that nobody is
accessing them any more.

The problem here is that the pages as well as the VRAM can be
immediately reused after the hotplug event.


Similar for vram, if this is actual hotunplug and then replug, there's
going to be a different device behind the same mmio bar range most
likely (the higher bridges all this have the same windows assigned),

No idea how this actually works but if we haven't called iounmap yet
doesn't it mean that those physical ranges that are still mapped into
page
table should be reserved and cannot be reused for another
device ? As a guess, maybe another subrange from the higher bridge's
total
range will be allocated.

Nope, the PCIe subsystem doesn't care about any ioremap still active for
a range when it is hotplugged.


and that's bad news if we keep using it for current drivers. So we
really have to point all these cpu ptes to some other place.

We can't just unmap it without syncing against any in kernel accesses
to those buffers
and since page faulting technique we use for user mapped buffers seems
to not be possible
for kernel mapped buffers I am not sure how to do it gracefully...

We could try to replace the kmap with a dummy page under the hood, but
that is extremely tricky.

Especially since BOs which are just 1 page in size could point to the
linear mapping directly.

I think it's just more work. Essentially
- convert as much as possible of the kernel mappings to vmap_local,
which Thomas Zimmermann is rolling out. That way a dma_resv_lock will
serve as a barrier, and ofc any new vmap needs to fail or hand out a
dummy mapping.

Read those patches. I am not sure how this helps with protecting
against accesses to released backing pages or IO physical ranges of a BO
which is already mapped during the unplug event ?

By eliminating such users, and replacing them with local maps which
are strictly bound in how long they can exist (and hence we can
serialize against them finishing in our hotunplug

Re: [PATCH v3 05/12] drm/ttm: Expose ttm_tt_unpopulate for driver use

2020-12-16 Thread Andrey Grodzovsky


On 12/16/20 6:15 PM, Daniel Vetter wrote:

On Wed, Dec 16, 2020 at 7:26 PM Andrey Grodzovsky
 wrote:


On 12/16/20 12:12 PM, Daniel Vetter wrote:

On Wed, Dec 16, 2020 at 5:18 PM Christian König
 wrote:

Am 16.12.20 um 17:13 schrieb Andrey Grodzovsky:

On 12/16/20 9:21 AM, Daniel Vetter wrote:

On Wed, Dec 16, 2020 at 9:04 AM Christian König
 wrote:

Am 15.12.20 um 21:18 schrieb Andrey Grodzovsky:

[SNIP]

While we can't control user application accesses to the mapped
buffers explicitly and hence we use page fault rerouting
I am thinking that in this  case we may be able to sprinkle
drm_dev_enter/exit in any such sensitive place were we might
CPU access a DMA buffer from the kernel ?

Yes, I fear we are going to need that.


Things like CPU page table updates, ring buffer accesses and FW
memcpy ? Is there other places ?

Puh, good question. I have no idea.


Another point is that at this point the driver shouldn't access any
such buffers as we are in the process of finishing the device.
AFAIK there is no page fault mechanism for kernel mappings so I
don't think there is anything else to do ?

Well there is a page fault handler for kernel mappings, but that one
just prints the stack trace into the system log and calls BUG(); :)

Long story short we need to avoid any access to released pages after
unplug. No matter if it's from the kernel or userspace.

I was just about to start guarding with drm_dev_enter/exit CPU
accesses from kernel to GTT or VRAM buffers but then I looked more in
the code
and seems like ttm_tt_unpopulate just deletes DMA mappings (for the
sake of device to main memory access). Kernel page table is not
touched
until last bo refcount is dropped and the bo is released
(ttm_bo_release->destroy->amdgpu_bo_destroy->amdgpu_bo_kunmap). This
is both
for GTT BOs mapped to kernel by kmap (or vmap) and for VRAM BOs mapped
by ioremap. So as I see it, nothing bad will happen after we
unpopulate a BO while we still try to use a kernel mapping for it,
system memory pages backing GTT BOs are still mapped and not freed and
for
VRAM BOs same is for the IO physical ranges mapped into the kernel
page table since iounmap wasn't called yet.

The problem is the system pages would be freed and if the kernel driver
still happily writes to them we are pretty much busted because we write
to freed up memory.

OK, I see I missed ttm_tt_unpopulate->..->ttm_pool_free which will
release
the GTT BO pages. But then isn't there a problem in ttm_bo_release since
ttm_bo_cleanup_memtype_use which also leads to pages release comes
before bo->destroy which unmaps the pages from kernel page table ? Won't
we end up writing to freed memory in this time interval ? Don't we
need to postpone pages freeing to after kernel page table unmapping ?

BOs are only destroyed when there is a guarantee that nobody is
accessing them any more.

The problem here is that the pages as well as the VRAM can be
immediately reused after the hotplug event.


Similar for vram, if this is actual hotunplug and then replug, there's
going to be a different device behind the same mmio bar range most
likely (the higher bridges all this have the same windows assigned),

No idea how this actually works but if we haven't called iounmap yet
doesn't it mean that those physical ranges that are still mapped into
page
table should be reserved and cannot be reused for another
device ? As a guess, maybe another subrange from the higher bridge's
total
range will be allocated.

Nope, the PCIe subsystem doesn't care about any ioremap still active for
a range when it is hotplugged.


and that's bad news if we keep using it for current drivers. So we
really have to point all these cpu ptes to some other place.

We can't just unmap it without syncing against any in kernel accesses
to those buffers
and since page faulting technique we use for user mapped buffers seems
to not be possible
for kernel mapped buffers I am not sure how to do it gracefully...

We could try to replace the kmap with a dummy page under the hood, but
that is extremely tricky.

Especially since BOs which are just 1 page in size could point to the
linear mapping directly.

I think it's just more work. Essentially
- convert as much as possible of the kernel mappings to vmap_local,
which Thomas Zimmermann is rolling out. That way a dma_resv_lock will
serve as a barrier, and ofc any new vmap needs to fail or hand out a
dummy mapping.

Read those patches. I am not sure how this helps with protecting
against accesses to released backing pages or IO physical ranges of BO
which is already mapped during the unplug event ?

By eliminating such users, and replacing them with local maps which
are strictly bound in how long they can exist (and hence we can
serialize against them finishing in our hotunplug code).


Not sure I see how serializing against BO map/unmap helps - our problem as you 
described is that o

Re: [PATCH v3 05/12] drm/ttm: Expose ttm_tt_unpopulate for driver use

2020-12-16 Thread Andrey Grodzovsky


On 12/16/20 12:12 PM, Daniel Vetter wrote:

On Wed, Dec 16, 2020 at 5:18 PM Christian König
 wrote:

Am 16.12.20 um 17:13 schrieb Andrey Grodzovsky:

On 12/16/20 9:21 AM, Daniel Vetter wrote:

On Wed, Dec 16, 2020 at 9:04 AM Christian König
 wrote:

Am 15.12.20 um 21:18 schrieb Andrey Grodzovsky:

[SNIP]

While we can't control user application accesses to the mapped
buffers explicitly and hence we use page fault rerouting
I am thinking that in this case we may be able to sprinkle
drm_dev_enter/exit in any such sensitive place where we might
CPU access a DMA buffer from the kernel ?

Yes, I fear we are going to need that.


Things like CPU page table updates, ring buffer accesses and FW
memcpy ? Are there other places ?

Puh, good question. I have no idea.


Another point is that at this point the driver shouldn't access any
such buffers as we are in the process of finishing the device.
AFAIK there is no page fault mechanism for kernel mappings so I
don't think there is anything else to do ?

Well there is a page fault handler for kernel mappings, but that one
just prints the stack trace into the system log and calls BUG(); :)

Long story short we need to avoid any access to released pages after
unplug. No matter if it's from the kernel or userspace.

I was just about to start guarding with drm_dev_enter/exit CPU
accesses from kernel to GTT or VRAM buffers but then I looked more in
the code
and seems like ttm_tt_unpopulate just deletes DMA mappings (for the
sake of device to main memory access). Kernel page table is not
touched
until last bo refcount is dropped and the bo is released
(ttm_bo_release->destroy->amdgpu_bo_destroy->amdgpu_bo_kunmap). This
is both
for GTT BOs mapped to kernel by kmap (or vmap) and for VRAM BOs mapped
by ioremap. So as I see it, nothing bad will happen after we
unpopulate a BO while we still try to use a kernel mapping for it,
system memory pages backing GTT BOs are still mapped and not freed and
for
VRAM BOs same is for the IO physical ranges mapped into the kernel
page table since iounmap wasn't called yet.

The problem is the system pages would be freed and if the kernel driver
still happily writes to them we are pretty much busted because we write
to freed up memory.


OK, I see I missed ttm_tt_unpopulate->..->ttm_pool_free which will
release
the GTT BO pages. But then isn't there a problem in ttm_bo_release since
ttm_bo_cleanup_memtype_use which also leads to pages release comes
before bo->destroy which unmaps the pages from kernel page table ? Won't
we end up writing to freed memory in this time interval ? Don't we
need to postpone pages freeing to after kernel page table unmapping ?

BOs are only destroyed when there is a guarantee that nobody is
accessing them any more.

The problem here is that the pages as well as the VRAM can be
immediately reused after the hotplug event.




Similar for vram, if this is actual hotunplug and then replug, there's
going to be a different device behind the same mmio bar range most
likely (the higher bridges all this have the same windows assigned),


No idea how this actually works but if we haven't called iounmap yet
doesn't it mean that those physical ranges that are still mapped into
page
table should be reserved and cannot be reused for another
device ? As a guess, maybe another subrange from the higher bridge's
total
range will be allocated.

Nope, the PCIe subsystem doesn't care about any ioremap still active for
a range when it is hotplugged.


and that's bad news if we keep using it for current drivers. So we
really have to point all these cpu ptes to some other place.


We can't just unmap it without syncing against any in kernel accesses
to those buffers
and since page faulting technique we use for user mapped buffers seems
to not be possible
for kernel mapped buffers I am not sure how to do it gracefully...

We could try to replace the kmap with a dummy page under the hood, but
that is extremely tricky.

Especially since BOs which are just 1 page in size could point to the
linear mapping directly.

I think it's just more work. Essentially
- convert as much as possible of the kernel mappings to vmap_local,
which Thomas Zimmermann is rolling out. That way a dma_resv_lock will
serve as a barrier, and ofc any new vmap needs to fail or hand out a
dummy mapping.


Read those patches. I am not sure how this helps with protecting
against accesses to released backing pages or IO physical ranges of BO
which is already mapped during the unplug event ?

Andrey



- handle fbcon somehow. I think shutting it all down should work out.
- worst case keep the system backing storage around for shared dma-buf
until the other non-dynamic driver releases it. for vram we require
dynamic importers (and maybe it wasn't such a bright idea to allow
pinning of importer buffers, might need to revisit that).

Cheers, Daniel


Christian.


Andrey



Re: [PATCH v3 05/12] drm/ttm: Expose ttm_tt_unpopulate for driver use

2020-12-16 Thread Andrey Grodzovsky


On 12/16/20 9:21 AM, Daniel Vetter wrote:

On Wed, Dec 16, 2020 at 9:04 AM Christian König
 wrote:

Am 15.12.20 um 21:18 schrieb Andrey Grodzovsky:

[SNIP]

While we can't control user application accesses to the mapped
buffers explicitly and hence we use page fault rerouting
I am thinking that in this case we may be able to sprinkle
drm_dev_enter/exit in any such sensitive place where we might
CPU access a DMA buffer from the kernel ?

Yes, I fear we are going to need that.


Things like CPU page table updates, ring buffer accesses and FW
memcpy ? Are there other places ?

Puh, good question. I have no idea.


Another point is that at this point the driver shouldn't access any
such buffers as we are in the process of finishing the device.
AFAIK there is no page fault mechanism for kernel mappings so I
don't think there is anything else to do ?

Well there is a page fault handler for kernel mappings, but that one
just prints the stack trace into the system log and calls BUG(); :)

Long story short we need to avoid any access to released pages after
unplug. No matter if it's from the kernel or userspace.


I was just about to start guarding with drm_dev_enter/exit CPU
accesses from kernel to GTT or VRAM buffers but then I looked more in
the code
and seems like ttm_tt_unpopulate just deletes DMA mappings (for the
sake of device to main memory access). Kernel page table is not touched
until last bo refcount is dropped and the bo is released
(ttm_bo_release->destroy->amdgpu_bo_destroy->amdgpu_bo_kunmap). This
is both
for GTT BOs mapped to kernel by kmap (or vmap) and for VRAM BOs mapped
by ioremap. So as I see it, nothing bad will happen after we
unpopulate a BO while we still try to use a kernel mapping for it,
system memory pages backing GTT BOs are still mapped and not freed and
for
VRAM BOs same is for the IO physical ranges mapped into the kernel
page table since iounmap wasn't called yet.

The problem is the system pages would be freed and if the kernel driver
still happily writes to them we are pretty much busted because we write
to freed up memory.



OK, I see I missed ttm_tt_unpopulate->..->ttm_pool_free which will release
the GTT BO pages. But then isn't there a problem in ttm_bo_release since
ttm_bo_cleanup_memtype_use which also leads to pages release comes
before bo->destroy which unmaps the pages from kernel page table ? Won't
we end up writing to freed memory in this time interval ? Don't we
need to postpone pages freeing to after kernel page table unmapping ?



Similar for vram, if this is actual hotunplug and then replug, there's
going to be a different device behind the same mmio bar range most
likely (the higher bridges all this have the same windows assigned),



No idea how this actually works but if we haven't called iounmap yet
doesn't it mean that those physical ranges that are still mapped into page
table should be reserved and cannot be reused for another
device ? As a guess, maybe another subrange from the higher bridge's total
range will be allocated.


and that's bad news if we keep using it for current drivers. So we
really have to point all these cpu ptes to some other place.



We can't just unmap it without syncing against any in kernel accesses to those 
buffers
and since page faulting technique we use for user mapped buffers seems to not be 
possible

for kernel mapped buffers I am not sure how to do it gracefully...

Andrey



-Daniel


Christian.


I loaded the driver with vm_update_mode=3
meaning all VM updates are done using the CPU, and I haven't seen any OOPs after
removing the device. I guess I can test it more by allocating GTT and
VRAM BOs
and trying to read/write to them after device is removed.

Andrey



Regards,
Christian.


Andrey





Re: [PATCH v3 05/12] drm/ttm: Expose ttm_tt_unpopulate for driver use

2020-12-15 Thread Andrey Grodzovsky


On 11/24/20 11:44 AM, Christian König wrote:

Am 24.11.20 um 17:22 schrieb Andrey Grodzovsky:


On 11/24/20 2:41 AM, Christian König wrote:

Am 23.11.20 um 22:08 schrieb Andrey Grodzovsky:


On 11/23/20 3:41 PM, Christian König wrote:

Am 23.11.20 um 21:38 schrieb Andrey Grodzovsky:


On 11/23/20 3:20 PM, Christian König wrote:

Am 23.11.20 um 21:05 schrieb Andrey Grodzovsky:


On 11/25/20 5:42 AM, Christian König wrote:

Am 21.11.20 um 06:21 schrieb Andrey Grodzovsky:

It's needed to drop iommu backed pages on device unplug
before device's IOMMU group is released.


It would be cleaner if we could do the whole handling in TTM. I also 
need to double check what you are doing with this function.


Christian.



Check patch "drm/amdgpu: Register IOMMU topology notifier per device." 
to see
how I use it. I don't see why this should go into the TTM mid-layer - the 
stuff I do inside
is vendor specific and also I don't think TTM is explicitly aware of 
IOMMU ?

Do you mean you prefer the IOMMU notifier to be registered from within TTM
and then use a hook to call into a vendor specific handler ?


No, that is really vendor specific.

What I meant is to have a function like ttm_resource_manager_evict_all() 
which you only need to call and all tt objects are unpopulated.



So instead of this BO list I create and later iterate in amdgpu from the 
IOMMU patch, you just want to do it within

TTM with a single function ? Makes much more sense.


Yes, exactly.

The list_empty() checks we have in TTM for the LRU are actually not the 
best idea, we should now check the pin_count instead. This way we could 
also have a list of the pinned BOs in TTM.



So from my IOMMU topology handler I will iterate the TTM LRU for the 
unpinned BOs and this new function for the pinned ones ?
It's probably a good idea to combine both iterations into this new function 
to cover all the BOs allocated on the device.


Yes, that's what I had in my mind as well.
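
A hypothetical sketch of the combined walk (device_bo_list and
bo_list_entry are invented names for illustration; TTM did not expose
exactly this at the time):

/* Hypothetical: drop the DMA mappings/pages of every BO the driver
 * tracks on this device, before the IOMMU group disappears. */
static void example_unpopulate_all(struct amdgpu_device *adev)
{
	struct amdgpu_bo *bo;

	/* a TTM variant would walk the LRUs for the unpinned BOs plus
	 * a pinned-BO list, as discussed above */
	list_for_each_entry(bo, &adev->device_bo_list, bo_list_entry) {
		if (bo->tbo.ttm)
			ttm_tt_unpopulate(bo->tbo.bdev, bo->tbo.ttm);
	}
}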






BTW: Have you thought about what happens when we unpopulate a BO while we 
still try to use a kernel mapping for it? That could have unforeseen 
consequences.



Are you asking what happens to kmap or vmap style mapped CPU accesses once 
we drop all the DMA backing pages for a particular BO ? Because for user 
mappings
(mmap) we took care of this with the dummy page reroute, but indeed nothing was 
done for in-kernel CPU mappings.


Yes exactly that.

In other words what happens if we free the ring buffer while the kernel 
still writes to it?


Christian.
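
For context, a sketch of the dummy page reroute for user mappings that
Andrey mentions above (example_bo_fault and example_dummy_page are
illustrative names; the real fault handler differs in detail):

/* Sketch: after unplug, faults on a user mapping of a BO get backed
 * by a dummy page instead of the gone device memory, so userspace
 * keeps running instead of receiving SIGBUS. */
static vm_fault_t example_bo_fault(struct vm_fault *vmf)
{
	struct ttm_buffer_object *bo = vmf->vma->vm_private_data;
	vm_fault_t ret;
	int idx;

	if (drm_dev_enter(bo->base.dev, &idx)) {
		ret = ttm_bo_vm_fault(vmf); /* normal fault path */
		drm_dev_exit(idx);
	} else {
		/* example_dummy_page: an invented placeholder page */
		ret = vmf_insert_pfn(vmf->vma, vmf->address,
				     page_to_pfn(example_dummy_page));
	}
	return ret;
}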



While we can't control user application accesses to the mapped buffers 
explicitly and hence we use page fault rerouting
I am thinking that in this case we may be able to sprinkle 
drm_dev_enter/exit in any such sensitive place where we might

CPU access a DMA buffer from the kernel ?


Yes, I fear we are going to need that.

Things like CPU page table updates, ring buffer accesses and FW memcpy ? Are 
there other places ?


Puh, good question. I have no idea.

Another point is that at this point the driver shouldn't access any such 
buffers as we are in the process of finishing the device.
AFAIK there is no page fault mechanism for kernel mappings so I don't think 
there is anything else to do ?


Well there is a page fault handler for kernel mappings, but that one just 
prints the stack trace into the system log and calls BUG(); :)


Long story short we need to avoid any access to released pages after unplug. 
No matter if it's from the kernel or userspace.



I was just about to start guarding with drm_dev_enter/exit CPU accesses from 
kernel to GTT or VRAM buffers but then I looked more in the code
and seems like ttm_tt_unpopulate just deletes DMA mappings (for the sake of 
device to main memory access). Kernel page table is not touched
until last bo refcount is dropped and the bo is released 
(ttm_bo_release->destroy->amdgpu_bo_destroy->amdgpu_bo_kunmap). This is both
for GTT BOs mapped to kernel by kmap (or vmap) and for VRAM BOs mapped by 
ioremap. So as I see it, nothing bad will happen after we
unpopulate a BO while we still try to use a kernel mapping for it, system memory 
pages backing GTT BOs are still mapped and not freed and for
VRAM BOs same is for the IO physical ranges mapped into the kernel page table 
since iounmap wasn't called yet. I loaded the driver with vm_update_mode=3
meaning all VM updates are done using the CPU, and I haven't seen any OOPs after removing 
the device. I guess I can test it more by allocating GTT and VRAM BOs

and trying to read/write to them after device is removed.

Andrey




Regards,
Christian.



Andrey






[PATCH v2] [PATCH] drm/amdgpu: Initialise drm_gem_object_funcs for imported BOs

2020-12-08 Thread Andrey Grodzovsky
For BOs imported from outside of amdgpu, setting of amdgpu_gem_object_funcs
was missing in amdgpu_dma_buf_create_obj. Fix by refactoring BO creation
and amdgpu_gem_object_funcs setting into single function called
from both code paths.

Fixes: d693def4fd1c ("drm: Remove obsolete GEM and PRIME callbacks
from struct drm_driver")

v2: Use amdgpu_gem_object_create() directly

Signed-off-by: Andrey Grodzovsky 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c |  8 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 41 -
 2 files changed, 29 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index e5919ef..e42175e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -424,6 +424,7 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct 
dma_buf *dma_buf)
struct amdgpu_device *adev = drm_to_adev(dev);
struct amdgpu_bo *bo;
struct amdgpu_bo_param bp;
+   struct drm_gem_object *gobj;
int ret;
 
memset(&bp, 0, sizeof(bp));
@@ -434,17 +435,20 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct 
dma_buf *dma_buf)
bp.type = ttm_bo_type_sg;
bp.resv = resv;
dma_resv_lock(resv, NULL);
-   ret = amdgpu_bo_create(adev, &bp, &bo);
+   ret = amdgpu_gem_object_create(adev, dma_buf->size, PAGE_SIZE,
+   AMDGPU_GEM_DOMAIN_CPU,
+   0, ttm_bo_type_sg, resv, &gobj);
if (ret)
goto error;
 
+   bo = gem_to_amdgpu_bo(gobj);
bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
if (dma_buf->ops != &amdgpu_dmabuf_ops)
bo->prime_shared_count = 1;
 
dma_resv_unlock(resv);
-   return &bo->tbo.base;
+   return gobj;
 
 error:
dma_resv_unlock(resv);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index c9f94fb..ccf4d80 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -70,26 +70,12 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, 
unsigned long size,
bp.type = type;
bp.resv = resv;
bp.preferred_domain = initial_domain;
-retry:
bp.flags = flags;
bp.domain = initial_domain;
r = amdgpu_bo_create(adev, &bp, &bo);
-   if (r) {
-   if (r != -ERESTARTSYS) {
-   if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) {
-   flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
-   goto retry;
-   }
-
-   if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
-   initial_domain |= AMDGPU_GEM_DOMAIN_GTT;
-   goto retry;
-   }
-   DRM_DEBUG("Failed to allocate GEM object (%ld, %d, %u, 
%d)\n",
- size, initial_domain, alignment, r);
-   }
+   if (r)
return r;
-   }
+
*obj = &bo->tbo.base;
(*obj)->funcs = &amdgpu_gem_object_funcs;
 
@@ -239,7 +225,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void 
*data,
uint64_t size = args->in.bo_size;
struct dma_resv *resv = NULL;
struct drm_gem_object *gobj;
-   uint32_t handle;
+   uint32_t handle, initial_domain;
int r;
 
/* reject invalid gem flags */
@@ -283,9 +269,28 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void 
*data,
resv = vm->root.base.bo->tbo.base.resv;
}
 
+retry:
+   initial_domain = (u32)(0xffffffff & args->in.domains);
r = amdgpu_gem_object_create(adev, size, args->in.alignment,
-(u32)(0xffffffff & args->in.domains),
+initial_domain,
 flags, ttm_bo_type_device, resv, &gobj);
+   if (r) {
+   if (r != -ERESTARTSYS) {
+   if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) {
+   flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+   goto retry;
+   }
+
+   if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
+   initial_domain |= AMDGPU_GEM_DOMAIN_GTT;
+   goto retry;
+   }
+   DRM_DEBUG("Failed to allocate GEM object (%ld, %d, %u, 
%d)\n",
+ size, initial_domain, args->in.alignment, r);
+   }
+   return 
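
For context on the null pointer dereference this fixes: after
d693def4fd1c the GEM core dispatches object freeing through the
per-object funcs pointer with no driver-level fallback, roughly
(simplified from the drm_gem.c of that time):

/* obj->funcs must be valid for every GEM object, imported ones
 * included, or the final unreference dereferences a NULL pointer. */
void drm_gem_object_free(struct kref *kref)
{
	struct drm_gem_object *obj =
		container_of(kref, struct drm_gem_object, refcount);

	if (WARN_ON(!obj->funcs->free))
		return;

	obj->funcs->free(obj);
}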

Re: [PATCH] drm/amdgpu: Initialise drm_gem_object_funcs for imported BOs

2020-12-08 Thread Andrey Grodzovsky


On 12/8/20 2:01 PM, Christian König wrote:

Am 08.12.20 um 19:52 schrieb Andrey Grodzovsky:


On 12/8/20 1:47 PM, Christian König wrote:

Am 08.12.20 um 19:44 schrieb Andrey Grodzovsky:


On 12/8/20 1:29 PM, Christian König wrote:

Am 08.12.20 um 19:26 schrieb Andrey Grodzovsky:


On 12/8/20 12:36 PM, Christian König wrote:

Am 08.12.20 um 18:10 schrieb Andrey Grodzovsky:
For BOs imported from outside of amdgpu, setting of 
amdgpu_gem_object_funcs

was missing in amdgpu_dma_buf_create_obj. Fix by refactoring BO creation
and amdgpu_gem_object_funcs setting into single function called
from both code paths.


Can you outline why we can't use amdgpu_gem_object_create() directly?

I mean we have a bit of extra error handling in there and we need to 
grab the resv lock and set the domains after creation, but that 
shouldn't matter and I don't see why that should not work.



On top of what you mentioned, you also have bp.domain/bp.preferred_domain 
being set differently, so you need to add another
argument to amdgpu_gem_object_create to reflect this difference, which 
clutters the already cluttered argument list even more.


That should be outside of the call to amdgpu_gem_object_create(), similar 
to how it is outside of the amdgpu_bo_create currently.



So you agree we have to add another argument to amdgpu_gem_object_create 
(e.g. u32 preferred_domain) which will be 0 for amdgpu_dma_buf_create_obj
and equal to initial_domain for all the code path currently calling 
amdgpu_gem_object_create ?


No, I just don't see the need for that. We need to overwrite the preferred 
domain after the function call anyway.


DMA-buf imports are created with the initial domain CPU and then get that 
overwritten to GTT after creation.







Regarding the extra error handling - you have the 'retry' dance in 
amdgpu_gem_object_create which jumps back to the middle of amdgpu_bo_param
initialization but you don't have it in amdgpu_dma_buf_create_obj which 
also complicates the reuse of amdgpu_gem_object_create as is.


Regarding the extra error handling, that kicks in only when 
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED is specified as flags or 
AMDGPU_GEM_DOMAIN_VRAM as initial domain. Neither is the case here.



Yes, still, it makes me a bit uncomfortable relying on internal 
implementation details of an API function I call to do the thing I expect.


Yeah, ok that is a rather good argument.

Christian.



So should I just keep it as is, or do you think it would still be more 
beneficial to unify them the way you propose ?


Maybe we should move the error handling into amdgpu_gem_create_ioctl() anyway.

We don't really want that handling in the userptr stuff and for the call from 
amdgpufb_create_pinned_object() that is actually a bug!


E.g. for the fb emulation we can't fall back from VRAM to GTT like in the 
create ioctl.



What about amdgpu_mode_dumb_create ? It seems like the GTT domain is also relevant 
there, and so the error handling would be needed there too.


Andrey




Christian.



Andrey






Andrey




Christian.



Andrey




Thanks,
Christian.



This fixes a null ptr regression caused by commit
d693def drm: Remove obsolete GEM and PRIME callbacks from struct 
drm_driver


Signed-off-by: Andrey Grodzovsky 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 13 ++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 22 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h |  5 +
  3 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c

index e5919ef..da4d0ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -405,6 +405,7 @@ struct dma_buf *amdgpu_gem_prime_export(struct 
drm_gem_object *gobj,

  return buf;
  }
  +
  /**
   * amdgpu_dma_buf_create_obj - create BO for DMA-buf import
   *
@@ -424,7 +425,7 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, 
struct dma_buf *dma_buf)

  struct amdgpu_device *adev = drm_to_adev(dev);
  struct amdgpu_bo *bo;
  struct amdgpu_bo_param bp;
-    int ret;
+    struct drm_gem_object *obj;
    memset(&bp, 0, sizeof(bp));
  bp.size = dma_buf->size;
@@ -434,21 +435,19 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, 
struct dma_buf *dma_buf)

  bp.type = ttm_bo_type_sg;
  bp.resv = resv;
  dma_resv_lock(resv, NULL);
-    ret = amdgpu_bo_create(adev, &bp, &bo);
-    if (ret)
+    obj = amdgpu_gem_object_create_raw(adev, &bp);
+    if (IS_ERR(obj))
  goto error;
  +    bo = gem_to_amdgpu_bo(obj);
  bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
  bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
  if (dma_buf->ops != &amdgpu_dmabuf_ops)
  bo->prime_shared_count = 1;
  -    dma_resv_unlock(resv);
-    return &bo->tbo.base;
-
  error:
  dma_resv_unlock(re

Re: [PATCH] drm/amdgpu: Initialise drm_gem_object_funcs for imported BOs

2020-12-08 Thread Andrey Grodzovsky


On 12/8/20 1:47 PM, Christian König wrote:

Am 08.12.20 um 19:44 schrieb Andrey Grodzovsky:


On 12/8/20 1:29 PM, Christian König wrote:

Am 08.12.20 um 19:26 schrieb Andrey Grodzovsky:


On 12/8/20 12:36 PM, Christian König wrote:

Am 08.12.20 um 18:10 schrieb Andrey Grodzovsky:

For BOs imported from outside of amdgpu, setting of amdgpu_gem_object_funcs
was missing in amdgpu_dma_buf_create_obj. Fix by refactoring BO creation
and amdgpu_gem_object_funcs setting into single function called
from both code paths.


Can you outline why we can't use amdgpu_gem_object_create() directly?

I mean we have a bit of extra error handling in there and we need to grab 
the resv lock and set the domains after creation, but that shouldn't 
matter and I don't see why that should not work.



On top of what you mentioned, you also have bp.domain/bp.preferred_domain 
being set differently, so you need to add another
argument to amdgpu_gem_object_create to reflect this difference, which 
clutters the already cluttered argument list even more.


That should be outside of the call to amdgpu_gem_object_create(), similar to 
how it is outside of the amdgpu_bo_create currently.



So you agree we have to add another argument to amdgpu_gem_object_create 
(e.g. u32 preferred_domain) which will be 0 for amdgpu_dma_buf_create_obj
and equal to initial_domain for all the code path currently calling 
amdgpu_gem_object_create ?


No, I just don't see the need for that. We need to overwrite the preferred 
domain after the function call anyway.


DMA-buf imports are created with the initial domain CPU and then get that 
overwritten to GTT after creation.







Regarding the extra error handling - you have the 'retry' dance in 
amdgpu_gem_object_create which jumps back to the middle of amdgpu_bo_param
initialization but you don't have it in amdgpu_dma_buf_create_obj which 
also complicates the reuse of amdgpu_gem_object_create as is.


Regarding the extra error handling, that kicks in only when 
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED is specified as flags or 
AMDGPU_GEM_DOMAIN_VRAM as initial domain. Neither is the case here.



Yes, still, it makes me a bit uncomfortable relying on internal 
implementation details of an API function I call to do the thing I expect.


Yeah, ok that is a rather good argument.

Christian.



So should I just keep it as is, or do you think it would still be more beneficial 
to unify them the way you propose ?


Andrey






Andrey




Christian.



Andrey




Thanks,
Christian.



This fixes a null ptr regression caused by commit
d693def drm: Remove obsolete GEM and PRIME callbacks from struct drm_driver

Signed-off-by: Andrey Grodzovsky 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 13 ++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 22 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h |  5 +
  3 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c

index e5919ef..da4d0ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -405,6 +405,7 @@ struct dma_buf *amdgpu_gem_prime_export(struct 
drm_gem_object *gobj,

  return buf;
  }
  +
  /**
   * amdgpu_dma_buf_create_obj - create BO for DMA-buf import
   *
@@ -424,7 +425,7 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, 
struct dma_buf *dma_buf)

  struct amdgpu_device *adev = drm_to_adev(dev);
  struct amdgpu_bo *bo;
  struct amdgpu_bo_param bp;
-    int ret;
+    struct drm_gem_object *obj;
    memset(&bp, 0, sizeof(bp));
  bp.size = dma_buf->size;
@@ -434,21 +435,19 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, 
struct dma_buf *dma_buf)

  bp.type = ttm_bo_type_sg;
  bp.resv = resv;
  dma_resv_lock(resv, NULL);
-    ret = amdgpu_bo_create(adev, &bp, &bo);
-    if (ret)
+    obj = amdgpu_gem_object_create_raw(adev, &bp);
+    if (IS_ERR(obj))
  goto error;
  +    bo = gem_to_amdgpu_bo(obj);
  bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
  bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
  if (dma_buf->ops != &amdgpu_dmabuf_ops)
  bo->prime_shared_count = 1;
  -    dma_resv_unlock(resv);
-    return &bo->tbo.base;
-
  error:
  dma_resv_unlock(resv);
-    return ERR_PTR(ret);
+    return obj;
  }
    /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

index c9f94fb..5f22ce6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -52,13 +52,26 @@ static void amdgpu_gem_object_free(struct 
drm_gem_object *gobj)

  }
  }
  +struct drm_gem_object *amdgpu_gem_object_create_raw(struct 
amdgpu_device *adev,

+    struct amdgpu_bo_param *bp)
+{
+    struct amdgpu_bo *bo;
+    int r;
+
+    r = 

Re: [PATCH] drm/amdgpu: Initialise drm_gem_object_funcs for imported BOs

2020-12-08 Thread Andrey Grodzovsky


On 12/8/20 1:29 PM, Christian König wrote:

Am 08.12.20 um 19:26 schrieb Andrey Grodzovsky:


On 12/8/20 12:36 PM, Christian König wrote:

Am 08.12.20 um 18:10 schrieb Andrey Grodzovsky:

For BOs imported from outside of amdgpu, setting of amdgpu_gem_object_funcs
was missing in amdgpu_dma_buf_create_obj. Fix by refactoring BO creation
and amdgpu_gem_object_funcs setting into single function called
from both code paths.


Can you outline why we can't use amdgpu_gem_object_create() directly?

I mean we have a bit of extra error handling in there and we need to grab 
the resv lock and set the domains after creation, but that shouldn't matter 
and I don't see why that should not work.



On top of what you mentioned, you also have bp.domain/bp.preferred_domain 
being set differently, so you need to add another
argument to amdgpu_gem_object_create to reflect this difference, which 
clutters the already cluttered argument list even more.


That should be outside of the call to amdgpu_gem_object_create(), similar to 
how it is outside of the amdgpu_bo_create currently.



So you agree we have to add another argument to amdgpu_gem_object_create (e.g. 
u32 preferred_domain) which will be 0 for amdgpu_dma_buf_create_obj
and equal to initial_domain for all the code path currently calling 
amdgpu_gem_object_create ?





Regarding the extra error handling -  you have the 'retry' dance in 
amdgpu_gem_object_create which jumps back to the middle of amdgpu_bo_param
initialization but you don't have it in amdgpu_dma_buf_create_obj which also 
complicates the reuse of amdgpu_gem_object_create as is.


Regarding the extra error handling, that kicks in only when 
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED is specified as flags or 
AMDGPU_GEM_DOMAIN_VRAM as initial domain. Neither is the case here.



Yes, still, it makes me a bit uncomfortable relying on internal implementation 
details of an API function I call to do the thing I expect.


Andrey




Christian.



Andrey




Thanks,
Christian.



This fixes a null ptr regression caused by commit
d693def drm: Remove obsolete GEM and PRIME callbacks from struct drm_driver

Signed-off-by: Andrey Grodzovsky 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 13 ++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 22 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h |  5 +
  3 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c

index e5919ef..da4d0ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -405,6 +405,7 @@ struct dma_buf *amdgpu_gem_prime_export(struct 
drm_gem_object *gobj,

  return buf;
  }
  +
  /**
   * amdgpu_dma_buf_create_obj - create BO for DMA-buf import
   *
@@ -424,7 +425,7 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, 
struct dma_buf *dma_buf)

  struct amdgpu_device *adev = drm_to_adev(dev);
  struct amdgpu_bo *bo;
  struct amdgpu_bo_param bp;
-    int ret;
+    struct drm_gem_object *obj;
    memset(&bp, 0, sizeof(bp));
  bp.size = dma_buf->size;
@@ -434,21 +435,19 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, 
struct dma_buf *dma_buf)

  bp.type = ttm_bo_type_sg;
  bp.resv = resv;
  dma_resv_lock(resv, NULL);
-    ret = amdgpu_bo_create(adev, &bp, &bo);
-    if (ret)
+    obj = amdgpu_gem_object_create_raw(adev, &bp);
+    if (IS_ERR(obj))
  goto error;
  +    bo = gem_to_amdgpu_bo(obj);
  bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
  bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
  if (dma_buf->ops != &amdgpu_dmabuf_ops)
  bo->prime_shared_count = 1;
  -    dma_resv_unlock(resv);
-    return &bo->tbo.base;
-
  error:
  dma_resv_unlock(resv);
-    return ERR_PTR(ret);
+    return obj;
  }
    /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

index c9f94fb..5f22ce6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -52,13 +52,26 @@ static void amdgpu_gem_object_free(struct 
drm_gem_object *gobj)

  }
  }
  +struct drm_gem_object *amdgpu_gem_object_create_raw(struct amdgpu_device 
*adev,

+    struct amdgpu_bo_param *bp)
+{
+    struct amdgpu_bo *bo;
+    int r;
+
+    r = amdgpu_bo_create(adev, bp, &bo);
+    if (r)
+    return ERR_PTR(r);
+
+    bo->tbo.base.funcs = &amdgpu_gem_object_funcs;
+    return &bo->tbo.base;
+}
+
  int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
   int alignment, u32 initial_domain,
   u64 flags, enum ttm_bo_type type,
   struct dma_resv *resv,
   struct drm_gem_object **obj)
  {
-    struct amdgpu_bo *bo;
  struct amdgpu_bo_param bp;
  int

Re: [PATCH] drm/amdgpu: Initialise drm_gem_object_funcs for imported BOs

2020-12-08 Thread Andrey Grodzovsky


On 12/8/20 12:36 PM, Christian König wrote:

Am 08.12.20 um 18:10 schrieb Andrey Grodzovsky:

For BOs imported from outside of amdgpu, setting of amdgpu_gem_object_funcs
was missing in amdgpu_dma_buf_create_obj. Fix by refactoring BO creation
and amdgpu_gem_object_funcs setting into single function called
from both code paths.


Can you outline why we can't use amdgpu_gem_object_create() directly?

I mean we have a bit of extra error handling in there and we need to grab the 
resv lock and set the domains after creation, but that shouldn't matter and I 
don't see why that should not work.



On top of what you mentioned, you also have bp.domain/bp.preferred_domain being 
set differently, so you need to add another
argument to amdgpu_gem_object_create to reflect this difference, which clutters 
the already cluttered argument list even more.
Regarding the extra error handling -  you have the 'retry' dance in 
amdgpu_gem_object_create which jumps back to the middle of amdgpu_bo_param
initialization but you don't have it in amdgpu_dma_buf_create_obj which also 
complicates the reuse of amdgpu_gem_object_create as is.


Andrey




Thanks,
Christian.



This fixes a null ptr regression caused by commit
d693def drm: Remove obsolete GEM and PRIME callbacks from struct drm_driver

Signed-off-by: Andrey Grodzovsky 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 13 ++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 22 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h |  5 +
  3 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c

index e5919ef..da4d0ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -405,6 +405,7 @@ struct dma_buf *amdgpu_gem_prime_export(struct 
drm_gem_object *gobj,

  return buf;
  }
  +
  /**
   * amdgpu_dma_buf_create_obj - create BO for DMA-buf import
   *
@@ -424,7 +425,7 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct 
dma_buf *dma_buf)

  struct amdgpu_device *adev = drm_to_adev(dev);
  struct amdgpu_bo *bo;
  struct amdgpu_bo_param bp;
-    int ret;
+    struct drm_gem_object *obj;
    memset(&bp, 0, sizeof(bp));
  bp.size = dma_buf->size;
@@ -434,21 +435,19 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, 
struct dma_buf *dma_buf)

  bp.type = ttm_bo_type_sg;
  bp.resv = resv;
  dma_resv_lock(resv, NULL);
-    ret = amdgpu_bo_create(adev, &bp, &bo);
-    if (ret)
+    obj = amdgpu_gem_object_create_raw(adev, &bp);
+    if (IS_ERR(obj))
  goto error;
  +    bo = gem_to_amdgpu_bo(obj);
  bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
  bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
  if (dma_buf->ops != &amdgpu_dmabuf_ops)
  bo->prime_shared_count = 1;
  -    dma_resv_unlock(resv);
-    return &bo->tbo.base;
-
  error:
  dma_resv_unlock(resv);
-    return ERR_PTR(ret);
+    return obj;
  }
    /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

index c9f94fb..5f22ce6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -52,13 +52,26 @@ static void amdgpu_gem_object_free(struct drm_gem_object 
*gobj)

  }
  }
  +struct drm_gem_object *amdgpu_gem_object_create_raw(struct amdgpu_device 
*adev,

+    struct amdgpu_bo_param *bp)
+{
+    struct amdgpu_bo *bo;
+    int r;
+
+    r = amdgpu_bo_create(adev, bp, &bo);
+    if (r)
+    return ERR_PTR(r);
+
+    bo->tbo.base.funcs = &amdgpu_gem_object_funcs;
+    return &bo->tbo.base;
+}
+
  int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
   int alignment, u32 initial_domain,
   u64 flags, enum ttm_bo_type type,
   struct dma_resv *resv,
   struct drm_gem_object **obj)
  {
-    struct amdgpu_bo *bo;
  struct amdgpu_bo_param bp;
  int r;
  @@ -73,8 +86,9 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, 
unsigned long size,

  retry:
  bp.flags = flags;
  bp.domain = initial_domain;
-    r = amdgpu_bo_create(adev, &bp, &bo);
-    if (r) {
+    *obj = amdgpu_gem_object_create_raw(adev, &bp);
+    if (IS_ERR(*obj)) {
+    r = PTR_ERR(*obj);
  if (r != -ERESTARTSYS) {
  if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) {
  flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
@@ -90,8 +104,6 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, 
unsigned long size,

  }
  return r;
  }
-    *obj = &bo->tbo.base;
-    (*obj)->funcs = &amdgpu_gem_object_funcs;
    return 0;
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h 
b/drivers/gpu/drm/am

[PATCH] drm/amdgpu: Initialise drm_gem_object_funcs for imported BOs

2020-12-08 Thread Andrey Grodzovsky
For BOs imported from outside of amdgpu, setting of amdgpu_gem_object_funcs
was missing in amdgpu_dma_buf_create_obj. Fix by refactoring BO creation
and amdgpu_gem_object_funcs setting into single function called
from both code paths.

This fixes a null ptr regression caused by commit
d693def drm: Remove obsolete GEM and PRIME callbacks from struct drm_driver

Signed-off-by: Andrey Grodzovsky 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 13 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 22 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h |  5 +
 3 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index e5919ef..da4d0ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -405,6 +405,7 @@ struct dma_buf *amdgpu_gem_prime_export(struct 
drm_gem_object *gobj,
return buf;
 }
 
+
 /**
  * amdgpu_dma_buf_create_obj - create BO for DMA-buf import
  *
@@ -424,7 +425,7 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct 
dma_buf *dma_buf)
struct amdgpu_device *adev = drm_to_adev(dev);
struct amdgpu_bo *bo;
struct amdgpu_bo_param bp;
-   int ret;
+   struct drm_gem_object *obj;
 
memset(&bp, 0, sizeof(bp));
bp.size = dma_buf->size;
@@ -434,21 +435,19 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct 
dma_buf *dma_buf)
bp.type = ttm_bo_type_sg;
bp.resv = resv;
dma_resv_lock(resv, NULL);
-   ret = amdgpu_bo_create(adev, &bp, &bo);
-   if (ret)
+   obj = amdgpu_gem_object_create_raw(adev, &bp);
+   if (IS_ERR(obj))
goto error;
 
+   bo = gem_to_amdgpu_bo(obj);
bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
if (dma_buf->ops != &amdgpu_dmabuf_ops)
bo->prime_shared_count = 1;
 
-   dma_resv_unlock(resv);
-   return &bo->tbo.base;
-
 error:
dma_resv_unlock(resv);
-   return ERR_PTR(ret);
+   return obj;
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index c9f94fb..5f22ce6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -52,13 +52,26 @@ static void amdgpu_gem_object_free(struct drm_gem_object 
*gobj)
}
 }
 
+struct drm_gem_object *amdgpu_gem_object_create_raw(struct amdgpu_device *adev,
+   struct amdgpu_bo_param *bp)
+{
+   struct amdgpu_bo *bo;
+   int r;
+
+   r = amdgpu_bo_create(adev, bp, &bo);
+   if (r)
+   return ERR_PTR(r);
+
+   bo->tbo.base.funcs = &amdgpu_gem_object_funcs;
+   return &bo->tbo.base;
+}
+
 int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
 int alignment, u32 initial_domain,
 u64 flags, enum ttm_bo_type type,
 struct dma_resv *resv,
 struct drm_gem_object **obj)
 {
-   struct amdgpu_bo *bo;
struct amdgpu_bo_param bp;
int r;
 
@@ -73,8 +86,9 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, 
unsigned long size,
 retry:
bp.flags = flags;
bp.domain = initial_domain;
-   r = amdgpu_bo_create(adev, &bp, &bo);
-   if (r) {
+   *obj = amdgpu_gem_object_create_raw(adev, &bp);
+   if (IS_ERR(*obj)) {
+   r = PTR_ERR(*obj);
if (r != -ERESTARTSYS) {
if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) {
flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
@@ -90,8 +104,6 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, 
unsigned long size,
}
return r;
}
-   *obj = &bo->tbo.base;
-   (*obj)->funcs = &amdgpu_gem_object_funcs;
 
return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
index 637bf51..a6b90d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
@@ -38,12 +38,17 @@ unsigned long amdgpu_gem_timeout(uint64_t timeout_ns);
 /*
  * GEM objects.
  */
+
+struct amdgpu_bo_param;
+
 void amdgpu_gem_force_release(struct amdgpu_device *adev);
 int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
 int alignment, u32 initial_domain,
 u64 flags, enum ttm_bo_type type,
 struct dma_resv *resv,
 struct drm_gem_object **obj);
+struct drm_gem_object *amdgpu_gem_object_create_raw(struct amdgpu_device 

Re: [PATCH 4/5] drm/scheduler: Job timeout handler returns status (v2)

2020-12-07 Thread Andrey Grodzovsky


On 12/7/20 2:19 PM, Christian König wrote:

Am 07.12.20 um 20:09 schrieb Andrey Grodzovsky:


On 12/7/20 1:04 PM, Christian König wrote:

Am 07.12.20 um 17:00 schrieb Andrey Grodzovsky:


On 12/7/20 6:13 AM, Christian König wrote:

Am 04.12.20 um 16:10 schrieb Andrey Grodzovsky:


On 12/4/20 3:13 AM, Christian König wrote:
Thinking more about that I came to the conclusion that the whole 
approach here isn't correct.


See even when the job has been completed or canceled we still want to 
restart the timer.


The reason for this is that the timer is then not restarted for the 
current job, but for the next job in the queue.


The only valid reason to not restart the timer is that the whole device 
was hot plugged and we return -ENODEV here. E.g. what Andrey has been 
working on.



We discussed this with Luben offline a few days ago but came to the 
conclusion that for the next job the timer restart in drm_sched_job_begin 
should do the work, no ?


Nope, drm_sched_job_begin() pushes the job to the hardware and starts the 
timeout in case the hardware was idle before.



drm_sched_job_begin only adds the job to the ring mirror list and rearms the 
timer, I don't see how it is related to whether the HW was idle before ?


It doesn't rearm the timer. It initially starts the timer when the hardware 
is idle.



It schedules delayed work for the timer task if the ring mirror list is not empty. 
Am I missing something ?



Ok, let me explain from the beginning.

drm_sched_start_timeout() initially starts the timer, it does NOT rearm it! 
When the timer is already running it doesn't have any effect at all.
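
For reference, a simplified version of that function from the
sched_main.c of the time (details may vary by tree):

static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched)
{
	/* schedule_delayed_work() is a no-op while the work is still
	 * pending, so this only starts the timer, it never rearms a
	 * running one. */
	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
	    !list_empty(&sched->ring_mirror_list))
		schedule_delayed_work(&sched->work_tdr, sched->timeout);
}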


In the sense that delayed work cannot be enqueued while another instance is still 
in the queue, I agree.

I forgot about this in the context of drm_sched_start_timeout.




When a job completes drm_sched_get_cleanup_job() cancels the timer, frees the 
job and then starts a new timer for the engine.


When a timeout happens the job is either canceled or give some extra time by 
putting it back on the pending list.


When the job is canceled the timer must be restarted for the next job, because 
drm_sched_job_begin() was already called long ago.
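
A rough sketch of that completion path, simplified from
drm_sched_get_cleanup_job() (the real code differs in detail):

/* On completion: stop the timer armed for the finished job, pop the
 * job from the ring mirror list, then arm the timer again so that it
 * now covers the next job on the list. */
cancel_delayed_work(&sched->work_tdr);
job = list_first_entry_or_null(&sched->ring_mirror_list,
			       struct drm_sched_job, node);
if (job && dma_fence_is_signaled(&job->s_fence->finished))
	list_del_init(&job->node);
drm_sched_start_timeout(sched); /* restart for the next job */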



Now I get it. The next job might have called (and probably did call) drm_sched_job_begin 
while the previous timer work (the currently executing one)
was still in the workqueue, so we cannot count on it to actually have 
restarted the timer, and so we must do it ourselves.






When the job gets some extra time we should also restart the timer.



Same as above.

Thanks for clarifying this.

Andrey




The only case when the timer should not be restarted is when the device was 
hotplugged and is completely gone now.


I think the right approach to stop this messing with the ring mirror list is 
to avoid using the job altogether for recovery.


What we should do instead is to put the recovery information on the scheduler 
fence, because that is the object which stays alive after pushing the job to 
the hardware.






Christian.



Andrey




Christian.



Andrey




The function should probably be renamed to drm_sched_job_pushed() because 
it doesn't begin the execution in any way.


Christian.








Andrey




Regards,
Christian.

Am 04.12.20 um 04:17 schrieb Luben Tuikov:

The driver's job timeout handler now returns
status indicating back to the DRM layer whether
the task (job) was successfully aborted or whether
more time should be given to the task to complete.

Default behaviour as of this patch, is preserved,
except in obvious-by-comment case in the Panfrost
driver, as documented below.

All drivers which make use of the
drm_sched_backend_ops' .timedout_job() callback
have been accordingly renamed and return the
would've-been default value of
DRM_TASK_STATUS_ALIVE to restart the task's
timeout timer--this is the old behaviour, and
is preserved by this patch.

In the case of the Panfrost driver, its timedout
callback correctly first checks if the job had
completed in due time and if so, it now returns
DRM_TASK_STATUS_COMPLETE to notify the DRM layer
that the task can be moved to the done list, to be
freed later. In the other two subsequent checks,
the value of DRM_TASK_STATUS_ALIVE is returned, as
per the default behaviour.

More involved driver solutions can be had
in subsequent patches.

Signed-off-by: Luben Tuikov 
Reported-by: kernel test robot 

Cc: Alexander Deucher 
Cc: Andrey Grodzovsky 
Cc: Christian König 
Cc: Daniel Vetter 
Cc: Lucas Stach 
Cc: Russell King 
Cc: Christian Gmeiner 
Cc: Qiang Yu 
Cc: Rob Herring 
Cc: Tomeu Vizoso 
Cc: Steven Price 
Cc: Alyssa Rosenzweig 
Cc: Eric Anholt 

v2: Use enum as the status of a driver's job
 timeout callback method.
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c |  6 +++--
  drivers/gpu/drm/etnaviv/etnaviv_sched.c | 10 +++-
  drivers/gpu/drm/lima/lima_sched.c   |  4 +++-
  drivers/gpu/drm/panfrost/panfrost_job.c |  9 ---
  drivers/gpu/drm/scheduler/sched_main.c  |
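
For reference, the status enum the patch introduces, reconstructed
from the hunks quoted in this thread (member order here is an
assumption; the actual definition lives in include/drm/gpu_scheduler.h):

enum drm_task_status {
	DRM_TASK_STATUS_COMPLETE,
	DRM_TASK_STATUS_ALIVE,
};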

Re: [PATCH 4/5] drm/scheduler: Job timeout handler returns status (v2)

2020-12-07 Thread Andrey Grodzovsky


On 12/7/20 1:04 PM, Christian König wrote:

Am 07.12.20 um 17:00 schrieb Andrey Grodzovsky:


On 12/7/20 6:13 AM, Christian König wrote:

Am 04.12.20 um 16:10 schrieb Andrey Grodzovsky:


On 12/4/20 3:13 AM, Christian König wrote:
Thinking more about that I came to the conclusion that the whole approach 
here isn't correct.


See even when the job has been completed or canceled we still want to 
restart the timer.


The reason for this is that the timer is then not restarted for the 
current job, but for the next job in the queue.


The only valid reason to not restart the timer is that the whole device 
was hot plugged and we return -ENODEV here. E.g. what Andrey has been 
working on.



We discussed this with Luben offline a few days ago but came to the 
conclusion that for the next job the timer restart in drm_sched_job_begin 
should do the work, no ?


Nope, drm_sched_job_begin() pushes the job to the hardware and starts the 
timeout in case the hardware was idle before.



drm_sched_job_begin only adds the job to the ring mirror list and rearms the 
timer, I don't see how it is related to whether the HW was idle before ?


It doesn't rearm the timer. It initially starts the timer when the hardware is 
idle.



It schedules delayed work for the timer task if the ring mirror list is not empty. Am I 
missing something ?


Andrey




Christian.



Andrey




The function should probably be renamed to drm_sched_job_pushed() because it 
doesn't begin the execution in any way.


Christian.








Andrey




Regards,
Christian.

Am 04.12.20 um 04:17 schrieb Luben Tuikov:

The driver's job timeout handler now returns
status indicating back to the DRM layer whether
the task (job) was successfully aborted or whether
more time should be given to the task to complete.

Default behaviour as of this patch, is preserved,
except in obvious-by-comment case in the Panfrost
driver, as documented below.

All drivers which make use of the
drm_sched_backend_ops' .timedout_job() callback
have been accordingly renamed and return the
would've-been default value of
DRM_TASK_STATUS_ALIVE to restart the task's
timeout timer--this is the old behaviour, and
is preserved by this patch.

In the case of the Panfrost driver, its timedout
callback correctly first checks if the job had
completed in due time and if so, it now returns
DRM_TASK_STATUS_COMPLETE to notify the DRM layer
that the task can be moved to the done list, to be
freed later. In the other two subsequent checks,
the value of DRM_TASK_STATUS_ALIVE is returned, as
per the default behaviour.

More involved driver solutions can be had
in subsequent patches.

Signed-off-by: Luben Tuikov 
Reported-by: kernel test robot 

Cc: Alexander Deucher 
Cc: Andrey Grodzovsky 
Cc: Christian König 
Cc: Daniel Vetter 
Cc: Lucas Stach 
Cc: Russell King 
Cc: Christian Gmeiner 
Cc: Qiang Yu 
Cc: Rob Herring 
Cc: Tomeu Vizoso 
Cc: Steven Price 
Cc: Alyssa Rosenzweig 
Cc: Eric Anholt 

v2: Use enum as the status of a driver's job
 timeout callback method.
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c |  6 +++--
  drivers/gpu/drm/etnaviv/etnaviv_sched.c | 10 +++-
  drivers/gpu/drm/lima/lima_sched.c   |  4 +++-
  drivers/gpu/drm/panfrost/panfrost_job.c |  9 ---
  drivers/gpu/drm/scheduler/sched_main.c  |  4 +---
  drivers/gpu/drm/v3d/v3d_sched.c | 32 +
  include/drm/gpu_scheduler.h | 20 +---
  7 files changed, 57 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c

index ff48101bab55..a111326cbdde 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -28,7 +28,7 @@
  #include "amdgpu.h"
  #include "amdgpu_trace.h"
  -static void amdgpu_job_timedout(struct drm_sched_job *s_job)
+static enum drm_task_status amdgpu_job_timedout(struct drm_sched_job 
*s_job)

  {
  struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
  struct amdgpu_job *job = to_amdgpu_job(s_job);
@@ -41,7 +41,7 @@ static void amdgpu_job_timedout(struct drm_sched_job 
*s_job)
  amdgpu_ring_soft_recovery(ring, job->vmid, 
s_job->s_fence->parent)) {

  DRM_ERROR("ring %s timeout, but soft recovered\n",
    s_job->sched->name);
-    return;
+    return DRM_TASK_STATUS_ALIVE;
  }
    amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti);
@@ -53,10 +53,12 @@ static void amdgpu_job_timedout(struct drm_sched_job 
*s_job)

    if (amdgpu_device_should_recover_gpu(ring->adev)) {
  amdgpu_device_gpu_recover(ring->adev, job);
+    return DRM_TASK_STATUS_ALIVE;
  } else {
drm_sched_suspend_timeout(&ring->sched);
  if (amdgpu_sriov_vf(adev))
  adev->virt.tdr_debug = true;
+    return DRM_TASK_STATUS_ALIVE;
  }
  }
  diff --git a/drive

Re: [PATCH 4/5] drm/scheduler: Job timeout handler returns status (v2)

2020-12-07 Thread Andrey Grodzovsky


On 12/7/20 6:13 AM, Christian König wrote:

Am 04.12.20 um 16:10 schrieb Andrey Grodzovsky:


On 12/4/20 3:13 AM, Christian König wrote:
Thinking more about that I came to the conclusion that the whole approach 
here isn't correct.


See even when the job has been completed or canceled we still want to 
restart the timer.


The reason for this is that the timer is then not restarted for the current 
job, but for the next job in the queue.


The only valid reason to not restart the timer is that the whole device was 
hot plugged and we return -ENODEV here. E.g. what Andrey has been working on.



We discussed this with Luben offline a few days ago but came to the conclusion 
that for the next job the timer restart in drm_sched_job_begin should do the 
work, no ?


Nope, drm_sched_job_begin() pushes the job to the hardware and starts the 
timeout in case the hardware was idle before.



drm_sched_job_begin only adds the job to the ring mirror list and rearms the timer, 
I don't see how it is related to whether the HW was idle before ?


Andrey




The function should probably be renamed to drm_sched_job_pushed() because it 
doesn't begin the execution in any way.


Christian.








Andrey




Regards,
Christian.

Am 04.12.20 um 04:17 schrieb Luben Tuikov:

The driver's job timeout handler now returns
status indicating back to the DRM layer whether
the task (job) was successfully aborted or whether
more time should be given to the task to complete.

Default behaviour as of this patch, is preserved,
except in obvious-by-comment case in the Panfrost
driver, as documented below.

All drivers which make use of the
drm_sched_backend_ops' .timedout_job() callback
have been accordingly renamed and return the
would've-been default value of
DRM_TASK_STATUS_ALIVE to restart the task's
timeout timer--this is the old behaviour, and
is preserved by this patch.

In the case of the Panfrost driver, its timedout
callback correctly first checks if the job had
completed in due time and if so, it now returns
DRM_TASK_STATUS_COMPLETE to notify the DRM layer
that the task can be moved to the done list, to be
freed later. In the other two subsequent checks,
the value of DRM_TASK_STATUS_ALIVE is returned, as
per the default behaviour.

More involved, driver-specific solutions can follow
in subsequent patches.

Signed-off-by: Luben Tuikov 
Reported-by: kernel test robot 

Cc: Alexander Deucher 
Cc: Andrey Grodzovsky 
Cc: Christian König 
Cc: Daniel Vetter 
Cc: Lucas Stach 
Cc: Russell King 
Cc: Christian Gmeiner 
Cc: Qiang Yu 
Cc: Rob Herring 
Cc: Tomeu Vizoso 
Cc: Steven Price 
Cc: Alyssa Rosenzweig 
Cc: Eric Anholt 

v2: Use enum as the status of a driver's job
 timeout callback method.
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c |  6 +++--
  drivers/gpu/drm/etnaviv/etnaviv_sched.c | 10 +++-
  drivers/gpu/drm/lima/lima_sched.c   |  4 +++-
  drivers/gpu/drm/panfrost/panfrost_job.c |  9 ---
  drivers/gpu/drm/scheduler/sched_main.c  |  4 +---
  drivers/gpu/drm/v3d/v3d_sched.c | 32 +
  include/drm/gpu_scheduler.h | 20 +---
  7 files changed, 57 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index ff48101bab55..a111326cbdde 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -28,7 +28,7 @@
  #include "amdgpu.h"
  #include "amdgpu_trace.h"
  -static void amdgpu_job_timedout(struct drm_sched_job *s_job)
+static enum drm_task_status amdgpu_job_timedout(struct drm_sched_job *s_job)
  {
  struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
  struct amdgpu_job *job = to_amdgpu_job(s_job);
@@ -41,7 +41,7 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
  amdgpu_ring_soft_recovery(ring, job->vmid, 
s_job->s_fence->parent)) {

  DRM_ERROR("ring %s timeout, but soft recovered\n",
    s_job->sched->name);
-    return;
+    return DRM_TASK_STATUS_ALIVE;
  }
    amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti);
@@ -53,10 +53,12 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
    if (amdgpu_device_should_recover_gpu(ring->adev)) {
  amdgpu_device_gpu_recover(ring->adev, job);
+    return DRM_TASK_STATUS_ALIVE;
  } else {
  drm_sched_suspend_timeout(&ring->sched);
  if (amdgpu_sriov_vf(adev))
  adev->virt.tdr_debug = true;
+    return DRM_TASK_STATUS_ALIVE;
  }
  }
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
index cd46c882269c..c49516942328 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
@@ -82,7 +82,8 @@ static struct dma_fence *etnaviv_sched_run_job(struct drm_sched_job *sche

Re: [PATCH 4/5] drm/scheduler: Job timeout handler returns status (v2)

2020-12-04 Thread Andrey Grodzovsky


On 12/4/20 3:13 AM, Christian König wrote:
Thinking more about that I came to the conclusion that the whole approach here 
isn't correct.


See even when the job has been completed or canceled we still want to restart 
the timer.


The reason for this is that the timer is then not restarted for the current 
job, but for the next job in the queue.


The only valid reason to not restart the timer is that the whole device was 
hot plugged and we return -ENODEV here. E.g. what Andrey has been working on.



We discussed this with Luben off line a few days ago but came to a conclusion 
that for the next job the timer restart in drm_sched_job_begin should do the 
work, no ?


Andrey




Regards,
Christian.

Am 04.12.20 um 04:17 schrieb Luben Tuikov:

The driver's job timeout handler now returns
status indicating back to the DRM layer whether
the task (job) was successfully aborted or whether
more time should be given to the task to complete.

Default behaviour as of this patch, is preserved,
except in obvious-by-comment case in the Panfrost
driver, as documented below.

All drivers which make use of the
drm_sched_backend_ops' .timedout_job() callback
have been accordingly renamed and return the
would've-been default value of
DRM_TASK_STATUS_ALIVE to restart the task's
timeout timer--this is the old behaviour, and
is preserved by this patch.

In the case of the Panfrost driver, its timedout
callback correctly first checks if the job had
completed in due time and if so, it now returns
DRM_TASK_STATUS_COMPLETE to notify the DRM layer
that the task can be moved to the done list, to be
freed later. In the other two subsequent checks,
the value of DRM_TASK_STATUS_ALIVE is returned, as
per the default behaviour.

More involved, driver-specific solutions can follow
in subsequent patches.

Signed-off-by: Luben Tuikov 
Reported-by: kernel test robot 

Cc: Alexander Deucher 
Cc: Andrey Grodzovsky 
Cc: Christian König 
Cc: Daniel Vetter 
Cc: Lucas Stach 
Cc: Russell King 
Cc: Christian Gmeiner 
Cc: Qiang Yu 
Cc: Rob Herring 
Cc: Tomeu Vizoso 
Cc: Steven Price 
Cc: Alyssa Rosenzweig 
Cc: Eric Anholt 

v2: Use enum as the status of a driver's job
 timeout callback method.
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c |  6 +++--
  drivers/gpu/drm/etnaviv/etnaviv_sched.c | 10 +++-
  drivers/gpu/drm/lima/lima_sched.c   |  4 +++-
  drivers/gpu/drm/panfrost/panfrost_job.c |  9 ---
  drivers/gpu/drm/scheduler/sched_main.c  |  4 +---
  drivers/gpu/drm/v3d/v3d_sched.c | 32 +
  include/drm/gpu_scheduler.h | 20 +---
  7 files changed, 57 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index ff48101bab55..a111326cbdde 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -28,7 +28,7 @@
  #include "amdgpu.h"
  #include "amdgpu_trace.h"
  -static void amdgpu_job_timedout(struct drm_sched_job *s_job)
+static enum drm_task_status amdgpu_job_timedout(struct drm_sched_job *s_job)
  {
  struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
  struct amdgpu_job *job = to_amdgpu_job(s_job);
@@ -41,7 +41,7 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
  amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {
  DRM_ERROR("ring %s timeout, but soft recovered\n",
    s_job->sched->name);
-    return;
+    return DRM_TASK_STATUS_ALIVE;
  }
    amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti);
@@ -53,10 +53,12 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
    if (amdgpu_device_should_recover_gpu(ring->adev)) {
  amdgpu_device_gpu_recover(ring->adev, job);
+    return DRM_TASK_STATUS_ALIVE;
  } else {
  drm_sched_suspend_timeout(&ring->sched);
  if (amdgpu_sriov_vf(adev))
  adev->virt.tdr_debug = true;
+    return DRM_TASK_STATUS_ALIVE;
  }
  }
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
index cd46c882269c..c49516942328 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
@@ -82,7 +82,8 @@ static struct dma_fence *etnaviv_sched_run_job(struct drm_sched_job *sched_job)
  return fence;
  }
  -static void etnaviv_sched_timedout_job(struct drm_sched_job *sched_job)
+static enum drm_task_status etnaviv_sched_timedout_job(struct drm_sched_job
+   *sched_job)
  {
  struct etnaviv_gem_submit *submit = to_etnaviv_submit(sched_job);
  struct etnaviv_gpu *gpu = submit->gpu;
@@ -120,9 +121,16 @@ static void etnaviv_sched_timedout_job(struct drm_sched_job *sched_job)
    drm_sched_resubmit_jobs(&gpu->sched);
  +    /* Tell the DRM scheduler that

Re: [PATCH v2 5/8] drm/amdgpu: Refactor sysfs removal

2020-12-02 Thread Andrey Grodzovsky


On 12/2/20 1:20 PM, Greg KH wrote:

On Wed, Dec 02, 2020 at 01:02:06PM -0500, Andrey Grodzovsky wrote:

On 12/2/20 12:34 PM, Greg KH wrote:

On Wed, Dec 02, 2020 at 10:48:01AM -0500, Andrey Grodzovsky wrote:

On 11/11/20 10:34 AM, Greg KH wrote:

On Wed, Nov 11, 2020 at 10:13:13AM -0500, Andrey Grodzovsky wrote:

On 11/10/20 12:59 PM, Greg KH wrote:

On Tue, Nov 10, 2020 at 12:54:21PM -0500, Andrey Grodzovsky wrote:

Hi, back to this after a long context switch for some higher priority stuff.

So here I was eventually able to drop all this code, and this change here
https://nam11.safelinks.protection.outlook.com/?url=https:%2F%2Fcgit.freedesktop.org%2F~agrodzov%2Flinux%2Fcommit%2F%3Fh%3Damd-staging-drm-next-device-unplug%26id%3D61852c8a59b4dd89d637693552c73175b9f2ccd6&data=04%7C01%7CAndrey.Grodzovsky%40amd.com%7C13040ab9b50947a95acc08d896eec15d%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637425299507092187%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=CIXEl9hWHTAdo7t9yrdtu0OdEIZ3X2GQmJRhDUj28mw%3D&reserved=0
was enough for me. It seems that while device_remove_file can handle the use
case where the file and the parent directory are already gone,
sysfs_remove_group goes down in flames in that case
due to kobj->sd being unset on device removal.

A driver shouldn't ever have to remove individual sysfs groups, the
driver core/bus logic should do it for them automatically.

And whenever a driver calls a sysfs_* call, that's a hint that something
is not working properly.

Do you mean that while the driver creates the groups and files explicitly
from its different subsystems it should not explicitly remove each
one of them because all of them should be removed at once (and
recursively) when the device is being removed ?

Individual drivers should never add groups/files in sysfs, the driver
core should do it properly for you if you have everything set up
properly.  And yes, the driver core will automatically remove them as
well.

Please use the default groups attribute for your bus/subsystem and this
will happen automagically.

Hi Greg, I tried your suggestion to hang amdgpu's sysfs
attributes on default attributes in struct device.groups but turns out it's
not usable since by the
time I have access to struct device from amdgpu code it has already been
initialized by pci core
(i.e.  past the point where device_add->device_add_attrs->device_add_groups
with dev->groups is called)
and so I can't really use it.

That's odd, why can't you just set the groups pointer in your pci_driver
structure?  That's what it is there for, right?

I am probably missing something, but amdgpu sysfs attrs are per device, not
per driver

Oops, you are right, you want the 'dev_groups' field.  Looks like pci
doesn't export that directly, so you can do:
.driver {
.dev_groups = my_device_groups;
},
in your pci_driver structure.

Or I'm sure the PCI driver maintainer would take a patch like
7d9c1d2f7aca ("USB: add support for dev_groups to struct
usb_device_driver") was done for the USB subsystem, as diving into the
"raw" .driver pointer isn't really that clean or nice in my opinion.



Looks like exactly what I need. I will probably start with assigning the raw
pointer just to push my work ahead, and in parallel will probably submit a
patch like yours for PCI subsystem review, as the rework to switch to this is
really minimal.

Andrey




thanks,

greg k-h

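
For reference, a minimal sketch of the dev_groups route described above,
assuming a hypothetical driver (foo_show, my_probe, my_remove and friends are
placeholders, not amdgpu code):

#include <linux/device.h>
#include <linux/pci.h>

static ssize_t foo_show(struct device *dev, struct device_attribute *attr,
			char *buf)
{
	return scnprintf(buf, PAGE_SIZE, "bar\n");
}
static DEVICE_ATTR_RO(foo);

static struct attribute *my_dev_attrs[] = {
	&dev_attr_foo.attr,
	NULL,
};
ATTRIBUTE_GROUPS(my_dev);	/* generates my_dev_groups */

static struct pci_driver my_pci_driver = {
	.name	= "my-driver",
	.probe	= my_probe,
	.remove	= my_remove,
	.driver	= {
		/* The driver core creates these files before probe() and
		 * removes them with the device - no explicit
		 * sysfs_create_group()/sysfs_remove_group() calls, and no
		 * races on hot unplug. */
		.dev_groups = my_dev_groups,
	},
};
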


Re: [PATCH v2 5/8] drm/amdgpu: Refactor sysfs removal

2020-12-02 Thread Andrey Grodzovsky


On 12/2/20 12:34 PM, Greg KH wrote:

On Wed, Dec 02, 2020 at 10:48:01AM -0500, Andrey Grodzovsky wrote:

On 11/11/20 10:34 AM, Greg KH wrote:

On Wed, Nov 11, 2020 at 10:13:13AM -0500, Andrey Grodzovsky wrote:

On 11/10/20 12:59 PM, Greg KH wrote:

On Tue, Nov 10, 2020 at 12:54:21PM -0500, Andrey Grodzovsky wrote:

Hi, back to this after a long context switch for some higher priority stuff.

So here I was eventually able to drop all this code, and this change here
https://nam11.safelinks.protection.outlook.com/?url=https:%2F%2Fcgit.freedesktop.org%2F~agrodzov%2Flinux%2Fcommit%2F%3Fh%3Damd-staging-drm-next-device-unplug%26id%3D61852c8a59b4dd89d637693552c73175b9f2ccd6&data=04%7C01%7CAndrey.Grodzovsky%40amd.com%7C29ff7efb89bd47d8488708d896e86e7c%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637425272317529134%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=Vzc3fVofA6%2BMPSqHmBqcWavQLKWU1%2FXKJFun24irLf0%3D&reserved=0
was enough for me. It seems that while device_remove_file can handle the use
case where the file and the parent directory are already gone,
sysfs_remove_group goes down in flames in that case
due to kobj->sd being unset on device removal.

A driver shouldn't ever have to remove individual sysfs groups, the
driver core/bus logic should do it for them automatically.

And whenever a driver calls a sysfs_* call, that's a hint that something
is not working properly.


Do you mean that while the driver creates the groups and files explicitly
from its different subsystems it should not explicitly remove each
one of them because all of them should be removed at once (and
recursively) when the device is being removed ?

Individual drivers should never add groups/files in sysfs, the driver
core should do it properly for you if you have everything set up
properly.  And yes, the driver core will automatically remove them as
well.

Please use the default groups attribute for your bus/subsystem and this
will happen automagically.


Hi Greg, I tried your suggestion to hang amdgpu's sysfs
attributes on default attributes in struct device.groups but turns out it's
not usable since by the
time I have access to struct device from amdgpu code it has already been
initialized by pci core
(i.e.  past the point where device_add->device_add_attrs->device_add_groups
with dev->groups is called)
and so I can't really use it.

That's odd, why can't you just set the groups pointer in your pci_driver
structure?  That's what it is there for, right?


I am probably missing something, but amdgpu sysfs attrs are per device, not per
driver: their life cycle is bound to the device and their location in the sysfs
topology is
under each device. Putting them as driver default attrs will not put them in
their current per-device location,
and won't make them automatically be destroyed once a particular device goes
away, no ?


Andrey





The only thing I can think of using is creating my own struct attribute_group **
array in amdgpu where I aggregate all
amdgpu sysfs attributes, call device_add_groups at the end of amdgpu pci
probe with that array and on device remove call
device_remove_groups with the same array.

Horrid, no, see above :)

thanks,

greg k-h



Re: [PATCH v2 5/8] drm/amdgpu: Refactor sysfs removal

2020-12-02 Thread Andrey Grodzovsky


On 11/11/20 10:34 AM, Greg KH wrote:

On Wed, Nov 11, 2020 at 10:13:13AM -0500, Andrey Grodzovsky wrote:

On 11/10/20 12:59 PM, Greg KH wrote:

On Tue, Nov 10, 2020 at 12:54:21PM -0500, Andrey Grodzovsky wrote:

Hi, back to this after a long context switch for some higher priority stuff.

So here I was eventually able to drop all this code, and this change here
https://nam11.safelinks.protection.outlook.com/?url=https:%2F%2Fcgit.freedesktop.org%2F~agrodzov%2Flinux%2Fcommit%2F%3Fh%3Damd-staging-drm-next-device-unplug%26id%3D61852c8a59b4dd89d637693552c73175b9f2ccd6&data=04%7C01%7CAndrey.Grodzovsky%40amd.com%7C9fbfecac94a340dfb68408d886571609%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637407055896651058%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=Ye8HJR1vidppcOBnlOgVu5GwKD2%2Bb5ztHbiI%2BubKKT0%3D&reserved=0
was enough for me. It seems that while device_remove_file can handle the use
case where the file and the parent directory are already gone,
sysfs_remove_group goes down in flames in that case
due to kobj->sd being unset on device removal.

A driver shouldn't ever have to remove individual sysfs groups, the
driver core/bus logic should do it for them automatically.

And whenever a driver calls a sysfs_* call, that's a hint that something
is not working properly.



Do you mean that while the driver creates the groups and files explicitly
from its different subsystems it should not explicitly remove each
one of them because all of them should be removed at once (and
recursively) when the device is being removed ?

Individual drivers should never add groups/files in sysfs, the driver
core should do it properly for you if you have everything set up
properly.  And yes, the driver core will automatically remove them as
well.

Please use the default groups attribute for your bus/subsystem and this
will happen automagically.



Hi Greg, I tried your suggestion to hang amdgpu's sysfs
attributes on default attributes in struct device.groups but turns out it's not 
usable since by the
time I have access to struct device from amdgpu code it has already been
initialized by pci core
(i.e.  past the point where device_add->device_add_attrs->device_add_groups with 
dev->groups is called)

and so I can't really use it.

The only thing I can think of using is creating my own struct attribute_group **
array in amdgpu where I aggregate all
amdgpu sysfs attributes, call device_add_groups at the end of amdgpu pci probe
with that array and on device remove call

device_remove_groups with the same array.

Do you maybe have a better suggestion for me ?

Andrey




thanks,

greg k-h




Re: [PATCH v3 05/12] drm/ttm: Expose ttm_tt_unpopulate for driver use

2020-11-27 Thread Andrey Grodzovsky


On 11/27/20 9:59 AM, Daniel Vetter wrote:

On Wed, Nov 25, 2020 at 02:34:44PM -0500, Andrey Grodzovsky wrote:

On 11/25/20 11:36 AM, Daniel Vetter wrote:

On Wed, Nov 25, 2020 at 01:57:40PM +0100, Christian König wrote:

Am 25.11.20 um 11:40 schrieb Daniel Vetter:

On Tue, Nov 24, 2020 at 05:44:07PM +0100, Christian König wrote:

Am 24.11.20 um 17:22 schrieb Andrey Grodzovsky:

On 11/24/20 2:41 AM, Christian König wrote:

Am 23.11.20 um 22:08 schrieb Andrey Grodzovsky:

On 11/23/20 3:41 PM, Christian König wrote:

Am 23.11.20 um 21:38 schrieb Andrey Grodzovsky:

On 11/23/20 3:20 PM, Christian König wrote:

Am 23.11.20 um 21:05 schrieb Andrey Grodzovsky:

On 11/25/20 5:42 AM, Christian König wrote:

Am 21.11.20 um 06:21 schrieb Andrey Grodzovsky:

It's needed to drop iommu backed pages on device unplug
before device's IOMMU group is released.

It would be cleaner if we could do the whole
handling in TTM. I also need to double check
what you are doing with this function.

Christian.

Check patch "drm/amdgpu: Register IOMMU topology
notifier per device." to see
how i use it. I don't see why this should go
into TTM mid-layer - the stuff I do inside
is vendor specific and also I don't think TTM is
explicitly aware of IOMMU ?
Do you mean you prefer the IOMMU notifier to be
registered from within TTM
and then use a hook to call into vendor specific handler ?

No, that is really vendor specific.

What I meant is to have a function like
ttm_resource_manager_evict_all() which you only need
to call and all tt objects are unpopulated.

So instead of this BO list i create and later iterate in
amdgpu from the IOMMU patch you just want to do it
within
TTM with a single function ? Makes much more sense.

Yes, exactly.

The list_empty() checks we have in TTM for the LRU are
actually not the best idea, we should now check the
pin_count instead. This way we could also have a list of the
pinned BOs in TTM.

So from my IOMMU topology handler I will iterate the TTM LRU for
the unpinned BOs and this new function for the pinned ones  ?
It's probably a good idea to combine both iterations into this
new function to cover all the BOs allocated on the device.

Yes, that's what I had in my mind as well.


BTW: Have you thought about what happens when we unpopulate
a BO while we still try to use a kernel mapping for it? That
could have unforeseen consequences.

Are you asking what happens to kmap or vmap style mapped CPU
accesses once we drop all the DMA backing pages for a particular
BO ? Because for user mappings
(mmap) we took care of this with dummy page reroute but indeed
nothing was done for in kernel CPU mappings.

Yes exactly that.

In other words what happens if we free the ring buffer while the
kernel still writes to it?

Christian.

While we can't control user application accesses to the mapped buffers
explicitly and hence we use page fault rerouting
I am thinking that in this case we may be able to sprinkle
drm_dev_enter/exit in any such sensitive place where we might
CPU access a DMA buffer from the kernel ?

Yes, I fear we are going to need that.

Uh ... problem is that dma_buf_vmap are usually permanent things. Maybe we
could stuff this into begin/end_cpu_access


Do you mean guarding with drm_dev_enter/exit in dma_buf_ops.begin/end_cpu_access
driver specific hook ?



(but only for the kernel, so a
bit tricky)?


Why only kernel ? Why is it a problem to do it if it comes from dma_buf_ioctl by
some user process ? And if we do need this distinction I think we should be
able to
differentiate by looking at the current->mm (i.e. mm_struct) pointer being NULL
for a kernel thread.

Userspace mmap is handled by punching out the pte. So we don't need to do
anything special there.

For kernel mmap the begin/end should be all in the same context (so we
could use the srcu lock that works underneath drm_dev_enter/exit), since
at least right now kernel vmaps of dma-buf are very long-lived.



If by same context you mean the right drm_device (the exporter's one)
then this should be ok as I am seeing from amdgpu implementation
of the callback - amdgpu_dma_buf_begin_cpu_access. We just need to add
handler for .end_cpu_access callback to call drm_dev_exit there.

Andrey
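
A rough sketch of that guard, with the enter/exit split across the two hooks
(my_bo and its cpu_access_idx field are hypothetical; only
drm_dev_enter/drm_dev_exit and the dma_buf_ops signatures are real):

static int my_begin_cpu_access(struct dma_buf *dma_buf,
			       enum dma_data_direction direction)
{
	struct drm_gem_object *obj = dma_buf->priv;
	struct my_bo *bo = to_my_bo(obj);

	/* Fails once drm_dev_unplug() ran; the idx must be handed
	 * back in the matching end_cpu_access below. */
	if (!drm_dev_enter(obj->dev, &bo->cpu_access_idx))
		return -ENODEV;

	/* ... pin/move the BO for CPU access as done today ... */
	return 0;
}

static int my_end_cpu_access(struct dma_buf *dma_buf,
			     enum dma_data_direction direction)
{
	struct drm_gem_object *obj = dma_buf->priv;
	struct my_bo *bo = to_my_bo(obj);

	drm_dev_exit(bo->cpu_access_idx);
	return 0;
}
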




But the good news is that Thomas Zimmerman is working on this problem
already for different reasons, so it might be that we won't have any
long-lived kernel vmap anymore. And we could put the drm_dev_enter/exit in
there.


Oh very very good point! I haven't thought about DMA-buf mmaps in this
context yet.



btw the other issue with dma-buf (and even worse with dma_fence) is
refcounting of the underlying drm_device. I'd expect that all your
callbacks go boom if the dma_buf outlives your drm_device. That part isn't
yet solved in your series here.

Well thinking more about this, it seems to be another really good argument
why mapping pages from DMA-bufs into application address space directly is a
very bad idea :)

Re: [PATCH v3 10/12] drm/amdgpu: Avoid sysfs dirs removal post device unplug

2020-11-27 Thread Andrey Grodzovsky


On 11/27/20 10:04 AM, Daniel Vetter wrote:

On Wed, Nov 25, 2020 at 12:39:47PM -0500, Andrey Grodzovsky wrote:

On 11/25/20 4:04 AM, Daniel Vetter wrote:

On Tue, Nov 24, 2020 at 11:27 PM Andrey Grodzovsky
 wrote:

On 11/24/20 9:49 AM, Daniel Vetter wrote:

On Sat, Nov 21, 2020 at 12:21:20AM -0500, Andrey Grodzovsky wrote:

Avoids NULL ptr due to kobj->sd being unset on device removal.

Signed-off-by: Andrey Grodzovsky 
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c   | 4 +++-
drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 4 +++-
2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index caf828a..812e592 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -27,6 +27,7 @@
#include 
#include 
#include 
+#include 

#include "amdgpu.h"
#include "amdgpu_ras.h"
@@ -1043,7 +1044,8 @@ static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev)
   .attrs = attrs,
   };

-sysfs_remove_group(&adev->dev->kobj, &group);
+if (!drm_dev_is_unplugged(&adev->ddev))
+sysfs_remove_group(&adev->dev->kobj, &group);

This looks wrong. sysfs, like any other interface, should be
unconditionally thrown out when we do the drm_dev_unregister. Whether
hotunplugged or not shouldn't matter at all. Either this isn't needed at all,
or something is wrong with the ordering here. But definitely fishy.
-Daniel

So technically this is needed because the kobject's sysfs directory entry kobj->sd
is set to NULL
on device removal (from sysfs_remove_dir), but because we don't finalize the device
until the last reference to the drm file is dropped (which can happen later) we end up
calling sysfs_remove_file/dir after
this pointer is NULL. sysfs_remove_file checks for NULL and aborts while
sysfs_remove_dir
does not, and that's why I guard against calls to sysfs_remove_dir.
But indeed the whole approach in the driver is incorrect, as Greg pointed out -
we should use
default groups attributes instead of explicit calls to the sysfs interface, and this
would save those troubles.
But again, the issue here is one of scope of work: converting all of amdgpu to default
groups attributes is a somewhat
lengthy process with extra testing, as the entire driver is papered with sysfs
references, and seems to me more of a standalone
cleanup, just like the switch to devm_ and drmm_. To me at least it seems
that it makes more sense
to finalize and push the hot unplug patches so that this new functionality can
be part of the driver sooner,
and then incrementally improve it by working on those other topics. Just as with
devm_/drmm_, I also added sysfs cleanup
to my TODO list in the RFC patch.

Hm, whether you solve this with the default group stuff to
auto-remove, or remove explicitly at the right time doesn't matter
much. The underlying problem you have here is that it's done way too
late.

As far as I understand the default group attrs from reading this
article by Greg -
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fwww.linux.com%2Fnews%2Fhow-create-sysfs-file-correctly%2F&data=04%7C01%7CAndrey.Grodzovsky%40amd.com%7C3e993d1dfad7462608d892e5bbb9%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637420862696611997%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=HAlEqI6CYR3k1n9FFAibpjBlK7I7x9W23yd5CWJVYgM%3D&reserved=0
they will be removed together with the device, and not too late like now. I
quote
from the last paragraph there:

"By setting this value, you don’t have to do anything in your
probe() or release() functions at all in order for the
sysfs files to be properly created and destroyed whenever your
device is added or removed from the system. And you will, most
importantly, do it in a race-free manner, which is always a good thing."

To me this seems like the best solution to the late remove issue. What do
you think ?



   sysfs removal (like all uapi interfaces) needs to be removed as
part of drm_dev_unregister.


Do you mean we need to trace and aggregate all sysfs file creation within
the low level drivers and then call some sysfs release function inside
drm_dev_unregister
to iterate and release them all ?

That would just reinvent the proper solution Greg explained above. For now
I think you just need some driver callback that you call right after
drm_dev_unplug (or drm_dev_unregister) to clean up these sysfs interfaces.
Afaiui the important part is to clean up your additional interfaces from
the ->remove callback, since at that point the core sysfs stuff still
exists.

Maybe you want to do another loop over all IP blocks and a ->unregister
callback, or maybe it's just 1-2 cases you call directly.



Most of them are buried within non-IP-block entities (e.g.
amdgpu_device_fin

Re: [PATCH 3/6] drm/scheduler: Job timeout handler returns status

2020-11-26 Thread Andrey Grodzovsky



On 11/24/20 10:17 PM, Luben Tuikov wrote:

The job timeout handler now returns status
indicating back to the DRM layer whether the job
was successfully cancelled or whether more time
should be given to the job to complete.

Signed-off-by: Luben Tuikov 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c |  6 --
  include/drm/gpu_scheduler.h | 13 ++---
  2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index ff48101bab55..81b73790ecc6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -28,7 +28,7 @@
  #include "amdgpu.h"
  #include "amdgpu_trace.h"
  
-static void amdgpu_job_timedout(struct drm_sched_job *s_job)

+static int amdgpu_job_timedout(struct drm_sched_job *s_job)
  {
struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
struct amdgpu_job *job = to_amdgpu_job(s_job);
@@ -41,7 +41,7 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
	amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {
DRM_ERROR("ring %s timeout, but soft recovered\n",
  s_job->sched->name);
-   return;
+   return 0;
}
  
  	amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti);

@@ -53,10 +53,12 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
  
  	if (amdgpu_device_should_recover_gpu(ring->adev)) {

amdgpu_device_gpu_recover(ring->adev, job);
+   return 0;



For amdgpu specifically - note that amdgpu_device_gpu_recover returns a value
which is 0 for a successful GPU reset,
meaning we reset the GPU and resubmitted to HW the job that triggered the
timeout (the guilty job).
It means the job should still be considered part of the pending list, and so a
non-zero value
should be returned. I think only if we reset the GPU and don't submit back the
guilty job
can it be considered 'aborted' - but I don't think we even do this.
it can be considered 'aborted' - but I don't think we even do this.

Andrey



} else {
drm_sched_suspend_timeout(&ring->sched);
if (amdgpu_sriov_vf(adev))
adev->virt.tdr_debug = true;
+   return 1;
}
  }
  
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h

index 2e0c368e19f6..61f7121e1c19 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -230,10 +230,17 @@ struct drm_sched_backend_ops {
struct dma_fence *(*run_job)(struct drm_sched_job *sched_job);
  
  	/**

- * @timedout_job: Called when a job has taken too long to execute,
- * to trigger GPU recovery.
+* @timedout_job: Called when a job has taken too long to execute,
+* to trigger GPU recovery.
+*
+* Return 0, if the job has been aborted successfully and will
+* never be heard of from the device. Return non-zero if the
+* job wasn't able to be aborted, i.e. if more time should be
+* given to this job. The result is not "bool" as this
+* function is not a predicate, although its result may seem
+* as one.
 */
-   void (*timedout_job)(struct drm_sched_job *sched_job);
+   int (*timedout_job)(struct drm_sched_job *sched_job);
  
  	/**

   * @free_job: Called once the job's finished fence has been signaled

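
Spelled out against this patch's int convention, the semantics Andrey
describes would look roughly like this (a sketch, not a tested change; what a
failed recovery should report is exactly the open question here):

static int amdgpu_job_timedout(struct drm_sched_job *s_job)
{
	struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
	struct amdgpu_job *job = to_amdgpu_job(s_job);

	if (amdgpu_device_should_recover_gpu(ring->adev)) {
		/* 0 from amdgpu_device_gpu_recover() means the GPU was
		 * reset and the guilty job was resubmitted to the HW,
		 * i.e. the job is still pending - so report "not
		 * aborted" (non-zero), not 0 as in the hunk above. */
		return amdgpu_device_gpu_recover(ring->adev, job) == 0 ? 1 : 0;
	}

	drm_sched_suspend_timeout(&ring->sched);
	return 1;
}
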


Re: [PATCH v3 05/12] drm/ttm: Expose ttm_tt_unpopulate for driver use

2020-11-25 Thread Andrey Grodzovsky


On 11/25/20 11:36 AM, Daniel Vetter wrote:

On Wed, Nov 25, 2020 at 01:57:40PM +0100, Christian König wrote:

Am 25.11.20 um 11:40 schrieb Daniel Vetter:

On Tue, Nov 24, 2020 at 05:44:07PM +0100, Christian König wrote:

Am 24.11.20 um 17:22 schrieb Andrey Grodzovsky:

On 11/24/20 2:41 AM, Christian König wrote:

Am 23.11.20 um 22:08 schrieb Andrey Grodzovsky:

On 11/23/20 3:41 PM, Christian König wrote:

Am 23.11.20 um 21:38 schrieb Andrey Grodzovsky:

On 11/23/20 3:20 PM, Christian König wrote:

Am 23.11.20 um 21:05 schrieb Andrey Grodzovsky:

On 11/25/20 5:42 AM, Christian König wrote:

Am 21.11.20 um 06:21 schrieb Andrey Grodzovsky:

It's needed to drop iommu backed pages on device unplug
before device's IOMMU group is released.

It would be cleaner if we could do the whole
handling in TTM. I also need to double check
what you are doing with this function.

Christian.

Check patch "drm/amdgpu: Register IOMMU topology
notifier per device." to see
how i use it. I don't see why this should go
into TTM mid-layer - the stuff I do inside
is vendor specific and also I don't think TTM is
explicitly aware of IOMMU ?
Do you mean you prefer the IOMMU notifier to be
registered from within TTM
and then use a hook to call into vendor specific handler ?

No, that is really vendor specific.

What I meant is to have a function like
ttm_resource_manager_evict_all() which you only need
to call and all tt objects are unpopulated.

So instead of this BO list i create and later iterate in
amdgpu from the IOMMU patch you just want to do it
within
TTM with a single function ? Makes much more sense.

Yes, exactly.

The list_empty() checks we have in TTM for the LRU are
actually not the best idea, we should now check the
pin_count instead. This way we could also have a list of the
pinned BOs in TTM.

So from my IOMMU topology handler I will iterate the TTM LRU for
the unpinned BOs and this new function for the pinned ones  ?
It's probably a good idea to combine both iterations into this
new function to cover all the BOs allocated on the device.

Yes, that's what I had in my mind as well.


BTW: Have you thought about what happens when we unpopulate
a BO while we still try to use a kernel mapping for it? That
could have unforeseen consequences.

Are you asking what happens to kmap or vmap style mapped CPU
accesses once we drop all the DMA backing pages for a particular
BO ? Because for user mappings
(mmap) we took care of this with dummy page reroute but indeed
nothing was done for in kernel CPU mappings.

Yes exactly that.

In other words what happens if we free the ring buffer while the
kernel still writes to it?

Christian.

While we can't control user application accesses to the mapped buffers
explicitly and hence we use page fault rerouting
I am thinking that in this case we may be able to sprinkle
drm_dev_enter/exit in any such sensitive place where we might
CPU access a DMA buffer from the kernel ?

Yes, I fear we are going to need that.

Uh ... problem is that dma_buf_vmap are usually permanent things. Maybe we
could stuff this into begin/end_cpu_access



Do you mean guarding with drm_dev_enter/exit in dma_buf_ops.begin/end_cpu_access
driver specific hook ?



(but only for the kernel, so a
bit tricky)?



Why only kernel ? Why is it a problem to do it if it comes from dma_buf_ioctl by
some user process ? And if we do need this distinction I think we should be
able to
differentiate by looking at the current->mm (i.e. mm_struct) pointer being NULL
for a kernel thread.




Oh very very good point! I haven't thought about DMA-buf mmaps in this
context yet.



btw the other issue with dma-buf (and even worse with dma_fence) is
refcounting of the underlying drm_device. I'd expect that all your
callbacks go boom if the dma_buf outlives your drm_device. That part isn't
yet solved in your series here.

Well thinking more about this, it seems to be another really good argument
why mapping pages from DMA-bufs into application address space directly is a
very bad idea :)

But yes, we essentially can't remove the device as long as there is a
DMA-buf with mappings. No idea how to clean that one up.

drm_dev_get/put in drm_prime helpers should get us like 90% there I think.



What are the other 10% ?




The even more worrying thing is random dma_fence attached to the dma_resv
object. We could try to clean all of ours up, but they could have escaped
already into some other driver. And since we're talking about egpu
hotunplug, dma_fence escaping to the igpu is a pretty reasonable use-case.

I have no how to fix that one :-/
-Daniel



I assume you are referring to the sync_file_create/sync_file_get_fence API for
dma_fence export/import ?

So with DMA bufs we have the drm_gem_object as exporter specific private data
and so we can do drm_dev_get and put at the drm_gem_object layer to bind device 
life cycle
to that of each GEM object but we don't have such m
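
The GEM side of that could look roughly like this (a sketch;
drm_gem_prime_export and drm_dev_get/put are real, the wrapper is
hypothetical, and there is no equivalent pairing for dma_fence, which is the
open problem):

static struct dma_buf *my_gem_prime_export(struct drm_gem_object *obj,
					   int flags)
{
	struct dma_buf *buf;

	buf = drm_gem_prime_export(obj, flags);
	if (!IS_ERR(buf))
		drm_dev_get(obj->dev);	/* exported buffer pins the device */
	return buf;
}

/* ...with the matching drm_dev_put() from the exporter's dma_buf
 * .release callback once the last dma_buf reference is dropped. */
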

Re: [PATCH v3 08/12] drm/amdgpu: Split amdgpu_device_fini into early and late

2020-11-25 Thread Andrey Grodzovsky



On 11/25/20 5:41 AM, Daniel Vetter wrote:

On Tue, Nov 24, 2020 at 10:51:57AM -0500, Andrey Grodzovsky wrote:

On 11/24/20 9:53 AM, Daniel Vetter wrote:

On Sat, Nov 21, 2020 at 12:21:18AM -0500, Andrey Grodzovsky wrote:

Some of the stuff in amdgpu_device_fini such as HW interrupts
disable and pending fences finalization must be done right away on
pci_remove, while most of the stuff which relates to finalizing and
releasing driver data structures can be kept until
drm_driver.release hook is called, i.e. when the last device
reference is dropped.


Uh, fini_late and fini_early are rather meaningless namings, since it's not
clear why there's a split. If you used drm_connector_funcs as inspiration,
that's kinda not good because 'register' itself is a reserved keyword.
That's why we had to add late_ prefix, could as well have used
C_sucks_ as prefix :-) And then the early_unregister for consistency.

I think fini_hw and fini_sw (or maybe fini_drm) would be a lot clearer
about what they're doing.

I still strongly recommend that you cut over as much as possible of the
fini_hw work to devm_ and for the fini_sw/drm stuff there's drmm_
-Daniel


Definitely, and I put it in a TODO list in the RFC patch. Also, as I
mentioned before -
I just prefer to leave it for follow-up work because it's non-trivial and
requires shuffling
a lot of stuff around in the driver. I was thinking of committing the work
in incremental steps -
so it's easier to merge it and control for breakages.

Yeah doing devm/drmm conversion later on makes sense. It'd still try to
have better names than what you're currently going with. A few of these
will likely stick around for very long, not just interim.
-Daniel


Will do.

Andrey
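
For reference, the drmm_ route Daniel recommends boils down to registering
the software teardown at init time, so no explicit call from a release hook
is needed (a sketch; my_fini_sw and my_sw_init are hypothetical helpers):

static void my_fini_sw(struct drm_device *dev, void *data)
{
	/* software-only teardown: free driver structures, IDRs, ... */
}

static int my_sw_init(struct amdgpu_device *adev)
{
	/* my_fini_sw() then runs automatically when the last
	 * drm_device reference is dropped. */
	return drmm_add_action_or_reset(adev_to_drm(adev),
					my_fini_sw, NULL);
}
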





Andrey



Signed-off-by: Andrey Grodzovsky 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu.h|  6 +-
   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 16 
   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c|  7 ++-
   drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  | 15 ++-
   drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c| 24 +++-
   drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h|  1 +
   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c| 12 +++-
   drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c|  3 +++
   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h   |  3 ++-
   9 files changed, 65 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 83ac06a..6243f6d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1063,7 +1063,9 @@ static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
   int amdgpu_device_init(struct amdgpu_device *adev,
   uint32_t flags);
-void amdgpu_device_fini(struct amdgpu_device *adev);
+void amdgpu_device_fini_early(struct amdgpu_device *adev);
+void amdgpu_device_fini_late(struct amdgpu_device *adev);
+
   int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev);
   void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
@@ -1275,6 +1277,8 @@ void amdgpu_driver_lastclose_kms(struct drm_device *dev);
   int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv);
   void amdgpu_driver_postclose_kms(struct drm_device *dev,
 struct drm_file *file_priv);
+void amdgpu_driver_release_kms(struct drm_device *dev);
+
   int amdgpu_device_ip_suspend(struct amdgpu_device *adev);
   int amdgpu_device_suspend(struct drm_device *dev, bool fbcon);
   int amdgpu_device_resume(struct drm_device *dev, bool fbcon);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 2f60b70..797d94d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3557,14 +3557,12 @@ int amdgpu_device_init(struct amdgpu_device *adev,
* Tear down the driver info (all asics).
* Called at driver shutdown.
*/
-void amdgpu_device_fini(struct amdgpu_device *adev)
+void amdgpu_device_fini_early(struct amdgpu_device *adev)
   {
dev_info(adev->dev, "amdgpu: finishing device.\n");
flush_delayed_work(&adev->delayed_init_work);
adev->shutdown = true;
-   kfree(adev->pci_state);
-
/* make sure IB test finished before entering exclusive mode
 * to avoid preemption on IB test
 * */
@@ -3581,11 +3579,18 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
else
drm_atomic_helper_shutdown(adev_to_drm(adev));
}
-   amdgpu_fence_driver_fini(adev);
+   amdgpu_fence_driver_fini_early(adev);
if (adev->pm_sysfs_en)
amdgpu_pm_sysfs_fini(adev);
amdgpu_fbdev_fini(adev);
+
+   amdgpu_irq_fini_early(adev);
+}
+
+void amdgpu_device_fini_late(struct amdgpu_device *adev

Re: [PATCH v3 10/12] drm/amdgpu: Avoid sysfs dirs removal post device unplug

2020-11-25 Thread Andrey Grodzovsky


On 11/25/20 4:04 AM, Daniel Vetter wrote:

On Tue, Nov 24, 2020 at 11:27 PM Andrey Grodzovsky
 wrote:


On 11/24/20 9:49 AM, Daniel Vetter wrote:

On Sat, Nov 21, 2020 at 12:21:20AM -0500, Andrey Grodzovsky wrote:

Avoids NULL ptr due to kobj->sd being unset on device removal.

Signed-off-by: Andrey Grodzovsky 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c   | 4 +++-
   drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 4 +++-
   2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index caf828a..812e592 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -27,6 +27,7 @@
   #include 
   #include 
   #include 
+#include 

   #include "amdgpu.h"
   #include "amdgpu_ras.h"
@@ -1043,7 +1044,8 @@ static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev)
  .attrs = attrs,
  };

-sysfs_remove_group(&adev->dev->kobj, &group);
+if (!drm_dev_is_unplugged(&adev->ddev))
+sysfs_remove_group(&adev->dev->kobj, &group);

This looks wrong. sysfs, like any other interface, should be
unconditionally thrown out when we do the drm_dev_unregister. Whether
hotunplugged or not shouldn't matter at all. Either this isn't needed at all,
or something is wrong with the ordering here. But definitely fishy.
-Daniel


So technically this is needed because the kobject's sysfs directory entry kobj->sd
is set to NULL
on device removal (from sysfs_remove_dir), but because we don't finalize the device
until the last reference to the drm file is dropped (which can happen later) we end up
calling sysfs_remove_file/dir after
this pointer is NULL. sysfs_remove_file checks for NULL and aborts while
sysfs_remove_dir
does not, and that's why I guard against calls to sysfs_remove_dir.
But indeed the whole approach in the driver is incorrect, as Greg pointed out -
we should use
default groups attributes instead of explicit calls to the sysfs interface, and this
would save those troubles.
But again, the issue here is one of scope of work: converting all of amdgpu to default
groups attributes is a somewhat
lengthy process with extra testing, as the entire driver is papered with sysfs
references, and seems to me more of a standalone
cleanup, just like the switch to devm_ and drmm_. To me at least it seems
that it makes more sense
to finalize and push the hot unplug patches so that this new functionality can
be part of the driver sooner,
and then incrementally improve it by working on those other topics. Just as with
devm_/drmm_, I also added sysfs cleanup
to my TODO list in the RFC patch.

Hm, whether you solve this with the default group stuff to
auto-remove, or remove explicitly at the right time doesn't matter
much. The underlying problem you have here is that it's done way too
late.


As far as I understand the default group attrs from reading this
article by Greg - https://www.linux.com/news/how-create-sysfs-file-correctly/
they will be removed together with the device, and not too late like now. I
quote
from the last paragraph there:

"By setting this value, you don’t have to do anything in your
probe() or release() functions at all in order for the
sysfs files to be properly created and destroyed whenever your
device is added or removed from the system. And you will, most
importantly, do it in a race-free manner, which is always a good thing."

To me this seems like the best solution to the late remove issue. What do
you think ?



  sysfs removal (like all uapi interfaces) needs to be removed as
part of drm_dev_unregister.



Do you mean we need to trace and aggregate all sysfs file creation within
the low level drivers and then call some sysfs release function inside 
drm_dev_unregister

to iterate and release them all ?



  I guess aside from the split into fini_hw
and fini_sw, you also need an unregister_late callback (like we have
already for drm_connector, so that e.g. backlight and similar stuff
can be unregistered).



Is this the callback you suggest to call from within drm_dev_unregister and
it will be responsible to release all sysfs files created within the driver ?

Andrey




Papering over the underlying bug like this doesn't really fix much,
the lifetimes are still wrong.
-Daniel


Andrey



  return 0;
   }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 2b7c90b..54331fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -24,6 +24,7 @@
   #include 
   #include 
   #include 
+#include 

   #include "amdgpu.h"
   #include "amdgpu_ucode.h"
@@ -464,7 +465,8 @@ int amdgpu_ucode_sysfs_init(struct amdgpu_device *adev)

   void amdgpu_ucode_sysfs_fini(struct amdgpu_device *adev)
   {
-sysfs_remove_group(&adev->dev->k

Re: [PATCH v3 10/12] drm/amdgpu: Avoid sysfs dirs removal post device unplug

2020-11-24 Thread Andrey Grodzovsky



On 11/24/20 9:49 AM, Daniel Vetter wrote:

On Sat, Nov 21, 2020 at 12:21:20AM -0500, Andrey Grodzovsky wrote:

Avoids NULL ptr due to kobj->sd being unset on device removal.

Signed-off-by: Andrey Grodzovsky 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c   | 4 +++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 4 +++-
  2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index caf828a..812e592 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -27,6 +27,7 @@
  #include 
  #include 
  #include 
+#include 
  
  #include "amdgpu.h"

  #include "amdgpu_ras.h"
@@ -1043,7 +1044,8 @@ static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev)
.attrs = attrs,
};
  
-	sysfs_remove_group(&adev->dev->kobj, &group);

+   if (!drm_dev_is_unplugged(&adev->ddev))
+   sysfs_remove_group(&adev->dev->kobj, &group);

This looks wrong. sysfs, like any other interface, should be
unconditionally thrown out when we do the drm_dev_unregister. Whether
hotunplugged or not shouldn't matter at all. Either this isn't needed at all,
or something is wrong with the ordering here. But definitely fishy.
-Daniel



So technically this is needed because the kobject's sysfs directory entry kobj->sd
is set to NULL
on device removal (from sysfs_remove_dir), but because we don't finalize the device
until the last reference to the drm file is dropped (which can happen later) we end up
calling sysfs_remove_file/dir after
this pointer is NULL. sysfs_remove_file checks for NULL and aborts while
sysfs_remove_dir
does not, and that's why I guard against calls to sysfs_remove_dir.
But indeed the whole approach in the driver is incorrect, as Greg pointed out -
we should use
default groups attributes instead of explicit calls to the sysfs interface, and this
would save those troubles.
But again, the issue here is one of scope of work: converting all of amdgpu to default
groups attributes is a somewhat
lengthy process with extra testing, as the entire driver is papered with sysfs
references, and seems to me more of a standalone
cleanup, just like the switch to devm_ and drmm_. To me at least it seems
that it makes more sense
to finalize and push the hot unplug patches so that this new functionality can
be part of the driver sooner,
and then incrementally improve it by working on those other topics. Just as with
devm_/drmm_, I also added sysfs cleanup
to my TODO list in the RFC patch.

Andrey




  
  	return 0;

  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 2b7c90b..54331fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -24,6 +24,7 @@
  #include 
  #include 
  #include 
+#include 
  
  #include "amdgpu.h"

  #include "amdgpu_ucode.h"
@@ -464,7 +465,8 @@ int amdgpu_ucode_sysfs_init(struct amdgpu_device *adev)
  
  void amdgpu_ucode_sysfs_fini(struct amdgpu_device *adev)

  {
-   sysfs_remove_group(&adev->dev->kobj, &fw_attr_group);
+   if (!drm_dev_is_unplugged(&adev->ddev))
+   sysfs_remove_group(&adev->dev->kobj, &fw_attr_group);
  }
  
  static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,

--
2.7.4




Re: [PATCH v3 07/12] drm/sched: Prevent any job recoveries after device is unplugged.

2020-11-24 Thread Andrey Grodzovsky


On 11/24/20 12:11 PM, Luben Tuikov wrote:

On 2020-11-24 2:50 a.m., Christian König wrote:

Am 24.11.20 um 02:12 schrieb Luben Tuikov:

On 2020-11-23 3:06 a.m., Christian König wrote:

Am 23.11.20 um 06:37 schrieb Andrey Grodzovsky:

On 11/22/20 6:57 AM, Christian König wrote:

Am 21.11.20 um 06:21 schrieb Andrey Grodzovsky:

No point to try recovery if device is gone, it's meaningless.

I think that this should go into the device specific recovery
function and not in the scheduler.

The timeout timer is rearmed here, so this prevents any new recovery
work from restarting from here
after drm_dev_unplug was executed from amdgpu_pci_remove. It will not
cover other places like
job cleanup or starting a new job, but those should stop once the
scheduler thread is stopped later.

Yeah, but this is rather unclean. We should probably return an error
code instead to indicate whether the timer should be rearmed or not.

Christian, this is exactly my work I told you about
last week on Wednesday in our weekly meeting. And
which I wrote to you in an email last year about this
time.

Yeah, that's why I'm suggesting it here as well.

It seems you're suggesting that Andrey do it, while
all too well you know I've been working on this
for some time now.

I wrote you about this last year same time
in an email. And I discussed it on the Wednesday
meeting.

You could've mentioned that here the first time.



Luben, I actually strongly prefer that you do it and share your patch with me,
since I don't
want to do unneeded refactoring which will conflict with your work. Also,
please
use drm-misc for this since it's not amdgpu-specific work and will be easier for
me.

Andrey
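
The scheduler-side effect of handing drm_sched_init() the drm_device is
roughly the following check in the timeout worker (a sketch; whether the
pointer is stored as sched->ddev is an assumption based on this patch, while
drm_dev_is_unplugged() is real):

static void drm_sched_job_timedout(struct work_struct *work)
{
	struct drm_gpu_scheduler *sched =
		container_of(work, struct drm_gpu_scheduler, work_tdr.work);

	/* Device is gone: recovery is meaningless and the timeout
	 * timer must not be rearmed. */
	if (drm_dev_is_unplugged(sched->ddev))
		return;

	/* ... normal timeout handling, then rearm the timer ... */
}
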





So what do we do now?

Split your patches into smaller parts and submit them chunk by chunk.

E.g. renames first and then functional changes grouped by area they change.

I have, but my final patch (a tiny one which implements
the core reason for the change) seems buggy, and I'm looking
for a way to debug it.

Regards,
Luben



Regards,
Christian.


I can submit those changes without the last part,
which builds on this change.

I'm still testing the last part and was hoping
to submit it all in one sequence of patches,
after my testing.

Regards,
Luben


Christian.


Andrey



Christian.


Signed-off-by: Andrey Grodzovsky 
---
    drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c |  2 +-
    drivers/gpu/drm/etnaviv/etnaviv_sched.c   |  3 ++-
    drivers/gpu/drm/lima/lima_sched.c |  3 ++-
    drivers/gpu/drm/panfrost/panfrost_job.c   |  2 +-
    drivers/gpu/drm/scheduler/sched_main.c    | 15 ++-
    drivers/gpu/drm/v3d/v3d_sched.c   | 15 ++-
    include/drm/gpu_scheduler.h   |  6 +-
    7 files changed, 35 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index d56f402..d0b0021 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -487,7 +487,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
      r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
   num_hw_submission, amdgpu_job_hang_limit,
-   timeout, ring->name);
+   timeout, ring->name, &adev->ddev);
    if (r) {
    DRM_ERROR("Failed to create scheduler on ring %s.\n",
      ring->name);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
index cd46c88..7678287 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
@@ -185,7 +185,8 @@ int etnaviv_sched_init(struct etnaviv_gpu *gpu)
      ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops,
     etnaviv_hw_jobs_limit, etnaviv_job_hang_limit,
- msecs_to_jiffies(500), dev_name(gpu->dev));
+ msecs_to_jiffies(500), dev_name(gpu->dev),
+ gpu->drm);
    if (ret)
    return ret;
diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c
index dc6df9e..8a7e5d7ca 100644
--- a/drivers/gpu/drm/lima/lima_sched.c
+++ b/drivers/gpu/drm/lima/lima_sched.c
@@ -505,7 +505,8 @@ int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name)
      return drm_sched_init(&pipe->base, &lima_sched_ops, 1,
      lima_job_hang_limit, msecs_to_jiffies(timeout),
-  name);
+  name,
+  pipe->ldev->ddev);
    }
      void lima_sched_pipe_fini(struct lima_sched_pipe *pipe)
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
index 30e7b71..37b03b01 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@

Re: [PATCH v3 05/12] drm/ttm: Expose ttm_tt_unpopulate for driver use

2020-11-24 Thread Andrey Grodzovsky


On 11/24/20 2:41 AM, Christian König wrote:

Am 23.11.20 um 22:08 schrieb Andrey Grodzovsky:


On 11/23/20 3:41 PM, Christian König wrote:

Am 23.11.20 um 21:38 schrieb Andrey Grodzovsky:


On 11/23/20 3:20 PM, Christian König wrote:

Am 23.11.20 um 21:05 schrieb Andrey Grodzovsky:


On 11/25/20 5:42 AM, Christian König wrote:

Am 21.11.20 um 06:21 schrieb Andrey Grodzovsky:

It's needed to drop iommu backed pages on device unplug
before device's IOMMU group is released.


It would be cleaner if we could do the whole handling in TTM. I also 
need to double check what you are doing with this function.


Christian.



Check patch "drm/amdgpu: Register IOMMU topology notifier per device." to 
see
how i use it. I don't see why this should go into TTM mid-layer - the 
stuff I do inside

is vendor specific and also I don't think TTM is explicitly aware of IOMMU ?
Do you mean you prefer the IOMMU notifier to be registered from within TTM
and then use a hook to call into vendor specific handler ?


No, that is really vendor specific.

What I meant is to have a function like ttm_resource_manager_evict_all() 
which you only need to call and all tt objects are unpopulated.



So instead of this BO list i create and later iterate in amdgpu from the 
IOMMU patch you just want to do it within

TTM with a single function ? Makes much more sense.


Yes, exactly.

The list_empty() checks we have in TTM for the LRU are actually not the best 
idea, we should now check the pin_count instead. This way we could also have 
a list of the pinned BOs in TTM.



So from my IOMMU topology handler I will iterate the TTM LRU for the unpinned 
BOs and this new function for the pinned ones  ?
It's probably a good idea to combine both iterations into this new function 
to cover all the BOs allocated on the device.


Yes, that's what I had in my mind as well.






BTW: Have you thought about what happens when we unpopulate a BO while we 
still try to use a kernel mapping for it? That could have unforeseen 
consequences.



Are you asking what happens to kmap or vmap style mapped CPU accesses once we 
drop all the DMA backing pages for a particular BO ? Because for user mappings
(mmap) we took care of this with dummy page reroute but indeed nothing was 
done for in kernel CPU mappings.


Yes exactly that.

In other words what happens if we free the ring buffer while the kernel still 
writes to it?


Christian.



While we can't control user application accesses to the mapped buffers 
explicitly and hence we use page fault rerouting
I am thinking that in this case we may be able to sprinkle drm_dev_enter/exit
in any such sensitive place where we might
CPU access a DMA buffer from the kernel ? Things like CPU page table updates,
ring buffer accesses and FW memcpy ? Are there other places ?
Another point is that at this stage the driver shouldn't access any such buffers
as we are in the process of finishing the device.
AFAIK there is no page fault mechanism for kernel mappings so I don't think 
there is anything else to do ?
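
For illustration, a minimal sketch of what that sprinkling could look like
around a ring buffer write - the helper name is made up, but
drm_dev_enter()/drm_dev_exit() are the primitives used throughout this series:

#include <drm/drm_drv.h>

/* Hedged sketch, not actual amdgpu code: skip a kernel CPU write into a
 * DMA-backed ring buffer once the device has been unplugged.
 */
static void amdgpu_ring_write_guarded(struct amdgpu_ring *ring, u32 v)
{
	int idx;

	/* fails once drm_dev_unplug() has been called */
	if (!drm_dev_enter(adev_to_drm(ring->adev), &idx))
		return;

	ring->ring[ring->wptr++ & ring->buf_mask] = v;

	drm_dev_exit(idx);
}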


Andrey






Andrey




Christian.



Andrey




Give me a day or two to look into this.

Christian.



Andrey






Signed-off-by: Andrey Grodzovsky 
---
  drivers/gpu/drm/ttm/ttm_tt.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 1ccf1ef..29248a5 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -495,3 +495,4 @@ void ttm_tt_unpopulate(struct ttm_tt *ttm)
  else
  ttm_pool_unpopulate(ttm);
  }
+EXPORT_SYMBOL(ttm_tt_unpopulate);



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx








___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v3 08/12] drm/amdgpu: Split amdgpu_device_fini into early and late

2020-11-24 Thread Andrey Grodzovsky



On 11/24/20 9:53 AM, Daniel Vetter wrote:

On Sat, Nov 21, 2020 at 12:21:18AM -0500, Andrey Grodzovsky wrote:

Some of the stuff in amdgpu_device_fini, such as disabling HW interrupts
and finalizing pending fences, must be done right away on
pci_remove, while most of the stuff which relates to finalizing and
releasing driver data structures can be kept until the
drm_driver.release hook is called, i.e. when the last device
reference is dropped.


Uh, fini_late and fini_early are rather meaningless namings, since it's not
clear why there's a split. If you used drm_connector_funcs as inspiration,
that's kinda not good because 'register' itself is a reserved keyword.
That's why we had to add the late_ prefix - we could as well have used
C_sucks_ as the prefix :-) And then early_unregister for consistency.

I think fini_hw and fini_sw (or maybe fini_drm) would be a lot clearer
about what they're doing.

I still strongly recommend that you cut over as much as possible of the
fini_hw work to devm_, and for the fini_sw/drm stuff there's drmm_.
-Daniel



Definitely, and I put it in a TODO list in the RFC patch. Also, as I mentioned
before, I just prefer to leave it for follow-up work because it's non-trivial
and requires shuffling a lot of stuff around in the driver. I was thinking of
committing the work in incremental steps,

so it's easier to merge it and to control for breakages.
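
For reference, a minimal sketch of the drmm_ direction Daniel suggests,
assuming hypothetical helper names - cleanup registered this way runs
automatically when the final drm_device reference is dropped, which is exactly
the fini_sw/release point discussed here:

#include <drm/drm_managed.h>

/* Illustrative only: run the software-side teardown as a managed action
 * on final drm_dev_put() instead of an explicit fini call.
 */
static void amdgpu_fini_sw_action(struct drm_device *dev, void *ptr)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	amdgpu_device_ip_fini(adev);
}

static int amdgpu_register_sw_fini(struct amdgpu_device *adev)
{
	return drmm_add_action_or_reset(adev_to_drm(adev),
					amdgpu_fini_sw_action, NULL);
}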

Andrey





Signed-off-by: Andrey Grodzovsky 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h|  6 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 16 
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c|  7 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  | 15 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c| 24 +++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h|  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c| 12 +++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c|  3 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h   |  3 ++-
  9 files changed, 65 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 83ac06a..6243f6d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1063,7 +1063,9 @@ static inline struct amdgpu_device 
*amdgpu_ttm_adev(struct ttm_bo_device *bdev)
  
  int amdgpu_device_init(struct amdgpu_device *adev,

   uint32_t flags);
-void amdgpu_device_fini(struct amdgpu_device *adev);
+void amdgpu_device_fini_early(struct amdgpu_device *adev);
+void amdgpu_device_fini_late(struct amdgpu_device *adev);
+
  int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev);
  
  void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,

@@ -1275,6 +1277,8 @@ void amdgpu_driver_lastclose_kms(struct drm_device *dev);
  int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file 
*file_priv);
  void amdgpu_driver_postclose_kms(struct drm_device *dev,
 struct drm_file *file_priv);
+void amdgpu_driver_release_kms(struct drm_device *dev);
+
  int amdgpu_device_ip_suspend(struct amdgpu_device *adev);
  int amdgpu_device_suspend(struct drm_device *dev, bool fbcon);
  int amdgpu_device_resume(struct drm_device *dev, bool fbcon);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 2f60b70..797d94d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3557,14 +3557,12 @@ int amdgpu_device_init(struct amdgpu_device *adev,
   * Tear down the driver info (all asics).
   * Called at driver shutdown.
   */
-void amdgpu_device_fini(struct amdgpu_device *adev)
+void amdgpu_device_fini_early(struct amdgpu_device *adev)
  {
dev_info(adev->dev, "amdgpu: finishing device.\n");
flush_delayed_work(&adev->delayed_init_work);
adev->shutdown = true;
  
-	kfree(adev->pci_state);

-
/* make sure IB test finished before entering exclusive mode
 * to avoid preemption on IB test
 * */
@@ -3581,11 +3579,18 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
else
drm_atomic_helper_shutdown(adev_to_drm(adev));
}
-   amdgpu_fence_driver_fini(adev);
+   amdgpu_fence_driver_fini_early(adev);
if (adev->pm_sysfs_en)
amdgpu_pm_sysfs_fini(adev);
amdgpu_fbdev_fini(adev);
+
+   amdgpu_irq_fini_early(adev);
+}
+
+void amdgpu_device_fini_late(struct amdgpu_device *adev)
+{
amdgpu_device_ip_fini(adev);
+   amdgpu_fence_driver_fini_late(adev);
release_firmware(adev->firmware.gpu_info_fw);
adev->firmware.gpu_info_fw = NULL;
adev->accel_working = false;
@@ -3621,6 +3626,9 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
amdgpu_pmu_fini(adev);
if (adev->mman.discovery_bin)
		amdgpu_discovery_fini(adev);
+
+	kfree(adev->pci_state);
+
 }

Re: [PATCH v3 05/12] drm/ttm: Expose ttm_tt_unpopulate for driver use

2020-11-23 Thread Andrey Grodzovsky


On 11/23/20 3:41 PM, Christian König wrote:

On 11/23/20 9:38 PM, Andrey Grodzovsky wrote:


On 11/23/20 3:20 PM, Christian König wrote:

On 11/23/20 9:05 PM, Andrey Grodzovsky wrote:


On 11/25/20 5:42 AM, Christian König wrote:

On 11/21/20 6:21 AM, Andrey Grodzovsky wrote:

It's needed to drop IOMMU-backed pages on device unplug,
before the device's IOMMU group is released.


It would be cleaner if we could do the whole handling in TTM. I also need 
to double check what you are doing with this function.


Christian.



Check the patch "drm/amdgpu: Register IOMMU topology notifier per device." to see
how I use it. I don't see why this should go into the TTM mid-layer - the stuff
I do inside is vendor specific, and I don't think TTM is explicitly aware of
IOMMU anyway.
Do you mean you would prefer the IOMMU notifier to be registered from within TTM,
and then use a hook to call into a vendor specific handler?


No, that is really vendor specific.

What I meant is to have a function like ttm_resource_manager_evict_all() 
which you only need to call and all tt objects are unpopulated.



So instead of the BO list I create and later iterate in amdgpu in the IOMMU
patch, you just want to do it within
TTM with a single function? That makes much more sense.


Yes, exactly.

The list_empty() checks we have in TTM for the LRU are actually not the best
idea; we should now check the pin_count instead. This way we could also have a
list of the pinned BOs in TTM.



So from my IOMMU topology handler I will iterate the TTM LRU for the unpinned
BOs and use this new function for the pinned ones?
It's probably a good idea to combine both iterations into this new function to 
cover all the BOs allocated on the device.





BTW: Have you thought about what happens when we unpopulate a BO while we 
still try to use a kernel mapping for it? That could have unforeseen 
consequences.



Are you asking what happens to kmap- or vmap-style mapped CPU accesses once we
drop all the DMA backing pages for a particular BO? Because for user mappings
(mmap) we took care of this with the dummy page reroute, but indeed nothing was
done for in-kernel CPU mappings.


Andrey




Christian.



Andrey




Give me a day or two to look into this.

Christian.



Andrey






Signed-off-by: Andrey Grodzovsky 
---
  drivers/gpu/drm/ttm/ttm_tt.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 1ccf1ef..29248a5 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -495,3 +495,4 @@ void ttm_tt_unpopulate(struct ttm_tt *ttm)
  else
  ttm_pool_unpopulate(ttm);
  }
+EXPORT_SYMBOL(ttm_tt_unpopulate);



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx






___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v3 05/12] drm/ttm: Expose ttm_tt_unpopulate for driver use

2020-11-23 Thread Andrey Grodzovsky


On 11/23/20 3:20 PM, Christian König wrote:

On 11/23/20 9:05 PM, Andrey Grodzovsky wrote:


On 11/25/20 5:42 AM, Christian König wrote:

On 11/21/20 6:21 AM, Andrey Grodzovsky wrote:

It's needed to drop IOMMU-backed pages on device unplug,
before the device's IOMMU group is released.


It would be cleaner if we could do the whole handling in TTM. I also need to 
double check what you are doing with this function.


Christian.



Check the patch "drm/amdgpu: Register IOMMU topology notifier per device." to see
how I use it. I don't see why this should go into the TTM mid-layer - the stuff
I do inside is vendor specific, and I don't think TTM is explicitly aware of
IOMMU anyway.
Do you mean you would prefer the IOMMU notifier to be registered from within TTM,
and then use a hook to call into a vendor specific handler?


No, that is really vendor specific.

What I meant is to have a function like ttm_resource_manager_evict_all() which 
you only need to call and all tt objects are unpopulated.



So instead of the BO list I create and later iterate in amdgpu in the IOMMU
patch, you just want to do it within
TTM with a single function? That makes much more sense.

Andrey




Give me a day or two to look into this.

Christian.



Andrey






Signed-off-by: Andrey Grodzovsky 
---
  drivers/gpu/drm/ttm/ttm_tt.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 1ccf1ef..29248a5 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -495,3 +495,4 @@ void ttm_tt_unpopulate(struct ttm_tt *ttm)
  else
  ttm_pool_unpopulate(ttm);
  }
+EXPORT_SYMBOL(ttm_tt_unpopulate);



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx




___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v3 05/12] drm/ttm: Expose ttm_tt_unpopulate for driver use

2020-11-23 Thread Andrey Grodzovsky


On 11/25/20 5:42 AM, Christian König wrote:

On 11/21/20 6:21 AM, Andrey Grodzovsky wrote:

It's needed to drop IOMMU-backed pages on device unplug,
before the device's IOMMU group is released.


It would be cleaner if we could do the whole handling in TTM. I also need to 
double check what you are doing with this function.


Christian.



Check the patch "drm/amdgpu: Register IOMMU topology notifier per device." to see
how I use it. I don't see why this should go into the TTM mid-layer - the stuff
I do inside is vendor specific, and I don't think TTM is explicitly aware of
IOMMU anyway.
Do you mean you would prefer the IOMMU notifier to be registered from within TTM,
and then use a hook to call into a vendor specific handler?

Andrey






Signed-off-by: Andrey Grodzovsky 
---
  drivers/gpu/drm/ttm/ttm_tt.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 1ccf1ef..29248a5 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -495,3 +495,4 @@ void ttm_tt_unpopulate(struct ttm_tt *ttm)
  else
  ttm_pool_unpopulate(ttm);
  }
+EXPORT_SYMBOL(ttm_tt_unpopulate);



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v3 07/12] drm/sched: Prevent any job recoveries after device is unplugged.

2020-11-22 Thread Andrey Grodzovsky


On 11/22/20 6:57 AM, Christian König wrote:

On 11/21/20 6:21 AM, Andrey Grodzovsky wrote:

No point in trying recovery if the device is gone - it's meaningless.


I think that this should go into the device specific recovery function and not 
in the scheduler.



The timeout timer is rearmed here, so this prevents any new recovery work from
restarting from here
after drm_dev_unplug was executed from amdgpu_pci_remove. It will not cover other
places like
job cleanup or starting a new job, but those should stop once the scheduler thread
is stopped later.


Andrey




Christian.



Signed-off-by: Andrey Grodzovsky 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c |  2 +-
  drivers/gpu/drm/etnaviv/etnaviv_sched.c   |  3 ++-
  drivers/gpu/drm/lima/lima_sched.c |  3 ++-
  drivers/gpu/drm/panfrost/panfrost_job.c   |  2 +-
  drivers/gpu/drm/scheduler/sched_main.c    | 15 ++-
  drivers/gpu/drm/v3d/v3d_sched.c   | 15 ++-
  include/drm/gpu_scheduler.h   |  6 +-
  7 files changed, 35 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c

index d56f402..d0b0021 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -487,7 +487,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
    r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
 num_hw_submission, amdgpu_job_hang_limit,
-   timeout, ring->name);
+   timeout, ring->name, &adev->ddev);
  if (r) {
  DRM_ERROR("Failed to create scheduler on ring %s.\n",
    ring->name);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c 
b/drivers/gpu/drm/etnaviv/etnaviv_sched.c

index cd46c88..7678287 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
@@ -185,7 +185,8 @@ int etnaviv_sched_init(struct etnaviv_gpu *gpu)
    ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops,
   etnaviv_hw_jobs_limit, etnaviv_job_hang_limit,
- msecs_to_jiffies(500), dev_name(gpu->dev));
+ msecs_to_jiffies(500), dev_name(gpu->dev),
+ gpu->drm);
  if (ret)
  return ret;
  diff --git a/drivers/gpu/drm/lima/lima_sched.c 
b/drivers/gpu/drm/lima/lima_sched.c

index dc6df9e..8a7e5d7ca 100644
--- a/drivers/gpu/drm/lima/lima_sched.c
+++ b/drivers/gpu/drm/lima/lima_sched.c
@@ -505,7 +505,8 @@ int lima_sched_pipe_init(struct lima_sched_pipe *pipe, 
const char *name)

    return drm_sched_init(&pipe->base, &lima_sched_ops, 1,
    lima_job_hang_limit, msecs_to_jiffies(timeout),
-  name);
+  name,
+  pipe->ldev->ddev);
  }
    void lima_sched_pipe_fini(struct lima_sched_pipe *pipe)
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c 
b/drivers/gpu/drm/panfrost/panfrost_job.c

index 30e7b71..37b03b01 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -520,7 +520,7 @@ int panfrost_job_init(struct panfrost_device *pfdev)
  ret = drm_sched_init(&js->queue[j].sched,
   &panfrost_sched_ops,
   1, 0, msecs_to_jiffies(500),
- "pan_js");
+ "pan_js", pfdev->ddev);
  if (ret) {
  dev_err(pfdev->dev, "Failed to create scheduler: %d.", ret);
  goto err_sched;
diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c

index c3f0bd0..95db8c6 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -53,6 +53,7 @@
  #include 
  #include 
  #include 
+#include 
    #define CREATE_TRACE_POINTS
  #include "gpu_scheduler_trace.h"
@@ -283,8 +284,16 @@ static void drm_sched_job_timedout(struct work_struct 
*work)

  struct drm_gpu_scheduler *sched;
  struct drm_sched_job *job;
  +    int idx;
+
  sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
  +    if (!drm_dev_enter(sched->ddev, &idx)) {
+    DRM_INFO("%s - device unplugged skipping recovery on scheduler:%s",
+ __func__, sched->name);
+    return;
+    }
+
  /* Protects against concurrent deletion in drm_sched_get_cleanup_job */
  spin_lock(&sched->job_list_lock);
  job = list_first_entry_or_null(&sched->ring_mirror_list,
@@ -316,6 +325,8 @@ static void drm_sched_job_timedout(struct work_struct *work)
  spin_lock(&sched->job_list_lock);
  drm_sched_start_timeout(sched);
  spin_unlock(&sched->job_list_lock);
+
+    drm_dev_exit(idx);
  }
     /**
@@ -845,7 +856,8 @@ int drm_sched_init(struct drm_gpu_scheduler 

Re: [PATCH v3 04/12] drm/ttm: Set dma addr to null after free

2020-11-22 Thread Andrey Grodzovsky


On 11/21/20 9:13 AM, Christian König wrote:

On 11/21/20 6:21 AM, Andrey Grodzovsky wrote:

Fixes oops.


That file doesn't even exist any more. What oops should this fix?



Which file?
We set dma_address to NULL in every other place after unmap. This is so that
if the dma address was already unmapped we skip it the next time we enter
ttm_unmap_and_unpopulate_pages with the same tt for some reason.
The oops happens with IOMMU enabled. The device is removed from its IOMMU group
during PCI remove, but the BOs are all still alive if a user mode client holds
a reference to the drm file.
Later, when the reference is dropped and device fini happens, I get an oops in
ttm_unmap_and_unpopulate_pages->dma_unmap_page because the IOMMU group
structures are already gone.
Patch [11/12] "drm/amdgpu: Register IOMMU topology notifier per device",
together with this patch, solves the oops.
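
Paraphrased, the idempotent-unmap pattern the patch relies on looks roughly
like this (a simplified sketch, one page per entry, with the contiguous-run
coalescing of the real loop elided):

	for (i = 0; i < tt->ttm.num_pages; ++i) {
		if (!tt->dma_address[i])	/* already unmapped, skip */
			continue;

		dma_unmap_page(dev, tt->dma_address[i], PAGE_SIZE,
			       DMA_BIDIRECTIONAL);
		tt->dma_address[i] = 0;
	}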


Andrey



Signed-off-by: Andrey Grodzovsky 
---
  drivers/gpu/drm/ttm/ttm_page_alloc.c | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c 
b/drivers/gpu/drm/ttm/ttm_page_alloc.c

index b40a467..b0df328 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -1160,6 +1160,8 @@ void ttm_unmap_and_unpopulate_pages(struct device *dev, 
struct ttm_dma_tt *tt)

  dma_unmap_page(dev, tt->dma_address[i], num_pages * PAGE_SIZE,
 DMA_BIDIRECTIONAL);
  +    tt->dma_address[i] = 0;
+
  i += num_pages;
  }
  ttm_pool_unpopulate(&tt->ttm);


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v3 01/12] drm: Add dummy page per device or GEM object

2020-11-22 Thread Andrey Grodzovsky


On 11/21/20 9:15 AM, Christian König wrote:

On 11/21/20 6:21 AM, Andrey Grodzovsky wrote:

Will be used to reroute page faults on CPU-mapped BOs once the
device is removed.


Uff, one page for each exported DMA-buf? That's not something we can do.

We need to find a different approach here.

Can't we call alloc_page() on each fault and link them together so they are 
freed when the device is finally reaped?



For sure it's better to optimize and allocate on demand when we reach this
corner case, but why the linking?

Shouldn't drm_prime_gem_destroy be a good enough place to free?
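
A minimal sketch of that alloc-on-fault idea, with all names (the state
struct, its fields, the handler) invented purely for illustration:

#include <linux/mm.h>
#include <linux/list.h>
#include <linux/spinlock.h>

/* Hypothetical per-device state for illustration */
struct dummy_page_state {
	struct list_head dummy_pages;	/* pages linked via page->lru */
	spinlock_t lock;
};

/* Hedged sketch: allocate the dummy page lazily in the fault handler,
 * chain it so all of them can be freed when the device is finally
 * reaped, then map it into the faulting VMA.
 */
static vm_fault_t dummy_page_fault(struct vm_fault *vmf,
				   struct dummy_page_state *state)
{
	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);

	if (!page)
		return VM_FAULT_OOM;

	spin_lock(&state->lock);
	list_add(&page->lru, &state->dummy_pages);
	spin_unlock(&state->lock);

	return vmf_insert_pfn(vmf->vma, vmf->address, page_to_pfn(page));
}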

Andrey




Regards,
Christian.



Signed-off-by: Andrey Grodzovsky 
---
  drivers/gpu/drm/drm_file.c  |  8 
  drivers/gpu/drm/drm_prime.c | 10 ++
  include/drm/drm_file.h  |  2 ++
  include/drm/drm_gem.h   |  2 ++
  4 files changed, 22 insertions(+)

diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c
index 0ac4566..ff3d39f 100644
--- a/drivers/gpu/drm/drm_file.c
+++ b/drivers/gpu/drm/drm_file.c
@@ -193,6 +193,12 @@ struct drm_file *drm_file_alloc(struct drm_minor *minor)
  goto out_prime_destroy;
  }
  +    file->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+    if (!file->dummy_page) {
+    ret = -ENOMEM;
+    goto out_prime_destroy;
+    }
+
  return file;
    out_prime_destroy:
@@ -289,6 +295,8 @@ void drm_file_free(struct drm_file *file)
  if (dev->driver->postclose)
  dev->driver->postclose(dev, file);
  +    __free_page(file->dummy_page);
+
  drm_prime_destroy_file_private(&file->prime);
    WARN_ON(!list_empty(&file->event_list));
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index 1693aa7..987b45c 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -335,6 +335,13 @@ int drm_gem_prime_fd_to_handle(struct drm_device *dev,
    ret = drm_prime_add_buf_handle(&file_priv->prime,
  dma_buf, *handle);
+
+    if (!ret) {
+    obj->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+    if (!obj->dummy_page)
+    ret = -ENOMEM;
+    }
+
  mutex_unlock(&file_priv->prime.lock);
  if (ret)
  goto fail;
@@ -1020,6 +1027,9 @@ void drm_prime_gem_destroy(struct drm_gem_object *obj, 
struct sg_table *sg)

  dma_buf_unmap_attachment(attach, sg, DMA_BIDIRECTIONAL);
  dma_buf = attach->dmabuf;
  dma_buf_detach(attach->dmabuf, attach);
+
+    __free_page(obj->dummy_page);
+
  /* remove the reference */
  dma_buf_put(dma_buf);
  }
diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h
index 716990b..2a011fc 100644
--- a/include/drm/drm_file.h
+++ b/include/drm/drm_file.h
@@ -346,6 +346,8 @@ struct drm_file {
   */
  struct drm_prime_file_private prime;
  +    struct page *dummy_page;
+
  /* private: */
  #if IS_ENABLED(CONFIG_DRM_LEGACY)
  unsigned long lock_count; /* DRI1 legacy lock count */
diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h
index 337a483..76a97a3 100644
--- a/include/drm/drm_gem.h
+++ b/include/drm/drm_gem.h
@@ -311,6 +311,8 @@ struct drm_gem_object {
   *
   */
  const struct drm_gem_object_funcs *funcs;
+
+    struct page *dummy_page;
  };
    /**



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v3 12/12] drm/amdgpu: Fix a bunch of sdma code crash post device unplug

2020-11-20 Thread Andrey Grodzovsky
We can't allocate and submit IBs post device unplug.

Signed-off-by: Andrey Grodzovsky 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index fdbe7d4..a62ad20 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -31,6 +31,7 @@
 #include 
 
 #include 
+#include 
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_amdkfd.h"
@@ -1602,7 +1603,10 @@ static int amdgpu_vm_bo_update_mapping(struct 
amdgpu_device *adev,
struct amdgpu_vm_update_params params;
enum amdgpu_sync_mode sync_mode;
uint64_t pfn;
-   int r;
+   int r, idx;
+
+   if (!drm_dev_enter(&adev->ddev, &idx))
+   return -ENOENT;
 
memset(¶ms, 0, sizeof(params));
params.adev = adev;
@@ -1645,6 +1649,8 @@ static int amdgpu_vm_bo_update_mapping(struct 
amdgpu_device *adev,
if (r)
goto error_unlock;
 
+
+   drm_dev_exit(idx);
do {
uint64_t tmp, num_entries, addr;
 
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v3 11/12] drm/amdgpu: Register IOMMU topology notifier per device.

2020-11-20 Thread Andrey Grodzovsky
Handle all DMA IOMMU group related dependencies before the
group is removed.

Signed-off-by: Andrey Grodzovsky 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  5 
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 46 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h   |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  2 ++
 6 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 6243f6d..c41957e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -51,6 +51,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -1044,6 +1045,10 @@ struct amdgpu_device {
 
boolin_pci_err_recovery;
struct pci_saved_state  *pci_state;
+
+   struct notifier_block   nb;
+   struct blocking_notifier_head   notifier;
+   struct list_headdevice_bo_list;
 };
 
 static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 96368a8..bc84c20 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -70,6 +70,8 @@
 #include 
 #include 
 
+#include 
+
 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
@@ -3179,6 +3181,39 @@ static const struct attribute *amdgpu_dev_attributes[] = 
{
 };
 
 
+static int amdgpu_iommu_group_notifier(struct notifier_block *nb,
+unsigned long action, void *data)
+{
+   struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, nb);
+   struct amdgpu_bo *bo = NULL;
+
+   /*
+* Following is a set of IOMMU group dependencies taken care of before
+* device's IOMMU group is removed
+*/
+   if (action == IOMMU_GROUP_NOTIFY_DEL_DEVICE) {
+
+   spin_lock(&ttm_bo_glob.lru_lock);
+   list_for_each_entry(bo, &adev->device_bo_list, bo) {
+   if (bo->tbo.ttm)
+   ttm_tt_unpopulate(bo->tbo.ttm);
+   }
+   spin_unlock(&ttm_bo_glob.lru_lock);
+
+   if (adev->irq.ih.use_bus_addr)
+   amdgpu_ih_ring_fini(adev, &adev->irq.ih);
+   if (adev->irq.ih1.use_bus_addr)
+   amdgpu_ih_ring_fini(adev, &adev->irq.ih1);
+   if (adev->irq.ih2.use_bus_addr)
+   amdgpu_ih_ring_fini(adev, &adev->irq.ih2);
+
+   amdgpu_gart_dummy_page_fini(adev);
+   }
+
+   return NOTIFY_OK;
+}
+
+
 /**
  * amdgpu_device_init - initialize the driver
  *
@@ -3283,6 +3318,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 
INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
 
+   INIT_LIST_HEAD(&adev->device_bo_list);
+
adev->gfx.gfx_off_req_count = 1;
adev->pm.ac_power = power_supply_is_system_supplied() > 0;
 
@@ -3553,6 +3590,15 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (amdgpu_device_cache_pci_state(adev->pdev))
pci_restore_state(pdev);
 
+   BLOCKING_INIT_NOTIFIER_HEAD(&adev->notifier);
+   adev->nb.notifier_call = amdgpu_iommu_group_notifier;
+
+   if (adev->dev->iommu_group) {
+   r = iommu_group_register_notifier(adev->dev->iommu_group, 
&adev->nb);
+   if (r)
+   goto failed;
+   }
+
return 0;
 
 failed:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index e01e681..34c17bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -92,7 +92,7 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device 
*adev)
  *
  * Frees the dummy page used by the driver (all asics).
  */
-static void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
+void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
 {
if (!adev->dummy_page_addr)
return;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
index afa2e28..5678d9c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
@@ -61,6 +61,7 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
 void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
 int amdgpu_gart_init(struct amdgpu_device *adev);
 void amdgpu_gart_fini(struct amdgpu_device *adev);
void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev);
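
One loose end the quoted portion doesn't show: the notifier registered in
amdgpu_device_init() presumably needs a matching unregister on teardown. A
sketch of what that counterpart could look like (function name assumed):

/* Assumed counterpart, not shown in the quoted patch */
static void amdgpu_iommu_notifier_fini(struct amdgpu_device *adev)
{
	if (adev->dev->iommu_group)
		iommu_group_unregister_notifier(adev->dev->iommu_group,
						&adev->nb);
}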

[PATCH v3 07/12] drm/sched: Prevent any job recoveries after device is unplugged.

2020-11-20 Thread Andrey Grodzovsky
No point in trying recovery if the device is gone - it's meaningless.

Signed-off-by: Andrey Grodzovsky 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c |  2 +-
 drivers/gpu/drm/etnaviv/etnaviv_sched.c   |  3 ++-
 drivers/gpu/drm/lima/lima_sched.c |  3 ++-
 drivers/gpu/drm/panfrost/panfrost_job.c   |  2 +-
 drivers/gpu/drm/scheduler/sched_main.c| 15 ++-
 drivers/gpu/drm/v3d/v3d_sched.c   | 15 ++-
 include/drm/gpu_scheduler.h   |  6 +-
 7 files changed, 35 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index d56f402..d0b0021 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -487,7 +487,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
 
r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
   num_hw_submission, amdgpu_job_hang_limit,
-  timeout, ring->name);
+  timeout, ring->name, &adev->ddev);
if (r) {
DRM_ERROR("Failed to create scheduler on ring %s.\n",
  ring->name);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c 
b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
index cd46c88..7678287 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
@@ -185,7 +185,8 @@ int etnaviv_sched_init(struct etnaviv_gpu *gpu)
 
ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops,
 etnaviv_hw_jobs_limit, etnaviv_job_hang_limit,
-msecs_to_jiffies(500), dev_name(gpu->dev));
+msecs_to_jiffies(500), dev_name(gpu->dev),
+gpu->drm);
if (ret)
return ret;
 
diff --git a/drivers/gpu/drm/lima/lima_sched.c 
b/drivers/gpu/drm/lima/lima_sched.c
index dc6df9e..8a7e5d7ca 100644
--- a/drivers/gpu/drm/lima/lima_sched.c
+++ b/drivers/gpu/drm/lima/lima_sched.c
@@ -505,7 +505,8 @@ int lima_sched_pipe_init(struct lima_sched_pipe *pipe, 
const char *name)
 
return drm_sched_init(&pipe->base, &lima_sched_ops, 1,
  lima_job_hang_limit, msecs_to_jiffies(timeout),
- name);
+ name,
+ pipe->ldev->ddev);
 }
 
 void lima_sched_pipe_fini(struct lima_sched_pipe *pipe)
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c 
b/drivers/gpu/drm/panfrost/panfrost_job.c
index 30e7b71..37b03b01 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -520,7 +520,7 @@ int panfrost_job_init(struct panfrost_device *pfdev)
ret = drm_sched_init(&js->queue[j].sched,
 &panfrost_sched_ops,
 1, 0, msecs_to_jiffies(500),
-"pan_js");
+"pan_js", pfdev->ddev);
if (ret) {
dev_err(pfdev->dev, "Failed to create scheduler: %d.", 
ret);
goto err_sched;
diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index c3f0bd0..95db8c6 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -53,6 +53,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define CREATE_TRACE_POINTS
 #include "gpu_scheduler_trace.h"
@@ -283,8 +284,16 @@ static void drm_sched_job_timedout(struct work_struct 
*work)
struct drm_gpu_scheduler *sched;
struct drm_sched_job *job;
 
+   int idx;
+
sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
 
+   if (!drm_dev_enter(sched->ddev, &idx)) {
+   DRM_INFO("%s - device unplugged skipping recovery on 
scheduler:%s",
+__func__, sched->name);
+   return;
+   }
+
/* Protects against concurrent deletion in drm_sched_get_cleanup_job */
spin_lock(&sched->job_list_lock);
job = list_first_entry_or_null(&sched->ring_mirror_list,
@@ -316,6 +325,8 @@ static void drm_sched_job_timedout(struct work_struct *work)
spin_lock(&sched->job_list_lock);
drm_sched_start_timeout(sched);
spin_unlock(&sched->job_list_lock);
+
+   drm_dev_exit(idx);
 }
 
  /**
@@ -845,7 +856,8 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
   unsigned hw_submission,
   unsigned hang_limit,
   long timeout,
-  const char *name)
+  const char *name,
+  struct drm_device *ddev)

[PATCH v3 05/12] drm/ttm: Expose ttm_tt_unpopulate for driver use

2020-11-20 Thread Andrey Grodzovsky
It's needed to drop IOMMU-backed pages on device unplug,
before the device's IOMMU group is released.

Signed-off-by: Andrey Grodzovsky 
---
 drivers/gpu/drm/ttm/ttm_tt.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 1ccf1ef..29248a5 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -495,3 +495,4 @@ void ttm_tt_unpopulate(struct ttm_tt *ttm)
else
ttm_pool_unpopulate(ttm);
 }
+EXPORT_SYMBOL(ttm_tt_unpopulate);
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v3 08/12] drm/amdgpu: Split amdgpu_device_fini into early and late

2020-11-20 Thread Andrey Grodzovsky
Some of the stuff in amdgpu_device_fini, such as disabling HW interrupts
and finalizing pending fences, must be done right away on
pci_remove, while most of the stuff which relates to finalizing and
releasing driver data structures can be kept until the
drm_driver.release hook is called, i.e. when the last device
reference is dropped.

Signed-off-by: Andrey Grodzovsky 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 16 
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c|  7 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  | 15 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c| 24 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h|  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c| 12 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c|  3 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h   |  3 ++-
 9 files changed, 65 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 83ac06a..6243f6d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1063,7 +1063,9 @@ static inline struct amdgpu_device 
*amdgpu_ttm_adev(struct ttm_bo_device *bdev)
 
 int amdgpu_device_init(struct amdgpu_device *adev,
   uint32_t flags);
-void amdgpu_device_fini(struct amdgpu_device *adev);
+void amdgpu_device_fini_early(struct amdgpu_device *adev);
+void amdgpu_device_fini_late(struct amdgpu_device *adev);
+
 int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev);
 
 void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
@@ -1275,6 +1277,8 @@ void amdgpu_driver_lastclose_kms(struct drm_device *dev);
 int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv);
 void amdgpu_driver_postclose_kms(struct drm_device *dev,
 struct drm_file *file_priv);
+void amdgpu_driver_release_kms(struct drm_device *dev);
+
 int amdgpu_device_ip_suspend(struct amdgpu_device *adev);
 int amdgpu_device_suspend(struct drm_device *dev, bool fbcon);
 int amdgpu_device_resume(struct drm_device *dev, bool fbcon);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 2f60b70..797d94d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3557,14 +3557,12 @@ int amdgpu_device_init(struct amdgpu_device *adev,
  * Tear down the driver info (all asics).
  * Called at driver shutdown.
  */
-void amdgpu_device_fini(struct amdgpu_device *adev)
+void amdgpu_device_fini_early(struct amdgpu_device *adev)
 {
dev_info(adev->dev, "amdgpu: finishing device.\n");
flush_delayed_work(&adev->delayed_init_work);
adev->shutdown = true;
 
-   kfree(adev->pci_state);
-
/* make sure IB test finished before entering exclusive mode
 * to avoid preemption on IB test
 * */
@@ -3581,11 +3579,18 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
else
drm_atomic_helper_shutdown(adev_to_drm(adev));
}
-   amdgpu_fence_driver_fini(adev);
+   amdgpu_fence_driver_fini_early(adev);
if (adev->pm_sysfs_en)
amdgpu_pm_sysfs_fini(adev);
amdgpu_fbdev_fini(adev);
+
+   amdgpu_irq_fini_early(adev);
+}
+
+void amdgpu_device_fini_late(struct amdgpu_device *adev)
+{
amdgpu_device_ip_fini(adev);
+   amdgpu_fence_driver_fini_late(adev);
release_firmware(adev->firmware.gpu_info_fw);
adev->firmware.gpu_info_fw = NULL;
adev->accel_working = false;
@@ -3621,6 +3626,9 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
amdgpu_pmu_fini(adev);
if (adev->mman.discovery_bin)
amdgpu_discovery_fini(adev);
+
+   kfree(adev->pci_state);
+
 }
 
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 7f98cf1..3d130fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1244,14 +1244,10 @@ amdgpu_pci_remove(struct pci_dev *pdev)
 {
struct drm_device *dev = pci_get_drvdata(pdev);
 
-#ifdef MODULE
-   if (THIS_MODULE->state != MODULE_STATE_GOING)
-#endif
-   DRM_ERROR("Hotplug removal is not supported\n");
drm_dev_unplug(dev);
amdgpu_driver_unload_kms(dev);
+
pci_disable_device(pdev);
-   pci_set_drvdata(pdev, NULL);
drm_dev_put(dev);
 }
 
@@ -1557,6 +1553,7 @@ static struct drm_driver kms_driver = {
.dumb_create = amdgpu_mode_dumb_create,
.dumb_map_offset = amdgpu_mode_dumb_mmap,
.fops = &amdgpu_driver_kms_fops,
+   .release = &amdgpu_driver_release_kms,
 
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,

[PATCH v3 09/12] drm/amdgpu: Add early fini callback

2020-11-20 Thread Andrey Grodzovsky
Use it to call display code dependent on device->drv_data
before it is set to NULL on device unplug.

Signed-off-by: Andrey Grodzovsky 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c| 20 
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 12 ++--
 drivers/gpu/drm/amd/include/amd_shared.h  |  2 ++
 3 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 797d94d..96368a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2508,6 +2508,24 @@ static int amdgpu_device_ip_late_init(struct 
amdgpu_device *adev)
return 0;
 }
 
+static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
+{
+   int i, r;
+
+   for (i = 0; i < adev->num_ip_blocks; i++) {
+   if (!adev->ip_blocks[i].version->funcs->early_fini)
+   continue;
+
+   r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
+   if (r) {
+   DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+   }
+   }
+
+   return 0;
+}
+
 /**
  * amdgpu_device_ip_fini - run fini for hardware IPs
  *
@@ -3585,6 +3603,8 @@ void amdgpu_device_fini_early(struct amdgpu_device *adev)
amdgpu_fbdev_fini(adev);
 
amdgpu_irq_fini_early(adev);
+
+   amdgpu_device_ip_fini_early(adev);
 }
 
 void amdgpu_device_fini_late(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 1da4ad5..278d1f6 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1158,6 +1158,15 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
return -EINVAL;
 }
 
+static int amdgpu_dm_early_fini(void *handle)
+{
+   struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+   amdgpu_dm_audio_fini(adev);
+
+   return 0;
+}
+
 static void amdgpu_dm_fini(struct amdgpu_device *adev)
 {
int i;
@@ -1166,8 +1175,6 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
drm_encoder_cleanup(&adev->dm.mst_encoders[i].base);
}
 
-   amdgpu_dm_audio_fini(adev);
-
amdgpu_dm_destroy_drm_device(&adev->dm);
 
 #ifdef CONFIG_DRM_AMD_DC_HDCP
@@ -2150,6 +2157,7 @@ static const struct amd_ip_funcs amdgpu_dm_funcs = {
.late_init = dm_late_init,
.sw_init = dm_sw_init,
.sw_fini = dm_sw_fini,
+   .early_fini = amdgpu_dm_early_fini,
.hw_init = dm_hw_init,
.hw_fini = dm_hw_fini,
.suspend = dm_suspend,
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h 
b/drivers/gpu/drm/amd/include/amd_shared.h
index 9676016..63bb846 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -239,6 +239,7 @@ enum amd_dpm_forced_level;
  * @late_init: sets up late driver/hw state (post hw_init) - Optional
  * @sw_init: sets up driver state, does not configure hw
  * @sw_fini: tears down driver state, does not configure hw
+ * @early_fini: tears down stuff before dev detached from driver
  * @hw_init: sets up the hw state
  * @hw_fini: tears down the hw state
  * @late_fini: final cleanup
@@ -267,6 +268,7 @@ struct amd_ip_funcs {
int (*late_init)(void *handle);
int (*sw_init)(void *handle);
int (*sw_fini)(void *handle);
+   int (*early_fini)(void *handle);
int (*hw_init)(void *handle);
int (*hw_fini)(void *handle);
void (*late_fini)(void *handle);
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v3 10/12] drm/amdgpu: Avoid sysfs dirs removal post device unplug

2020-11-20 Thread Andrey Grodzovsky
Avoids a NULL ptr dereference due to kobj->sd being unset on device removal.

Signed-off-by: Andrey Grodzovsky 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c   | 4 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index caf828a..812e592 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -27,6 +27,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "amdgpu.h"
 #include "amdgpu_ras.h"
@@ -1043,7 +1044,8 @@ static int amdgpu_ras_sysfs_remove_feature_node(struct 
amdgpu_device *adev)
.attrs = attrs,
};
 
-   sysfs_remove_group(&adev->dev->kobj, &group);
+   if (!drm_dev_is_unplugged(&adev->ddev))
+   sysfs_remove_group(&adev->dev->kobj, &group);
 
return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 2b7c90b..54331fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -24,6 +24,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "amdgpu.h"
 #include "amdgpu_ucode.h"
@@ -464,7 +465,8 @@ int amdgpu_ucode_sysfs_init(struct amdgpu_device *adev)
 
 void amdgpu_ucode_sysfs_fini(struct amdgpu_device *adev)
 {
-   sysfs_remove_group(&adev->dev->kobj, &fw_attr_group);
+   if (!drm_dev_is_unplugged(&adev->ddev))
+   sysfs_remove_group(&adev->dev->kobj, &fw_attr_group);
 }
 
 static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

