RE: [PATCH] drm/amdgpu: Field type update in drm_amdgpu_info_vbios

2021-05-11 Thread Gu, JiaWei (Will)
[AMD Official Use Only - Internal Distribution Only]

Done. UMR patch sent out.

Hi Tom,

Please help review it to bring the umr part back.

Thanks in advance!
Jiawei

From: Deucher, Alexander 
Sent: Tuesday, May 11, 2021 9:16 PM
To: Gu, JiaWei (Will) ; amd-gfx@lists.freedesktop.org
Cc: StDenis, Tom ; keesc...@chromium.org; Nieto, David M 

Subject: Re: [PATCH] drm/amdgpu: Field type update in drm_amdgpu_info_vbios


[AMD Official Use Only - Internal Distribution Only]

Please update the umr patch as well.

Acked-by: Alex Deucher 
mailto:alexander.deuc...@amd.com>>


From: amd-gfx 
mailto:amd-gfx-boun...@lists.freedesktop.org>>
 on behalf of Jiawei Gu mailto:jiawei...@amd.com>>
Sent: Tuesday, May 11, 2021 1:31 AM
To: amd-gfx@lists.freedesktop.org 
mailto:amd-gfx@lists.freedesktop.org>>
Cc: StDenis, Tom mailto:tom.stde...@amd.com>>; Gu, JiaWei 
(Will) mailto:jiawei...@amd.com>>; 
keesc...@chromium.org 
mailto:keesc...@chromium.org>>; Nieto, David M 
mailto:david.ni...@amd.com>>
Subject: [PATCH] drm/amdgpu: Field type update in drm_amdgpu_info_vbios

Use numeric type serial in drm_amdgpu_info_vbios instead.

Signed-off-by: Jiawei Gu mailto:jiawei...@amd.com>>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 +-
 include/uapi/drm/amdgpu_drm.h   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 75f34a9008e7..e1008a79b441 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -871,7 +871,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
 memcpy(vbios_info.vbios_pn, atom_context->vbios_pn, 
sizeof(atom_context->vbios_pn));
 vbios_info.version = atom_context->version;
 memcpy(vbios_info.date, atom_context->date, 
sizeof(atom_context->date));
-   memcpy(vbios_info.serial, adev->serial, 
sizeof(adev->serial));
+   vbios_info.serial = adev->unique_id;
 vbios_info.dev_id = adev->pdev->device;
 vbios_info.rev_id = adev->pdev->revision;
 vbios_info.sub_dev_id = atom_context->sub_dev_id;
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 2b487a8d2727..2d9e84658bbc 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -957,7 +957,7 @@ struct drm_amdgpu_info_vbios {
 __u8 vbios_pn[64];
 __u32 version;
 __u8 date[32];
-   __u8 serial[16];
+   __u64 serial;
 __u32 dev_id;
 __u32 rev_id;
 __u32 sub_dev_id;
--
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfxdata=04%7C01%7Calexander.deucher%40amd.com%7C9c3ad129089d4585544f08d9143e1892%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637563079204768525%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000sdata=efCxiLv4J5KWmoKqy1geARlt%2FJppT1eetVI3R%2FMNfzw%3Dreserved=0
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: Add compile flag for securedisplay

2021-05-11 Thread Huang Rui
On Wed, May 12, 2021 at 09:51:21AM +0800, Su, Jinzhou (Joe) wrote:
> Add compile flag CONFIG_DEBUG_FS to clear the warning:
> unused variable 'amdgpu_securedisplay_debugfs_ops'
> 
> Signed-off-by: Jinzhou Su 

According to the 0-day robot's mail, please feel free to add the
Reported-by tag below:

"Reported-by: kernel test robot "

Patch is Acked-by: Huang Rui 

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
> index 5369c8dd0764..123453999093 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
> @@ -86,6 +86,8 @@ void psp_prep_securedisplay_cmd_buf(struct psp_context 
> *psp, struct securedispla
>   (*cmd)->cmd_id = command_id;
>  }
>  
> +#if defined(CONFIG_DEBUG_FS)
> +
>  static ssize_t amdgpu_securedisplay_debugfs_write(struct file *f, const char 
> __user *buf,
>   size_t size, loff_t *pos)
>  {
> @@ -162,6 +164,8 @@ static const struct file_operations 
> amdgpu_securedisplay_debugfs_ops = {
>   .llseek = default_llseek
>  };
>  
> +#endif
> +
>  void amdgpu_securedisplay_debugfs_init(struct amdgpu_device *adev)
>  {
>  #if defined(CONFIG_DEBUG_FS)
> -- 
> 2.27.0
> 
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] add vbios info query

2021-05-11 Thread Jiawei Gu
Signed-off-by: Jiawei Gu 
---
 src/app/CMakeLists.txt |  1 +
 src/app/main.c |  8 +
 src/app/vbios.c| 58 ++
 src/lib/lowlevel/linux/query_drm.c | 11 ++
 src/umr.h  | 15 
 src/umrapp.h   |  1 +
 6 files changed, 94 insertions(+)
 create mode 100644 src/app/vbios.c

diff --git a/src/app/CMakeLists.txt b/src/app/CMakeLists.txt
index ca7d46b..462e4fc 100644
--- a/src/app/CMakeLists.txt
+++ b/src/app/CMakeLists.txt
@@ -35,6 +35,7 @@ add_library(umrapp STATIC
   pp_table.c
   navi10_ppt.c
   read_metrics.c
+  vbios.c
   ${GUI_SOURCE}
 )
 
diff --git a/src/app/main.c b/src/app/main.c
index 47ddb38..b484cf3 100644
--- a/src/app/main.c
+++ b/src/app/main.c
@@ -825,6 +825,11 @@ int main(int argc, char **argv)
asic = get_asic();
ih_self_test(asic);
 #endif
+   } else if (!strcmp(argv[i], "--vbios_info") || !strcmp(argv[i], 
"-vi")) {
+   if (!asic)
+   asic = get_asic();
+   if (umr_print_vbios_info(asic) != 0)
+   fprintf(stderr, "[ERROR]: Cannot print vbios 
info.\n");
} else if (!strcmp(argv[i], "--help") || !strcmp(argv[i], 
"-h")) {
printf("User Mode Register debugger v%s for AMDGPU 
devices (build: %s [%s]), Copyright (c) 2021, AMD Inc.\n"
 "\n*** Device Selection ***\n"
@@ -951,6 +956,9 @@ printf(
"\n\t\tPrint the GPU metrics table for the device."
 "\n\t--power, -p \n\t\tRead the conetent of clocks, temperature, gpu loading 
at runtime"
"\n\t\toptions 'use_colour' to colourize output \n");
+printf(
+"\n*** Video BIOS Information ***\n"
+"\n\t--vbios_info, -vi \n\t\tPrint Video BIOS information\n");
 
 #if UMR_GUI
 printf(
diff --git a/src/app/vbios.c b/src/app/vbios.c
new file mode 100644
index 000..98e0f87
--- /dev/null
+++ b/src/app/vbios.c
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Tom St Denis 
+ *
+ */
+#include "umrapp.h"
+
+#define AMDGPU_INFO_VBIOS  0x1B
+#define AMDGPU_INFO_VBIOS_INFO 0x3
+int umr_print_vbios_info(struct umr_asic *asic)
+{
+   char fname[64];
+   int r;
+   struct umr_vbios_info vbios_info;
+
+   if (asic->fd.drm < 0) {
+   snprintf(fname, sizeof(fname)-1, "/dev/dri/card%d", 
asic->instance);
+   asic->fd.drm = open(fname, O_RDWR);
+   }
+
+   r = umr_query_drm_vbios(asic, AMDGPU_INFO_VBIOS, AMDGPU_INFO_VBIOS_INFO,
+   _info, sizeof(vbios_info));
+   if (r)
+   return r;
+
+   printf("vbios name  : %s\n", vbios_info.name);
+   printf("vbios dbdf  : 0x%x\n", vbios_info.dbdf);
+   printf("vbios pn: %s\n", vbios_info.vbios_pn);
+   printf("vbios version   : %d\n", vbios_info.version);
+   printf("vbios date  : %s\n", vbios_info.date);
+   printf("vbios serial: %lld\n", vbios_info.serial);
+   printf("vbios dev_id: 0x%x\n", vbios_info.dev_id);
+   printf("vbios rev_id: 0x%x\n", vbios_info.rev_id);
+   printf("vbios sub_dev_id: 0x%x\n", vbios_info.sub_dev_id);
+   printf("vbios sub_ved_id: 0x%x\n", vbios_info.sub_ved_id);
+
+   close(asic->fd.drm);
+   return 0;
+}
\ No newline at end of file
diff --git a/src/lib/lowlevel/linux/query_drm.c 
b/src/lib/lowlevel/linux/query_drm.c
index d0c82d4..f4ab709 100644
--- a/src/lib/lowlevel/linux/query_drm.c
+++ b/src/lib/lowlevel/linux/query_drm.c
@@ -49,7 +49,18 @@ int umr_query_drm(struct umr_asic *asic, int field, void 
*ret, int size)
inf.return_size = size;
inf.query = field;
return ioctl(asic->fd.drm, 

Re: [PATCH] drm/amdgpu: Add compile flag for securedisplay

2021-05-11 Thread Deucher, Alexander
[AMD Official Use Only - Internal Distribution Only]

Acked-by: Alex Deucher 

From: amd-gfx  on behalf of Jinzhou Su 

Sent: Tuesday, May 11, 2021 9:51 PM
To: amd-gfx@lists.freedesktop.org 
Cc: Su, Jinzhou (Joe) ; Huang, Ray 
Subject: [PATCH] drm/amdgpu: Add compile flag for securedisplay

Add compile flag CONFIG_DEBUG_FS to clear the warning:
unused variable 'amdgpu_securedisplay_debugfs_ops'

Signed-off-by: Jinzhou Su 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
index 5369c8dd0764..123453999093 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
@@ -86,6 +86,8 @@ void psp_prep_securedisplay_cmd_buf(struct psp_context *psp, 
struct securedispla
 (*cmd)->cmd_id = command_id;
 }

+#if defined(CONFIG_DEBUG_FS)
+
 static ssize_t amdgpu_securedisplay_debugfs_write(struct file *f, const char 
__user *buf,
 size_t size, loff_t *pos)
 {
@@ -162,6 +164,8 @@ static const struct file_operations 
amdgpu_securedisplay_debugfs_ops = {
 .llseek = default_llseek
 };

+#endif
+
 void amdgpu_securedisplay_debugfs_init(struct amdgpu_device *adev)
 {
 #if defined(CONFIG_DEBUG_FS)
--
2.27.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfxdata=04%7C01%7Calexander.deucher%40amd.com%7C14fa5aa45bb3405a908508d914e880d0%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637563811100744522%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000sdata=1lE%2FskHrU0TkqeNBL%2Fnl%2B%2BHOECO5gSFmPM7nrUInV3Y%3Dreserved=0
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amd/amdgpu: psp program IH_RB_CTRL on navi12 and sienna_cichlid

2021-05-11 Thread YuBiao Wang
[Why]
IH_RB_CNTL is blocked by PSP so we need to ask psp to help config it.

[How]
Move psp ip block before ih, and use psp to program IH_RB_CNTL under sriov.

Signed-off-by: YuBiao Wang 
---
 drivers/gpu/drm/amd/amdgpu/navi10_ih.c | 20 ++--
 drivers/gpu/drm/amd/amdgpu/nv.c|  4 ++--
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c 
b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
index f4e4040bbd25..5ee923ccdeb3 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
@@ -151,7 +151,15 @@ static int navi10_ih_toggle_ring_interrupts(struct 
amdgpu_device *adev,
/* enable_intr field is only valid in ring0 */
if (ih == >irq.ih)
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 
0));
-   WREG32(ih_regs->ih_rb_cntl, tmp);
+
+   if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+   if (psp_reg_program(>psp, ih_regs->psp_reg_id, tmp)) {
+   DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
+   return -ETIMEDOUT;
+   }
+   } else {
+   WREG32(ih_regs->ih_rb_cntl, tmp);
+   }
 
if (enable) {
ih->enabled = true;
@@ -261,7 +269,15 @@ static int navi10_ih_enable_ring(struct amdgpu_device 
*adev,
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 0);
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_FULL_DRAIN_ENABLE, 1);
}
-   WREG32(ih_regs->ih_rb_cntl, tmp);
+
+   if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+   if (psp_reg_program(>psp, ih_regs->psp_reg_id, tmp)) {
+   DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
+   return -ETIMEDOUT;
+   }
+   } else {
+   WREG32(ih_regs->ih_rb_cntl, tmp);
+   }
 
if (ih == >irq.ih) {
/* set the ih ring 0 writeback address whether it's enabled or 
not */
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index 75d1f9b939b2..aebd330daaca 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -742,8 +742,8 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
case CHIP_NAVI12:
amdgpu_device_ip_block_add(adev, _common_ip_block);
amdgpu_device_ip_block_add(adev, _v10_0_ip_block);
-   amdgpu_device_ip_block_add(adev, _ih_ip_block);
amdgpu_device_ip_block_add(adev, _v11_0_ip_block);
+   amdgpu_device_ip_block_add(adev, _ih_ip_block);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)
amdgpu_device_ip_block_add(adev, _v11_0_ip_block);
if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
@@ -764,9 +764,9 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
case CHIP_SIENNA_CICHLID:
amdgpu_device_ip_block_add(adev, _common_ip_block);
amdgpu_device_ip_block_add(adev, _v10_0_ip_block);
-   amdgpu_device_ip_block_add(adev, _ih_ip_block);
if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP))
amdgpu_device_ip_block_add(adev, _v11_0_ip_block);
+   amdgpu_device_ip_block_add(adev, _ih_ip_block);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
is_support_sw_smu(adev))
amdgpu_device_ip_block_add(adev, _v11_0_ip_block);
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: Add compile flag for securedisplay

2021-05-11 Thread Jinzhou Su
Add compile flag CONFIG_DEBUG_FS to clear the warning:
unused variable 'amdgpu_securedisplay_debugfs_ops'

Signed-off-by: Jinzhou Su 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
index 5369c8dd0764..123453999093 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
@@ -86,6 +86,8 @@ void psp_prep_securedisplay_cmd_buf(struct psp_context *psp, 
struct securedispla
(*cmd)->cmd_id = command_id;
 }
 
+#if defined(CONFIG_DEBUG_FS)
+
 static ssize_t amdgpu_securedisplay_debugfs_write(struct file *f, const char 
__user *buf,
size_t size, loff_t *pos)
 {
@@ -162,6 +164,8 @@ static const struct file_operations 
amdgpu_securedisplay_debugfs_ops = {
.llseek = default_llseek
 };
 
+#endif
+
 void amdgpu_securedisplay_debugfs_init(struct amdgpu_device *adev)
 {
 #if defined(CONFIG_DEBUG_FS)
-- 
2.27.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: Register bad page handler for Aldebaran

2021-05-11 Thread Mukul Joshi
On Aldebaran, GPU driver will handle bad page retirement
even though UMC is host managed. As a result, register a
bad page retirement handler on the mce notifier chain to
retire bad pages on Aldebaran.

Signed-off-by: Mukul Joshi 
Reviewed-by: John Clements 
Acked-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 154 
 1 file changed, 154 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index b1c57a5b6e89..02263f509b36 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -34,7 +34,9 @@
 #include "amdgpu_xgmi.h"
 #include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
 #include "atom.h"
+#include 
 
+static bool notifier_registered;
 static const char *RAS_FS_NAME = "ras";
 
 const char *ras_error_string[] = {
@@ -73,6 +75,11 @@ const char *ras_block_string[] = {
 /* typical ECC bad page rate(1 bad page per 100MB VRAM) */
 #define RAS_BAD_PAGE_RATE  (100 * 1024 * 1024ULL)
 
+#define GET_MCA_IPID_GPUID(m)  (((m) >> 44) & 0xF)
+#define GET_UMC_INST_NIBBLE(m) (((m) >> 20) & 0xF)
+#define GET_CHAN_INDEX_NIBBLE(m)   (((m) >> 12) & 0xF)
+#define GPU_ID_OFFSET  8
+
 enum amdgpu_ras_retire_page_reservation {
AMDGPU_RAS_RETIRE_PAGE_RESERVED,
AMDGPU_RAS_RETIRE_PAGE_PENDING,
@@ -85,6 +92,7 @@ static bool amdgpu_ras_check_bad_page_unlock(struct 
amdgpu_ras *con,
uint64_t addr);
 static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
uint64_t addr);
+static void amdgpu_register_bad_pages_mca_notifier(void);
 
 void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready)
 {
@@ -1978,6 +1986,10 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
goto free;
}
 
+   if ((adev->asic_type == CHIP_ALDEBARAN) &&
+   (adev->gmc.xgmi.connected_to_cpu))
+   amdgpu_register_bad_pages_mca_notifier();
+
return 0;
 
 free:
@@ -2427,3 +2439,145 @@ void amdgpu_release_ras_context(struct amdgpu_device 
*adev)
kfree(con);
}
 }
+
+static struct amdgpu_device *find_adev(uint32_t node_id)
+{
+   struct amdgpu_gpu_instance *gpu_instance;
+   int i;
+   struct amdgpu_device *adev = NULL;
+
+   mutex_lock(_info.mutex);
+
+   for (i = 0; i < mgpu_info.num_gpu; i++) {
+   gpu_instance = &(mgpu_info.gpu_ins[i]);
+   adev = gpu_instance->adev;
+
+   if (adev->gmc.xgmi.connected_to_cpu &&
+   adev->gmc.xgmi.physical_node_id == node_id)
+   break;
+   adev = NULL;
+   }
+
+   mutex_unlock(_info.mutex);
+
+   return adev;
+}
+
+static void find_umc_inst_chan_index(struct mce *m, uint32_t *umc_inst,
+uint32_t *chan_index)
+{
+   uint32_t val1 = 0;
+   uint32_t val2 = 0;
+   uint32_t rem = 0;
+
+   /*
+* Bit 20-23 provides the UMC instance nibble.
+* Bit 12-15 provides the channel index nibble.
+*/
+   val1 = GET_UMC_INST_NIBBLE(m->ipid);
+   val2 = GET_CHAN_INDEX_NIBBLE(m->ipid);
+
+   *umc_inst = val1/2;
+   rem = val1%2;
+
+   *chan_index = (4*rem) + val2;
+}
+
+static int amdgpu_bad_page_notifier(struct notifier_block *nb,
+   unsigned long val, void *data)
+{
+   struct mce *m = (struct mce *)data;
+   struct amdgpu_device *adev = NULL;
+   uint32_t gpu_id = 0;
+   uint32_t umc_inst = 0;
+   uint32_t chan_index = 0;
+   struct ras_err_data err_data = {0, 0, 0, NULL};
+   struct eeprom_table_record err_rec;
+   uint64_t retired_page;
+
+   /*
+* If the error was generated in UMC_V2, which belongs to GPU UMCs,
+* and error occurred in DramECC (Extended error code = 0) then only
+* process the error, else bail out.
+*/
+   if (!m || !(is_smca_umc_v2(m->bank) && (XEC(m->status, 0x1f) == 0x0)))
+   return NOTIFY_DONE;
+
+   gpu_id = GET_MCA_IPID_GPUID(m->ipid);
+
+   /*
+* GPU Id is offset by GPU_ID_OFFSET in MCA_IPID_UMC register.
+*/
+   gpu_id -= GPU_ID_OFFSET;
+
+   adev = find_adev(gpu_id);
+   if (!adev) {
+   dev_warn(adev->dev, "%s: Unable to find adev for gpu_id: %d\n",
+__func__, gpu_id);
+   return NOTIFY_DONE;
+   }
+
+   /*
+* If it is correctable error, then print a message and return.
+*/
+   if (mce_is_correctable(m)) {
+   dev_info(adev->dev, "%s: UMC Correctable error detected.",
+   __func__);
+   return NOTIFY_OK;
+   }
+
+   /*
+* If it is uncorrectable error, then find out UMC instance and
+* channel index.
+*/
+  

Re: [PATCH v2] drm/radeon/dpm: Disable sclk switching on Oland when two 4K 60Hz monitors are connected

2021-05-11 Thread Alex Deucher
On Mon, May 10, 2021 at 11:33 PM Kai-Heng Feng
 wrote:
>
> On Fri, Apr 30, 2021 at 12:57 PM Kai-Heng Feng
>  wrote:
> >
> > Screen flickers rapidly when two 4K 60Hz monitors are in use. This issue
> > doesn't happen when one monitor is 4K 60Hz (pixelclock 594MHz) and
> > another one is 4K 30Hz (pixelclock 297MHz).
> >
> > The issue is gone after setting "power_dpm_force_performance_level" to
> > "high". Following the indication, we found that the issue occurs when
> > sclk is too low.
> >
> > So resolve the issue by disabling sclk switching when there are two
> > monitors that require a high pixelclock (> 297MHz).
> >
> > v2:
> >  - Only apply the fix to Oland.
> > Signed-off-by: Kai-Heng Feng 
>
> A gentle ping...

Applied.  Thanks for the reminder.

Alex


>
> > ---
> >  drivers/gpu/drm/radeon/radeon.h| 1 +
> >  drivers/gpu/drm/radeon/radeon_pm.c | 8 
> >  drivers/gpu/drm/radeon/si_dpm.c| 3 +++
> >  3 files changed, 12 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/radeon/radeon.h 
> > b/drivers/gpu/drm/radeon/radeon.h
> > index 42281fce552e6..56ed5634cebef 100644
> > --- a/drivers/gpu/drm/radeon/radeon.h
> > +++ b/drivers/gpu/drm/radeon/radeon.h
> > @@ -1549,6 +1549,7 @@ struct radeon_dpm {
> > void*priv;
> > u32 new_active_crtcs;
> > int new_active_crtc_count;
> > +   int high_pixelclock_count;
> > u32 current_active_crtcs;
> > int current_active_crtc_count;
> > bool single_display;
> > diff --git a/drivers/gpu/drm/radeon/radeon_pm.c 
> > b/drivers/gpu/drm/radeon/radeon_pm.c
> > index 0c1950f4e146f..3861c0b98fcf3 100644
> > --- a/drivers/gpu/drm/radeon/radeon_pm.c
> > +++ b/drivers/gpu/drm/radeon/radeon_pm.c
> > @@ -1767,6 +1767,7 @@ static void radeon_pm_compute_clocks_dpm(struct 
> > radeon_device *rdev)
> > struct drm_device *ddev = rdev->ddev;
> > struct drm_crtc *crtc;
> > struct radeon_crtc *radeon_crtc;
> > +   struct radeon_connector *radeon_connector;
> >
> > if (!rdev->pm.dpm_enabled)
> > return;
> > @@ -1776,6 +1777,7 @@ static void radeon_pm_compute_clocks_dpm(struct 
> > radeon_device *rdev)
> > /* update active crtc counts */
> > rdev->pm.dpm.new_active_crtcs = 0;
> > rdev->pm.dpm.new_active_crtc_count = 0;
> > +   rdev->pm.dpm.high_pixelclock_count = 0;
> > if (rdev->num_crtc && rdev->mode_info.mode_config_initialized) {
> > list_for_each_entry(crtc,
> > >mode_config.crtc_list, head) {
> > @@ -1783,6 +1785,12 @@ static void radeon_pm_compute_clocks_dpm(struct 
> > radeon_device *rdev)
> > if (crtc->enabled) {
> > rdev->pm.dpm.new_active_crtcs |= (1 << 
> > radeon_crtc->crtc_id);
> > rdev->pm.dpm.new_active_crtc_count++;
> > +   if (!radeon_crtc->connector)
> > +   continue;
> > +
> > +   radeon_connector = 
> > to_radeon_connector(radeon_crtc->connector);
> > +   if 
> > (radeon_connector->pixelclock_for_modeset > 297000)
> > +   
> > rdev->pm.dpm.high_pixelclock_count++;
> > }
> > }
> > }
> > diff --git a/drivers/gpu/drm/radeon/si_dpm.c 
> > b/drivers/gpu/drm/radeon/si_dpm.c
> > index 9186095518047..3cc2b96a7f368 100644
> > --- a/drivers/gpu/drm/radeon/si_dpm.c
> > +++ b/drivers/gpu/drm/radeon/si_dpm.c
> > @@ -2979,6 +2979,9 @@ static void si_apply_state_adjust_rules(struct 
> > radeon_device *rdev,
> > (rdev->pdev->device == 0x6605)) {
> > max_sclk = 75000;
> > }
> > +
> > +   if (rdev->pm.dpm.high_pixelclock_count > 1)
> > +   disable_sclk_switching = true;
> > }
> >
> > if (rps->vce_active) {
> > --
> > 2.30.2
> >
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/2] drm/amdgpu: fix fence calculation

2021-05-11 Thread David M Nieto
The proper metric for fence utilization over several
contexts is a harmonic mean, but such a calculation is
prohibitive in kernel space, so the code approximates it.

Because the approximation diverges when one context has a
very small ratio compared with the other context, this change
filters out ratios smaller than 0.01%.

Signed-off-by: David M Nieto 
Change-Id: I5b6e0ce5f489a5f55855d35354a6a3653e9d613b
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 13 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h |  1 +
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 9036c93b4a0c..78579ad03e93 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -703,11 +703,22 @@ ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr 
*mgr, uint32_t hwip,
idp = >ctx_handles;
mutex_lock(>lock);
idr_for_each_entry(idp, ctx, id) {
+   ktime_t ttotal = 0, tmax = 0;
+
if (!ctx->entities[hwip][idx])
continue;
 
centity = ctx->entities[hwip][idx];
-   amdgpu_ctx_fence_time(ctx, centity, , );
+   amdgpu_ctx_fence_time(ctx, centity, , );
+
+   /* Harmonic mean approximation diverges for very small
+* values. If ratio < 0.01% ignore
+*/
+   if (AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(tmax, ttotal))
+   continue;
+
+   total = ktime_add(total, ttotal);
+   max = ktime_after(tmax, max) ? tmax : max;
}
 
mutex_unlock(>lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index 10dcf59a5c6b..3541dfb059ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -30,6 +30,7 @@ struct drm_file;
 struct amdgpu_fpriv;
 
 #define AMDGPU_MAX_ENTITY_NUM 4
+#define AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(max, total) (max > 16384ULL*total)
 
 struct amdgpu_ctx_entity {
uint64_tsequence;
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/2] drm/amdgpu: free resources on fence usage query

2021-05-11 Thread David M Nieto
Free the resources if the fence needs to be ignored
during the ratio calculation

Signed-off-by: David M Nieto 
Change-Id: Ibfc55a94c53d4b3a1dba8fff4c53fd893195bb96
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 01fe60fedcbe..9036c93b4a0c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -669,11 +669,15 @@ void amdgpu_ctx_fence_time(struct amdgpu_ctx *ctx, struct 
amdgpu_ctx_entity *cen
if (!fence)
continue;
s_fence = to_drm_sched_fence(fence);
-   if (!dma_fence_is_signaled(_fence->scheduled))
+   if (!dma_fence_is_signaled(_fence->scheduled)) {
+   dma_fence_put(fence);
continue;
+   }
t1 = s_fence->scheduled.timestamp;
-   if (t1 >= now)
+   if (!ktime_before(t1, now)) {
+   dma_fence_put(fence);
continue;
+   }
if (dma_fence_is_signaled(_fence->finished) &&
s_fence->finished.timestamp < now)
*total += ktime_sub(s_fence->finished.timestamp, t1);
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/2] drm/amdgpu: fix fence calculation

2021-05-11 Thread Nieto, David M
[AMD Official Use Only - Internal Distribution Only]

The local variables need to be initialized to zero, since amdgpu_ctx_fence_time
accumulates into them and does not initialize them.

David

From: Christian König 
Sent: Tuesday, May 11, 2021 12:53 AM
To: Nieto, David M ; amd-gfx@lists.freedesktop.org 

Subject: Re: [PATCH 2/2] drm/amdgpu: fix fence calculation

Am 10.05.21 um 22:29 schrieb David M Nieto:
> The proper metric for fence utilization over several
> contexts is an harmonic mean, but such calculation is
> prohibitive in kernel space, so the code approximates it.
>
> Because the approximation diverges when one context has a
> very small ratio compared with the other context, this change
> filter out ratios smaller that 0.01%
>
> Signed-off-by: David M Nieto 
> Change-Id: I5b6e0ce5f489a5f55855d35354a6a3653e9d613b
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 13 -
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h |  1 +
>   2 files changed, 13 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index 9036c93b4a0c..89ee464b9424 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -698,16 +698,27 @@ ktime_t amdgpu_ctx_mgr_fence_usage(struct 
> amdgpu_ctx_mgr *mgr, uint32_t hwip,
>struct amdgpu_ctx_entity *centity;
>ktime_t total = 0, max = 0;
>
> +

Unrelated white space change.

>if (idx >= AMDGPU_MAX_ENTITY_NUM)
>return 0;
>idp = >ctx_handles;
>mutex_lock(>lock);
>idr_for_each_entry(idp, ctx, id) {
> + ktime_t ttotal = tmax = ktime_set(0, 0);

There should be a blank line between declaration and code, and please
don't initialize local variables if it isn't necessary.

Christian.

>if (!ctx->entities[hwip][idx])
>continue;
>
>centity = ctx->entities[hwip][idx];
> - amdgpu_ctx_fence_time(ctx, centity, , );
> + amdgpu_ctx_fence_time(ctx, centity, , );
> +
> + /* Harmonic mean approximation diverges for very small
> +  * values. If ratio < 0.01% ignore
> +  */
> + if (AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(tmax, ttotal))
> + continue;
> +
> + total = ktime_add(total, ttotal);
> + max = ktime_after(tmax, max) ? tmax : max;
>}
>
>mutex_unlock(>lock);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> index 10dcf59a5c6b..3541dfb059ec 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> @@ -30,6 +30,7 @@ struct drm_file;
>   struct amdgpu_fpriv;
>
>   #define AMDGPU_MAX_ENTITY_NUM 4
> +#define AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(max, total) (max > 16384ULL*total)
>
>   struct amdgpu_ctx_entity {
>uint64_tsequence;

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: fix uninitialized return value

2021-05-11 Thread Alex Deucher
On Tue, May 11, 2021 at 1:25 PM ts8060  wrote:
>
> The amdgpu_display_suspend_helper() function uses an uninitialized
> variable as its return value, causing a failure to suspend/resume on a
> Radeon R7 240/340 GPU, with the following error in dmesg:
> [drm:amdgpu_device_ip_suspend_phase1 [amdgpu]] *ERROR* suspend of IP block 
>  failed 118489088
>
> Initialize the variable to 0.
>
> Signed-off-by: Tim Saunders 

Already fixed here:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=4b12ee6f426e5e36396501a58f3a1af5b92a7e06

Alex

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> index 8a1fb8b6606e..27626d8a0647 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> @@ -1544,7 +1544,7 @@ int amdgpu_display_suspend_helper(struct amdgpu_device 
> *adev)
> struct drm_crtc *crtc;
> struct drm_connector *connector;
> struct drm_connector_list_iter iter;
> -   int r;
> +   int r = 0;
>
> /* turn off display hw */
> drm_modeset_lock_all(dev);
> --
> 2.31.1
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v6 10/16] drm/amdgpu: Guard against write accesses after device removal

2021-05-11 Thread Andrey Grodzovsky



On 2021-05-11 2:50 a.m., Christian König wrote:

Am 10.05.21 um 18:36 schrieb Andrey Grodzovsky:

This should prevent writing to memory or IO ranges possibly
already allocated for other uses after our device is removed.

v5:
Protect more places where memcpy_to/from_io takes place
Protect IB submissions

v6: Switch to !drm_dev_enter instead of scoping entire code
with brackets.

Signed-off-by: Andrey Grodzovsky 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    | 11 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c   |  9 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c    | 17 +++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c   | 63 +++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h   |  2 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c  | 70 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h  | 49 ++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c   | 31 +---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c   | 11 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c   | 22 --
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    |  7 +-
  drivers/gpu/drm/amd/amdgpu/psp_v11_0.c    | 44 ++--
  drivers/gpu/drm/amd/amdgpu/psp_v12_0.c    |  8 +--
  drivers/gpu/drm/amd/amdgpu/psp_v3_1.c |  8 +--
  drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 26 ---
  drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 22 +++---
  .../drm/amd/pm/powerplay/smumgr/smu7_smumgr.c |  2 +
  17 files changed, 257 insertions(+), 145 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index a0bff4713672..94c415176cdc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -71,6 +71,8 @@
  #include 
  #include 
+#include <drm/drm_drv.h>
+
  MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
  MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
  MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
@@ -281,7 +283,10 @@ void amdgpu_device_vram_access(struct 
amdgpu_device *adev, loff_t pos,

  unsigned long flags;
  uint32_t hi = ~0;
  uint64_t last;
+    int idx;
+ if (!drm_dev_enter(&adev->ddev, &idx))
+ return;
  #ifdef CONFIG_64BIT
  last = min(pos + size, adev->gmc.visible_vram_size);
@@ -299,8 +304,10 @@ void amdgpu_device_vram_access(struct 
amdgpu_device *adev, loff_t pos,

  memcpy_fromio(buf, addr, count);
  }
-    if (count == size)
+    if (count == size) {
+    drm_dev_exit(idx);
  return;
+    }


Maybe use a goto instead, but really just a nit pick.




  pos += count;
  buf += count / 4;
@@ -323,6 +330,8 @@ void amdgpu_device_vram_access(struct 
amdgpu_device *adev, loff_t pos,

  *buf++ = RREG32_NO_KIQ(mmMM_DATA);
  }
  spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
+
+    drm_dev_exit(idx);
  }
  /*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c

index 4d32233cde92..04ba5eef1e88 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -31,6 +31,8 @@
  #include "amdgpu_ras.h"
  #include "amdgpu_xgmi.h"
+#include <drm/drm_drv.h>
+
  /**
   * amdgpu_gmc_pdb0_alloc - allocate vram for pdb0
   *
@@ -151,6 +153,10 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device 
*adev, void *cpu_pt_addr,

  {
  void __iomem *ptr = (void *)cpu_pt_addr;
  uint64_t value;
+    int idx;
+
+    if (!drm_dev_enter(&adev->ddev, &idx))
+    return 0;
  /*
   * The following is for PTE only. GART does not have PDEs.
@@ -158,6 +164,9 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device 
*adev, void *cpu_pt_addr,

  value = addr & 0xF000ULL;
  value |= flags;
  writeq(value, ptr + (gpu_page_idx * 8));
+
+    drm_dev_exit(idx);
+
  return 0;
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c

index 148a3b481b12..62fcbd446c71 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -30,6 +30,7 @@
  #include 
  #include 
+#include <drm/drm_drv.h>
  #include "amdgpu.h"
  #include "atom.h"
@@ -137,7 +138,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, 
unsigned num_ibs,

  bool secure;
  unsigned i;
-    int r = 0;
+    int idx, r = 0;
  bool need_pipe_sync = false;
  if (num_ibs == 0)
@@ -169,13 +170,16 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, 
unsigned num_ibs,

  return -EINVAL;
  }
+    if (!drm_dev_enter(&adev->ddev, &idx))
+    return -ENODEV;
+
  alloc_size = ring->funcs->emit_frame_size + num_ibs *
  ring->funcs->emit_ib_size;
  r = amdgpu_ring_alloc(ring, alloc_size);
  if (r) {
  dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
-    return r;
+    goto exit;
  }
  need_ctx_switch = ring->current_ctx != fence_ctx;
@@ -205,7 +209,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, 
unsigned num_ibs,

  r = 

[PATCH] drm/amdgpu: fix uninitialized return value

2021-05-11 Thread ts8060
The amdgpu_display_suspend_helper() function uses an uninitialized
variable as its return value, causing a failure to suspend/resume on a
Radeon R7 240/340 GPU, with the following error in dmesg:
[drm:amdgpu_device_ip_suspend_phase1 [amdgpu]] *ERROR* suspend of IP block 
 failed 118489088

Initialize the variable to 0.

Signed-off-by: Tim Saunders 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 8a1fb8b6606e..27626d8a0647 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -1544,7 +1544,7 @@ int amdgpu_display_suspend_helper(struct amdgpu_device 
*adev)
struct drm_crtc *crtc;
struct drm_connector *connector;
struct drm_connector_list_iter iter;
-   int r;
+   int r = 0;

/* turn off display hw */
drm_modeset_lock_all(dev);
--
2.31.1
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: Align serial size in drm_amdgpu_info_vbios

2021-05-11 Thread Nieto, David M
[AMD Public Use]

The point of having the device ID in the structure is because we are reading it 
from the VBIOS header, not the asic registers. They should match, but a user 
may flash a VBIOS for a different devid and they may not match.

Regarding sysfs vs ioctl I see value in providing it in both ways, Mesa uses 
IOCTL and other DRM based tools may benefit as well. I recently went through 
the trouble of having to add sysfs string parsing for some data not available 
in ioctl, and while not very complicated, it is a programming inconvenience.

I understand that being uapi, changing it is not easy, but this is information 
extracted from a VBIOS header, something that has been kept stable for many 
years.

David

From: Christian König 
Sent: Tuesday, May 11, 2021 7:07 AM
To: Deucher, Alexander ; Marek Olšák 

Cc: Kees Cook ; Gu, JiaWei (Will) ; 
amd-gfx list ; Deng, Emily ; 
Alex Deucher ; Nieto, David M 
Subject: Re: [PATCH] drm/amdgpu: Align serial size in drm_amdgpu_info_vbios

Yeah, but umr is making strong use of sysfs as well.

The only justification of this interface would be if we want to use it in Mesa.

And I agree with Marek that looks redundant with the device structure to me as 
well.

Christian.

Am 11.05.21 um 16:04 schrieb Deucher, Alexander:

[AMD Public Use]

It's being used by umr and some other smi tools to provide vbios information 
for debugging.

Alex


From: amd-gfx 

 on behalf of Marek Olšák 
Sent: Tuesday, May 11, 2021 4:18 AM
To: Christian König 

Cc: Kees Cook ; Gu, JiaWei 
(Will) ; amd-gfx list 
; Deng, 
Emily ; Alex Deucher 
; Nieto, David M 

Subject: Re: [PATCH] drm/amdgpu: Align serial size in drm_amdgpu_info_vbios

Mesa doesn't use sysfs.

Note that this is a uapi, meaning that once it's in the kernel, it can't be 
changed like that.

What's the use case for this new interface? Isn't it partially redundant with 
the current device info structure, which seems to have the equivalent of dev_id 
and rev_id?

Marek

On Tue, May 11, 2021 at 3:51 AM Christian König 
mailto:ckoenig.leichtzumer...@gmail.com>> 
wrote:
Marek and other userspace folks need to decide that.

Basic question here is if Mesa is already accessing sysfs nodes for OpenGL or 
RADV. If that is the case then we should probably expose the information there 
as well.

If that isn't the case (which I think it is) then we should implement it as 
IOCTL.

Regards,
Christian.

Am 10.05.21 um 22:19 schrieb Nieto, David M:

One of the primary usecases is to add this information to the renderer string. 
I am not sure if there are other cases of UMD drivers accessing sysfs nodes, 
but if we think about permissions: if a client is authenticated and opens the 
render device then it can use the IOCTL, whereas it is unclear to me that we can 
make such an assumption for sysfs nodes…



I think there is value in having both tbh.



Regards,

David



From: Christian König 

Date: Monday, May 10, 2021 at 6:48 AM
To: "Nieto, David M" , "Gu, 
JiaWei (Will)" 
Cc: Alex Deucher , "Deng, 
Emily" , Kees Cook 
, amd-gfx list 

Subject: Re: [PATCH] drm/amdgpu: Align serial size in drm_amdgpu_info_vbios



Well we could add both as sysfs file(s).

Question here is rather what is the primary use case of this and if the 
application has the necessary access permissions to the sysfs files?

Regards,
Christian.

Am 10.05.21 um 15:42 schrieb Nieto, David M:

Then the application would need to issue the ioctl and then open a sysfs file 
to get all the information it needs. It makes little sense from a programming 
perspective to add an incomplete interface in my opinion





From: Gu, JiaWei (Will) 
Sent: Monday, May 10, 2021 12:13:07 AM
To: Nieto, David M 
Cc: Alex Deucher ; amd-gfx 
list ; 
Kees Cook ; Deng, Emily 

Subject: RE: [PATCH] drm/amdgpu: Align serial size in drm_amdgpu_info_vbios



[AMD Official Use Only - Internal Distribution Only]

Hi David,

What I meant is to ONLY delete the serial[16] from drm_amdgpu_info_vbios, not 
the whole struct.

struct drm_amdgpu_info_vbios {
__u8 name[64];
__u32 dbdf;
__u8 vbios_pn[64];
__u32 version;
__u8 date[32];
__u8 serial[16]; // jiawei: shall we delete this
__u32 

Re: [PATCH v6 06/16] drm/amdgpu: Handle IOMMU enabled case.

2021-05-11 Thread Andrey Grodzovsky




On 2021-05-11 11:56 a.m., Alex Deucher wrote:

On Mon, May 10, 2021 at 12:37 PM Andrey Grodzovsky
 wrote:


Handle all DMA IOMMU group related dependencies before the
group is removed.

v5: Drop IOMMU notifier and switch to lockless call to ttm_tt_unpopulate
v6: Drop the BO unmap list

Signed-off-by: Andrey Grodzovsky 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c   | 3 +--
  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h   | 1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c| 9 +
  drivers/gpu/drm/amd/amdgpu/cik_ih.c| 1 -
  drivers/gpu/drm/amd/amdgpu/cz_ih.c | 1 -
  drivers/gpu/drm/amd/amdgpu/iceland_ih.c| 1 -
  drivers/gpu/drm/amd/amdgpu/navi10_ih.c | 3 ---
  drivers/gpu/drm/amd/amdgpu/si_ih.c | 1 -
  drivers/gpu/drm/amd/amdgpu/tonga_ih.c  | 1 -
  drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 3 ---
  11 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 18598eda18f6..a0bff4713672 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3256,7 +3256,6 @@ static const struct attribute *amdgpu_dev_attributes[] = {
 NULL
  };

-
  /**
   * amdgpu_device_init - initialize the driver
   *
@@ -3698,12 +3697,13 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
 amdgpu_ucode_sysfs_fini(adev);
 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);

-
 amdgpu_fbdev_fini(adev);

 amdgpu_irq_fini_hw(adev);

 amdgpu_device_ip_fini_early(adev);
+
+   amdgpu_gart_dummy_page_fini(adev);
  }

  void amdgpu_device_fini_sw(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index c5a9a4fb10d2..354e68081b53 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -92,7 +92,7 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device 
*adev)
   *
   * Frees the dummy page used by the driver (all asics).
   */
-static void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
+void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
  {
 if (!adev->dummy_page_addr)
 return;
@@ -375,5 +375,4 @@ int amdgpu_gart_init(struct amdgpu_device *adev)
   */
  void amdgpu_gart_fini(struct amdgpu_device *adev)
  {
-   amdgpu_gart_dummy_page_fini(adev);
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
index a25fe97b0196..78dc7a23da56 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
@@ -58,6 +58,7 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
  void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
  int amdgpu_gart_init(struct amdgpu_device *adev);
  void amdgpu_gart_fini(struct amdgpu_device *adev);
+void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev);
  int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
int pages);
  int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 233b64dab94b..a14973a7a9c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -361,6 +361,15 @@ void amdgpu_irq_fini_hw(struct amdgpu_device *adev)
 if (!amdgpu_device_has_dc_support(adev))
 flush_work(&adev->hotplug_work);
 }
+
+   if (adev->irq.ih_soft.ring)
+   amdgpu_ih_ring_fini(adev, &adev->irq.ih_soft);


Why is the ih_soft handled here and in the various ih sw_fini functions?


I think new ASICs were added after the last rebase, and I missed them.
I am taking care of this right now, together with Christian's previous comment.

Andrey




+   if (adev->irq.ih.ring)
+   amdgpu_ih_ring_fini(adev, &adev->irq.ih);
+   if (adev->irq.ih1.ring)
+   amdgpu_ih_ring_fini(adev, &adev->irq.ih1);
+   if (adev->irq.ih2.ring)
+   amdgpu_ih_ring_fini(adev, &adev->irq.ih2);
  }

  /**
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c 
b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
index 183d44a6583c..df385ffc9768 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
@@ -310,7 +310,6 @@ static int cik_ih_sw_fini(void *handle)
 struct amdgpu_device *adev = (struct amdgpu_device *)handle;

 amdgpu_irq_fini_sw(adev);
-   amdgpu_ih_ring_fini(adev, &adev->irq.ih);
 amdgpu_irq_remove_domain(adev);

 return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c 
b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
index d32743949003..b8c47e0cf37a 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
@@ -302,7 +302,6 @@ static int 

Re: [PATCH v6 06/16] drm/amdgpu: Handle IOMMU enabled case.

2021-05-11 Thread Alex Deucher
On Mon, May 10, 2021 at 12:37 PM Andrey Grodzovsky
 wrote:
>
> Handle all DMA IOMMU group related dependencies before the
> group is removed.
>
> v5: Drop IOMMU notifier and switch to lockless call to ttm_tt_unpopulate
> v6: Drop the BO unmap list
>
> Signed-off-by: Andrey Grodzovsky 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c   | 3 +--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h   | 1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c| 9 +
>  drivers/gpu/drm/amd/amdgpu/cik_ih.c| 1 -
>  drivers/gpu/drm/amd/amdgpu/cz_ih.c | 1 -
>  drivers/gpu/drm/amd/amdgpu/iceland_ih.c| 1 -
>  drivers/gpu/drm/amd/amdgpu/navi10_ih.c | 3 ---
>  drivers/gpu/drm/amd/amdgpu/si_ih.c | 1 -
>  drivers/gpu/drm/amd/amdgpu/tonga_ih.c  | 1 -
>  drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 3 ---
>  11 files changed, 13 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 18598eda18f6..a0bff4713672 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -3256,7 +3256,6 @@ static const struct attribute *amdgpu_dev_attributes[] 
> = {
> NULL
>  };
>
> -
>  /**
>   * amdgpu_device_init - initialize the driver
>   *
> @@ -3698,12 +3697,13 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
> amdgpu_ucode_sysfs_fini(adev);
> sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
>
> -
> amdgpu_fbdev_fini(adev);
>
> amdgpu_irq_fini_hw(adev);
>
> amdgpu_device_ip_fini_early(adev);
> +
> +   amdgpu_gart_dummy_page_fini(adev);
>  }
>
>  void amdgpu_device_fini_sw(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> index c5a9a4fb10d2..354e68081b53 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> @@ -92,7 +92,7 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device 
> *adev)
>   *
>   * Frees the dummy page used by the driver (all asics).
>   */
> -static void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
> +void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
>  {
> if (!adev->dummy_page_addr)
> return;
> @@ -375,5 +375,4 @@ int amdgpu_gart_init(struct amdgpu_device *adev)
>   */
>  void amdgpu_gart_fini(struct amdgpu_device *adev)
>  {
> -   amdgpu_gart_dummy_page_fini(adev);
>  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
> index a25fe97b0196..78dc7a23da56 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
> @@ -58,6 +58,7 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
>  void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
>  int amdgpu_gart_init(struct amdgpu_device *adev);
>  void amdgpu_gart_fini(struct amdgpu_device *adev);
> +void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev);
>  int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
>int pages);
>  int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
> index 233b64dab94b..a14973a7a9c9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
> @@ -361,6 +361,15 @@ void amdgpu_irq_fini_hw(struct amdgpu_device *adev)
> if (!amdgpu_device_has_dc_support(adev))
> flush_work(&adev->hotplug_work);
> }
> +
> +   if (adev->irq.ih_soft.ring)
> +   amdgpu_ih_ring_fini(adev, &adev->irq.ih_soft);

Why is the ih_soft handled here and in the various ih sw_fini functions?

> +   if (adev->irq.ih.ring)
> +   amdgpu_ih_ring_fini(adev, &adev->irq.ih);
> +   if (adev->irq.ih1.ring)
> +   amdgpu_ih_ring_fini(adev, &adev->irq.ih1);
> +   if (adev->irq.ih2.ring)
> +   amdgpu_ih_ring_fini(adev, &adev->irq.ih2);
>  }
>
>  /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c 
> b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
> index 183d44a6583c..df385ffc9768 100644
> --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c
> +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
> @@ -310,7 +310,6 @@ static int cik_ih_sw_fini(void *handle)
> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>
> amdgpu_irq_fini_sw(adev);
> -   amdgpu_ih_ring_fini(adev, &adev->irq.ih);
> amdgpu_irq_remove_domain(adev);
>
> return 0;
> diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c 
> b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
> index d32743949003..b8c47e0cf37a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c
> +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
> @@ -302,7 +302,6 @@ static int cz_ih_sw_fini(void 

Re: [RFC] Add BPF_PROG_TYPE_CGROUP_IOCTL

2021-05-11 Thread Alex Deucher
On Fri, May 7, 2021 at 7:45 PM Tejun Heo  wrote:
>
> Hello,
>
> On Fri, May 07, 2021 at 06:30:56PM -0400, Alex Deucher wrote:
> > Maybe we are speaking past each other.  I'm not following.  We got
> > here because a device specific cgroup didn't make sense.  With my
> > Linux user hat on, that makes sense.  I don't want to write code to a
> > bunch of device specific interfaces if I can avoid it.  But as for
> > temporal vs spatial partitioning of the GPU, the argument seems to be
> > a sort of hand-wavy one that both spatial and temporal partitioning
> > make sense on CPUs, but only temporal partitioning makes sense on
> > GPUs.  I'm trying to understand that assertion.  There are some GPUs
>
> Spatial partitioning as implemented in cpuset isn't a desirable model. It's
> there partly because it has historically been there. It doesn't really
> require dynamic hierarchical distribution of anything and is more of a way
> to batch-update per-task configuration, which is how it's actually
> implemented. It's broken too in that it interferes with per-task affinity
> settings. So, not exactly a good example to follow. In addition, this sort
> of partitioning requires more hardware knowledge and GPUs are worse than
> CPUs in that hardwares differ more.
>
> Features like this are trivial to implement from userland side by making
> per-process settings inheritable and restricting who can update the
> settings.
>
> > that can more easily be temporally partitioned and some that can be
> > more easily spatially partitioned.  It doesn't seem any different than
> > CPUs.
>
> Right, it doesn't really matter how the resource is distributed. What
> matters is how granular and generic the distribution can be. If gpus can
> implement work-conserving proportional distribution, that's something which
> is widely useful and inherently requires dynamic scheduling from kernel
> side. If it's about setting per-vendor affinities, this is way too much
> cgroup interface for a feature which can be easily implemented outside
> cgroup. Just do per-process (or whatever handles gpus use) and confine their
> configurations from cgroup side however way.
>
> While the specific theme changes a bit, we're basically having the same
> discussion with the same conclusion over the past however many months.
> Hopefully, the point is clear by now.

Thanks, that helps a lot.

Alex
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v6 06/16] drm/amdgpu: Handle IOMMU enabled case.

2021-05-11 Thread Andrey Grodzovsky



On 2021-05-11 2:44 a.m., Christian König wrote:

Am 10.05.21 um 18:36 schrieb Andrey Grodzovsky:

Handle all DMA IOMMU group related dependencies before the
group is removed.

v5: Drop IOMMU notifier and switch to lockless call to ttm_tt_unpopulate
v6: Drop the BO unmap list

Signed-off-by: Andrey Grodzovsky 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c   | 3 +--
  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h   | 1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c    | 9 +
  drivers/gpu/drm/amd/amdgpu/cik_ih.c    | 1 -
  drivers/gpu/drm/amd/amdgpu/cz_ih.c | 1 -
  drivers/gpu/drm/amd/amdgpu/iceland_ih.c    | 1 -
  drivers/gpu/drm/amd/amdgpu/navi10_ih.c | 3 ---
  drivers/gpu/drm/amd/amdgpu/si_ih.c | 1 -
  drivers/gpu/drm/amd/amdgpu/tonga_ih.c  | 1 -
  drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 3 ---
  11 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index 18598eda18f6..a0bff4713672 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3256,7 +3256,6 @@ static const struct attribute 
*amdgpu_dev_attributes[] = {

  NULL
  };
-
  /**
   * amdgpu_device_init - initialize the driver
   *
@@ -3698,12 +3697,13 @@ void amdgpu_device_fini_hw(struct 
amdgpu_device *adev)

  amdgpu_ucode_sysfs_fini(adev);
  sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
-
  amdgpu_fbdev_fini(adev);
  amdgpu_irq_fini_hw(adev);
  amdgpu_device_ip_fini_early(adev);
+
+    amdgpu_gart_dummy_page_fini(adev);


I think you should probably just call amdgpu_gart_fini() here.


  }
  void amdgpu_device_fini_sw(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c

index c5a9a4fb10d2..354e68081b53 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -92,7 +92,7 @@ static int amdgpu_gart_dummy_page_init(struct 
amdgpu_device *adev)

   *
   * Frees the dummy page used by the driver (all asics).
   */
-static void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
+void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
  {
  if (!adev->dummy_page_addr)
  return;
@@ -375,5 +375,4 @@ int amdgpu_gart_init(struct amdgpu_device *adev)
   */
  void amdgpu_gart_fini(struct amdgpu_device *adev)
  {
-    amdgpu_gart_dummy_page_fini(adev);
  }


Well either you remove amdgpu_gart_fini() or just call 
amdgpu_gart_fini() instead of amdgpu_gart_dummy_page_fini().


diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h

index a25fe97b0196..78dc7a23da56 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
@@ -58,6 +58,7 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device 
*adev);

  void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
  int amdgpu_gart_init(struct amdgpu_device *adev);
  void amdgpu_gart_fini(struct amdgpu_device *adev);
+void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev);
  int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
 int pages);
  int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c

index 233b64dab94b..a14973a7a9c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -361,6 +361,15 @@ void amdgpu_irq_fini_hw(struct amdgpu_device *adev)
  if (!amdgpu_device_has_dc_support(adev))
  flush_work(&adev->hotplug_work);
  }
+
+    if (adev->irq.ih_soft.ring)
+    amdgpu_ih_ring_fini(adev, &adev->irq.ih_soft);
+    if (adev->irq.ih.ring)
+    amdgpu_ih_ring_fini(adev, &adev->irq.ih);
+    if (adev->irq.ih1.ring)
+    amdgpu_ih_ring_fini(adev, &adev->irq.ih1);
+    if (adev->irq.ih2.ring)
+    amdgpu_ih_ring_fini(adev, &adev->irq.ih2);


You should probably make the function NULL safe instead of checking here.

Christian.


Agreed, in fact amdgpu_ih_ring_fini already does this check internally,
so I will just drop the checks.

Andrey




  }
  /**
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c 
b/drivers/gpu/drm/amd/amdgpu/cik_ih.c

index 183d44a6583c..df385ffc9768 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
@@ -310,7 +310,6 @@ static int cik_ih_sw_fini(void *handle)
  struct amdgpu_device *adev = (struct amdgpu_device *)handle;
  amdgpu_irq_fini_sw(adev);
-    amdgpu_ih_ring_fini(adev, &adev->irq.ih);
  amdgpu_irq_remove_domain(adev);
  return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c 
b/drivers/gpu/drm/amd/amdgpu/cz_ih.c

index d32743949003..b8c47e0cf37a 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c

[PATCH 2/2] drm/amdgpu/display: restore the backlight on modeset (v2)

2021-05-11 Thread Alex Deucher
To stay consistent with the user's setting.

v2: rebase on multi-eDP support

Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1337
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 167c8759fbc9..979e4dafa735 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -9075,6 +9075,12 @@ static void amdgpu_dm_atomic_commit_tail(struct 
drm_atomic_state *state)
/* Update audio instances for each connector. */
amdgpu_dm_commit_audio(dev, state);
 
+#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) ||  \
+   defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
+   /* restore the backlight level */
+   if (dm->backlight_dev)
+   amdgpu_dm_backlight_set_level(dm, dm->brightness[0]);
+#endif
/*
 * send vblank event on all events not handled in flip and
 * mark consumed event for drm_atomic_helper_commit_hw_done
-- 
2.31.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/2] drm/amdgpu/display: add helper functions to get/set backlight (v2)

2021-05-11 Thread Alex Deucher
And cache the value.  These can be used by the backlight callbacks
and modesetting functions.

v2: rebase on latest backlight changes.

Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1337
Signed-off-by: Alex Deucher 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 42 ++-
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  7 
 2 files changed, 38 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 5df187a6e25f..167c8759fbc9 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -3467,26 +3467,28 @@ static u32 convert_brightness_to_user(const struct 
amdgpu_dm_backlight_caps *cap
 max - min);
 }
 
-static int amdgpu_dm_backlight_update_status(struct backlight_device *bd)
+static int amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm,
+u32 user_brightness)
 {
-   struct amdgpu_display_manager *dm = bl_get_data(bd);
struct amdgpu_dm_backlight_caps caps;
struct dc_link *link[AMDGPU_DM_MAX_NUM_EDP];
-   u32 brightness;
+   u32 brightness[AMDGPU_DM_MAX_NUM_EDP];
bool rc;
int i;
 
amdgpu_dm_update_backlight_caps(dm);
caps = dm->backlight_caps;
 
-   for (i = 0; i < dm->num_of_edps; i++)
+   for (i = 0; i < dm->num_of_edps; i++) {
+   dm->brightness[i] = user_brightness;
+   brightness[i] = convert_brightness_from_user(&caps, 
dm->brightness[i]);
link[i] = (struct dc_link *)dm->backlight_link[i];
+   }
 
-   brightness = convert_brightness_from_user(&caps, bd->props.brightness);
-   // Change brightness based on AUX property
+   /* Change brightness based on AUX property */
if (caps.aux_support) {
for (i = 0; i < dm->num_of_edps; i++) {
-   rc = dc_link_set_backlight_level_nits(link[i], true, 
brightness,
+   rc = dc_link_set_backlight_level_nits(link[i], true, 
brightness[i],
AUX_BL_DEFAULT_TRANSITION_TIME_MS);
if (!rc) {
DRM_ERROR("DM: Failed to update backlight via 
AUX on eDP[%d]\n", i);
@@ -3495,7 +3497,7 @@ static int amdgpu_dm_backlight_update_status(struct 
backlight_device *bd)
}
} else {
for (i = 0; i < dm->num_of_edps; i++) {
-   rc = dc_link_set_backlight_level(dm->backlight_link[i], 
brightness, 0);
+   rc = dc_link_set_backlight_level(dm->backlight_link[i], 
brightness[i], 0);
if (!rc) {
DRM_ERROR("DM: Failed to update backlight on 
eDP[%d]\n", i);
break;
@@ -3506,9 +3508,17 @@ static int amdgpu_dm_backlight_update_status(struct 
backlight_device *bd)
return rc ? 0 : 1;
 }
 
-static int amdgpu_dm_backlight_get_brightness(struct backlight_device *bd)
+static int amdgpu_dm_backlight_update_status(struct backlight_device *bd)
 {
struct amdgpu_display_manager *dm = bl_get_data(bd);
+
+   amdgpu_dm_backlight_set_level(dm, bd->props.brightness);
+
+   return 0;
+}
+
+static u32 amdgpu_dm_backlight_get_level(struct amdgpu_display_manager *dm)
+{
struct amdgpu_dm_backlight_caps caps;
 
amdgpu_dm_update_backlight_caps(dm);
@@ -3521,17 +3531,24 @@ static int amdgpu_dm_backlight_get_brightness(struct 
backlight_device *bd)
 
rc = dc_link_get_backlight_level_nits(link, &avg, &peak);
if (!rc)
-   return bd->props.brightness;
+   return dm->brightness[0];
return convert_brightness_to_user(&caps, avg);
} else {
int ret = dc_link_get_backlight_level(dm->backlight_link[0]);
 
if (ret == DC_ERROR_UNEXPECTED)
-   return bd->props.brightness;
+   return dm->brightness[0];
return convert_brightness_to_user(&caps, ret);
}
 }
 
+static int amdgpu_dm_backlight_get_brightness(struct backlight_device *bd)
+{
+   struct amdgpu_display_manager *dm = bl_get_data(bd);
+
+   return amdgpu_dm_backlight_get_level(dm);
+}
+
 static const struct backlight_ops amdgpu_dm_backlight_ops = {
.options = BL_CORE_SUSPENDRESUME,
.get_brightness = amdgpu_dm_backlight_get_brightness,
@@ -3543,8 +3560,11 @@ amdgpu_dm_register_backlight_device(struct 
amdgpu_display_manager *dm)
 {
char bl_name[16];
struct backlight_properties props = { 0 };
+   int i;
 
amdgpu_dm_update_backlight_caps(dm);
+   for (i = 0; i < dm->num_of_edps; i++)
+   dm->brightness[i] = AMDGPU_MAX_BL_LEVEL;
 
props.max_brightness = AMDGPU_MAX_BL_LEVEL;
props.brightness = 

Re: [PATCH v2 2/2] drm/amdkfd: new range accessible by all GPUs

2021-05-11 Thread Felix Kuehling
Am 2021-05-11 um 11:15 a.m. schrieb Philip Yang:
> If xnack is on, give all GPUs access to every new range, whether it is
> created to recover from a retry vm fault or by SVM API calls.
>
> Signed-off-by: Philip Yang 

Reviewed-by: Felix Kuehling 


> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 12 
>  1 file changed, 8 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index d9111fea724b..6a677bdfcadb 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -258,6 +258,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, 
> uint64_t start,
>  {
>   uint64_t size = last - start + 1;
>   struct svm_range *prange;
> + struct kfd_process *p;
>  
>   prange = kzalloc(sizeof(*prange), GFP_KERNEL);
>   if (!prange)
> @@ -277,6 +278,11 @@ svm_range *svm_range_new(struct svm_range_list *svms, 
> uint64_t start,
>   prange->validate_timestamp = 0;
>   mutex_init(>migrate_mutex);
>   mutex_init(>lock);
> +
> + p = container_of(svms, struct kfd_process, svms);
> + if (p->xnack_enabled)
> + bitmap_fill(prange->bitmap_access, MAX_GPU_INSTANCE);
> +
>   svm_range_set_default_attributes(>preferred_loc,
>>prefetch_loc,
>>granularity, >flags);
> @@ -2243,7 +2249,7 @@ svm_range *svm_range_create_unregistered_range(struct 
> amdgpu_device *adev,
>  
>   prange = svm_range_new(>svms, start, last);
>   if (!prange) {
> - pr_debug("Failed to create prange in address [0x%llx]\\n", 
> addr);
> + pr_debug("Failed to create prange in address [0x%llx]\n", addr);
>   return NULL;
>   }
>   if (kfd_process_gpuid_from_kgd(p, adev, , )) {
> @@ -2251,9 +2257,7 @@ svm_range *svm_range_create_unregistered_range(struct 
> amdgpu_device *adev,
>   svm_range_free(prange);
>   return NULL;
>   }
> - prange->preferred_loc = gpuid;
> - prange->actual_loc = 0;
> - /* Gurantee prange is migrate it */
> +
>   svm_range_add_to_svms(prange);
>   svm_range_add_notifier_locked(mm, prange);
>  
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v2 2/2] drm/amdkfd: new range accessible by all GPUs

2021-05-11 Thread Philip Yang
If xnack is on, give all GPUs access to every new range, whether it is
created to recover from a retry vm fault or by SVM API calls.

Signed-off-by: Philip Yang 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index d9111fea724b..6a677bdfcadb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -258,6 +258,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, 
uint64_t start,
 {
uint64_t size = last - start + 1;
struct svm_range *prange;
+   struct kfd_process *p;
 
prange = kzalloc(sizeof(*prange), GFP_KERNEL);
if (!prange)
@@ -277,6 +278,11 @@ svm_range *svm_range_new(struct svm_range_list *svms, 
uint64_t start,
prange->validate_timestamp = 0;
mutex_init(>migrate_mutex);
mutex_init(>lock);
+
+   p = container_of(svms, struct kfd_process, svms);
+   if (p->xnack_enabled)
+   bitmap_fill(prange->bitmap_access, MAX_GPU_INSTANCE);
+
svm_range_set_default_attributes(>preferred_loc,
 >prefetch_loc,
 >granularity, >flags);
@@ -2243,7 +2249,7 @@ svm_range *svm_range_create_unregistered_range(struct 
amdgpu_device *adev,
 
prange = svm_range_new(>svms, start, last);
if (!prange) {
-   pr_debug("Failed to create prange in address [0x%llx]\\n", 
addr);
+   pr_debug("Failed to create prange in address [0x%llx]\n", addr);
return NULL;
}
if (kfd_process_gpuid_from_kgd(p, adev, , )) {
@@ -2251,9 +2257,7 @@ svm_range *svm_range_create_unregistered_range(struct 
amdgpu_device *adev,
svm_range_free(prange);
return NULL;
}
-   prange->preferred_loc = gpuid;
-   prange->actual_loc = 0;
-   /* Gurantee prange is migrate it */
+
svm_range_add_to_svms(prange);
svm_range_add_notifier_locked(mm, prange);
 
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/2] drm/amdkfd: unregistered range accessible by all GPUs

2021-05-11 Thread philip yang

  


On 2021-05-10 8:56 a.m., Felix Kuehling
  wrote:


  Am 2021-05-07 um 3:07 p.m. schrieb Philip Yang:

  
A new range is created to recover from a retry vm fault; give all GPUs access
to the range. The new range's preferred_loc is the default value
KFD_IOCTL_SVM_LOCATION_UNDEFINED.

Correct one typo.

Signed-off-by: Philip Yang 

  
  
Would it be better to move this into svm_range_new, conditional on
p->xnack_enabled? That way it would correctly apply to ranges created
through SVM API calls (e.g. in svm_range_handle_overlap or
svm_range_add) as well?


Yes, this is a good idea; patch 1/2 is not needed with this change.

Philip


  
Regards,
  Felix



  
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index d9111fea724b..537b12e75f54 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -2243,7 +2243,7 @@ svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,
 
 	prange = svm_range_new(>svms, start, last);
 	if (!prange) {
-		pr_debug("Failed to create prange in address [0x%llx]\\n", addr);
+		pr_debug("Failed to create prange in address [0x%llx]\n", addr);
 		return NULL;
 	}
 	if (kfd_process_gpuid_from_kgd(p, adev, , )) {
@@ -2251,9 +2251,8 @@ svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,
 		svm_range_free(prange);
 		return NULL;
 	}
-	prange->preferred_loc = gpuid;
-	prange->actual_loc = 0;
-	/* Gurantee prange is migrate it */
+
+	bitmap_fill(prange->bitmap_access, MAX_GPU_INSTANCE);
 	svm_range_add_to_svms(prange);
 	svm_range_add_notifier_locked(mm, prange);
 

  

  

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v6 01/16] drm/ttm: Remap all page faults to per process dummy page.

2021-05-11 Thread Christian König



Am 11.05.21 um 16:44 schrieb Andrey Grodzovsky:


On 2021-05-11 2:38 a.m., Christian König wrote:

Am 10.05.21 um 18:36 schrieb Andrey Grodzovsky:

On device removal reroute all CPU mappings to dummy page.

v3:
Remove loop to find DRM file and instead access it
by vma->vm_file->private_data. Move dummy page installation
into a separate function.

v4:
Map the entire BOs VA space into on demand allocated dummy page
on the first fault for that BO.

v5: Remove duplicate return.

v6: Polish ttm_bo_vm_dummy_page, remove superfluous code.

Signed-off-by: Andrey Grodzovsky 
---
  drivers/gpu/drm/ttm/ttm_bo_vm.c | 57 
-

  include/drm/ttm/ttm_bo_api.h    |  2 ++
  2 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c 
b/drivers/gpu/drm/ttm/ttm_bo_vm.c

index b31b18058965..e5a9615519d1 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -34,6 +34,8 @@
  #include 
  #include 
  #include 
+#include 
+#include 
  #include 
  #include 
  #include 
@@ -380,19 +382,72 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct 
vm_fault *vmf,

  }
  EXPORT_SYMBOL(ttm_bo_vm_fault_reserved);
  +static void ttm_bo_release_dummy_page(struct drm_device *dev, 
void *res)

+{
+    struct page *dummy_page = (struct page *)res;
+
+    __free_page(dummy_page);
+}
+
+vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot)
+{
+    struct vm_area_struct *vma = vmf->vma;
+    struct ttm_buffer_object *bo = vma->vm_private_data;
+    struct drm_device *ddev = bo->base.dev;
+    vm_fault_t ret = VM_FAULT_NOPAGE;
+    unsigned long address;
+    unsigned long pfn;
+    struct page *page;
+
+    /* Allocate new dummy page to map all the VA range in this VMA 
to it*/

+    page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+    if (!page)
+    return VM_FAULT_OOM;
+
+    pfn = page_to_pfn(page);
+
+    /* Prefault the entire VMA range right away to avoid further 
faults */
+    for (address = vma->vm_start; address < vma->vm_end; address += 
PAGE_SIZE) {

+



+    if (unlikely(address >= vma->vm_end))
+    break;


That extra check can be removed as far as I can see.



+
+    if (vma->vm_flags & VM_MIXEDMAP)
+    ret = vmf_insert_mixed_prot(vma, address,
+    __pfn_to_pfn_t(pfn, PFN_DEV),
+    prot);
+    else
+    ret = vmf_insert_pfn_prot(vma, address, pfn, prot);
+    }
+



+    /* Set the page to be freed using drmm release action */
+    if (drmm_add_action_or_reset(ddev, ttm_bo_release_dummy_page, 
page))

+    return VM_FAULT_OOM;


You should probably move that before inserting the page into the VMA 
and also free the allocated page if it goes wrong.



drmm_add_action_or_reset will automatically release the page if the 
add action fails; that's the 'reset' part of the function.


Ah! Ok that makes it even more important that you do this before you 
insert the page into any VMA.


Otherwise userspace has access to a freed page, with rather ugly 
consequences.


Christian.



Andrey




Apart from that patch looks good to me,
Christian.


+
+    return ret;
+}
+EXPORT_SYMBOL(ttm_bo_vm_dummy_page);
+
  vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
  {
  struct vm_area_struct *vma = vmf->vma;
  pgprot_t prot;
  struct ttm_buffer_object *bo = vma->vm_private_data;
+    struct drm_device *ddev = bo->base.dev;
  vm_fault_t ret;
+    int idx;
    ret = ttm_bo_vm_reserve(bo, vmf);
  if (ret)
  return ret;
    prot = vma->vm_page_prot;
-    ret = ttm_bo_vm_fault_reserved(vmf, prot, 
TTM_BO_VM_NUM_PREFAULT, 1);

+    if (drm_dev_enter(ddev, )) {
+    ret = ttm_bo_vm_fault_reserved(vmf, prot, 
TTM_BO_VM_NUM_PREFAULT, 1);

+    drm_dev_exit(idx);
+    } else {
+    ret = ttm_bo_vm_dummy_page(vmf, prot);
+    }
  if (ret == VM_FAULT_RETRY && !(vmf->flags & 
FAULT_FLAG_RETRY_NOWAIT))

  return ret;
  diff --git a/include/drm/ttm/ttm_bo_api.h 
b/include/drm/ttm/ttm_bo_api.h

index 639521880c29..254ede97f8e3 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -620,4 +620,6 @@ int ttm_bo_vm_access(struct vm_area_struct *vma, 
unsigned long addr,

   void *buf, int len, int write);
  bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all);
  +vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t 
prot);

+
  #endif




___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v6 04/16] drm/amdkfd: Split kfd suspend from devie exit

2021-05-11 Thread Andrey Grodzovsky



On 2021-05-11 2:40 a.m., Christian König wrote:

Am 10.05.21 um 18:36 schrieb Andrey Grodzovsky:

Helps to expedite HW related stuff to amdgpu_pci_remove

Signed-off-by: Andrey Grodzovsky 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 +-
  drivers/gpu/drm/amd/amdkfd/kfd_device.c    | 3 ++-
  3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c

index 5f6696a3c778..2b06dee9a0ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -170,7 +170,7 @@ void amdgpu_amdkfd_device_init(struct 
amdgpu_device *adev)

  }
  }
-void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev)
+void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev)
  {
  if (adev->kfd.dev) {
  kgd2kfd_device_exit(adev->kfd.dev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h

index 14f68c028126..f8e10af99c28 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -127,7 +127,7 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device 
*adev,

  const void *ih_ring_entry);
  void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
  void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
-void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev);
+void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev);
  int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum 
kgd_engine_type engine,

  uint32_t vmid, uint64_t gpu_addr,
  uint32_t *ib_cmd, uint32_t ib_len);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c

index 357b9bf62a1c..ab6d2a43c9a3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -858,10 +858,11 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
  return kfd->init_complete;
  }
+
+


Looks like unnecessary white space change to me.


  void kgd2kfd_device_exit(struct kfd_dev *kfd)
  {
  if (kfd->init_complete) {
-    kgd2kfd_suspend(kfd, false);


Where is the call to this function now?

Christian.


In patch 'drm/amdgpu: Add early fini callback' in
amdgpu_device_ip_fini_early->amdgpu_amdkfd_suspend->kgd2kfd_suspend

Andrey




  device_queue_manager_uninit(kfd->dqm);
  kfd_interrupt_exit(kfd);
  kfd_topology_remove_device(kfd);



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v6 01/16] drm/ttm: Remap all page faults to per process dummy page.

2021-05-11 Thread Andrey Grodzovsky


On 2021-05-11 2:38 a.m., Christian König wrote:

Am 10.05.21 um 18:36 schrieb Andrey Grodzovsky:

On device removal reroute all CPU mappings to dummy page.

v3:
Remove loop to find DRM file and instead access it
by vma->vm_file->private_data. Move dummy page installation
into a separate function.

v4:
Map the entire BOs VA space into on demand allocated dummy page
on the first fault for that BO.

v5: Remove duplicate return.

v6: Polish ttm_bo_vm_dummy_page, remove superfluous code.

Signed-off-by: Andrey Grodzovsky 
---
  drivers/gpu/drm/ttm/ttm_bo_vm.c | 57 -
  include/drm/ttm/ttm_bo_api.h    |  2 ++
  2 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c 
b/drivers/gpu/drm/ttm/ttm_bo_vm.c

index b31b18058965..e5a9615519d1 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -34,6 +34,8 @@
  #include 
  #include 
  #include 
+#include 
+#include 
  #include 
  #include 
  #include 
@@ -380,19 +382,72 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct 
vm_fault *vmf,

  }
  EXPORT_SYMBOL(ttm_bo_vm_fault_reserved);
  +static void ttm_bo_release_dummy_page(struct drm_device *dev, void 
*res)

+{
+    struct page *dummy_page = (struct page *)res;
+
+    __free_page(dummy_page);
+}
+
+vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot)
+{
+    struct vm_area_struct *vma = vmf->vma;
+    struct ttm_buffer_object *bo = vma->vm_private_data;
+    struct drm_device *ddev = bo->base.dev;
+    vm_fault_t ret = VM_FAULT_NOPAGE;
+    unsigned long address;
+    unsigned long pfn;
+    struct page *page;
+
+    /* Allocate new dummy page to map all the VA range in this VMA 
to it*/

+    page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+    if (!page)
+    return VM_FAULT_OOM;
+
+    pfn = page_to_pfn(page);
+
+    /* Prefault the entire VMA range right away to avoid further 
faults */
+    for (address = vma->vm_start; address < vma->vm_end; address += 
PAGE_SIZE) {

+



+    if (unlikely(address >= vma->vm_end))
+    break;


That extra check can be removed as far as I can see.



+
+    if (vma->vm_flags & VM_MIXEDMAP)
+    ret = vmf_insert_mixed_prot(vma, address,
+    __pfn_to_pfn_t(pfn, PFN_DEV),
+    prot);
+    else
+    ret = vmf_insert_pfn_prot(vma, address, pfn, prot);
+    }
+



+    /* Set the page to be freed using drmm release action */
+    if (drmm_add_action_or_reset(ddev, ttm_bo_release_dummy_page, 
page))

+    return VM_FAULT_OOM;


You should probably move that before inserting the page into the VMA 
and also free the allocated page if it goes wrong.



drmm_add_action_or_reset will automatically release the page if the add 
action fails; that's the 'reset' part of the function.


Andrey




Apart from that patch looks good to me,
Christian.


+
+    return ret;
+}
+EXPORT_SYMBOL(ttm_bo_vm_dummy_page);
+
  vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
  {
  struct vm_area_struct *vma = vmf->vma;
  pgprot_t prot;
  struct ttm_buffer_object *bo = vma->vm_private_data;
+    struct drm_device *ddev = bo->base.dev;
  vm_fault_t ret;
+    int idx;
    ret = ttm_bo_vm_reserve(bo, vmf);
  if (ret)
  return ret;
    prot = vma->vm_page_prot;
-    ret = ttm_bo_vm_fault_reserved(vmf, prot, 
TTM_BO_VM_NUM_PREFAULT, 1);

+    if (drm_dev_enter(ddev, )) {
+    ret = ttm_bo_vm_fault_reserved(vmf, prot, 
TTM_BO_VM_NUM_PREFAULT, 1);

+    drm_dev_exit(idx);
+    } else {
+    ret = ttm_bo_vm_dummy_page(vmf, prot);
+    }
  if (ret == VM_FAULT_RETRY && !(vmf->flags & 
FAULT_FLAG_RETRY_NOWAIT))

  return ret;
  diff --git a/include/drm/ttm/ttm_bo_api.h 
b/include/drm/ttm/ttm_bo_api.h

index 639521880c29..254ede97f8e3 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -620,4 +620,6 @@ int ttm_bo_vm_access(struct vm_area_struct *vma, 
unsigned long addr,

   void *buf, int len, int write);
  bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all);
  +vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot);
+
  #endif



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: Align serial size in drm_amdgpu_info_vbios

2021-05-11 Thread Christian König

Yeah, but umr is making strong use of sysfs as well.

The only justification of this interface would be if we want to use it 
in Mesa.


And I agree with Marek that this looks redundant with the device structure to 
me as well.


Christian.

Am 11.05.21 um 16:04 schrieb Deucher, Alexander:


[AMD Public Use]


It's being used by umr and some other smi tools to provide vbios 
information for debugging.


Alex


*From:* amd-gfx  on behalf of 
Marek Olšák 

*Sent:* Tuesday, May 11, 2021 4:18 AM
*To:* Christian König 
*Cc:* Kees Cook ; Gu, JiaWei (Will) 
; amd-gfx list ; 
Deng, Emily ; Alex Deucher 
; Nieto, David M 
*Subject:* Re: [PATCH] drm/amdgpu: Align serial size in 
drm_amdgpu_info_vbios

Mesa doesn't use sysfs.

Note that this is a uapi, meaning that once it's in the kernel, it 
can't be changed like that.


What's the use case for this new interface? Isn't it partially 
redundant with the current device info structure, which seems to have 
the equivalent of dev_id and rev_id?


Marek

On Tue, May 11, 2021 at 3:51 AM Christian König 
> wrote:


Marek and other userspace folks need to decide that.

Basic question here is if Mesa is already accessing sysfs nodes
for OpenGL or RADV. If that is the case then we should probably
expose the information there as well.

If that isn't the case (which I think it is) then we should
implement it as IOCTL.

Regards,
Christian.

Am 10.05.21 um 22:19 schrieb Nieto, David M:


One of the primary use cases is to add this information to the
renderer string. I am not sure if there are other cases of UMD
drivers accessing sysfs nodes, but in terms of permissions, if a
client is authenticated and opens the render device then it can
use the IOCTL; it is unclear to me whether we can make such an
assumption for sysfs nodes…

I think there is value in having both tbh.

Regards,

David

*From: *Christian König 

*Date: *Monday, May 10, 2021 at 6:48 AM
*To: *"Nieto, David M" 
, "Gu, JiaWei (Will)"
 
*Cc: *Alex Deucher 
, "Deng, Emily"
 , Kees Cook
 , amd-gfx
list 

*Subject: *Re: [PATCH] drm/amdgpu: Align serial size in
drm_amdgpu_info_vbios

Well we could add both as sysfs file(s).

Question here is rather what is the primary use case of this and
if the application has the necessary access permissions to the
sysfs files?

Regards,
Christian.

Am 10.05.21 um 15:42 schrieb Nieto, David M:

Then the application would need to issue the ioctl and then
open a sysfs file to get all the information it needs. It
makes little sense from a programming perspective to add an
incomplete interface in my opinion



*From:*Gu, JiaWei (Will) 

*Sent:* Monday, May 10, 2021 12:13:07 AM
*To:* Nieto, David M 

*Cc:* Alex Deucher 
; amd-gfx list

; Kees Cook
 ; Deng,
Emily  
*Subject:* RE: [PATCH] drm/amdgpu: Align serial size in
drm_amdgpu_info_vbios

[AMD Official Use Only - Internal Distribution Only]

Hi David,

What I meant is to ONLY delete the serial[16] from
drm_amdgpu_info_vbios, not the whole struct.

struct drm_amdgpu_info_vbios {
    __u8 name[64];
    __u32 dbdf;
    __u8 vbios_pn[64];
    __u32 version;
    __u8 date[32];
    __u8 serial[16]; // jiawei: shall we delete this
    __u32 dev_id;
    __u32 rev_id;
    __u32 sub_dev_id;
    __u32 sub_ved_id;
};

serial[16] in drm_amdgpu_info_vbios is copied from
adev->serial, but there's already a sysfs node named
serial_number, which exposes it.

static ssize_t amdgpu_device_get_serial_number(struct device
*dev,
    struct device_attribute *attr, char *buf)
{
    struct drm_device *ddev = dev_get_drvdata(dev);
    struct amdgpu_device *adev = ddev->dev_private;

    return snprintf(buf, PAGE_SIZE, "%s\n", adev->serial);
}

Thanks,
Jiawei


-Original Message-
From: Nieto, David M 

Sent: Monday, May 10, 2021 2:53 PM
 

Re: [PATCH] drm/amdgpu: Align serial size in drm_amdgpu_info_vbios

2021-05-11 Thread Deucher, Alexander
[AMD Public Use]

It's being used by umr and some other smi tools to provide vbios information 
for debugging.

Alex


From: amd-gfx  on behalf of Marek Olšák 

Sent: Tuesday, May 11, 2021 4:18 AM
To: Christian König 
Cc: Kees Cook ; Gu, JiaWei (Will) ; 
amd-gfx list ; Deng, Emily ; 
Alex Deucher ; Nieto, David M 
Subject: Re: [PATCH] drm/amdgpu: Align serial size in drm_amdgpu_info_vbios

Mesa doesn't use sysfs.

Note that this is a uapi, meaning that once it's in the kernel, it can't be 
changed like that.

What's the use case for this new interface? Isn't it partially redundant with 
the current device info structure, which seems to have the equivalent of dev_id 
and rev_id?

Marek

On Tue, May 11, 2021 at 3:51 AM Christian König 
mailto:ckoenig.leichtzumer...@gmail.com>> 
wrote:
Marek and other userspace folks need to decide that.

Basic question here is if Mesa is already accessing sysfs nodes for OpenGL or 
RADV. If that is the case then we should probably expose the information there 
as well.

If that isn't the case (which I think it is) then we should implement it as 
IOCTL.

Regards,
Christian.

Am 10.05.21 um 22:19 schrieb Nieto, David M:

One of the primary use cases is to add this information to the renderer string. 
I am not sure if there are other cases of UMD drivers accessing sysfs nodes, 
but in terms of permissions, if a client is authenticated and opens the 
render device then it can use the IOCTL; it is unclear to me whether we can make 
such an assumption for sysfs nodes…



I think there is value in having both tbh.



Regards,

David



From: Christian König 

Date: Monday, May 10, 2021 at 6:48 AM
To: "Nieto, David M" , "Gu, 
JiaWei (Will)" 
Cc: Alex Deucher , "Deng, 
Emily" , Kees Cook 
, amd-gfx list 

Subject: Re: [PATCH] drm/amdgpu: Align serial size in drm_amdgpu_info_vbios



Well we could add both as sysfs file(s).

Question here is rather what is the primary use case of this and if the 
application has the necessary access permissions to the sysfs files?

Regards,
Christian.

Am 10.05.21 um 15:42 schrieb Nieto, David M:

Then the application would need to issue the ioctl and then open a sysfs file 
to get all the information it needs. It makes little sense from a programming 
perspective to add an incomplete interface in my opinion





From: Gu, JiaWei (Will) 
Sent: Monday, May 10, 2021 12:13:07 AM
To: Nieto, David M 
Cc: Alex Deucher ; amd-gfx 
list ; 
Kees Cook ; Deng, Emily 

Subject: RE: [PATCH] drm/amdgpu: Align serial size in drm_amdgpu_info_vbios



[AMD Official Use Only - Internal Distribution Only]

Hi David,

What I meant is to ONLY delete the serial[16] from drm_amdgpu_info_vbios, not 
the whole struct.

struct drm_amdgpu_info_vbios {
__u8 name[64];
__u32 dbdf;
__u8 vbios_pn[64];
__u32 version;
__u8 date[32];
__u8 serial[16]; // jiawei: shall we delete this
__u32 dev_id;
__u32 rev_id;
__u32 sub_dev_id;
__u32 sub_ved_id;
};

serial[16] in drm_amdgpu_info_vbios is copied from adev->serial, but there's 
already a sysfs node named serial_number, which exposes it.

static ssize_t amdgpu_device_get_serial_number(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;

return snprintf(buf, PAGE_SIZE, "%s\n", adev->serial);
}

Thanks,
Jiawei


-Original Message-
From: Nieto, David M 
Sent: Monday, May 10, 2021 2:53 PM
To: Gu, JiaWei (Will) 
Cc: Alex Deucher ; amd-gfx 
list ; 
Kees Cook ; Deng, Emily 

Subject: Re: [PATCH] drm/amdgpu: Align serial size in drm_amdgpu_info_vbios

No, this structure contains all the details of the vbios: date, serial number, 
name, etc.

The sysfs node only contains the vbios name string

> On May 9, 2021, at 23:33, Gu, JiaWei (Will) 
>  wrote:
>
> [AMD Official Use Only - Internal Distribution Only]
>
> With a second thought,
> __u8 serial[16] in drm_amdgpu_info_vbios is a bit redundant, sysfs 
> serial_number already exposes it.
>
> Is it fine to abandon it from drm_amdgpu_info_vbios struct? @Alex
> Deucher @Nieto, David M
>
> Best regards,
> Jiawei
>
> -Original Message-
> From: Alex Deucher 
> Sent: Sunday, May 9, 

Re: [PATCH] drm/amdkfd: refine the poison data consumption handling

2021-05-11 Thread Felix Kuehling
Am 2021-05-11 um 4:06 a.m. schrieb Dennis Li:
> User applications may register the KFD_EVENT_TYPE_HW_EXCEPTION and

I guess the HW exception event is sent because the current handling of
poison consumption triggers a mode2 reset. If that can be removed in the
future, then we should not send a HW_EXCEPTION any more.


> KFD_EVENT_TYPE_MEMORY events, so the driver can notify them when poison data
> is consumed. Besides that, some applications may register a SIGBUS signal
> handler. These applications will handle poison data by themselves, exiting
> or re-creating the context to re-dispatch work.
>
> Signed-off-by: Dennis Li 

Reviewed-by: Felix Kuehling 


>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> index ba2c2ce0c55a..4d210f23c33c 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> @@ -1050,3 +1050,42 @@ void kfd_signal_reset_event(struct kfd_dev *dev)
>   }
>   srcu_read_unlock(_processes_srcu, idx);
>  }
> +
> +void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid)
> +{
> + struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
> + struct kfd_hsa_memory_exception_data memory_exception_data;
> + struct kfd_hsa_hw_exception_data hw_exception_data;
> + struct kfd_event *ev;
> + uint32_t id = KFD_FIRST_NONSIGNAL_EVENT_ID;
> +
> + if (!p)
> + return; /* Presumably process exited. */
> +
> + memset(_exception_data, 0, sizeof(hw_exception_data));
> + hw_exception_data.gpu_id = dev->id;
> + hw_exception_data.memory_lost = 1;
> + hw_exception_data.reset_cause = KFD_HW_EXCEPTION_ECC;
> +
> + memset(_exception_data, 0, sizeof(memory_exception_data));
> + memory_exception_data.ErrorType = KFD_MEM_ERR_POISON_CONSUMED;
> + memory_exception_data.gpu_id = dev->id;
> + memory_exception_data.failure.imprecise = true;
> +
> + mutex_lock(>event_mutex);
> + idr_for_each_entry_continue(>event_idr, ev, id) {
> + if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
> + ev->hw_exception_data = hw_exception_data;
> + set_event(ev);
> + }
> +
> + if (ev->type == KFD_EVENT_TYPE_MEMORY) {
> + ev->memory_exception_data = memory_exception_data;
> + set_event(ev);
> + }
> + }
> + mutex_unlock(>event_mutex);
> +
> + /* user application will handle SIGBUS signal */
> + send_sig(SIGBUS, p->lead_thread, 0);
> +}
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
> index 97c36e3c8c80..9f9b1dfb9c37 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
> @@ -230,7 +230,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev,
>   sq_intr_err);
>   if (sq_intr_err != 
> SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST &&
>   sq_intr_err != 
> SQ_INTERRUPT_ERROR_TYPE_MEMVIOL) {
> - kfd_signal_hw_exception_event(pasid);
> + kfd_signal_poison_consumed_event(dev, 
> pasid);
>   amdgpu_amdkfd_gpu_reset(dev->kgd);
>   return;
>   }
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 64552f6b8ba4..daa9d47514c6 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -1144,6 +1144,8 @@ void kfd_signal_vm_fault_event(struct kfd_dev *dev, u32 
> pasid,
>  
>  void kfd_signal_reset_event(struct kfd_dev *dev);
>  
> +void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid);
> +
>  void kfd_flush_tlb(struct kfd_process_device *pdd);
>  
>  int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p);
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/ttm: use dma_alloc_pages for the page pool

2021-05-11 Thread Christoph Hellwig
On Tue, May 11, 2021 at 09:35:20AM +0200, Christian König wrote:
> We are certainly going to need the drm_need_swiotlb() for userptr support 
> (unless we add some approach for drivers to opt out of swiotlb).

swiotlb use is driven by three things:

 1) addressing limitations of the device
 2) addressing limitations of the interconnect
 3) virtualization modes that require it

not sure how the driver could opt out.  What is the problem with userptr
support?

> Then while I really want to get rid of GFP_DMA32 as well I'm not 100% sure 
> if we can handle this without the flag.

Note that this is still using GFP_DMA32 underneath where required,
just in a layer that can decide that sensibly.

> And last we need something better to store the DMA address and order than 
> allocating a separate memory object for each page.

Yeah.  If you use __GFP_COMP for the allocations we can find the order
from the page itself, which might be useful.  For 64-bit platforms
the dma address could be stored in page->private, or depending on how
the page gets used the dma_addr field in struct page that overloads
the lru field and is used by the networking page pool could be used.

Maybe we could even have a common page pool between net and drm, but
I don't want to go there myself, not being an expert on either subsystem.
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v6 04/16] drm/amdkfd: Split kfd suspend from devie exit

2021-05-11 Thread Deucher, Alexander
[AMD Public Use]

Typo in the subject: devie > device

Alex

From: Grodzovsky, Andrey 
Sent: Monday, May 10, 2021 12:36 PM
To: dri-de...@lists.freedesktop.org ; 
amd-gfx@lists.freedesktop.org ; 
linux-...@vger.kernel.org ; 
ckoenig.leichtzumer...@gmail.com ; 
daniel.vet...@ffwll.ch ; Wentland, Harry 

Cc: ppaala...@gmail.com ; Deucher, Alexander 
; gre...@linuxfoundation.org 
; helg...@kernel.org ; 
Kuehling, Felix ; Grodzovsky, Andrey 

Subject: [PATCH v6 04/16] drm/amdkfd: Split kfd suspend from devie exit

Helps to expedite HW related stuff to amdgpu_pci_remove

Signed-off-by: Andrey Grodzovsky 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_device.c| 3 ++-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 5f6696a3c778..2b06dee9a0ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -170,7 +170,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 }
 }

-void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev)
+void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev)
 {
 if (adev->kfd.dev) {
 kgd2kfd_device_exit(adev->kfd.dev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 14f68c028126..f8e10af99c28 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -127,7 +127,7 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
 const void *ih_ring_entry);
 void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
 void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
-void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev);
+void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev);
 int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
 uint32_t vmid, uint64_t gpu_addr,
 uint32_t *ib_cmd, uint32_t ib_len);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 357b9bf62a1c..ab6d2a43c9a3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -858,10 +858,11 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 return kfd->init_complete;
 }

+
+
 void kgd2kfd_device_exit(struct kfd_dev *kfd)
 {
 if (kfd->init_complete) {
-   kgd2kfd_suspend(kfd, false);
 device_queue_manager_uninit(kfd->dqm);
 kfd_interrupt_exit(kfd);
 kfd_topology_remove_device(kfd);
--
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: Field type update in drm_amdgpu_info_vbios

2021-05-11 Thread Deucher, Alexander
[AMD Official Use Only - Internal Distribution Only]

Please update the umr patch as well.

Acked-by: Alex Deucher 


From: amd-gfx  on behalf of Jiawei Gu 

Sent: Tuesday, May 11, 2021 1:31 AM
To: amd-gfx@lists.freedesktop.org 
Cc: StDenis, Tom ; Gu, JiaWei (Will) ; 
keesc...@chromium.org ; Nieto, David M 

Subject: [PATCH] drm/amdgpu: Field type update in drm_amdgpu_info_vbios

Use numeric type serial in drm_amdgpu_info_vbios instead.

Signed-off-by: Jiawei Gu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 +-
 include/uapi/drm/amdgpu_drm.h   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 75f34a9008e7..e1008a79b441 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -871,7 +871,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
 memcpy(vbios_info.vbios_pn, atom_context->vbios_pn, 
sizeof(atom_context->vbios_pn));
 vbios_info.version = atom_context->version;
 memcpy(vbios_info.date, atom_context->date, 
sizeof(atom_context->date));
-   memcpy(vbios_info.serial, adev->serial, 
sizeof(adev->serial));
+   vbios_info.serial = adev->unique_id;
 vbios_info.dev_id = adev->pdev->device;
 vbios_info.rev_id = adev->pdev->revision;
 vbios_info.sub_dev_id = atom_context->sub_dev_id;
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 2b487a8d2727..2d9e84658bbc 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -957,7 +957,7 @@ struct drm_amdgpu_info_vbios {
 __u8 vbios_pn[64];
 __u32 version;
 __u8 date[32];
-   __u8 serial[16];
+   __u64 serial;
 __u32 dev_id;
 __u32 rev_id;
 __u32 sub_dev_id;
--
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfxdata=04%7C01%7Calexander.deucher%40amd.com%7C9c3ad129089d4585544f08d9143e1892%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637563079204768525%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000sdata=efCxiLv4J5KWmoKqy1geARlt%2FJppT1eetVI3R%2FMNfzw%3Dreserved=0
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amd/pm: Update aldebaran pmfw interface

2021-05-11 Thread Zhang, Hawking
[AMD Public Use]

Reviewed-by: Hawking Zhang 

Regards,
Hawking
_
From: Lazar, Lijo 
Sent: Tuesday, May 11, 2021 20:30
To: amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking ; Xu, Feifei ; 
Wang, Kevin(Yang) 
Subject: [PATCH] drm/amd/pm: Update aldebaran pmfw interface


[AMD Public Use]

Update aldebaran driver-PMFW interface to version 0x07

Signed-off-by: Lijo Lazar lijo.la...@amd.com
---
 drivers/gpu/drm/amd/pm/inc/smu13_driver_if_aldebaran.h | 7 +--
 drivers/gpu/drm/amd/pm/inc/smu_v13_0.h | 2 +-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/inc/smu13_driver_if_aldebaran.h 
b/drivers/gpu/drm/amd/pm/inc/smu13_driver_if_aldebaran.h
index d23533bda002..a017983ff1fa 100644
--- a/drivers/gpu/drm/amd/pm/inc/smu13_driver_if_aldebaran.h
+++ b/drivers/gpu/drm/amd/pm/inc/smu13_driver_if_aldebaran.h
@@ -64,7 +64,7 @@
 #define FEATURE_SMUIO_CG_BIT28
 #define FEATURE_THM_CG_BIT  29
 #define FEATURE_CLK_CG_BIT  30
-#define FEATURE_SPARE_31_BIT31
+#define FEATURE_EDC_BIT 31
 #define FEATURE_SPARE_32_BIT32
 #define FEATURE_SPARE_33_BIT33
 #define FEATURE_SPARE_34_BIT34
@@ -439,8 +439,11 @@ typedef struct {
   int8_t   XgmiOffset; // in Amps
   uint8_t  Padding_TelemetryXgmi;

+  uint16_t  EdcPowerLimit;
+  uint16_t  spare6;
+
   //reserved
-  uint32_t reserved[15];
+  uint32_t reserved[14];

 } PPTable_t;

diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h 
b/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h
index 8145e1cbf181..1687709507b3 100644
--- a/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h
@@ -26,7 +26,7 @@
 #include "amdgpu_smu.h"

 #define SMU13_DRIVER_IF_VERSION_INV 0xFFFFFFFF
-#define SMU13_DRIVER_IF_VERSION_ALDE 0x6
+#define SMU13_DRIVER_IF_VERSION_ALDE 0x07

 /* MP Apertures */
 #define MP0_Public 0x03800000
--
2.17.1


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amd/pm: Update aldebaran pmfw interface

2021-05-11 Thread Lazar, Lijo
[AMD Public Use]

Update aldebaran driver-PMFW interface to version 0x07

Signed-off-by: Lijo Lazar lijo.la...@amd.com
---
 drivers/gpu/drm/amd/pm/inc/smu13_driver_if_aldebaran.h | 7 +--
 drivers/gpu/drm/amd/pm/inc/smu_v13_0.h | 2 +-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/inc/smu13_driver_if_aldebaran.h 
b/drivers/gpu/drm/amd/pm/inc/smu13_driver_if_aldebaran.h
index d23533bda002..a017983ff1fa 100644
--- a/drivers/gpu/drm/amd/pm/inc/smu13_driver_if_aldebaran.h
+++ b/drivers/gpu/drm/amd/pm/inc/smu13_driver_if_aldebaran.h
@@ -64,7 +64,7 @@
 #define FEATURE_SMUIO_CG_BIT28
 #define FEATURE_THM_CG_BIT  29
 #define FEATURE_CLK_CG_BIT  30
-#define FEATURE_SPARE_31_BIT31
+#define FEATURE_EDC_BIT 31
 #define FEATURE_SPARE_32_BIT32
 #define FEATURE_SPARE_33_BIT33
 #define FEATURE_SPARE_34_BIT34
@@ -439,8 +439,11 @@ typedef struct {
   int8_t   XgmiOffset; // in Amps
   uint8_t  Padding_TelemetryXgmi;

+  uint16_t  EdcPowerLimit;
+  uint16_t  spare6;
+
   //reserved
-  uint32_t reserved[15];
+  uint32_t reserved[14];

 } PPTable_t;

diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h 
b/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h
index 8145e1cbf181..1687709507b3 100644
--- a/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h
@@ -26,7 +26,7 @@
 #include "amdgpu_smu.h"

 #define SMU13_DRIVER_IF_VERSION_INV 0xFFFFFFFF
-#define SMU13_DRIVER_IF_VERSION_ALDE 0x6
+#define SMU13_DRIVER_IF_VERSION_ALDE 0x07

 /* MP Apertures */
 #define MP0_Public 0x03800000
--
2.17.1


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/display: Expose active display color configurations to userspace

2021-05-11 Thread Pekka Paalanen
On Tue, 11 May 2021 12:03:30 +0200
Werner Sembach  wrote:

> Am 11.05.21 um 10:07 schrieb Pekka Paalanen:
> > On Mon, 10 May 2021 17:47:01 -0400
> > Alex Deucher  wrote:
> >  
> >> On Fri, May 7, 2021 at 3:27 PM Werner Sembach  
> >> wrote:  
> >>> xrandr --prop and other userspace info tools have currently no way of
> >>> telling which color configuration is used on HDMI and DP ports.
> >>>
> >>> The ongoing transsition from HDMI 1.4 to 2.0 and the different bandwidth
> >>> requirements of YCbCr 4:2:0 and RGB color format raise different
> >>> incompatibilities. Having these configuration information readily
> >>> available is a useful tool in debuging washed out colors, color artefacts
> >>> on small fonts and missing refreshrate options.
> >> I think we would ideally want these as generic connector properties
> >> rather than AMD specific ones since they are not really AMD specific.
> >> I believe there is already a generic drm property (max_bpc) for the
> >> color depth.  At this point, I think having a generic RGB vs YCbCr
> >> property would make sense.  I'm not sure about the color space.  
> 
> Problem is: amdgpu does not really use generic structs for these 3
> properties as far as I can tell. It uses own defines
> https://elixir.bootlin.com/linux/v5.13-rc1/source/drivers/gpu/drm/amd/display/dc/dc_hw_types.h#L647
> in own structs
> https://elixir.bootlin.com/linux/v5.13-rc1/source/drivers/gpu/drm/amd/display/dc/dc_stream.h#L141
> 
> Intel uses generic defines
> https://elixir.bootlin.com/linux/v5.13-rc1/source/include/linux/hdmi.h#L71
> https://elixir.bootlin.com/linux/v5.13-rc1/source/include/drm/drm_dp_helper.h#L1568
> split up between dp and hdmi in own structs
> https://elixir.bootlin.com/linux/v5.13-rc1/source/drivers/gpu/drm/i915/display/intel_display_types.h#L879
> 
> So the property would need some translation from amd, intel, hdmi,
> and dp to one enumeration representing all?

If so, much better to do that in the kernel in one place per driver
that actually know what they mean, rather than in half of the Wayland
compositors by people who barely understand even the general concept.
Like me.

> > Hi,
> >
> > I believe that userspace will definitely want to know what exactly is
> > going on on the monitor cable, so I think exposing more information is
> > good, but I agree that it needs to be done with generic KMS properties
> > as much as possible. Userspace is not going to bother having explicit
> > code for driver-specific properties.
> >
> > I think a major use case will be Wayland color management, where a
> > Wayland compositor will want to make sure that the video signal
> > parameters have not changed since the monitor was last measured
> > (profiled). If the signal configuration is different, the measured
> > color profile may be invalid and therefore the end user needs to be
> > warned. See some ideas in
> > https://gitlab.freedesktop.org/wayland/weston/-/issues/467
> > under the heading "Color calibration auditing system".
> >
> > About the color space: is that something a kernel driver will decide
> > on its own? I mean in the same sense as the driver will negotiate
> > HDMI/DP link parameters, perhaps falling back to smaller requirements
> > if higher requirements signal does not seem to work.
> >
> > We only need readback properties for things that generic userspace
> > won't or cannot control explicitly, e.g. because the kernel driver has
> > room to make a choice rather than fail.  
> 
> Some explanation why I choose these 3 properties:
> 
> output color space: Mainly to see if full or limited RGB was chosen.

IOW, the driver makes the decision. Therefore userspace will need to
know what it picked. Ok.

> While in theory the driver should choose the right one automatically,
> I read articles that in some cases it doesn't (hence why the
> "Broadcast RGB" property for intel-gfx driver and "output_csc" for
> the old radeon driver exist). The next step ofc would be to bring
> over the "Broadcast RGB" property to amdgpu/make it a generic
> property. But then still: having a feedback channel to see if the
> chosen setting got correctly applied should not hurt in any way,
> shape, or form?

Feedback is good IMO, yes, when it's not clearly redundant. As long as
you don't tie the driver developers' hands in a knot or preclude
support for hardware not invented yet. But I think the option to not
expose a specific KMS property is an escape hatch enough. You might
have problems with the requirements for DRM UAPI additions though,
since they call for a proper userspace consumer.

What defines what is "the right one"?

I believe that we also need to aim for a fully known display pipeline,
so if the driver is doing compression from full range 8-bit to limited
range 8-bit, losing a little precision, userspace needs to know. This
is a long stretch and maybe not even fully feasible, but I think it
would be good to get as close as possible - within the limits of being
driver-agnostic UAPI.

[PATCH] drm/amd/amdgpu: psp program IH_RB_CTRL on navi12 and navi21

2021-05-11 Thread YuBiao Wang
[Why]
IH_RB_CNTL is blocked by PSP so we need to ask psp to help config it.

[How]
Move psp ip block before ih, and use psp to program IH_RB_CNTL under sriov.

Signed-off-by: YuBiao Wang 
---
 drivers/gpu/drm/amd/amdgpu/navi10_ih.c | 20 ++--
 drivers/gpu/drm/amd/amdgpu/nv.c|  4 ++--
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c 
b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
index f4e4040bbd25..5ee923ccdeb3 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
@@ -151,7 +151,15 @@ static int navi10_ih_toggle_ring_interrupts(struct 
amdgpu_device *adev,
/* enable_intr field is only valid in ring0 */
if (ih == &adev->irq.ih)
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 
0));
-   WREG32(ih_regs->ih_rb_cntl, tmp);
+
+   if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+   if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) {
+   DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
+   return -ETIMEDOUT;
+   }
+   } else {
+   WREG32(ih_regs->ih_rb_cntl, tmp);
+   }
 
if (enable) {
ih->enabled = true;
@@ -261,7 +269,15 @@ static int navi10_ih_enable_ring(struct amdgpu_device 
*adev,
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 0);
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_FULL_DRAIN_ENABLE, 1);
}
-   WREG32(ih_regs->ih_rb_cntl, tmp);
+
+   if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+   if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) {
+   DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
+   return -ETIMEDOUT;
+   }
+   } else {
+   WREG32(ih_regs->ih_rb_cntl, tmp);
+   }
 
if (ih == &adev->irq.ih) {
/* set the ih ring 0 writeback address whether it's enabled or 
not */
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index 75d1f9b939b2..aebd330daaca 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -742,8 +742,8 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
case CHIP_NAVI12:
amdgpu_device_ip_block_add(adev, &nv_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block);
-   amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block);
amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
+   amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)
amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
@@ -764,9 +764,9 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
case CHIP_SIENNA_CICHLID:
amdgpu_device_ip_block_add(adev, &nv_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block);
-   amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block);
if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP))
amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
+   amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
is_support_sw_smu(adev))
amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/display: Expose active display color configurations to userspace

2021-05-11 Thread Werner Sembach
Am 11.05.21 um 10:07 schrieb Pekka Paalanen:
> On Mon, 10 May 2021 17:47:01 -0400
> Alex Deucher  wrote:
>
>> On Fri, May 7, 2021 at 3:27 PM Werner Sembach  
>> wrote:
>>> xrandr --prop and other userspace info tools have currently no way of
>>> telling which color configuration is used on HDMI and DP ports.
>>>
>>> The ongoing transsition from HDMI 1.4 to 2.0 and the different bandwidth
>>> requirements of YCbCr 4:2:0 and RGB color format raise different
>>> incompatibilities. Having these configuration information readily
>>> available is a useful tool in debuging washed out colors, color artefacts
>>> on small fonts and missing refreshrate options.  
>> I think we would ideally want these as generic connector properties
>> rather than AMD specific ones since they are not really AMD specific.
>> I believe there is already a generic drm property (max_bpc) for the
>> color depth.  At this point, I think having a generic RGB vs YCbCr
>> property would make sense.  I'm not sure about the color space.

Problem is: amdgpu does not really use generic structs for these 3 properties 
as far as I can tell. It uses own defines 
https://elixir.bootlin.com/linux/v5.13-rc1/source/drivers/gpu/drm/amd/display/dc/dc_hw_types.h#L647
 in own structs 
https://elixir.bootlin.com/linux/v5.13-rc1/source/drivers/gpu/drm/amd/display/dc/dc_stream.h#L141

Intel uses generic defines 
https://elixir.bootlin.com/linux/v5.13-rc1/source/include/linux/hdmi.h#L71 
https://elixir.bootlin.com/linux/v5.13-rc1/source/include/drm/drm_dp_helper.h#L1568
 split up between dp and hdmi in own structs 
https://elixir.bootlin.com/linux/v5.13-rc1/source/drivers/gpu/drm/i915/display/intel_display_types.h#L879

So the property would need some translation from amd, intel, hdmi, and dp to 
one enumeration representing all?

> Hi,
>
> I believe that userspace will definitely want to know what exactly is
> going on on the monitor cable, so I think exposing more information is
> good, but I agree that it needs to be done with generic KMS properties
> as much as possible. Userspace is not going to bother having explicit
> code for driver-specific properties.
>
> I think a major use case will be Wayland color management, where a
> Wayland compositor will want to make sure that the video signal
> parameters have not changed since the monitor was last measured
> (profiled). If the signal configuration is different, the measured
> color profile may be invalid and therefore the end user needs to be
> warned. See some ideas in
> https://gitlab.freedesktop.org/wayland/weston/-/issues/467
> under the heading "Color calibration auditing system".
>
> About the color space: is that something a kernel driver will decide
> on its own? I mean in the same sense as the driver will negotiate
> HDMI/DP link parameters, perhaps falling back to smaller requirements
> if higher requirements signal does not seem to work.
>
> We only need readback properties for things that generic userspace
> won't or cannot control explicitly, e.g. because the kernel driver has
> room to make a choice rather than fail.

Some explanation why I choose these 3 properties:

output color space: Mainly to see if full or limited RGB was chosen. While in 
theory the driver should choose the right one automatically, I read articles 
that in some cases it doesn't (hence why the "Broadcast RGB" property for 
intel-gfx driver and "output_csc" for the old radeon driver exist). The next 
step ofc would be to bring over the "Broadcast RGB" property to amdgpu/make it 
a generic property. But then still: having a feedback channel to see if the 
chosen setting got correctly applied should not hurt in any way, shape, or form?

pixel encoding: Probably the most important of the 3: This should be 
accompanied with a "preferred pixel encoding" user controllable setting and is 
mainly thought of as a feedback channel for that, because it might not always be 
obvious if the Display + GPU + Driver + Link encoder combination actually 
supports and therefore applies the selected "preferred pixel encoding". For 
example: I have a display here that can display 4k@60Hz or WQHD@120Hz, but 
YCbCr is only supported by it for 4k@60Hz (also it's not supported for 4k30Hz).

The "preferred pixel encoding" setting is required because certain devices 
(both PC's and display's) might wrongly advertise their capabilities. The 
current fix in this case is to write a custom edid which is a kinda hacky 
solution. Examples:
1. RGB and YCbCr4:4:4 in theory carry the same amount of color information, but 
some displays look worse in one or the other, because they do bad internal 
conversion.
2. A laptop wants to output YCbCr4:4:4 but because of bad shielding of the 
port/the cable/the display, the screen goes black every few seconds. Using 
YCbCr4:2:0, and therefore a lower signal clock, stabilizes the connection 
without changing hardware.

color depth: While "max bpc" can be used to change the color depth, there is 
currently no way to 

Re: [PATCH] drm/ttm: use dma_alloc_pages for the page pool

2021-05-11 Thread Christian König

Am 11.05.21 um 10:50 schrieb Christoph Hellwig:

On Tue, May 11, 2021 at 09:35:20AM +0200, Christian König wrote:

We are certainly going to need the drm_need_swiotlb() for userptr support
(unless we add some approach for drivers to opt out of swiotlb).

swiotlb use is driven by three things:

  1) addressing limitations of the device
  2) addressing limitations of the interconnect
  3) virtualization modes that require it

not sure how the driver could opt out.  What is the problem with userptr
support?


userptr grabs the pages for a certain virtual memory address, map them 
in the IOMMU and then expect the device to have coherent access to it.


When SWIOTLB is in place we need to fail that gracefully, try to not 
expose the functionality or even don't load the driver in the first place.



Then while I really want to get rid of GFP_DMA32 as well I'm not 100% sure
if we can handle this without the flag.

Note that this is still using GFP_DMA32 underneath where required,
just in a layer that can decide that sensibly.


Completely agree, I'm just not sure if every driver gets its coherent 
mask right under every condition.


Might be a good idea to double check the coherent mask in nouveau/radeon 
when they want to use GFP_DMA32.



And last we need something better to store the DMA address and order than
allocating a separate memory object for each page.

Yeah.  If you use __GFP_COMP for the allocations we can find the order
from the page itself, which might be useful.  For 64-bit platforms
the dma address could be stored in page->private, or depending on how
the page gets used the dma_addr field in struct page that overloads
the lru field and is used by the networking page pool could be used.


Yes, I've considered that as well. But I do need the list_head and dma 
address at the same time.



Maybe we could even have a common page pool between net and drm, but
I don't want to go there myself, not being an expert on either subsystem.


I had the same thought and also the same concerns, can't judge what the 
net code is doing with this.


Regards,
Christian.
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: Align serial size in drm_amdgpu_info_vbios

2021-05-11 Thread Marek Olšák
Mesa doesn't use sysfs.

Note that this is a uapi, meaning that once it's in the kernel, it can't be
changed like that.

What's the use case for this new interface? Isn't it partially redundant
with the current device info structure, which seems to have the equivalent
of dev_id and rev_id?

Marek

On Tue, May 11, 2021 at 3:51 AM Christian König <
ckoenig.leichtzumer...@gmail.com> wrote:

> Marek and other userspace folks need to decide that.
>
> Basic question here is if Mesa is already accessing sysfs nodes for OpenGL
> or RADV. If that is the case then we should probably expose the information
> there as well.
>
> If that isn't the case (which I think it is) then we should implement it
> as IOCTL.
>
> Regards,
> Christian.
>
> Am 10.05.21 um 22:19 schrieb Nieto, David M:
>
> One of the primary usecases is to add this information to the renderer
> string, I am not sure if there are other cases of UMD drivers accessing
> sysfs nodes, but I think if we think permissions, if a client is
> authenticated and opens the render device then it can use the IOCTL, it is
> unclear to me we can make a such an assumption for sysfs nodes…
>
>
>
> I think there is value in having both tbh.
>
>
>
> Regards,
>
> David
>
>
>
> *From: *Christian König 
> 
> *Date: *Monday, May 10, 2021 at 6:48 AM
> *To: *"Nieto, David M"  , "Gu,
> JiaWei (Will)"  
> *Cc: *Alex Deucher  ,
> "Deng, Emily"  , Kees Cook
>  , amd-gfx list
>  
> *Subject: *Re: [PATCH] drm/amdgpu: Align serial size in
> drm_amdgpu_info_vbios
>
>
>
> Well we could add both as sysfs file(s).
>
> Question here is rather what is the primary use case of this and if the
> application has the necessary access permissions to the sysfs files?
>
> Regards,
> Christian.
>
> Am 10.05.21 um 15:42 schrieb Nieto, David M:
>
> Then the application would need to issue the ioctl and then open a sysfs
> file to get all the information it needs. It makes little sense from a
> programming perspective to add an incomplete interface in my opinion
>
>
> --
>
> *From:* Gu, JiaWei (Will)  
> *Sent:* Monday, May 10, 2021 12:13:07 AM
> *To:* Nieto, David M  
> *Cc:* Alex Deucher  ;
> amd-gfx list 
> ; Kees Cook 
> ; Deng, Emily 
> 
> *Subject:* RE: [PATCH] drm/amdgpu: Align serial size in
> drm_amdgpu_info_vbios
>
>
>
> [AMD Official Use Only - Internal Distribution Only]
>
> Hi David,
>
> What I meant is to ONLY delete the serial[16] from drm_amdgpu_info_vbios,
> not the whole struct.
>
> struct drm_amdgpu_info_vbios {
> __u8 name[64];
> __u32 dbdf;
> __u8 vbios_pn[64];
> __u32 version;
> __u8 date[32];
> __u8 serial[16]; // jiawei: shall we delete this
> __u32 dev_id;
> __u32 rev_id;
> __u32 sub_dev_id;
> __u32 sub_ved_id;
> };
>
> serial[16] in drm_amdgpu_info_vbios  copied from adev->serial, but there's
> already a sysfs named serial_number, which exposes it already.
>
> static ssize_t amdgpu_device_get_serial_number(struct device *dev,
> struct device_attribute *attr, char *buf)
> {
> struct drm_device *ddev = dev_get_drvdata(dev);
> struct amdgpu_device *adev = ddev->dev_private;
>
> return snprintf(buf, PAGE_SIZE, "%s\n", adev->serial);
> }
>
> Thanks,
> Jiawei
>
>
> -Original Message-
> From: Nieto, David M  
> Sent: Monday, May 10, 2021 2:53 PM
> To: Gu, JiaWei (Will)  
> Cc: Alex Deucher  ; amd-gfx
> list  ;
> Kees Cook  ; Deng, Emily
>  
> Subject: Re: [PATCH] drm/amdgpu: Align serial size in drm_amdgpu_info_vbios
>
> No, this structure contains all the details of the vbios: date, serial
> number, name, etc.
>
> The sysfs node only contains the vbios name string
>
> > On May 9, 2021, at 23:33, Gu, JiaWei (Will) 
>  wrote:
> >
> > [AMD Official Use Only - Internal Distribution Only]
> >
> > With a second thought,
> > __u8 serial[16] in drm_amdgpu_info_vbios is a bit redundant, sysfs
> serial_number already exposes it.
> >
> > Is it fine to abandon it from drm_amdgpu_info_vbios struct? @Alex
> > Deucher @Nieto, David M
> >
> > Best regards,
> > Jiawei
> >
> > -Original Message-
> > From: Alex Deucher  
> > Sent: Sunday, May 9, 2021 11:59 PM
> > To: Gu, JiaWei (Will)  
> > Cc: amd-gfx list 
> ; Kees Cook
> >  
> > Subject: Re: [PATCH] drm/amdgpu: Align serial size in
> > drm_amdgpu_info_vbios
> >
> >> On Sat, May 8, 2021 at 2:48 AM Jiawei Gu 
>  wrote:
> >>
> >> 20 should be serial char size now instead of 16.
> >>
> >> Signed-off-by: Jiawei Gu  
> >
> > Please make sure this keeps proper 64 bit alignment in the structure.
> >
> > Alex
> >
> >
> >> ---
> >> include/uapi/drm/amdgpu_drm.h | 2 +-
> >> 1 file changed, 1 insertion(+), 1 deletion(-)
> >>
> >> diff --git a/include/uapi/drm/amdgpu_drm.h
> >> b/include/uapi/drm/amdgpu_drm.h index 2b487a8d2727..1c20721f90da
> >> 100644
> >> --- a/include/uapi/drm/amdgpu_drm.h
> >> +++ b/include/uapi/drm/amdgpu_drm.h
> >> @@ -957,7 +957,7 @@ struct drm_amdgpu_info_vbios {
> >>

Re: [PATCH] drm/amd/display: Expose active display color configurations to userspace

2021-05-11 Thread Pekka Paalanen
On Mon, 10 May 2021 17:47:01 -0400
Alex Deucher  wrote:

> On Fri, May 7, 2021 at 3:27 PM Werner Sembach  
> wrote:
> >
> > xrandr --prop and other userspace info tools have currently no way of
> > telling which color configuration is used on HDMI and DP ports.
> >
> > The ongoing transsition from HDMI 1.4 to 2.0 and the different bandwidth
> > requirements of YCbCr 4:2:0 and RGB color format raise different
> > incompatibilities. Having these configuration information readily
> > available is a useful tool in debuging washed out colors, color artefacts
> > on small fonts and missing refreshrate options.  
> 
> I think we would ideally want these as generic connector properties
> rather than AMD specific ones since they are not really AMD specific.
> I believe there is already a generic drm property (max_bpc) for the
> color depth.  At this point, I think having a generic RGB vs YCbCr
> property would make sense.  I'm not sure about the color space.

Hi,

I believe that userspace will definitely want to know what exactly is
going on on the monitor cable, so I think exposing more information is
good, but I agree that it needs to be done with generic KMS properties
as much as possible. Userspace is not going to bother having explicit
code for driver-specific properties.

I think a major use case will be Wayland color management, where a
Wayland compositor will want to make sure that the video signal
parameters have not changed since the monitor was last measured
(profiled). If the signal configuration is different, the measured
color profile may be invalid and therefore the end user needs to be
warned. See some ideas in
https://gitlab.freedesktop.org/wayland/weston/-/issues/467
under the heading "Color calibration auditing system".

About the color space: is that something a kernel driver will decide
on its own? I mean in the same sense as the driver will negotiate
HDMI/DP link parameters, perhaps falling back to smaller requirements
if higher requirements signal does not seem to work.

We only need readback properties for things that generic userspace
won't or cannot control explicitly, e.g. because the kernel driver has
room to make a choice rather than fail.


Thanks,
pq


pgpGy19YysYU0.pgp
Description: OpenPGP digital signature
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdkfd: refine the poison data consumption handling

2021-05-11 Thread Dennis Li
User applications may register the KFD_EVENT_TYPE_HW_EXCEPTION and
KFD_EVENT_TYPE_MEMORY events, so the driver can notify them when poison data
is consumed. Besides that, some applications may register a SIGBUS signal
handler. These applications will handle poison data by themselves, and exit
or re-create the context to re-dispatch work.

Signed-off-by: Dennis Li 

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index ba2c2ce0c55a..4d210f23c33c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -1050,3 +1050,42 @@ void kfd_signal_reset_event(struct kfd_dev *dev)
}
srcu_read_unlock(_processes_srcu, idx);
 }
+
+void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid)
+{
+   struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+   struct kfd_hsa_memory_exception_data memory_exception_data;
+   struct kfd_hsa_hw_exception_data hw_exception_data;
+   struct kfd_event *ev;
+   uint32_t id = KFD_FIRST_NONSIGNAL_EVENT_ID;
+
+   if (!p)
+   return; /* Presumably process exited. */
+
+   memset(_exception_data, 0, sizeof(hw_exception_data));
+   hw_exception_data.gpu_id = dev->id;
+   hw_exception_data.memory_lost = 1;
+   hw_exception_data.reset_cause = KFD_HW_EXCEPTION_ECC;
+
+   memset(_exception_data, 0, sizeof(memory_exception_data));
+   memory_exception_data.ErrorType = KFD_MEM_ERR_POISON_CONSUMED;
+   memory_exception_data.gpu_id = dev->id;
+   memory_exception_data.failure.imprecise = true;
+
+   mutex_lock(>event_mutex);
+   idr_for_each_entry_continue(>event_idr, ev, id) {
+   if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
+   ev->hw_exception_data = hw_exception_data;
+   set_event(ev);
+   }
+
+   if (ev->type == KFD_EVENT_TYPE_MEMORY) {
+   ev->memory_exception_data = memory_exception_data;
+   set_event(ev);
+   }
+   }
+   mutex_unlock(>event_mutex);
+
+   /* user application will handle SIGBUS signal */
+   send_sig(SIGBUS, p->lead_thread, 0);
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index 97c36e3c8c80..9f9b1dfb9c37 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -230,7 +230,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev,
sq_intr_err);
if (sq_intr_err != 
SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST &&
sq_intr_err != 
SQ_INTERRUPT_ERROR_TYPE_MEMVIOL) {
-   kfd_signal_hw_exception_event(pasid);
+   kfd_signal_poison_consumed_event(dev, 
pasid);
amdgpu_amdkfd_gpu_reset(dev->kgd);
return;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 64552f6b8ba4..daa9d47514c6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -1144,6 +1144,8 @@ void kfd_signal_vm_fault_event(struct kfd_dev *dev, u32 
pasid,
 
 void kfd_signal_reset_event(struct kfd_dev *dev);
 
+void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid);
+
 void kfd_flush_tlb(struct kfd_process_device *pdd);
 
 int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p);
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/2] drm/amdgpu: fix fence calculation

2021-05-11 Thread Christian König

Am 10.05.21 um 22:29 schrieb David M Nieto:

The proper metric for fence utilization over several
contexts is a harmonic mean, but such a calculation is
prohibitive in kernel space, so the code approximates it.

Because the approximation diverges when one context has a
very small ratio compared with the other context, this change
filters out ratios smaller than 0.01%.

Signed-off-by: David M Nieto 
Change-Id: I5b6e0ce5f489a5f55855d35354a6a3653e9d613b
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 13 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h |  1 +
  2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 9036c93b4a0c..89ee464b9424 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -698,16 +698,27 @@ ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr 
*mgr, uint32_t hwip,
struct amdgpu_ctx_entity *centity;
ktime_t total = 0, max = 0;
  
+


Unrelated white space change.


if (idx >= AMDGPU_MAX_ENTITY_NUM)
return 0;
idp = >ctx_handles;
mutex_lock(>lock);
idr_for_each_entry(idp, ctx, id) {
+   ktime_t ttotal = tmax = ktime_set(0, 0);


There should be a blank line between declaration and code and please 
don't initialize local variables if it isn't necessary.


Christian.


if (!ctx->entities[hwip][idx])
continue;
  
  		centity = ctx->entities[hwip][idx];

-   amdgpu_ctx_fence_time(ctx, centity, , );
+   amdgpu_ctx_fence_time(ctx, centity, , );
+
+   /* Harmonic mean approximation diverges for very small
+* values. If ratio < 0.01% ignore
+*/
+   if (AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(tmax, ttotal))
+   continue;
+
+   total = ktime_add(total, ttotal);
+   max = ktime_after(tmax, max) ? tmax : max;
}
  
  	mutex_unlock(>lock);

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index 10dcf59a5c6b..3541dfb059ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -30,6 +30,7 @@ struct drm_file;
  struct amdgpu_fpriv;
  
  #define AMDGPU_MAX_ENTITY_NUM 4

+#define AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(max, total) (max > 16384ULL*total)
  
  struct amdgpu_ctx_entity {

uint64_tsequence;


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: Align serial size in drm_amdgpu_info_vbios

2021-05-11 Thread Christian König

Marek and other userspace folks need to decide that.

Basic question here is if Mesa is already accessing sysfs nodes for 
OpenGL or RADV. If that is the case then we should probably expose the 
information there as well.


If that isn't the case (which I think it is) then we should implement it 
as IOCTL.


Regards,
Christian.

Am 10.05.21 um 22:19 schrieb Nieto, David M:


One of the primary use cases is to add this information to the renderer 
string. I am not sure if there are other cases of UMD drivers 
accessing sysfs nodes, but in terms of permissions, if a 
client is authenticated and opens the render device then it can use 
the IOCTL; it is unclear to me that we can make such an assumption for 
sysfs nodes…


I think there is value in having both tbh.

Regards,

David

*From: *Christian König 
*Date: *Monday, May 10, 2021 at 6:48 AM
*To: *"Nieto, David M" , "Gu, JiaWei (Will)" 

*Cc: *Alex Deucher , "Deng, Emily" 
, Kees Cook , amd-gfx list 

*Subject: *Re: [PATCH] drm/amdgpu: Align serial size in 
drm_amdgpu_info_vbios


Well we could add both as sysfs file(s).

Question here is rather what is the primary use case of this and if 
the application has the necessary access permissions to the sysfs files?


Regards,
Christian.

Am 10.05.21 um 15:42 schrieb Nieto, David M:

Then the application would need to issue the ioctl and then open a
sysfs file to get all the information it needs. It makes little
sense from a programming perspective to add an incomplete
interface in my opinion



*From:*Gu, JiaWei (Will) 

*Sent:* Monday, May 10, 2021 12:13:07 AM
*To:* Nieto, David M 

*Cc:* Alex Deucher 
; amd-gfx list

; Kees Cook
 ; Deng,
Emily  
*Subject:* RE: [PATCH] drm/amdgpu: Align serial size in
drm_amdgpu_info_vbios

[AMD Official Use Only - Internal Distribution Only]

Hi David,

What I meant is to ONLY delete the serial[16] from
drm_amdgpu_info_vbios, not the whole struct.

struct drm_amdgpu_info_vbios {
    __u8 name[64];
    __u32 dbdf;
    __u8 vbios_pn[64];
    __u32 version;
    __u8 date[32];
    __u8 serial[16]; // jiawei: shall we delete this
    __u32 dev_id;
    __u32 rev_id;
    __u32 sub_dev_id;
    __u32 sub_ved_id;
};

serial[16] in drm_amdgpu_info_vbios is copied from adev->serial, but
there's already a sysfs node named serial_number which exposes it.

static ssize_t amdgpu_device_get_serial_number(struct device *dev,
    struct device_attribute *attr, char *buf)
{
    struct drm_device *ddev = dev_get_drvdata(dev);
    struct amdgpu_device *adev = ddev->dev_private;

    return snprintf(buf, PAGE_SIZE, "%s\n", adev->serial);
}

Thanks,
Jiawei


-Original Message-
From: Nieto, David M 

Sent: Monday, May 10, 2021 2:53 PM
To: Gu, JiaWei (Will)  
Cc: Alex Deucher 
; amd-gfx list

; Kees Cook
 ; Deng,
Emily  
Subject: Re: [PATCH] drm/amdgpu: Align serial size in
drm_amdgpu_info_vbios

No, this structure contains all the details of the vbios: date,
serial number, name, etc.

The sysfs node only contains the vbios name string

> On May 9, 2021, at 23:33, Gu, JiaWei (Will) 
 wrote:
>
> [AMD Official Use Only - Internal Distribution Only]
>
> With a second thought,
> __u8 serial[16] in drm_amdgpu_info_vbios is a bit redundant,
sysfs serial_number already exposes it.
>
> Is it fine to abandon it from drm_amdgpu_info_vbios struct? @Alex
> Deucher @Nieto, David M
>
> Best regards,
> Jiawei
>
> -Original Message-
> From: Alex Deucher 

> Sent: Sunday, May 9, 2021 11:59 PM
> To: Gu, JiaWei (Will)  
> Cc: amd-gfx list 
; Kees Cook
>  
> Subject: Re: [PATCH] drm/amdgpu: Align serial size in
> drm_amdgpu_info_vbios
>
>> On Sat, May 8, 2021 at 2:48 AM Jiawei Gu 
 wrote:
>>
>> 20 should be serial char size now instead of 16.
>>
>> Signed-off-by: Jiawei Gu 

>
> Please make sure this keeps proper 64 bit alignment in the
structure.
>
> Alex
>
>
>> ---
>> include/uapi/drm/amdgpu_drm.h | 2 +-

RFC: use dma_alloc_noncoherent in ttm_pool_alloc_page

2021-05-11 Thread Christoph Hellwig
Hi all,

the memory allocation for the TTM pool is a big mess with two allocation
methods that both have issues, a layering violation and odd guessing of
pools in the callers.

This patch switches to the dma_alloc_noncoherent API instead fixing all
of the above issues.

Warning:  I don't have any of the relevant hardware, so this is a compile
tested request for comments only!

Diffstat:
 drivers/gpu/drm/amd/amdgpu/amdgpu.h |1 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |4 
 drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c   |1 
 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c   |1 
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c   |1 
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   |1 
 drivers/gpu/drm/drm_cache.c |   31 -
 drivers/gpu/drm/drm_gem_vram_helper.c   |3 
 drivers/gpu/drm/nouveau/nouveau_ttm.c   |8 -
 drivers/gpu/drm/qxl/qxl_ttm.c   |3 
 drivers/gpu/drm/radeon/radeon.h |1 
 drivers/gpu/drm/radeon/radeon_device.c  |1 
 drivers/gpu/drm/radeon/radeon_ttm.c |4 
 drivers/gpu/drm/ttm/ttm_device.c|7 -
 drivers/gpu/drm/ttm/ttm_pool.c  |  178 
 drivers/gpu/drm/ttm/ttm_tt.c|   25 
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c |4 
 include/drm/drm_cache.h |1 
 include/drm/ttm/ttm_device.h|3 
 include/drm/ttm/ttm_pool.h  |9 -
 20 files changed, 41 insertions(+), 246 deletions(-)
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v2] drm/amd/display: Fix two cursor duplication when using overlay

2021-05-11 Thread youling 257
I am using Linux kernel 5.13-rc1, which has the "drm/amd/display: Reject
non-zero src_y and src_x for video planes" patch. git bisect says the bad
commit is "drm/amd/display: Fix two cursor duplication when using overlay".
I built kernel 5.13 many times and rebooted to test it many times.

2021-05-11 8:37 GMT+08:00, Rodrigo Siqueira :
> On 05/10, youling257 wrote:
>> I using amd 3400g running with android-x86, this patch is a bad commit
>> when i use android-x86 on amdgpu.
>>
>> Revert "Revert "drm/amdgpu: Ensure that the modifier requested is
>> supported by plane."" is the first bad commit, cause a androidx86 run on
>> amdgpu problem, look the video,
>> https://drive.google.com/file/d/1QklH_H2AlOTu8W1D3yl6_3rtZ7IqbjR_/view?usp=sharing
>>
>> "drm/amd/display: Fix two cursor duplication when using overlay" is the
>> second bad commit, also cause this problem,
>> https://drive.google.com/file/d/1QklH_H2AlOTu8W1D3yl6_3rtZ7IqbjR_/view?usp=sharing
>
> Hmmm... I don't think that the two cursor patch would cause that
> flickering. Are you using the latest amd-staging-drm-next? Do you have
> this patch in your local branch:
>
>  drm/amd/display: Reject non-zero src_y and src_x for video planes
>
> Thanks
>
> --
> Rodrigo Siqueira
> https://siqueira.tech
>
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/ttm: use dma_alloc_pages for the page pool

2021-05-11 Thread Christoph Hellwig
Use the dma_alloc_pages allocator for the TTM pool allocator.
This allocator is a front end to the page allocator which takes the
DMA mask of the device into account, thus offering the best of both
worlds of the two existing allocator versions.  This conversion also
removes the ugly layering violation where the TTM pool assumes what
kind of virtual address dma_alloc_attrs can return.

Signed-off-by: Christoph Hellwig 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h |   1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |   4 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c   |   1 -
 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c   |   1 -
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c   |   1 -
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   |   1 -
 drivers/gpu/drm/drm_cache.c |  31 -
 drivers/gpu/drm/drm_gem_vram_helper.c   |   3 +-
 drivers/gpu/drm/nouveau/nouveau_ttm.c   |   8 +-
 drivers/gpu/drm/qxl/qxl_ttm.c   |   3 +-
 drivers/gpu/drm/radeon/radeon.h |   1 -
 drivers/gpu/drm/radeon/radeon_device.c  |   1 -
 drivers/gpu/drm/radeon/radeon_ttm.c |   4 +-
 drivers/gpu/drm/ttm/ttm_device.c|   7 +-
 drivers/gpu/drm/ttm/ttm_pool.c  | 178 
 drivers/gpu/drm/ttm/ttm_tt.c|  25 +---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c |   4 +-
 include/drm/drm_cache.h |   1 -
 include/drm/ttm/ttm_device.h|   3 +-
 include/drm/ttm/ttm_pool.h  |   9 +-
 20 files changed, 41 insertions(+), 246 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index dc3a69296321b3..5f40527eeef1ff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -819,7 +819,6 @@ struct amdgpu_device {
int usec_timeout;
const struct amdgpu_asic_funcs  *asic_funcs;
boolshutdown;
-   boolneed_swiotlb;
boolaccel_working;
struct notifier_block   acpi_nb;
struct amdgpu_i2c_chan  *i2c_bus[AMDGPU_MAX_I2C_BUS];
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 3bef0432cac2f7..9bf17b44cba6fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1705,9 +1705,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
/* No others user of address space so set it to 0 */
r = ttm_device_init(>mman.bdev, _bo_driver, adev->dev,
   adev_to_drm(adev)->anon_inode->i_mapping,
-  adev_to_drm(adev)->vma_offset_manager,
-  adev->need_swiotlb,
-  dma_addressing_limited(adev->dev));
+  adev_to_drm(adev)->vma_offset_manager);
if (r) {
DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index 405d6ad09022ca..2d4fa754513033 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -846,7 +846,6 @@ static int gmc_v6_0_sw_init(void *handle)
dev_warn(adev->dev, "No suitable DMA available.\n");
return r;
}
-   adev->need_swiotlb = drm_need_swiotlb(44);
 
r = gmc_v6_0_init_microcode(adev);
if (r) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 210ada2289ec9c..a504db24f4c2a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -1025,7 +1025,6 @@ static int gmc_v7_0_sw_init(void *handle)
pr_warn("No suitable DMA available\n");
return r;
}
-   adev->need_swiotlb = drm_need_swiotlb(40);
 
r = gmc_v7_0_init_microcode(adev);
if (r) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index e4f27b3f28fb58..42e7b1eb84b3bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -1141,7 +1141,6 @@ static int gmc_v8_0_sw_init(void *handle)
pr_warn("No suitable DMA available\n");
return r;
}
-   adev->need_swiotlb = drm_need_swiotlb(40);
 
r = gmc_v8_0_init_microcode(adev);
if (r) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 455bb91060d0bc..f74784b3423740 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1548,7 +1548,6 @@ static int gmc_v9_0_sw_init(void *handle)
printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
return r;
}
-   adev->need_swiotlb = drm_need_swiotlb(44);
 
if 

Re: [PATCH v2] drm/radeon/dpm: Disable sclk switching on Oland when two 4K 60Hz monitors are connected

2021-05-11 Thread Kai-Heng Feng
On Fri, Apr 30, 2021 at 12:57 PM Kai-Heng Feng
 wrote:
>
> Screen flickers rapidly when two 4K 60Hz monitors are in use. This issue
> doesn't happen when one monitor is 4K 60Hz (pixelclock 594MHz) and
> another one is 4K 30Hz (pixelclock 297MHz).
>
> The issue is gone after setting "power_dpm_force_performance_level" to
> "high". Following the indication, we found that the issue occurs when
> sclk is too low.
>
> So resolve the issue by disabling sclk switching when there are two
> monitors that require a high pixelclock (> 297MHz).
>
> v2:
>  - Only apply the fix to Oland.
> Signed-off-by: Kai-Heng Feng 

A gentle ping...

> ---
>  drivers/gpu/drm/radeon/radeon.h| 1 +
>  drivers/gpu/drm/radeon/radeon_pm.c | 8 
>  drivers/gpu/drm/radeon/si_dpm.c| 3 +++
>  3 files changed, 12 insertions(+)
>
> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
> index 42281fce552e6..56ed5634cebef 100644
> --- a/drivers/gpu/drm/radeon/radeon.h
> +++ b/drivers/gpu/drm/radeon/radeon.h
> @@ -1549,6 +1549,7 @@ struct radeon_dpm {
> void*priv;
> u32 new_active_crtcs;
> int new_active_crtc_count;
> +   int high_pixelclock_count;
> u32 current_active_crtcs;
> int current_active_crtc_count;
> bool single_display;
> diff --git a/drivers/gpu/drm/radeon/radeon_pm.c 
> b/drivers/gpu/drm/radeon/radeon_pm.c
> index 0c1950f4e146f..3861c0b98fcf3 100644
> --- a/drivers/gpu/drm/radeon/radeon_pm.c
> +++ b/drivers/gpu/drm/radeon/radeon_pm.c
> @@ -1767,6 +1767,7 @@ static void radeon_pm_compute_clocks_dpm(struct 
> radeon_device *rdev)
> struct drm_device *ddev = rdev->ddev;
> struct drm_crtc *crtc;
> struct radeon_crtc *radeon_crtc;
> +   struct radeon_connector *radeon_connector;
>
> if (!rdev->pm.dpm_enabled)
> return;
> @@ -1776,6 +1777,7 @@ static void radeon_pm_compute_clocks_dpm(struct 
> radeon_device *rdev)
> /* update active crtc counts */
> rdev->pm.dpm.new_active_crtcs = 0;
> rdev->pm.dpm.new_active_crtc_count = 0;
> +   rdev->pm.dpm.high_pixelclock_count = 0;
> if (rdev->num_crtc && rdev->mode_info.mode_config_initialized) {
> list_for_each_entry(crtc,
> >mode_config.crtc_list, head) {
> @@ -1783,6 +1785,12 @@ static void radeon_pm_compute_clocks_dpm(struct 
> radeon_device *rdev)
> if (crtc->enabled) {
> rdev->pm.dpm.new_active_crtcs |= (1 << 
> radeon_crtc->crtc_id);
> rdev->pm.dpm.new_active_crtc_count++;
> +   if (!radeon_crtc->connector)
> +   continue;
> +
> +   radeon_connector = 
> to_radeon_connector(radeon_crtc->connector);
> +   if (radeon_connector->pixelclock_for_modeset 
> > 297000)
> +   rdev->pm.dpm.high_pixelclock_count++;
> }
> }
> }
> diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
> index 9186095518047..3cc2b96a7f368 100644
> --- a/drivers/gpu/drm/radeon/si_dpm.c
> +++ b/drivers/gpu/drm/radeon/si_dpm.c
> @@ -2979,6 +2979,9 @@ static void si_apply_state_adjust_rules(struct 
> radeon_device *rdev,
> (rdev->pdev->device == 0x6605)) {
> max_sclk = 75000;
> }
> +
> +   if (rdev->pm.dpm.high_pixelclock_count > 1)
> +   disable_sclk_switching = true;
> }
>
> if (rps->vce_active) {
> --
> 2.30.2
>
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/pm: enable ASPM by default

2021-05-11 Thread Ernst Sjöstrand
You could even write why it was disabled before and why you can enable it
now again in the commit message.

Regards
//Ernst

Den tis 11 maj 2021 kl 09:25 skrev Chen, Jiansong (Simon) <
jiansong.c...@amd.com>:

> [AMD Official Use Only - Internal Distribution Only]
>
> Better to make the commit msg more specific, eg. Change predicate
> accordingly since aspm is enabled by default.
> Either way,  Reviewed-by: Jiansong Chen 
>
> Regards,
> Jiansong
> -Original Message-
> From: amd-gfx  On Behalf Of
> Kenneth Feng
> Sent: Tuesday, May 11, 2021 11:04 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Feng, Kenneth 
> Subject: [PATCH] drm/amd/pm: enable ASPM by default
>
> enable ASPM by default
>
> Signed-off-by: Kenneth Feng 
> ---
>  drivers/gpu/drm/amd/amdgpu/nv.c | 2 +-
>  drivers/gpu/drm/amd/amdgpu/soc15.c  | 2 +-
>  drivers/gpu/drm/amd/amdgpu/vi.c | 2 +-
>  drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 2 +-
>  4 files changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c
> b/drivers/gpu/drm/amd/amdgpu/nv.c index 82a380be8368..2fcfd893edc5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/nv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/nv.c
> @@ -598,7 +598,7 @@ static void nv_pcie_gen3_enable(struct amdgpu_device
> *adev)
>
>  static void nv_program_aspm(struct amdgpu_device *adev)  {
> -   if (amdgpu_aspm != 1)
> +   if (!amdgpu_aspm)
> return;
>
> if (!(adev->flags & AMD_IS_APU) &&
> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c
> b/drivers/gpu/drm/amd/amdgpu/soc15.c
> index 49ece2a7f9f0..4b660b2d1c22 100644
> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
> @@ -817,7 +817,7 @@ static void soc15_pcie_gen3_enable(struct
> amdgpu_device *adev)
>
>  static void soc15_program_aspm(struct amdgpu_device *adev)  {
> -   if (amdgpu_aspm != 1)
> +   if (!amdgpu_aspm)
> return;
>
> if (!(adev->flags & AMD_IS_APU) &&
> diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c
> b/drivers/gpu/drm/amd/amdgpu/vi.c index 735ebbd1148f..3d21c0799037 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vi.c
> @@ -1136,7 +1136,7 @@ static void vi_program_aspm(struct amdgpu_device
> *adev)
> bool bL1SS = false;
> bool bClkReqSupport = true;
>
> -   if (amdgpu_aspm != 1)
> +   if (!amdgpu_aspm)
> return;
>
> if (adev->flags & AMD_IS_APU ||
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
> b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
> index d2fd44b903ca..270b2b0b8e8a 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
> @@ -302,7 +302,7 @@ sienna_cichlid_get_allowed_feature_mask(struct
> smu_context *smu,
> if (smu->dc_controlled_by_gpio)
> *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_ACDC_BIT);
>
> -   if (amdgpu_aspm == 1)
> +   if (amdgpu_aspm)
> *(uint64_t *)feature_mask |=
> FEATURE_MASK(FEATURE_DS_LCLK_BIT);
>
> return 0;
> --
> 2.17.1
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
>
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfxdata=04%7C01%7CJiansong.Chen%40amd.com%7C414e894d646e4161c7ab08d914296d92%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637562990432397285%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000sdata=TvVZdWM32y8rOn154m%2B0pLvHxk9fEuOaiBqiIxXSxiE%3Dreserved=0
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/ttm: use dma_alloc_pages for the page pool

2021-05-11 Thread Christian König

Am 11.05.21 um 08:05 schrieb Christoph Hellwig:

Use the dma_alloc_pages allocator for the TTM pool allocator.
This allocator is a front end to the page allocator which takes the
DMA mask of the device into account, thus offering the best of both
worlds of the two existing allocator versions.  This conversion also
removes the ugly layering violation where the TTM pool assumes what
kind of virtual address dma_alloc_attrs can return.


Oh yes please. But please split that up into multiple patches.

At least one switching from dma_alloc_attrs() to dma_alloc_pages() 
separately.


We are certainly going to need the drm_need_swiotlb() for userptr support 
(unless we add some approach for drivers to opt out of swiotlb).


Then while I really want to get rid of GFP_DMA32 as well I'm not 100% 
sure if we can handle this without the flag.


And last we need something better to store the DMA address and order 
than allocating a separate memory object for each page.


Christian.



Signed-off-by: Christoph Hellwig 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h |   1 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |   4 +-
  drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c   |   1 -
  drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c   |   1 -
  drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c   |   1 -
  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   |   1 -
  drivers/gpu/drm/drm_cache.c |  31 -
  drivers/gpu/drm/drm_gem_vram_helper.c   |   3 +-
  drivers/gpu/drm/nouveau/nouveau_ttm.c   |   8 +-
  drivers/gpu/drm/qxl/qxl_ttm.c   |   3 +-
  drivers/gpu/drm/radeon/radeon.h |   1 -
  drivers/gpu/drm/radeon/radeon_device.c  |   1 -
  drivers/gpu/drm/radeon/radeon_ttm.c |   4 +-
  drivers/gpu/drm/ttm/ttm_device.c|   7 +-
  drivers/gpu/drm/ttm/ttm_pool.c  | 178 
  drivers/gpu/drm/ttm/ttm_tt.c|  25 +---
  drivers/gpu/drm/vmwgfx/vmwgfx_drv.c |   4 +-
  include/drm/drm_cache.h |   1 -
  include/drm/ttm/ttm_device.h|   3 +-
  include/drm/ttm/ttm_pool.h  |   9 +-
  20 files changed, 41 insertions(+), 246 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index dc3a69296321b3..5f40527eeef1ff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -819,7 +819,6 @@ struct amdgpu_device {
int usec_timeout;
const struct amdgpu_asic_funcs  *asic_funcs;
boolshutdown;
-   boolneed_swiotlb;
boolaccel_working;
struct notifier_block   acpi_nb;
struct amdgpu_i2c_chan  *i2c_bus[AMDGPU_MAX_I2C_BUS];
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 3bef0432cac2f7..9bf17b44cba6fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1705,9 +1705,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
/* No others user of address space so set it to 0 */
r = ttm_device_init(>mman.bdev, _bo_driver, adev->dev,
   adev_to_drm(adev)->anon_inode->i_mapping,
-  adev_to_drm(adev)->vma_offset_manager,
-  adev->need_swiotlb,
-  dma_addressing_limited(adev->dev));
+  adev_to_drm(adev)->vma_offset_manager);
if (r) {
DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index 405d6ad09022ca..2d4fa754513033 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -846,7 +846,6 @@ static int gmc_v6_0_sw_init(void *handle)
dev_warn(adev->dev, "No suitable DMA available.\n");
return r;
}
-   adev->need_swiotlb = drm_need_swiotlb(44);
  
  	r = gmc_v6_0_init_microcode(adev);

if (r) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 210ada2289ec9c..a504db24f4c2a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -1025,7 +1025,6 @@ static int gmc_v7_0_sw_init(void *handle)
pr_warn("No suitable DMA available\n");
return r;
}
-   adev->need_swiotlb = drm_need_swiotlb(40);
  
  	r = gmc_v7_0_init_microcode(adev);

if (r) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index e4f27b3f28fb58..42e7b1eb84b3bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -1141,7 +1141,6 @@ static int gmc_v8_0_sw_init(void *handle)
pr_warn("No suitable DMA available\n");

RE: [PATCH] drm/amd/pm: enable ASPM by default

2021-05-11 Thread Chen, Jiansong (Simon)
[AMD Official Use Only - Internal Distribution Only]

Better to make the commit msg more specific, eg. Change predicate accordingly 
since aspm is enabled by default.
Either way,  Reviewed-by: Jiansong Chen 

Regards,
Jiansong
-Original Message-
From: amd-gfx  On Behalf Of Kenneth Feng
Sent: Tuesday, May 11, 2021 11:04 AM
To: amd-gfx@lists.freedesktop.org
Cc: Feng, Kenneth 
Subject: [PATCH] drm/amd/pm: enable ASPM by default

enable ASPM by default

Signed-off-by: Kenneth Feng 
---
 drivers/gpu/drm/amd/amdgpu/nv.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/soc15.c  | 2 +-
 drivers/gpu/drm/amd/amdgpu/vi.c | 2 +-
 drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c 
index 82a380be8368..2fcfd893edc5 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -598,7 +598,7 @@ static void nv_pcie_gen3_enable(struct amdgpu_device *adev)

 static void nv_program_aspm(struct amdgpu_device *adev)  {
-   if (amdgpu_aspm != 1)
+   if (!amdgpu_aspm)
return;

if (!(adev->flags & AMD_IS_APU) &&
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c 
b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 49ece2a7f9f0..4b660b2d1c22 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -817,7 +817,7 @@ static void soc15_pcie_gen3_enable(struct amdgpu_device 
*adev)

 static void soc15_program_aspm(struct amdgpu_device *adev)  {
-   if (amdgpu_aspm != 1)
+   if (!amdgpu_aspm)
return;

if (!(adev->flags & AMD_IS_APU) &&
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c 
index 735ebbd1148f..3d21c0799037 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -1136,7 +1136,7 @@ static void vi_program_aspm(struct amdgpu_device *adev)
bool bL1SS = false;
bool bClkReqSupport = true;

-   if (amdgpu_aspm != 1)
+   if (!amdgpu_aspm)
return;

if (adev->flags & AMD_IS_APU ||
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index d2fd44b903ca..270b2b0b8e8a 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -302,7 +302,7 @@ sienna_cichlid_get_allowed_feature_mask(struct smu_context 
*smu,
if (smu->dc_controlled_by_gpio)
*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_ACDC_BIT);

-   if (amdgpu_aspm == 1)
+   if (amdgpu_aspm)
*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DS_LCLK_BIT);

return 0;
--
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfxdata=04%7C01%7CJiansong.Chen%40amd.com%7C414e894d646e4161c7ab08d914296d92%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637562990432397285%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000sdata=TvVZdWM32y8rOn154m%2B0pLvHxk9fEuOaiBqiIxXSxiE%3Dreserved=0
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v6 16/16] drm/amdgpu: Verify DMA opearations from device are done

2021-05-11 Thread Christian König

Am 10.05.21 um 18:36 schrieb Andrey Grodzovsky:

In case device removal is just simulated through sysfs, verify that the
device doesn't keep doing DMA to the released memory after
pci_remove is done.

Signed-off-by: Andrey Grodzovsky 


Acked-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 83006f45b10b..5e6af9e0b7bf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1314,7 +1314,13 @@ amdgpu_pci_remove(struct pci_dev *pdev)
drm_dev_unplug(dev);
amdgpu_driver_unload_kms(dev);
  
+	/*

+* Flush any in flight DMA operations from device.
+* Clear the Bus Master Enable bit and then wait on the PCIe Device
+* StatusTransactions Pending bit.
+*/
pci_disable_device(pdev);
+   pci_wait_for_pending_transaction(pdev);
  }
  
  static void


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v6 13/16] drm/amdgpu: Fix hang on device removal.

2021-05-11 Thread Christian König




Am 10.05.21 um 18:36 schrieb Andrey Grodzovsky:

If removing while commands in flight you cannot wait to flush the
HW fences on a ring since the device is gone.

Signed-off-by: Andrey Grodzovsky 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 16 ++--
  1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 1ffb36bd0b19..fa03702ecbfb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -36,6 +36,7 @@
  #include 
  #include 
  
+#include 

  #include "amdgpu.h"
  #include "amdgpu_trace.h"
  
@@ -525,8 +526,7 @@ int amdgpu_fence_driver_init(struct amdgpu_device *adev)

   */
  void amdgpu_fence_driver_fini_hw(struct amdgpu_device *adev)
  {
-   unsigned i, j;
-   int r;
+   int i, r;


Is j not used here any more?

Christian.

  
  	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {

struct amdgpu_ring *ring = adev->rings[i];
@@ -535,11 +535,15 @@ void amdgpu_fence_driver_fini_hw(struct amdgpu_device 
*adev)
continue;
if (!ring->no_scheduler)
drm_sched_fini(>sched);
-   r = amdgpu_fence_wait_empty(ring);
-   if (r) {
-   /* no need to trigger GPU reset as we are unloading */
+   /* You can't wait for HW to signal if it's gone */
+   if (!drm_dev_is_unplugged(>ddev))
+   r = amdgpu_fence_wait_empty(ring);
+   else
+   r = -ENODEV;
+   /* no need to trigger GPU reset as we are unloading */
+   if (r)
amdgpu_fence_driver_force_completion(ring);
-   }
+
if (ring->fence_drv.irq_src)
amdgpu_irq_put(adev, ring->fence_drv.irq_src,
   ring->fence_drv.irq_type);


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v6 12/16] drm/amdgpu: Prevent any job recoveries after device is unplugged.

2021-05-11 Thread Christian König

Am 10.05.21 um 18:36 schrieb Andrey Grodzovsky:

Return DRM_GPU_SCHED_STAT_ENODEV back to the scheduler when device
is not present so the timeout timer will not be rearmed.

v5: Update to match updated return values in enum drm_gpu_sched_stat

Signed-off-by: Andrey Grodzovsky 


Reviewed-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 19 ---
  1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 759b34799221..d33e6d97cc89 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -25,6 +25,8 @@
  #include 
  #include 
  
+#include 

+
  #include "amdgpu.h"
  #include "amdgpu_trace.h"
  
@@ -34,6 +36,15 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)

struct amdgpu_job *job = to_amdgpu_job(s_job);
struct amdgpu_task_info ti;
struct amdgpu_device *adev = ring->adev;
+   int idx;
+
+   if (!drm_dev_enter(>ddev, )) {
+   DRM_INFO("%s - device unplugged skipping recovery on 
scheduler:%s",
+__func__, s_job->sched->name);
+
+   /* Effectively the job is aborted as the device is gone */
+   return DRM_GPU_SCHED_STAT_ENODEV;
+   }
  
  	memset(, 0, sizeof(struct amdgpu_task_info));
  
@@ -41,7 +52,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)

amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) 
{
DRM_ERROR("ring %s timeout, but soft recovered\n",
  s_job->sched->name);
-   return DRM_GPU_SCHED_STAT_NOMINAL;
+   goto exit;
}
  
  	amdgpu_vm_get_task_info(ring->adev, job->pasid, );

@@ -53,13 +64,15 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct 
drm_sched_job *s_job)
  
  	if (amdgpu_device_should_recover_gpu(ring->adev)) {

amdgpu_device_gpu_recover(ring->adev, job);
-   return DRM_GPU_SCHED_STAT_NOMINAL;
} else {
drm_sched_suspend_timeout(>sched);
if (amdgpu_sriov_vf(adev))
adev->virt.tdr_debug = true;
-   return DRM_GPU_SCHED_STAT_NOMINAL;
}
+
+exit:
+   drm_dev_exit(idx);
+   return DRM_GPU_SCHED_STAT_NOMINAL;
  }
  
  int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v6 11/16] drm/sched: Make timeout timer rearm conditional.

2021-05-11 Thread Christian König

Am 10.05.21 um 18:36 schrieb Andrey Grodzovsky:

We don't want to rearm the timer if driver hook reports
that the device is gone.

v5: Update drm_gpu_sched_stat values in code.

Signed-off-by: Andrey Grodzovsky 


Reviewed-by: Christian König 


---
  drivers/gpu/drm/scheduler/sched_main.c | 11 +++
  1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index f4f474944169..8d1211e87101 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -314,6 +314,7 @@ static void drm_sched_job_timedout(struct work_struct *work)
  {
struct drm_gpu_scheduler *sched;
struct drm_sched_job *job;
+   enum drm_gpu_sched_stat status = DRM_GPU_SCHED_STAT_NOMINAL;
  
  	sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
  
@@ -331,7 +332,7 @@ static void drm_sched_job_timedout(struct work_struct *work)

list_del_init(>list);
spin_unlock(>job_list_lock);
  
-		job->sched->ops->timedout_job(job);

+   status = job->sched->ops->timedout_job(job);
  
  		/*

 * Guilty job did complete and hence needs to be manually 
removed
@@ -345,9 +346,11 @@ static void drm_sched_job_timedout(struct work_struct 
*work)
spin_unlock(>job_list_lock);
}
  
-	spin_lock(>job_list_lock);

-   drm_sched_start_timeout(sched);
-   spin_unlock(>job_list_lock);
+   if (status != DRM_GPU_SCHED_STAT_ENODEV) {
+   spin_lock(>job_list_lock);
+   drm_sched_start_timeout(sched);
+   spin_unlock(>job_list_lock);
+   }
  }
  
   /**


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v6 10/16] drm/amdgpu: Guard against write accesses after device removal

2021-05-11 Thread Christian König

Am 10.05.21 um 18:36 schrieb Andrey Grodzovsky:

This should prevent writing to memory or IO ranges possibly
already allocated for other uses after our device is removed.

v5:
Protect more places where memcpy_toio/memcpy_fromio takes place
Protect IB submissions

v6: Switch to !drm_dev_enter instead of scoping entire code
with brackets.

Signed-off-by: Andrey Grodzovsky 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c| 11 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c   |  9 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c| 17 +++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c   | 63 +++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h   |  2 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c  | 70 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h  | 49 ++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c   | 31 +---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c   | 11 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c   | 22 --
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c|  7 +-
  drivers/gpu/drm/amd/amdgpu/psp_v11_0.c| 44 ++--
  drivers/gpu/drm/amd/amdgpu/psp_v12_0.c|  8 +--
  drivers/gpu/drm/amd/amdgpu/psp_v3_1.c |  8 +--
  drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 26 ---
  drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 22 +++---
  .../drm/amd/pm/powerplay/smumgr/smu7_smumgr.c |  2 +
  17 files changed, 257 insertions(+), 145 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index a0bff4713672..94c415176cdc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -71,6 +71,8 @@
  #include 
  #include 
  
+#include 

+
  MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
  MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
  MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
@@ -281,7 +283,10 @@ void amdgpu_device_vram_access(struct amdgpu_device *adev, 
loff_t pos,
unsigned long flags;
uint32_t hi = ~0;
uint64_t last;
+   int idx;
  
+	 if (!drm_dev_enter(>ddev, ))

+return;
  
  #ifdef CONFIG_64BIT

last = min(pos + size, adev->gmc.visible_vram_size);
@@ -299,8 +304,10 @@ void amdgpu_device_vram_access(struct amdgpu_device *adev, 
loff_t pos,
memcpy_fromio(buf, addr, count);
}
  
-		if (count == size)

+   if (count == size) {
+   drm_dev_exit(idx);
return;
+   }


Maybe use a goto instead, but really just a nit pick.



  
  		pos += count;

buf += count / 4;
@@ -323,6 +330,8 @@ void amdgpu_device_vram_access(struct amdgpu_device *adev, 
loff_t pos,
*buf++ = RREG32_NO_KIQ(mmMM_DATA);
}
spin_unlock_irqrestore(>mmio_idx_lock, flags);
+
+   drm_dev_exit(idx);
  }
  
  /*

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 4d32233cde92..04ba5eef1e88 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -31,6 +31,8 @@
  #include "amdgpu_ras.h"
  #include "amdgpu_xgmi.h"
  
+#include 

+
  /**
   * amdgpu_gmc_pdb0_alloc - allocate vram for pdb0
   *
@@ -151,6 +153,10 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, 
void *cpu_pt_addr,
  {
void __iomem *ptr = (void *)cpu_pt_addr;
uint64_t value;
+   int idx;
+
+   if (!drm_dev_enter(>ddev, ))
+   return 0;
  
  	/*

 * The following is for PTE only. GART does not have PDEs.
@@ -158,6 +164,9 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void 
*cpu_pt_addr,
value = addr & 0xF000ULL;
value |= flags;
writeq(value, ptr + (gpu_page_idx * 8));
+
+   drm_dev_exit(idx);
+
return 0;
  }
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c

index 148a3b481b12..62fcbd446c71 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -30,6 +30,7 @@
  #include 
  
  #include 

+#include 
  
  #include "amdgpu.h"

  #include "atom.h"
@@ -137,7 +138,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
bool secure;
  
  	unsigned i;

-   int r = 0;
+   int idx, r = 0;
bool need_pipe_sync = false;
  
  	if (num_ibs == 0)

@@ -169,13 +170,16 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
return -EINVAL;
}
  
+	if (!drm_dev_enter(>ddev, ))

+   return -ENODEV;
+
alloc_size = ring->funcs->emit_frame_size + num_ibs *
ring->funcs->emit_ib_size;
  
  	r = amdgpu_ring_alloc(ring, alloc_size);

if (r) {
dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
-   return r;
+   goto exit;
}
  
  	

Re: [PATCH v6 06/16] drm/amdgpu: Handle IOMMU enabled case.

2021-05-11 Thread Christian König

Am 10.05.21 um 18:36 schrieb Andrey Grodzovsky:

Handle all DMA IOMMU group related dependencies before the
group is removed.

v5: Drop IOMMU notifier and switch to lockless call to ttm_tt_unpopulate
v6: Drop the BO unmap list

Signed-off-by: Andrey Grodzovsky 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c   | 3 +--
  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h   | 1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c| 9 +
  drivers/gpu/drm/amd/amdgpu/cik_ih.c| 1 -
  drivers/gpu/drm/amd/amdgpu/cz_ih.c | 1 -
  drivers/gpu/drm/amd/amdgpu/iceland_ih.c| 1 -
  drivers/gpu/drm/amd/amdgpu/navi10_ih.c | 3 ---
  drivers/gpu/drm/amd/amdgpu/si_ih.c | 1 -
  drivers/gpu/drm/amd/amdgpu/tonga_ih.c  | 1 -
  drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 3 ---
  11 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 18598eda18f6..a0bff4713672 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3256,7 +3256,6 @@ static const struct attribute *amdgpu_dev_attributes[] = {
NULL
  };
  
-

  /**
   * amdgpu_device_init - initialize the driver
   *
@@ -3698,12 +3697,13 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
amdgpu_ucode_sysfs_fini(adev);
sysfs_remove_files(>dev->kobj, amdgpu_dev_attributes);
  
-

amdgpu_fbdev_fini(adev);
  
  	amdgpu_irq_fini_hw(adev);
  
  	amdgpu_device_ip_fini_early(adev);

+
+   amdgpu_gart_dummy_page_fini(adev);


I think you should probably just call amdgpu_gart_fini() here.


  }
  
  void amdgpu_device_fini_sw(struct amdgpu_device *adev)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index c5a9a4fb10d2..354e68081b53 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -92,7 +92,7 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device 
*adev)
   *
   * Frees the dummy page used by the driver (all asics).
   */
-static void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
+void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
  {
if (!adev->dummy_page_addr)
return;
@@ -375,5 +375,4 @@ int amdgpu_gart_init(struct amdgpu_device *adev)
   */
  void amdgpu_gart_fini(struct amdgpu_device *adev)
  {
-   amdgpu_gart_dummy_page_fini(adev);
  }


Well either you remove amdgpu_gart_fini() or just call 
amdgpu_gart_fini() instead of amdgpu_gart_dummy_page_fini().



diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
index a25fe97b0196..78dc7a23da56 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
@@ -58,6 +58,7 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
  void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
  int amdgpu_gart_init(struct amdgpu_device *adev);
  void amdgpu_gart_fini(struct amdgpu_device *adev);
+void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev);
  int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
   int pages);
  int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 233b64dab94b..a14973a7a9c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -361,6 +361,15 @@ void amdgpu_irq_fini_hw(struct amdgpu_device *adev)
if (!amdgpu_device_has_dc_support(adev))
flush_work(>hotplug_work);
}
+
+   if (adev->irq.ih_soft.ring)
+   amdgpu_ih_ring_fini(adev, >irq.ih_soft);
+   if (adev->irq.ih.ring)
+   amdgpu_ih_ring_fini(adev, >irq.ih);
+   if (adev->irq.ih1.ring)
+   amdgpu_ih_ring_fini(adev, >irq.ih1);
+   if (adev->irq.ih2.ring)
+   amdgpu_ih_ring_fini(adev, >irq.ih2);


You should probably make the function NULL safe instead of checking here.

Christian.


  }
  
  /**

diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c 
b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
index 183d44a6583c..df385ffc9768 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
@@ -310,7 +310,6 @@ static int cik_ih_sw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
  
  	amdgpu_irq_fini_sw(adev);

-   amdgpu_ih_ring_fini(adev, >irq.ih);
amdgpu_irq_remove_domain(adev);
  
  	return 0;

diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c 
b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
index d32743949003..b8c47e0cf37a 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
@@ -302,7 +302,6 @@ static int cz_ih_sw_fini(void *handle)
 

Re: [PATCH v6 05/16] drm/amdgpu: Add early fini callback

2021-05-11 Thread Christian König

Am 10.05.21 um 18:36 schrieb Andrey Grodzovsky:

Use it to call display code dependent on device->drv_data
before it's set to NULL on device unplug

v5: Move HW finalization into this callback to prevent MMIO accesses
 post PCI remove.

Signed-off-by: Andrey Grodzovsky 


Acked-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c| 59 +--
  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 12 +++-
  drivers/gpu/drm/amd/include/amd_shared.h  |  2 +
  3 files changed, 52 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 3760ce7d8ff8..18598eda18f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2558,34 +2558,26 @@ static int amdgpu_device_ip_late_init(struct 
amdgpu_device *adev)
return 0;
  }
  
-/**

- * amdgpu_device_ip_fini - run fini for hardware IPs
- *
- * @adev: amdgpu_device pointer
- *
- * Main teardown pass for hardware IPs.  The list of all the hardware
- * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
- * are run.  hw_fini tears down the hardware associated with each IP
- * and sw_fini tears down any software state associated with each IP.
- * Returns 0 on success, negative error code on failure.
- */
-static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
+static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
  {
int i, r;
  
-	if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)

-   amdgpu_virt_release_ras_err_handler_data(adev);
+   for (i = 0; i < adev->num_ip_blocks; i++) {
+   if (!adev->ip_blocks[i].version->funcs->early_fini)
+   continue;
  
-	amdgpu_ras_pre_fini(adev);

+   r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
+   if (r) {
+   DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+   }
+   }
  
-	if (adev->gmc.xgmi.num_physical_nodes > 1)

-   amdgpu_xgmi_remove_device(adev);
+   amdgpu_amdkfd_suspend(adev, false);
  
  	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);

amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
  
-	amdgpu_amdkfd_device_fini(adev);

-
/* need to disable SMC first */
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.hw)
@@ -2616,6 +2608,33 @@ static int amdgpu_device_ip_fini(struct amdgpu_device 
*adev)
adev->ip_blocks[i].status.hw = false;
}
  
+	return 0;

+}
+
+/**
+ * amdgpu_device_ip_fini - run fini for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Main teardown pass for hardware IPs.  The list of all the hardware
+ * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
+ * are run.  hw_fini tears down the hardware associated with each IP
+ * and sw_fini tears down any software state associated with each IP.
+ * Returns 0 on success, negative error code on failure.
+ */
+static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
+{
+   int i, r;
+
+   if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
+   amdgpu_virt_release_ras_err_handler_data(adev);
+
+   amdgpu_ras_pre_fini(adev);
+
+   if (adev->gmc.xgmi.num_physical_nodes > 1)
+   amdgpu_xgmi_remove_device(adev);
+
+   amdgpu_amdkfd_device_fini_sw(adev);
  
  	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {

if (!adev->ip_blocks[i].status.sw)
@@ -3683,6 +3702,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
amdgpu_fbdev_fini(adev);
  
  	amdgpu_irq_fini_hw(adev);

+
+   amdgpu_device_ip_fini_early(adev);
  }
  
  void amdgpu_device_fini_sw(struct amdgpu_device *adev)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 296704ce3768..6c2c6a51ce6c 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1251,6 +1251,15 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
return -EINVAL;
  }
  
+static int amdgpu_dm_early_fini(void *handle)

+{
+   struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+   amdgpu_dm_audio_fini(adev);
+
+   return 0;
+}
+
  static void amdgpu_dm_fini(struct amdgpu_device *adev)
  {
int i;
@@ -1259,8 +1268,6 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
drm_encoder_cleanup(>dm.mst_encoders[i].base);
}
  
-	amdgpu_dm_audio_fini(adev);

-
amdgpu_dm_destroy_drm_device(>dm);
  
  #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)

@@ -2298,6 +2305,7 @@ static const struct amd_ip_funcs amdgpu_dm_funcs = {
.late_init = dm_late_init,
.sw_init = dm_sw_init,

Re: [PATCH v6 04/16] drm/amdkfd: Split kfd suspend from device exit

2021-05-11 Thread Christian König

Am 10.05.21 um 18:36 schrieb Andrey Grodzovsky:

Helps to expedite HW related stuff to amdgpu_pci_remove

Signed-off-by: Andrey Grodzovsky 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 +-
  drivers/gpu/drm/amd/amdkfd/kfd_device.c| 3 ++-
  3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 5f6696a3c778..2b06dee9a0ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -170,7 +170,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
}
  }
  
-void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev)

+void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev)
  {
if (adev->kfd.dev) {
kgd2kfd_device_exit(adev->kfd.dev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 14f68c028126..f8e10af99c28 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -127,7 +127,7 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
const void *ih_ring_entry);
  void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
  void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
-void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev);
+void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev);
  int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
uint32_t vmid, uint64_t gpu_addr,
uint32_t *ib_cmd, uint32_t ib_len);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 357b9bf62a1c..ab6d2a43c9a3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -858,10 +858,11 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
return kfd->init_complete;
  }
  
+

+


Looks like unnecessary white space change to me.


  void kgd2kfd_device_exit(struct kfd_dev *kfd)
  {
if (kfd->init_complete) {
-   kgd2kfd_suspend(kfd, false);


Where is the call to this function now?

Christian.


device_queue_manager_uninit(kfd->dqm);
kfd_interrupt_exit(kfd);
kfd_topology_remove_device(kfd);


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v6 01/16] drm/ttm: Remap all page faults to per process dummy page.

2021-05-11 Thread Christian König

Am 10.05.21 um 18:36 schrieb Andrey Grodzovsky:

On device removal reroute all CPU mappings to dummy page.

v3:
Remove loop to find DRM file and instead access it
by vma->vm_file->private_data. Move dummy page installation
into a separate function.

v4:
Map the entire BOs VA space into on demand allocated dummy page
on the first fault for that BO.

v5: Remove duplicate return.

v6: Polish ttm_bo_vm_dummy_page, remove superfluous code.

Signed-off-by: Andrey Grodzovsky 
---
  drivers/gpu/drm/ttm/ttm_bo_vm.c | 57 -
  include/drm/ttm/ttm_bo_api.h|  2 ++
  2 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index b31b18058965..e5a9615519d1 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -34,6 +34,8 @@
  #include 
  #include 
  #include 
+#include 
+#include 
  #include 
  #include 
  #include 
@@ -380,19 +382,72 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
  }
  EXPORT_SYMBOL(ttm_bo_vm_fault_reserved);
  
+static void ttm_bo_release_dummy_page(struct drm_device *dev, void *res)

+{
+   struct page *dummy_page = (struct page *)res;
+
+   __free_page(dummy_page);
+}
+
+vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot)
+{
+   struct vm_area_struct *vma = vmf->vma;
+   struct ttm_buffer_object *bo = vma->vm_private_data;
+   struct drm_device *ddev = bo->base.dev;
+   vm_fault_t ret = VM_FAULT_NOPAGE;
+   unsigned long address;
+   unsigned long pfn;
+   struct page *page;
+
+   /* Allocate new dummy page to map all the VA range in this VMA to it*/
+   page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+   if (!page)
+   return VM_FAULT_OOM;
+
+   pfn = page_to_pfn(page);
+
+   /* Prefault the entire VMA range right away to avoid further faults */
+   for (address = vma->vm_start; address < vma->vm_end; address += 
PAGE_SIZE) {
+



+   if (unlikely(address >= vma->vm_end))
+   break;


That extra check can be removed as far as I can see.



+
+   if (vma->vm_flags & VM_MIXEDMAP)
+   ret = vmf_insert_mixed_prot(vma, address,
+   __pfn_to_pfn_t(pfn, 
PFN_DEV),
+   prot);
+   else
+   ret = vmf_insert_pfn_prot(vma, address, pfn, prot);
+   }
+



+   /* Set the page to be freed using drmm release action */
+   if (drmm_add_action_or_reset(ddev, ttm_bo_release_dummy_page, page))
+   return VM_FAULT_OOM;


You should probably move that before inserting the page into the VMA and 
also free the allocated page if it goes wrong.


Apart from that patch looks good to me,
Christian.


+
+   return ret;
+}
+EXPORT_SYMBOL(ttm_bo_vm_dummy_page);
+
  vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
  {
struct vm_area_struct *vma = vmf->vma;
pgprot_t prot;
struct ttm_buffer_object *bo = vma->vm_private_data;
+   struct drm_device *ddev = bo->base.dev;
vm_fault_t ret;
+   int idx;
  
  	ret = ttm_bo_vm_reserve(bo, vmf);

if (ret)
return ret;
  
  	prot = vma->vm_page_prot;

-   ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT, 1);
+   if (drm_dev_enter(ddev, )) {
+   ret = ttm_bo_vm_fault_reserved(vmf, prot, 
TTM_BO_VM_NUM_PREFAULT, 1);
+   drm_dev_exit(idx);
+   } else {
+   ret = ttm_bo_vm_dummy_page(vmf, prot);
+   }
if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
return ret;
  
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h

index 639521880c29..254ede97f8e3 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -620,4 +620,6 @@ int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned 
long addr,
 void *buf, int len, int write);
  bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all);
  
+vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot);

+
  #endif


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amdgpu: add synchronization among waves in the same threadgroup

2021-05-11 Thread Zhang, Hawking
[AMD Public Use]

Reviewed-by: Hawking Zhang 

Regards,
Hawking
-Original Message-
From: Dennis Li  
Sent: Tuesday, May 11, 2021 14:04
To: amd-gfx@lists.freedesktop.org; Deucher, Alexander 
; Kuehling, Felix ; Zhang, 
Hawking ; Koenig, Christian 
Cc: Li, Dennis 
Subject: [PATCH] drm/amdgpu: add synchronization among waves in the same 
threadgroup

It is possible that the previous waves have exited before others are created, 
so the other waves may reuse physical resources left by previous ones. 
Therefore add barrier instruction to synchronize waves within the same 
threadgroup.

Signed-off-by: Dennis Li 

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
index fdd65589f06b..dbad9ef002d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
@@ -93,98 +93,99 @@ static const struct soc15_reg_golden 
golden_settings_gc_9_4_2_alde[] = {  static const u32 
vgpr_init_compute_shader_aldebaran[] = {
0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x0280,
0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208,
-   0x81078407, 0xc0410080, 0x0007, 0xbf8c, 0xd3d94000, 0x1880,
-   0xd3d94001, 0x1880, 0xd3d94002, 0x1880, 0xd3d94003, 0x1880,
-   0xd3d94004, 0x1880, 0xd3d94005, 0x1880, 0xd3d94006, 0x1880,
-   0xd3d94007, 0x1880, 0xd3d94008, 0x1880, 0xd3d94009, 0x1880,
-   0xd3d9400a, 0x1880, 0xd3d9400b, 0x1880, 0xd3d9400c, 0x1880,
-   0xd3d9400d, 0x1880, 0xd3d9400e, 0x1880, 0xd3d9400f, 0x1880,
-   0xd3d94010, 0x1880, 0xd3d94011, 0x1880, 0xd3d94012, 0x1880,
-   0xd3d94013, 0x1880, 0xd3d94014, 0x1880, 0xd3d94015, 0x1880,
-   0xd3d94016, 0x1880, 0xd3d94017, 0x1880, 0xd3d94018, 0x1880,
-   0xd3d94019, 0x1880, 0xd3d9401a, 0x1880, 0xd3d9401b, 0x1880,
-   0xd3d9401c, 0x1880, 0xd3d9401d, 0x1880, 0xd3d9401e, 0x1880,
-   0xd3d9401f, 0x1880, 0xd3d94020, 0x1880, 0xd3d94021, 0x1880,
-   0xd3d94022, 0x1880, 0xd3d94023, 0x1880, 0xd3d94024, 0x1880,
-   0xd3d94025, 0x1880, 0xd3d94026, 0x1880, 0xd3d94027, 0x1880,
-   0xd3d94028, 0x1880, 0xd3d94029, 0x1880, 0xd3d9402a, 0x1880,
-   0xd3d9402b, 0x1880, 0xd3d9402c, 0x1880, 0xd3d9402d, 0x1880,
-   0xd3d9402e, 0x1880, 0xd3d9402f, 0x1880, 0xd3d94030, 0x1880,
-   0xd3d94031, 0x1880, 0xd3d94032, 0x1880, 0xd3d94033, 0x1880,
-   0xd3d94034, 0x1880, 0xd3d94035, 0x1880, 0xd3d94036, 0x1880,
-   0xd3d94037, 0x1880, 0xd3d94038, 0x1880, 0xd3d94039, 0x1880,
-   0xd3d9403a, 0x1880, 0xd3d9403b, 0x1880, 0xd3d9403c, 0x1880,
-   0xd3d9403d, 0x1880, 0xd3d9403e, 0x1880, 0xd3d9403f, 0x1880,
-   0xd3d94040, 0x1880, 0xd3d94041, 0x1880, 0xd3d94042, 0x1880,
-   0xd3d94043, 0x1880, 0xd3d94044, 0x1880, 0xd3d94045, 0x1880,
-   0xd3d94046, 0x1880, 0xd3d94047, 0x1880, 0xd3d94048, 0x1880,
-   0xd3d94049, 0x1880, 0xd3d9404a, 0x1880, 0xd3d9404b, 0x1880,
-   0xd3d9404c, 0x1880, 0xd3d9404d, 0x1880, 0xd3d9404e, 0x1880,
-   0xd3d9404f, 0x1880, 0xd3d94050, 0x1880, 0xd3d94051, 0x1880,
-   0xd3d94052, 0x1880, 0xd3d94053, 0x1880, 0xd3d94054, 0x1880,
-   0xd3d94055, 0x1880, 0xd3d94056, 0x1880, 0xd3d94057, 0x1880,
-   0xd3d94058, 0x1880, 0xd3d94059, 0x1880, 0xd3d9405a, 0x1880,
-   0xd3d9405b, 0x1880, 0xd3d9405c, 0x1880, 0xd3d9405d, 0x1880,
-   0xd3d9405e, 0x1880, 0xd3d9405f, 0x1880, 0xd3d94060, 0x1880,
-   0xd3d94061, 0x1880, 0xd3d94062, 0x1880, 0xd3d94063, 0x1880,
-   0xd3d94064, 0x1880, 0xd3d94065, 0x1880, 0xd3d94066, 0x1880,
-   0xd3d94067, 0x1880, 0xd3d94068, 0x1880, 0xd3d94069, 0x1880,
-   0xd3d9406a, 0x1880, 0xd3d9406b, 0x1880, 0xd3d9406c, 0x1880,
-   0xd3d9406d, 0x1880, 0xd3d9406e, 0x1880, 0xd3d9406f, 0x1880,
-   0xd3d94070, 0x1880, 0xd3d94071, 0x1880, 0xd3d94072, 0x1880,
-   0xd3d94073, 0x1880, 0xd3d94074, 0x1880, 0xd3d94075, 0x1880,
-   0xd3d94076, 0x1880, 0xd3d94077, 0x1880, 0xd3d94078, 0x1880,
-   0xd3d94079, 0x1880, 0xd3d9407a, 0x1880, 0xd3d9407b, 0x1880,
-   0xd3d9407c, 0x1880, 0xd3d9407d, 0x1880, 0xd3d9407e, 0x1880,
-   0xd3d9407f, 0x1880, 0xd3d94080, 0x1880, 0xd3d94081, 0x1880,
-   0xd3d94082, 0x1880, 0xd3d94083, 0x1880, 0xd3d94084, 0x1880,
-   0xd3d94085, 0x1880, 0xd3d94086, 0x1880, 0xd3d94087, 0x1880,
-   0xd3d94088, 0x1880, 0xd3d94089, 0x1880, 0xd3d9408a, 0x1880,
-   0xd3d9408b, 0x1880, 0xd3d9408c, 0x1880, 0xd3d9408d, 

[PATCH] drm/amdgpu: add synchronization among waves in the same threadgroup

2021-05-11 Thread Dennis Li
It is possible that the previous waves have exited before others are
created, so the other waves may reuse physical resources left by
previous ones. Therefore add barrier instruction to synchronize waves within
the same threadgroup.

Signed-off-by: Dennis Li 

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
index fdd65589f06b..dbad9ef002d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
@@ -93,98 +93,99 @@ static const struct soc15_reg_golden 
golden_settings_gc_9_4_2_alde[] = {
 static const u32 vgpr_init_compute_shader_aldebaran[] = {
0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x0280,
0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208,
-   0x81078407, 0xc0410080, 0x0007, 0xbf8c, 0xd3d94000, 0x1880,
-   0xd3d94001, 0x1880, 0xd3d94002, 0x1880, 0xd3d94003, 0x1880,
-   0xd3d94004, 0x1880, 0xd3d94005, 0x1880, 0xd3d94006, 0x1880,
-   0xd3d94007, 0x1880, 0xd3d94008, 0x1880, 0xd3d94009, 0x1880,
-   0xd3d9400a, 0x1880, 0xd3d9400b, 0x1880, 0xd3d9400c, 0x1880,
-   0xd3d9400d, 0x1880, 0xd3d9400e, 0x1880, 0xd3d9400f, 0x1880,
-   0xd3d94010, 0x1880, 0xd3d94011, 0x1880, 0xd3d94012, 0x1880,
-   0xd3d94013, 0x1880, 0xd3d94014, 0x1880, 0xd3d94015, 0x1880,
-   0xd3d94016, 0x1880, 0xd3d94017, 0x1880, 0xd3d94018, 0x1880,
-   0xd3d94019, 0x1880, 0xd3d9401a, 0x1880, 0xd3d9401b, 0x1880,
-   0xd3d9401c, 0x1880, 0xd3d9401d, 0x1880, 0xd3d9401e, 0x1880,
-   0xd3d9401f, 0x1880, 0xd3d94020, 0x1880, 0xd3d94021, 0x1880,
-   0xd3d94022, 0x1880, 0xd3d94023, 0x1880, 0xd3d94024, 0x1880,
-   0xd3d94025, 0x1880, 0xd3d94026, 0x1880, 0xd3d94027, 0x1880,
-   0xd3d94028, 0x1880, 0xd3d94029, 0x1880, 0xd3d9402a, 0x1880,
-   0xd3d9402b, 0x1880, 0xd3d9402c, 0x1880, 0xd3d9402d, 0x1880,
-   0xd3d9402e, 0x1880, 0xd3d9402f, 0x1880, 0xd3d94030, 0x1880,
-   0xd3d94031, 0x1880, 0xd3d94032, 0x1880, 0xd3d94033, 0x1880,
-   0xd3d94034, 0x1880, 0xd3d94035, 0x1880, 0xd3d94036, 0x1880,
-   0xd3d94037, 0x1880, 0xd3d94038, 0x1880, 0xd3d94039, 0x1880,
-   0xd3d9403a, 0x1880, 0xd3d9403b, 0x1880, 0xd3d9403c, 0x1880,
-   0xd3d9403d, 0x1880, 0xd3d9403e, 0x1880, 0xd3d9403f, 0x1880,
-   0xd3d94040, 0x1880, 0xd3d94041, 0x1880, 0xd3d94042, 0x1880,
-   0xd3d94043, 0x1880, 0xd3d94044, 0x1880, 0xd3d94045, 0x1880,
-   0xd3d94046, 0x1880, 0xd3d94047, 0x1880, 0xd3d94048, 0x1880,
-   0xd3d94049, 0x1880, 0xd3d9404a, 0x1880, 0xd3d9404b, 0x1880,
-   0xd3d9404c, 0x1880, 0xd3d9404d, 0x1880, 0xd3d9404e, 0x1880,
-   0xd3d9404f, 0x1880, 0xd3d94050, 0x1880, 0xd3d94051, 0x1880,
-   0xd3d94052, 0x1880, 0xd3d94053, 0x1880, 0xd3d94054, 0x1880,
-   0xd3d94055, 0x1880, 0xd3d94056, 0x1880, 0xd3d94057, 0x1880,
-   0xd3d94058, 0x1880, 0xd3d94059, 0x1880, 0xd3d9405a, 0x1880,
-   0xd3d9405b, 0x1880, 0xd3d9405c, 0x1880, 0xd3d9405d, 0x1880,
-   0xd3d9405e, 0x1880, 0xd3d9405f, 0x1880, 0xd3d94060, 0x1880,
-   0xd3d94061, 0x1880, 0xd3d94062, 0x1880, 0xd3d94063, 0x1880,
-   0xd3d94064, 0x1880, 0xd3d94065, 0x1880, 0xd3d94066, 0x1880,
-   0xd3d94067, 0x1880, 0xd3d94068, 0x1880, 0xd3d94069, 0x1880,
-   0xd3d9406a, 0x1880, 0xd3d9406b, 0x1880, 0xd3d9406c, 0x1880,
-   0xd3d9406d, 0x1880, 0xd3d9406e, 0x1880, 0xd3d9406f, 0x1880,
-   0xd3d94070, 0x1880, 0xd3d94071, 0x1880, 0xd3d94072, 0x1880,
-   0xd3d94073, 0x1880, 0xd3d94074, 0x1880, 0xd3d94075, 0x1880,
-   0xd3d94076, 0x1880, 0xd3d94077, 0x1880, 0xd3d94078, 0x1880,
-   0xd3d94079, 0x1880, 0xd3d9407a, 0x1880, 0xd3d9407b, 0x1880,
-   0xd3d9407c, 0x1880, 0xd3d9407d, 0x1880, 0xd3d9407e, 0x1880,
-   0xd3d9407f, 0x1880, 0xd3d94080, 0x1880, 0xd3d94081, 0x1880,
-   0xd3d94082, 0x1880, 0xd3d94083, 0x1880, 0xd3d94084, 0x1880,
-   0xd3d94085, 0x1880, 0xd3d94086, 0x1880, 0xd3d94087, 0x1880,
-   0xd3d94088, 0x1880, 0xd3d94089, 0x1880, 0xd3d9408a, 0x1880,
-   0xd3d9408b, 0x1880, 0xd3d9408c, 0x1880, 0xd3d9408d, 0x1880,
-   0xd3d9408e, 0x1880, 0xd3d9408f, 0x1880, 0xd3d94090, 0x1880,
-   0xd3d94091, 0x1880, 0xd3d94092, 0x1880, 0xd3d94093, 0x1880,
-   0xd3d94094, 0x1880, 0xd3d94095, 0x1880, 0xd3d94096, 0x1880,
-   0xd3d94097, 0x1880, 0xd3d94098, 0x1880, 0xd3d94099, 0x1880,
-   0xd3d9409a, 0x1880,