[AMD Official Use Only - AMD Internal Distribution Only]

Thanks,
Lijo
>-----Original Message-----
>From: amd-gfx <[email protected]> On Behalf Of Ellen
>Pan
>Sent: Thursday, October 9, 2025 9:00 AM
>To: [email protected]
>Cc: Deucher, Alexander <[email protected]>; Koenig, Christian
><[email protected]>; Gande, Shravan kumar
><[email protected]>; Pan, Ellen <[email protected]>
>Subject: [PATCH 3/6] drm/amdgpu: Introduce SRIOV critical regions v2 during
>VF init
>
>    1. Introduced amdgpu_virt_init_critical_region during VF init.
>     - VFs use init_data_header_offset and init_data_header_size_kb
>            transmitted via PF2VF mailbox to fetch the offset of
>            critical regions' offsets/sizes in VRAM and save to
>            adev->virt.crit_region_offsets and adev->virt.crit_region_sizes_kb.
>
>Signed-off-by: Ellen Pan <[email protected]>
>---
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  |   6 ++
> drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c    | 103
>++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h    |   7 ++
> drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h |  31 ++++++
> 4 files changed, 147 insertions(+)
>
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>index 929936c8d87c..2a33b950d511 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>@@ -2754,6 +2754,12 @@ static int amdgpu_device_ip_early_init(struct
>amdgpu_device *adev)
>               r = amdgpu_virt_request_full_gpu(adev, true);
>               if (r)
>                       return r;
>+
>+              if (adev->virt.req_init_data_ver == GPU_CRIT_REGION_V2) {
>+                      r = amdgpu_virt_init_critical_region(adev);
>+                      if (r)
>+                              return r;
>+              }
>       }
>
>       switch (adev->asic_type) {
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>index 3a6b0e1084d7..46c19e96086a 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>@@ -843,6 +843,109 @@ static void amdgpu_virt_init_ras(struct
>amdgpu_device *adev)
>       adev->virt.ras.cper_rptr = 0;
> }
>
>+static uint8_t amdgpu_virt_crit_region_calc_checksum(uint8_t
>+*buf_start, uint8_t *buf_end) {
>+      uint32_t sum = 0;
>+
>+      if (buf_start >= buf_end)
>+              return 0;
>+
>+      for (; buf_start < buf_end; buf_start++)
>+              sum += buf_start[0];
>+
>+      return 0xffffffff - sum;
>+}
>+
>+#define mmRCC_CONFIG_MEMSIZE  0xde3
>+int amdgpu_virt_init_critical_region(struct amdgpu_device *adev) {
>+      struct amd_sriov_msg_init_data_header *init_data_hdr = NULL;
>+      uint32_t init_hdr_offset = adev->virt.init_data_header_offset;
>+      uint32_t init_hdr_size = adev->virt.init_data_header_size_kb << 10;
>+      uint64_t pos = 0;
>+      uint64_t vram_size;
>+      int r = 0;
>+      uint8_t checksum = 0;
>+
>+      if (init_hdr_offset < 0) {
>+              DRM_ERROR("Invalid init header offset\n");
>+              return -EINVAL;
>+      }
>+
>+      vram_size = RREG32(mmRCC_CONFIG_MEMSIZE) << 20;
[lijo]
We also check the value read from this register against all 0xFFs, which is
an invalid reading; that check should be applied here as well.

>+      if ((init_hdr_offset + init_hdr_size) > vram_size) {
>+              DRM_ERROR("init_data_header exceeds VRAM size,
>exiting\n");
>+              return -EINVAL;
>+      }
>+
>+      /* Allocate for init_data_hdr */
>+      init_data_hdr = kzalloc(sizeof(struct
>amd_sriov_msg_init_data_header), GFP_KERNEL);
>+      if (!init_data_hdr)
>+              return -ENOMEM;
>+
>+      pos = (uint64_t)init_hdr_offset;
>+      amdgpu_device_vram_access(adev, pos, (uint32_t *)init_data_hdr,
>+                                      sizeof(struct
>amd_sriov_msg_init_data_header), false);
>+
>+      switch (init_data_hdr->version) {
>+      case GPU_CRIT_REGION_V2:
>+              if (strncmp(init_data_hdr->signature, "INDA", 4) != 0) {
>+                      DRM_ERROR("Invalid init data signature: %.4s\n",
>init_data_hdr->signature);
>+                      r = -EINVAL;
>+                      goto out;
>+              }
>+
>+              checksum =
>+                      amdgpu_virt_crit_region_calc_checksum((uint8_t
>*)&init_data_hdr->initdata_offset,
>+                              (uint8_t *)init_data_hdr + sizeof(struct
>amd_sriov_msg_init_data_header));
>+              if (checksum != init_data_hdr->checksum) {
>+                      DRM_ERROR("Found unmatching checksum from
>calculation 0x%x and init_data 0x%x\n",
>+                                              checksum, init_data_hdr-
>>checksum);
>+                      r = -EINVAL;
>+                      goto out;
>+              }
>+
>+              /* Initialize critical region offsets */
>+              adev->virt.crit_region_base_offset = init_data_hdr-
>>initdata_offset;
>+              adev-
>>virt.crit_region_offsets[AMD_SRIOV_MSG_IPD_TABLE_ID] =
>+                      init_data_hdr->ip_discovery_offset;
>+              adev-
>>virt.crit_region_offsets[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID] =
>+                      init_data_hdr->vbios_img_offset;
>+              adev-
>>virt.crit_region_offsets[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID] =
>+                      init_data_hdr->ras_tele_info_offset;
>+              adev-
>>virt.crit_region_offsets[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID] =
>+                      init_data_hdr->dataexchange_offset;
>+              adev-
>>virt.crit_region_offsets[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID] =
>+                      init_data_hdr->bad_page_info_offset;
>+
>+              /* Initialize critical region sizes */
>+              adev->virt.crit_region_size_in_kb = init_data_hdr-
>>initdata_size_in_kb;
>+              adev-
>>virt.crit_region_sizes_kb[AMD_SRIOV_MSG_IPD_TABLE_ID] =
>+                      init_data_hdr->ip_discovery_size_in_kb;
>+              adev-
>>virt.crit_region_sizes_kb[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID] =
>+                      init_data_hdr->vbios_img_size_in_kb;
>+              adev-
>>virt.crit_region_sizes_kb[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID] =
>+                      init_data_hdr->ras_tele_info_size_in_kb;
>+              adev-
>>virt.crit_region_sizes_kb[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID] =
>+                      init_data_hdr->dataexchange_size_in_kb;
>+              adev-
>>virt.crit_region_sizes_kb[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID] =
>+                      init_data_hdr->bad_page_size_in_kb;
>+
>+              adev->virt.init_data_done = true;
>+              break;
>+      default:
>+              DRM_ERROR("Invalid init header version: %u\n",
>init_data_hdr->version);
>+              r = -EINVAL;
>+              goto out;
>+      }
>+
>+out:
>+      kfree(init_data_hdr);
>+      init_data_hdr = NULL;
>+
>+      return r;
>+}
>+
> void amdgpu_virt_init(struct amdgpu_device *adev)  {
>       bool is_sriov = false;
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
>index 2a0627596bd2..5f6014b2f349 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
>@@ -292,6 +292,11 @@ struct amdgpu_virt {
>       /* critical regions v2 */
>       uint32_t init_data_header_offset;
>       uint32_t init_data_header_size_kb;
>+      uint32_t crit_region_base_offset;
>+      uint32_t crit_region_size_in_kb;
>+      uint64_t crit_region_offsets[AMD_SRIOV_MSG_MAX_TABLE_ID];
>+      uint64_t crit_region_sizes_kb[AMD_SRIOV_MSG_MAX_TABLE_ID];
[lijo]

For this kind of data, it's better to keep each offset/size pair together in
a struct, for example:
        struct amdgpu_virt_region       crit_regn;
        struct amdgpu_virt_region       crit_regn_tbl[AMD_SRIOV_MSG_MAX_TABLE_ID];

Thanks,
Lijo

>+      bool init_data_done;
>
>       /* vf2pf message */
>       struct delayed_work vf2pf_work;
>@@ -428,6 +433,8 @@ void amdgpu_virt_exchange_data(struct
>amdgpu_device *adev);  void amdgpu_virt_fini_data_exchange(struct
>amdgpu_device *adev);  void amdgpu_virt_init(struct amdgpu_device *adev);
>
>+int amdgpu_virt_init_critical_region(struct amdgpu_device *adev);
>+
> bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev);  int
>amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev);  void
>amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev); diff --git
>a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
>b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
>index b53caab5b706..d15c256f9abd 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
>@@ -70,6 +70,37 @@ enum amd_sriov_crit_region_version {
>       GPU_CRIT_REGION_V2 = 2,
> };
>
>+/* v2 layout offset enum (in order of allocation) */ enum
>+amd_sriov_msg_table_id_enum {
>+      AMD_SRIOV_MSG_IPD_TABLE_ID = 0,
>+      AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID,
>+      AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID,
>+      AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID,
>+      AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID,
>+      AMD_SRIOV_MSG_INITD_H_TABLE_ID,
>+      AMD_SRIOV_MSG_MAX_TABLE_ID,
>+};
>+
>+struct amd_sriov_msg_init_data_header {
>+      char     signature[4];  /* "INDA"  */
>+      uint32_t version;
>+      uint32_t checksum;
>+      uint32_t initdata_offset; /* 0 */
>+      uint32_t initdata_size_in_kb; /* 5MB */
>+      uint32_t valid_tables;
>+      uint32_t vbios_img_offset;
>+      uint32_t vbios_img_size_in_kb;
>+      uint32_t dataexchange_offset;
>+      uint32_t dataexchange_size_in_kb;
>+      uint32_t ras_tele_info_offset;
>+      uint32_t ras_tele_info_size_in_kb;
>+      uint32_t ip_discovery_offset;
>+      uint32_t ip_discovery_size_in_kb;
>+      uint32_t bad_page_info_offset;
>+      uint32_t bad_page_size_in_kb;
>+      uint32_t reserved[8];
>+};
>+
> /*
>  * PF2VF history log:
>  * v1 defined in amdgim
>--
>2.34.1

Reply via email to