Re: [PATCH] drm/amdgpu: Disable cwsr for vega10 and Sienna_Cichlid in sriov

2021-05-19 Thread Felix Kuehling

Am 2021-05-19 um 5:02 a.m. schrieb Chengzhe Liu:
> In sriov, cwsr is not stable
NAK. Without CWSR, ROCm is not stable. Any compute application with long
running waves can cause a hang.

Regards,
  Felix

>
> Signed-off-by: Chengzhe Liu 
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_device.c | 24 +++++++++++++++++++++---
>  1 file changed, 21 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index 80015e866498..89bd0059329b 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -335,7 +335,7 @@ static const struct kfd_device_info vega10_vf_device_info 
> = {
>   .event_interrupt_class = &event_interrupt_class_v9,
>   .num_of_watch_points = 4,
>   .mqd_size_aligned = MQD_SIZE_ALIGNED,
> - .supports_cwsr = true,
> + .supports_cwsr = false,
>   .needs_iommu_device = false,
>   .needs_pci_atomics = false,
>   .num_sdma_engines = 2,
> @@ -505,6 +505,24 @@ static const struct kfd_device_info 
> sienna_cichlid_device_info = {
>   .num_sdma_queues_per_engine = 8,
>  };
>  
> +static const struct kfd_device_info sienna_cichlid_vf_device_info = {
> + .asic_family = CHIP_SIENNA_CICHLID,
> + .asic_name = "sienna_cichlid",
> + .max_pasid_bits = 16,
> + .max_no_of_hqd  = 24,
> + .doorbell_size  = 8,
> + .ih_ring_entry_size = 8 * sizeof(uint32_t),
> + .event_interrupt_class = &event_interrupt_class_v10,
> + .num_of_watch_points = 4,
> + .mqd_size_aligned = MQD_SIZE_ALIGNED,
> + .needs_iommu_device = false,
> + .supports_cwsr = false,
> + .needs_pci_atomics = true,
> + .num_sdma_engines = 4,
> + .num_xgmi_sdma_engines = 0,
> + .num_sdma_queues_per_engine = 8,
> +};
> +
>  static const struct kfd_device_info navy_flounder_device_info = {
>   .asic_family = CHIP_NAVY_FLOUNDER,
>   .asic_name = "navy_flounder",
> @@ -601,7 +619,7 @@ static const struct kfd_device_info 
> *kfd_supported_devices[][2] = {
>   [CHIP_NAVI10] = {&navi10_device_info, NULL},
>   [CHIP_NAVI12] = {&navi12_device_info, &navi12_device_info},
>   [CHIP_NAVI14] = {&navi14_device_info, NULL},
> - [CHIP_SIENNA_CICHLID] = {&sienna_cichlid_device_info, 
> &sienna_cichlid_device_info},
> + [CHIP_SIENNA_CICHLID] = {&sienna_cichlid_device_info, 
> &sienna_cichlid_vf_device_info},
>   [CHIP_NAVY_FLOUNDER] = {&navy_flounder_device_info, 
> &navy_flounder_device_info},
>   [CHIP_VANGOGH] = {&vangogh_device_info, NULL},
>   [CHIP_DIMGREY_CAVEFISH] = {&dimgrey_cavefish_device_info, 
> &dimgrey_cavefish_device_info},
> @@ -674,7 +692,7 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
>  
>  static void kfd_cwsr_init(struct kfd_dev *kfd)
>  {
> - if (cwsr_enable && kfd->device_info->supports_cwsr) {
> + if ((cwsr_enable && kfd->device_info->supports_cwsr) || cwsr_enable == 
> 2) {
>   if (kfd->device_info->asic_family < CHIP_VEGA10) {
>   BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
>   kfd->cwsr_isa = cwsr_trap_gfx8_hex;
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: Disable cwsr for vega10 and Sienna_Cichlid in sriov

2021-05-19 Thread Chengzhe Liu
In sriov, cwsr is not stable

Signed-off-by: Chengzhe Liu 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 80015e866498..89bd0059329b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -335,7 +335,7 @@ static const struct kfd_device_info vega10_vf_device_info = 
{
.event_interrupt_class = &event_interrupt_class_v9,
.num_of_watch_points = 4,
.mqd_size_aligned = MQD_SIZE_ALIGNED,
-   .supports_cwsr = true,
+   .supports_cwsr = false,
.needs_iommu_device = false,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
@@ -505,6 +505,24 @@ static const struct kfd_device_info 
sienna_cichlid_device_info = {
.num_sdma_queues_per_engine = 8,
 };
 
+static const struct kfd_device_info sienna_cichlid_vf_device_info = {
+   .asic_family = CHIP_SIENNA_CICHLID,
+   .asic_name = "sienna_cichlid",
+   .max_pasid_bits = 16,
+   .max_no_of_hqd  = 24,
+   .doorbell_size  = 8,
+   .ih_ring_entry_size = 8 * sizeof(uint32_t),
+   .event_interrupt_class = &event_interrupt_class_v10,
+   .num_of_watch_points = 4,
+   .mqd_size_aligned = MQD_SIZE_ALIGNED,
+   .needs_iommu_device = false,
+   .supports_cwsr = false,
+   .needs_pci_atomics = true,
+   .num_sdma_engines = 4,
+   .num_xgmi_sdma_engines = 0,
+   .num_sdma_queues_per_engine = 8,
+};
+
 static const struct kfd_device_info navy_flounder_device_info = {
.asic_family = CHIP_NAVY_FLOUNDER,
.asic_name = "navy_flounder",
@@ -601,7 +619,7 @@ static const struct kfd_device_info 
*kfd_supported_devices[][2] = {
[CHIP_NAVI10] = {&navi10_device_info, NULL},
[CHIP_NAVI12] = {&navi12_device_info, &navi12_device_info},
[CHIP_NAVI14] = {&navi14_device_info, NULL},
-   [CHIP_SIENNA_CICHLID] = {&sienna_cichlid_device_info, 
&sienna_cichlid_device_info},
+   [CHIP_SIENNA_CICHLID] = {&sienna_cichlid_device_info, 
&sienna_cichlid_vf_device_info},
[CHIP_NAVY_FLOUNDER] = {&navy_flounder_device_info, 
&navy_flounder_device_info},
[CHIP_VANGOGH] = {&vangogh_device_info, NULL},
[CHIP_DIMGREY_CAVEFISH] = {&dimgrey_cavefish_device_info, 
&dimgrey_cavefish_device_info},
@@ -674,7 +692,7 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
 
 static void kfd_cwsr_init(struct kfd_dev *kfd)
 {
-   if (cwsr_enable && kfd->device_info->supports_cwsr) {
+   if ((cwsr_enable && kfd->device_info->supports_cwsr) || cwsr_enable == 
2) {
if (kfd->device_info->asic_family < CHIP_VEGA10) {
BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
kfd->cwsr_isa = cwsr_trap_gfx8_hex;
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx