Re: [PATCH 14/24] drm/amdkfd: Handle remaining BUG_ONs more gracefully v2

2017-08-16 Thread Oded Gabbay
On Wed, Aug 16, 2017 at 6:00 AM, Felix Kuehling  wrote:
> In most cases, BUG_ONs can be replaced with WARN_ON with an error
> return. In some void functions just turn them into a WARN_ON and
> possibly an early exit.
>
> v2:
> * Cleaned up error handling in pm_send_unmap_queue
> * Removed redundant WARN_ON in kfd_process_destroy_delayed
>
> Signed-off-by: Felix Kuehling 
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c|  3 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c|  3 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_device.c| 16 
>  .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 19 +-
>  .../drm/amd/amdkfd/kfd_device_queue_manager_cik.c  |  2 +-
>  .../drm/amd/amdkfd/kfd_device_queue_manager_vi.c   |  2 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c  | 20 +++---
>  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c   |  3 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c|  3 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c| 44 
> ++
>  drivers/gpu/drm/amd/amdkfd/kfd_pasid.c |  4 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c   |  9 ++---
>  .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c |  7 ++--
>  drivers/gpu/drm/amd/amdkfd/kfd_topology.c  |  4 +-
>  14 files changed, 84 insertions(+), 55 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
> index 3841cad..0aa021a 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
> @@ -60,7 +60,8 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
> unsigned int *ib_packet_buff;
> int status;
>
> -   BUG_ON(!size_in_bytes);
> +   if (WARN_ON(!size_in_bytes))
> +   return -EINVAL;
>
> kq = dbgdev->kq;
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
> index 2dc..3da25f7 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
> @@ -64,7 +64,8 @@ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct 
> kfd_dev *pdev)
> enum DBGDEV_TYPE type = DBGDEV_TYPE_DIQ;
> struct kfd_dbgmgr *new_buff;
>
> -   BUG_ON(!pdev->init_complete);
> +   if (WARN_ON(!pdev->init_complete))
> +   return false;
>
> new_buff = kfd_alloc_struct(new_buff);
> if (!new_buff) {
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index 416955f..f628ac3 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -98,7 +98,7 @@ static const struct kfd_device_info 
> *lookup_device_info(unsigned short did)
>
> for (i = 0; i < ARRAY_SIZE(supported_devices); i++) {
> if (supported_devices[i].did == did) {
> -   BUG_ON(!supported_devices[i].device_info);
> +   WARN_ON(!supported_devices[i].device_info);
> return supported_devices[i].device_info;
> }
> }
> @@ -212,9 +212,8 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int 
> pasid,
> flags);
>
> dev = kfd_device_by_pci_dev(pdev);
> -   BUG_ON(!dev);
> -
> -   kfd_signal_iommu_event(dev, pasid, address,
> +   if (!WARN_ON(!dev))
> +   kfd_signal_iommu_event(dev, pasid, address,
> flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC);
>
> return AMD_IOMMU_INV_PRI_RSP_INVALID;
> @@ -397,9 +396,12 @@ static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned 
> int buf_size,
>  {
> unsigned int num_of_longs;
>
> -   BUG_ON(buf_size < chunk_size);
> -   BUG_ON(buf_size == 0);
> -   BUG_ON(chunk_size == 0);
> +   if (WARN_ON(buf_size < chunk_size))
> +   return -EINVAL;
> +   if (WARN_ON(buf_size == 0))
> +   return -EINVAL;
> +   if (WARN_ON(chunk_size == 0))
> +   return -EINVAL;
>
> kfd->gtt_sa_chunk_size = chunk_size;
> kfd->gtt_sa_num_of_chunks = buf_size / chunk_size;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 2486dfb..e553c5e 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -388,7 +388,8 @@ static struct mqd_manager *get_mqd_manager_nocpsch(
>  {
> struct mqd_manager *mqd;
>
> -   BUG_ON(type >= KFD_MQD_TYPE_MAX);
> +   if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
> +   return NULL;
>
> pr_debug("mqd type %d\n", type);
>
> @@ -513,7 +514,7 @@ static void uninitialize_nocpsch(struct 
> device_queue_manager *dqm)
>  {
> int i;
>
> -   BUG_ON(dqm->queue_count > 0 || 

[PATCH 14/24] drm/amdkfd: Handle remaining BUG_ONs more gracefully v2

2017-08-15 Thread Felix Kuehling
In most cases, BUG_ONs can be replaced with WARN_ON with an error
return. In some void functions just turn them into a WARN_ON and
possibly an early exit.

v2:
* Cleaned up error handling in pm_send_unmap_queue
* Removed redundant WARN_ON in kfd_process_destroy_delayed

Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c|  3 +-
 drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c|  3 +-
 drivers/gpu/drm/amd/amdkfd/kfd_device.c| 16 
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 19 +-
 .../drm/amd/amdkfd/kfd_device_queue_manager_cik.c  |  2 +-
 .../drm/amd/amdkfd/kfd_device_queue_manager_vi.c   |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c  | 20 +++---
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c   |  3 +-
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c|  3 +-
 drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c| 44 ++
 drivers/gpu/drm/amd/amdkfd/kfd_pasid.c |  4 +-
 drivers/gpu/drm/amd/amdkfd/kfd_process.c   |  9 ++---
 .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c |  7 ++--
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c  |  4 +-
 14 files changed, 84 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
index 3841cad..0aa021a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
@@ -60,7 +60,8 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
unsigned int *ib_packet_buff;
int status;
 
-   BUG_ON(!size_in_bytes);
+   if (WARN_ON(!size_in_bytes))
+   return -EINVAL;
 
kq = dbgdev->kq;
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
index 2dc..3da25f7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
@@ -64,7 +64,8 @@ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct 
kfd_dev *pdev)
enum DBGDEV_TYPE type = DBGDEV_TYPE_DIQ;
struct kfd_dbgmgr *new_buff;
 
-   BUG_ON(!pdev->init_complete);
+   if (WARN_ON(!pdev->init_complete))
+   return false;
 
new_buff = kfd_alloc_struct(new_buff);
if (!new_buff) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 416955f..f628ac3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -98,7 +98,7 @@ static const struct kfd_device_info 
*lookup_device_info(unsigned short did)
 
for (i = 0; i < ARRAY_SIZE(supported_devices); i++) {
if (supported_devices[i].did == did) {
-   BUG_ON(!supported_devices[i].device_info);
+   WARN_ON(!supported_devices[i].device_info);
return supported_devices[i].device_info;
}
}
@@ -212,9 +212,8 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int 
pasid,
flags);
 
dev = kfd_device_by_pci_dev(pdev);
-   BUG_ON(!dev);
-
-   kfd_signal_iommu_event(dev, pasid, address,
+   if (!WARN_ON(!dev))
+   kfd_signal_iommu_event(dev, pasid, address,
flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC);
 
return AMD_IOMMU_INV_PRI_RSP_INVALID;
@@ -397,9 +396,12 @@ static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned 
int buf_size,
 {
unsigned int num_of_longs;
 
-   BUG_ON(buf_size < chunk_size);
-   BUG_ON(buf_size == 0);
-   BUG_ON(chunk_size == 0);
+   if (WARN_ON(buf_size < chunk_size))
+   return -EINVAL;
+   if (WARN_ON(buf_size == 0))
+   return -EINVAL;
+   if (WARN_ON(chunk_size == 0))
+   return -EINVAL;
 
kfd->gtt_sa_chunk_size = chunk_size;
kfd->gtt_sa_num_of_chunks = buf_size / chunk_size;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 2486dfb..e553c5e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -388,7 +388,8 @@ static struct mqd_manager *get_mqd_manager_nocpsch(
 {
struct mqd_manager *mqd;
 
-   BUG_ON(type >= KFD_MQD_TYPE_MAX);
+   if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
+   return NULL;
 
pr_debug("mqd type %d\n", type);
 
@@ -513,7 +514,7 @@ static void uninitialize_nocpsch(struct 
device_queue_manager *dqm)
 {
int i;
 
-   BUG_ON(dqm->queue_count > 0 || dqm->processes_count > 0);
+   WARN_ON(dqm->queue_count > 0 || dqm->processes_count > 0);
 
kfree(dqm->allocated_queues);
for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
@@ -1129,8 +1130,8 @@ struct device_queue_manager 
*device_queue_manager_init(struct kfd_dev *dev)