On Tue,  5 May 2026 16:05:13 +0200
Ketil Johnsen <[email protected]> wrote:

Here comes the second part of the review :-).

> diff --git a/drivers/gpu/drm/panthor/panthor_gpu.c 
> b/drivers/gpu/drm/panthor/panthor_gpu.c
> index 2ab444ee8c710..e93042eaf3fc8 100644
> --- a/drivers/gpu/drm/panthor/panthor_gpu.c
> +++ b/drivers/gpu/drm/panthor/panthor_gpu.c
> @@ -100,8 +100,11 @@ static void panthor_gpu_irq_handler(struct 
> panthor_device *ptdev, u32 status)
>                        fault_status, panthor_exception_name(ptdev, 
> fault_status & 0xFF),
>                        address);
>       }
> -     if (status & GPU_IRQ_PROTM_FAULT)
> +     if (status & GPU_IRQ_PROTM_FAULT) {
>               drm_warn(&ptdev->base, "GPU Fault in protected mode\n");
> +             panthor_gpu_disable_protm_fault_interrupt(ptdev);

It's only used in a single place, so I'd just inline the content of
panthor_gpu_disable_protm_fault_interrupt() here. Also, I think
panthor_gpu_disable_protm_fault_interrupt() is not taking the right
lock (see below).

> +             panthor_device_schedule_reset(ptdev);
> +     }
>  
>       spin_lock(&ptdev->gpu->reqs_lock);
>       if (status & ptdev->gpu->pending_reqs) {
> @@ -367,6 +370,10 @@ int panthor_gpu_soft_reset(struct panthor_device *ptdev)
>       unsigned long flags;
>  
>       spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags);
> +
> +     /** Re-enable the protm_irq_fault when reset is complete */
> +     ptdev->gpu->irq.mask |= GPU_IRQ_PROTM_FAULT;

panthor_irq::mask should only be modified with the
panthor_irq::mask_lock held. Besides, we have a helper for
that:

        panthor_gpu_irq_enable_events(&ptdev->gpu->irq, GPU_IRQ_PROTM_FAULT);

> +
>       if (!drm_WARN_ON(&ptdev->base,
>                        ptdev->gpu->pending_reqs & GPU_IRQ_RESET_COMPLETED)) {
>               ptdev->gpu->pending_reqs |= GPU_IRQ_RESET_COMPLETED;
> @@ -427,3 +434,8 @@ void panthor_gpu_resume(struct panthor_device *ptdev)
>       panthor_hw_l2_power_on(ptdev);
>  }
>  
> +void panthor_gpu_disable_protm_fault_interrupt(struct panthor_device *ptdev)
> +{
> +     scoped_guard(spinlock_irqsave, &ptdev->gpu->reqs_lock)
> +             ptdev->gpu->irq.mask &= ~GPU_IRQ_PROTM_FAULT;

Same problem with wrong lock being taken to modify the mask, and
panthor_gpu_irq_disable_events() probably being a better option?

> +}
> diff --git a/drivers/gpu/drm/panthor/panthor_gpu.h 
> b/drivers/gpu/drm/panthor/panthor_gpu.h
> index 12c263a399281..ca66c73f543e6 100644
> --- a/drivers/gpu/drm/panthor/panthor_gpu.h
> +++ b/drivers/gpu/drm/panthor/panthor_gpu.h
> @@ -54,4 +54,10 @@ int panthor_gpu_soft_reset(struct panthor_device *ptdev);
>  void panthor_gpu_power_changed_off(struct panthor_device *ptdev);
>  int panthor_gpu_power_changed_on(struct panthor_device *ptdev);
>  
> +/**
> + * panthor_gpu_disable_protm_fault_interrupt() - Disable GPU_PROTECTED_FAULT 
> interrupt
> + * @ptdev: Device.
> + */
> +void panthor_gpu_disable_protm_fault_interrupt(struct panthor_device *ptdev);
> +
>  #endif
> diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c 
> b/drivers/gpu/drm/panthor/panthor_mmu.c
> index 07f54176ec1bf..702f537905b56 100644
> --- a/drivers/gpu/drm/panthor/panthor_mmu.c
> +++ b/drivers/gpu/drm/panthor/panthor_mmu.c
> @@ -31,6 +31,7 @@
>  #include <linux/sizes.h>
>  
>  #include "panthor_device.h"
> +#include "panthor_fw.h"
>  #include "panthor_gem.h"
>  #include "panthor_gpu.h"
>  #include "panthor_heap.h"
> @@ -1704,8 +1705,12 @@ static int panthor_vm_lock_region(struct panthor_vm 
> *vm, u64 start, u64 size)
>       if (drm_WARN_ON(&ptdev->base, vm->locked_region.size))
>               return -EINVAL;
>  
> +     down_read(&ptdev->protm.lock);
> +
>       mutex_lock(&ptdev->mmu->as.slots_lock);
>       if (vm->as.id >= 0 && size) {
> +             panthor_fw_protm_exit_sync(ptdev);
> +
>               /* Lock the region that needs to be updated */
>               gpu_write64(ptdev, AS_LOCKADDR(vm->as.id),
>                           pack_region_range(ptdev, &start, &size));
> @@ -1720,6 +1725,9 @@ static int panthor_vm_lock_region(struct panthor_vm 
> *vm, u64 start, u64 size)
>       }
>       mutex_unlock(&ptdev->mmu->as.slots_lock);
>  
> +     if (ret)
> +             up_read(&ptdev->protm.lock);

Let's try to keep the locked section local to a function: the protm.lock
should, IMHO, be taken/released in panthor_vm_exec_op(). If we go for
that, this also makes the panthor_vm_lock_region() vs
panthor_vm_expand_locked_region() distinction useless, though it's
probably fine to keep it for clarity.

> +
>       return ret;
>  }
>  
> @@ -1805,6 +1813,8 @@ static void panthor_vm_unlock_region(struct panthor_vm 
> *vm)
>       vm->locked_region.start = 0;
>       vm->locked_region.size = 0;
>       mutex_unlock(&ptdev->mmu->as.slots_lock);
> +
> +     up_read(&ptdev->protm.lock);
>  }
>  
>  static void panthor_mmu_irq_handler(struct panthor_device *ptdev, u32 status)
> diff --git a/drivers/gpu/drm/panthor/panthor_sched.c 
> b/drivers/gpu/drm/panthor/panthor_sched.c
> index 987072bd867c4..acb04250c7def 100644
> --- a/drivers/gpu/drm/panthor/panthor_sched.c
> +++ b/drivers/gpu/drm/panthor/panthor_sched.c
> @@ -308,6 +308,15 @@ struct panthor_scheduler {
>                */
>               struct list_head stopped_groups;
>       } reset;
> +
> +     /** @protm: Protected mode related fields. */
> +     struct {
> +             /** @protected_mode: True if GPU is in protected mode. */
> +             bool protected_mode;

nit: s/protected_mode/enabled/s. But do we even need that boolean?
Isn't active_group != NULL providing the same info?

> +
> +             /** @active_group: The active protected group. */
> +             struct panthor_group *active_group;
> +     } protm;
>  };
>  
>  /**
> @@ -570,6 +579,16 @@ struct panthor_group {
>       /** @fatal_queues: Bitmask reflecting the queues that hit a fatal 
> exception. */
>       u32 fatal_queues;
>  
> +     /**
> +      * @protm_pending_queues: Bitmask reflecting the queues that are waiting
> +      *                        on a CS_PROTM_PENDING.
> +      *
> +      * The GPU will set the bit associated to the queue pending protected 
> mode
> +      * when a PROT_REGION command is executing or when trying to resume 
> previously
> +      * suspended protected mode jobs.
> +      */
> +     u32 protm_pending_queues;
> +
>       /** @tiler_oom: Mask of queues that have a tiler OOM event to process. 
> */
>       atomic_t tiler_oom;
>  
> @@ -1176,6 +1195,7 @@ queue_resume_timeout(struct panthor_queue *queue)
>   * @ptdev: Device.
>   * @csg_id: Group slot ID.
>   * @cs_id: Queue slot ID.
> + * @protm_ack: Acknowledge pending protected mode queues
>   *
>   * Program a queue slot with the queue information so things can start being
>   * executed on this queue.
> @@ -1472,6 +1492,34 @@ csg_slot_prog_locked(struct panthor_device *ptdev, u32 
> csg_id, u32 priority)
>       return 0;
>  }
>  
> +static void
> +cs_slot_process_protm_pending_event_locked(struct panthor_device *ptdev,
> +                                        u32 csg_id, u32 cs_id)
> +{
> +     struct panthor_scheduler *sched = ptdev->scheduler;
> +     struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
> +     struct panthor_group *group = csg_slot->group;
> +
> +     lockdep_assert_held(&sched->lock);
> +
> +     if (!group)
> +             return;
> +
> +     /* No protected memory heap, a user space program tried to
> +      * submit a protected mode jobs resulting in the GPU raising
> +      * a CS_PROTM_PENDING request.
> +      *
> +      * This scenario is invalid and the protected mode jobs must
> +      * not be allowed to progress.
> +      */
> +     if (!ptdev->protm.heap)
> +             return;

Should we flag the group unusable when that happens and schedule it out
as soon as possible?

        if (!ptdev->protm.heap)
                group->fatal_queues |= BIT(cs_id);
        else
                group->protm_pending_queues |= BIT(cs_id);

        sched_queue_delayed_work(sched, tick, 0);

> +
> +     group->protm_pending_queues |= BIT(cs_id);
> +
> +     sched_queue_delayed_work(sched, tick, 0);
> +}
> +
>  static void
>  cs_slot_process_fatal_event_locked(struct panthor_device *ptdev,
>                                  u32 csg_id, u32 cs_id)
> @@ -1718,6 +1766,9 @@ static bool cs_slot_process_irq_locked(struct 
> panthor_device *ptdev,
>       if (events & CS_TILER_OOM)
>               cs_slot_process_tiler_oom_event_locked(ptdev, csg_id, cs_id);
>  
> +     if (events & CS_PROTM_PENDING)
> +             cs_slot_process_protm_pending_event_locked(ptdev, csg_id, 
> cs_id);
> +
>       /* We don't acknowledge the TILER_OOM event since its handling is
>        * deferred to a separate work.
>        */
> @@ -1848,6 +1899,17 @@ static void sched_process_idle_event_locked(struct 
> panthor_device *ptdev)
>       sched_queue_delayed_work(ptdev->scheduler, tick, 0);
>  }
>  
> +static void sched_process_protm_exit_event_locked(struct panthor_device 
> *ptdev)
> +{
> +     lockdep_assert_held(&ptdev->scheduler->lock);
> +
> +     /* Acknowledge the protm exit and schedule a tick. */
> +     panthor_fw_protm_exit(ptdev);

Let's just inline the content of panthor_fw_protm_exit() here.*
It doesn't make sense to have all these indirections, especially
since PROTM and scheduling are intertwined anyway, so I consider
it part of the scheduler responsibility (just like the scheduler
deals with other GLB events).

The same goes for the other panthor_fw_protm_ helpers defined in
panthor_fw.c, I think panthor_sched.c would be a better fit for
those, or even inlining their content if they are only used in
a single place.

> +     sched_queue_delayed_work(ptdev->scheduler, tick, 0);
> +     ptdev->scheduler->protm.protected_mode = false;
> +     ptdev->scheduler->protm.active_group = NULL;
> +}
> +
>  /**
>   * sched_process_global_irq_locked() - Process the scheduling part of a 
> global IRQ
>   * @ptdev: Device.
> @@ -1863,6 +1925,9 @@ static void sched_process_global_irq_locked(struct 
> panthor_device *ptdev)
>       ack = READ_ONCE(glb_iface->output->ack);
>       evts = (req ^ ack) & GLB_EVT_MASK;
>  
> +     if (evts & GLB_PROTM_EXIT)
> +             sched_process_protm_exit_event_locked(ptdev);
> +
>       if (evts & GLB_IDLE)
>               sched_process_idle_event_locked(ptdev);
>  }
> @@ -1872,23 +1937,71 @@ static void process_fw_events_work(struct work_struct 
> *work)
>       struct panthor_scheduler *sched = container_of(work, struct 
> panthor_scheduler,
>                                                     fw_events_work);
>       u32 events = atomic_xchg(&sched->fw_events, 0);
> +     u32 csg_events = events & ~JOB_INT_GLOBAL_IF;
>       struct panthor_device *ptdev = sched->ptdev;
>  
>       mutex_lock(&sched->lock);
>  
> +     while (csg_events) {
> +             u32 csg_id = ffs(csg_events) - 1;
> +
> +             sched_process_csg_irq_locked(ptdev, csg_id);
> +             csg_events &= ~BIT(csg_id);
> +     }

I'm sure you have a good reason to re-order the processing
of CSG and GLB events, and it'd be good to have it documented
in a comment.

> +
>       if (events & JOB_INT_GLOBAL_IF) {
>               sched_process_global_irq_locked(ptdev);
>               events &= ~JOB_INT_GLOBAL_IF;
>       }
>  
> -     while (events) {
> -             u32 csg_id = ffs(events) - 1;
> +     mutex_unlock(&sched->lock);
> +}
>  
> -             sched_process_csg_irq_locked(ptdev, csg_id);
> -             events &= ~BIT(csg_id);
> +static void handle_protm_fault(struct panthor_device *ptdev)

This is a bit of a misnomer, I think. It doesn't seem to be triggered
by a FAULT event, it's more a timeout on a PROTM section that would
lead to a reset being scheduled, and this "blocked-in-protm" situation
being detected as part of the reset.

> +{
> +     struct panthor_scheduler *sched = ptdev->scheduler;
> +     u32 csg_id;
> +     struct panthor_group *protm_group;
> +
> +     guard(mutex)(&sched->lock);
> +
> +     if (!sched->protm.protected_mode)
> +             return;
> +
> +     protm_group = sched->protm.active_group;
> +
> +     if (drm_WARN_ON(&ptdev->base, !protm_group))
> +             return;

See, that's a perfect example of sched->protm.protected_mode being redundant.
Now you're left with a potential protected_mode=true,active_group=NULL
inconsistency you don't expect.

> +
> +     /* Group will be terminated by the device reset */
> +     protm_group->fatal_queues |= GENMASK(protm_group->queue_count - 1, 0);
> +
> +     if (!panthor_fw_protm_exit_wait_event_timeout(ptdev))
> +             goto cleanup_protm;
> +
> +     /**
> +      * GPU failed to exit protected mode. Mark all non-protected mode CSGs

        /* GPU failed to exit protected mode. Mark all non-protected mode CSGs

> +      * as suspended so that they are unaffected by the GPU reset.
> +      */
> +
> +     for (csg_id = 0; csg_id < sched->csg_slot_count; csg_id++) {
> +             struct panthor_group *group = sched->csg_slots[csg_id].group;
> +
> +             if (!group || group == protm_group)
> +                     continue;
> +
> +             group->state = PANTHOR_CS_GROUP_SUSPENDED;
> +
> +             group_unbind_locked(group);
> +
> +             list_move(&group->run_node, group_is_idle(group) ?
> +                                             
> &sched->groups.idle[group->priority] :
> +                                             
> &sched->groups.runnable[group->priority]);

nit: Let's use a local struct list_head * variable to select the right list.

>       }
>  
> -     mutex_unlock(&sched->lock);
> +cleanup_protm:
> +     sched->protm.protected_mode = false;
> +     sched->protm.active_group = NULL;
>  }
>  
>  /**
> @@ -2029,6 +2142,7 @@ struct panthor_sched_tick_ctx {
>       bool immediate_tick;
>       bool stop_tick;
>       u32 csg_upd_failed_mask;
> +     struct panthor_group *protm_group;
>  };
>  
>  static bool
> @@ -2299,6 +2413,7 @@ tick_ctx_evict_group(struct panthor_scheduler *sched,
>  
>  static void
>  tick_ctx_reschedule_group(struct panthor_scheduler *sched,
> +                       struct panthor_sched_tick_ctx *ctx,
>                         struct panthor_csg_slots_upd_ctx *upd_ctx,
>                         struct panthor_group *group,
>                         int new_csg_prio)
> @@ -2321,6 +2436,30 @@ tick_ctx_reschedule_group(struct panthor_scheduler 
> *sched,
>                                       csg_iface->output->ack ^ 
> CSG_ENDPOINT_CONFIG,
>                                       CSG_ENDPOINT_CONFIG);
>       }
> +
> +     if (ctx->protm_group == group) {
> +             for (u32 q = 0; q < group->queue_count; q++) {
> +                     struct panthor_fw_cs_iface *cs_iface;
> +
> +                     if (!(group->protm_pending_queues & BIT(q)))
> +                             continue;
> +
> +                     cs_iface = panthor_fw_get_cs_iface(ptdev, 
> group->csg_id, q);
> +                     panthor_fw_update_reqs(cs_iface, req, 
> cs_iface->output->ack,
> +                                            CS_PROTM_PENDING);
> +             }
> +
> +             panthor_fw_toggle_reqs(csg_iface, doorbell_req, doorbell_ack,
> +                                    group->protm_pending_queues);
> +             csgs_upd_ctx_ring_doorbell(upd_ctx, group->csg_id);
> +             group->protm_pending_queues = 0;
> +
> +             /*
> +              * We only allow one protected group to run at same time,
> +              * as it makes it easier to handle faults in protected mode.

It's more something to document in the panthor_scheduler::protm::active_group
section.

> +              */
> +             sched->protm.active_group = group;

Would it make sense to move this logic to a tick_ctx_handle_protm_group()
helper that's called before/after tick_ctx_reschedule_group()? This way
there's no extra if (ctx->protm_group == group) conditional branch in here.


static void
tick_ctx_handle_protm_group(struct panthor_scheduler *sched,
                            struct panthor_sched_tick_ctx *ctx,
                            struct panthor_csg_slots_upd_ctx *upd_ctx)
{
        struct panthor_device *ptdev = sched->ptdev;
        struct panthor_group *group = ctx->protm_group;
        struct panthor_fw_csg_iface *csg_iface;

        if (!group || drm_WARN_ON(&ptdev->base, group->csg_id < 0))
                return;

        csg_iface = panthor_fw_get_csg_iface(ptdev, group->csg_id);
        for (u32 q = 0; q < group->queue_count; q++) {
                struct panthor_fw_cs_iface *cs_iface;

                if (!(group->protm_pending_queues & BIT(q)))
                        continue;

                cs_iface = panthor_fw_get_cs_iface(ptdev, group->csg_id, q);
                panthor_fw_update_reqs(cs_iface, req, cs_iface->output->ack,
                                       CS_PROTM_PENDING);
        }

        panthor_fw_toggle_reqs(csg_iface, doorbell_req, doorbell_ack,
                               group->protm_pending_queues);
        csgs_upd_ctx_ring_doorbell(upd_ctx, group->csg_id);
        group->protm_pending_queues = 0;
        sched->protm.active_group = group;
}

> +     }
>  }
>  
>  static void
> @@ -2336,6 +2475,17 @@ tick_ctx_schedule_group(struct panthor_scheduler 
> *sched,
>       group_bind_locked(group, csg_id);
>       csg_slot_prog_locked(ptdev, csg_id, csg_prio);
>  
> +     /* If the group was waiting for protected mode before suspension,
> +      * and the tick context enters this mode, it should be serviced
> +      * immediately because the slot reset should have set the
> +      * CS_PROTM_PENDING bit to zero, and cs_prog_slot_locked() sets it to
> +      * zero too.
> +      * It's not clear if we will get a new CS_PROTM_PENDING event in that
> +      * case, but it should be safe either way.
> +      */
> +     if (group->protm_pending_queues && ctx->protm_group)
> +             group->protm_pending_queues = 0;

I'd move this to the path where we do the SUSPEND, or group_unbind(), even.

> +
>       csgs_upd_ctx_queue_reqs(ptdev, upd_ctx, csg_id,
>                               group->state == PANTHOR_CS_GROUP_SUSPENDED ?
>                               CSG_STATE_RESUME : CSG_STATE_START,
> @@ -2365,7 +2515,7 @@ tick_ctx_apply(struct panthor_scheduler *sched, struct 
> panthor_sched_tick_ctx *c
>  
>               /* Update priorities on already running groups. */
>               list_for_each_entry(group, &ctx->groups[prio], run_node) {
> -                     tick_ctx_reschedule_group(sched, &upd_ctx, group, 
> new_csg_prio--);
> +                     tick_ctx_reschedule_group(sched, ctx, &upd_ctx, group, 
> new_csg_prio--);
>               }
>       }
>  
> @@ -2457,6 +2607,15 @@ tick_ctx_apply(struct panthor_scheduler *sched, struct 
> panthor_sched_tick_ctx *c
>  
>       sched->used_csg_slot_count = ctx->group_count;
>       sched->might_have_idle_groups = ctx->idle_group_count > 0;
> +
> +     if (ctx->protm_group) {
> +             ret = panthor_fw_protm_enter(ptdev);
> +             if (ret) {
> +                     panthor_device_schedule_reset(ptdev);
> +                     ctx->csg_upd_failed_mask = U32_MAX;

It's weird to flag it as all CSGs update failed. Should we instead
have

                        /* If we failed to enter PROTM, consider the group who
                         * requested it as failed.
                         */
                        ctx->csg_upd_failed_mask |= 
BIT(ctx->protm_group->csg_id);

> +             }
> +             sched->protm.protected_mode = true;

I'd move that to a tick_ctx_service_protm_req() helper that has the
panthor_fw_protm_enter() inlined, because again, it doesn't make
sense to have this defined in panthor_fw.c if the only user lives
in panthor_sched.c

> +     }
>  }
>  
>  static u64
> @@ -2490,7 +2649,7 @@ static void tick_work(struct work_struct *work)
>       u64 resched_target = sched->resched_target;
>       u64 remaining_jiffies = 0, resched_delay;
>       u64 now = get_jiffies_64();
> -     int prio, ret, cookie;
> +     int prio, protm_prio, ret, cookie;
>       bool full_tick;
>  
>       if (!drm_dev_enter(&ptdev->base, &cookie))
> @@ -2564,14 +2723,49 @@ static void tick_work(struct work_struct *work)
>               }
>       }
>  
> +     /* Check if the highest priority group want to switch to protected mode 
> */
> +     for (protm_prio = PANTHOR_CSG_PRIORITY_COUNT - 1; protm_prio >= 0; 
> protm_prio--) {
> +             struct panthor_group *group;
> +
> +             group = list_first_entry_or_null(&ctx.groups[protm_prio],
> +                                              struct panthor_group,
> +                                              run_node);
> +             if (group) {
> +                     ctx.protm_group = group;
> +                     break;
> +             }

Should this be

                if (group) {
                        if (group->protm_pending_queues)
                                ctx.protm_group = group;

                        break;
                }

?

> +     }
> +
>       /* If we have free CSG slots left, pick idle groups */
> -     for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1;
> -          prio >= 0 && !tick_ctx_is_full(sched, &ctx);
> -          prio--) {

How about we keep it a single indentation level and skip higher prios if
PROTM is requested:

                /* Pick only idle groups with equal or lower priority than the
                 * group triggering protected mode. Do not bother picking
                 * unscheduled idle groups.
                 */
                if (ctx.protm_group && prio < protm_prio)
                        continue;

This saves us an indentation level and limits the code duplication.

> -             /* Check the old_group queue first to avoid reprogramming the 
> slots */
> -             tick_ctx_pick_groups_from_list(sched, &ctx, 
> &ctx.old_groups[prio], false, true);
> -             tick_ctx_pick_groups_from_list(sched, &ctx, 
> &sched->groups.idle[prio],
> -                                            false, false);
> +     if (ctx.protm_group) {
> +             /* Pick only idle groups with equal or lower priority than the
> +              * group triggering protected mode. Do not bother picking
> +              * unscheduled idle groups.
> +              */
> +             for (prio = protm_prio;
> +                  prio >= 0 && !tick_ctx_is_full(sched, &ctx);
> +                  prio--)
> +                     tick_ctx_pick_groups_from_list(sched, &ctx,
> +                                                    &ctx.old_groups[prio],
> +                                                    false, true);
> +     } else {
> +             /* No switch to protected, just pick any idle group according
> +              * to priority
> +              */
> +             for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1;
> +                  prio >= 0 && !tick_ctx_is_full(sched, &ctx);
> +                  prio--) {
> +                     /* Check the old_group queue first to avoid
> +                      * reprogramming the slots
> +                      */
> +                     tick_ctx_pick_groups_from_list(sched, &ctx,
> +                                                    &ctx.old_groups[prio],
> +                                                    false, true);
> +                     tick_ctx_pick_groups_from_list(sched, &ctx,
> +                                                    
> &sched->groups.idle[prio],
> +                                                    false, false);
> +             }
> +
>       }
>  
>       tick_ctx_apply(sched, &ctx);
> @@ -2993,6 +3187,8 @@ void panthor_sched_pre_reset(struct panthor_device 
> *ptdev)
>       cancel_work_sync(&sched->sync_upd_work);
>       cancel_delayed_work_sync(&sched->tick_work);
>  
> +     handle_protm_fault(ptdev);

I actually wonder if this should be part of the panthor_sched_suspend()
logic. That is, we would automatically flag all non-protm groups as
suspended if the GPU was in PROTM mode at the time the hang happened.

> +
>       panthor_sched_suspend(ptdev);
>  
>       /* Stop all groups that might still accept jobs, so we don't get passed

Regards,

Boris

Reply via email to