On 2/16/2026 8:51 AM, Bjorn Andersson wrote:
> On Sun, Feb 15, 2026 at 11:51:35PM +0530, Ekansh Gupta wrote:
>> For any remote call to DSP, after sending an invocation message,
>> fastRPC driver waits for glink response and during this time the
>> CPU can go into low power modes. This adds latency to overall fastrpc
>> call as CPU wakeup and scheduling latencies are included. Add polling
>> mode support with which fastRPC driver will poll continuously on a
>> memory after sending a message to remote subsystem which will eliminate
>> CPU wakeup and scheduling latencies and reduce fastRPC overhead. Poll
>> mode can be enabled by user by using FASTRPC_IOCTL_SET_OPTION ioctl
>> request with FASTRPC_POLL_MODE request id.
>>
>> Signed-off-by: Ekansh Gupta <[email protected]>
>> ---
>>  drivers/misc/fastrpc.c      | 142 ++++++++++++++++++++++++++++++++++--
>>  include/uapi/misc/fastrpc.h |  10 +++
>>  2 files changed, 145 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
>> index e935ae3776b4..c1e67dbacf2c 100644
>> --- a/drivers/misc/fastrpc.c
>> +++ b/drivers/misc/fastrpc.c
>> @@ -23,6 +23,8 @@
>>  #include <uapi/misc/fastrpc.h>
>>  #include <linux/of_reserved_mem.h>
>>  #include <linux/bits.h>
>> +#include <linux/compiler.h>
>> +#include <linux/iopoll.h>
>>  
>>  #define ADSP_DOMAIN_ID (0)
>>  #define MDSP_DOMAIN_ID (1)
>> @@ -37,6 +39,7 @@
>>  #define FASTRPC_CTX_MAX (256)
>>  #define FASTRPC_INIT_HANDLE 1
>>  #define FASTRPC_DSP_UTILITIES_HANDLE        2
>> +#define FASTRPC_MAX_STATIC_HANDLE (20)
>>  #define FASTRPC_CTXID_MASK GENMASK(15, 8)
>>  #define INIT_FILELEN_MAX (2 * 1024 * 1024)
>>  #define INIT_FILE_NAMELEN_MAX (128)
>> @@ -105,6 +108,12 @@
>>  
>>  #define miscdev_to_fdevice(d) container_of(d, struct fastrpc_device, 
>> miscdev)
>>  
>> +/* Poll response number from remote processor for call completion */
>> +#define FASTRPC_POLL_RESPONSE (0xdecaf)
>> +
>> +/* Polling mode timeout limit */
>> +#define FASTRPC_POLL_MAX_TIMEOUT_US (10000)
>> +
>>  struct fastrpc_phy_page {
>>      dma_addr_t addr;        /* dma address */
>>      u64 size;               /* size of contiguous region */
>> @@ -235,8 +244,14 @@ struct fastrpc_invoke_ctx {
>>      u32 sc;
>>      u64 *fdlist;
>>      u32 *crc;
>> +    /* Poll memory that DSP updates */
>> +    u32 *poll;
>>      u64 ctxid;
>>      u64 msg_sz;
>> +    /* work done status flag */
>> +    bool is_work_done;
>> +    /* process updates poll memory instead of glink response */
>> +    bool is_polled;
>>      struct kref refcount;
>>      struct list_head node; /* list of ctxs */
>>      struct completion work;
>> @@ -307,6 +322,8 @@ struct fastrpc_user {
>>      int client_id;
>>      int pd;
>>      bool is_secure_dev;
>> +    /* Flags poll mode state */
>> +    bool poll_mode;
>>      /* Lock for lists */
>>      spinlock_t lock;
>>      /* lock for allocations */
>> @@ -924,7 +941,8 @@ static int fastrpc_get_meta_size(struct 
>> fastrpc_invoke_ctx *ctx)
>>              sizeof(struct fastrpc_invoke_buf) +
>>              sizeof(struct fastrpc_phy_page)) * ctx->nscalars +
>>              sizeof(u64) * FASTRPC_MAX_FDLIST +
>> -            sizeof(u32) * FASTRPC_MAX_CRCLIST;
>> +            sizeof(u32) * FASTRPC_MAX_CRCLIST +
>> +            sizeof(u32);
>>  
>>      return size;
>>  }
>> @@ -1020,6 +1038,9 @@ static int fastrpc_get_args(u32 kernel, struct 
>> fastrpc_invoke_ctx *ctx)
>>      list = fastrpc_invoke_buf_start(rpra, ctx->nscalars);
>>      pages = fastrpc_phy_page_start(list, ctx->nscalars);
>>      ctx->fdlist = (u64 *)(pages + ctx->nscalars);
>> +    ctx->poll = (u32 *)((uintptr_t)ctx->fdlist + sizeof(u64) * 
>> FASTRPC_MAX_FDLIST +
>> +                         sizeof(u32) * FASTRPC_MAX_CRCLIST);
>> +
>>      args = (uintptr_t)ctx->buf->virt + metalen;
>>      rlen = pkt_size - metalen;
>>      ctx->rpra = rpra;
>> @@ -1188,6 +1209,75 @@ static int fastrpc_invoke_send(struct 
>> fastrpc_session_ctx *sctx,
>>  
>>  }
>>  
>> +static inline u32 fastrpc_poll_op(void *p)
>> +{
>> +    struct fastrpc_invoke_ctx *ctx = p;
>> +
>> +    dma_rmb();
>> +    return READ_ONCE(*ctx->poll);
>> +}
>> +
>> +static int poll_for_remote_response(struct fastrpc_invoke_ctx *ctx)
>> +{
>> +    u32 val;
>> +    int ret;
>> +
>> +    /*
>> +     * Poll until DSP writes FASTRPC_POLL_RESPONSE into *ctx->poll
>> +     * or until another path marks the work done.
>> +     */
>> +    ret = read_poll_timeout_atomic(fastrpc_poll_op, val,
>> +                                   (val == FASTRPC_POLL_RESPONSE) ||
>> +                                   ctx->is_work_done, 1,
> Weird line wrap of the conditional, please put the val == and the
> ctx->is_work_done on the same line - it's just 90 characters.
Ack.
>
>> +                                   FASTRPC_POLL_MAX_TIMEOUT_US, false, ctx);
>> +
>> +    if (!ret && val == FASTRPC_POLL_RESPONSE) {
>> +            ctx->is_work_done = true;
>> +            ctx->retval = 0;
>> +    }
>> +
>> +    if (ret == -ETIMEDOUT)
>> +            ret = -EIO;
>> +
>> +    return ret;
>> +}
>> +
>> +static inline int fastrpc_wait_for_response(struct fastrpc_invoke_ctx *ctx,
>> +                                        u32 kernel)
>> +{
>> +    int err = 0;
>> +
>> +    if (kernel) {
>> +            if (!wait_for_completion_timeout(&ctx->work, 10 * HZ))
>> +                    err = -ETIMEDOUT;
>> +    } else {
>> +            err = wait_for_completion_interruptible(&ctx->work);
>> +    }
>> +
>> +    return err;
>> +}
>> +
>> +static int fastrpc_wait_for_completion(struct fastrpc_invoke_ctx *ctx,
>> +                                   u32 kernel)
>> +{
>> +    int err;
>> +
>> +    do {
>> +            if (ctx->is_polled) {
>> +                    err = poll_for_remote_response(ctx);
>> +                    /* If polling timed out, move to normal response mode */
> I had already written to question the lack of fallback to non-polling
> mode and how this would prohibit me from mixing expected long and short
> calls...
>
> Would certainly be nice to clarify this behavior in the commit
> message...
I'll add more details for this.
>
>> +                    if (err)
>> +                            ctx->is_polled = false;
>> +            } else {
>> +                    err = fastrpc_wait_for_response(ctx, kernel);
>> +                    if (err)
>> +                            return err;
>> +            }
>> +    } while (!ctx->is_work_done);
>> +
>> +    return err;
> Isn't 0 the only value of err you can get here with?
Yes, err can only be 0 at this point; I'll update this to return 0 directly.
>
>> +}
>> +
>>  static int fastrpc_internal_invoke(struct fastrpc_user *fl,  u32 kernel,
>>                                 u32 handle, u32 sc,
>>                                 struct fastrpc_invoke_args *args)
>> @@ -1223,16 +1313,26 @@ static int fastrpc_internal_invoke(struct 
>> fastrpc_user *fl,  u32 kernel,
>>      if (err)
>>              goto bail;
>>  
>> -    if (kernel) {
>> -            if (!wait_for_completion_timeout(&ctx->work, 10 * HZ))
>> -                    err = -ETIMEDOUT;
>> -    } else {
>> -            err = wait_for_completion_interruptible(&ctx->work);
>> -    }
>> +    /*
>> +     * Set message context as polled if the call is for a user PD
>> +     * dynamic module and user has enabled poll mode.
>> +     */
>> +    if (handle > FASTRPC_MAX_STATIC_HANDLE && fl->pd == USER_PD &&
>> +        fl->poll_mode)
> The line is 85 characters if you don't break it. You're allowed to use
> up to 100 characters if it makes the code easier to read - and it does.
Ack.
>
>> +            ctx->is_polled = true;
>> +
>> +    err = fastrpc_wait_for_completion(ctx, kernel);
>>  
> Ugly blank line between the assignment and error check...
I'll remove this.
>
>>      if (err)
>>              goto bail;
>>  
>> +    if (!ctx->is_work_done) {
> "err" is the return value of the wait, and this checks the outcome of
> the wait... Returning "success" and pass "failure" through a sideband
> channel is confusing.
>
> That said, as far as I can see, there are three ways
> fastrpc_wait_for_completion() can exit:
>
> 1) err = 0 && ctx->is_work_done = true after polling
> 2) err = 0 && ctx->is_work_done = true after wait
> 3) err != 0 && ctx->is_work_done is undefined after wait
>
> For #1 and #2 we won't hit either if statement here.
> For #3 we already hit above condition and went to bail.
>
> So do we ever enter here?
You're right, this check is never reached, for the following reasons:
1) Poll success -> err = 0, is_work_done = true.
2) Wait success -> err = 0, is_work_done = true.
3) Poll failed -> fallback to wait.
4) Wait failed -> err is checked (and we bail) before this if condition.

I'll remove this check.
>
>> +            err = -ETIMEDOUT;
>> +            dev_dbg(fl->sctx->dev, "Invalid workdone state for handle 0x%x, 
>> sc 0x%x\n",
>> +                    handle, sc);
> jfyi, you can use %#x to format 0x%x
Ack.
>
>> +            goto bail;
>> +    }
>> +
>>      /* make sure that all memory writes by DSP are seen by CPU */
>>      dma_rmb();
>>      /* populate all the output buffers with results */
>> @@ -1812,6 +1912,30 @@ static int fastrpc_get_info_from_kernel(struct 
>> fastrpc_ioctl_capability *cap,
>>      return 0;
>>  }
>>  
>> +static int fastrpc_set_option(struct fastrpc_user *fl, char __user *argp)
>> +{
>> +    struct fastrpc_ioctl_set_option opt = {0};
>> +    int i;
>> +
>> +    if (copy_from_user(&opt, argp, sizeof(opt)))
>> +            return -EFAULT;
>> +
>> +    for (i = 0; i < ARRAY_SIZE(opt.reserved); i++) {
>> +            if (opt.reserved[i] != 0)
>> +                    return -EINVAL;
>> +    }
>> +
>> +    if (opt.req != FASTRPC_POLL_MODE)
>> +            return -EINVAL;
>> +
>> +    if (opt.value)
> Would it make sense to allow the caller to affect the poll timeout using
> the other 31 bits of this value?
I was planning to add that control [1], but it might be difficult for the caller

[1] 
https://lore.kernel.org/all/[email protected]/

//Ekansh
>
> Regards,
> Bjorn
>
>> +            fl->poll_mode = true;
>> +    else
>> +            fl->poll_mode = false;
>> +
>> +    return 0;
>> +}
>> +
>>  static int fastrpc_get_dsp_info(struct fastrpc_user *fl, char __user *argp)
>>  {
>>      struct fastrpc_ioctl_capability cap = {0};
>> @@ -2167,6 +2291,9 @@ static long fastrpc_device_ioctl(struct file *file, 
>> unsigned int cmd,
>>      case FASTRPC_IOCTL_MEM_UNMAP:
>>              err = fastrpc_req_mem_unmap(fl, argp);
>>              break;
>> +    case FASTRPC_IOCTL_SET_OPTION:
>> +            err = fastrpc_set_option(fl, argp);
>> +            break;
>>      case FASTRPC_IOCTL_GET_DSP_INFO:
>>              err = fastrpc_get_dsp_info(fl, argp);
>>              break;
>> @@ -2518,6 +2645,7 @@ static int fastrpc_rpmsg_callback(struct rpmsg_device 
>> *rpdev, void *data,
>>      }
>>  
>>      ctx->retval = rsp->retval;
>> +    ctx->is_work_done = true;
>>      complete(&ctx->work);
>>  
>>      /*
>> diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h
>> index c6e2925f47e6..c37e24a764ae 100644
>> --- a/include/uapi/misc/fastrpc.h
>> +++ b/include/uapi/misc/fastrpc.h
>> @@ -16,6 +16,7 @@
>>  #define FASTRPC_IOCTL_INIT_CREATE_STATIC _IOWR('R', 9, struct 
>> fastrpc_init_create_static)
>>  #define FASTRPC_IOCTL_MEM_MAP               _IOWR('R', 10, struct 
>> fastrpc_mem_map)
>>  #define FASTRPC_IOCTL_MEM_UNMAP             _IOWR('R', 11, struct 
>> fastrpc_mem_unmap)
>> +#define FASTRPC_IOCTL_SET_OPTION    _IOWR('R', 12, struct 
>> fastrpc_ioctl_set_option)
>>  #define FASTRPC_IOCTL_GET_DSP_INFO  _IOWR('R', 13, struct 
>> fastrpc_ioctl_capability)
>>  
>>  /**
>> @@ -67,6 +68,9 @@ enum fastrpc_proc_attr {
>>  /* Fastrpc attribute for memory protection of buffers */
>>  #define FASTRPC_ATTR_SECUREMAP      (1)
>>  
>> +/* Set option request ID to enable poll mode */
>> +#define FASTRPC_POLL_MODE   (1)
>> +
>>  struct fastrpc_invoke_args {
>>      __u64 ptr;
>>      __u64 length;
>> @@ -133,6 +137,12 @@ struct fastrpc_mem_unmap {
>>      __s32 reserved[5];
>>  };
>>  
>> +struct fastrpc_ioctl_set_option {
>> +    __u32 req;      /* request id */
>> +    __u32 value;    /* value */
>> +    __s32 reserved[6];
>> +};
>> +
>>  struct fastrpc_ioctl_capability {
>>      __u32 unused; /* deprecated, ignored by the kernel */
>>      __u32 attribute_id;
>> -- 
>> 2.34.1
>>
>>

Reply via email to