On 1/6/2026 8:29 AM, Dmitry Baryshkov wrote: > On Tue, Dec 30, 2025 at 11:58:31AM +0530, Ekansh Gupta wrote: >> For any remote call to DSP, after sending an invocation message, >> fastRPC driver waits for glink response and during this time the >> CPU can go into low power modes. This adds latency to overall fastrpc >> call as CPU wakeup and scheduling latencies are included. Add polling >> mode support with which fastRPC driver will poll continuously on a >> memory after sending a message to remote subsystem which will eliminate >> CPU wakeup and scheduling latencies and reduce fastRPC overhead. Poll >> mode can be enabled by user by using FASTRPC_IOCTL_SET_OPTION ioctl >> request with FASTRPC_POLL_MODE request id. >> >> Signed-off-by: Ekansh Gupta <[email protected]> >> --- >> drivers/misc/fastrpc.c | 139 ++++++++++++++++++++++++++++++++++-- >> include/uapi/misc/fastrpc.h | 9 +++ >> 2 files changed, 141 insertions(+), 7 deletions(-) >> >> diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c >> index 222ff15e04bd..d95d31d27b82 100644 >> --- a/drivers/misc/fastrpc.c >> +++ b/drivers/misc/fastrpc.c >> @@ -22,6 +22,8 @@ >> #include <linux/firmware/qcom/qcom_scm.h> >> #include <uapi/misc/fastrpc.h> >> #include <linux/of_reserved_mem.h> >> +#include <linux/compiler.h> >> +#include <linux/iopoll.h> >> #include <linux/bitfield.h> >> >> #define ADSP_DOMAIN_ID (0) >> @@ -38,6 +40,7 @@ >> #define FASTRPC_CTX_MAX (256) >> #define FASTRPC_INIT_HANDLE 1 >> #define FASTRPC_DSP_UTILITIES_HANDLE 2 >> +#define FASTRPC_MAX_STATIC_HANDLE (20) >> #define FASTRPC_CTXID_MASK GENMASK(15, 8) >> #define INIT_FILELEN_MAX (2 * 1024 * 1024) >> #define INIT_FILE_NAMELEN_MAX (128) >> @@ -106,6 +109,12 @@ >> >> #define miscdev_to_fdevice(d) container_of(d, struct fastrpc_device, >> miscdev) >> >> +/* Poll response number from remote processor for call completion */ >> +#define FASTRPC_POLL_RESPONSE (0xdecaf) >> + >> +/* Polling mode timeout limit */ >> +#define 
FASTRPC_POLL_MAX_TIMEOUT_US (10000) >> + >> struct fastrpc_phy_page { >> u64 addr; /* physical address */ >> u64 size; /* size of contiguous region */ >> @@ -236,8 +245,14 @@ struct fastrpc_invoke_ctx { >> u32 sc; >> u64 *fdlist; >> u32 *crc; >> + /* Poll memory that DSP updates */ >> + u32 *poll; >> u64 ctxid; >> u64 msg_sz; >> + /* work done status flag */ >> + bool is_work_done; >> + /* process updates poll memory instead of glink response */ >> + bool is_polled; >> struct kref refcount; >> struct list_head node; /* list of ctxs */ >> struct completion work; >> @@ -301,6 +316,8 @@ struct fastrpc_user { >> int client_id; >> int pd; >> bool is_secure_dev; >> + /* Flags poll mode state */ >> + bool poll_mode; >> /* Lock for lists */ >> spinlock_t lock; >> /* lock for allocations */ >> @@ -894,7 +911,8 @@ static int fastrpc_get_meta_size(struct >> fastrpc_invoke_ctx *ctx) >> sizeof(struct fastrpc_invoke_buf) + >> sizeof(struct fastrpc_phy_page)) * ctx->nscalars + >> sizeof(u64) * FASTRPC_MAX_FDLIST + >> - sizeof(u32) * FASTRPC_MAX_CRCLIST; >> + sizeof(u32) * FASTRPC_MAX_CRCLIST + >> + sizeof(u32); >> >> return size; >> } >> @@ -990,6 +1008,7 @@ static int fastrpc_get_args(u32 kernel, struct >> fastrpc_invoke_ctx *ctx) >> list = fastrpc_invoke_buf_start(rpra, ctx->nscalars); >> pages = fastrpc_phy_page_start(list, ctx->nscalars); >> ctx->fdlist = (u64 *)(pages + ctx->nscalars); >> + ctx->poll = (u32 *)(ctx->fdlist + FASTRPC_MAX_FDLIST + >> FASTRPC_MAX_CRCLIST); > This doesn't seem to match the calculation few lines ago. fdlist is a > pointer. On 64-bit systems ctx->poll will point to (uintptr_t)ctx->fdlist + > sizeof(u64) * FASTRPC_MAX_FDLIST + (sizeof u64) * FASTRPC_MAX_CRCLIST, > while in fastrpc_get_meta_size it was ... + sizeof(u32) * > FASTRPC_MAX_CRCLIST. > > Am I missing something? 
It should be: (uintptr_t)ctx->fdlist + sizeof(u64) * FASTRPC_MAX_FDLIST + sizeof(u32) * FASTRPC_MAX_CRCLIST, based on the fastrpc_get_meta_size calculation. I'll correct this calculation. > >> args = (uintptr_t)ctx->buf->virt + metalen; >> rlen = pkt_size - metalen; >> ctx->rpra = rpra; >> @@ -1158,6 +1177,75 @@ static int fastrpc_invoke_send(struct >> fastrpc_session_ctx *sctx, >> >> } >> >> +static inline u32 fastrpc_poll_op(void *p) >> +{ >> + struct fastrpc_invoke_ctx *ctx = p; >> + >> + dma_rmb(); >> + return READ_ONCE(*ctx->poll); >> +} >> + >> +static int poll_for_remote_response(struct fastrpc_invoke_ctx *ctx) >> +{ >> + u32 val; >> + int ret; >> + >> + /* >> + * Poll until DSP writes FASTRPC_POLL_RESPONSE into *ctx->poll >> + * or until another path marks the work done. >> + */ >> + ret = read_poll_timeout_atomic(fastrpc_poll_op, val, >> + (val == FASTRPC_POLL_RESPONSE) || >> + ctx->is_work_done, 1, >> + FASTRPC_POLL_MAX_TIMEOUT_US, false, ctx); >> + >> + if (!ret && val == FASTRPC_POLL_RESPONSE) { >> + ctx->is_work_done = true; >> + ctx->retval = 0; >> + } >> + >> + if (ret == -ETIMEDOUT) >> + ret = -EIO; >> + >> + return ret; >> +} >> + >> +static inline int fastrpc_wait_for_response(struct fastrpc_invoke_ctx *ctx, >> + u32 kernel) >> +{ >> + int err = 0; >> + >> + if (kernel) { >> + if (!wait_for_completion_timeout(&ctx->work, 10 * HZ)) >> + err = -ETIMEDOUT; >> + } else { >> + err = wait_for_completion_interruptible(&ctx->work); >> + } >> + >> + return err; >> +} >> + >> +static int fastrpc_wait_for_completion(struct fastrpc_invoke_ctx *ctx, >> + u32 kernel) >> +{ >> + int err; >> + >> + do { >> + if (ctx->is_polled) { >> + err = poll_for_remote_response(ctx); >> + /* If polling timed out, move to normal response mode */ >> + if (err) >> + ctx->is_polled = false; >> + } else { >> + err = fastrpc_wait_for_response(ctx, kernel); >> + if (err) >> + return err; >> + } >> + } while (!ctx->is_work_done); >> + >> + return err; >> +} >> + >> static int 
fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel, >> u32 handle, u32 sc, >> struct fastrpc_invoke_args *args) >> @@ -1193,16 +1281,25 @@ static int fastrpc_internal_invoke(struct >> fastrpc_user *fl, u32 kernel, >> if (err) >> goto bail; >> >> - if (kernel) { >> - if (!wait_for_completion_timeout(&ctx->work, 10 * HZ)) >> - err = -ETIMEDOUT; >> - } else { >> - err = wait_for_completion_interruptible(&ctx->work); >> - } >> + /* >> + * Set message context as polled if the call is for a user PD >> + * dynamic module and user has enabled poll mode. >> + */ >> + if (handle > FASTRPC_MAX_STATIC_HANDLE && fl->pd == USER_PD && >> + fl->poll_mode) >> + ctx->is_polled = true; >> + >> + err = fastrpc_wait_for_completion(ctx, kernel); >> >> if (err) >> goto bail; >> >> + if (!ctx->is_work_done) { >> + err = -ETIMEDOUT; >> + dev_dbg(fl->sctx->dev, "Invalid workdone state for handle 0x%x, >> sc 0x%x\n", >> + handle, sc); >> + goto bail; >> + } >> /* make sure that all memory writes by DSP are seen by CPU */ >> dma_rmb(); >> /* populate all the output buffers with results */ >> @@ -1780,6 +1877,30 @@ static int fastrpc_get_info_from_kernel(struct >> fastrpc_ioctl_capability *cap, >> return 0; >> } >> >> +static int fastrpc_set_option(struct fastrpc_user *fl, char __user *argp) >> +{ >> + struct fastrpc_ioctl_set_option opt = {0}; >> + int i; >> + >> + if (copy_from_user(&opt, argp, sizeof(opt))) >> + return -EFAULT; >> + >> + for (i = 0; i < ARRAY_SIZE(opt.reserved); i++) { >> + if (opt.reserved[i] != 0) >> + return -EINVAL; >> + } >> + >> + if (opt.req != FASTRPC_POLL_MODE) >> + return -EINVAL; >> + >> + if (opt.value) >> + fl->poll_mode = true; >> + else >> + fl->poll_mode = false; > Is poll_mode supported on MSM8916? No, but it will fall back to rpmsg_callback if polling is enabled on the kernel side but not enabled on the DSP. 
> >> + >> + return 0; >> +} >> + >> static int fastrpc_get_dsp_info(struct fastrpc_user *fl, char __user *argp) >> { >> struct fastrpc_ioctl_capability cap = {0}; >> @@ -2134,6 +2255,9 @@ static long fastrpc_device_ioctl(struct file *file, >> unsigned int cmd, >> case FASTRPC_IOCTL_MEM_UNMAP: >> err = fastrpc_req_mem_unmap(fl, argp); >> break; >> + case FASTRPC_IOCTL_SET_OPTION: >> + err = fastrpc_set_option(fl, argp); >> + break; >> case FASTRPC_IOCTL_GET_DSP_INFO: >> err = fastrpc_get_dsp_info(fl, argp); >> break; >> @@ -2465,6 +2589,7 @@ static int fastrpc_rpmsg_callback(struct rpmsg_device >> *rpdev, void *data, >> >> ctx->retval = rsp->retval; >> complete(&ctx->work); >> + ctx->is_work_done = true; >> >> /* >> * The DMA buffer associated with the context cannot be freed in >> diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h >> index c6e2925f47e6..3207c42fb318 100644 >> --- a/include/uapi/misc/fastrpc.h >> +++ b/include/uapi/misc/fastrpc.h >> @@ -16,6 +16,7 @@ >> #define FASTRPC_IOCTL_INIT_CREATE_STATIC _IOWR('R', 9, struct >> fastrpc_init_create_static) >> #define FASTRPC_IOCTL_MEM_MAP _IOWR('R', 10, struct >> fastrpc_mem_map) >> #define FASTRPC_IOCTL_MEM_UNMAP _IOWR('R', 11, struct >> fastrpc_mem_unmap) >> +#define FASTRPC_IOCTL_SET_OPTION _IOWR('R', 12, struct >> fastrpc_ioctl_set_option) >> #define FASTRPC_IOCTL_GET_DSP_INFO _IOWR('R', 13, struct >> fastrpc_ioctl_capability) >> >> /** >> @@ -66,6 +67,8 @@ enum fastrpc_proc_attr { >> >> /* Fastrpc attribute for memory protection of buffers */ >> #define FASTRPC_ATTR_SECUREMAP (1) >> +/* Set option request ID to enable poll mode */ >> +#define FASTRPC_POLL_MODE (1) > Is it related to FASTRPC_ATTR_SECUREMAP? Why is it a part of the same > visual block? I'll create a separation here. 
> >> >> struct fastrpc_invoke_args { >> __u64 ptr; >> @@ -133,6 +136,12 @@ struct fastrpc_mem_unmap { >> __s32 reserved[5]; >> }; >> >> +struct fastrpc_ioctl_set_option { >> + __u32 req; /* request id */ >> + __u32 value; /* value */ >> + __s32 reserved[6]; > What are you reserving it for? It is reserved in case there are more options that we want to control from here, since this is meant to be a generic set_option request. > >> +}; >> + >> struct fastrpc_ioctl_capability { >> __u32 unused; /* deprecated, ignored by the kernel */ >> __u32 attribute_id; >> -- >> 2.34.1 >>
