MSG_OP_CHAIN_EXEC_NPU is a unified mailbox message that replaces MSG_OP_CHAIN_EXEC_BUFFER_CF and MSG_OP_CHAIN_EXEC_DPU.
Add driver logic to check the firmware version and, if MSG_OP_CHAIN_EXEC_NPU is supported, use it to submit firmware commands. Signed-off-by: Lizhi Hou <[email protected]> --- drivers/accel/amdxdna/aie2_message.c | 443 +++++++++++++++++--------- drivers/accel/amdxdna/aie2_msg_priv.h | 42 ++- drivers/accel/amdxdna/aie2_pci.c | 13 + drivers/accel/amdxdna/aie2_pci.h | 29 ++ drivers/accel/amdxdna/amdxdna_ctx.c | 6 +- drivers/accel/amdxdna/amdxdna_ctx.h | 11 +- drivers/accel/amdxdna/npu1_regs.c | 6 + drivers/accel/amdxdna/npu2_regs.c | 1 + drivers/accel/amdxdna/npu4_regs.c | 6 + drivers/accel/amdxdna/npu5_regs.c | 1 + drivers/accel/amdxdna/npu6_regs.c | 1 + 11 files changed, 392 insertions(+), 167 deletions(-) diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c index 3a4c845d783a..4751a8aff0f7 100644 --- a/drivers/accel/amdxdna/aie2_message.c +++ b/drivers/accel/amdxdna/aie2_message.c @@ -27,6 +27,8 @@ #define DECLARE_AIE2_MSG(name, op) \ DECLARE_XDNA_MSG_COMMON(name, op, MAX_AIE2_STATUS_CODE) +#define EXEC_MSG_OPS(xdna) ((xdna)->dev_handle->exec_msg_ops) + static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev, struct xdna_mailbox_msg *msg) { @@ -479,177 +481,291 @@ int aie2_config_cu(struct amdxdna_hwctx *hwctx, return xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT); } -int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, - int (*notify_cb)(void *, void __iomem *, size_t)) +static int aie2_init_exec_cu_req(struct amdxdna_gem_obj *cmd_bo, void *req, + size_t *size, u32 *msg_op) { - struct mailbox_channel *chann = hwctx->priv->mbox_chann; - struct amdxdna_dev *xdna = hwctx->client->xdna; - struct amdxdna_gem_obj *cmd_abo = job->cmd_bo; - union { - struct execute_buffer_req ebuf; - struct exec_dpu_req dpu; - } req; - struct xdna_mailbox_msg msg; - u32 payload_len; - void *payload; - int cu_idx; - int ret; - u32 op; + struct execute_buffer_req *cu_req = req; + u32 cmd_len; + void *cmd; - if (!chann) - 
return -ENODEV; + cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); + if (cmd_len > sizeof(cu_req->payload)) + return -EINVAL; - payload = amdxdna_cmd_get_payload(cmd_abo, &payload_len); - if (!payload) { - XDNA_ERR(xdna, "Invalid command, cannot get payload"); + cu_req->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); + if (cu_req->cu_idx == INVALID_CU_IDX) return -EINVAL; - } - cu_idx = amdxdna_cmd_get_cu_idx(cmd_abo); - if (cu_idx < 0) { - XDNA_DBG(xdna, "Invalid cu idx"); + memcpy(cu_req->payload, cmd, cmd_len); + + *size = sizeof(*cu_req); + *msg_op = MSG_OP_EXECUTE_BUFFER_CF; + return 0; +} + +static int aie2_init_exec_dpu_req(struct amdxdna_gem_obj *cmd_bo, void *req, + size_t *size, u32 *msg_op) +{ + struct exec_dpu_req *dpu_req = req; + struct amdxdna_cmd_start_npu *sn; + u32 cmd_len; + + sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); + if (cmd_len - sizeof(*sn) > sizeof(dpu_req->payload)) return -EINVAL; - } - op = amdxdna_cmd_get_op(cmd_abo); - switch (op) { - case ERT_START_CU: - if (unlikely(payload_len > sizeof(req.ebuf.payload))) - XDNA_DBG(xdna, "Invalid ebuf payload len: %d", payload_len); - req.ebuf.cu_idx = cu_idx; - memcpy(req.ebuf.payload, payload, sizeof(req.ebuf.payload)); - msg.send_size = sizeof(req.ebuf); - msg.opcode = MSG_OP_EXECUTE_BUFFER_CF; - break; - case ERT_START_NPU: { - struct amdxdna_cmd_start_npu *sn = payload; - - if (unlikely(payload_len - sizeof(*sn) > sizeof(req.dpu.payload))) - XDNA_DBG(xdna, "Invalid dpu payload len: %d", payload_len); - req.dpu.inst_buf_addr = sn->buffer; - req.dpu.inst_size = sn->buffer_size; - req.dpu.inst_prop_cnt = sn->prop_count; - req.dpu.cu_idx = cu_idx; - memcpy(req.dpu.payload, sn->prop_args, sizeof(req.dpu.payload)); - msg.send_size = sizeof(req.dpu); - msg.opcode = MSG_OP_EXEC_DPU; - break; - } - default: - XDNA_DBG(xdna, "Invalid ERT cmd op code: %d", op); + dpu_req->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); + if (dpu_req->cu_idx == INVALID_CU_IDX) return -EINVAL; - } - msg.handle = job; - 
msg.notify_cb = notify_cb; - msg.send_data = (u8 *)&req; - print_hex_dump_debug("cmd: ", DUMP_PREFIX_OFFSET, 16, 4, &req, - 0x40, false); - ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT); - if (ret) { - XDNA_ERR(xdna, "Send message failed"); - return ret; - } + dpu_req->inst_buf_addr = sn->buffer; + dpu_req->inst_size = sn->buffer_size; + dpu_req->inst_prop_cnt = sn->prop_count; + memcpy(dpu_req->payload, sn->prop_args, cmd_len - sizeof(*sn)); + *size = sizeof(*dpu_req); + *msg_op = MSG_OP_EXEC_DPU; return 0; } +static void aie2_init_exec_chain_req(void *req, u64 slot_addr, size_t size, u32 cmd_cnt) +{ + struct cmd_chain_req *chain_req = req; + + chain_req->buf_addr = slot_addr; + chain_req->buf_size = size; + chain_req->count = cmd_cnt; +} + +static void aie2_init_npu_chain_req(void *req, u64 slot_addr, size_t size, u32 cmd_cnt) +{ + struct cmd_chain_npu_req *npu_chain_req = req; + + npu_chain_req->flags = 0; + npu_chain_req->reserved = 0; + npu_chain_req->buf_addr = slot_addr; + npu_chain_req->buf_size = size; + npu_chain_req->count = cmd_cnt; +} + static int -aie2_cmdlist_fill_one_slot_cf(void *cmd_buf, u32 offset, - struct amdxdna_gem_obj *abo, u32 *size) +aie2_cmdlist_fill_cf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size) { - struct cmd_chain_slot_execbuf_cf *buf = cmd_buf + offset; - int cu_idx = amdxdna_cmd_get_cu_idx(abo); - u32 payload_len; - void *payload; + struct cmd_chain_slot_execbuf_cf *cf_slot = slot; + u32 cmd_len; + void *cmd; - if (cu_idx < 0) + cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); + if (*size < sizeof(*cf_slot) + cmd_len) return -EINVAL; - payload = amdxdna_cmd_get_payload(abo, &payload_len); - if (!payload) + cf_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); + if (cf_slot->cu_idx == INVALID_CU_IDX) return -EINVAL; - if (!slot_has_space(*buf, offset, payload_len)) - return -ENOSPC; - - buf->cu_idx = cu_idx; - buf->arg_cnt = payload_len / sizeof(u32); - memcpy(buf->args, payload, payload_len); - /* Accurate buf size 
to hint firmware to do necessary copy */ - *size = sizeof(*buf) + payload_len; + cf_slot->arg_cnt = cmd_len / sizeof(u32); + memcpy(cf_slot->args, cmd, cmd_len); + /* Accurate slot size to hint firmware to do necessary copy */ + *size = sizeof(*cf_slot) + cmd_len; return 0; } static int -aie2_cmdlist_fill_one_slot_dpu(void *cmd_buf, u32 offset, - struct amdxdna_gem_obj *abo, u32 *size) +aie2_cmdlist_fill_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size) { - struct cmd_chain_slot_dpu *buf = cmd_buf + offset; - int cu_idx = amdxdna_cmd_get_cu_idx(abo); + struct cmd_chain_slot_dpu *dpu_slot = slot; struct amdxdna_cmd_start_npu *sn; - u32 payload_len; - void *payload; + u32 cmd_len; u32 arg_sz; - if (cu_idx < 0) + sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); + arg_sz = cmd_len - sizeof(*sn); + if (cmd_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE) return -EINVAL; - payload = amdxdna_cmd_get_payload(abo, &payload_len); - if (!payload) + if (*size < sizeof(*dpu_slot) + arg_sz) return -EINVAL; - sn = payload; - arg_sz = payload_len - sizeof(*sn); - if (payload_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE) + + dpu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); + if (dpu_slot->cu_idx == INVALID_CU_IDX) return -EINVAL; - if (!slot_has_space(*buf, offset, arg_sz)) - return -ENOSPC; + dpu_slot->inst_buf_addr = sn->buffer; + dpu_slot->inst_size = sn->buffer_size; + dpu_slot->inst_prop_cnt = sn->prop_count; + dpu_slot->arg_cnt = arg_sz / sizeof(u32); + memcpy(dpu_slot->args, sn->prop_args, arg_sz); + + /* Accurate slot size to hint firmware to do necessary copy */ + *size = sizeof(*dpu_slot) + arg_sz; + return 0; +} + +static u32 aie2_get_chain_msg_op(u32 cmd_op) +{ + switch (cmd_op) { + case ERT_START_CU: + return MSG_OP_CHAIN_EXEC_BUFFER_CF; + case ERT_START_NPU: + return MSG_OP_CHAIN_EXEC_DPU; + default: + break; + } - buf->inst_buf_addr = sn->buffer; - buf->inst_size = sn->buffer_size; - buf->inst_prop_cnt = sn->prop_count; - buf->cu_idx = cu_idx; - 
buf->arg_cnt = arg_sz / sizeof(u32); - memcpy(buf->args, sn->prop_args, arg_sz); + return MSG_OP_MAX_OPCODE; +} - /* Accurate buf size to hint firmware to do necessary copy */ - *size = sizeof(*buf) + arg_sz; +static struct aie2_exec_msg_ops legacy_exec_message_ops = { + .init_cu_req = aie2_init_exec_cu_req, + .init_dpu_req = aie2_init_exec_dpu_req, + .init_chain_req = aie2_init_exec_chain_req, + .fill_cf_slot = aie2_cmdlist_fill_cf, + .fill_dpu_slot = aie2_cmdlist_fill_dpu, + .get_chain_msg_op = aie2_get_chain_msg_op, +}; + +static int +aie2_cmdlist_fill_npu_cf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size) +{ + struct cmd_chain_slot_npu *npu_slot = slot; + u32 cmd_len; + void *cmd; + + cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); + if (*size < sizeof(*npu_slot) + cmd_len) + return -EINVAL; + + npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); + if (npu_slot->cu_idx == INVALID_CU_IDX) + return -EINVAL; + + memset(npu_slot, 0, sizeof(*npu_slot)); + npu_slot->type = EXEC_NPU_TYPE_NON_ELF; + npu_slot->arg_cnt = cmd_len / sizeof(u32); + memcpy(npu_slot->args, cmd, cmd_len); + + *size = sizeof(*npu_slot) + cmd_len; return 0; } static int -aie2_cmdlist_fill_one_slot(u32 op, struct amdxdna_gem_obj *cmdbuf_abo, u32 offset, - struct amdxdna_gem_obj *abo, u32 *size) +aie2_cmdlist_fill_npu_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size) +{ + struct cmd_chain_slot_npu *npu_slot = slot; + struct amdxdna_cmd_start_npu *sn; + u32 cmd_len; + u32 arg_sz; + + sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); + arg_sz = cmd_len - sizeof(*sn); + if (cmd_len < sizeof(*sn) || arg_sz > MAX_NPU_ARGS_SIZE) + return -EINVAL; + + if (*size < sizeof(*npu_slot) + arg_sz) + return -EINVAL; + + npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); + if (npu_slot->cu_idx == INVALID_CU_IDX) + return -EINVAL; + + memset(npu_slot, 0, sizeof(*npu_slot)); + npu_slot->type = EXEC_NPU_TYPE_PARTIAL_ELF; + npu_slot->inst_buf_addr = sn->buffer; + npu_slot->inst_size = 
sn->buffer_size; + npu_slot->inst_prop_cnt = sn->prop_count; + npu_slot->arg_cnt = arg_sz / sizeof(u32); + memcpy(npu_slot->args, sn->prop_args, arg_sz); + + *size = sizeof(*npu_slot) + arg_sz; + return 0; +} + +static u32 aie2_get_npu_chain_msg_op(u32 cmd_op) +{ + return MSG_OP_CHAIN_EXEC_NPU; +} + +static struct aie2_exec_msg_ops npu_exec_message_ops = { + .init_cu_req = aie2_init_exec_cu_req, + .init_dpu_req = aie2_init_exec_dpu_req, + .init_chain_req = aie2_init_npu_chain_req, + .fill_cf_slot = aie2_cmdlist_fill_npu_cf, + .fill_dpu_slot = aie2_cmdlist_fill_npu_dpu, + .get_chain_msg_op = aie2_get_npu_chain_msg_op, +}; + +static int aie2_init_exec_req(void *req, struct amdxdna_gem_obj *cmd_abo, + size_t *size, u32 *msg_op) { - u32 this_op = amdxdna_cmd_get_op(abo); - void *cmd_buf = cmdbuf_abo->mem.kva; + struct amdxdna_dev *xdna = cmd_abo->client->xdna; int ret; + u32 op; - if (this_op != op) { - ret = -EINVAL; - goto done; - } + op = amdxdna_cmd_get_op(cmd_abo); switch (op) { case ERT_START_CU: - ret = aie2_cmdlist_fill_one_slot_cf(cmd_buf, offset, abo, size); + ret = EXEC_MSG_OPS(xdna)->init_cu_req(cmd_abo, req, size, msg_op); + if (ret) { + XDNA_DBG(xdna, "Init CU req failed ret %d", ret); + return ret; + } break; case ERT_START_NPU: - ret = aie2_cmdlist_fill_one_slot_dpu(cmd_buf, offset, abo, size); + ret = EXEC_MSG_OPS(xdna)->init_dpu_req(cmd_abo, req, size, msg_op); + if (ret) { + XDNA_DBG(xdna, "Init DPU req failed ret %d", ret); + return ret; + } + break; default: + XDNA_INFO(xdna, "Unsupported op %d", op); ret = -EOPNOTSUPP; + break; } -done: - if (ret) { - XDNA_ERR(abo->client->xdna, "Can't fill slot for cmd op %d ret %d", - op, ret); + return ret; +} + +static int +aie2_cmdlist_fill_slot(void *slot, struct amdxdna_gem_obj *cmd_abo, + size_t *size, u32 *cmd_op) +{ + struct amdxdna_dev *xdna = cmd_abo->client->xdna; + int ret; + u32 op; + + op = amdxdna_cmd_get_op(cmd_abo); + if (*cmd_op == ERT_INVALID_CMD) + *cmd_op = op; + else if (op != *cmd_op) + 
return -EINVAL; + + switch (op) { + case ERT_START_CU: + ret = EXEC_MSG_OPS(xdna)->fill_cf_slot(cmd_abo, slot, size); + break; + case ERT_START_NPU: + ret = EXEC_MSG_OPS(xdna)->fill_dpu_slot(cmd_abo, slot, size); + break; + default: + XDNA_INFO(xdna, "Unsupported op %d", op); + ret = -EOPNOTSUPP; + break; } + return ret; } +void aie2_msg_init(struct amdxdna_dev_hdl *ndev) +{ + if (AIE2_FEATURE_ON(ndev, AIE2_NPU_COMMAND)) + ndev->exec_msg_ops = &npu_exec_message_ops; + else + ndev->exec_msg_ops = &legacy_exec_message_ops; +} + static inline struct amdxdna_gem_obj * aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job) { @@ -658,29 +774,36 @@ aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job) return job->hwctx->priv->cmd_buf[idx]; } -static void -aie2_cmdlist_prepare_request(struct cmd_chain_req *req, - struct amdxdna_gem_obj *cmdbuf_abo, u32 size, u32 cnt) +int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, + int (*notify_cb)(void *, void __iomem *, size_t)) { - req->buf_addr = cmdbuf_abo->mem.dev_addr; - req->buf_size = size; - req->count = cnt; - drm_clflush_virt_range(cmdbuf_abo->mem.kva, size); - XDNA_DBG(cmdbuf_abo->client->xdna, "Command buf addr 0x%llx size 0x%x count %d", - req->buf_addr, size, cnt); -} + struct mailbox_channel *chann = hwctx->priv->mbox_chann; + struct amdxdna_dev *xdna = hwctx->client->xdna; + struct amdxdna_gem_obj *cmd_abo = job->cmd_bo; + struct xdna_mailbox_msg msg; + union exec_req req; + int ret; -static inline u32 -aie2_cmd_op_to_msg_op(u32 op) -{ - switch (op) { - case ERT_START_CU: - return MSG_OP_CHAIN_EXEC_BUFFER_CF; - case ERT_START_NPU: - return MSG_OP_CHAIN_EXEC_DPU; - default: - return MSG_OP_MAX_OPCODE; + if (!chann) + return -ENODEV; + + ret = aie2_init_exec_req(&req, cmd_abo, &msg.send_size, &msg.opcode); + if (ret) + return ret; + + msg.handle = job; + msg.notify_cb = notify_cb; + msg.send_data = (u8 *)&req; + print_hex_dump_debug("cmd: ", DUMP_PREFIX_OFFSET, 16, 4, &req, + 0x40, false); 
+ + ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT); + if (ret) { + XDNA_ERR(xdna, "Send message failed"); + return ret; } + + return 0; } int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx, @@ -691,12 +814,13 @@ int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx, struct mailbox_channel *chann = hwctx->priv->mbox_chann; struct amdxdna_client *client = hwctx->client; struct amdxdna_gem_obj *cmd_abo = job->cmd_bo; + struct amdxdna_dev *xdna = client->xdna; struct amdxdna_cmd_chain *payload; struct xdna_mailbox_msg msg; - struct cmd_chain_req req; + union exec_chain_req req; u32 payload_len; u32 offset = 0; - u32 size; + size_t size; int ret; u32 op; u32 i; @@ -707,41 +831,42 @@ int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx, payload_len < struct_size(payload, data, payload->command_count)) return -EINVAL; + op = ERT_INVALID_CMD; for (i = 0; i < payload->command_count; i++) { u32 boh = (u32)(payload->data[i]); struct amdxdna_gem_obj *abo; abo = amdxdna_gem_get_obj(client, boh, AMDXDNA_BO_CMD); if (!abo) { - XDNA_ERR(client->xdna, "Failed to find cmd BO %d", boh); + XDNA_ERR(xdna, "Failed to find cmd BO %d", boh); return -ENOENT; } - /* All sub-cmd should have same op, use the first one. 
*/ - if (i == 0) - op = amdxdna_cmd_get_op(abo); - - ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, offset, abo, &size); + size = cmdbuf_abo->mem.size - offset; + ret = aie2_cmdlist_fill_slot(cmdbuf_abo->mem.kva + offset, + abo, &size, &op); amdxdna_gem_put_obj(abo); if (ret) - return -EINVAL; + return ret; offset += size; } + msg.opcode = EXEC_MSG_OPS(xdna)->get_chain_msg_op(op); + if (msg.opcode == MSG_OP_MAX_OPCODE) + return -EOPNOTSUPP; /* The offset is the accumulated total size of the cmd buffer */ - aie2_cmdlist_prepare_request(&req, cmdbuf_abo, offset, payload->command_count); + EXEC_MSG_OPS(xdna)->init_chain_req(&req, cmdbuf_abo->mem.dev_addr, + offset, payload->command_count); + drm_clflush_virt_range(cmdbuf_abo->mem.kva, offset); - msg.opcode = aie2_cmd_op_to_msg_op(op); - if (msg.opcode == MSG_OP_MAX_OPCODE) - return -EOPNOTSUPP; msg.handle = job; msg.notify_cb = notify_cb; msg.send_data = (u8 *)&req; msg.send_size = sizeof(req); ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT); if (ret) { - XDNA_ERR(hwctx->client->xdna, "Send message failed"); + XDNA_ERR(xdna, "Send message failed"); return ret; } @@ -754,23 +879,27 @@ int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx, { struct amdxdna_gem_obj *cmdbuf_abo = aie2_cmdlist_get_cmd_buf(job); struct mailbox_channel *chann = hwctx->priv->mbox_chann; + struct amdxdna_dev *xdna = hwctx->client->xdna; struct amdxdna_gem_obj *cmd_abo = job->cmd_bo; struct xdna_mailbox_msg msg; - struct cmd_chain_req req; - u32 size; + union exec_chain_req req; + u32 op = ERT_INVALID_CMD; + size_t size; int ret; - u32 op; - op = amdxdna_cmd_get_op(cmd_abo); - ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, 0, cmd_abo, &size); + size = cmdbuf_abo->mem.size; + ret = aie2_cmdlist_fill_slot(cmdbuf_abo->mem.kva, cmd_abo, &size, &op); if (ret) return ret; - aie2_cmdlist_prepare_request(&req, cmdbuf_abo, size, 1); - - msg.opcode = aie2_cmd_op_to_msg_op(op); + msg.opcode = EXEC_MSG_OPS(xdna)->get_chain_msg_op(op); if 
(msg.opcode == MSG_OP_MAX_OPCODE) return -EOPNOTSUPP; + + EXEC_MSG_OPS(xdna)->init_chain_req(&req, cmdbuf_abo->mem.dev_addr, + size, 1); + drm_clflush_virt_range(cmdbuf_abo->mem.kva, size); + msg.handle = job; msg.notify_cb = notify_cb; msg.send_data = (u8 *)&req; diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h b/drivers/accel/amdxdna/aie2_msg_priv.h index 2dbea1d09980..947daa63f064 100644 --- a/drivers/accel/amdxdna/aie2_msg_priv.h +++ b/drivers/accel/amdxdna/aie2_msg_priv.h @@ -20,6 +20,7 @@ enum aie2_msg_opcode { MSG_OP_CHAIN_EXEC_BUFFER_CF = 0x12, MSG_OP_CHAIN_EXEC_DPU = 0x13, MSG_OP_CONFIG_DEBUG_BO = 0x14, + MSG_OP_CHAIN_EXEC_NPU = 0x18, MSG_OP_MAX_XRT_OPCODE, MSG_OP_SUSPEND = 0x101, MSG_OP_RESUME = 0x102, @@ -172,6 +173,16 @@ struct exec_dpu_req { __u32 payload[35]; } __packed; +enum exec_npu_type { + EXEC_NPU_TYPE_NON_ELF = 0x1, + EXEC_NPU_TYPE_PARTIAL_ELF = 0x2, +}; + +union exec_req { + struct execute_buffer_req ebuf; + struct exec_dpu_req dpu_req; +}; + struct execute_buffer_resp { enum aie2_msg_status status; } __packed; @@ -343,9 +354,6 @@ struct async_event_msg_resp { } __packed; #define MAX_CHAIN_CMDBUF_SIZE SZ_4K -#define slot_has_space(slot, offset, payload_size) \ - (MAX_CHAIN_CMDBUF_SIZE >= (offset) + (payload_size) + \ - sizeof(typeof(slot))) struct cmd_chain_slot_execbuf_cf { __u32 cu_idx; @@ -363,12 +371,40 @@ struct cmd_chain_slot_dpu { __u32 args[] __counted_by(arg_cnt); }; +#define MAX_NPU_ARGS_SIZE (26 * sizeof(__u32)) +struct cmd_chain_slot_npu { + enum exec_npu_type type; + u64 inst_buf_addr; + u64 save_buf_addr; + u64 restore_buf_addr; + u32 inst_size; + u32 save_size; + u32 restore_size; + u32 inst_prop_cnt; + u32 cu_idx; + u32 arg_cnt; + u32 args[] __counted_by(arg_cnt); +} __packed; + struct cmd_chain_req { __u64 buf_addr; __u32 buf_size; __u32 count; } __packed; +struct cmd_chain_npu_req { + u32 flags; + u32 reserved; + u64 buf_addr; + u32 buf_size; + u32 count; +} __packed; + +union exec_chain_req { + struct cmd_chain_npu_req 
npu_req; + struct cmd_chain_req req; +}; + struct cmd_chain_resp { enum aie2_msg_status status; __u32 fail_cmd_idx; diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c index 80313a2a98d4..d7ccbdaf47f5 100644 --- a/drivers/accel/amdxdna/aie2_pci.c +++ b/drivers/accel/amdxdna/aie2_pci.c @@ -55,6 +55,7 @@ struct mgmt_mbox_chann_info { static int aie2_check_protocol(struct amdxdna_dev_hdl *ndev, u32 fw_major, u32 fw_minor) { + const struct aie2_fw_feature_tbl *feature; struct amdxdna_dev *xdna = ndev->xdna; /* @@ -78,6 +79,17 @@ static int aie2_check_protocol(struct amdxdna_dev_hdl *ndev, u32 fw_major, u32 f XDNA_ERR(xdna, "Firmware minor version smaller than supported"); return -EINVAL; } + + for (feature = ndev->priv->fw_feature_tbl; feature && feature->min_minor; + feature++) { + if (fw_minor < feature->min_minor) + continue; + if (feature->max_minor > 0 && fw_minor > feature->max_minor) + continue; + + set_bit(feature->feature, &ndev->feature_mask); + } + return 0; } @@ -587,6 +599,7 @@ static int aie2_init(struct amdxdna_dev *xdna) } release_firmware(fw); + aie2_msg_init(ndev); amdxdna_pm_init(xdna); return 0; diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h index cfe42b0d4242..d0a3cb1fe8be 100644 --- a/drivers/accel/amdxdna/aie2_pci.h +++ b/drivers/accel/amdxdna/aie2_pci.h @@ -156,6 +156,17 @@ enum aie2_dev_status { AIE2_DEV_START, }; +struct aie2_exec_msg_ops { + int (*init_cu_req)(struct amdxdna_gem_obj *cmd_bo, void *req, + size_t *size, u32 *msg_op); + int (*init_dpu_req)(struct amdxdna_gem_obj *cmd_bo, void *req, + size_t *size, u32 *msg_op); + void (*init_chain_req)(void *req, u64 slot_addr, size_t size, u32 cmd_cnt); + int (*fill_cf_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size); + int (*fill_dpu_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size); + u32 (*get_chain_msg_op)(u32 cmd_op); +}; + struct amdxdna_dev_hdl { struct amdxdna_dev *xdna; const struct 
amdxdna_dev_priv *priv; @@ -173,6 +184,8 @@ struct amdxdna_dev_hdl { u32 total_col; struct aie_version version; struct aie_metadata metadata; + unsigned long feature_mask; + struct aie2_exec_msg_ops *exec_msg_ops; /* power management and clock*/ enum amdxdna_power_mode_type pw_mode; @@ -208,12 +221,26 @@ struct aie2_hw_ops { int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level); }; +enum aie2_fw_feature { + AIE2_NPU_COMMAND, + AIE2_FEATURE_MAX +}; + +struct aie2_fw_feature_tbl { + enum aie2_fw_feature feature; + u32 max_minor; + u32 min_minor; +}; + +#define AIE2_FEATURE_ON(ndev, feature) test_bit(feature, &(ndev)->feature_mask) + struct amdxdna_dev_priv { const char *fw_path; u64 protocol_major; u64 protocol_minor; const struct rt_config *rt_config; const struct dpm_clk_freq *dpm_clk_tbl; + const struct aie2_fw_feature_tbl *fw_feature_tbl; #define COL_ALIGN_NONE 0 #define COL_ALIGN_NATURE 1 @@ -239,6 +266,7 @@ extern const struct dpm_clk_freq npu1_dpm_clk_table[]; extern const struct dpm_clk_freq npu4_dpm_clk_table[]; extern const struct rt_config npu1_default_rt_cfg[]; extern const struct rt_config npu4_default_rt_cfg[]; +extern const struct aie2_fw_feature_tbl npu4_fw_feature_table[]; /* aie2_smu.c */ int aie2_smu_init(struct amdxdna_dev_hdl *ndev); @@ -263,6 +291,7 @@ int aie2_get_array_async_error(struct amdxdna_dev_hdl *ndev, struct amdxdna_drm_get_array *args); /* aie2_message.c */ +void aie2_msg_init(struct amdxdna_dev_hdl *ndev); int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev); int aie2_resume_fw(struct amdxdna_dev_hdl *ndev); int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value); diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c index d18182c59668..878cc955f56d 100644 --- a/drivers/accel/amdxdna/amdxdna_ctx.c +++ b/drivers/accel/amdxdna/amdxdna_ctx.c @@ -113,14 +113,14 @@ void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size) return &cmd->data[num_masks]; } -int 
amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo) +u32 amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo) { struct amdxdna_cmd *cmd = abo->mem.kva; u32 num_masks, i; u32 *cu_mask; if (amdxdna_cmd_get_op(abo) == ERT_CMD_CHAIN) - return -1; + return INVALID_CU_IDX; num_masks = 1 + FIELD_GET(AMDXDNA_CMD_EXTRA_CU_MASK, cmd->header); cu_mask = cmd->data; @@ -129,7 +129,7 @@ int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo) return ffs(cu_mask[i]) - 1; } - return -1; + return INVALID_CU_IDX; } /* diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h index 919c654dfea6..1aa2b938e07b 100644 --- a/drivers/accel/amdxdna/amdxdna_ctx.h +++ b/drivers/accel/amdxdna/amdxdna_ctx.h @@ -13,9 +13,10 @@ struct amdxdna_hwctx_priv; enum ert_cmd_opcode { - ERT_START_CU = 0, - ERT_CMD_CHAIN = 19, - ERT_START_NPU = 20, + ERT_INVALID_CMD = ~0U, + ERT_START_CU = 0, + ERT_CMD_CHAIN = 19, + ERT_START_NPU = 20, }; enum ert_cmd_state { @@ -64,6 +65,8 @@ struct amdxdna_cmd { u32 data[]; }; +#define INVALID_CU_IDX (~0U) + struct amdxdna_hwctx { struct amdxdna_client *client; struct amdxdna_hwctx_priv *priv; @@ -150,7 +153,7 @@ amdxdna_cmd_get_state(struct amdxdna_gem_obj *abo) } void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size); -int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo); +u32 amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo); void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job); void amdxdna_hwctx_remove_all(struct amdxdna_client *client); diff --git a/drivers/accel/amdxdna/npu1_regs.c b/drivers/accel/amdxdna/npu1_regs.c index 23feb5f6fad3..ffc2e7c7b523 100644 --- a/drivers/accel/amdxdna/npu1_regs.c +++ b/drivers/accel/amdxdna/npu1_regs.c @@ -63,12 +63,18 @@ const struct dpm_clk_freq npu1_dpm_clk_table[] = { { 0 } }; +static const struct aie2_fw_feature_tbl npu1_fw_feature_table[] = { + { .feature = AIE2_NPU_COMMAND, .min_minor = 8 }, + { 0 } +}; + static const struct amdxdna_dev_priv npu1_dev_priv = { .fw_path 
= "amdnpu/1502_00/npu.sbin", .protocol_major = 0x5, .protocol_minor = 0x7, .rt_config = npu1_default_rt_cfg, .dpm_clk_tbl = npu1_dpm_clk_table, + .fw_feature_tbl = npu1_fw_feature_table, .col_align = COL_ALIGN_NONE, .mbox_dev_addr = NPU1_MBOX_BAR_BASE, .mbox_size = 0, /* Use BAR size */ diff --git a/drivers/accel/amdxdna/npu2_regs.c b/drivers/accel/amdxdna/npu2_regs.c index 67c2ae931c62..5fbfdcc3762d 100644 --- a/drivers/accel/amdxdna/npu2_regs.c +++ b/drivers/accel/amdxdna/npu2_regs.c @@ -67,6 +67,7 @@ static const struct amdxdna_dev_priv npu2_dev_priv = { .protocol_minor = 0x6, .rt_config = npu4_default_rt_cfg, .dpm_clk_tbl = npu4_dpm_clk_table, + .fw_feature_tbl = npu4_fw_feature_table, .col_align = COL_ALIGN_NATURE, .mbox_dev_addr = NPU2_MBOX_BAR_BASE, .mbox_size = 0, /* Use BAR size */ diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c index fac6c1b0b74b..79aba12acfde 100644 --- a/drivers/accel/amdxdna/npu4_regs.c +++ b/drivers/accel/amdxdna/npu4_regs.c @@ -83,12 +83,18 @@ const struct dpm_clk_freq npu4_dpm_clk_table[] = { { 0 } }; +const struct aie2_fw_feature_tbl npu4_fw_feature_table[] = { + { .feature = AIE2_NPU_COMMAND, .min_minor = 15 }, + { 0 } +}; + static const struct amdxdna_dev_priv npu4_dev_priv = { .fw_path = "amdnpu/17f0_10/npu.sbin", .protocol_major = 0x6, .protocol_minor = 12, .rt_config = npu4_default_rt_cfg, .dpm_clk_tbl = npu4_dpm_clk_table, + .fw_feature_tbl = npu4_fw_feature_table, .col_align = COL_ALIGN_NATURE, .mbox_dev_addr = NPU4_MBOX_BAR_BASE, .mbox_size = 0, /* Use BAR size */ diff --git a/drivers/accel/amdxdna/npu5_regs.c b/drivers/accel/amdxdna/npu5_regs.c index c91e1fa76ff5..c5e259ab9f49 100644 --- a/drivers/accel/amdxdna/npu5_regs.c +++ b/drivers/accel/amdxdna/npu5_regs.c @@ -67,6 +67,7 @@ static const struct amdxdna_dev_priv npu5_dev_priv = { .protocol_minor = 12, .rt_config = npu4_default_rt_cfg, .dpm_clk_tbl = npu4_dpm_clk_table, + .fw_feature_tbl = npu4_fw_feature_table, .col_align = 
COL_ALIGN_NATURE, .mbox_dev_addr = NPU5_MBOX_BAR_BASE, .mbox_size = 0, /* Use BAR size */ diff --git a/drivers/accel/amdxdna/npu6_regs.c b/drivers/accel/amdxdna/npu6_regs.c index 773f738915a7..2de63b44d6e7 100644 --- a/drivers/accel/amdxdna/npu6_regs.c +++ b/drivers/accel/amdxdna/npu6_regs.c @@ -67,6 +67,7 @@ static const struct amdxdna_dev_priv npu6_dev_priv = { .protocol_minor = 12, .rt_config = npu4_default_rt_cfg, .dpm_clk_tbl = npu4_dpm_clk_table, + .fw_feature_tbl = npu4_fw_feature_table, .col_align = COL_ALIGN_NATURE, .mbox_dev_addr = NPU6_MBOX_BAR_BASE, .mbox_size = 0, /* Use BAR size */ -- 2.34.1
