Hi, On 13.03.2023 18:37, Jeffrey Hugo wrote: > On 3/13/2023 7:21 AM, Jacek Lawrynowicz wrote: >> Hi, >> >> On 06.03.2023 22:33, Jeffrey Hugo wrote: >>> Add the QAIC driver uapi file and core driver file that binds to the PCIe >>> device. The core driver file also creates the accel device and manages >>> all the interconnections between the different parts of the driver. >>> >>> The driver can be built as a module. If so, it will be called "qaic.ko". >>> >>> Signed-off-by: Jeffrey Hugo <quic_jh...@quicinc.com> >>> Reviewed-by: Carl Vanderlip <quic_ca...@quicinc.com> >>> Reviewed-by: Pranjal Ramajor Asha Kanojiya <quic_pkano...@quicinc.com> >>> --- >>> drivers/accel/qaic/qaic.h | 282 ++++++++++++++++++ >>> drivers/accel/qaic/qaic_drv.c | 648 >>> ++++++++++++++++++++++++++++++++++++++++++ >>> include/uapi/drm/qaic_accel.h | 397 ++++++++++++++++++++++++++ >>> 3 files changed, 1327 insertions(+) >>> create mode 100644 drivers/accel/qaic/qaic.h >>> create mode 100644 drivers/accel/qaic/qaic_drv.c >>> create mode 100644 include/uapi/drm/qaic_accel.h >>> >>> diff --git a/drivers/accel/qaic/qaic.h b/drivers/accel/qaic/qaic.h >>> new file mode 100644 >>> index 0000000..996a68f >>> --- /dev/null >>> +++ b/drivers/accel/qaic/qaic.h >>> @@ -0,0 +1,282 @@ >>> +/* SPDX-License-Identifier: GPL-2.0-only >>> + * >>> + * Copyright (c) 2019-2021, The Linux Foundation. All rights reserved. >>> + * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights >>> reserved. 
>>> + */ >>> + >>> +#ifndef _QAIC_H_ >>> +#define _QAIC_H_ >>> + >>> +#include <linux/interrupt.h> >>> +#include <linux/kref.h> >>> +#include <linux/mhi.h> >>> +#include <linux/mutex.h> >>> +#include <linux/pci.h> >>> +#include <linux/spinlock.h> >>> +#include <linux/srcu.h> >>> +#include <linux/wait.h> >>> +#include <linux/workqueue.h> >>> +#include <drm/drm_device.h> >>> +#include <drm/drm_gem.h> >>> + >>> +#define QAIC_DBC_BASE SZ_128K >>> +#define QAIC_DBC_SIZE SZ_4K >>> + >>> +#define QAIC_NO_PARTITION -1 >>> + >>> +#define QAIC_DBC_OFF(i) ((i) * QAIC_DBC_SIZE + QAIC_DBC_BASE) >>> + >>> +#define to_qaic_bo(obj) container_of(obj, struct qaic_bo, base) >>> + >>> +extern bool poll_datapath; >>> + >>> +struct qaic_user { >>> + /* Uniquely identifies this user for the device */ >>> + int handle; >>> + struct kref ref_count; >>> + /* Char device opened by this user */ >>> + struct qaic_drm_device *qddev; >>> + /* Node in list of users that opened this drm device */ >>> + struct list_head node; >>> + /* SRCU used to synchronize this user during cleanup */ >>> + struct srcu_struct qddev_lock; >>> + atomic_t chunk_id; >>> +}; >>> + >>> +struct dma_bridge_chan { >>> + /* Pointer to device strcut maintained by driver */ >>> + struct qaic_device *qdev; >>> + /* ID of this DMA bridge channel(DBC) */ >>> + unsigned int id; >>> + /* Synchronizes access to xfer_list */ >>> + spinlock_t xfer_lock; >>> + /* Base address of request queue */ >>> + void *req_q_base; >>> + /* Base address of response queue */ >>> + void *rsp_q_base; >>> + /* >>> + * Base bus address of request queue. 
Response queue bus address can be >>> + * calculated by adding request queue size to this variable >>> + */ >>> + dma_addr_t dma_addr; >>> + /* Total size of request and response queue in byte */ >>> + u32 total_size; >>> + /* Capacity of request/response queue */ >>> + u32 nelem; >>> + /* The user that opened this DBC */ >>> + struct qaic_user *usr; >>> + /* >>> + * Request ID of next memory handle that goes in request queue. One >>> + * memory handle can enqueue more than one request elements, all >>> + * this requests that belong to same memory handle have same request ID >>> + */ >>> + u16 next_req_id; >>> + /* true: DBC is in use; false: DBC not in use */ >>> + bool in_use; >>> + /* >>> + * Base address of device registers. Used to read/write request and >>> + * response queue's head and tail pointer of this DBC. >>> + */ >>> + void __iomem *dbc_base; >>> + /* Head of list where each node is a memory handle queued in request >>> queue */ >>> + struct list_head xfer_list; >>> + /* Synchronizes DBC readers during cleanup */ >>> + struct srcu_struct ch_lock; >>> + /* >>> + * When this DBC is released, any thread waiting on this wait queue is >>> + * woken up >>> + */ >>> + wait_queue_head_t dbc_release; >>> + /* Head of list where each node is a bo associated with this DBC */ >>> + struct list_head bo_lists; >>> + /* The irq line for this DBC. Used for polling */ >>> + unsigned int irq; >>> + /* Polling work item to simulate interrupts */ >>> + struct work_struct poll_work; >>> +}; >>> + >>> +struct qaic_device { >>> + /* Pointer to base PCI device struct of our physical device */ >>> + struct pci_dev *pdev; >>> + /* Req. 
ID of request that will be queued next in MHI control device */ >>> + u32 next_seq_num; >>> + /* Base address of bar 0 */ >>> + void __iomem *bar_0; >>> + /* Base address of bar 2 */ >>> + void __iomem *bar_2; >>> + /* Controller structure for MHI devices */ >>> + struct mhi_controller *mhi_cntl; >>> + /* MHI control channel device */ >>> + struct mhi_device *cntl_ch; >>> + /* List of requests queued in MHI control device */ >>> + struct list_head cntl_xfer_list; >>> + /* Synchronizes MHI control device transactions and its xfer list */ >>> + struct mutex cntl_mutex; >>> + /* Array of DBC struct of this device */ >>> + struct dma_bridge_chan *dbc; >>> + /* Work queue for tasks related to MHI control device */ >>> + struct workqueue_struct *cntl_wq; >>> + /* Synchronizes all the users of device during cleanup */ >>> + struct srcu_struct dev_lock; >>> + /* true: Device under reset; false: Device not under reset */ >>> + bool in_reset; >>> + /* >>> + * true: A tx MHI transaction has failed and a rx buffer is still >>> queued >>> + * in control device. Such a buffer is considered lost rx buffer >>> + * false: No rx buffer is lost in control device >>> + */ >>> + bool cntl_lost_buf; >>> + /* Maximum number of DBC supported by this device */ >>> + u32 num_dbc; >>> + /* Reference to the drm_device for this device when it is created */ >>> + struct qaic_drm_device *qddev; >>> + /* Generate the CRC of a control message */ >>> + u32 (*gen_crc)(void *msg); >>> + /* Validate the CRC of a control message */ >>> + bool (*valid_crc)(void *msg); >>> +}; >>> + >>> +struct qaic_drm_device { >>> + /* Pointer to the root device struct driven by this driver */ >>> + struct qaic_device *qdev; >>> + /* >>> + * The physical device can be partition in number of logical devices. >>> + * And each logical device is given a partition id. This member stores >>> + * that id. 
QAIC_NO_PARTITION is a sentinel used to mark that this drm >>> + * device is the actual physical device >>> + */ >>> + s32 partition_id; >>> + /* Pointer to the drm device struct of this drm device */ >>> + struct drm_device *ddev; >>> + /* Head in list of users who have opened this drm device */ >>> + struct list_head users; >>> + /* Synchronizes access to users list */ >>> + struct mutex users_mutex; >>> +}; >>> + >>> +struct qaic_bo { >>> + struct drm_gem_object base; >>> + /* Scatter/gather table for allocate/imported BO */ >>> + struct sg_table *sgt; >>> + /* BO size requested by user. GEM object might be bigger in size. */ >>> + u64 size; >>> + /* Head in list of slices of this BO */ >>> + struct list_head slices; >>> + /* Total nents, for all slices of this BO */ >>> + int total_slice_nents; >>> + /* >>> + * Direction of transfer. It can assume only two value DMA_TO_DEVICE >>> and >>> + * DMA_FROM_DEVICE. >>> + */ >>> + int dir; >>> + /* The pointer of the DBC which operates on this BO */ >>> + struct dma_bridge_chan *dbc; >>> + /* Number of slice that belongs to this buffer */ >>> + u32 nr_slice; >>> + /* Number of slice that have been transferred by DMA engine */ >>> + u32 nr_slice_xfer_done; >>> + /* true = BO is queued for execution, true = BO is not queued */ >>> + bool queued; >>> + /* >>> + * If true then user has attached slicing information to this BO by >>> + * calling DRM_IOCTL_QAIC_ATTACH_SLICE_BO ioctl. >>> + */ >>> + bool sliced; >>> + /* Request ID of this BO if it is queued for execution */ >>> + u16 req_id; >>> + /* Handle assigned to this BO */ >>> + u32 handle; >>> + /* Wait on this for completion of DMA transfer of this BO */ >>> + struct completion xfer_done; >>> + /* >>> + * Node in linked list where head is dbc->xfer_list. >>> + * This link list contain BO's that are queued for DMA transfer. >>> + */ >>> + struct list_head xfer_list; >>> + /* >>> + * Node in linked list where head is dbc->bo_lists. 
>>> + * This link list contain BO's that are associated with the DBC it is >>> + * linked to. >>> + */ >>> + struct list_head bo_list; >>> + struct { >>> + /* >>> + * Latest timestamp(ns) at which kernel received a request to >>> + * execute this BO >>> + */ >>> + u64 req_received_ts; >>> + /* >>> + * Latest timestamp(ns) at which kernel enqueued requests of >>> + * this BO for execution in DMA queue >>> + */ >>> + u64 req_submit_ts; >>> + /* >>> + * Latest timestamp(ns) at which kernel received a completion >>> + * interrupt for requests of this BO >>> + */ >>> + u64 req_processed_ts; >>> + /* >>> + * Number of elements already enqueued in DMA queue before >>> + * enqueuing requests of this BO >>> + */ >>> + u32 queue_level_before; >>> + } perf_stats; >>> + >>> +}; >>> + >>> +struct bo_slice { >>> + /* Mapped pages */ >>> + struct sg_table *sgt; >>> + /* Number of requests required to queue in DMA queue */ >>> + int nents; >>> + /* See enum dma_data_direction */ >>> + int dir; >>> + /* Actual requests that will be copied in DMA queue */ >>> + struct dbc_req *reqs; >>> + struct kref ref_count; >>> + /* true: No DMA transfer required */ >>> + bool no_xfer; >>> + /* Pointer to the parent BO handle */ >>> + struct qaic_bo *bo; >>> + /* Node in list of slices maintained by parent BO */ >>> + struct list_head slice; >>> + /* Size of this slice in bytes */ >>> + u64 size; >>> + /* Offset of this slice in buffer */ >>> + u64 offset; >>> +}; >>> + >>> +int get_dbc_req_elem_size(void); >>> +int get_dbc_rsp_elem_size(void); >>> +int get_cntl_version(struct qaic_device *qdev, struct qaic_user *usr, u16 >>> *major, u16 *minor); >>> +int qaic_manage_ioctl(struct drm_device *dev, void *data, struct drm_file >>> *file_priv); >>> +void qaic_mhi_ul_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result >>> *mhi_result); >>> + >>> +void qaic_mhi_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result >>> *mhi_result); >>> + >>> +int qaic_control_open(struct qaic_device *qdev); >>> 
+void qaic_control_close(struct qaic_device *qdev); >>> +void qaic_release_usr(struct qaic_device *qdev, struct qaic_user *usr); >>> + >>> +irqreturn_t dbc_irq_threaded_fn(int irq, void *data); >>> +irqreturn_t dbc_irq_handler(int irq, void *data); >>> +int disable_dbc(struct qaic_device *qdev, u32 dbc_id, struct qaic_user >>> *usr); >>> +void enable_dbc(struct qaic_device *qdev, u32 dbc_id, struct qaic_user >>> *usr); >>> +void wakeup_dbc(struct qaic_device *qdev, u32 dbc_id); >>> +void release_dbc(struct qaic_device *qdev, u32 dbc_id); >>> + >>> +void wake_all_cntl(struct qaic_device *qdev); >>> +void qaic_dev_reset_clean_local_state(struct qaic_device *qdev, bool >>> exit_reset); >>> + >>> +struct drm_gem_object *qaic_gem_prime_import(struct drm_device *dev, >>> struct dma_buf *dma_buf); >>> + >>> +int qaic_create_bo_ioctl(struct drm_device *dev, void *data, struct >>> drm_file *file_priv); >>> +int qaic_mmap_bo_ioctl(struct drm_device *dev, void *data, struct drm_file >>> *file_priv); >>> +int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct >>> drm_file *file_priv); >>> +int qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct >>> drm_file *file_priv); >>> +int qaic_partial_execute_bo_ioctl(struct drm_device *dev, void *data, >>> struct drm_file *file_priv); >>> +int qaic_wait_bo_ioctl(struct drm_device *dev, void *data, struct drm_file >>> *file_priv); >>> +int qaic_perf_stats_bo_ioctl(struct drm_device *dev, void *data, struct >>> drm_file *file_priv); >>> +void irq_polling_work(struct work_struct *work); >>> + >>> +#endif /* _QAIC_H_ */ >>> diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c >>> new file mode 100644 >>> index 0000000..9172799 >>> --- /dev/null >>> +++ b/drivers/accel/qaic/qaic_drv.c >>> @@ -0,0 +1,648 @@ >>> +// SPDX-License-Identifier: GPL-2.0-only >>> + >>> +/* Copyright (c) 2019-2021, The Linux Foundation. All rights reserved. 
*/ >>> +/* Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights >>> reserved. */ >>> + >>> +#include <linux/delay.h> >>> +#include <linux/dma-mapping.h> >>> +#include <linux/idr.h> >>> +#include <linux/interrupt.h> >>> +#include <linux/list.h> >>> +#include <linux/kref.h> >>> +#include <linux/mhi.h> >>> +#include <linux/module.h> >>> +#include <linux/msi.h> >>> +#include <linux/mutex.h> >>> +#include <linux/pci.h> >>> +#include <linux/spinlock.h> >>> +#include <linux/workqueue.h> >>> +#include <linux/wait.h> >>> +#include <drm/drm_accel.h> >>> +#include <drm/drm_drv.h> >>> +#include <drm/drm_file.h> >>> +#include <drm/drm_gem.h> >>> +#include <drm/drm_ioctl.h> >>> +#include <uapi/drm/qaic_accel.h> >>> + >>> +#include "mhi_controller.h" >>> +#include "mhi_qaic_ctrl.h" >>> +#include "qaic.h" >>> + >>> +MODULE_IMPORT_NS(DMA_BUF); >>> + >>> +#define PCI_DEV_AIC100 0xa100 >>> +#define QAIC_NAME "qaic" >>> +#define QAIC_DESC "Qualcomm Cloud AI Accelerators" >>> +#define CNTL_MAJOR 5 >>> +#define CNTL_MINOR 0 >>> + >>> +static unsigned int datapath_polling; >>> +module_param(datapath_polling, uint, 0400); >>> +bool poll_datapath; >> >> You can define this param as: >> static bool poll_datapath; >> module_param_named(poll_datapath, datapath_polling, bool, 0400); >> >> You woudn't have to set poll_datapath manually. >> I would also consider using a single name for the param and adding >> documentation using MODULE_PARM_DESC(). > > poll_datapath is referenced in a different file, therefore it cannot be > static. > > Having two variables is intentional. The parameter only takes effect at > module load. This file reads datapath_polling at module load, copies the > value, and the driver uses the cached value (patch 5) during operation. If > during operation, the module parameter changes value, that doesn't take > effect. > > Your suggestion appears to make the cached variable, and the module parameter > the same. 
Which means if the module parameter changes value during driver > operation, it will take effect immediately. This seems like a "footgun", > giving the user an opportunity to break things. > > I think datapath_polling can be defined as a bool, and the module_param call > can use the bool type, but the two variables remain. > > I could make poll_datapath device local, but then we'd have N copies of it, > instead of just 1, which feels like a waste of memory. > > Using MODULE_PARM_DESC feels like a good idea. Will do.
The value of poll_datapath cannot be changed at runtime, as the parameter has read-only permissions (0400) set in sysfs.