Bill Fischofer(Bill-Fischofer-Linaro) replied on github web page:
platform/linux-generic/pktio/mdev/cxgb4.c
@@ -0,0 +1,887 @@
+/* Copyright (c) 2018, Linaro Limited
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include "config.h"
+
+#ifdef ODP_MDEV
+
#include <inttypes.h>
#include <linux/types.h>
#include <protocols/eth.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
+
+#include <odp_packet_io_internal.h>
+#include <odp_posix_extensions.h>
+
+#include <odp/api/hints.h>
+#include <odp/api/packet.h>
+#include <odp/api/plat/packet_inlines.h>
+#include <odp/drv/hints.h>
+#include <odp/drv/mmio.h>
+
+#include <pktio/common.h>
+#include <pktio/ethtool.h>
+#include <pktio/mdev.h>
+#include <pktio/sysfs.h>
+#include <pktio/uapi_net_mdev.h>
+
+#define MODULE_NAME "cxgb4"
+
+#define CXGB4_TX_BUF_SIZE 2048U
+
+#define CXGB4_TX_INLINE_MAX (256 - sizeof(cxgb4_fw_eth_tx_pkt_wr_t) \
+ - sizeof(cxgb4_cpl_tx_pkt_core_t))
+
+/* RX queue definitions */
+#define CXGB4_RX_QUEUE_NUM_MAX 32
+
+/** RX descriptor */
+typedef struct {
+ uint32_t padding[12];
+
+ odp_u32be_t hdrbuflen_pidx;
+#define RX_DESC_NEW_BUF_FLAG (1U << 31)
+ odp_u32be_t pldbuflen_qid;
+ union {
+#define RX_DESC_GEN_SHIFT 7
+#define RX_DESC_GEN_MASK 0x1
+#define RX_DESC_TYPE_SHIFT 4
+#define RX_DESC_TYPE_MASK 0x3
+#define RX_DESC_TYPE_FLBUF_X 0
+#define RX_DESC_TYPE_CPL_X 1
+#define RX_DESC_TYPE_INTR_X 2
+ uint8_t type_gen;
+#define RX_DESC_TIMESTAMP_MASK 0xfffffffffffffffULL
+ odp_u64be_t last_flit;
+ };
+} cxgb4_rx_desc_t;
+
+#define RX_DESC_TO_GEN(rxd) \
+ (((rxd)->type_gen >> RX_DESC_GEN_SHIFT) & RX_DESC_GEN_MASK)
+#define RX_DESC_TO_TYPE(rxd) \
+ (((rxd)->type_gen >> RX_DESC_TYPE_SHIFT) & RX_DESC_TYPE_MASK)
+
+/** RX queue data */
+typedef struct ODP_ALIGNED_CACHE {
+ cxgb4_rx_desc_t *rx_descs; /**< RX queue base */
+
+ odp_u32le_t *doorbell_fl; /**< Free list refill doorbell */
+ odp_u32le_t *doorbell_desc; /**< Rx descriptor free doorbell */
+ uint32_t doorbell_fl_key; /**< 'Key' to the doorbell */
+ uint32_t doorbell_desc_key; /**< 'Key' to the doorbell */
+
+ uint16_t rx_queue_len; /**< Number of RX desc entries */
+ uint16_t rx_next; /**< Next RX desc to handle */
+
+ uint32_t gen:1; /**< RX queue generation */
+
+ odp_u64be_t *free_list; /**< Free list base */
+
+ uint8_t free_list_len; /**< Number of free list entries */
+ uint8_t commit_pending; /**< Free list entries pending commit */
+
+ uint8_t cidx; /**< Free list consumer index */
+ uint8_t pidx; /**< Free list producer index */
+
+ uint32_t offset; /**< Offset into last free fragment */
+
+ mdev_dma_area_t rx_data; /**< RX packet payload area */
+
+ odp_ticketlock_t lock; /**< RX queue lock */
+} cxgb4_rx_queue_t;
+
+/* TX queue definitions */
+#define CXGB4_TX_QUEUE_NUM_MAX 32
+
+typedef struct {
+ odp_u64be_t data[8];
+} cxgb4_tx_desc_t;
+
+typedef struct {
+#define CXGB4_FW_ETH_TX_PKT_WR 0x08000000UL
+ odp_u32be_t op_immdlen;
+ odp_u32be_t equiq_to_len16;
+ odp_u64be_t r3;
+} cxgb4_fw_eth_tx_pkt_wr_t;
+
+typedef struct {
+#define CPL_TX_PKT_XT 0xEE000000UL
+#define TXPKT_PF_S 8
+#define TXPKT_PF_V(x) ((x) << TXPKT_PF_S)
+#define TXPKT_INTF_S 16
+#define TXPKT_INTF_V(x) ((x) << TXPKT_INTF_S)
+ odp_u32be_t ctrl0;
+ odp_u16be_t pack;
+ odp_u16be_t len;
+#define TXPKT_IPCSUM_DIS_F (1UL << 62)
+#define TXPKT_L4CSUM_DIS_F (1UL << 63)
+ odp_u64be_t ctrl1;
+} cxgb4_cpl_tx_pkt_core_t;
+
+typedef struct {
+ odp_u32be_t len[2];
+ odp_u64be_t addr[2];
+} cxgb4_sg_pair_t;
+
+typedef struct {
+#define CXGB4_ULP_TX_SC_DSGL (0x82UL << 24)
+ odp_u32be_t sg_pairs_num;
+ odp_u32be_t len0;
+ odp_u64be_t addr0;
+ cxgb4_sg_pair_t sg_pairs[0];
+} cxgb4_sg_list_t;
+
+typedef struct {
+ odp_u32be_t qid;
+ odp_u16be_t cidx;
+ odp_u16be_t pidx;
+} cxgb4_tx_queue_stats;
+
+/** TX queue data */
+typedef struct ODP_ALIGNED_CACHE {
+ cxgb4_tx_desc_t *tx_descs; /**< TX queue base */
+ cxgb4_tx_queue_stats *stats; /**< TX queue stats */
+
+ odp_u32le_t *doorbell_desc; /**< TX queue doorbell */
+ uint32_t doorbell_desc_key; /**< 'Key' to the doorbell */
+
+ uint16_t tx_queue_len; /**< Number of TX desc entries */
+ uint16_t tx_next; /**< Next TX desc to insert */
+
+ mdev_dma_area_t tx_data; /**< TX packet payload area */
+
+ odp_ticketlock_t lock; /**< TX queue lock */
+} cxgb4_tx_queue_t;
+
+/** Packet socket using mediated cxgb4 device */
+typedef struct {
+ /** RX queue hot data */
+ cxgb4_rx_queue_t rx_queues[CXGB4_RX_QUEUE_NUM_MAX];
+
+ /** TX queue hot data */
+ cxgb4_tx_queue_t tx_queues[CXGB4_TX_QUEUE_NUM_MAX];
+
+ odp_pool_t pool; /**< pool to alloc packets from */
+
+ odp_bool_t lockless_rx; /**< no locking for RX */
+ uint16_t free_list_align; /**< Alignment required for RX chunks */
+
+ odp_bool_t lockless_tx; /**< no locking for TX */
+ uint8_t tx_channel; /**< TX channel of the interface */
+ uint8_t phys_function; /**< Physical function of the interface
*/
+
+ odp_pktio_capability_t capa; /**< interface capabilities */
+
+ uint8_t *mmio; /**< MMIO region */
+
+ int sockfd; /**< control socket */
+
+ mdev_device_t mdev; /**< Common mdev data */
+} pktio_ops_cxgb4_data_t;
+
+static void cxgb4_rx_refill(cxgb4_rx_queue_t *rxq, uint8_t num);
+static void cxgb4_wait_link_up(pktio_entry_t *pktio_entry);
+static int cxgb4_close(pktio_entry_t *pktio_entry);
+
+static int cxgb4_mmio_register(pktio_ops_cxgb4_data_t *pkt_cxgb4,
+ uint64_t offset, uint64_t size)
+{
+ ODP_ASSERT(pkt_cxgb4->mmio == NULL);
+
+ pkt_cxgb4->mmio = mdev_region_mmap(&pkt_cxgb4->mdev, offset, size);
+ if (pkt_cxgb4->mmio == MAP_FAILED) {
+ ODP_ERR("Cannot mmap MMIO\n");
+ return -1;
+ }
+
+ ODP_DBG("Register MMIO region: 0x%llx@%016llx\n", size, offset);
+
+ return 0;
+}
+
+static int cxgb4_rx_queue_register(pktio_ops_cxgb4_data_t *pkt_cxgb4,
+ uint64_t offset, uint64_t size,
+ uint64_t free_list_offset)
+{
+ uint16_t rxq_idx = pkt_cxgb4->capa.max_input_queues++;
+ cxgb4_rx_queue_t *rxq = &pkt_cxgb4->rx_queues[rxq_idx];
+ struct ethtool_ringparam ering;
+ uint64_t val;
+ int ret;
+
+ ODP_ASSERT(rxq_idx < ARRAY_SIZE(pkt_cxgb4->rx_queues));
+
+ odp_ticketlock_init(&rxq->lock);
+
+ ret = ethtool_ringparam_get_fd(pkt_cxgb4->sockfd,
+ pkt_cxgb4->mdev.if_name, &ering);
+ if (ret) {
+ ODP_ERR("Cannot get queue length\n");
+ return -1;
+ }
+ rxq->rx_queue_len = ering.rx_mini_pending;
+ rxq->free_list_len = ering.rx_pending + 8;
+
+ ret = sysfs_attr_u64_get(&val, "/sys/class/net/%s/queues/rx-%u/cxgb4/"
+ "doorbell_fl_offset",
+ pkt_cxgb4->mdev.if_name, rxq_idx);
+ if (ret) {
+ ODP_ERR("Cannot get %s rx-%u doorbell_fl_offset\n",
+ pkt_cxgb4->mdev.if_name, rxq_idx);
+ return -1;
+ }
+ rxq->doorbell_fl = (odp_u32le_t *)(void *)(pkt_cxgb4->mmio + val);
+
+ ret = sysfs_attr_u64_get(&val, "/sys/class/net/%s/queues/rx-%u/cxgb4/"
+ "doorbell_fl_key",
+ pkt_cxgb4->mdev.if_name, rxq_idx);
+ if (ret) {
+ ODP_ERR("Cannot get %s rx-%u doorbell_fl_key\n",
+ pkt_cxgb4->mdev.if_name, rxq_idx);
+ return -1;
+ }
+ rxq->doorbell_fl_key = val;
+
+ ret = sysfs_attr_u64_get(&val, "/sys/class/net/%s/queues/rx-%u/cxgb4/"
+ "doorbell_desc_offset",
+ pkt_cxgb4->mdev.if_name, rxq_idx);
+ if (ret) {
+ ODP_ERR("Cannot get %s rx-%u doorbell_desc_offset\n",
+ pkt_cxgb4->mdev.if_name, rxq_idx);
+ return -1;
+ }
+ rxq->doorbell_desc = (odp_u32le_t *)(void *)(pkt_cxgb4->mmio + val);
+
+ ret = sysfs_attr_u64_get(&val, "/sys/class/net/%s/queues/rx-%u/cxgb4/"
+ "doorbell_desc_key",
+ pkt_cxgb4->mdev.if_name, rxq_idx);
+ if (ret) {
+ ODP_ERR("Cannot get %s rx-%u doorbell_desc_key\n",
+ pkt_cxgb4->mdev.if_name, rxq_idx);
+ return -1;
+ }
+ rxq->doorbell_desc_key = val;
+
+ ODP_ASSERT(rxq->rx_queue_len * sizeof(*rxq->rx_descs) <= size);
+
+ rxq->rx_descs = mdev_region_mmap(&pkt_cxgb4->mdev, offset, size);
+ if (rxq->rx_descs == MAP_FAILED) {
+ ODP_ERR("Cannot mmap RX queue\n");
+ return -1;
+ }
+
+ ODP_ASSERT(rxq->free_list_len * sizeof(*rxq->free_list) <=
+ ODP_PAGE_SIZE);
+
+ rxq->free_list =
+ mdev_region_mmap(&pkt_cxgb4->mdev, free_list_offset, ODP_PAGE_SIZE);
+ if (rxq->free_list == MAP_FAILED) {
+ ODP_ERR("Cannot mmap RX queue free list\n");
+ return -1;
+ }
+
+ rxq->rx_data.size = rxq->free_list_len * ODP_PAGE_SIZE;
+ ret = mdev_dma_area_alloc(&pkt_cxgb4->mdev, &rxq->rx_data);
+ if (ret) {
+ ODP_ERR("Cannot allocate RX queue DMA area\n");
+ return -1;
+ }
+
+ rxq->gen = 1;
+
+ /*
+ * Leave 1 HW block (8 entries) unpopulated,
+ * otherwise HW will think the free list is empty.
+ */
+ cxgb4_rx_refill(rxq, rxq->free_list_len - 8);
+ rxq->cidx = rxq->free_list_len - 1;
+
+ ODP_DBG("Register RX queue region: 0x%llx@%016llx\n", size, offset);
+ ODP_DBG(" RX descriptors: %u\n", rxq->rx_queue_len);
+ ODP_DBG(" RX free list entries: %u\n", rxq->free_list_len);
+
+ return 0;
+}
+
+static int cxgb4_tx_queue_register(pktio_ops_cxgb4_data_t *pkt_cxgb4,
+ uint64_t offset, uint64_t size)
+{
+ uint16_t txq_idx = pkt_cxgb4->capa.max_output_queues++;
+ cxgb4_tx_queue_t *txq = &pkt_cxgb4->tx_queues[txq_idx];
+ struct ethtool_ringparam ering;
+ uint64_t val;
+ int ret;
+
+ ODP_ASSERT(txq_idx < ARRAY_SIZE(pkt_cxgb4->tx_queues));
+
+ odp_ticketlock_init(&txq->lock);
+
+ ret = ethtool_ringparam_get_fd(pkt_cxgb4->sockfd,
+ pkt_cxgb4->mdev.if_name, &ering);
+ if (ret) {
+ ODP_ERR("Cannot get queue length\n");
+ return -1;
+ }
+ txq->tx_queue_len = ering.tx_pending;
+
+ ret = sysfs_attr_u64_get(&val, "/sys/class/net/%s/queues/tx-%u/cxgb4/"
+ "doorbell_desc_offset",
+ pkt_cxgb4->mdev.if_name, txq_idx);
+ if (ret) {
+ ODP_ERR("Cannot get %s tx-%u doorbell_desc_offset\n",
+ pkt_cxgb4->mdev.if_name, txq_idx);
+ return -1;
+ }
+ txq->doorbell_desc = (odp_u32le_t *)(void *)(pkt_cxgb4->mmio + val);
+
+ ret = sysfs_attr_u64_get(&val, "/sys/class/net/%s/queues/tx-%u/cxgb4/"
+ "doorbell_desc_key",
+ pkt_cxgb4->mdev.if_name, txq_idx);
+ if (ret) {
+ ODP_ERR("Cannot get %s tx-%u doorbell_desc_key\n",
+ pkt_cxgb4->mdev.if_name, txq_idx);
+ return -1;
+ }
+ txq->doorbell_desc_key = val;
+
+ ODP_ASSERT(txq->tx_queue_len * sizeof(*txq->tx_descs) +
+ sizeof(*txq->stats) <= size);
+
+ txq->tx_descs = mdev_region_mmap(&pkt_cxgb4->mdev, offset, size);
+ if (txq->tx_descs == MAP_FAILED) {
+ ODP_ERR("Cannot mmap TX queue\n");
+ return -1;
+ }
+
+ txq->stats =
+ (cxgb4_tx_queue_stats *)(txq->tx_descs + txq->tx_queue_len);
+
+ txq->tx_data.size = txq->tx_queue_len * CXGB4_TX_BUF_SIZE;
+ ret = mdev_dma_area_alloc(&pkt_cxgb4->mdev, &txq->tx_data);
+ if (ret) {
+ ODP_ERR("Cannot allocate TX queue DMA area\n");
+ return -1;
+ }
+
+ ODP_DBG("Register TX queue region: 0x%llx@%016llx\n", size, offset);
+ ODP_DBG(" TX descriptors: %u\n", txq->tx_queue_len);
+
+ return 0;
+}
+
+static int cxgb4_region_info_cb(mdev_device_t *mdev,
+ struct vfio_region_info *region_info)
+{
+ pktio_ops_cxgb4_data_t *pkt_cxgb4 =
+ odp_container_of(mdev, pktio_ops_cxgb4_data_t, mdev);
+ mdev_region_class_t class_info;
+ struct vfio_region_info_cap_sparse_mmap *sparse;
+
+ if (vfio_get_region_cap_type(region_info, &class_info) < 0) {
+ ODP_ERR("Cannot find class_info in region %u\n",
+ region_info->index);
+ return -1;
+ }
+
+ switch (class_info.type) {
+ case VFIO_NET_MDEV_MMIO:
+ return cxgb4_mmio_register(pkt_cxgb4,
+ region_info->offset,
+ region_info->size);
+
+ case VFIO_NET_MDEV_RX_RING:
+ if (vfio_get_region_sparse_mmaps(region_info, &sparse) < 0) {
+ ODP_ERR("RX queue in region %u: %s\n",
+ region_info->index,
+ "no areas found");
+ return -1;
+ }
+
+ if (sparse->nr_areas != 2) {
+ ODP_ERR("RX queue in region %u: %s\n",
+ region_info->index,
+ "wrong number of areas");
+ return -1;
+ }
+
+ ODP_ASSERT(sparse->areas[1].size == ODP_PAGE_SIZE);
+
+ return cxgb4_rx_queue_register(pkt_cxgb4,
+ sparse->areas[0].offset,
+ sparse->areas[0].size,
+ sparse->areas[1].offset);
+
+ case VFIO_NET_MDEV_TX_RING:
+ return cxgb4_tx_queue_register(pkt_cxgb4,
+ region_info->offset,
+ region_info->size);
+
+ default:
+ ODP_ERR("Unexpected region %u (class %u:%u)\n",
+ region_info->index, class_info.type,
+ class_info.subtype);
+ return -1;
+ }
+}
+
+static int cxgb4_open(odp_pktio_t id ODP_UNUSED,
+ pktio_entry_t *pktio_entry,
+ const char *resource, odp_pool_t pool)
+{
+ pktio_ops_cxgb4_data_t *pkt_cxgb4;
+ uint64_t val;
+ int ret;
+
+ ODP_ASSERT(pool != ODP_POOL_INVALID);
+
+ if (strncmp(resource, NET_MDEV_PREFIX, strlen(NET_MDEV_PREFIX)))
+ return -1;
+
+ ODP_DBG("%s: probing resource %s\n", MODULE_NAME, resource);
+
+ pkt_cxgb4 = ODP_OPS_DATA_ALLOC(sizeof(*pkt_cxgb4));
+ if (odp_unlikely(pkt_cxgb4 == NULL)) {
+ ODP_ERR("Failed to allocate pktio_ops_cxgb4_data_t struct");
+ return -1;
+ }
+ pktio_entry->s.ops_data = pkt_cxgb4;
+
+ memset(pkt_cxgb4, 0, sizeof(*pkt_cxgb4));
+
+ pkt_cxgb4->sockfd = socket(AF_INET, SOCK_DGRAM, 0);
+ if (pkt_cxgb4->sockfd == -1) {
+ ODP_ERR("Cannot get device control socket\n");
+ goto out;
+ }
+
+ ret =
+ mdev_device_create(&pkt_cxgb4->mdev, MODULE_NAME,
+ resource + strlen(NET_MDEV_PREFIX),
+ cxgb4_region_info_cb);
+ if (ret)
+ goto out;
+
+ pkt_cxgb4->pool = pool;
+
+ ret = sysfs_attr_u64_get(&val, "/sys/class/net/%s/tx_channel",
+ pkt_cxgb4->mdev.if_name);
+ if (ret) {
+ ODP_ERR("Cannot get %s tx_channel\n", pkt_cxgb4->mdev.if_name);
+ return -1;
+ }
+ pkt_cxgb4->tx_channel = val;
+
+ ret = sysfs_attr_u64_get(&val, "/sys/class/net/%s/phys_function",
+ pkt_cxgb4->mdev.if_name);
+ if (ret) {
+ ODP_ERR("Cannot get %s phys_function\n",
+ pkt_cxgb4->mdev.if_name);
+ return -1;
+ }
+ pkt_cxgb4->phys_function = val;
+
+ ret = sysfs_attr_u64_get(&val, "/sys/class/net/%s/free_list_align",
+ pkt_cxgb4->mdev.if_name);
+ if (ret) {
+ ODP_ERR("Cannot get %s free_list_align\n",
+ pkt_cxgb4->mdev.if_name);
+ return -1;
+ }
+ pkt_cxgb4->free_list_align = val;
+
+ cxgb4_wait_link_up(pktio_entry);
+
+ ODP_DBG("%s: open %s is successful\n", MODULE_NAME,
+ pkt_cxgb4->mdev.if_name);
+
+ return 0;
+
+out:
+ cxgb4_close(pktio_entry);
+ return -1;
+}
+
+static int cxgb4_close(pktio_entry_t *pktio_entry)
+{
+ pktio_ops_cxgb4_data_t *pkt_cxgb4 = pktio_entry->s.ops_data;
+
+ ODP_DBG("%s: close %s\n", MODULE_NAME, pkt_cxgb4->mdev.if_name);
+
+ mdev_device_destroy(&pkt_cxgb4->mdev);
+
+ for (uint16_t i = 0; i < pkt_cxgb4->capa.max_input_queues; i++) {
+ cxgb4_rx_queue_t *rxq = &pkt_cxgb4->rx_queues[i];
+
+ if (rxq->rx_data.size)
+ mdev_dma_area_free(&pkt_cxgb4->mdev, &rxq->rx_data);
+ }
+
+ for (uint16_t i = 0; i < pkt_cxgb4->capa.max_output_queues; i++) {
+ cxgb4_tx_queue_t *txq = &pkt_cxgb4->tx_queues[i];
+
+ if (txq->tx_data.size)
+ mdev_dma_area_free(&pkt_cxgb4->mdev, &txq->tx_data);
+ }
+
+ if (pkt_cxgb4->sockfd != -1)
+ close(pkt_cxgb4->sockfd);
+
+ ODP_OPS_DATA_FREE(pkt_cxgb4);
+
+ return 0;
+}
+
+static void cxgb4_rx_refill(cxgb4_rx_queue_t *rxq, uint8_t num)
+{
+ rxq->commit_pending += num;
+
+ while (num) {
+ uint64_t iova = rxq->rx_data.iova + rxq->pidx * ODP_PAGE_SIZE;
+
+ rxq->free_list[rxq->pidx] = odp_cpu_to_be_64(iova);
+
+ rxq->pidx++;
+ if (odp_unlikely(rxq->pidx >= rxq->free_list_len))
+ rxq->pidx = 0;
+
+ num--;
+ }
+
+ /* We commit free list entries to HW in packs of 8 */
+ if (rxq->commit_pending >= 8) {
+ uint32_t val = rxq->doorbell_fl_key | (rxq->commit_pending / 8);
+
+ /* Ring the doorbell */
+ odpdrv_mmio_u32le_write(val, rxq->doorbell_fl);
+
+ rxq->commit_pending &= 7;
+ }
+}
+
+static int cxgb4_recv(pktio_entry_t *pktio_entry,
+ int rxq_idx, odp_packet_t pkt_table[], int num)
+{
+ pktio_ops_cxgb4_data_t *pkt_cxgb4 = pktio_entry->s.ops_data;
+ cxgb4_rx_queue_t *rxq = &pkt_cxgb4->rx_queues[rxq_idx];
+ uint16_t refill_count = 0;
+ int rx_pkts = 0;
+
+ if (!pkt_cxgb4->lockless_rx)
+ odp_ticketlock_lock(&rxq->lock);
+
+ while (rx_pkts < num) {
+ volatile cxgb4_rx_desc_t *rxd = &rxq->rx_descs[rxq->rx_next];
+ odp_packet_t pkt;
+ uint32_t pkt_len;
+ uint8_t type;
+
+ if (RX_DESC_TO_GEN(rxd) != rxq->gen)
+ break;
+
+ type = RX_DESC_TO_TYPE(rxd);
+
+ if (odp_unlikely(type != RX_DESC_TYPE_FLBUF_X)) {
+ ODP_ERR("Invalid rxd type %u\n", type);
+
+ rxq->rx_next++;
+ if (odp_unlikely(rxq->rx_next >= rxq->rx_queue_len)) {
+ rxq->rx_next = 0;
+ rxq->gen ^= 1;
+ }
+
+ continue;
+ }
+
+ pkt_len = odp_be_to_cpu_32(rxd->pldbuflen_qid);
+
+ /*
+ * HW skips trailing area in current RX buffer and starts in the
+ * next one from the beginning.
+ */
+ if (pkt_len & RX_DESC_NEW_BUF_FLAG) {
+ rxq->cidx++;
+ if (odp_unlikely(rxq->cidx >= rxq->free_list_len))
+ rxq->cidx = 0;
+
+ rxq->offset = 0;
+ refill_count++;
+
+ pkt_len ^= RX_DESC_NEW_BUF_FLAG;
+ }
+
+ if (odp_unlikely(rxq->offset + pkt_len > ODP_PAGE_SIZE)) {
+ /* TODO: reset the HW and reinit ? */
+ ODP_ABORT("Packet write beyond buffer boundary\n");
Comment:
Yes, that way the application at least gets the opportunity to do something.
Always best to let the application decide if termination is necessary.
> Mykyta Iziumtsev(MykytaI) wrote:
> I've rearranged code to allocate packet before driver state is updated and
> break if allocation fails. The initial code was written with an assumption
> that code would be much more complex if we would try to revert driver state
> on packet allocation failure, but after some brainstorming code looks even
> simpler now (I hope).
>
> Regarding odp_packet_alloc_multi: I believe this API is more of use to
> application developer because he/she already knows expected packet size. In
> drivers we don't know what will be length of RXed packets. Furthermore,
> depending on HW, we may or may not know how many packets we have to handle.
>
> Even if the number of packets at hand is known (like in e1000e) -- it's still
> questionable if it's optimal to always allocate MTU-sized packets ... at
> least when we anyway copy packet payload. Of course all of this is rendered
> null and void as soon as we implement zerocopy.
>> Mykyta Iziumtsev(MykytaI) wrote:
>> I would say that this condition should never ever happen. If it still
>> happens -- it's a fatal error which requires system reboot because it's
>> actually random memory corruption and you can't count on system behaving
>> well after that.
>>
>> Furthermore, with wonderful Chelsio HW it's impossible to reset it.
>>
>> We can put cxgb4 pktio into error state permanently if you think it will be
>> enough (with respect to memory corruption statement).
>>> Mykyta Iziumtsev(MykytaI) wrote:
>>> Statistics are planned to be added at later stage.
>>>> Bill Fischofer(Bill-Fischofer-Linaro) wrote:
>>>> Any discards/skips should be covered with a stat counter. Need to think
>>>> about driver MIB needs as we refine these.
>>>>> Bill Fischofer(Bill-Fischofer-Linaro) wrote:
>>>>> Wouldn't it be better to handle the error case in the `if`? E.g.,
>>>>> ```
>>>>> if (odp_unlikely(pkt == ODP_PACKET_INVALID)) {
>>>>> ODP_ERR("Pool exhausted on recv\n");
>>>>> break;
>>>>> }
>>>>> ```
>>>>>
>>>>> Alternately (and probably better) you could call
>>>>> `odp_packet_alloc_multi()` up-front to get `num` pkts and then reduce
>>>>> `num` to the number of packets you actually received.
>>>>>> Bill Fischofer(Bill-Fischofer-Linaro) wrote:
>>>>>> Ok, for an initial cut, but an `ODP_ABORT()` should never be issued in
>>>>>> production SW. Unexpected conditions should abort the individual
>>>>>> operation with an appropriate stat counter, error bit, etc. If that's
>>>>>> not possible, then the pktio can be put into an error state such that no
>>>>>> further I/O operations are permitted on it until it goes through some
>>>>>> sort of reset / re-init processing.
>>>>>>> Bill Fischofer(Bill-Fischofer-Linaro) wrote:
>>>>>>> Same comments as for PR #380, moreover we shouldn't duplicate commits
>>>>>>> in two PRs as that will cause merge conflicts. I suggest you reorg
>>>>>>> these into:
>>>>>>>
>>>>>>> 1. A basic mdev PR that adds the common code. This can be reviewed and
>>>>>>> merged first.
>>>>>>>
>>>>>>> 2. Once that's in, individual PRs for the various mdev drivers become a
>>>>>>> lot simpler to add on top of this.
>>>>>>>
https://github.com/Linaro/odp/pull/383#discussion_r162330102
updated_at 2018-01-18 12:40:19