Handle transmission to the second peer and reception from it.
This includes communication with the upper layer (the network stack)
and configuration of the Thunderbolt(TM) HW.
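
Each Thunderbolt(TM) Networking packet is split into one or more frames
on the wire. Every frame starts with the 12-byte header below (a copy of
the struct this patch adds in net.c); the receive path uses frame_index,
frame_id and frame_count to reassemble the frames into a full packet
before handing it to the network stack:

    struct tbt_frame_header {
            /* size of the data in the frame */
            __le32 frame_size;
            /* running index of the frame within the packet */
            __le16 frame_index;
            /* ID of the frame, to match frames to a specific packet */
            __le16 frame_id;
            /* how many frames assemble a full packet */
            __le32 frame_count;
    };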

Signed-off-by: Amir Levy <amir.jer.l...@intel.com>
---
 drivers/thunderbolt/icm/icm_nhi.c |   15 +
 drivers/thunderbolt/icm/net.c     | 1475 +++++++++++++++++++++++++++++++++++++
 2 files changed, 1490 insertions(+)

diff --git a/drivers/thunderbolt/icm/icm_nhi.c b/drivers/thunderbolt/icm/icm_nhi.c
index 1bee701..052b348 100644
--- a/drivers/thunderbolt/icm/icm_nhi.c
+++ b/drivers/thunderbolt/icm/icm_nhi.c
@@ -1056,6 +1056,7 @@ static irqreturn_t nhi_msi(int __always_unused irq, void *data)
 {
        struct tbt_nhi_ctxt *nhi_ctxt = data;
        u32 isr0, isr1, imr0, imr1;
+       int i;
 
        /* clear on read */
        isr0 = ioread32(nhi_ctxt->iobase + REG_RING_NOTIFY_BASE);
@@ -1078,6 +1079,20 @@ static irqreturn_t nhi_msi(int __always_unused irq, void *data)
 
        spin_unlock(&nhi_ctxt->lock);
 
+       for (i = 0; i < nhi_ctxt->num_ports; ++i) {
+               struct net_device *net_dev =
+                               nhi_ctxt->net_devices[i].net_dev;
+               if (net_dev) {
+                       u8 path = PATH_FROM_PORT(nhi_ctxt->num_paths, i);
+
+                       if (isr0 & REG_RING_INT_RX_PROCESSED(
+                                       path, nhi_ctxt->num_paths))
+                               tbt_net_rx_msi(net_dev);
+                       if (isr0 & REG_RING_INT_TX_PROCESSED(path))
+                               tbt_net_tx_msi(net_dev);
+               }
+       }
+
        if (isr0 & REG_RING_INT_RX_PROCESSED(TBT_ICM_RING_NUM,
                                             nhi_ctxt->num_paths))
                schedule_work(&nhi_ctxt->icm_msgs_work);
diff --git a/drivers/thunderbolt/icm/net.c b/drivers/thunderbolt/icm/net.c
index 1ac0b1f..0fd24f5 100644
--- a/drivers/thunderbolt/icm/net.c
+++ b/drivers/thunderbolt/icm/net.c
@@ -134,6 +134,17 @@ struct approve_inter_domain_connection_cmd {
 
 };
 
+struct tbt_frame_header {
+       /* size of the data in the frame */
+       __le32 frame_size;
+       /* running index of the frame within the packet */
+       __le16 frame_index;
+       /* ID of the frame, to match frames to a specific packet */
+       __le16 frame_id;
+       /* how many frames assemble a full packet */
+       __le32 frame_count;
+};
+
 enum neg_event {
        RECEIVE_LOGOUT = NUM_MEDIUM_STATUSES,
        RECEIVE_LOGIN_RESPONSE,
@@ -141,15 +152,81 @@ enum neg_event {
        NUM_NEG_EVENTS
 };
 
+enum frame_status {
+       GOOD_FRAME,
+       GOOD_AS_FIRST_FRAME,
+       GOOD_AS_FIRST_MULTICAST_FRAME,
+       FRAME_NOT_READY,
+       FRAME_ERROR,
+};
+
+enum packet_filter {
+       /* all multicast MAC addresses */
+       PACKET_TYPE_ALL_MULTICAST,
+       /* all types of MAC addresses: multicast, unicast and broadcast */
+       PACKET_TYPE_PROMISCUOUS,
+       /* all unicast MAC addresses */
+       PACKET_TYPE_UNICAST_PROMISCUOUS,
+};
+
 enum disconnect_path_stage {
        STAGE_1 = BIT(0),
        STAGE_2 = BIT(1)
 };
 
+struct tbt_net_stats {
+       u64 tx_packets;
+       u64 tx_bytes;
+       u64 tx_errors;
+       u64 rx_packets;
+       u64 rx_bytes;
+       u64 rx_length_errors;
+       u64 rx_over_errors;
+       u64 rx_crc_errors;
+       u64 rx_missed_errors;
+       u64 multicast;
+};
+
+static const char tbt_net_gstrings_stats[][ETH_GSTRING_LEN] = {
+       "tx_packets",
+       "tx_bytes",
+       "tx_errors",
+       "rx_packets",
+       "rx_bytes",
+       "rx_length_errors",
+       "rx_over_errors",
+       "rx_crc_errors",
+       "rx_missed_errors",
+       "multicast",
+};
+
+struct tbt_buffer {
+       dma_addr_t dma;
+       union {
+               struct tbt_frame_header *hdr;
+               struct page *page;
+       };
+       u32 page_offset;
+};
+
+struct tbt_desc_ring {
+       /* pointer to the descriptor ring memory */
+       struct tbt_buf_desc *desc;
+       /* physical address of the descriptor ring */
+       dma_addr_t dma;
+       /* array of buffer structs */
+       struct tbt_buffer *buffers;
+       /* last descriptor that was associated with a buffer */
+       u16 last_allocated;
+       /* next descriptor to check for DD status bit */
+       u16 next_to_clean;
+};
+
 /**
 *  struct tbt_port - the basic tbt_port structure
 *  @tbt_nhi_ctxt:              context of the nhi controller.
 *  @net_dev:                   networking device object.
+*  @napi:                      NAPI context used for the RX data path.
 *  @login_retry_work:          work queue for sending login requests.
 *  @login_response_work:       work queue for sending login responses.
 *  @work_struct logout_work:   work queue for sending logout requests.
@@ -165,6 +242,11 @@ enum disconnect_path_stage {
 *  @login_retry_count:         counts number of login retries sent.
 *  @local_depth:               depth of the remote peer in the chain.
 *  @transmit_path:             routing parameter for the icm.
+*  @tx_ring:                   transmit ring from which the packets are sent.
+*  @rx_ring:                   receive ring where the packets are received.
+*  @stats:                     network statistics of the rx/tx packets.
+*  @packet_filters:            defines filters for the received packets.
+*  @multicast_hash_table:      hash table of multicast addresses.
 *  @frame_id:                  counting ID of frames.
 *  @num:                       port number.
 *  @local_path:                        routing parameter for the icm.
@@ -174,6 +256,7 @@ enum disconnect_path_stage {
 struct tbt_port {
        struct tbt_nhi_ctxt *nhi_ctxt;
        struct net_device *net_dev;
+       struct napi_struct napi;
        struct delayed_work login_retry_work;
        struct work_struct login_response_work;
        struct work_struct logout_work;
@@ -189,6 +272,17 @@ struct tbt_port {
        u8 login_retry_count;
        u8 local_depth;
        u8 transmit_path;
+       struct tbt_desc_ring tx_ring ____cacheline_aligned_in_smp;
+       struct tbt_desc_ring rx_ring;
+       struct tbt_net_stats stats;
+       u32 packet_filters;
+       /*
+        * hash table of 1024 boolean entries with hashing of
+        * the multicast address
+        */
+       u32 multicast_hash_table[DIV_ROUND_UP(
+                                       TBT_NET_MULTICAST_HASH_TABLE_SIZE,
+                                       BITS_PER_U32)];
        u16 frame_id;
        u8 num;
        u8 local_path;
@@ -235,6 +329,8 @@ static void tbt_net_tear_down(struct net_device *net_dev, bool send_logout)
                      (port->local_path * REG_OPTS_STEP);
                u32 rx_reg_val = ioread32(rx_reg) & ~REG_OPTS_E2E_EN;
 
+               napi_disable(&port->napi);
+
                tx_reg = iobase + REG_TX_OPTIONS_BASE +
                         (port->local_path * REG_OPTS_STEP);
                tx_reg_val = ioread32(tx_reg) & ~REG_OPTS_E2E_EN;
@@ -276,8 +372,1340 @@ static void tbt_net_tear_down(struct net_device *net_dev, bool send_logout)
                                       port->nhi_ctxt->num_paths);
                spin_unlock_irqrestore(&port->nhi_ctxt->lock, flags);
        }
+
+       port->rx_ring.next_to_clean = 0;
+       port->rx_ring.last_allocated = TBT_NET_NUM_RX_BUFS - 1;
+}
+
+void tbt_net_tx_msi(struct net_device *net_dev)
+{
+       struct tbt_port *port = netdev_priv(net_dev);
+       void __iomem *iobase = port->nhi_ctxt->iobase;
+       u32 prod_cons, prod, cons;
+
+       prod_cons = ioread32(TBT_RING_CONS_PROD_REG(iobase, REG_TX_RING_BASE,
+                                                   port->local_path));
+       prod = TBT_REG_RING_PROD_EXTRACT(prod_cons);
+       cons = TBT_REG_RING_CONS_EXTRACT(prod_cons);
+       if (prod >= TBT_NET_NUM_TX_BUFS || cons >= TBT_NET_NUM_TX_BUFS)
+               return;
+
+       if (TBT_NUM_BUFS_BETWEEN(prod, cons, TBT_NET_NUM_TX_BUFS) >=
+                                                       TX_WAKE_THRESHOLD) {
+               netif_wake_queue(port->net_dev);
+       } else {
+               spin_lock(&port->nhi_ctxt->lock);
+               /* enable TX interrupt */
+               RING_INT_ENABLE_TX(iobase, port->local_path);
+               spin_unlock(&port->nhi_ctxt->lock);
+       }
+}
+
+static irqreturn_t tbt_net_tx_msix(int __always_unused irq, void *data)
+{
+       struct tbt_port *port = data;
+       void __iomem *iobase = port->nhi_ctxt->iobase;
+       u32 prod_cons, prod, cons;
+
+       prod_cons = ioread32(TBT_RING_CONS_PROD_REG(iobase,
+                                                   REG_TX_RING_BASE,
+                                                   port->local_path));
+       prod = TBT_REG_RING_PROD_EXTRACT(prod_cons);
+       cons = TBT_REG_RING_CONS_EXTRACT(prod_cons);
+       if (prod < TBT_NET_NUM_TX_BUFS && cons < TBT_NET_NUM_TX_BUFS &&
+           TBT_NUM_BUFS_BETWEEN(prod, cons, TBT_NET_NUM_TX_BUFS) >=
+                                                       TX_WAKE_THRESHOLD) {
+               spin_lock(&port->nhi_ctxt->lock);
+               /* disable TX interrupt */
+               RING_INT_DISABLE_TX(iobase, port->local_path);
+               spin_unlock(&port->nhi_ctxt->lock);
+
+               netif_wake_queue(port->net_dev);
+       }
+
+       return IRQ_HANDLED;
+}
+
+void tbt_net_rx_msi(struct net_device *net_dev)
+{
+       struct tbt_port *port = netdev_priv(net_dev);
+
+       napi_schedule_irqoff(&port->napi);
+}
+
+static irqreturn_t tbt_net_rx_msix(int __always_unused irq, void *data)
+{
+       struct tbt_port *port = data;
+
+       if (likely(napi_schedule_prep(&port->napi))) {
+               struct tbt_nhi_ctxt *nhi_ctx = port->nhi_ctxt;
+
+               spin_lock(&nhi_ctx->lock);
+               /* disable RX interrupt */
+               RING_INT_DISABLE_RX(nhi_ctx->iobase, port->local_path,
+                                   nhi_ctx->num_paths);
+               spin_unlock(&nhi_ctx->lock);
+
+               __napi_schedule_irqoff(&port->napi);
+       }
+
+       return IRQ_HANDLED;
+}
+
+static void tbt_net_pull_tail(struct sk_buff *skb)
+{
+       skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
+       unsigned int pull_len;
+       unsigned char *va;
+
+       /*
+        * it is valid to use page_address instead of kmap since we are
+        * working with pages allocated out of the lowmem pool
+        */
+       va = skb_frag_address(frag);
+
+       pull_len = eth_get_headlen(va, TBT_NET_RX_HDR_SIZE);
+
+       /* align pull length to size of long to optimize memcpy performance */
+       skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));
+
+       /* update all of the pointers */
+       skb_frag_size_sub(frag, pull_len);
+       frag->page_offset += pull_len;
+       skb->data_len -= pull_len;
+       skb->tail += pull_len;
+}
+
+static inline bool tbt_net_alloc_mapped_page(struct device *dev,
+                                            struct tbt_buffer *buf, gfp_t gfp)
+{
+       if (!buf->page) {
+               buf->page = alloc_page(gfp | __GFP_COLD);
+               if (unlikely(!buf->page))
+                       return false;
+
+               buf->dma = dma_map_page(dev, buf->page, 0, PAGE_SIZE,
+                                       DMA_FROM_DEVICE);
+               if (dma_mapping_error(dev, buf->dma)) {
+                       __free_page(buf->page);
+                       buf->page = NULL;
+                       return false;
+               }
+               buf->page_offset = 0;
+       }
+       return true;
+}
+
+static bool tbt_net_alloc_rx_buffers(struct device *dev,
+                                    struct tbt_desc_ring *rx_ring,
+                                    u16 cleaned_count, void __iomem *reg,
+                                    gfp_t gfp)
+{
+       u16 i = (rx_ring->last_allocated + 1) & (TBT_NET_NUM_RX_BUFS - 1);
+       bool res = false;
+
+       while (cleaned_count--) {
+               struct tbt_buf_desc *desc = &rx_ring->desc[i];
+               struct tbt_buffer *buf = &rx_ring->buffers[i];
+
+               /* make sure next_to_clean won't see a stale buffer */
+               desc->attributes = cpu_to_le32(DESC_ATTR_REQ_STS |
+                                              DESC_ATTR_INT_EN);
+               if (tbt_net_alloc_mapped_page(dev, buf, gfp)) {
+                       res = true;
+                       rx_ring->last_allocated = i;
+                       i = (i + 1) & (TBT_NET_NUM_RX_BUFS - 1);
+                       desc->phys = cpu_to_le64(buf->dma + buf->page_offset);
+               } else {
+                       break;
+               }
+       }
+
+       if (res) {
+               iowrite32((rx_ring->last_allocated << REG_RING_CONS_SHIFT) &
+                         REG_RING_CONS_MASK, reg);
+       }
+
+       return res;
+}
+
+static inline bool tbt_net_multicast_mac_set(const u32 *multicast_hash_table,
+                                            const u8 *ether_addr)
+{
+       u16 hash_val = TBT_NET_ETHER_ADDR_HASH(ether_addr);
+
+       return !!(multicast_hash_table[hash_val / BITS_PER_U32] &
+                 BIT(hash_val % BITS_PER_U32));
+}
+
+static enum frame_status tbt_net_check_frame(struct tbt_port *port,
+                                            u16 frame_num, u32 *count,
+                                            u16 index, u16 *id, u32 *size)
+{
+       struct tbt_desc_ring *rx_ring = &port->rx_ring;
+       __le32 desc_attr = rx_ring->desc[frame_num].attributes;
+       enum frame_status res = GOOD_AS_FIRST_FRAME;
+       u32 len, frame_count, frame_size;
+       struct tbt_frame_header *hdr;
+
+       if (!(desc_attr & cpu_to_le32(DESC_ATTR_DESC_DONE)))
+               return FRAME_NOT_READY;
+
+       rmb(); /* read other fields from desc after checking DD */
+
+       if (unlikely(desc_attr & cpu_to_le32(DESC_ATTR_RX_CRC_ERR))) {
+               ++port->stats.rx_crc_errors;
+               goto err;
+       } else if (unlikely(desc_attr &
+                               cpu_to_le32(DESC_ATTR_RX_BUF_OVRN_ERR))) {
+               ++port->stats.rx_over_errors;
+               goto err;
+       }
+
+       len = (le32_to_cpu(desc_attr) & DESC_ATTR_LEN_MASK)
+             >> DESC_ATTR_LEN_SHIFT;
+       if (len == 0)
+               len = TBT_RING_MAX_FRAME_SIZE;
+       /* should be greater than just the header, i.e. contains data */
+       if (unlikely(len <= sizeof(struct tbt_frame_header))) {
+               ++port->stats.rx_length_errors;
+               goto err;
+       }
+
+       prefetchw(rx_ring->buffers[frame_num].page);
+       hdr = page_address(rx_ring->buffers[frame_num].page) +
+                               rx_ring->buffers[frame_num].page_offset;
+       /* prefetch first cache line of first page */
+       prefetch(hdr);
+
+       /* we are reusing so sync this buffer for CPU use */
+       dma_sync_single_range_for_cpu(&port->nhi_ctxt->pdev->dev,
+                                     rx_ring->buffers[frame_num].dma,
+                                     rx_ring->buffers[frame_num].page_offset,
+                                     TBT_RING_MAX_FRAME_SIZE,
+                                     DMA_FROM_DEVICE);
+
+       frame_count = le32_to_cpu(hdr->frame_count);
+       frame_size = le32_to_cpu(hdr->frame_size);
+
+       if (unlikely((frame_size > len - sizeof(struct tbt_frame_header)) ||
+                    (frame_size == 0))) {
+               ++port->stats.rx_length_errors;
+               goto err;
+       }
+       /*
+        * In case we're in the middle of a packet, validate the frame
+        * header against the first fragment of the packet
+        */
+       if (*count) {
+               /* check the frame count is consistent with the first frame */
+               if (frame_count != *count) {
+                       ++port->stats.rx_length_errors;
+                       goto check_as_first;
+               }
+
+               /*
+                * check the frame index increments correctly
+                * and the frame id matches
+                */
+               if ((le16_to_cpu(hdr->frame_index) != index) ||
+                   (le16_to_cpu(hdr->frame_id) != *id)) {
+                       ++port->stats.rx_missed_errors;
+                       goto check_as_first;
+               }
+
+               *size += frame_size;
+               if (*size > TBT_NET_MTU) {
+                       ++port->stats.rx_length_errors;
+                       goto err;
+               }
+               res = GOOD_FRAME;
+       } else { /* start of packet, validate the frame header */
+               const u8 *addr;
+
+check_as_first:
+               rx_ring->next_to_clean = frame_num;
+
+               /* validate the first packet has a valid frame count */
+               if (unlikely(frame_count == 0 ||
+                            frame_count > (TBT_NET_NUM_RX_BUFS / 4))) {
+                       ++port->stats.rx_length_errors;
+                       goto err;
+               }
+
+               /* validate the first frame has a valid (zero) frame index */
+               if (hdr->frame_index != 0) {
+                       ++port->stats.rx_missed_errors;
+                       goto err;
+               }
+
+               BUILD_BUG_ON(TBT_NET_RX_HDR_SIZE > TBT_RING_MAX_FRM_DATA_SZ);
+               if ((frame_count > 1) && (frame_size < TBT_NET_RX_HDR_SIZE)) {
+                       ++port->stats.rx_length_errors;
+                       goto err;
+               }
+
+               addr = (u8 *)(hdr + 1);
+
+               /* check the packet can go through the filter */
+               if (is_multicast_ether_addr(addr)) {
+                       if (!is_broadcast_ether_addr(addr)) {
+                               if ((port->packet_filters &
+                                    (BIT(PACKET_TYPE_PROMISCUOUS) |
+                                     BIT(PACKET_TYPE_ALL_MULTICAST))) ||
+                                   tbt_net_multicast_mac_set(
+                                       port->multicast_hash_table, addr))
+                                       res = GOOD_AS_FIRST_MULTICAST_FRAME;
+                               else
+                                       goto err;
+                       }
+               } else if (!(port->packet_filters &
+                            (BIT(PACKET_TYPE_PROMISCUOUS) |
+                             BIT(PACKET_TYPE_UNICAST_PROMISCUOUS))) &&
+                          !ether_addr_equal(port->net_dev->dev_addr, addr)) {
+                       goto err;
+               }
+
+               *size = frame_size;
+               *count = frame_count;
+               *id = le16_to_cpu(hdr->frame_id);
+       }
+
+#if (PREFETCH_STRIDE < 128)
+       prefetch((u8 *)hdr + PREFETCH_STRIDE);
+#endif
+
+       return res;
+
+err:
+       rx_ring->next_to_clean = (frame_num + 1) & (TBT_NET_NUM_RX_BUFS - 1);
+       return FRAME_ERROR;
+}
+
+static inline unsigned int tbt_net_max_frm_data_size(
+                                               __maybe_unused u32 frame_size)
+{
+#if (TBT_NUM_FRAMES_PER_PAGE > 1)
+       return ALIGN(frame_size + sizeof(struct tbt_frame_header),
+                    L1_CACHE_BYTES) -
+              sizeof(struct tbt_frame_header);
+#else
+       return TBT_RING_MAX_FRM_DATA_SZ;
+#endif
+}
+
+static int tbt_net_poll(struct napi_struct *napi, int budget)
+{
+       struct tbt_port *port = container_of(napi, struct tbt_port, napi);
+       void __iomem *reg = TBT_RING_CONS_PROD_REG(port->nhi_ctxt->iobase,
+                                                  REG_RX_RING_BASE,
+                                                  port->local_path);
+       struct tbt_desc_ring *rx_ring = &port->rx_ring;
+       u16 cleaned_count = TBT_NUM_BUFS_BETWEEN(rx_ring->last_allocated,
+                                                rx_ring->next_to_clean,
+                                                TBT_NET_NUM_RX_BUFS);
+       unsigned long flags;
+       int rx_packets = 0;
+
+loop:
+       while (likely(rx_packets < budget)) {
+               struct sk_buff *skb;
+               enum frame_status status;
+               bool multicast = false;
+               u32 frame_count = 0, size;
+               u16 j, frame_id;
+               int i;
+
+               /*
+                * return some buffers to hardware; one at a time is too slow,
+                * so allocate TBT_NET_RX_BUFFER_WRITE buffers at the same time
+                */
+               if (cleaned_count >= TBT_NET_RX_BUFFER_WRITE) {
+                       tbt_net_alloc_rx_buffers(&port->nhi_ctxt->pdev->dev,
+                                                rx_ring, cleaned_count, reg,
+                                                GFP_ATOMIC);
+                       cleaned_count = 0;
+               }
+
+               status = tbt_net_check_frame(port, rx_ring->next_to_clean,
+                                            &frame_count, 0, &frame_id,
+                                            &size);
+               if (status == FRAME_NOT_READY)
+                       break;
+
+               if (status == FRAME_ERROR) {
+                       ++cleaned_count;
+                       continue;
+               }
+
+               multicast = (status == GOOD_AS_FIRST_MULTICAST_FRAME);
+
+               /*
+                *  i counts the frames received, up to frame_count,
+                *  j cyclically walks the ring locations starting from
+                *  the next frame to clean
+                */
+               j = (rx_ring->next_to_clean + 1);
+               j &= (TBT_NET_NUM_RX_BUFS - 1);
+               for (i = 1; i < frame_count; ++i) {
+                       status = tbt_net_check_frame(port, j, &frame_count, i,
+                                                    &frame_id, &size);
+                       if (status == FRAME_NOT_READY)
+                               goto out;
+
+                       j = (j + 1) & (TBT_NET_NUM_RX_BUFS - 1);
+
+                       /* if a new frame is found, start over */
+                       if (status == GOOD_AS_FIRST_FRAME ||
+                           status == GOOD_AS_FIRST_MULTICAST_FRAME) {
+                               multicast = (status ==
+                                            GOOD_AS_FIRST_MULTICAST_FRAME);
+                               cleaned_count += i;
+                               i = 0;
+                               continue;
+                       }
+
+                       if (status == FRAME_ERROR) {
+                               cleaned_count += (i + 1);
+                               goto loop;
+                       }
+               }
+
+               /* allocate a skb to store the frags */
+               skb = netdev_alloc_skb_ip_align(port->net_dev,
+                                               TBT_NET_RX_HDR_SIZE);
+               if (unlikely(!skb))
+                       break;
+
+               /*
+                * we will be copying header into skb->data in
+                * tbt_net_pull_tail so it is in our interest to prefetch
+                * it now to avoid a possible cache miss
+                */
+               prefetchw(skb->data);
+
+               /*
+                * if the overall size of the packet is smaller than
+                * TBT_NET_RX_HDR_SIZE, the small buffer size we decided
+                * to allocate as the base of the RX skb
+                */
+               if (size <= TBT_NET_RX_HDR_SIZE) {
+                       struct tbt_buffer *buf =
+                               &(rx_ring->buffers[rx_ring->next_to_clean]);
+                       u8 *va = page_address(buf->page) + buf->page_offset +
+                                sizeof(struct tbt_frame_header);
+
+                       memcpy(__skb_put(skb, size), va,
+                              ALIGN(size, sizeof(long)));
+
+                       /*
+                        * Reuse the buffer as-is, just make sure it is
+                        * local. Access to local memory is faster than
+                        * access to non-local memory, so let's reuse.
+                        * If not local, free it and reallocate later.
+                        */
+                       if (likely(page_to_nid(buf->page) == numa_node_id()))
+                               /* sync the buffer for use by the device */
+                               dma_sync_single_range_for_device(
+                                               &port->nhi_ctxt->pdev->dev,
+                                               buf->dma, buf->page_offset,
+                                               TBT_RING_MAX_FRAME_SIZE,
+                                               DMA_FROM_DEVICE);
+                       else {
+                               /* this page cannot be reused so discard it */
+                               put_page(buf->page);
+                               buf->page = NULL;
+                               dma_unmap_page(&port->nhi_ctxt->pdev->dev,
+                                              buf->dma, PAGE_SIZE,
+                                              DMA_FROM_DEVICE);
+                       }
+                       rx_ring->next_to_clean = (rx_ring->next_to_clean + 1) &
+                                                (TBT_NET_NUM_RX_BUFS - 1);
+               } else {
+                       for (i = 0; i < frame_count;  ++i) {
+                               struct tbt_buffer *buf = &(rx_ring->buffers[
+                                               rx_ring->next_to_clean]);
+                               struct tbt_frame_header *hdr =
+                                               page_address(buf->page) +
+                                               buf->page_offset;
+                               u32 frm_size = le32_to_cpu(hdr->frame_size);
+
+                               unsigned int truesize =
+                                       tbt_net_max_frm_data_size(frm_size);
+
+                               /* add frame to skb struct */
+                               skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+                                               buf->page,
+                                               sizeof(struct tbt_frame_header)
+                                                       + buf->page_offset,
+                                               frm_size, truesize);
+
+#if (TBT_NUM_FRAMES_PER_PAGE > 1)
+                               /* move offset up to the next cache line */
+                               buf->page_offset += (truesize +
+                                       sizeof(struct tbt_frame_header));
+
+                               /*
+                                * we can reuse buffer if there is space
+                                * available and it is local
+                                */
+                               if (page_to_nid(buf->page) == numa_node_id()
+                                   && buf->page_offset <=
+                                       PAGE_SIZE - TBT_RING_MAX_FRAME_SIZE) {
+                                       /*
+                                        * bump ref count on page before
+                                        * it is given to the stack
+                                        */
+                                       get_page(buf->page);
+                                       /*
+                                        * sync the buffer for use by the
+                                        * device
+                                        */
+                                       dma_sync_single_range_for_device(
+                                               &port->nhi_ctxt->pdev->dev,
+                                               buf->dma, buf->page_offset,
+                                               TBT_RING_MAX_FRAME_SIZE,
+                                               DMA_FROM_DEVICE);
+                               } else
+#endif
+                               {
+                                       buf->page = NULL;
+                                       dma_unmap_page(
+                                               &port->nhi_ctxt->pdev->dev,
+                                               buf->dma, PAGE_SIZE,
+                                               DMA_FROM_DEVICE);
+                               }
+
+                               rx_ring->next_to_clean =
+                                               (rx_ring->next_to_clean + 1) &
+                                               (TBT_NET_NUM_RX_BUFS - 1);
+                       }
+                       /*
+                        * place header from the first
+                        * fragment in linear portion of buffer
+                        */
+                       tbt_net_pull_tail(skb);
+               }
+
+               /* pad short packets */
+               if (unlikely(skb->len < ETH_ZLEN)) {
+                       int pad_len = ETH_ZLEN - skb->len;
+
+                       /* The skb is freed on error */
+                       if (unlikely(skb_pad(skb, pad_len))) {
+                               cleaned_count += frame_count;
+                               continue;
+                       }
+                       __skb_put(skb, pad_len);
+               }
+
+               skb->protocol = eth_type_trans(skb, port->net_dev);
+               napi_gro_receive(&port->napi, skb);
+
+               ++rx_packets;
+               port->stats.rx_bytes += size;
+               if (multicast)
+                       ++port->stats.multicast;
+               cleaned_count += frame_count;
+       }
+
+out:
+       port->stats.rx_packets += rx_packets;
+
+       if (cleaned_count)
+               tbt_net_alloc_rx_buffers(&port->nhi_ctxt->pdev->dev,
+                                        rx_ring, cleaned_count, reg,
+                                        GFP_ATOMIC);
+
+       /* If all work not completed, return budget and keep polling */
+       if (rx_packets >= budget)
+               return budget;
+
+       /* Work is done so exit the polling mode and re-enable the interrupt */
+       napi_complete(napi);
+
+       spin_lock_irqsave(&port->nhi_ctxt->lock, flags);
+       /* enable RX interrupt */
+       RING_INT_ENABLE_RX(port->nhi_ctxt->iobase, port->local_path,
+                          port->nhi_ctxt->num_paths);
+
+       spin_unlock_irqrestore(&port->nhi_ctxt->lock, flags);
+
+       return 0;
+}
+
+static int tbt_net_open(struct net_device *net_dev)
+{
+       struct tbt_port *port = netdev_priv(net_dev);
+       int res = 0;
+       int i, j;
+
+       /* change link state to off until path establishment finishes */
+       netif_carrier_off(net_dev);
+
+       /*
+        * if we previously succeeded in allocating MSI-X entries,
+        * request IRQs for them now:
+        *  2=tx data port 0,
+        *  3=rx data port 0,
+        *  4=tx data port 1,
+        *  5=rx data port 1,
+        *  ...
+        *  otherwise, if MSI is used, nhi_msi() handles ICM and data paths
+        */
+       if (port->nhi_ctxt->msix_entries) {
+               char name[] = "tbt-net-xx-xx";
+
+               scnprintf(name, sizeof(name), "tbt-net-rx-%02u", port->num);
+               res = devm_request_irq(&port->nhi_ctxt->pdev->dev,
+                       port->nhi_ctxt->msix_entries[3+(port->num*2)].vector,
+                       tbt_net_rx_msix, 0, name, port);
+               if (res) {
+                       netif_err(port, ifup, net_dev, "request_irq %s failed %d\n",
+                                 name, res);
+                       goto out;
+               }
+               name[8] = 't';
+               res = devm_request_irq(&port->nhi_ctxt->pdev->dev,
+                       port->nhi_ctxt->msix_entries[2+(port->num*2)].vector,
+                       tbt_net_tx_msix, 0, name, port);
+               if (res) {
+                       netif_err(port, ifup, net_dev, "request_irq %s failed %d\n",
+                                 name, res);
+                       goto request_irq_failure;
+               }
+       }
+       /*
+        * Verify that all buffer sizes are well defined.
+        * Start by checking that the frame(s) will not cross
+        * the page boundary
+        */
+       BUILD_BUG_ON(TBT_NUM_FRAMES_PER_PAGE < 1);
+       /*
+        * Make sure there is enough room to hold
+        * 3 max MTU packets for TX
+        */
+       BUILD_BUG_ON((TBT_NET_NUM_TX_BUFS * TBT_RING_MAX_FRAME_SIZE) <
+                    (TBT_NET_MTU * 3));
+       /* make sure the number of TX buffers is a power of 2 */
+       BUILD_BUG_ON_NOT_POWER_OF_2(TBT_NET_NUM_TX_BUFS);
+       /*
+        * Make sure there is enough room to hold
+        * 3 max MTU packets for RX
+        */
+       BUILD_BUG_ON((TBT_NET_NUM_RX_BUFS * TBT_RING_MAX_FRAME_SIZE) <
+                    (TBT_NET_MTU * 3));
+       /* make sure the number of RX buffers is a power of 2 */
+       BUILD_BUG_ON_NOT_POWER_OF_2(TBT_NET_NUM_RX_BUFS);
+
+       port->rx_ring.last_allocated = TBT_NET_NUM_RX_BUFS - 1;
+
+       port->tx_ring.buffers = vzalloc(TBT_NET_NUM_TX_BUFS *
+                                       sizeof(struct tbt_buffer));
+       if (!port->tx_ring.buffers)
+               goto ring_alloc_failure;
+       port->rx_ring.buffers = vzalloc(TBT_NET_NUM_RX_BUFS *
+                                       sizeof(struct tbt_buffer));
+       if (!port->rx_ring.buffers)
+               goto ring_alloc_failure;
+
+       /*
+        * Allocate the TX and RX descriptors.
+        * If the total size fits within a page, do a single central
+        * allocation; otherwise, allocate TX and RX separately
+        */
+       if (TBT_NET_SIZE_TOTAL_DESCS <= PAGE_SIZE) {
+               port->tx_ring.desc = dmam_alloc_coherent(
+                               &port->nhi_ctxt->pdev->dev,
+                               TBT_NET_SIZE_TOTAL_DESCS,
+                               &port->tx_ring.dma,
+                               GFP_KERNEL | __GFP_ZERO);
+               if (!port->tx_ring.desc)
+                       goto ring_alloc_failure;
+               /* RX starts where TX finishes */
+               port->rx_ring.desc = &port->tx_ring.desc[TBT_NET_NUM_TX_BUFS];
+               port->rx_ring.dma = port->tx_ring.dma +
+                       (TBT_NET_NUM_TX_BUFS * sizeof(struct tbt_buf_desc));
+       } else {
+               port->tx_ring.desc = dmam_alloc_coherent(
+                               &port->nhi_ctxt->pdev->dev,
+                               TBT_NET_NUM_TX_BUFS *
+                                               sizeof(struct tbt_buf_desc),
+                               &port->tx_ring.dma,
+                               GFP_KERNEL | __GFP_ZERO);
+               if (!port->tx_ring.desc)
+                       goto ring_alloc_failure;
+               port->rx_ring.desc = dmam_alloc_coherent(
+                               &port->nhi_ctxt->pdev->dev,
+                               TBT_NET_NUM_RX_BUFS *
+                                               sizeof(struct tbt_buf_desc),
+                               &port->rx_ring.dma,
+                               GFP_KERNEL | __GFP_ZERO);
+               if (!port->rx_ring.desc)
+                       goto rx_desc_alloc_failure;
+       }
+
+       /* allocate TX buffers and configure the descriptors */
+       for (i = 0; i < TBT_NET_NUM_TX_BUFS; i++) {
+               port->tx_ring.buffers[i].hdr = dma_alloc_coherent(
+                       &port->nhi_ctxt->pdev->dev,
+                       TBT_NUM_FRAMES_PER_PAGE * TBT_RING_MAX_FRAME_SIZE,
+                       &port->tx_ring.buffers[i].dma,
+                       GFP_KERNEL);
+               if (!port->tx_ring.buffers[i].hdr)
+                       goto buffers_alloc_failure;
+
+               port->tx_ring.desc[i].phys =
+                               cpu_to_le64(port->tx_ring.buffers[i].dma);
+               port->tx_ring.desc[i].attributes =
+                               cpu_to_le32(DESC_ATTR_REQ_STS |
+                                           TBT_NET_DESC_ATTR_SOF_EOF);
+
+               /*
+                * In case the page is bigger than the frame size,
+                * make the next buffer descriptor point to
+                * the next frame memory address within the page
+                */
+               for (i++, j = 1; (i < TBT_NET_NUM_TX_BUFS) &&
+                                (j < TBT_NUM_FRAMES_PER_PAGE); i++, j++) {
+                       port->tx_ring.buffers[i].dma =
+                               port->tx_ring.buffers[i - 1].dma +
+                               TBT_RING_MAX_FRAME_SIZE;
+                       port->tx_ring.buffers[i].hdr =
+                               (void *)(port->tx_ring.buffers[i - 1].hdr) +
+                               TBT_RING_MAX_FRAME_SIZE;
+                       /* advance the offset by TBT_RING_MAX_FRAME_SIZE */
+                       port->tx_ring.buffers[i].page_offset =
+                               port->tx_ring.buffers[i - 1].page_offset +
+                               TBT_RING_MAX_FRAME_SIZE;
+                       port->tx_ring.desc[i].phys =
+                               cpu_to_le64(port->tx_ring.buffers[i].dma);
+                       port->tx_ring.desc[i].attributes =
+                               cpu_to_le32(DESC_ATTR_REQ_STS |
+                                           TBT_NET_DESC_ATTR_SOF_EOF);
+               }
+               i--;
+       }
+
+       port->negotiation_status =
+                       BIT(port->nhi_ctxt->net_devices[port->num].medium_sts);
+       if (port->negotiation_status == BIT(MEDIUM_READY_FOR_CONNECTION)) {
+               port->login_retry_count = 0;
+               queue_delayed_work(port->nhi_ctxt->net_workqueue,
+                                  &port->login_retry_work, 0);
+       }
+
+       netif_info(port, ifup, net_dev, "Thunderbolt(TM) Networking port %u - ready for ThunderboltIP negotiation\n",
+                  port->num);
+       return 0;
+
+buffers_alloc_failure:
+       /*
+        * Roll back the TX buffers that were already allocated
+        * before the failure
+        */
+       for (i--; i >= 0; i--) {
+               /* free only for first buffer allocation */
+               if (port->tx_ring.buffers[i].page_offset == 0)
+                       dma_free_coherent(&port->nhi_ctxt->pdev->dev,
+                                         TBT_NUM_FRAMES_PER_PAGE *
+                                               TBT_RING_MAX_FRAME_SIZE,
+                                         port->tx_ring.buffers[i].hdr,
+                                         port->tx_ring.buffers[i].dma);
+               port->tx_ring.buffers[i].hdr = NULL;
+       }
+       /*
+        * For the central allocation, free everything at once;
+        * otherwise free RX and then TX separately
+        */
+       if (TBT_NET_SIZE_TOTAL_DESCS <= PAGE_SIZE) {
+               dmam_free_coherent(&port->nhi_ctxt->pdev->dev,
+                                  TBT_NET_SIZE_TOTAL_DESCS,
+                                  port->tx_ring.desc,
+                                  port->tx_ring.dma);
+               port->rx_ring.desc = NULL;
+       } else {
+               dmam_free_coherent(&port->nhi_ctxt->pdev->dev,
+                                  TBT_NET_NUM_RX_BUFS *
+                                               sizeof(struct tbt_buf_desc),
+                                  port->rx_ring.desc,
+                                  port->rx_ring.dma);
+               port->rx_ring.desc = NULL;
+rx_desc_alloc_failure:
+               dmam_free_coherent(&port->nhi_ctxt->pdev->dev,
+                                  TBT_NET_NUM_TX_BUFS *
+                                               sizeof(struct tbt_buf_desc),
+                                  port->tx_ring.desc,
+                                  port->tx_ring.dma);
+       }
+       port->tx_ring.desc = NULL;
+ring_alloc_failure:
+       vfree(port->tx_ring.buffers);
+       port->tx_ring.buffers = NULL;
+       vfree(port->rx_ring.buffers);
+       port->rx_ring.buffers = NULL;
+       res = -ENOMEM;
+       netif_err(port, ifup, net_dev, "Thunderbolt(TM) Networking port %u - unable to allocate memory\n",
+                 port->num);
+
+       if (!port->nhi_ctxt->msix_entries)
+               goto out;
+
+       devm_free_irq(&port->nhi_ctxt->pdev->dev,
+                     port->nhi_ctxt->msix_entries[2 + (port->num * 2)].vector,
+                     port);
+request_irq_failure:
+       devm_free_irq(&port->nhi_ctxt->pdev->dev,
+                     port->nhi_ctxt->msix_entries[3 + (port->num * 2)].vector,
+                     port);
+out:
+       return res;
+}
+
+static int tbt_net_close(struct net_device *net_dev)
+{
+       struct tbt_port *port = netdev_priv(net_dev);
+       int i;
+
+       /*
+        * Close connection, disable rings, flow controls
+        * and interrupts
+        */
+       tbt_net_tear_down(net_dev, !(port->negotiation_status &
+                                    BIT(RECEIVE_LOGOUT)));
+
+       cancel_work_sync(&port->login_response_work);
+       cancel_work_sync(&port->logout_work);
+       cancel_work_sync(&port->status_reply_work);
+       cancel_work_sync(&port->approve_inter_domain_work);
+
+       /* Free the TX buffers that were allocated */
+       for (i = 0; i < TBT_NET_NUM_TX_BUFS; i++) {
+               if (port->tx_ring.buffers[i].page_offset == 0)
+                       dma_free_coherent(&port->nhi_ctxt->pdev->dev,
+                                         TBT_NUM_FRAMES_PER_PAGE *
+                                               TBT_RING_MAX_FRAME_SIZE,
+                                         port->tx_ring.buffers[i].hdr,
+                                         port->tx_ring.buffers[i].dma);
+               port->tx_ring.buffers[i].hdr = NULL;
+       }
+       /* Release and unmap the RX buffers that were allocated */
+       for (i = 0; i < TBT_NET_NUM_RX_BUFS; i++)
+               if (port->rx_ring.buffers[i].page) {
+                       put_page(port->rx_ring.buffers[i].page);
+                       port->rx_ring.buffers[i].page = NULL;
+                       dma_unmap_page(&port->nhi_ctxt->pdev->dev,
+                                      port->rx_ring.buffers[i].dma, PAGE_SIZE,
+                                      DMA_FROM_DEVICE);
+               }
+
+       /*
+        * For the central allocation, free everything at once;
+        * otherwise free RX and then TX separately
+        */
+       if (TBT_NET_SIZE_TOTAL_DESCS <= PAGE_SIZE) {
+               dmam_free_coherent(&port->nhi_ctxt->pdev->dev,
+                                  TBT_NET_SIZE_TOTAL_DESCS,
+                                  port->tx_ring.desc,
+                                  port->tx_ring.dma);
+               port->rx_ring.desc = NULL;
+       } else {
+               dmam_free_coherent(&port->nhi_ctxt->pdev->dev,
+                                  TBT_NET_NUM_RX_BUFS *
+                                               sizeof(struct tbt_buf_desc),
+                                  port->rx_ring.desc,
+                                  port->rx_ring.dma);
+               port->rx_ring.desc = NULL;
+               dmam_free_coherent(&port->nhi_ctxt->pdev->dev,
+                                  TBT_NET_NUM_TX_BUFS *
+                                               sizeof(struct tbt_buf_desc),
+                                  port->tx_ring.desc,
+                                  port->tx_ring.dma);
+       }
+       port->tx_ring.desc = NULL;
+
+       vfree(port->tx_ring.buffers);
+       port->tx_ring.buffers = NULL;
+       vfree(port->rx_ring.buffers);
+       port->rx_ring.buffers = NULL;
+
+       devm_free_irq(&port->nhi_ctxt->pdev->dev,
+                     port->nhi_ctxt->msix_entries[3 + (port->num * 2)].vector,
+                     port);
+       devm_free_irq(&port->nhi_ctxt->pdev->dev,
+                     port->nhi_ctxt->msix_entries[2 + (port->num * 2)].vector,
+                     port);
+
+       netif_info(port, ifdown, net_dev, "Thunderbolt(TM) Networking port %u - is down\n",
+                  port->num);
+
+       return 0;
+}
+
+static bool tbt_net_xmit_csum(struct sk_buff *skb,
+                             struct tbt_desc_ring *tx_ring, u32 first,
+                             u32 last, u32 frame_count)
+{
+       struct tbt_frame_header *hdr = tx_ring->buffers[first].hdr;
+       __wsum wsum = (__force __wsum)htonl(skb->len -
+                                           skb_transport_offset(skb));
+       int offset = skb_transport_offset(skb);
+       __sum16 *tucso;  /* TCP UDP Checksum Segment Offset */
+       __be16 protocol = skb->protocol;
+       u8 *dest = (u8 *)(hdr + 1);
+       int len;
+
+       if (skb->ip_summed != CHECKSUM_PARTIAL) {
+               for (; first != last;
+                       first = (first + 1) & (TBT_NET_NUM_TX_BUFS - 1)) {
+                       hdr = tx_ring->buffers[first].hdr;
+                       hdr->frame_count = cpu_to_le32(frame_count);
+               }
+               return true;
+       }
+
+       if (protocol == htons(ETH_P_8021Q)) {
+               struct vlan_hdr *vhdr, vh;
+
+               vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(vh), &vh);
+               if (!vhdr)
+                       return false;
+
+               protocol = vhdr->h_vlan_encapsulated_proto;
+       }
+
+       /*
+        * Data points to the beginning of the packet.
+        * Compute the absolute location of the checksum
+        * within the packet.
+        * ipcso will update the IP checksum.
+        * tucso will update the TCP/UDP checksum.
+        */
+       if (protocol == htons(ETH_P_IP)) {
+               __sum16 *ipcso = (__sum16 *)(dest +
+                       ((u8 *)&(ip_hdr(skb)->check) - skb->data));
+
+               *ipcso = 0;
+               *ipcso = ip_fast_csum(dest + skb_network_offset(skb),
+                                     ip_hdr(skb)->ihl);
+               if (ip_hdr(skb)->protocol == IPPROTO_TCP)
+                       tucso = (__sum16 *)(dest +
+                               ((u8 *)&(tcp_hdr(skb)->check) - skb->data));
+               else if (ip_hdr(skb)->protocol == IPPROTO_UDP)
+                       tucso = (__sum16 *)(dest +
+                               ((u8 *)&(udp_hdr(skb)->check) - skb->data));
+               else
+                       return false;
+
+               *tucso = ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
+                                           ip_hdr(skb)->daddr, 0,
+                                           ip_hdr(skb)->protocol, 0);
+       } else if (skb_is_gso(skb)) {
+               if (skb_is_gso_v6(skb)) {
+                       tucso = (__sum16 *)(dest +
+                               ((u8 *)&(tcp_hdr(skb)->check) - skb->data));
+                       *tucso = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+                                                 &ipv6_hdr(skb)->daddr,
+                                                 0, IPPROTO_TCP, 0);
+               } else if ((protocol == htons(ETH_P_IPV6)) &&
+                          (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)) {
+                       tucso = (__sum16 *)(dest +
+                               ((u8 *)&(udp_hdr(skb)->check) - skb->data));
+                       *tucso = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+                                                 &ipv6_hdr(skb)->daddr,
+                                                 0, IPPROTO_UDP, 0);
+               } else {
+                       return false;
+               }
+       } else if (protocol == htons(ETH_P_IPV6)) {
+               tucso = (__sum16 *)(dest + skb_checksum_start_offset(skb) +
+                                   skb->csum_offset);
+               *tucso = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+                                         &ipv6_hdr(skb)->daddr,
+                                         0, ipv6_hdr(skb)->nexthdr, 0);
+       } else {
+               return false;
+       }
+
+       /* The first frame holds the headers, the rest carry data */
+       for (; first != last; first = (first + 1) & (TBT_NET_NUM_TX_BUFS - 1),
+                                                               offset = 0) {
+               hdr = tx_ring->buffers[first].hdr;
+               dest = (u8 *)(hdr + 1) + offset;
+               len = le32_to_cpu(hdr->frame_size) - offset;
+               wsum = csum_partial(dest, len, wsum);
+               hdr->frame_count = cpu_to_le32(frame_count);
+       }
+       *tucso = csum_fold(wsum);
+
+       return true;
+}
+
+static netdev_tx_t tbt_net_xmit_frame(struct sk_buff *skb,
+                                     struct net_device *net_dev)
+{
+       struct tbt_port *port = netdev_priv(net_dev);
+       void __iomem *iobase = port->nhi_ctxt->iobase;
+       void __iomem *reg = TBT_RING_CONS_PROD_REG(iobase,
+                                                  REG_TX_RING_BASE,
+                                                  port->local_path);
+       struct tbt_desc_ring *tx_ring = &port->tx_ring;
+       struct tbt_frame_header *hdr;
+       u32 prod_cons, prod, cons, first;
+       /* len is the length of the current fragment */
+       unsigned int len = skb_headlen(skb);
+       /* data_len is overall packet length */
+       unsigned int data_len = skb->len;
+       u32 frm_idx, frag_num = 0;
+       const u8 *src = skb->data;
+       bool unmap = false;
+       __le32 *attr;
+       u8 *dest;
+
+       if (unlikely(data_len == 0 || data_len > TBT_NET_MTU))
+               goto invalid_packet;
+
+       prod_cons = ioread32(reg);
+       prod = TBT_REG_RING_PROD_EXTRACT(prod_cons);
+       cons = TBT_REG_RING_CONS_EXTRACT(prod_cons);
+       if (prod >= TBT_NET_NUM_TX_BUFS || cons >= TBT_NET_NUM_TX_BUFS)
+               goto tx_error;
+
+       if (data_len > (TBT_NUM_BUFS_BETWEEN(prod, cons, TBT_NET_NUM_TX_BUFS) *
+                       TBT_RING_MAX_FRM_DATA_SZ)) {
+               unsigned long flags;
+
+               netif_stop_queue(net_dev);
+
+               spin_lock_irqsave(&port->nhi_ctxt->lock, flags);
+               /*
+                * Enable TX interrupt to be notified about available buffers
+                * and restart transmission upon this.
+                */
+               RING_INT_ENABLE_TX(iobase, port->local_path);
+               spin_unlock_irqrestore(&port->nhi_ctxt->lock, flags);
+
+               return NETDEV_TX_BUSY;
+       }
+
+       first = prod;
+       attr = &tx_ring->desc[prod].attributes;
+       hdr = tx_ring->buffers[prod].hdr;
+       dest = (u8 *)(hdr + 1);
+       /* if overall packet is bigger than the frame data size */
+       for (frm_idx = 0; data_len > TBT_RING_MAX_FRM_DATA_SZ; ++frm_idx) {
+               u32 size_left = TBT_RING_MAX_FRM_DATA_SZ;
+
+               *attr &= cpu_to_le32(~(DESC_ATTR_LEN_MASK |
+                                     DESC_ATTR_INT_EN |
+                                     DESC_ATTR_DESC_DONE));
+               hdr->frame_size = cpu_to_le32(TBT_RING_MAX_FRM_DATA_SZ);
+               hdr->frame_index = cpu_to_le16(frm_idx);
+               hdr->frame_id = cpu_to_le16(port->frame_id);
+
+               do {
+                       if (len > size_left) {
+                               /*
+                                * Copy data into the TX buffer up to the
+                                * full frame size, then break and go to
+                                * the next frame
+                                */
+                               memcpy(dest, src, size_left);
+                               len -= size_left;
+                               dest += size_left;
+                               src += size_left;
+                               break;
+                       }
+
+                       memcpy(dest, src, len);
+                       size_left -= len;
+                       dest += len;
+
+                       if (unmap) {
+                               kunmap_atomic((void *)src);
+                               unmap = false;
+                       }
+                       /*
+                        * Map the next fragment, if any fragments remain
+                        */
+                       if (frag_num < skb_shinfo(skb)->nr_frags) {
+                               const skb_frag_t *frag =
+                                       &(skb_shinfo(skb)->frags[frag_num]);
+                               len = skb_frag_size(frag);
+                               /* map and then unmap quickly */
+                               src = kmap_atomic(skb_frag_page(frag)) +
+                                                       frag->page_offset;
+                               unmap = true;
+                               ++frag_num;
+                       } else if (unlikely(size_left > 0)) {
+                               goto invalid_packet;
+                       }
+               } while (size_left > 0);
+
+               data_len -= TBT_RING_MAX_FRM_DATA_SZ;
+               prod = (prod + 1) & (TBT_NET_NUM_TX_BUFS - 1);
+               attr = &tx_ring->desc[prod].attributes;
+               hdr = tx_ring->buffers[prod].hdr;
+               dest = (u8 *)(hdr + 1);
+       }
+
+       *attr &= cpu_to_le32(~(DESC_ATTR_LEN_MASK | DESC_ATTR_DESC_DONE));
+       /* Enable the interrupt, so a stopped queue can be resumed later */
+       *attr |= cpu_to_le32(DESC_ATTR_INT_EN |
+               (((sizeof(struct tbt_frame_header) + data_len) <<
+                 DESC_ATTR_LEN_SHIFT) & DESC_ATTR_LEN_MASK));
+       hdr->frame_size = cpu_to_le32(data_len);
+       hdr->frame_index = cpu_to_le16(frm_idx);
+       hdr->frame_id = cpu_to_le16(port->frame_id);
+
+       /* In case the remaining data_len is smaller than a frame */
+       while (len < data_len) {
+               memcpy(dest, src, len);
+               data_len -= len;
+               dest += len;
+
+               if (unmap) {
+                       kunmap_atomic((void *)src);
+                       unmap = false;
+               }
+
+               if (frag_num < skb_shinfo(skb)->nr_frags) {
+                       const skb_frag_t *frag =
+                                       &(skb_shinfo(skb)->frags[frag_num]);
+                       len = skb_frag_size(frag);
+                       src = kmap_atomic(skb_frag_page(frag)) +
+                                                       frag->page_offset;
+                       unmap = true;
+                       ++frag_num;
+               } else if (unlikely(data_len > 0)) {
+                       goto invalid_packet;
+               }
+       }
+       memcpy(dest, src, data_len);
+       if (unmap) {
+               kunmap_atomic((void *)src);
+               unmap = false;
+       }
+
+       ++frm_idx;
+       prod = (prod + 1) & (TBT_NET_NUM_TX_BUFS - 1);
+
+       if (!tbt_net_xmit_csum(skb, tx_ring, first, prod, frm_idx))
+               goto invalid_packet;
+
+       if (port->match_frame_id)
+               ++port->frame_id;
+
+       prod_cons &= ~REG_RING_PROD_MASK;
+       prod_cons |= (prod << REG_RING_PROD_SHIFT) & REG_RING_PROD_MASK;
+       wmb(); /* make sure producer update is done after buffers are ready */
+       iowrite32(prod_cons, reg);
+
+       ++port->stats.tx_packets;
+       port->stats.tx_bytes += skb->len;
+
+       dev_consume_skb_any(skb);
+       return NETDEV_TX_OK;
+
+invalid_packet:
+       netif_err(port, tx_err, net_dev, "port %u invalid transmit packet\n",
+                 port->num);
+tx_error:
+       ++port->stats.tx_errors;
+       dev_kfree_skb_any(skb);
+       return NETDEV_TX_OK;
+}
+
+static void tbt_net_set_rx_mode(struct net_device *net_dev)
+{
+       struct tbt_port *port = netdev_priv(net_dev);
+       struct netdev_hw_addr *ha;
+
+       if (net_dev->flags & IFF_PROMISC)
+               port->packet_filters |= BIT(PACKET_TYPE_PROMISCUOUS);
+       else
+               port->packet_filters &= ~BIT(PACKET_TYPE_PROMISCUOUS);
+       if (net_dev->flags & IFF_ALLMULTI)
+               port->packet_filters |= BIT(PACKET_TYPE_ALL_MULTICAST);
+       else
+               port->packet_filters &= ~BIT(PACKET_TYPE_ALL_MULTICAST);
+
+       /* if there is more than a single unicast MAC address */
+       if (netdev_uc_count(net_dev) > 1)
+               port->packet_filters |= BIT(PACKET_TYPE_UNICAST_PROMISCUOUS);
+       /* if there is a single unicast MAC address */
+       else if (netdev_uc_count(net_dev) == 1) {
+               netdev_for_each_uc_addr(ha, net_dev)
+                       /* checks whether the MAC is what we set */
+                       if (ether_addr_equal(ha->addr, net_dev->dev_addr))
+                               port->packet_filters &=
+                                       ~BIT(PACKET_TYPE_UNICAST_PROMISCUOUS);
+                       else
+                               port->packet_filters |=
+                                       BIT(PACKET_TYPE_UNICAST_PROMISCUOUS);
+       } else {
+               port->packet_filters &= ~BIT(PACKET_TYPE_UNICAST_PROMISCUOUS);
+       }
+
+       /* Populate the multicast hash table from the configured addresses */
+       memset(port->multicast_hash_table, 0,
+              sizeof(port->multicast_hash_table));
+       netdev_for_each_mc_addr(ha, net_dev) {
+               u16 hash_val = TBT_NET_ETHER_ADDR_HASH(ha->addr);
+
+               port->multicast_hash_table[hash_val / BITS_PER_U32] |=
+                                               BIT(hash_val % BITS_PER_U32);
+       }
+}
+
+static struct rtnl_link_stats64 *tbt_net_get_stats64(
+                                       struct net_device *net_dev,
+                                       struct rtnl_link_stats64 *stats)
+{
+       struct tbt_port *port = netdev_priv(net_dev);
+
+       memset(stats, 0, sizeof(*stats));
+       stats->tx_packets = port->stats.tx_packets;
+       stats->tx_bytes = port->stats.tx_bytes;
+       stats->tx_errors = port->stats.tx_errors;
+       stats->rx_packets = port->stats.rx_packets;
+       stats->rx_bytes = port->stats.rx_bytes;
+       stats->rx_length_errors = port->stats.rx_length_errors;
+       stats->rx_over_errors = port->stats.rx_over_errors;
+       stats->rx_crc_errors = port->stats.rx_crc_errors;
+       stats->rx_missed_errors = port->stats.rx_missed_errors;
+       stats->rx_errors = stats->rx_length_errors + stats->rx_over_errors +
+                          stats->rx_crc_errors + stats->rx_missed_errors;
+       stats->multicast = port->stats.multicast;
+       return stats;
 }
 
+static int tbt_net_set_mac_address(struct net_device *net_dev, void *addr)
+{
+       struct sockaddr *saddr = addr;
+
+       if (!is_valid_ether_addr(saddr->sa_data))
+               return -EADDRNOTAVAIL;
+
+       memcpy(net_dev->dev_addr, saddr->sa_data, net_dev->addr_len);
+
+       return 0;
+}
+
+static int tbt_net_change_mtu(struct net_device *net_dev, int new_mtu)
+{
+       struct tbt_port *port = netdev_priv(net_dev);
+
+       /* MTU < 68 is an error and causes problems on some kernels */
+       if (new_mtu < 68 || new_mtu > (TBT_NET_MTU - ETH_HLEN))
+               return -EINVAL;
+
+       netif_info(port, probe, net_dev, "Thunderbolt(TM) Networking port %u - changing MTU from %u to %d\n",
+                  port->num, net_dev->mtu, new_mtu);
+
+       net_dev->mtu = new_mtu;
+
+       return 0;
+}
+
+static const struct net_device_ops tbt_netdev_ops = {
+       /* called when the network interface is brought up */
+       .ndo_open               = tbt_net_open,
+       /* called when the network interface is brought down */
+       .ndo_stop               = tbt_net_close,
+       .ndo_start_xmit         = tbt_net_xmit_frame,
+       .ndo_set_rx_mode        = tbt_net_set_rx_mode,
+       .ndo_get_stats64        = tbt_net_get_stats64,
+       .ndo_set_mac_address    = tbt_net_set_mac_address,
+       .ndo_change_mtu         = tbt_net_change_mtu,
+       .ndo_validate_addr      = eth_validate_addr,
+};
+
+static int tbt_net_get_settings(__maybe_unused struct net_device *net_dev,
+                               struct ethtool_cmd *ecmd)
+{
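+       /* Report a fixed 20Gb/s full-duplex link over an internal
+        * transceiver, with autonegotiation disabled
+        */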
+       ecmd->supported |= SUPPORTED_20000baseKR2_Full;
+       ecmd->advertising |= ADVERTISED_20000baseKR2_Full;
+       ecmd->autoneg = AUTONEG_DISABLE;
+       ecmd->transceiver = XCVR_INTERNAL;
+       ecmd->supported |= SUPPORTED_FIBRE;
+       ecmd->advertising |= ADVERTISED_FIBRE;
+       ecmd->port = PORT_FIBRE;
+       ethtool_cmd_speed_set(ecmd, SPEED_20000);
+       ecmd->duplex = DUPLEX_FULL;
+
+       return 0;
+}
+
+static u32 tbt_net_get_msglevel(struct net_device *net_dev)
+{
+       struct tbt_port *port = netdev_priv(net_dev);
+
+       return port->msg_enable;
+}
+
+static void tbt_net_set_msglevel(struct net_device *net_dev, u32 data)
+{
+       struct tbt_port *port = netdev_priv(net_dev);
+
+       port->msg_enable = data;
+}
+
+static void tbt_net_get_strings(__maybe_unused struct net_device *net_dev,
+                               u32 stringset, u8 *data)
+{
+       if (stringset == ETH_SS_STATS)
+               memcpy(data, tbt_net_gstrings_stats,
+                      sizeof(tbt_net_gstrings_stats));
+}
+
+static void tbt_net_get_ethtool_stats(struct net_device *net_dev,
+                                     __maybe_unused struct ethtool_stats *sts,
+                                     u64 *data)
+{
+       struct tbt_port *port = netdev_priv(net_dev);
+
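+       /* struct tbt_net_stats is laid out in the same order as
+        * tbt_net_gstrings_stats, so the counters can be copied as one block
+        */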
+       memcpy(data, &port->stats, sizeof(port->stats));
+}
+
+static int tbt_net_get_sset_count(__maybe_unused struct net_device *net_dev,
+                                 int sset)
+{
+       if (sset == ETH_SS_STATS)
+               return sizeof(tbt_net_gstrings_stats) / ETH_GSTRING_LEN;
+       return -EOPNOTSUPP;
+}
+
+static void tbt_net_get_drvinfo(struct net_device *net_dev,
+                               struct ethtool_drvinfo *drvinfo)
+{
+       struct tbt_port *port = netdev_priv(net_dev);
+
+       strlcpy(drvinfo->driver, "Thunderbolt(TM) Networking",
+               sizeof(drvinfo->driver));
+       strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version));
+
+       strlcpy(drvinfo->bus_info, pci_name(port->nhi_ctxt->pdev),
+               sizeof(drvinfo->bus_info));
+       drvinfo->n_stats = tbt_net_get_sset_count(net_dev, ETH_SS_STATS);
+}
+
+static const struct ethtool_ops tbt_net_ethtool_ops = {
+       .get_settings           = tbt_net_get_settings,
+       .get_drvinfo            = tbt_net_get_drvinfo,
+       .get_link               = ethtool_op_get_link,
+       .get_msglevel           = tbt_net_get_msglevel,
+       .set_msglevel           = tbt_net_set_msglevel,
+       .get_strings            = tbt_net_get_strings,
+       .get_ethtool_stats      = tbt_net_get_ethtool_stats,
+       .get_sset_count         = tbt_net_get_sset_count,
+};
+
 static inline int send_message(struct tbt_port *port, const char *func,
                                enum pdf_value pdf, u32 msg_len, const u8 *msg)
 {
@@ -514,6 +1942,10 @@ void negotiation_events(struct net_device *net_dev,
                /* configure TX ring */
                reg = iobase + REG_TX_RING_BASE +
                      (port->local_path * REG_RING_STEP);
+               iowrite32(lower_32_bits(port->tx_ring.dma),
+                         reg + REG_RING_PHYS_LO_OFFSET);
+               iowrite32(upper_32_bits(port->tx_ring.dma),
+                         reg + REG_RING_PHYS_HI_OFFSET);
 
                tx_ring_conf = (TBT_NET_NUM_TX_BUFS << REG_RING_SIZE_SHIFT) &
                                REG_RING_SIZE_MASK;
@@ -556,6 +1988,10 @@ void negotiation_events(struct net_device *net_dev,
                 */
                reg = iobase + REG_RX_RING_BASE +
                      (port->local_path * REG_RING_STEP);
+               iowrite32(lower_32_bits(port->rx_ring.dma),
+                         reg + REG_RING_PHYS_LO_OFFSET);
+               iowrite32(upper_32_bits(port->rx_ring.dma),
+                         reg + REG_RING_PHYS_HI_OFFSET);
 
                rx_ring_conf = (TBT_NET_NUM_RX_BUFS << REG_RING_SIZE_SHIFT) &
                                REG_RING_SIZE_MASK;
@@ -565,6 +2001,17 @@ void negotiation_events(struct net_device *net_dev,
                                REG_RING_BUF_SIZE_MASK;
 
                iowrite32(rx_ring_conf, reg + REG_RING_SIZE_OFFSET);
+               /* allocate RX buffers and configure the descriptors */
+               if (!tbt_net_alloc_rx_buffers(&port->nhi_ctxt->pdev->dev,
+                                             &port->rx_ring,
+                                             TBT_NET_NUM_RX_BUFS,
+                                             reg + REG_RING_CONS_PROD_OFFSET,
+                                             GFP_KERNEL)) {
+                       netif_err(port, link, net_dev, "Thunderbolt(TM) Networking port %u - no memory for receive buffers\n",
+                                 port->num);
+                       tbt_net_tear_down(net_dev, true);
+                       break;
+               }
 
                spin_lock_irqsave(&port->nhi_ctxt->lock, flags);
                /* enable RX interrupt */
@@ -577,6 +2024,7 @@ void negotiation_events(struct net_device *net_dev,
                netif_info(port, link, net_dev, "Thunderbolt(TM) Networking port %u - ready\n",
                           port->num);
 
+               napi_enable(&port->napi);
                netif_carrier_on(net_dev);
                netif_start_queue(net_dev);
                break;
@@ -787,15 +2235,42 @@ struct net_device *nhi_alloc_etherdev(struct tbt_nhi_ctxt *nhi_ctxt,
        scnprintf(net_dev->name, sizeof(net_dev->name), "tbtnet%%dp%hhu",
                  port_num);
 
+       net_dev->netdev_ops = &tbt_netdev_ops;
+
+       netif_napi_add(net_dev, &port->napi, tbt_net_poll, NAPI_POLL_WEIGHT);
+
+       net_dev->hw_features = NETIF_F_SG |
+                              NETIF_F_ALL_TSO |
+                              NETIF_F_UFO |
+                              NETIF_F_GRO |
+                              NETIF_F_IP_CSUM |
+                              NETIF_F_IPV6_CSUM;
+       net_dev->features = net_dev->hw_features;
+       if (nhi_ctxt->pci_using_dac)
+               net_dev->features |= NETIF_F_HIGHDMA;
+
        INIT_DELAYED_WORK(&port->login_retry_work, login_retry);
        INIT_WORK(&port->login_response_work, login_response);
        INIT_WORK(&port->logout_work, logout);
        INIT_WORK(&port->status_reply_work, status_reply);
        INIT_WORK(&port->approve_inter_domain_work, approve_inter_domain);
 
+       net_dev->ethtool_ops = &tbt_net_ethtool_ops;
+
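+       /* start with the largest MTU the Thunderbolt frame size allows */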
+       tbt_net_change_mtu(net_dev, TBT_NET_MTU - ETH_HLEN);
+
+       if (register_netdev(net_dev))
+               goto err_register;
+
+       netif_carrier_off(net_dev);
+
        netif_info(port, probe, net_dev,
                   "Thunderbolt(TM) Networking port %u - MAC Address: %pM\n",
                   port_num, net_dev->dev_addr);
 
        return net_dev;
+
+err_register:
+       free_netdev(net_dev);
+       return NULL;
 }
-- 
2.7.4
