Happy New Year,

I successfully created root cell and inmate cell on QEMU, and nuttx running 
in the inmate cell. At the same time, I added the ivshmem-net device to the 
root cell and the intimate cell, and loaded the NIC driver. Finally, I 
configured ip 172.16.0.1 and 172.16.0.2 for the network card. But when I 
execute ping 172.16.0.2 in the root cell, the error "From 172.16.0.1 
icmp_seq=1 Destination Host Unreachable" appears. The attachment is the 
driver I used in linux and nuttx respectively.The network driver uses the 
virtio interface. I tried to log in the driver, but I found that the 
control flow did not enter ndo_start_xmit().

ping
PING 172.16.0.2 (172.16.0.2) 56(84) bytes of data.
>From 172.16.0.1 icmp_seq=1 Destination Host Unreachable
>From 172.16.0.1 icmp_seq=2 Destination Host Unreachable
>From 172.16.0.1 icmp_seq=3 Destination Host Unreachable

route -n
Kernel IP routing table
Destination Gateway Genmask Flags Metric Ref Use Iface
0.0.0.0 10.0.2.2 0.0.0.0 UG 100 0 0 enp0s2
10.0.2.0 0.0.0.0 255.255.255.0 U 100 0 0 enp0s2
169.254.0.0 0.0.0.0 255.255.0.0 U 1000 0 0 enp0s2
172.16.0.0 0.0.0.0 255.255.255.0 U 0 0 0 enp0s14

ifconfig
enp0s2 Link encap:Ethernet HWaddr 52:54:00:12:34:56
          inet addr:10.0.2.15 Bcast:10.0.2.255 Mask:255.255.255.0
          inet6 addr: fec0::8070:776d:7dfd:da1/64 Scope:Site
          inet6 addr: fec0::1493:dcc2:ea12:8774/64 Scope:Site
          inet6 addr: fec0::7c68:51e0:8aab:db34/64 Scope:Site
          inet6 addr: fe80::feb9:1534:861b:722f/64 Scope:Link
          UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1
          RX packets:675167977 errors:0 dropped:0 overruns:0 frame:0
          TX packets:248205 errors:0 dropped:0 overruns:0 carrier:0
          collisions:0 txqueuelen:1000
          RX bytes: 52687970572 (52.6 GB) TX bytes: 49989174072 (49.9 GB)
          Interrupt:22 Memory:feb80000-feba0000

enp0s14 Link encap:Ethernet HWaddr 3e:27:50:f3:c5:16
          inet addr:172.16.0.1 Bcast:172.16.0.255 Mask:255.255.255.0
          UP BROADCAST RUNNING MULTICAST MTU:16384 Metric:1
          RX packets:0 errors:0 dropped:0 overruns:0 frame:0
          TX packets:0 errors:0 dropped:0 overruns:0 carrier:0
          collisions:0 txqueuelen:1000
          RX bytes:0 (0.0 B) TX bytes:0 (0.0 B)

lo Link encap:Local Loopback
          inet addr:127.0.0.1 Mask:255.0.0.0
          inet6 addr: ::1/128 Scope:Host
          UP LOOPBACK RUNNING MTU:65536 Metric:1
          RX packets: 491565 errors:0 dropped:0 overruns:0 frame:0
          TX packets:491565 errors:0 dropped:0 overruns:0 carrier:0
          collisions:0 txqueuelen:1000
          RX bytes: 29522474 (29.5 MB) TX bytes: 29522474 (29.5 MB)

arp
? (172.16.0.2) at <incomplete> on enp0s14
? (10.0.2.3) at 52:55:0a:00:02:03 [ether] on enp0s2
? (10.0.2.2) at 52:55:0a:00:02:02 [ether] on enp0s2

jailhouse output
Initializing Jailhouse hypervisor v0.12 (5-g06ba27d-dirty) on CPU 2
Code location: 0xfffffffff0000050
Using x2APIC
Page pool usage after early setup: mem 108/32207, remap 0/131072
Initializing processors:
 CPU 2... (APIC ID 2) OK
 CPU 1... (APIC ID 1) OK
 CPU 3... (APIC ID 3) OK
 CPU 0... (APIC ID 0) OK
Initializing unit: VT-d
DMAR unit @0xfed90000/0x1000
Reserving 24 interrupt(s) for device ff:00.0 at index 0
Initializing unit: IOAPIC
Initializing unit: Cache Allocation Technology
Initializing unit: PCI
Adding virtual PCI device 00:0d.0 to cell "RootCell"
Adding virtual PCI device 00:0e.0 to cell "RootCell"
Adding PCI device 00:00.0 to cell "RootCell"
Adding PCI device 00:01.0 to cell "RootCell"
Adding PCI device 00:02.0 to cell "RootCell"
Reserving 5 interrupt(s) for device 00:02.0 at index 24
Adding PCI device 00:1b.0 to cell "RootCell"
Reserving 1 interrupt(s) for device 00:1b.0 at index 29
Adding PCI device 00:1f.0 to cell "RootCell"
Adding PCI device 00:1f.2 to cell "RootCell"
Reserving 1 interrupt(s) for device 00:1f.2 at index 30
Adding PCI device 00:1f.3 to cell "RootCell"
Page pool usage after late setup: mem 339/32207, remap 65542/131072
Activating hypervisor
Reserving 1 interrupt(s) for device 00:00.0 at index 31
Adding virtual PCI device 00:0d.0 to cell "nuttx"
Shared memory connection established, peer cells:
 "RootCell"
Adding virtual PCI device 00:0e.0 to cell "nuttx"
Shared memory connection established, peer cells:
 "RootCell"
Created cell "nuttx"
Page pool usage after cell creation: mem 871/32207, remap 65543/131072
Cell "nuttx" can be loaded
CPU 3 received SIPI, vector 100
Started cell "nuttx"

nuttx output
x86_rng_initialize: Initializing RNG
pci_enumerate: [00:0d.0] Found 110a:4106, class/reversion 00000200
pci_enumerate: [00:0d.0] Jailhouse Shadow process memory and pipe
shadow_probe: Shadow[0] mapped bar[0]: 0xf0000000
shadow_probe: Shadow[0] mapped bar[1]: 0xf0001000
pci_enable_device: 00:0d.0, CMD: 0 -> 6
shadow_probe: Shadow[0] shared memory base: 0xf0000000, size: 0x1000
shadow_probe: Shadow[0] State Table phy_addr: 0x176000000 virt_addr: 
0xf0002000, size: 0x1000
shadow_probe: Shadow[0] R/W  region phy_addr: 0x1000 virt_addr: 0x1000, 
size: 0x3ffff000
shadow_probe: Shadow[0] I    region phy_addr: 0x1b6001000 virt_addr: 
0xf0003000, size: 0x3000
shadow_probe: Shadow[0] O    region phy_addr: 0x1b6005000 virt_addr: 
0xf0007000, size: 0x3000
shadow_probe: Initialized Shadow[0]
pci_enumerate: [00:0e.0] Found 110a:4106, class/reversion 00000100
pci_enumerate: [00:0e.0] Jailhouse Ivshmem-net
ivshmnet_probe: Ivshmem-net[0] mapped bar[0]: 0xf000b000
ivshmnet_probe: Ivshmem-net[0] mapped bar[1]: 0xf000c000
pci_enable_device: 00:0e.0, CMD: 0 -> 6
ivshmnet_probe: Ivshmem-net[0] State Table phy_addr:0x1b6205000 virt_addr: 
0xf000d000, size: 0x1000
ivshmnet_probe: Ivshmem-net[0] TX region phy_addr: 0x1b6285000 virt_addr: 
0xf000e000, size: 0x7f000
ivshmnet_probe: Ivshmem-net[0] RX region phy_addr: 0x1b6206000 virt_addr: 
0xf008d000, size: 0x7f000
ivshmnet_probe: Initialized Ivshmem-net[1]
shadow_state_change: Remote state: 0

cRTOS Daemon: Starting...

cRTOS Daemon: Initializing Network (eth0)...
set ip 
set router 
set mask 
 ip  up
cRTOS: Initialized! port: 42

cRTOS: Waiting for client

在2021年12月22日星期三 UTC+8 22:46:01<Bezdeka, Florian> 写道:

> On Wed, 2021-12-22 at 06:33 -0800, jiajun huang wrote:
> > Hi,
> > I will try as you suggest.
> > Currently I try to run this open source project
> > https://github.com/fixstars/cRTOS/blob/master/Installation.md. I used
> > to follow the guidelines of this project to successfully run
> > linux+nuttx on QEMU, but I found that the ivshmem-net device on qemu
> > does not seem to work. I suspect it is because the mmio area created
> > by jailhouse for ivshmem-net devices is not registered in QEMU. So I
> > decided to try to run this project on the server. I want to know if
> > the ivshmem device is supported by QEMU?
> > 
>
> There are examples with ivshmem on qemu, if you get the memory mapping
> right it will work. Normally you can't re-use the same hypervisor
> configuration on real hardware. The IOAPIC/iommu setup is normally
> different. 
>
> Root-Cell:
> https://github.com/siemens/jailhouse/blob/master/configs/x86/qemu-x86.c
>
> Inmate/Linux:
>
> https://github.com/siemens/jailhouse/blob/master/configs/x86/linux-x86-demo.c
>
> > 
> > 在2021年12月22日星期三 UTC+8 22:17:37<Bezdeka, Florian> 写道:
> > > On Wed, 2021-12-22 at 05:39 -0800, jiajun huang wrote:
> > > > Dear Jailhouse community,
> > > > This bug occurred when I tried to start nuttx on a none-root cell
> > > > on
> > > > the server. I added two ivshmem devices for nuttx. Below is my
> > > > configuration file. I am not sure if there is a problem with the
> > > > mmio
> > > > area in the configuration file. What is the communication area?
> > > > In
> > > > addition, if jailhouse runs in QEMU, can two virtual machines
> > > > communicate with each other through ivshmem-net?
> > > > 
> > > > Below is my root-cell , nuttx configuration and log output from
> > > > the
> > > > port.
> > > 
> > > Have you validated your cell configurations with the jailhouse
> > > config
> > > checker? I did not look into your configuration in detail, but
> > > nearly
> > > all of your inmate memory blocks are tagged with
> > > "JAILHOUSE_MEM_ROOTSHARED" which seems uncommon.
> > > 
> > > I would start step by step. So start from a configuration where you
> > > know that both cells are booting up. Add virtual NICs afterwards
> > > and
> > > make sure that IRQs are delivered to ivshmem devices.
> > > 
> > > Are you able to follow the boot log of your inmate? Hopefully you
> > > will
> > > see the reason for the VM exit there.
> > > 
> > > HTH,
> > > Florian
> > > 
> > > > 
> > > > Best regards,
> > > > 
> > > > Jiajun Huang
> > > > 
> > > 
>
>

-- 
You received this message because you are subscribed to the Google Groups 
"Jailhouse" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To view this discussion on the web visit 
https://groups.google.com/d/msgid/jailhouse-dev/c7eaa08e-ae92-4c5c-a57c-7ddac379242cn%40googlegroups.com.
/*
 * Copyright 2016 Mans Rullgard <[email protected]>
 * Copyright (c) Siemens AG, 2016-2020
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/io.h>
#include <linux/bitops.h>
#include <linux/interrupt.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/rtnetlink.h>
#include <linux/virtio_ring.h>

#ifndef PCI_DEVICE_ID_IVSHMEM
#define PCI_DEVICE_ID_IVSHMEM      0x4106
#endif

#define IVSHM_PROTO_NET            0x0001

#include "ivshmem.h"

#define DRV_NAME "ivshmem-net"

#define IVSHM_NET_STATE_RESET		0
#define IVSHM_NET_STATE_INIT		1
#define IVSHM_NET_STATE_READY		2
#define IVSHM_NET_STATE_RUN		3

#define IVSHM_NET_FLAG_RUN		0

#define IVSHM_NET_MTU_DEF		16384

#define IVSHM_NET_FRAME_SIZE(s) ALIGN(18 + (s), SMP_CACHE_BYTES)

#define IVSHM_NET_VQ_ALIGN 64

#define IVSHM_NET_SECTION_TX		0
#define IVSHM_NET_SECTION_RX		1

#define IVSHM_NET_MSIX_STATE		0
#define IVSHM_NET_MSIX_TX_RX		1

#define IVSHM_NET_NUM_VECTORS		2

//ivshm_net virtue队列结构体
struct ivshm_net_queue {
	struct vring vr;
	u32 free_head;
	u32 num_free;
	u32 num_added;
	u16 last_avail_idx;
	u16 last_used_idx;

	void *data;
	void *end;
	u32 size;
	u32 head;
	u32 tail;
};

//ivshm 状态信息
struct ivshm_net_stats {
	u32 tx_rx_interrupts;
	u32 tx_packets;
	u32 tx_notify;
	u32 tx_pause;
	u32 rx_packets;
	u32 rx_notify;
	u32 napi_poll;
	u32 napi_complete;
	u32 napi_poll_n[10];
};

//ivshm_net设备结构  两个virtue队列 两个shmem区域
struct ivshm_net {
	struct ivshm_net_queue rx;
	struct ivshm_net_queue tx;

	u32 vrsize;
	u32 qlen;
	u32 qsize;

	struct napi_struct napi;

	u32 state;
	u32 last_peer_state;
	u32 *state_table;

	unsigned long flags;

	struct workqueue_struct *state_wq;
	struct work_struct state_work;

	struct ivshm_net_stats stats;

	struct ivshm_regs __iomem *ivshm_regs;
	void *shm[2];
	resource_size_t shmlen;
	u32 peer_id;

	u32 tx_rx_vector;

	struct pci_dev *pdev;
};

static void *ivshm_net_desc_data(struct ivshm_net *in,
				 struct ivshm_net_queue *q,
				 unsigned int region,
				 struct vring_desc *desc,
				 u32 *len)
{
	u64 offs = READ_ONCE(desc->addr);
	u32 dlen = READ_ONCE(desc->len);
	u16 flags = READ_ONCE(desc->flags);
	void *data;

	if (flags)
		return NULL;

	if (offs >= in->shmlen)
		return NULL;

	data = in->shm[region] + offs;

	if (data < q->data || data >= q->end)
		return NULL;

	if (dlen > q->end - data)
		return NULL;

	*len = dlen;

	return data;
}

static void ivshm_net_init_queue(struct ivshm_net *in,
				 struct ivshm_net_queue *q,
				 void *mem, unsigned int len)
{
	memset(q, 0, sizeof(*q));

	vring_init(&q->vr, len, mem, IVSHM_NET_VQ_ALIGN);
	q->data = mem + in->vrsize;
	q->end = q->data + in->qsize;
	q->size = in->qsize;
}

static void ivshm_net_init_queues(struct net_device *ndev)
{
	struct ivshm_net *in = netdev_priv(ndev);
	void *tx;
	void *rx;
	int i;

	tx = in->shm[IVSHM_NET_SECTION_TX];
	rx = in->shm[IVSHM_NET_SECTION_RX];

	memset(tx, 0, in->shmlen);

	ivshm_net_init_queue(in, &in->tx, tx, in->qlen);
	ivshm_net_init_queue(in, &in->rx, rx, in->qlen);

	swap(in->rx.vr.used, in->tx.vr.used);

	in->tx.num_free = in->tx.vr.num;

	for (i = 0; i < in->tx.vr.num - 1; i++)
		in->tx.vr.desc[i].next = i + 1;
}

static int ivshm_net_calc_qsize(struct net_device *ndev)
{
	struct ivshm_net *in = netdev_priv(ndev);
	unsigned int vrsize;
	unsigned int qsize;
	unsigned int qlen;

	for (qlen = 4096; qlen > 32; qlen >>= 1) {
		vrsize = vring_size(qlen, IVSHM_NET_VQ_ALIGN);
		vrsize = ALIGN(vrsize, IVSHM_NET_VQ_ALIGN);
		if (vrsize < in->shmlen / 8)
			break;
	}

	if (vrsize > in->shmlen)
		return -EINVAL;

	qsize = in->shmlen - vrsize;

	if (qsize < 4 * ETH_MIN_MTU)
		return -EINVAL;

	in->vrsize = vrsize;
	in->qlen = qlen;
	in->qsize = qsize;

	return 0;
}

static void ivshm_net_notify_tx(struct ivshm_net *in, unsigned int num)
{
	u16 evt, old, new;

	virt_mb();

	evt = READ_ONCE(vring_avail_event(&in->tx.vr));
	old = in->tx.last_avail_idx - num;
	new = in->tx.last_avail_idx;

	if (vring_need_event(evt, new, old)) {
		writel(in->tx_rx_vector | (in->peer_id << 16),
		       &in->ivshm_regs->doorbell);
		in->stats.tx_notify++;
	}
}

static void ivshm_net_enable_rx_irq(struct ivshm_net *in)
{
	vring_avail_event(&in->rx.vr) = in->rx.last_avail_idx;
	virt_wmb();
}

static void ivshm_net_notify_rx(struct ivshm_net *in, unsigned int num)
{
	u16 evt, old, new;

	virt_mb();

	evt = READ_ONCE(vring_used_event(&in->rx.vr));
	old = in->rx.last_used_idx - num;
	new = in->rx.last_used_idx;

	if (vring_need_event(evt, new, old)) {
		writel(in->tx_rx_vector | (in->peer_id << 16),
		       &in->ivshm_regs->doorbell);
		in->stats.rx_notify++;
	}
}

static void ivshm_net_enable_tx_irq(struct ivshm_net *in)
{
	vring_used_event(&in->tx.vr) = in->tx.last_used_idx;
	virt_wmb();
}

static bool ivshm_net_rx_avail(struct ivshm_net *in)
{
	virt_mb();
	return READ_ONCE(in->rx.vr.avail->idx) != in->rx.last_avail_idx;
}

static size_t ivshm_net_tx_space(struct ivshm_net *in)
{
	struct ivshm_net_queue *tx = &in->tx;
	u32 tail = tx->tail;
	u32 head = tx->head;
	u32 space;

	if (head < tail)
		space = tail - head;
	else
		space = max(tx->size - head, tail);

	return space;
}

static bool ivshm_net_tx_ok(struct net_device *ndev)
{
	struct ivshm_net *in = netdev_priv(ndev);

	return in->tx.num_free >= 2 &&
		ivshm_net_tx_space(in) >= 2 * IVSHM_NET_FRAME_SIZE(ndev->mtu);
}

static u32 ivshm_net_tx_advance(struct ivshm_net_queue *q, u32 *pos, u32 len)
{
	u32 p = *pos;

	len = IVSHM_NET_FRAME_SIZE(len);

	if (q->size - p < len)
		p = 0;
	*pos = p + len;

	return p;
}

static bool ivshm_net_tx_clean(struct net_device *ndev)
{
	struct ivshm_net *in = netdev_priv(ndev);
	struct ivshm_net_queue *tx = &in->tx;
	struct vring_used_elem *used;
	struct vring *vr = &tx->vr;
	struct vring_desc *desc;
	struct vring_desc *fdesc;
	u16 last = tx->last_used_idx;
	unsigned int num;
	bool tx_ok;
	u32 fhead;

	fdesc = NULL;
	fhead = 0;
	num = 0;

	while (last != virt_load_acquire(&vr->used->idx)) {
		void *data;
		u32 len;
		u32 tail;

		used = vr->used->ring + (last % vr->num);
		if (used->id >= vr->num || used->len != 1) {
			netdev_err(ndev, "invalid tx used->id %d ->len %d\n",
				   used->id, used->len);
			break;
		}

		desc = &vr->desc[used->id];

		data = ivshm_net_desc_data(in, &in->tx, IVSHM_NET_SECTION_TX,
					   desc, &len);
		if (!data) {
			netdev_err(ndev, "bad tx descriptor, data == NULL\n");
			break;
		}

		tail = ivshm_net_tx_advance(tx, &tx->tail, len);
		if (data != tx->data + tail) {
			netdev_err(ndev, "bad tx descriptor\n");
			break;
		}

		if (!num)
			fdesc = desc;
		else
			desc->next = fhead;

		fhead = used->id;

		tx->last_used_idx = ++last;
		num++;
		tx->num_free++;
		BUG_ON(tx->num_free > vr->num);

		tx_ok = ivshm_net_tx_ok(ndev);
		if (!tx_ok)
			ivshm_net_enable_tx_irq(in);
	}

	if (num) {
		fdesc->next = tx->free_head;
		tx->free_head = fhead;
	} else {
		tx_ok = ivshm_net_tx_ok(ndev);
	}

	return tx_ok;
}

static void ivshm_net_tx_poll(struct net_device *ndev)
{
	struct netdev_queue *txq = netdev_get_tx_queue(ndev, 0);

	if (!__netif_tx_trylock(txq))
		return;

	if (ivshm_net_tx_clean(ndev) && netif_queue_stopped(ndev))
		netif_wake_queue(ndev);

	__netif_tx_unlock(txq);
}

static struct vring_desc *ivshm_net_rx_desc(struct net_device *ndev)
{
	struct ivshm_net *in = netdev_priv(ndev);
	struct ivshm_net_queue *rx = &in->rx;
	struct vring *vr = &rx->vr;
	unsigned int avail;
	u16 avail_idx;

	avail_idx = virt_load_acquire(&vr->avail->idx);

	if (avail_idx == rx->last_avail_idx)
		return NULL;

	avail = vr->avail->ring[rx->last_avail_idx++ & (vr->num - 1)];
	if (avail >= vr->num) {
		netdev_err(ndev, "invalid rx avail %d\n", avail);
		return NULL;
	}

	return &vr->desc[avail];
}

static void ivshm_net_rx_finish(struct ivshm_net *in, struct vring_desc *desc)
{
	struct ivshm_net_queue *rx = &in->rx;
	struct vring *vr = &rx->vr;
	unsigned int desc_id = desc - vr->desc;
	unsigned int used;

	used = rx->last_used_idx++ & (vr->num - 1);
	vr->used->ring[used].id = desc_id;
	vr->used->ring[used].len = 1;

	virt_store_release(&vr->used->idx, rx->last_used_idx);
}

static int ivshm_net_poll(struct napi_struct *napi, int budget)
{
	struct net_device *ndev = napi->dev;
	struct ivshm_net *in = container_of(napi, struct ivshm_net, napi);
	int received = 0;

	in->stats.napi_poll++;

	ivshm_net_tx_poll(ndev);

	while (received < budget) {
		struct vring_desc *desc;
		struct sk_buff *skb;
		void *data;
		u32 len;

		desc = ivshm_net_rx_desc(ndev);
		if (!desc)
			break;

		data = ivshm_net_desc_data(in, &in->rx, IVSHM_NET_SECTION_RX,
					   desc, &len);
		if (!data) {
			netdev_err(ndev, "bad rx descriptor\n");
			break;
		}

		skb = napi_alloc_skb(napi, len);

		if (skb) {
			memcpy(skb_put(skb, len), data, len);
			skb->protocol = eth_type_trans(skb, ndev);
			napi_gro_receive(napi, skb);
		}

		ndev->stats.rx_packets++;
		ndev->stats.rx_bytes += len;

		ivshm_net_rx_finish(in, desc);
		received++;
	}

	if (received < budget) {
		in->stats.napi_complete++;
		napi_complete_done(napi, received);
		ivshm_net_enable_rx_irq(in);
		if (ivshm_net_rx_avail(in))
			napi_schedule(napi);
	}

	if (received)
		ivshm_net_notify_rx(in, received);

	in->stats.rx_packets += received;
	in->stats.napi_poll_n[received ? 1 + min(ilog2(received), 8) : 0]++;

	return received;
}

static netdev_tx_t ivshm_net_xmit(struct sk_buff *skb, struct net_device *ndev)
{
	struct ivshm_net *in = netdev_priv(ndev);
	struct ivshm_net_queue *tx = &in->tx;
	bool xmit_more = netdev_xmit_more();
	struct vring *vr = &tx->vr;
	struct vring_desc *desc;
	unsigned int desc_idx;
	unsigned int avail;
	u32 head;
	void *buf;

	if (!ivshm_net_tx_clean(ndev)) {
		netif_stop_queue(ndev);

		netdev_err(ndev, "BUG: tx ring full when queue awake!\n");
		return NETDEV_TX_BUSY;
	}

	desc_idx = tx->free_head;
	desc = &vr->desc[desc_idx];
	tx->free_head = desc->next;
	tx->num_free--;

	head = ivshm_net_tx_advance(tx, &tx->head, skb->len);

	if (!ivshm_net_tx_ok(ndev)) {
		ivshm_net_enable_tx_irq(in);
		netif_stop_queue(ndev);
		xmit_more = false;
		in->stats.tx_pause++;
	}

	buf = tx->data + head;
	skb_copy_and_csum_dev(skb, buf);

	desc->addr = buf - in->shm[IVSHM_NET_SECTION_TX];
	desc->len = skb->len;
	desc->flags = 0;

	avail = tx->last_avail_idx++ & (vr->num - 1);
	vr->avail->ring[avail] = desc_idx;
	tx->num_added++;

	virt_store_release(&vr->avail->idx, tx->last_avail_idx);

	if (!xmit_more) {
		ivshm_net_notify_tx(in, tx->num_added);
		tx->num_added = 0;
	}

	in->stats.tx_packets++;
	ndev->stats.tx_packets++;
	ndev->stats.tx_bytes += skb->len;

	dev_consume_skb_any(skb);

	return NETDEV_TX_OK;
}

static void ivshm_net_set_state(struct ivshm_net *in, u32 state)
{
	virt_wmb();
	WRITE_ONCE(in->state, state);
	writel(state, &in->ivshm_regs->state);
}

static void ivshm_net_run(struct net_device *ndev)
{
	struct ivshm_net *in = netdev_priv(ndev);

	if (in->state < IVSHM_NET_STATE_READY)
		return;

	if (!netif_running(ndev))
		return;

	if (test_and_set_bit(IVSHM_NET_FLAG_RUN, &in->flags))
		return;

	netif_start_queue(ndev);
	napi_enable(&in->napi);
	napi_schedule(&in->napi);
	ivshm_net_set_state(in, IVSHM_NET_STATE_RUN);
}

static void ivshm_net_do_stop(struct net_device *ndev)
{
	struct ivshm_net *in = netdev_priv(ndev);

	ivshm_net_set_state(in, IVSHM_NET_STATE_RESET);

	if (!test_and_clear_bit(IVSHM_NET_FLAG_RUN, &in->flags))
		return;

	netif_stop_queue(ndev);
	napi_disable(&in->napi);
}

static void ivshm_net_state_change(struct work_struct *work)
{
	struct ivshm_net *in = container_of(work, struct ivshm_net, state_work);
	struct net_device *ndev = in->napi.dev;
	u32 peer_state = READ_ONCE(in->state_table[in->peer_id]);

	switch (in->state) {
	case IVSHM_NET_STATE_RESET:
		/*
		 * Wait for the remote to leave READY/RUN before transitioning
		 * to INIT.
		 */
		if (peer_state < IVSHM_NET_STATE_READY)
			ivshm_net_set_state(in, IVSHM_NET_STATE_INIT);
		break;

	case IVSHM_NET_STATE_INIT:
		/*
		 * Wait for the remote to leave RESET before performing the
		 * initialization and moving to READY.
		 */
		if (peer_state > IVSHM_NET_STATE_RESET) {
			ivshm_net_init_queues(ndev);
			ivshm_net_set_state(in, IVSHM_NET_STATE_READY);

			rtnl_lock();
			call_netdevice_notifiers(NETDEV_CHANGEADDR, ndev);
			rtnl_unlock();
		}
		break;

	case IVSHM_NET_STATE_READY:
		/*
		 * Link is up and we are running once the remote is in READY or
		 * RUN.
		 */
		if (peer_state >= IVSHM_NET_STATE_READY) {
			netif_carrier_on(ndev);
			ivshm_net_run(ndev);
			break;
		}
		/* fall through */
	case IVSHM_NET_STATE_RUN:
		/*
		 * If the remote goes to RESET, we need to follow immediately.
		 */
		if (peer_state == IVSHM_NET_STATE_RESET) {
			netif_carrier_off(ndev);
			ivshm_net_do_stop(ndev);
		}
		break;
	}

	virt_wmb();
	WRITE_ONCE(in->last_peer_state, peer_state);
}

static void ivshm_net_check_state(struct ivshm_net *in)
{
	if (in->state_table[in->peer_id] != in->last_peer_state ||
	    !test_bit(IVSHM_NET_FLAG_RUN, &in->flags))
		queue_work(in->state_wq, &in->state_work);
}

static irqreturn_t ivshm_net_int_state(int irq, void *data)
{
	struct ivshm_net *in = data;

	ivshm_net_check_state(in);

	return IRQ_HANDLED;
}

static irqreturn_t ivshm_net_int_tx_rx(int irq, void *data)
{
	struct ivshm_net *in = data;

	in->stats.tx_rx_interrupts++;

	napi_schedule_irqoff(&in->napi);

	return IRQ_HANDLED;
}

static irqreturn_t ivshm_net_intx(int irq, void *data)
{
	ivshm_net_int_state(irq, data);
	ivshm_net_int_tx_rx(irq, data);

	return IRQ_HANDLED;
}

static int ivshm_net_open(struct net_device *ndev)
{
	netdev_reset_queue(ndev);
	ndev->operstate = IF_OPER_UP;
	ivshm_net_run(ndev);

	return 0;
}

static int ivshm_net_stop(struct net_device *ndev)
{
	ndev->operstate = IF_OPER_DOWN;
	ivshm_net_do_stop(ndev);

	return 0;
}

static int ivshm_net_change_mtu(struct net_device *ndev, int mtu)
{
	if (netif_running(ndev)) {
		netdev_err(ndev, "must be stopped to change its MTU\n");
		return -EBUSY;
	}

	ndev->mtu = mtu;

	return 0;
}

#ifdef CONFIG_NET_POLL_CONTROLLER
static void ivshm_net_poll_controller(struct net_device *ndev)
{
	struct ivshm_net *in = netdev_priv(ndev);

	napi_schedule(&in->napi);
}
#endif

static const struct net_device_ops ivshm_net_ops = {
	.ndo_open		= ivshm_net_open,
	.ndo_stop		= ivshm_net_stop,
	.ndo_start_xmit		= ivshm_net_xmit,
	.ndo_change_mtu		= ivshm_net_change_mtu,
	.ndo_set_mac_address 	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller	= ivshm_net_poll_controller,
#endif
};

static const char ivshm_net_stats[][ETH_GSTRING_LEN] = {
	"tx_rx_interrupts",
	"tx_packets",
	"tx_notify",
	"tx_pause",
	"rx_packets",
	"rx_notify",
	"napi_poll",
	"napi_complete",
	"napi_poll_0",
	"napi_poll_1",
	"napi_poll_2",
	"napi_poll_4",
	"napi_poll_8",
	"napi_poll_16",
	"napi_poll_32",
	"napi_poll_64",
	"napi_poll_128",
	"napi_poll_256",
};

#define NUM_STATS ARRAY_SIZE(ivshm_net_stats)

static int ivshm_net_get_sset_count(struct net_device *ndev, int sset)
{
	if (sset == ETH_SS_STATS)
		return NUM_STATS;

	return -EOPNOTSUPP;
}

static void ivshm_net_get_strings(struct net_device *ndev, u32 sset, u8 *buf)
{
	if (sset == ETH_SS_STATS)
		memcpy(buf, &ivshm_net_stats, sizeof(ivshm_net_stats));
}

static void ivshm_net_get_ethtool_stats(struct net_device *ndev,
					struct ethtool_stats *estats, u64 *st)
{
	struct ivshm_net *in = netdev_priv(ndev);
	unsigned int n = 0;
	unsigned int i;

	st[n++] = in->stats.tx_rx_interrupts;
	st[n++] = in->stats.tx_packets;
	st[n++] = in->stats.tx_notify;
	st[n++] = in->stats.tx_pause;
	st[n++] = in->stats.rx_packets;
	st[n++] = in->stats.rx_notify;
	st[n++] = in->stats.napi_poll;
	st[n++] = in->stats.napi_complete;

	for (i = 0; i < ARRAY_SIZE(in->stats.napi_poll_n); i++)
		st[n++] = in->stats.napi_poll_n[i];

	memset(&in->stats, 0, sizeof(in->stats));
}

#define IVSHM_NET_REGS_LEN	(3 * sizeof(u32) + 6 * sizeof(u16))

static int ivshm_net_get_regs_len(struct net_device *ndev)
{
	return IVSHM_NET_REGS_LEN;
}

static void ivshm_net_get_regs(struct net_device *ndev,
			       struct ethtool_regs *regs, void *p)
{
	struct ivshm_net *in = netdev_priv(ndev);
	u32 *reg32 = p;
	u16 *reg16;

	*reg32++ = in->state;
	*reg32++ = in->last_peer_state;
	*reg32++ = in->qlen;

	reg16 = (u16 *)reg32;

	*reg16++ = in->tx.vr.avail ? in->tx.vr.avail->idx : 0;
	*reg16++ = in->tx.vr.used ? in->tx.vr.used->idx : 0;
	*reg16++ = in->tx.vr.avail ? vring_avail_event(&in->tx.vr) : 0;

	*reg16++ = in->rx.vr.avail ? in->rx.vr.avail->idx : 0;
	*reg16++ = in->rx.vr.used ? in->rx.vr.used->idx : 0;
	*reg16++ = in->rx.vr.avail ? vring_avail_event(&in->rx.vr) : 0;
}

static const struct ethtool_ops ivshm_net_ethtool_ops = {
	.get_sset_count		= ivshm_net_get_sset_count,
	.get_strings		= ivshm_net_get_strings,
	.get_ethtool_stats	= ivshm_net_get_ethtool_stats,
	.get_regs_len		= ivshm_net_get_regs_len,
	.get_regs		= ivshm_net_get_regs,
};

static u64 get_config_qword(struct pci_dev *pdev, unsigned int pos)
{
	u32 lo, hi;

	pci_read_config_dword(pdev, pos, &lo);
	pci_read_config_dword(pdev, pos + 4, &hi);
	return lo | ((u64)hi << 32);
}

static int ivshm_net_probe(struct pci_dev *pdev,
			   const struct pci_device_id *pci_id)
{
	phys_addr_t output_sections_addr, section_addr;
	resource_size_t section_sz, output_section_sz;
	void *state_table, *output_sections;
	struct ivshm_regs __iomem *regs;
	struct net_device *ndev;
	struct ivshm_net *in;
	unsigned int cap_pos;
	char *device_name;
	int vendor_cap;
	u32 id, dword;
	int ret;

  dev_info(&pdev->dev, "Probed IVSHMEMNET!\n");

	ret = pcim_enable_device(pdev);
	if (ret) {
		dev_err(&pdev->dev, "pci_enable_device: %d\n", ret);
		return ret;
	}

	ret = pcim_iomap_regions(pdev, BIT(0), DRV_NAME);
	if (ret) {
		dev_err(&pdev->dev, "pcim_iomap_regions: %d\n", ret);
		return ret;
	}

	regs = pcim_iomap_table(pdev)[0];

	id = readl(&regs->id);
	if (id > 1) {
		dev_err(&pdev->dev, "invalid ID %d\n", id);
		return -EINVAL;
	}
	if (readl(&regs->max_peers) > 2) {
		dev_err(&pdev->dev, "only 2 peers supported\n");
		return -EINVAL;
	}

	vendor_cap = pci_find_capability(pdev, PCI_CAP_ID_VNDR);
	if (vendor_cap < 0) {
		dev_err(&pdev->dev, "missing vendor capability\n");
		return -EINVAL;
	}

	if (pci_resource_len(pdev, 2) > 0) {
		section_addr = pci_resource_start(pdev, 2);
	} else {
		cap_pos = vendor_cap + IVSHM_CFG_ADDRESS;
		section_addr = get_config_qword(pdev, cap_pos);
	}

	cap_pos = vendor_cap + IVSHM_CFG_STATE_TAB_SZ;
	pci_read_config_dword(pdev, cap_pos, &dword);
	section_sz = dword;

	if (!devm_request_mem_region(&pdev->dev, section_addr, section_sz,
				     DRV_NAME))
		return -EBUSY;

	state_table = devm_memremap(&pdev->dev, section_addr, section_sz,
				    MEMREMAP_WB);
	if (!state_table)
		return -ENOMEM;

	output_sections_addr = section_addr + section_sz;

	cap_pos = vendor_cap + IVSHM_CFG_RW_SECTION_SZ;
	section_sz = get_config_qword(pdev, cap_pos);
	if (section_sz > 0) {
		dev_info(&pdev->dev, "R/W section detected - "
			 "unused by this driver version\n");
		output_sections_addr += section_sz;
	}

	cap_pos = vendor_cap + IVSHM_CFG_OUTPUT_SECTION_SZ;
	output_section_sz = get_config_qword(pdev, cap_pos);
	if (output_section_sz == 0) {
		dev_err(&pdev->dev, "Missing input/output sections\n");
		return -EINVAL;
	}

	if (!devm_request_mem_region(&pdev->dev, output_sections_addr,
				     output_section_sz * 2, DRV_NAME))
		return -EBUSY;

	output_sections = devm_memremap(&pdev->dev, output_sections_addr,
					output_section_sz * 2, MEMREMAP_WB);
	if (!output_sections)
		return -ENOMEM;

	section_addr = output_sections_addr + output_section_sz * id;
	dev_info(&pdev->dev, "TX memory at %pa, size %pa\n",
		 &section_addr, &output_section_sz);
	section_addr = output_sections_addr + output_section_sz * !id;
	dev_info(&pdev->dev, "RX memory at %pa, size %pa\n",
		 &section_addr, &output_section_sz);

	device_name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%s[%s]", DRV_NAME,
				     dev_name(&pdev->dev));
	if (!device_name)
		return -ENOMEM;

	ndev = alloc_etherdev(sizeof(*in));
	if (!ndev)
		return -ENOMEM;

	pci_set_drvdata(pdev, ndev);
	SET_NETDEV_DEV(ndev, &pdev->dev);

	in = netdev_priv(ndev);
	in->ivshm_regs = regs;
	in->state_table = state_table;

	in->shm[IVSHM_NET_SECTION_TX] =
		output_sections + output_section_sz * id;
	in->shm[IVSHM_NET_SECTION_RX] =
		output_sections + output_section_sz * !id;

	in->shmlen = output_section_sz;

	in->peer_id = !id;
	in->pdev = pdev;

	ret = ivshm_net_calc_qsize(ndev);
	if (ret)
		goto err_free;

	in->state_wq = alloc_ordered_workqueue(device_name, 0);
	if (!in->state_wq)
		goto err_free;

	INIT_WORK(&in->state_work, ivshm_net_state_change);

	eth_random_addr(ndev->dev_addr);
	ndev->netdev_ops = &ivshm_net_ops;
	ndev->ethtool_ops = &ivshm_net_ethtool_ops;
	ndev->mtu = min_t(u32, IVSHM_NET_MTU_DEF, in->qsize / 16);
	ndev->min_mtu = ETH_MIN_MTU;
	ndev->max_mtu = min_t(u32, ETH_MAX_MTU, in->qsize / 4);
	ndev->hw_features = NETIF_F_HW_CSUM | NETIF_F_SG;
	ndev->features = ndev->hw_features;

	netif_carrier_off(ndev);
	netif_napi_add(ndev, &in->napi, ivshm_net_poll, NAPI_POLL_WEIGHT);

	ret = register_netdev(ndev);
	if (ret)
		goto err_wq;

	ret = pci_alloc_irq_vectors(pdev, 1, 2, PCI_IRQ_LEGACY | PCI_IRQ_MSIX);
	if (ret < 0)
		goto err_alloc_irq;

	if (pdev->msix_enabled) {
		if (ret != 2) {
			ret = -EBUSY;
			goto err_request_irq;
		}

		device_name = devm_kasprintf(&pdev->dev, GFP_KERNEL,
					     "%s-state[%s]", DRV_NAME,
					     dev_name(&pdev->dev));
		if (!device_name) {
			ret = -ENOMEM;
			goto err_request_irq;
		}

		ret = request_irq(pci_irq_vector(pdev, IVSHM_NET_MSIX_STATE),
				  ivshm_net_int_state, 0, device_name, in);
		if (ret)
			goto err_request_irq;

		device_name = devm_kasprintf(&pdev->dev, GFP_KERNEL,
					     "%s-tx-rx[%s]", DRV_NAME,
					     dev_name(&pdev->dev));
		if (!device_name) {
			ret = -ENOMEM;
			goto err_request_irq2;
		}

		ret = request_irq(pci_irq_vector(pdev, IVSHM_NET_MSIX_TX_RX),
				  ivshm_net_int_tx_rx, 0, device_name, in);
		if (ret)
			goto err_request_irq2;

		in->tx_rx_vector = IVSHM_NET_MSIX_TX_RX;
	} else {
		ret = request_irq(pci_irq_vector(pdev, 0), ivshm_net_intx, 0,
				  device_name, in);
		if (ret)
			goto err_request_irq;

		in->tx_rx_vector = 0;
	}

	pci_set_master(pdev);

	pci_write_config_byte(pdev, vendor_cap + IVSHM_CFG_PRIV_CNTL, 0);
	writel(IVSHM_INT_ENABLE, &in->ivshm_regs->int_control);

	writel(IVSHM_NET_STATE_RESET, &in->ivshm_regs->state);
	ivshm_net_check_state(in);

	return 0;

err_request_irq2:
	free_irq(pci_irq_vector(pdev, IVSHM_NET_MSIX_STATE), in);
err_request_irq:
	pci_free_irq_vectors(pdev);
err_alloc_irq:
	unregister_netdev(ndev);
err_wq:
	destroy_workqueue(in->state_wq);
err_free:
	free_netdev(ndev);

	return ret;
}

static void ivshm_net_remove(struct pci_dev *pdev)
{
	struct net_device *ndev = pci_get_drvdata(pdev);
	struct ivshm_net *in = netdev_priv(ndev);

	writel(IVSHM_NET_STATE_RESET, &in->ivshm_regs->state);
	writel(0, &in->ivshm_regs->int_control);

	if (pdev->msix_enabled) {
		free_irq(pci_irq_vector(pdev, IVSHM_NET_MSIX_STATE), in);
		free_irq(pci_irq_vector(pdev, IVSHM_NET_MSIX_TX_RX), in);
	} else {
		free_irq(pci_irq_vector(pdev, 0), in);
	}
	pci_free_irq_vectors(pdev);

	unregister_netdev(ndev);
	cancel_work_sync(&in->state_work);
	destroy_workqueue(in->state_wq);
	free_netdev(ndev);
}

static const struct pci_device_id ivshm_net_id_table[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_SIEMENS, PCI_DEVICE_ID_IVSHMEM),
	  (PCI_CLASS_OTHERS << 16) | IVSHM_PROTO_NET, 0xffffff },
	{ 0 }
};
MODULE_DEVICE_TABLE(pci, ivshm_net_id_table);

static struct pci_driver ivshm_net_driver = {
	.name		= DRV_NAME,
	.id_table	= ivshm_net_id_table,
	.probe		= ivshm_net_probe,
	.remove		= ivshm_net_remove,
};
module_pci_driver(ivshm_net_driver);

MODULE_AUTHOR("Mans Rullgard <[email protected]>");
MODULE_LICENSE("GPL");
/*
 * Jailhouse, a Linux-based partitioning hypervisor
 *
 * Copyright (c) Siemens AG, 2014-2017
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Alternatively, you can use or redistribute this file under the following
 * BSD license:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Configuration for QEMU Standard PC (Q35 + ICH9, 2009)
 * created with '/usr/local/libexec/jailhouse/jailhouse config create -c ttyS0 --mem-hv 128M --mem-inmates 1536M configs/x86/sysconfig.c'
 *
 * NOTE: This config expects the following to be appended to your kernel cmdline
 *       "memmap=0x68000000$0x16e000000"
 */

#include <jailhouse/types.h>
#include <jailhouse/cell-config.h>

struct {
	struct jailhouse_system header;
	__u64 cpus[1];
	struct jailhouse_memory mem_regions[24];
	struct jailhouse_irqchip irqchips[1];
	struct jailhouse_pio pio_regions[13];
	struct jailhouse_pci_device pci_devices[9];
	struct jailhouse_pci_capability pci_caps[9];
} __attribute__((packed)) config = {
	.header = {
		.signature = JAILHOUSE_SYSTEM_SIGNATURE,
		.revision = JAILHOUSE_CONFIG_REVISION,
		.flags = JAILHOUSE_SYS_VIRTUAL_DEBUG_CONSOLE,
		.hypervisor_memory = {
			.phys_start = 0x16e000000,
			.size = 0x8000000,
		},
		.debug_console = {
			.address = 0x3f8,
			.type = JAILHOUSE_CON_TYPE_8250,
			.flags = JAILHOUSE_CON_ACCESS_PIO |
				 JAILHOUSE_CON_REGDIST_1,
		},
		.platform_info = {
			.pci_mmconfig_base = 0xb0000000,
			.pci_mmconfig_end_bus = 0xff,
			.x86 = {
				.pm_timer_address = 0x608,
				.vtd_interrupt_limit = 128,
				.iommu_units = {
					{
						.type = JAILHOUSE_IOMMU_INTEL,
						.base = 0xfed90000,
						.size = 0x1000,
					},
				},
			},
		},
		.root_cell = {
			.name = "RootCell",
			.cpu_set_size = sizeof(config.cpus),
			.num_memory_regions = ARRAY_SIZE(config.mem_regions),
			.num_irqchips = ARRAY_SIZE(config.irqchips),
			.num_pio_regions = ARRAY_SIZE(config.pio_regions),
			.num_pci_devices = ARRAY_SIZE(config.pci_devices),
			.num_pci_caps = ARRAY_SIZE(config.pci_caps),
		},
	},

	.cpus = {
		0x000000000000000f,
	},

	.mem_regions = {
		/* MemRegion: 00000000-0009fbff : System RAM */
		{
			.phys_start = 0x0,
			.virt_start = 0x0,
			.size = 0xa0000,
			.flags = JAILHOUSE_MEM_READ | JAILHOUSE_MEM_WRITE |
				JAILHOUSE_MEM_EXECUTE | JAILHOUSE_MEM_DMA,
		},
		/* MemRegion: 00100000-14ffffff : System RAM */
		{
			.phys_start = 0x100000,
			.virt_start = 0x100000,
			.size = 0x14f00000,
			.flags = JAILHOUSE_MEM_READ | JAILHOUSE_MEM_WRITE |
				JAILHOUSE_MEM_EXECUTE | JAILHOUSE_MEM_DMA,
		},
		/* MemRegion: 15000000-16ffffff : Kernel */
		{
			.phys_start = 0x15000000,
			.virt_start = 0x15000000,
			.size = 0x2000000,
			.flags = JAILHOUSE_MEM_READ | JAILHOUSE_MEM_WRITE |
				JAILHOUSE_MEM_EXECUTE | JAILHOUSE_MEM_DMA,
		},
		/* MemRegion: 17000000-7ffdefff : System RAM */
		{
			.phys_start = 0x17000000,
			.virt_start = 0x17000000,
			.size = 0x68fdf000,
			.flags = JAILHOUSE_MEM_READ | JAILHOUSE_MEM_WRITE |
				JAILHOUSE_MEM_EXECUTE | JAILHOUSE_MEM_DMA,
		},
		/* MemRegion: fd000000-fdffffff : 0000:00:01.0 */
		{
			.phys_start = 0xfd000000,
			.virt_start = 0xfd000000,
			.size = 0x1000000,
			.flags = JAILHOUSE_MEM_READ | JAILHOUSE_MEM_WRITE,
		},
		/* MemRegion: feb40000-feb7ffff : 0000:00:02.0 */
		{
			.phys_start = 0xfeb40000,
			.virt_start = 0xfeb40000,
			.size = 0x40000,
			.flags = JAILHOUSE_MEM_READ | JAILHOUSE_MEM_WRITE,
		},
		/* MemRegion: feb80000-feb9ffff : e1000e */
		{
			.phys_start = 0xfeb80000,
			.virt_start = 0xfeb80000,
			.size = 0x20000,
			.flags = JAILHOUSE_MEM_READ | JAILHOUSE_MEM_WRITE,
		},
		/* MemRegion: feba0000-febbffff : e1000e */
		{
			.phys_start = 0xfeba0000,
			.virt_start = 0xfeba0000,
			.size = 0x20000,
			.flags = JAILHOUSE_MEM_READ | JAILHOUSE_MEM_WRITE,
		},
		/* MemRegion: febd1000-febd3fff : e1000e */
		{
			.phys_start = 0xfebd1000,
			.virt_start = 0xfebd1000,
			.size = 0x3000,
			.flags = JAILHOUSE_MEM_READ | JAILHOUSE_MEM_WRITE,
		},
		/* MemRegion: febd4000-febd7fff : ICH HD audio */
		{
			.phys_start = 0xfebd4000,
			.virt_start = 0xfebd4000,
			.size = 0x4000,
			.flags = JAILHOUSE_MEM_READ | JAILHOUSE_MEM_WRITE,
		},
		/* MemRegion: febd8000-febd8fff : 0000:00:01.0 */
		{
			.phys_start = 0xfebd8000,
			.virt_start = 0xfebd8000,
			.size = 0x1000,
			.flags = JAILHOUSE_MEM_READ | JAILHOUSE_MEM_WRITE,
		},
		/* MemRegion: febd9000-febd9fff : ahci */
		{
			.phys_start = 0xfebd9000,
			.virt_start = 0xfebd9000,
			.size = 0x1000,
			.flags = JAILHOUSE_MEM_READ | JAILHOUSE_MEM_WRITE,
		},
		/* MemRegion: fed00000-fed003ff : PNP0103:00 */
		{
			.phys_start = 0xfed00000,
			.virt_start = 0xfed00000,
			.size = 0x1000,
			.flags = JAILHOUSE_MEM_READ | JAILHOUSE_MEM_WRITE,
		},
		/* MemRegion: 100000000-16dffffff : System RAM */
		{
			.phys_start = 0x100000000,
			.virt_start = 0x100000000,
			.size = 0x6e000000,
			.flags = JAILHOUSE_MEM_READ | JAILHOUSE_MEM_WRITE |
				JAILHOUSE_MEM_EXECUTE | JAILHOUSE_MEM_DMA,
		},
		/* MemRegion: 1d6000000-27fffffff : System RAM */
		{
			.phys_start = 0x1d6000000,
			.virt_start = 0x1d6000000,
			.size = 0xaa000000,
			.flags = JAILHOUSE_MEM_READ | JAILHOUSE_MEM_WRITE |
				JAILHOUSE_MEM_EXECUTE | JAILHOUSE_MEM_DMA,
		},
		/* MemRegion: 000c0000-000dffff : ROMs */
		{
			.phys_start = 0xc0000,
			.virt_start = 0xc0000,
			.size = 0x20000,
			.flags = JAILHOUSE_MEM_READ | JAILHOUSE_MEM_WRITE,
		},
		/* MemRegion: 176000000-1d5ffffff : JAILHOUSE Inmate Memory */
		{
			.phys_start = 0x176000000,
			.virt_start = 0x176000000,
			.size = 0x1000,
			.flags = JAILHOUSE_MEM_READ | JAILHOUSE_MEM_ROOTSHARED,
		},
		{
			.phys_start = 0x176001000,
			.virt_start = 0x176001000,
			.size =        0x40000000,
			.flags = JAILHOUSE_MEM_READ | JAILHOUSE_MEM_WRITE |
        JAILHOUSE_MEM_EXECUTE | JAILHOUSE_MEM_ROOTSHARED,
		},
		{
			.phys_start = 0x1b6001000,
			.virt_start = 0x1b6001000,
			.size = 0x4000,
			.flags = JAILHOUSE_MEM_READ | JAILHOUSE_MEM_WRITE | JAILHOUSE_MEM_ROOTSHARED,
		},
		{
			.phys_start = 0x1b6005000,
			.virt_start = 0x1b6005000,
			.size = 0x4000,
			.flags = JAILHOUSE_MEM_READ | JAILHOUSE_MEM_ROOTSHARED,
		},
		JAILHOUSE_SHMEM_NET_REGIONS(0x1b6205000, 0),
	},

	.irqchips = {
		/* IOAPIC 0, GSI base 0 */
		{
			.address = 0xfec00000,
			.id = 0xff00,
			.pin_bitmap = {
				0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
			},
		},
	},

	.pio_regions = {
		/* Port I/O: 0000-001f : dma1 */
		/* PIO_RANGE(0x0, 0x20), */
		/* Port I/O: 0020-0021 : pic1 */
		/* PIO_RANGE(0x20, 0x2), */
		/* Port I/O: 0040-0043 : timer0 */
		PIO_RANGE(0x40, 0x4),
		/* Port I/O: 0050-0053 : timer1 */
		/* PIO_RANGE(0x50, 0x4), */
		/* Port I/O: 0060-0060 : keyboard */
		PIO_RANGE(0x60, 0x1),
		/* Port I/O: 0064-0064 : keyboard */
		PIO_RANGE(0x64, 0x1),
		/* Port I/O: 0070-0077 : rtc0 */
		PIO_RANGE(0x70, 0x8),
		/* Port I/O: 0080-008f : dma page reg */
		/* PIO_RANGE(0x80, 0x10), */
		/* Port I/O: 00a0-00a1 : pic2 */
		/* PIO_RANGE(0xa0, 0x2), */
		/* Port I/O: 00c0-00df : dma2 */
		/* PIO_RANGE(0xc0, 0x20), */
		/* Port I/O: 00f0-00ff : fpu */
		/* PIO_RANGE(0xf0, 0x10), */
		/* Port I/O: 02f8-02ff : serial */
		PIO_RANGE(0x2f8, 0x8),
		/* Port I/O: 0378-037a : parport0 */
		/* PIO_RANGE(0x378, 0x3), */
		/* Port I/O: 03c0-03df : vga+ */
		PIO_RANGE(0x3c0, 0x20),
		/* Port I/O: 03e8-03ef : serial */
		/* PIO_RANGE(0x3e8, 0x8), */
		/* Port I/O: 03f8-03ff : serial */
		PIO_RANGE(0x3f8, 0x8),
		/* Port I/O: 0510-051b : fw_cfg_io */
		/* PIO_RANGE(0x510, 0xc), */
		/* Port I/O: 0600-0603 : ACPI PM1a_EVT_BLK */
		/* PIO_RANGE(0x600, 0x4), */
		/* Port I/O: 0604-0605 : ACPI PM1a_CNT_BLK */
		/* PIO_RANGE(0x604, 0x2), */
		/* Port I/O: 0608-060b : ACPI PM_TMR */
		/* PIO_RANGE(0x608, 0x4), */
		/* Port I/O: 0620-062f : ACPI GPE0_BLK */
		/* PIO_RANGE(0x620, 0x10), */
		/* Port I/O: 0630-0633 : iTCO_wdt.1.auto */
		/* PIO_RANGE(0x630, 0x4), */
		/* Port I/O: 0660-067f : iTCO_wdt.1.auto */
		/* PIO_RANGE(0x660, 0x20), */
		/* Port I/O: 0700-073f : 0000:00:1f.3 */
		/* PIO_RANGE(0x700, 0x40), */
		/* Port I/O: c040-c05f : 0000:00:02.0 */
		PIO_RANGE(0xc040, 0x20),
		/* Port I/O: c060-c07f : 0000:00:1f.2 */
		PIO_RANGE(0xc060, 0x20),
		PIO_RANGE(0x3f0, 0x8), /* floppy */
		PIO_RANGE(0x402, 0x1), /* invalid but accessed by X */
		PIO_RANGE(0x5658, 0x4), /* vmport */
		PIO_RANGE(0xc000, 0xff), /* PCI devices */
	},

	.pci_devices = {
		{ /* IVSHMEM (demo) */
			.type = JAILHOUSE_PCI_TYPE_IVSHMEM,
			.domain = 0x0,
			.iommu = 0,
			.bdf = 0x0d << 3,
			.bar_mask = JAILHOUSE_IVSHMEM_BAR_MASK_MSIX,
			.num_msix_vectors = 16,
			.shmem_regions_start = 16,
			.shmem_dev_id = 0,
			.shmem_peers = 2,
			.shmem_protocol = 0x0002,
		},
		/* IVSHMEM-NET ,*/
		{ 
			.type = JAILHOUSE_PCI_TYPE_IVSHMEM,
			.domain = 0x0,
			.iommu = 0,
			.bdf = 0x0e << 3,
			.bar_mask = JAILHOUSE_IVSHMEM_BAR_MASK_MSIX,
			.num_msix_vectors = 2,
			.shmem_regions_start = 20,
			.shmem_dev_id = 0,
			.shmem_peers = 2,
			.shmem_protocol = JAILHOUSE_SHMEM_PROTO_VETH,
		},
		/* PCIDevice: 00:00.0 */
		{
			.type = JAILHOUSE_PCI_TYPE_DEVICE,
			.iommu = 0,
			.domain = 0x0,
			.bdf = 0x0,
			.bar_mask = {
				0x00000000, 0x00000000, 0x00000000,
				0x00000000, 0x00000000, 0x00000000,
			},
			.caps_start = 0,
			.num_caps = 0,
			.num_msi_vectors = 0,
			.msi_64bits = 0,
			.msi_maskable = 0,
			.num_msix_vectors = 0,
			.msix_region_size = 0x0,
			.msix_address = 0x0,
		},
		/* PCIDevice: 00:01.0 */
		{
			.type = JAILHOUSE_PCI_TYPE_DEVICE,
			.iommu = 0,
			.domain = 0x0,
			.bdf = 0x8,
			.bar_mask = {
				0xff000000, 0x00000000, 0xfffff000,
				0x00000000, 0x00000000, 0x00000000,
			},
			.caps_start = 0,
			.num_caps = 0,
			.num_msi_vectors = 0,
			.msi_64bits = 0,
			.msi_maskable = 0,
			.num_msix_vectors = 0,
			.msix_region_size = 0x0,
			.msix_address = 0x0,
		},
		/* PCIDevice: 00:02.0 */
		{
			.type = JAILHOUSE_PCI_TYPE_DEVICE,
			.iommu = 0,
			.domain = 0x0,
			.bdf = 0x10,
			.bar_mask = {
				0xfffe0000, 0xfffe0000, 0xffffffe0,
				0xffffc000, 0x00000000, 0x00000000,
			},
			.caps_start = 0,
			.num_caps = 6,
			.num_msi_vectors = 1,
			.msi_64bits = 1,
			.msi_maskable = 0,
			.num_msix_vectors = 5,
			.msix_region_size = 0x1000,
			.msix_address = 0xfebd0000,
		},
		/* PCIDevice: 00:1b.0 */
		{
			.type = JAILHOUSE_PCI_TYPE_DEVICE,
			.iommu = 0,
			.domain = 0x0,
			.bdf = 0xd8,
			.bar_mask = {
				0xffffc000, 0x00000000, 0x00000000,
				0x00000000, 0x00000000, 0x00000000,
			},
			.caps_start = 6,
			.num_caps = 1,
			.num_msi_vectors = 1,
			.msi_64bits = 1,
			.msi_maskable = 0,
			.num_msix_vectors = 0,
			.msix_region_size = 0x0,
			.msix_address = 0x0,
		},
		/* PCIDevice: 00:1f.0 */
		{
			.type = JAILHOUSE_PCI_TYPE_DEVICE,
			.iommu = 0,
			.domain = 0x0,
			.bdf = 0xf8,
			.bar_mask = {
				0x00000000, 0x00000000, 0x00000000,
				0x00000000, 0x00000000, 0x00000000,
			},
			.caps_start = 0,
			.num_caps = 0,
			.num_msi_vectors = 0,
			.msi_64bits = 0,
			.msi_maskable = 0,
			.num_msix_vectors = 0,
			.msix_region_size = 0x0,
			.msix_address = 0x0,
		},
		/* PCIDevice: 00:1f.2 */
		{
			.type = JAILHOUSE_PCI_TYPE_DEVICE,
			.iommu = 0,
			.domain = 0x0,
			.bdf = 0xfa,
			.bar_mask = {
				0x00000000, 0x00000000, 0x00000000,
				0x00000000, 0xffffffe0, 0xfffff000,
			},
			.caps_start = 7,
			.num_caps = 2,
			.num_msi_vectors = 1,
			.msi_64bits = 1,
			.msi_maskable = 0,
			.num_msix_vectors = 0,
			.msix_region_size = 0x0,
			.msix_address = 0x0,
		},
		/* PCIDevice: 00:1f.3 */
		{
			.type = JAILHOUSE_PCI_TYPE_DEVICE,
			.iommu = 0,
			.domain = 0x0,
			.bdf = 0xfb,
			.bar_mask = {
				0x00000000, 0x00000000, 0x00000000,
				0x00000000, 0xffffffc0, 0x00000000,
			},
			.caps_start = 0,
			.num_caps = 0,
			.num_msi_vectors = 0,
			.msi_64bits = 0,
			.msi_maskable = 0,
			.num_msix_vectors = 0,
			.msix_region_size = 0x0,
			.msix_address = 0x0,
		},
	},

	.pci_caps = {
		/* PCIDevice: 00:02.0 */
		{
			.id = PCI_CAP_ID_PM,
			.start = 0xc8,
			.len = 0x8,
			.flags = JAILHOUSE_PCICAPS_WRITE,
		},
		{
			.id = PCI_CAP_ID_MSI,
			.start = 0xd0,
			.len = 0xe,
			.flags = JAILHOUSE_PCICAPS_WRITE,
		},
		{
			.id = PCI_CAP_ID_EXP,
			.start = 0xe0,
			.len = 0x14,
			.flags = 0,
		},
		{
			.id = PCI_CAP_ID_MSIX,
			.start = 0xa0,
			.len = 0xc,
			.flags = JAILHOUSE_PCICAPS_WRITE,
		},
		{
			.id = PCI_EXT_CAP_ID_ERR | JAILHOUSE_PCI_EXT_CAP,
			.start = 0x100,
			.len = 0x40,
			.flags = 0,
		},
		{
			.id = PCI_EXT_CAP_ID_DSN | JAILHOUSE_PCI_EXT_CAP,
			.start = 0x140,
			.len = 0xc,
			.flags = 0,
		},
		/* PCIDevice: 00:1b.0 */
		{
			.id = PCI_CAP_ID_MSI,
			.start = 0x60,
			.len = 0xe,
			.flags = JAILHOUSE_PCICAPS_WRITE,
		},
		/* PCIDevice: 00:1f.2 */
		{
			.id = PCI_CAP_ID_MSI,
			.start = 0x80,
			.len = 0xe,
			.flags = JAILHOUSE_PCICAPS_WRITE,
		},
		{
			.id = PCI_CAP_ID_SATA,
			.start = 0xa8,
			.len = 0x2,
			.flags = 0,
		},
	},
};
/*****************************************************************************
 * drivers/net/ivhsmem_net.c
 *
 * Copyright (C) 2020  Chung-Fan Yang @ Fixstars Corporation
 *                                      <[email protected]>
 *
 * Derived from Jailhouse Linux Ivshmem-net driver
 * Copyright 2016 Mans Rullgard <[email protected]>
 * Copyright (c) Siemens AG, 2016-2020
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.

 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.

 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 *
 *****************************************************************************/

/*****************************************************************************
 * Included Files
 *****************************************************************************/

#include <nuttx/config.h>
#include <nuttx/arch.h>

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <math.h>
#include <unistd.h>
#include <errno.h>
#include <sched.h>
#include <debug.h>

#include <arch/io.h>
#include <nuttx/pci/pci.h>
#include <nuttx/virt/ivshmem.h>
#include <nuttx/virt/virtio_ring.h>

#include <arpa/inet.h>
#include <nuttx/net/netdev.h>
#include <nuttx/net/arp.h>
#include <nuttx/net/ivshmem_net.h>

#ifdef CONFIG_NET_PKT
#  include <nuttx/net/pkt.h>
#endif

/*****************************************************************************
 * Pre-processor Definitions
 *****************************************************************************/

#define bswap16 __builtin_bswap16
#define bswap32 __builtin_bswap32
#define bswap64 __builtin_bswap64

/* Work queue support is required. */

#if !defined(CONFIG_SCHED_WORKQUEUE)
#  error Work queue support is required!
#else

/* The low priority work queue is preferred.  If it is not enabled, LPWORK
 * will be the same as HPWORK.
 */

#  if defined(CONFIG_IVSHMNET_HPWORK)
#    define ETHWORK HPWORK
#  elif defined(CONFIG_IVSHMNET_LPWORK)
#    define ETHWORK LPWORK
#  else
#    error Neither high or Low priority workqueue is defined
#  endif
#endif

/* CONFIG_IVSHMEM_NET_NINTERFACES determines the number of physical interfaces
 * that will be supported.
 */

#ifndef CONFIG_IVSHMNET_NINTERFACES
# define CONFIG_IVSHMNET_NINTERFACES 1
#endif

/* TX poll delay = 1 seconds. CLK_TCK is the number of clock ticks per second */

#define IVSHMNET_WDDELAY   (1 * CLK_TCK)

/* TX timeout = 1 minute */

#define IVSHMNET_TXTIMEOUT (20ULL * CLK_TCK)

/* This is a helper pointer for accessing the contents of the Ethernet header */

#define BUF ((struct eth_hdr_s *)priv->sk_dev.d_buf)

#define IVSHMNET_STATE_RESET    0
#define IVSHMNET_STATE_INIT     1
#define IVSHMNET_STATE_READY    2
#define IVSHMNET_STATE_RUN      3

#define IVSHMNET_FLAG_RUN       0

#define IVSHMNET_MTU_MIN        68
#define IVSHMNET_MTU_DEFAULT    16384

#define IVSHMNET_ALIGN(addr, align) (((addr) + (align - 1)) & ~(align - 1))
#define SMP_CACHE_BYTES         64
#define IVSHMNET_FRAME_SIZE(s)  IVSHMNET_ALIGN(18 + (s), SMP_CACHE_BYTES)

#define IVSHMNET_VQ_ALIGN       64

#define IVSHMNET_SECTION_ST     0
#define IVSHMNET_SECTION_TX     1
#define IVSHMNET_SECTION_RX     2

#define IVSHMNET_MSIX_STATE     0
#define IVSHMNET_MSIX_TX_RX     1

#define IVSHMNET_NUM_VECTORS    2

/*****************************************************************************
 * Private Types
 *****************************************************************************/

typedef FAR struct file        file_t;

struct ivshmnet_queue {
  struct vring vr;
  uint32_t free_head;
  uint32_t num_free;
  uint32_t num_added;
  uint16_t last_avail_idx;
  uint16_t last_used_idx;

  void *data;
  void *end;
  uint32_t size;
  uint32_t head;
  uint32_t tail;
};

struct ivshmem_mem_region_s
{
  uintptr_t       paddress;
  uintptr_t       address;
  unsigned long   size;
  bool            readonly;
};

struct ivshmnet_driver_s
{
  FAR struct pci_dev_s dev;

  FAR volatile struct jh_ivshmem_regs_s *regs;
  void *msix_table;
  uint16_t peer_id;
  uint16_t vectors;

  FAR struct ivshmem_mem_region_s mem[3];

  struct ivshmnet_queue rx;
  struct ivshmnet_queue tx;

  uint32_t vrsize;
  uint32_t qlen;
  uint32_t qsize;

  uint32_t state;
  uint32_t last_peer_state;
  volatile uint32_t *state_table;

  unsigned long flags;

  struct net_driver_s sk_dev;  /* Interface understood by the network */
  bool sk_bifup;               /* true:ifup false:ifdown */
  WDOG_ID sk_txpoll;           /* TX poll timer */
  WDOG_ID sk_txtimeout;        /* TX timeout timer */
  struct work_s sk_pollwork;   /* For deferring poll work to the work queue */
  struct work_s sk_irqwork;
  struct work_s sk_statework;  /* For deferring interrupt work to the work queue */

  uint8_t pktbuf[MAX_NETDEV_PKTSIZE + CONFIG_NET_GUARDSIZE];
};

/*****************************************************************************
 * Private Data
 *****************************************************************************/

int g_ivshmnet_dev_count = 0;

struct ivshmnet_driver_s g_ivshmnet_devices[CONFIG_IVSHMNET_NINTERFACES];


/*****************************************************************************
 * Private Function Prototypes
 *****************************************************************************/

/* ivshm-net */

static void ivshmnet_state_change(void *in);
static void ivshmnet_set_state(struct ivshmnet_driver_s *in, uint32_t state);
static void ivshmnet_check_state(struct ivshmnet_driver_s *in);

/* Common TX logic */

static int  ivshmnet_transmit(FAR struct ivshmnet_driver_s *priv);
static int  ivshmnet_txpoll(FAR struct net_driver_s *dev);

/* Interrupt handling */

static void ivshmnet_reply(struct ivshmnet_driver_s *priv);
static void ivshmnet_receive(FAR struct ivshmnet_driver_s *priv);
static void ivshmnet_txdone(FAR struct ivshmnet_driver_s *priv);

static void ivshmnet_interrupt_work(FAR void *arg);
static int  ivshmnet_interrupt(int irq, FAR void *context, FAR void *arg);

/* Watchdog timer expirations */

static void ivshmnet_txtimeout_work(FAR void *arg);
static void ivshmnet_txtimeout_expiry(int argc, wdparm_t arg, ...);

static void ivshmnet_poll_work(FAR void *arg);
static void ivshmnet_poll_expiry(int argc, wdparm_t arg, ...);

/* NuttX callback functions */

static int  ivshmnet_ifup(FAR struct net_driver_s *dev);
static int  ivshmnet_ifdown(FAR struct net_driver_s *dev);

static void ivshmnet_txavail_work(FAR void *arg);
static int  ivshmnet_txavail(FAR struct net_driver_s *dev);

#if defined(CONFIG_NET_IGMP) || defined(CONFIG_NET_ICMPv6)
static int  ivshmnet_addmac(FAR struct net_driver_s *dev,
              FAR const uint8_t *mac);
#ifdef CONFIG_NET_IGMP
static int  ivshmnet_rmmac(FAR struct net_driver_s *dev,
              FAR const uint8_t *mac);
#endif
#ifdef CONFIG_NET_ICMPv6
static void ivshmnet_ipv6multicast(FAR struct ivshmnet_driver_s *priv);
#endif
#endif
#ifdef CONFIG_NETDEV_IOCTL
static int  ivshmnet_ioctl(FAR struct net_driver_s *dev, int cmd,
              unsigned long arg);
#endif

/****************************************************************************
 * Private Functions
 ****************************************************************************/

/*****************************************
 *  ivshmem-net vring support functions  *
 *****************************************/

static void *ivshmnet_desc_data(
        struct ivshmnet_driver_s *in, struct ivshmnet_queue *q,
        unsigned int region,  struct vring_desc *desc,
        uint32_t *len)
{
  uint64_t offs = READ_ONCE(desc->addr);
  uint32_t dlen = READ_ONCE(desc->len);
  uint16_t flags = READ_ONCE(desc->flags);
  void *data;

  if (flags)
      return NULL;

  if (offs >= in->mem[region].size)
      return NULL;

  data = (void *)(in->mem[region].address + offs);

  if (data < q->data || data >= q->end)
      return NULL;

  if (dlen > q->end - data)
      return NULL;

  *len = dlen;

  return data;
}

static void ivshmnet_init_queue(
        struct ivshmnet_driver_s *in, struct ivshmnet_queue *q,
        void *mem, unsigned int len)
{
  memset(q, 0, sizeof(*q));

  vring_init(&q->vr, len, mem, IVSHMNET_VQ_ALIGN);
  q->data = mem + in->vrsize;
  q->end = q->data + in->qsize;
  q->size = in->qsize;
}

static void ivshmnet_init_queues(struct ivshmnet_driver_s *in)
{
  void *tx;
  void *rx;
  int i;
  void* tmp;

  tx = (void *)in->mem[IVSHMNET_SECTION_TX].address;
  rx = (void *)in->mem[IVSHMNET_SECTION_RX].address;

  memset(tx, 0, in->mem[IVSHMNET_SECTION_TX].size);

  ivshmnet_init_queue(in, &in->tx, tx, in->qlen);
  ivshmnet_init_queue(in, &in->rx, rx, in->qlen);

  tmp = in->rx.vr.used;
  in->rx.vr.used = in->tx.vr.used;
  in->tx.vr.used = tmp;

  in->tx.num_free = in->tx.vr.num;

  for (i = 0; i < in->tx.vr.num - 1; i++)
      in->tx.vr.desc[i].next = i + 1;
}

static int ivshmnet_calc_qsize(struct ivshmnet_driver_s *in)
{
  unsigned int vrsize;
  unsigned int qsize;
  unsigned int qlen;

  for (qlen = 4096; qlen > 32; qlen >>= 1)
    {
      vrsize = vring_size(qlen, IVSHMNET_VQ_ALIGN);
      vrsize = IVSHMNET_ALIGN(vrsize, IVSHMNET_VQ_ALIGN);
      if (vrsize < (in->mem[IVSHMNET_SECTION_TX].size) / 8)
          break;
    }

  if (vrsize > in->mem[IVSHMNET_SECTION_TX].size)
      return -EINVAL;

  qsize = in->mem[IVSHMNET_SECTION_TX].size - vrsize;

  if (qsize < 4 * IVSHMNET_MTU_MIN)
      return -EINVAL;

  in->vrsize = vrsize;
  in->qlen = qlen;
  in->qsize = qsize;

  return 0;
}

/*****************************************
 *  ivshmem-net IRQ support functions  *
 *****************************************/

static void ivshmnet_notify_tx(struct ivshmnet_driver_s *in, unsigned int num)
{
  uint16_t evt, old, new;

  mb();

  evt = READ_ONCE(vring_avail_event(&in->tx.vr));
  old = in->tx.last_avail_idx - num;
  new = in->tx.last_avail_idx;

  if (vring_need_event(evt, new, old))
    {
      in->regs->doorbell =
        ((uint32_t)in->peer_id << 16) | IVSHMNET_MSIX_TX_RX;
    }
}

static void ivshmnet_enable_rx_irq(struct ivshmnet_driver_s *in)
{
  vring_avail_event(&in->rx.vr) = in->rx.last_avail_idx;
  wmb();
}

static void ivshmnet_notify_rx(struct ivshmnet_driver_s *in, unsigned int num)
{
  uint16_t evt, old, new;

  mb();

  evt = vring_used_event(&in->rx.vr);
  old = in->rx.last_used_idx - num;
  new = in->rx.last_used_idx;

  if (vring_need_event(evt, new, old))
    {
      in->regs->doorbell =
        ((uint32_t)in->peer_id << 16) | IVSHMNET_MSIX_TX_RX;
    }
}

static void ivshmnet_enable_tx_irq(struct ivshmnet_driver_s *in)
{
  vring_used_event(&in->tx.vr) = in->tx.last_used_idx;
  wmb();
}

/*************************************
 *  ivshmem-net vring syntax sugars  *
 *************************************/

static struct vring_desc *ivshmnet_rx_desc(struct ivshmnet_driver_s *in)
{
  struct ivshmnet_queue *rx = &in->rx;
  struct vring *vr = &rx->vr;
  unsigned int avail;
  uint16_t avail_idx;

  avail_idx = virt_load_acquire(&vr->avail->idx);

  if (avail_idx == rx->last_avail_idx)
      return NULL;

  avail = vr->avail->ring[rx->last_avail_idx++ & (vr->num - 1)];
  if (avail >= vr->num)
    {
      nerr("invalid rx avail %d\n", avail);
      return NULL;
    }

  return &vr->desc[avail];
}

static bool ivshmnet_rx_avail(struct ivshmnet_driver_s *in)
{
  mb();
  return READ_ONCE(in->rx.vr.avail->idx) != in->rx.last_avail_idx;
}

static void ivshmnet_rx_finish(struct ivshmnet_driver_s *in, struct vring_desc *desc)
{
  struct ivshmnet_queue *rx = &in->rx;
  struct vring *vr = &rx->vr;
  unsigned int desc_id = desc - vr->desc;
  unsigned int used;

  used = rx->last_used_idx++ & (vr->num - 1);
  vr->used->ring[used].id = desc_id;
  vr->used->ring[used].len = 1;

  virt_store_release(&vr->used->idx, rx->last_used_idx);
}

static size_t ivshmnet_tx_space(struct ivshmnet_driver_s *in)
{
  struct ivshmnet_queue *tx = &in->tx;
  uint32_t tail = tx->tail;
  uint32_t head = tx->head;
  uint32_t space;

  if (head < tail)
      space = tail - head;
  else
      space = (tx->size - head) > tail ? (tx->size - head) : tail;

  return space;
}

static bool ivshmnet_tx_ok(struct ivshmnet_driver_s *in, unsigned int mtu)
{
  return in->tx.num_free >= 2 &&
      ivshmnet_tx_space(in) >= 2 * IVSHMNET_FRAME_SIZE(mtu);
}

static uint32_t ivshmnet_tx_advance(struct ivshmnet_queue *q, uint32_t *pos, uint32_t len)
{
  uint32_t p = *pos;

  len = IVSHMNET_FRAME_SIZE(len);

  if (q->size - p < len)
      p = 0;
  *pos = p + len;

  return p;
}

static int ivshmnet_tx_clean(struct ivshmnet_driver_s *in)
{
  struct ivshmnet_queue *tx = &in->tx;
  struct vring *vr = &tx->vr;
  struct vring_desc *desc;
  struct vring_desc *fdesc;
  struct vring_used_elem *used;
  uint16_t last = tx->last_used_idx;
  uint32_t fhead;
  unsigned int num;
  bool tx_ok;

  fdesc = NULL;
  fhead = 0;
  num = 0;

  while (last != virt_load_acquire(&vr->used->idx))
    {
      void *data;
      uint32_t len;
      uint32_t tail;

      used = vr->used->ring + (last % vr->num);
      if (used->id >= vr->num || used->len != 1)
        {
          nerr("invalid tx used->id %d ->len %d\n",
                 used->id, used->len);
          break;
        }

      desc = &vr->desc[used->id];

      data = ivshmnet_desc_data(in, &in->tx, IVSHMNET_SECTION_TX,
                                desc, &len);
      if (!data)
        {
          nerr("bad tx descriptor, data == NULL\n");
          break;
        }

      tail = ivshmnet_tx_advance(tx, &tx->tail, len);
      if (data != tx->data + tail)
        {
          nerr("bad tx descriptor\n");
          break;
        }

      if (!num)
          fdesc = desc;
      else
          desc->next = fhead;

      fhead = used->id;

      tx->last_used_idx = ++last;
      num++;
      tx->num_free++;

      DEBUGASSERT(tx->num_free <= vr->num);

      tx_ok = ivshmnet_tx_ok(in, IVSHMNET_MTU_DEFAULT);
      if (!tx_ok)
          ivshmnet_enable_tx_irq(in);
  }

  if (num)
    {
      fdesc->next = tx->free_head;
      tx->free_head = fhead;
    }
  else
    {
      tx_ok = ivshmnet_tx_ok(in, IVSHMNET_MTU_DEFAULT);
    }

  return tx_ok;
}


static int ivshmnet_tx_frame(struct ivshmnet_driver_s *in, void* data, int len)
{
  struct ivshmnet_queue *tx = &in->tx;
  struct vring *vr = &tx->vr;
  struct vring_desc *desc;
  unsigned int desc_idx;
  unsigned int avail;
  uint32_t head;
  void *buf;

  unsigned int ret = ivshmnet_tx_clean(in);
  DEBUGASSERT(ret);

  desc_idx = tx->free_head;
  desc = &vr->desc[desc_idx];
  tx->free_head = desc->next;
  tx->num_free--;

  head = ivshmnet_tx_advance(tx, &tx->head, len);

  buf = tx->data + head;
  memcpy(buf, data, len);

  desc->addr = buf - (void *)in->mem[IVSHMNET_SECTION_TX].address;
  desc->len = len;
  desc->flags = 0;

  avail = tx->last_avail_idx++ & (vr->num - 1);
  vr->avail->ring[avail] = desc_idx;
  tx->num_added++;

  virt_store_release(&vr->avail->idx, tx->last_avail_idx);
  ivshmnet_notify_tx(in, tx->num_added);
  tx->num_added = 0;

  return 0;
}

/*****************************************
 *  ivshmem-net support functions  *
 *****************************************/

static void ivshmnet_run(struct ivshmnet_driver_s *in)
{
  irqstate_t flags;

  if (in->state < IVSHMNET_STATE_READY)
      return;

  /* test_and_set_bit */
  flags = enter_critical_section();
  if(in->flags & IVSHMNET_FLAG_RUN)
    {
      in->flags |= IVSHMNET_FLAG_RUN;
      leave_critical_section(flags);
      return;
    }

  in->flags |= IVSHMNET_FLAG_RUN;
  leave_critical_section(flags);

  ivshmnet_set_state(in, IVSHMNET_STATE_RUN);
  in->sk_bifup = true;

  return;
}

static void ivshmnet_do_stop(struct ivshmnet_driver_s *in)
{
  irqstate_t flags;

  in->sk_bifup = false;

  ivshmnet_set_state(in, IVSHMNET_STATE_RESET);

  /* test_and_clear_bit */
  flags = enter_critical_section();
  if(!(in->flags & IVSHMNET_FLAG_RUN))
    {
      in->flags &= ~IVSHMNET_FLAG_RUN;
      leave_critical_section(flags);
      return;
    }

  in->flags &= ~IVSHMNET_FLAG_RUN;
  leave_critical_section(flags);

  return;
}

/****************************************************************************
 * State Machine
 ****************************************************************************/

static void ivshmnet_state_change(void *arg)
{
  struct ivshmnet_driver_s *in = (struct ivshmnet_driver_s*)arg;
  uint32_t peer_state = READ_ONCE(in->state_table[in->peer_id]);

  ninfo("Remote state: %d\n", peer_state);

  switch (in->state)
    {
      case IVSHMNET_STATE_RESET:
          if (peer_state < IVSHMNET_STATE_READY)
              ivshmnet_set_state(in, IVSHMNET_STATE_INIT);
          break;

      case IVSHMNET_STATE_INIT:
          if (peer_state > IVSHMNET_STATE_RESET)
            {
              ivshmnet_init_queues(in);
              ivshmnet_set_state(in, IVSHMNET_STATE_READY);
            }
          break;

      case IVSHMNET_STATE_READY:
          if (peer_state >= IVSHMNET_STATE_READY)
            {
              ivshmnet_run(in);
              break;
            }
      case IVSHMNET_STATE_RUN:
          if (peer_state == IVSHMNET_STATE_RESET)
            {
              ivshmnet_do_stop(in);
            }
          break;
    }

  wmb();
  WRITE_ONCE(in->last_peer_state, peer_state);
}

static void ivshmnet_set_state(struct ivshmnet_driver_s *in, uint32_t state)
{
  wmb();
  WRITE_ONCE(in->state, state);
  WRITE_ONCE(in->regs->state, state);
}

static void ivshmnet_check_state(struct ivshmnet_driver_s *in)
{
  irqstate_t flags;

  flags = enter_critical_section();

  /* test_bit */
  if (in->state_table[in->peer_id] != in->last_peer_state ||
      !(IVSHMNET_FLAG_RUN & in->flags))
      work_queue(ETHWORK, &in->sk_statework, ivshmnet_state_change, in, 0);

  leave_critical_section(flags);
}

/****************************************************************************
 * State IRQ Handlers
 ****************************************************************************/

static int ivshmnet_state_handler(int irq, uint32_t *regs, void *arg)
{
  struct ivshmnet_driver_s *priv = arg;

  ivshmnet_check_state(priv);

  return 0;
}

#if 0
static void dump_ethernet_frame(void *data, int len){
    uint8_t* ptr8 = data;
    uint16_t* ptr16 = data;
    uint32_t* ptrip = (uint32_t*)(ptr8 + 14);
    uint16_t etype;

    ninfo("======= Dumping Ethernet Frame =======\n");
    ninfo("Dest MAC: %x:%x:%x:%x:%x:%x\n", ptr8[0], ptr8[1], ptr8[2], ptr8[3], ptr8[4], ptr8[5]);
    ninfo("Src  MAC: %x:%x:%x:%x:%x:%x\n", ptr8[6], ptr8[7], ptr8[8], ptr8[9], ptr8[10], ptr8[11]);
    etype = bswap16(ptr16[6]);
    ninfo("Ether Type: 0x%x\n", etype);
    if(etype == 0x806) // ARP
    {
      ninfo("------- Begin ARP Frame -------\n");
      ninfo("HW type: 0x%lx, Proto type: 0x%lx\n", bswap16((ptrip[0]) & 0xffff), bswap16((ptrip[0] >> 16) & 0xffff));
      ninfo("HW addr len: 0x%lx, Proto addr len: 0x%lx\n", (ptrip[1]) & 0xff, (ptrip[1] >> 8) & 0xff);
      ninfo("Operation: 0x%lx\n", bswap16((ptrip[1] >> 16) & 0xffff));
      ninfo("Sender hardware address: %x:%x:%x:%x:%x:%x\n",
              (ptrip[2]) & 0xff,
              (ptrip[2] >> 8) & 0xff,
              (ptrip[2] >> 16) & 0xff,
              (ptrip[2] >> 24) & 0xff,
              (ptrip[3]) & 0xff,
              (ptrip[3] >> 8) & 0xff
              );
      ninfo("Sender protocol address: %x:%x:%x:%x\n",
              (ptrip[3] >> 16) & 0xff,
              (ptrip[3] >> 24) & 0xff,
              (ptrip[4]) & 0xff,
              (ptrip[4] >> 8) & 0xff
              );
      ninfo("Target hardware address: %x:%x:%x:%x:%x:%x\n",
              (ptrip[4] >> 16) & 0xff,
              (ptrip[4] >> 24) & 0xff,
              (ptrip[5]) & 0xff,
              (ptrip[5] >> 8) & 0xff,
              (ptrip[5] >> 16) & 0xff,
              (ptrip[5] >> 24) & 0xff
              );
      ninfo("Target protocol address: %x:%x:%x:%x\n",
              (ptrip[6]) & 0xff,
              (ptrip[6] >> 8) & 0xff,
              (ptrip[6] >> 16) & 0xff,
              (ptrip[6] >> 24) & 0xff
              );
    }
    else if(etype == 0x800) //IPV4
    {
      ninfo("------- Begin IP Frame -------\n");
      ninfo("Version: %d, Hdr len: 0x%lx\n", (ptrip[0] >> 4) & 0xf, hdr_len);
      ninfo("Diff Service: 0x%lx\n", (ptrip[0] >> 8) & 0xff);
      ninfo("Total Length: 0x%lx\n", (ptrip[0] >> 16) & 0xffff);
      ninfo("Identification: 0x%lx\n", (ptrip[1]) & 0xffff);
      ninfo("Flags: 0x%lx, Frags: 0x%lx\n", (ptrip[1] >> 16) & 0x7, bswap16((ptrip[1] >> 16) & 0xffff) & 0x1fff);
      ninfo("TTL: %d, Protocol: 0x%lx\n", (ptrip[2]) & 0xff, (ptrip[2] >> 8) & 0xff);
      ninfo("Hdr checksum: 0x%lx\n", (ptrip[2] >> 16) & 0xffff);
      ninfo("Src  address: %d.%d.%d.%d\n", (ptrip[3]) & 0xff, (ptrip[3] >> 8) & 0xff, (ptrip[3] >> 16) & 0xff, (ptrip[3] >> 24) & 0xff);
      ninfo("Dest address: %d.%d.%d.%d\n", (ptrip[4]) & 0xff, (ptrip[4] >> 8) & 0xff, (ptrip[4] >> 16) & 0xff, (ptrip[4] >> 24) & 0xff);

      ninfo("Src  port: %d\n", bswap16(ptrip[hdr_len]) & 0xffff);
      ninfo("Dest port: %d\n", bswap16(ptrip[hdr_len] >> 16) & 0xffff);
    }

    return;
}
#else

#define dump_ethernet_frame(data, len)

#endif

/****************************************************************************
 * Name: ivshmnet_transmit
 *
 * Description:
 *   Start hardware transmission.  Called either from the txdone interrupt
 *   handling or from watchdog based polling.
 *
 * Input Parameters:
 *   priv - Reference to the driver state structure
 *
 * Returned Value:
 *   OK on success; a negated errno on failure
 *
 * Assumptions:
 *   The network is locked.
 *
 ****************************************************************************/

static int ivshmnet_transmit(FAR struct ivshmnet_driver_s *priv)
{
  /* Verify that the hardware is ready to send another packet.  If we get
   * here, then we are committed to sending a packet; Higher level logic
   * must have assured that there is no transmission in progress.
   */

  /* Increment statistics */

  NETDEV_TXPACKETS(priv->sk_dev);

  /* Send the packet: address=priv->sk_dev.d_buf, length=priv->sk_dev.d_len */
  ivshmnet_tx_clean(priv);

  ASSERT(ivshmnet_tx_ok(priv, IVSHMNET_MTU_DEFAULT));

  ivshmnet_tx_frame(priv, priv->sk_dev.d_buf, priv->sk_dev.d_len);

  /* Enable Tx interrupts */
  ivshmnet_enable_tx_irq(priv);

  /* Setup the TX timeout watchdog (perhaps restarting the timer) */
  (void)wd_start(priv->sk_txtimeout, IVSHMNET_TXTIMEOUT,
                 ivshmnet_txtimeout_expiry, 1, (wdparm_t)priv);
  return OK;
}

/****************************************************************************
 * Name: ivshmnet_txpoll
 *
 * Description:
 *   The transmitter is available, check if the network has any outgoing
 *   packets ready to send.  This is a callback from devif_poll().
 *   devif_poll() may be called:
 *
 *   1. When the preceding TX packet send is complete,
 *   2. When the preceding TX packet send timesout and the interface is reset
 *   3. During normal TX polling
 *
 * Input Parameters:
 *   dev - Reference to the NuttX driver state structure
 *
 * Returned Value:
 *   OK on success; a negated errno on failure
 *
 * Assumptions:
 *   The network is locked.
 *
 ****************************************************************************/

static int ivshmnet_txpoll(FAR struct net_driver_s *dev)
{
  FAR struct ivshmnet_driver_s *priv = (FAR struct ivshmnet_driver_s *)dev->d_private;

  /* If the polling resulted in data that should be sent out on the network,
   * the field d_len is set to a value > 0.
   */

  if (priv->sk_dev.d_len > 0)
    {
      /* Look up the destination MAC address and add it to the Ethernet
       * header.
       */

#ifdef CONFIG_NET_IPv4
#ifdef CONFIG_NET_IPv6
      if (IFF_IS_IPv4(priv->sk_dev.d_flags))
#endif
        {
          arp_out(&priv->sk_dev);
        }
#endif /* CONFIG_NET_IPv4 */

#ifdef CONFIG_NET_IPv6
#ifdef CONFIG_NET_IPv4
      else
#endif
        {
          neighbor_out(&priv->sk_dev);
        }
#endif /* CONFIG_NET_IPv6 */

      /* Send the packet */

      ivshmnet_transmit(priv);

      /* Check if there is room in the device to hold another packet. If not,
       * return a non-zero value to terminate the poll.
       */
    }

  /* If zero is returned, the polling will continue until all connections have
   * been examined.
   */

  return 0;
}

/****************************************************************************
 * Name: ivshmnet_reply
 *
 * Description:
 *   After a packet has been received and dispatched to the network, it
 *   may return return with an outgoing packet.  This function checks for
 *   that case and performs the transmission if necessary.
 *
 * Input Parameters:
 *   priv - Reference to the driver state structure
 *
 * Returned Value:
 *   None
 *
 * Assumptions:
 *   The network is locked.
 *
 ****************************************************************************/

static void ivshmnet_reply(struct ivshmnet_driver_s *priv)
{
  /* If the packet dispatch resulted in data that should be sent out on the
   * network, the field d_len will set to a value > 0.
   */

  if (priv->sk_dev.d_len > 0)
    {
      /* Update the Ethernet header with the correct MAC address */

#ifdef CONFIG_NET_IPv4
#ifdef CONFIG_NET_IPv6
      /* Check for an outgoing IPv4 packet */

      if (IFF_IS_IPv4(priv->sk_dev.d_flags))
#endif
        {
          arp_out(&priv->sk_dev);
        }
#endif

#ifdef CONFIG_NET_IPv6
#ifdef CONFIG_NET_IPv4
      /* Otherwise, it must be an outgoing IPv6 packet */

      else
#endif
        {
          neighbor_out(&ivshmnet->sk_dev);
        }
#endif

      /* And send the packet */

      ivshmnet_transmit(priv);
    }
}

/****************************************************************************
 * Name: ivshmnet_receive
 *
 * Description:
 *   An interrupt was received indicating the availability of a new RX packet
 *
 * Input Parameters:
 *   priv - Reference to the driver state structure
 *
 * Returned Value:
 *   None
 *
 * Assumptions:
 *   The network is locked.
 *
 ****************************************************************************/

static void ivshmnet_receive(FAR struct ivshmnet_driver_s *priv)
{
  int received = 0;

  do
    {
      struct vring_desc *desc;
      void *data;
      uint32_t len;

      /* Check for errors and update statistics */
      ninfo("processing receive\n");

      desc = ivshmnet_rx_desc(priv); /* get next avail rx descriptor from avail ring */
      if (!desc)
        break;

      data = ivshmnet_desc_data(priv, &priv->rx, IVSHMNET_SECTION_RX,
                   desc, &len); /* Unpack descriptor and get the physical address in SHMEM and fill in len */
      if (!data) {
        nerr("bad rx descriptor\n");
        break;
      }

      dump_ethernet_frame(data, len);

      /* Check if the packet is a valid size for the network buffer
       * configuration.
       */

      /* Copy the data data from the hardware to priv->sk_dev.d_buf.  Set
       * amount of data in priv->sk_dev.d_len
       */
      memcpy(priv->sk_dev.d_buf, data, len);
      priv->sk_dev.d_len = len;

      ivshmnet_rx_finish(priv, desc); /* Release the read descriptor in to the used ring */

#ifdef CONFIG_NET_PKT
      /* When packet sockets are enabled, feed the frame into the packet tap */

       pkt_input(&priv->sk_dev);
#endif

#ifdef CONFIG_NET_IPv4
      /* Check for an IPv4 packet */

      if (BUF->type == HTONS(ETHTYPE_IP))
        {
          ninfo("IPv4 frame\n");
          NETDEV_RXIPV4(&priv->sk_dev);

          /* Handle ARP on input, then dispatch IPv4 packet to the network
           * layer.
           */

          arp_ipin(&priv->sk_dev);
          ipv4_input(&priv->sk_dev);

          /* Check for a reply to the IPv4 packet */

          ivshmnet_reply(priv);
        }
      else
#endif
#ifdef CONFIG_NET_IPv6
      /* Check for an IPv6 packet */

      if (BUF->type == HTONS(ETHTYPE_IP6))
        {
          ninfo("Iv6 frame\n");
          NETDEV_RXIPV6(&priv->sk_dev);

          /* Dispatch IPv6 packet to the network layer */

          ipv6_input(&priv->sk_dev);

          /* Check for a reply to the IPv6 packet */

          ivshmnet_reply(priv);
        }
      else
#endif
#ifdef CONFIG_NET_ARP
      /* Check for an ARP packet */

      if (BUF->type == htons(ETHTYPE_ARP))
        {
          /* Dispatch ARP packet to the network layer */

          arp_arpin(&priv->sk_dev);
          NETDEV_RXARP(&priv->sk_dev);

          /* If the above function invocation resulted in data that should be
           * sent out on the network, the field  d_len will set to a value > 0.
           */

          if (priv->sk_dev.d_len > 0)
            {
              ivshmnet_transmit(priv);
            }
        }
      else
#endif
        {
          NETDEV_RXDROPPED(&priv->sk_dev);
        }
      received++;
    }
  while (true); /* Whether are there more packets to be processed is checked above */

  ivshmnet_enable_rx_irq(priv); /* enable the irq by writing the last avail index to the end of the ring */
  if (ivshmnet_rx_avail(priv)) /* More stuff to read?, which is very unlikely*/
    work_queue(ETHWORK, &priv->sk_irqwork, ivshmnet_interrupt_work, priv, 0); /* schedule the work again */

  if (received)
    ivshmnet_notify_rx(priv, received); /* We had did some work, notify we had rx the data by triggering door bell*/
}

/****************************************************************************
 * Name: ivshmnet_txdone
 *
 * Description:
 *   An interrupt was received indicating that the last TX packet(s) is done
 *
 * Input Parameters:
 *   priv - Reference to the driver state structure
 *
 * Returned Value:
 *   None
 *
 * Assumptions:
 *   The network is locked.
 *
 ****************************************************************************/

static void ivshmnet_txdone(FAR struct ivshmnet_driver_s *priv)
{
  /* Check for errors and update statistics */

  NETDEV_TXDONE(priv->sk_dev);

  /* Check if there are pending transmissions */

  /* If no further transmissions are pending, then cancel the TX timeout and
   * disable further Tx interrupts.
   */

  wd_cancel(priv->sk_txtimeout);

  /* And disable further TX interrupts. */

  /* In any event, poll the network for new TX data */

  (void)devif_poll(&priv->sk_dev, ivshmnet_txpoll);
}

/****************************************************************************
 * Name: ivshmnet_interrupt_work
 *
 * Description:
 *   Perform interrupt related work from the worker thread
 *
 * Input Parameters:
 *   arg - The argument passed when work_queue() was called.
 *
 * Returned Value:
 *   OK on success
 *
 * Assumptions:
 *   Runs on a worker thread.
 *
 ****************************************************************************/

static void ivshmnet_interrupt_work(FAR void *arg)
{
  FAR struct ivshmnet_driver_s *priv = (FAR struct ivshmnet_driver_s *)arg;

  /* Lock the network and serialize driver operations if necessary.
   * NOTE: Serialization is only required in the case where the driver work
   * is performed on an LP worker thread and where more than one LP worker
   * thread has been configured.
   */

  ninfo("processing int\n");

  net_lock();

  /* Process pending Ethernet interrupts */

  /* Get and clear interrupt status bits */

  /*ivshmnet_tx_clean(priv);*/

  /* Handle interrupts according to status bit settings */

  /* Check if we received an incoming packet, if so, call ivshmnet_receive() */
  if(ivshmnet_rx_avail(priv))
    {

      ivshmnet_receive(priv);
    }
  else
    {
      /* Check if a packet transmission just completed.  If so, call ivshmnet_txdone.
       * This may disable further Tx interrupts if there are no pending
       * transmissions.
       */

      /* XXX: Assuming single interrupt only represent TX or RX might not be a good idea */

      ivshmnet_txdone(priv);
    }

  net_unlock();

  /* Re-enable Ethernet interrupts */

  /*up_enable_irq(CONFIG_IVSHMEM_NET_IRQ);*/
}

/****************************************************************************
 * Name: ivshmnet_interrupt
 *
 * Description:
 *   Hardware interrupt handler
 *
 * Input Parameters:
 *   irq     - Number of the IRQ that generated the interrupt
 *   context - Interrupt register state save info (architecture-specific)
 *
 * Returned Value:
 *   OK on success
 *
 * Assumptions:
 *   Runs in the context of a the Ethernet interrupt handler.  Local
 *   interrupts are disabled by the interrupt logic.
 *
 ****************************************************************************/

static int ivshmnet_interrupt(int irq, FAR void *context, FAR void *arg)
{
  FAR struct ivshmnet_driver_s *priv = (FAR struct ivshmnet_driver_s *)arg;

  DEBUGASSERT(priv != NULL);

  /* Disable further Ethernet interrupts.  Because Ethernet interrupts are
   * also disabled if the TX timeout event occurs, there can be no race
   * condition here.
   */

  /*up_disable_irq(CONFIG_IVSHMEM_NET_IRQ);*/

  /* TODO: Determine if a TX transfer just completed */

    {
      /* If a TX transfer just completed, then cancel the TX timeout so
       * there will be no race condition between any subsequent timeout
       * expiration and the deferred interrupt processing.
       */

       /*wd_cancel(priv->sk_txtimeout);*/
    }

  /* Schedule to perform the interrupt processing on the worker thread. */

  work_queue(ETHWORK, &priv->sk_irqwork, ivshmnet_interrupt_work, priv, 0);
  return OK;
}

/****************************************************************************
 * Name: ivshmnet_txtimeout_work
 *
 * Description:
 *   Perform TX timeout related work from the worker thread
 *
 * Input Parameters:
 *   arg - The argument passed when work_queue() as called.
 *
 * Returned Value:
 *   OK on success
 *
 ****************************************************************************/

static void ivshmnet_txtimeout_work(FAR void *arg)
{
  FAR struct ivshmnet_driver_s *priv = (FAR struct ivshmnet_driver_s *)arg;

  /* Lock the network and serialize driver operations if necessary.
   * NOTE: Serialization is only required in the case where the driver work
   * is performed on an LP worker thread and where more than one LP worker
   * thread has been configured.
   */

  net_lock();

  /* Increment statistics and dump debug info */

  NETDEV_TXTIMEOUTS(priv->sk_dev);

  /* Then reset the hardware */

  /* Then poll the network for new XMIT data */

  (void)devif_poll(&priv->sk_dev, ivshmnet_txpoll);
  net_unlock();
}

/****************************************************************************
 * Name: ivshmnet_txtimeout_expiry
 *
 * Description:
 *   Our TX watchdog timed out.  Called from the timer interrupt handler.
 *   The last TX never completed.  Reset the hardware and start again.
 *
 * Input Parameters:
 *   argc - The number of available arguments
 *   arg  - The first argument
 *
 * Returned Value:
 *   None
 *
 * Assumptions:
 *   Runs in the context of a the timer interrupt handler.  Local
 *   interrupts are disabled by the interrupt logic.
 *
 ****************************************************************************/

static void ivshmnet_txtimeout_expiry(int argc, wdparm_t arg, ...)
{
  FAR struct ivshmnet_driver_s *priv = (FAR struct ivshmnet_driver_s *)arg;

  /* Disable further Ethernet interrupts.  This will prevent some race
   * conditions with interrupt work.  There is still a potential race
   * condition with interrupt work that is already queued and in progress.
   */

  /*up_disable_irq(CONFIG_IVSHMEM_NET_IRQ);*/

  /* Schedule to perform the TX timeout processing on the worker thread. */

  work_queue(ETHWORK, &priv->sk_irqwork, ivshmnet_txtimeout_work, priv, 0);
}

/****************************************************************************
 * Name: ivshmnet_poll_work
 *
 * Description:
 *   Perform periodic polling from the worker thread
 *
 * Input Parameters:
 *   arg - The argument passed when work_queue() as called.
 *
 * Returned Value:
 *   OK on success
 *
 * Assumptions:
 *   Run on a work queue thread.
 *
 ****************************************************************************/

static void ivshmnet_poll_work(FAR void *arg)
{
  FAR struct ivshmnet_driver_s *priv = (FAR struct ivshmnet_driver_s *)arg;

  /* Lock the network and serialize driver operations if necessary.
   * NOTE: Serialization is only required in the case where the driver work
   * is performed on an LP worker thread and where more than one LP worker
   * thread has been configured.
   */

  net_lock();

  /* Perform the poll */

  /* Check if there is room in the send another TX packet.  We cannot perform
   * the TX poll if he are unable to accept another packet for transmission.
   */

  /* If so, update TCP timing states and poll the network for new XMIT data.
   * Hmmm.. might be bug here.  Does this mean if there is a transmit in
   * progress, we will missing TCP time state updates?
   */

  (void)devif_timer(&priv->sk_dev, IVSHMNET_WDDELAY, ivshmnet_txpoll);

  /* Setup the watchdog poll timer again */

  (void)wd_start(priv->sk_txpoll, IVSHMNET_WDDELAY, ivshmnet_poll_expiry, 1,
                 (wdparm_t)priv);
  net_unlock();
}

/****************************************************************************
 * Name: ivshmnet_poll_expiry
 *
 * Description:
 *   Periodic timer handler.  Called from the timer interrupt handler.
 *
 * Input Parameters:
 *   argc - The number of available arguments
 *   arg  - The first argument
 *
 * Returned Value:
 *   None
 *
 * Assumptions:
 *   Runs in the context of a the timer interrupt handler.  Local
 *   interrupts are disabled by the interrupt logic.
 *
 ****************************************************************************/

static void ivshmnet_poll_expiry(int argc, wdparm_t arg, ...)
{
  FAR struct ivshmnet_driver_s *priv = (FAR struct ivshmnet_driver_s *)arg;

  /* Schedule to perform the interrupt processing on the worker thread. */

  work_queue(ETHWORK, &priv->sk_pollwork, ivshmnet_poll_work, priv, 0);
}

/****************************************************************************
 * Name: ivshmnet_ifup
 *
 * Description:
 *   NuttX Callback: Bring up the Ethernet interface when an IP address is
 *   provided
 *
 * Input Parameters:
 *   dev - Reference to the NuttX driver state structure
 *
 * Returned Value:
 *   None
 *
 * Assumptions:
 *   The network is locked.
 *
 ****************************************************************************/

static int ivshmnet_ifup(FAR struct net_driver_s *dev)
{
  FAR struct ivshmnet_driver_s *priv = (FAR struct ivshmnet_driver_s *)dev->d_private;

#ifdef CONFIG_NET_IPv4
  ninfo("Bringing up: %d.%d.%d.%d\n",
        dev->d_ipaddr & 0xff, (dev->d_ipaddr >> 8) & 0xff,
        (dev->d_ipaddr >> 16) & 0xff, dev->d_ipaddr >> 24);
#endif
#ifdef CONFIG_NET_IPv6
  ninfo("Bringing up: %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
        dev->d_ipv6addr[0], dev->d_ipv6addr[1], dev->d_ipv6addr[2],
        dev->d_ipv6addr[3], dev->d_ipv6addr[4], dev->d_ipv6addr[5],
        dev->d_ipv6addr[6], dev->d_ipv6addr[7]);
#endif

  priv->regs->int_control = JH_IVSHMEM_INT_EN;
  priv->regs->state = IVSHMNET_STATE_RESET;
  priv->state = IVSHMNET_STATE_RESET;
  ivshmnet_check_state(priv);

  /* Instantiate the MAC address from priv->sk_dev.d_mac.ether.ether_addr_octet */

#ifdef CONFIG_NET_ICMPv6
  /* Set up IPv6 multicast address filtering */

  ivshmnet_ipv6multicast(priv);
#endif

  /* Set and activate a timer process */

  (void)wd_start(priv->sk_txpoll, IVSHMNET_WDDELAY, ivshmnet_poll_expiry, 1,
                 (wdparm_t)priv);

  /* Enable the Ethernet interrupt */

  return OK;
}

/****************************************************************************
 * Name: ivshmnet_ifdown
 *
 * Description:
 *   NuttX Callback: Stop the interface.
 *
 * Input Parameters:
 *   dev - Reference to the NuttX driver state structure
 *
 * Returned Value:
 *   None
 *
 * Assumptions:
 *   The network is locked.
 *
 ****************************************************************************/

static int ivshmnet_ifdown(FAR struct net_driver_s *dev)
{
  FAR struct ivshmnet_driver_s *priv = (FAR struct ivshmnet_driver_s *)dev->d_private;
  irqstate_t flags;

  /* Disable the Ethernet interrupt */

  flags = enter_critical_section();

  priv->regs->int_control &= ~JH_IVSHMEM_INT_EN;
  priv->regs->state = IVSHMNET_STATE_RESET;

  /* Cancel the TX poll timer and TX timeout timers */

  wd_cancel(priv->sk_txpoll);
  wd_cancel(priv->sk_txtimeout);

  /* Put the EMAC in its reset, non-operational state.  This should be
   * a known configuration that will guarantee the ivshmnet_ifup() always
   * successfully brings the interface back up.
   */

  /* Mark the device "down" */

  priv->sk_bifup = false;
  leave_critical_section(flags);
  return OK;
}

/****************************************************************************
 * Name: ivshmnet_txavail_work
 *
 * Description:
 *   Perform an out-of-cycle poll on the worker thread.
 *
 * Input Parameters:
 *   arg - Reference to the NuttX driver state structure (cast to void*)
 *
 * Returned Value:
 *   None
 *
 * Assumptions:
 *   Runs on a work queue thread.
 *
 ****************************************************************************/

static void ivshmnet_txavail_work(FAR void *arg)
{
  FAR struct ivshmnet_driver_s *priv = (FAR struct ivshmnet_driver_s *)arg;

  /* Lock the network and serialize driver operations if necessary.
   * NOTE: Serialization is only required in the case where the driver work
   * is performed on an LP worker thread and where more than one LP worker
   * thread has been configured.
   */

  net_lock();

  /* Ignore the notification if the interface is not yet up */

  if (priv->sk_bifup)
    {
      /* Check if there is room in the hardware to hold another outgoing packet. */

      /* If so, then poll the network for new XMIT data */

      (void)devif_poll(&priv->sk_dev, ivshmnet_txpoll);
    }

  net_unlock();
}

/****************************************************************************
 * Name: ivshmnet_txavail
 *
 * Description:
 *   Driver callback invoked when new TX data is available.  This is a
 *   stimulus perform an out-of-cycle poll and, thereby, reduce the TX
 *   latency.
 *
 * Input Parameters:
 *   dev - Reference to the NuttX driver state structure
 *
 * Returned Value:
 *   None
 *
 * Assumptions:
 *   The network is locked.
 *
 ****************************************************************************/

static int ivshmnet_txavail(FAR struct net_driver_s *dev)
{
  FAR struct ivshmnet_driver_s *priv = (FAR struct ivshmnet_driver_s *)dev->d_private;

  /* Is our single work structure available?  It may not be if there are
   * pending interrupt actions and we will have to ignore the Tx
   * availability action.
   */

  if (work_available(&priv->sk_pollwork))
    {
      /* Schedule to serialize the poll on the worker thread. */

      work_queue(ETHWORK, &priv->sk_pollwork, ivshmnet_txavail_work, priv, 0);
    }

  return OK;
}

/****************************************************************************
 * Name: ivshmnet_addmac
 *
 * Description:
 *   NuttX Callback: Add the specified MAC address to the hardware multicast
 *   address filtering
 *
 * Input Parameters:
 *   dev  - Reference to the NuttX driver state structure
 *   mac  - The MAC address to be added
 *
 * Returned Value:
 *   Zero (OK) on success; a negated errno value on failure.
 *
 ****************************************************************************/

#if defined(CONFIG_NET_IGMP) || defined(CONFIG_NET_ICMPv6)
static int ivshmnet_addmac(FAR struct net_driver_s *dev, FAR const uint8_t *mac)
{
  FAR struct ivshmnet_driver_s *priv = (FAR struct ivshmnet_driver_s *)dev->d_private;

  /* Add the MAC address to the hardware multicast routing table */

  return OK;
}
#endif

/****************************************************************************
 * Name: ivshmnet_rmmac
 *
 * Description:
 *   NuttX Callback: Remove the specified MAC address from the hardware multicast
 *   address filtering
 *
 * Input Parameters:
 *   dev  - Reference to the NuttX driver state structure
 *   mac  - The MAC address to be removed
 *
 * Returned Value:
 *   Zero (OK) on success; a negated errno value on failure.
 *
 ****************************************************************************/

#ifdef CONFIG_NET_IGMP
static int ivshmnet_rmmac(FAR struct net_driver_s *dev, FAR const uint8_t *mac)
{
  FAR struct ivshmnet_driver_s *priv = (FAR struct ivshmnet_driver_s *)dev->d_private;

  /* Add the MAC address to the hardware multicast routing table */

  return OK;
}
#endif

/****************************************************************************
 * Name: ivshmnet_ipv6multicast
 *
 * Description:
 *   Configure the IPv6 multicast MAC address.
 *
 * Input Parameters:
 *   priv - A reference to the private driver state structure
 *
 * Returned Value:
 *   Zero (OK) on success; a negated errno value on failure.
 *
 ****************************************************************************/

#ifdef CONFIG_NET_ICMPv6
static void ivshmnet_ipv6multicast(FAR struct ivshmnet_driver_s *priv)
{
  FAR struct net_driver_s *dev;
  uint16_t tmp16;
  uint8_t mac[6];

  /* For ICMPv6, we need to add the IPv6 multicast address
   *
   * For IPv6 multicast addresses, the Ethernet MAC is derived by
   * the four low-order octets OR'ed with the MAC 33:33:00:00:00:00,
   * so for example the IPv6 address FF02:DEAD:BEEF::1:3 would map
   * to the Ethernet MAC address 33:33:00:01:00:03.
   *
   * NOTES:  This appears correct for the ICMPv6 Router Solicitation
   * Message, but the ICMPv6 Neighbor Solicitation message seems to
   * use 33:33:ff:01:00:03.
   */

  mac[0] = 0x33;
  mac[1] = 0x33;

  dev    = &priv->dev;
  tmp16  = dev->d_ipv6addr[6];
  mac[2] = 0xff;
  mac[3] = tmp16 >> 8;

  tmp16  = dev->d_ipv6addr[7];
  mac[4] = tmp16 & 0xff;
  mac[5] = tmp16 >> 8;

  ninfo("IPv6 Multicast: %02x:%02x:%02x:%02x:%02x:%02x\n",
        mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);

  (void)ivshmnet_addmac(dev, mac);

#ifdef CONFIG_NET_ICMPv6_AUTOCONF
  /* Add the IPv6 all link-local nodes Ethernet address.  This is the
   * address that we expect to receive ICMPv6 Router Advertisement
   * packets.
   */

  (void)ivshmnet_addmac(dev, g_ipv6_ethallnodes.ether_addr_octet);

#endif /* CONFIG_NET_ICMPv6_AUTOCONF */

#ifdef CONFIG_NET_ICMPv6_ROUTER
  /* Add the IPv6 all link-local routers Ethernet address.  This is the
   * address that we expect to receive ICMPv6 Router Solicitation
   * packets.
   */

  (void)ivshmnet_addmac(dev, g_ipv6_ethallrouters.ether_addr_octet);

#endif /* CONFIG_NET_ICMPv6_ROUTER */
}
#endif /* CONFIG_NET_ICMPv6 */

/****************************************************************************
 * Name: ivshmnet_ioctl
 *
 * Description:
 *   Handle network IOCTL commands directed to this device.
 *
 * Input Parameters:
 *   dev - Reference to the NuttX driver state structure
 *   cmd - The IOCTL command
 *   arg - The argument for the IOCTL command
 *
 * Returned Value:
 *   OK on success; Negated errno on failure.
 *
 * Assumptions:
 *   The network is locked.
 *
 ****************************************************************************/

#ifdef CONFIG_NETDEV_IOCTL
static int ivshmnet_ioctl(FAR struct net_driver_s *dev, int cmd,
                      unsigned long arg)
{
  FAR struct ivshmnet_driver_s *priv = (FAR struct ivshmnet_driver_s *)dev->d_private;
  int ret;

  /* Decode and dispatch the driver-specific IOCTL command */

  switch (cmd)
    {
      /* Add cases here to support the IOCTL commands */

      default:
        nerr("ERROR: Unrecognized IOCTL command: %d\n", command);
        return -ENOTTY;  /* Special return value for this case */
    }

  return OK;
}
#endif

/****************************************************************************
 * Public Functions
 ****************************************************************************/

/****************************************************************************
 * Name: ivshmnet_probe
 *
 * Description:
 *   Initialize the Ethernet controller and driver
 *
 * Input Parameters:
 *   intf - In the case where there are multiple EMACs, this value
 *          identifies which EMAC is to be initialized.
 *
 * Returned Value:
 *   OK on success; Negated errno on failure.
 *
 * Assumptions:
 *   Called early in initialization before multi-tasking is initiated.
 *
 ****************************************************************************/

int ivshmnet_probe(FAR struct pci_bus_s *bus,
                   FAR struct pci_dev_type_s *type, uint16_t bdf)
{
  int vndr_cap;
  int msix_cap;
  uint8_t vndr_length;
  uint32_t io_section_size;
  uintptr_t rw_section_addr;
  uintptr_t io_section_addr;
  struct ivshmem_mem_region_s *mem;
  struct ivshmnet_driver_s *dev = g_ivshmnet_devices + g_ivshmnet_dev_count;

  if (g_ivshmnet_dev_count >= CONFIG_IVSHMNET_NINTERFACES)
    {
      pcierr("Probed too many ivshmem-net devices!\n");
    }

  memset(dev, 0, sizeof(struct ivshmnet_driver_s));

  dev->dev.bus = bus;
  dev->dev.type = type;
  dev->dev.bdf = bdf;

  if (pci_find_cap(&dev->dev, PCI_CAP_MSIX) < 0)
    {
      pcierr("Device is not MSIX capable\n");
      return -EINVAL;
    }

  dev->regs = pci_map_bar(&dev->dev, 0);
  dev->msix_table = pci_map_bar(&dev->dev, 1);

  pciinfo("Ivshmem-net[%d] mapped bar[0]: %p\n",
          g_ivshmnet_dev_count, dev->regs);

  pciinfo("Ivshmem-net[%d] mapped bar[1]: %p\n",
          g_ivshmnet_dev_count, dev->msix_table);

  if (!dev->regs || !dev->msix_table)
    {
      pcierr("Failed to map ivshmem-net bars!\n");
      return -EBUSY;
    }

  pci_enable_device(&dev->dev);

  if (dev->regs->max_peers != 2)
      return -EINVAL;

  dev->peer_id = !dev->regs->id;

  mem = &dev->mem[0];

  vndr_cap = pci_find_cap(&dev->dev, PCI_CAP_VNDR);

  if (vndr_cap < 0)
    {
      pcierr("Ivshmem[%d] missing vendor capability\n", g_ivshmnet_dev_count);
      return -ENODEV;
    }

  vndr_length =
    pci_cfg_read(&dev->dev, vndr_cap + JH_IVSHMEM_VND_LENGTH, 1);

  if (vndr_length == JH_IVSHMEM_VND_LENGTH_NO_ADDR)
    {
      mem->paddress = pci_get_bar64(&dev->dev, 2);
    }
  else
    {
      mem->paddress =
        ((uintptr_t)pci_cfg_read(&dev->dev,
          vndr_cap + JH_IVSHMEM_VND_ADDR + 4, 4) << 32);
      mem->paddress |=
        ((uintptr_t)pci_cfg_read(&dev->dev,
          vndr_cap + JH_IVSHMEM_VND_ADDR, 4));
    }

  mem->size =
        (pci_cfg_read(&dev->dev, vndr_cap + JH_IVSHMEM_VND_ST_SIZE, 4));
  mem->readonly = true;

  mem->address = (uintptr_t)pci_ioremap(&dev->dev, mem->paddress, mem->size);
  if (!mem->address)
      return -EBUSY;

  dev->state_table = (volatile uint32_t *)mem->address;

  pciinfo("Ivshmem-net[%d] State Table phy_addr:"
          "0x%lx virt_addr: 0x%lx, size: 0x%lx\n",
          g_ivshmnet_dev_count, mem->paddress, mem->address, mem->size);

  mem++;

  rw_section_addr = (mem - 1)->paddress + (mem - 1)->size;

  io_section_addr = rw_section_addr +
    (pci_cfg_read(&dev->dev, vndr_cap + JH_IVSHMEM_VND_RW_SIZE, 4));

  io_section_size =
        (pci_cfg_read(&dev->dev, vndr_cap + JH_IVSHMEM_VND_IO_SIZE, 4));

  if (!io_section_size)
    {
      pcierr("Ivshmem-net[%d] I/O region does not exist");
    }

  mem->paddress = io_section_addr + (!dev->peer_id) * io_section_size;
  mem->size = io_section_size;
  mem->readonly = false;

  mem->address =
    (uintptr_t)pci_ioremap(&dev->dev, mem->paddress, mem->size);
  if (!mem->address)
    {
      pciinfo("TX region mapping failed");
      return -EBUSY;
    }

  pciinfo("Ivshmem-net[%d] TX region phy_addr: " \
          "0x%lx virt_addr: 0x%lx, size: 0x%lx\n",
           g_ivshmnet_dev_count, mem->paddress, mem->address, mem->size);

  memset((void *)mem->address, 0, mem->size);

  mem++;

  mem->paddress = io_section_addr + (!!dev->peer_id) * io_section_size;
  mem->size = io_section_size;
  mem->readonly = true;

  mem->address =
    (uintptr_t)pci_ioremap(&dev->dev, mem->paddress, mem->size);
  if (!mem->address)
      return -EBUSY;

  pciinfo("Ivshmem-net[%d] RX region phy_addr: " \
          "0x%lx virt_addr: 0x%lx, size: 0x%lx\n",
           g_ivshmnet_dev_count, mem->paddress, mem->address, mem->size);

  pci_cfg_write(&dev->dev, vndr_cap + JH_IVSHMEM_VND_PCTL, 0, 1);

  msix_cap = pci_find_cap(&dev->dev, PCI_CAP_MSIX);

  dev->vectors =
    (pci_cfg_read(&dev->dev, msix_cap + PCI_MSIX_MCR, 2) & \
     PCI_MSIX_MCR_TBL_MASK) + 1;

  if(dev->vectors != IVSHMNET_NUM_VECTORS)
    {
      pcierr("Ivshmem-net[%d] Number of vector must be 2\n");
      return -EBUSY;
    }

  (void)irq_attach(CONFIG_IVSHMNET_BASE_IRQ + g_ivshmnet_dev_count * 2,
                   (xcpt_t)ivshmnet_state_handler, dev);
  (void)irq_attach(CONFIG_IVSHMNET_BASE_IRQ + g_ivshmnet_dev_count * 2 + 1,
                   (xcpt_t)ivshmnet_interrupt, dev);

  pci_msix_register(&dev->dev,
      CONFIG_IVSHMNET_BASE_IRQ + g_ivshmnet_dev_count * 2, 0);
  pci_msix_register(&dev->dev,
      CONFIG_IVSHMNET_BASE_IRQ + g_ivshmnet_dev_count * 2 + 1, 1);

  if (ivshmnet_calc_qsize(dev))
      return -EINVAL;

  /* fill in the rest of the structure */
  dev->sk_dev.d_buf     = dev->pktbuf;       /* Single packet buffer */
  dev->sk_dev.d_ifup    = ivshmnet_ifup;     /* I/F up (new IP address) callback */
  dev->sk_dev.d_ifdown  = ivshmnet_ifdown;   /* I/F down callback */
  dev->sk_dev.d_txavail = ivshmnet_txavail;  /* New TX data callback */
#ifdef CONFIG_NET_IGMP
  dev->sk_dev.d_addmac  = ivshmnet_addmac;   /* Add multicast MAC address */
  dev->sk_dev.d_rmmac   = ivshmnet_rmmac;    /* Remove multicast MAC address */
#endif
#ifdef CONFIG_NETDEV_IOCTL
  dev->sk_dev.d_ioctl   = ivshmnet_ioctl;    /* Handle network IOCTL commands */
#endif
  dev->sk_dev.d_private = (void *)dev;       /* Used to recover private state from dev */

  /* Create a watchdog for timing polling for and timing of transmissions */

  dev->sk_txpoll        = wd_create();       /* Create periodic poll timer */
  dev->sk_txtimeout     = wd_create();       /* Create TX timeout timer */

  DEBUGASSERT(dev->sk_txpoll != NULL && dev->sk_txtimeout != NULL);

  /* Put the interface in the down state.  This usually amounts to resetting
   * the device and/or calling ivshmnet_ifdown().
   */

  dev->sk_bifup = false;

  /* Register the device with the OS so that socket IOCTLs can be performed */

  (void)netdev_register(&dev->sk_dev, NET_LL_ETHERNET);

  g_ivshmnet_dev_count++;
  pciinfo("Initialized Ivshmem-net[%d]\n", g_ivshmnet_dev_count);

  return OK;
}

/*****************************************************************************
 * Public Data
 *****************************************************************************/

struct pci_dev_type_s pci_ivshmnet =
{
    .vendor = JH_IVSHMEM_VENDORID,
    .device = JH_IVSHMEM_DEVICEID,
    .class_rev = JH_IVSHMEM_PROTOCOL_NET,
    .name = "Jailhouse Ivshmem-net",
    .probe = ivshmnet_probe
};
/*
 * Jailhouse, a Linux-based partitioning hypervisor
 *
 * Copyright (c) Siemens AG, 2014-2017
 *
 * Hand crafted configuration for Nuttx in cRTOS
 *
 * Copyright (c) ChungFan Yang @ Fixstars corporation, 2020
 *    <[email protected]>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <jailhouse/types.h>
#include <jailhouse/cell-config.h>

struct {
	struct jailhouse_cell_desc cell;
	__u64 cpus[1];
	struct jailhouse_memory mem_regions[9];
	struct jailhouse_irqchip irqchips[1];
	struct jailhouse_pio pio_regions[5];
	struct jailhouse_cache cache_regions[1];
	struct jailhouse_pci_device pci_devices[2];
	struct jailhouse_pci_capability pci_caps[0];

} __attribute__((packed)) config = {
	.cell = {
		.signature = JAILHOUSE_CELL_DESC_SIGNATURE,
		.revision = JAILHOUSE_CONFIG_REVISION,
		.name = "nuttx",
    .flags = JAILHOUSE_CELL_PASSIVE_COMMREG,
		.cpu_set_size = sizeof(config.cpus),
		.num_memory_regions = ARRAY_SIZE(config.mem_regions),
		.num_irqchips = ARRAY_SIZE(config.irqchips),
		.num_cache_regions = ARRAY_SIZE(config.cache_regions),
		.num_pio_regions = ARRAY_SIZE(config.pio_regions),
		.num_pci_devices = ARRAY_SIZE(config.pci_devices),
    		.num_pci_caps = ARRAY_SIZE(config.pci_caps),
		.console = {
			.type = JAILHOUSE_CON_TYPE_8250,
			.flags = JAILHOUSE_CON_ACCESS_PIO,
			.address = 0x3e8,
		},
	},

	.cpus = {
		0x8,
	},

	.mem_regions = {
		/* cRTOS Shadow memory */
		{
			.phys_start = 0x176000000,
			.virt_start = 0x176000000,
			.size = 0x1000,
			.flags = JAILHOUSE_MEM_READ | JAILHOUSE_MEM_ROOTSHARED,
		},
		{
			.phys_start = 0x176001000,
			.virt_start = 0,
			.size = 0x40000000,
			.flags = JAILHOUSE_MEM_READ | JAILHOUSE_MEM_WRITE |
        JAILHOUSE_MEM_EXECUTE | JAILHOUSE_MEM_LOADABLE |
        JAILHOUSE_MEM_ROOTSHARED,
		},
		{
			.phys_start = 0x1b6001000,
			.virt_start = 0x1b6001000,
			.size = 0x4000,
			.flags = JAILHOUSE_MEM_READ | JAILHOUSE_MEM_ROOTSHARED,
		},
		{
			.phys_start = 0x1b6005000,
			.virt_start = 0x1b6005000,
			.size = 0x4000,
			.flags = JAILHOUSE_MEM_READ| JAILHOUSE_MEM_WRITE | JAILHOUSE_MEM_ROOTSHARED,
		},
		/* communication region */ {
			//.virt_start = 0x80000000,
			.virt_start = 0x1b6010000,
			.size       = 0x00001000,
			.flags = JAILHOUSE_MEM_READ | JAILHOUSE_MEM_WRITE |
				JAILHOUSE_MEM_COMM_REGION,
		},
		JAILHOUSE_SHMEM_NET_REGIONS(0x1b6205000, 1),
	},
	.cache_regions = {
		{
			.start = 0,
			.size = 2,
			.type = JAILHOUSE_CACHE_L3,
		},
	},

	.irqchips = {
		/* IOAPIC 0, GSI base 0 */
		{
			.address = 0xfec00000,
			.id = 0xff00,
			.pin_bitmap = {
				0x000001
			},
		},
	},

	.pio_regions = {
		/* Port I/O: 0020-0021 : pic1 */
    		PIO_RANGE(0x20, 0x2),
		/* Port I/O: 00a0-00a1 : pic2 */
    		PIO_RANGE(0xa0, 0x2),
		/* Port I/O: 03f8-03ff : serial */
		PIO_RANGE(0x3f8, 0x8),
		/* Port I/O: 03e8-03ef : serial */
		PIO_RANGE(0x3e8, 0x8), 
		PIO_RANGE(0x2f8, 8), /* serial 2 */
		//PIO_RANGE(0xe010, 8), /* OXPCIe952 serial */
	},
	.pci_devices = {
		{ /* Shadow */
			.type = JAILHOUSE_PCI_TYPE_IVSHMEM,
			.domain = 0x0,
			.iommu = 0,
			.bdf = 0x0d << 3,
			.bar_mask = JAILHOUSE_IVSHMEM_BAR_MASK_MSIX,
			.num_msix_vectors = 16,
			.shmem_regions_start = 0,
			.shmem_dev_id = 1,
			.shmem_peers = 2,
			.shmem_protocol = 0x0002,
		},
		 /*IVSHMEM-NET */
		{ 
			.type = JAILHOUSE_PCI_TYPE_IVSHMEM,
			.domain = 0x0,
			.iommu = 0,
			.bdf = 0x0e << 3,
			.bar_mask = JAILHOUSE_IVSHMEM_BAR_MASK_MSIX,
			.num_msix_vectors = 2,
			.shmem_regions_start = 5,
			.shmem_dev_id = 1,
			.shmem_peers = 2,
			.shmem_protocol = JAILHOUSE_SHMEM_PROTO_VETH,
		},
	},

	.pci_caps = {
		
    },
};

Reply via email to