Hi folks,

I just committed a new netback driver to head.  Some details are below.  If
you have more questions, ask Alan and John (CCed).

Thanks,

Ken

----- Forwarded message from "Kenneth D. Merry" <k...@freebsd.org> -----

From: "Kenneth D. Merry" <k...@freebsd.org>
Date: Thu, 26 Jan 2012 16:35:09 +0000 (UTC)
To: src-committ...@freebsd.org, svn-src-...@freebsd.org,
        svn-src-h...@freebsd.org
Subject: svn commit: r230587 - in head: share/man/man4 sys/dev/xen/blkback 
sys/dev/xen/netback sys/kern sys/xen/interface/io

Author: ken
Date: Thu Jan 26 16:35:09 2012
New Revision: 230587
URL: http://svn.freebsd.org/changeset/base/230587

Log:
  Xen netback driver rewrite.
  
  share/man/man4/Makefile,
  share/man/man4/xnb.4,
  sys/dev/xen/netback/netback.c,
  sys/dev/xen/netback/netback_unit_tests.c:
  
        Rewrote the netback driver for xen to attach properly via newbus
        and work properly in both HVM and PVM mode (only HVM is tested).
        Works with the in-tree FreeBSD netfront driver or the Windows
        netfront driver from SuSE.  Has not been extensively tested with
        a Linux netfront driver.  Does not implement LRO, TSO, or
        polling.  Includes unit tests that may be run through sysctl
        after compiling with XNB_DEBUG defined.
  
  sys/dev/xen/blkback/blkback.c,
  sys/xen/interface/io/netif.h:
  
        Comment elaboration.
  
  sys/kern/uipc_mbuf.c:
  
        Fix page fault in kernel mode when calling m_print() on a
        null mbuf.  Since m_print() is only used for debugging, there
        are no performance concerns for extra error checking code.
  
  sys/kern/subr_scanf.c:
  
        Add the "hh" and "ll" width specifiers from C99 to scanf().
        A few callers were already using "ll" even though scanf()
        was handling it as "l".
  
  Submitted by: Alan Somers <al...@spectralogic.com>
  Submitted by: John Suykerbuyk <jo...@spectralogic.com>
  Sponsored by: Spectra Logic
  MFC after:    1 week
  Reviewed by:  ken

Added:
  head/share/man/man4/xnb.4   (contents, props changed)
  head/sys/dev/xen/netback/netback_unit_tests.c   (contents, props changed)
Modified:
  head/share/man/man4/Makefile
  head/sys/dev/xen/blkback/blkback.c
  head/sys/dev/xen/netback/netback.c
  head/sys/kern/subr_scanf.c
  head/sys/kern/uipc_mbuf.c
  head/sys/xen/interface/io/netif.h

Modified: head/share/man/man4/Makefile
==============================================================================
--- head/share/man/man4/Makefile        Thu Jan 26 15:23:45 2012        (r230586)
+++ head/share/man/man4/Makefile        Thu Jan 26 16:35:09 2012        (r230587)
@@ -531,6 +531,7 @@ MAN=        aac.4 \
        ${_xen.4} \
        xhci.4 \
        xl.4 \
+       ${_xnb.4} \
        xpt.4 \
        zero.4 \
        zyd.4
@@ -731,6 +732,7 @@ _urtw.4=    urtw.4
 _viawd.4=      viawd.4
 _wpi.4=                wpi.4
 _xen.4=                xen.4
+_xnb.4=                xnb.4
 
 MLINKS+=lindev.4 full.4
 .endif

Added: head/share/man/man4/xnb.4
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/share/man/man4/xnb.4   Thu Jan 26 16:35:09 2012        (r230587)
@@ -0,0 +1,134 @@
+.\" Copyright (c) 2012 Spectra Logic Corporation
+.\"    All rights reserved.
+.\"
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions, and the following disclaimer,
+.\"    without modification.
+.\" 2. Redistributions in binary form must reproduce at minimum a disclaimer
+.\"    substantially similar to the "NO WARRANTY" disclaimer below
+.\"    ("Disclaimer") and any redistribution must be conditioned upon
+.\"    including a substantially similar Disclaimer requirement for further
+.\"    binary redistribution.
+.\" 
+.\" NO WARRANTY
+.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+.\" "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+.\" LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+.\" A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+.\" HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+.\" STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+.\" IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+.\" POSSIBILITY OF SUCH DAMAGES.
+.\" 
+.\" Authors: Alan Somers         (Spectra Logic Corporation)
+.\" 
+.\" $FreeBSD$
+.\"
+
+.Dd January 6, 2012
+.Dt XNB 4
+.Os 
+.Sh NAME
+.Nm xnb
+.Nd "Xen Paravirtualized Backend Ethernet Driver"
+.Sh SYNOPSIS
+To compile this driver into the kernel, place the following lines in your
+kernel configuration file:
+.Bd -ragged -offset indent
+.Cd "options XENHVM"
+.Cd "device xenpci"
+.Ed
+.Sh DESCRIPTION
+The
+.Nm
+driver provides the back half of a paravirtualized
+.Xr xen 4
+network connection.  The netback and netfront drivers appear to their
+respective operating systems as Ethernet devices linked by a crossover cable.
+Typically,
+.Nm
+will run on Domain 0 and the netfront driver will run on a guest domain.
+However, it is also possible to run
+.Nm
+on a guest domain.  It may be bridged or routed to provide the netfront's
+domain access to other guest domains or to a physical network.
+.Pp
+In most respects, the
+.Nm
+device appears to the OS as any other Ethernet device.  It can be configured at
+runtime entirely with
+.Xr ifconfig 8 .
+In particular, it supports MAC changing, arbitrary MTU sizes, checksum
+offload for IP, UDP, and TCP for both receive and transmit, and TSO.  However,
+see
+.Sx CAVEATS
+before enabling txcsum, rxcsum, or tso.
+.Sh SYSCTL VARIABLES
+The following read-only variables are available via
+.Xr sysctl 8 :
+.Bl -tag -width indent
+.It Va dev.xnb.%d.dump_rings
+Displays information about the ring buffers used to pass requests between the
+netfront and netback.  Mostly useful for debugging, but can also be used to
+get traffic statistics.
+.It Va dev.xnb.%d.unit_test_results
+Runs a builtin suite of unit tests and displays the results.  Does not affect
+the operation of the driver in any way.  Note that the test suite simulates
+error conditions; this will result in error messages being printed to the
+system log.
+.Sh CAVEATS
+Packets sent through Xennet pass over shared memory, so the protocol includes
+no form of link-layer checksum or CRC.  Furthermore, Xennet drivers always
+report to their hosts that they support receive and transmit checksum
+offloading.  They "offload" the checksum calculation by simply skipping it.
+That works fine for packets that are exchanged between two domains on the same
+machine.  However, when a Xennet interface is bridged to a physical interface,
+a correct checksum must be attached to any packets bound for that physical
+interface.  Currently, FreeBSD lacks any mechanism for an ethernet device to
+inform the OS that newly received packets are valid even though their checksums
+are not.  So if the netfront driver is configured to offload checksum
+calculations, it will pass non-checksummed packets to
+.Nm ,
+which must then calculate the checksum in software before passing the packet
+to the OS.
+.Pp
+For this reason, it is recommended that if
+.Nm
+is bridged to a physical interface, then transmit checksum offloading should be
+disabled on the netfront.  The Xennet protocol does not have any mechanism for
+the netback to request the netfront to do this; the operator must do it
+manually.
+.Sh SEE ALSO
+.Xr arp 4 ,
+.Xr netintro 4 ,
+.Xr ng_ether 4 ,
+.Xr ifconfig 8 ,
+.Xr xen 4
+.Sh HISTORY
+The
+.Nm
+device driver first appeared in
+.Fx 10.0
+.
+.Sh AUTHORS
+The
+.Nm
+driver was written by
+.An Alan Somers
+.Aq al...@spectralogic.com
+and
+.An John Suykerbuyk
+.Aq jo...@spectralogic.com
+.Sh BUGS
+The
+.Nm
+driver does not properly checksum UDP datagrams that span more than one
+Ethernet frame.  Nor does it correctly checksum IPv6 packets.  To work around
+that bug, disable transmit checksum offloading on the netfront driver.

Modified: head/sys/dev/xen/blkback/blkback.c
==============================================================================
--- head/sys/dev/xen/blkback/blkback.c  Thu Jan 26 15:23:45 2012        (r230586)
+++ head/sys/dev/xen/blkback/blkback.c  Thu Jan 26 16:35:09 2012        (r230587)
@@ -3434,6 +3434,10 @@ xbb_shutdown(struct xbb_softc *xbb)
 
        DPRINTF("\n");
 
+       /*
+        * Before unlocking mutex, set this flag to prevent other threads from
+        * getting into this function
+        */
        xbb->flags |= XBBF_IN_SHUTDOWN;
        mtx_unlock(&xbb->lock);
 

Modified: head/sys/dev/xen/netback/netback.c
==============================================================================
--- head/sys/dev/xen/netback/netback.c  Thu Jan 26 15:23:45 2012        (r230586)
+++ head/sys/dev/xen/netback/netback.c  Thu Jan 26 16:35:09 2012        (r230587)
@@ -1,1595 +1,2535 @@
-/*
- * Copyright (c) 2006, Cisco Systems, Inc.
+/*-
+ * Copyright (c) 2009-2011 Spectra Logic Corporation
  * All rights reserved.
  *
- * Redistribution and use in source and binary forms, with or without 
- * modification, are permitted provided that the following conditions 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
  * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
  *
- * 1. Redistributions of source code must retain the above copyright 
- *    notice, this list of conditions and the following disclaimer. 
- * 2. Redistributions in binary form must reproduce the above copyright 
- *    notice, this list of conditions and the following disclaimer in the 
- *    documentation and/or other materials provided with the distribution. 
- * 3. Neither the name of Cisco Systems, Inc. nor the names of its contributors 
- *    may be used to endorse or promote products derived from this software 
- *    without specific prior written permission. 
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
- * POSSIBILITY OF SUCH DAMAGE.
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ * Authors: Justin T. Gibbs     (Spectra Logic Corporation)
+ *          Alan Somers         (Spectra Logic Corporation)
+ *          John Suykerbuyk     (Spectra Logic Corporation)
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
+
+/**
+ * \file netback.c
+ *
+ * \brief Device driver supporting the vending of network access
+ *       from this FreeBSD domain to other domains.
+ */
+#include "opt_inet.h"
+#include "opt_global.h"
+
 #include "opt_sctp.h"
 
 #include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/sockio.h>
-#include <sys/mbuf.h>
-#include <sys/malloc.h>
 #include <sys/kernel.h>
-#include <sys/socket.h>
-#include <sys/queue.h>
-#include <sys/taskqueue.h>
 
-#include <sys/module.h>
 #include <sys/bus.h>
+#include <sys/module.h>
+#include <sys/rman.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
 #include <sys/sysctl.h>
 
 #include <net/if.h>
 #include <net/if_arp.h>
-#include <net/if_types.h>
 #include <net/ethernet.h>
-#include <net/if_bridgevar.h>
+#include <net/if_dl.h>
+#include <net/if_media.h>
+#include <net/if_types.h>
 
-#include <netinet/in_systm.h>
 #include <netinet/in.h>
-#include <netinet/in_var.h>
 #include <netinet/ip.h>
+#include <netinet/if_ether.h>
+#if __FreeBSD_version >= 700000
 #include <netinet/tcp.h>
-#include <netinet/udp.h>
-#ifdef SCTP
-#include <netinet/sctp.h>
-#include <netinet/sctp_crc32.h>
 #endif
+#include <netinet/ip_icmp.h>
+#include <netinet/udp.h>
+#include <machine/in_cksum.h>
 
-#include <vm/vm_extern.h>
-#include <vm/vm_kern.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
 
-#include <machine/in_cksum.h>
-#include <machine/xen-os.h>
-#include <machine/hypervisor.h>
-#include <machine/hypervisor-ifs.h>
-#include <machine/xen_intr.h>
-#include <machine/evtchn.h>
-#include <machine/xenbus.h>
-#include <machine/gnttab.h>
-#include <machine/xen-public/memory.h>
-#include <dev/xen/xenbus/xenbus_comms.h>
+#include <machine/_inttypes.h>
+#include <machine/xen/xen-os.h>
+#include <machine/xen/xenvar.h>
+
+#include <xen/evtchn.h>
+#include <xen/xen_intr.h>
+#include <xen/interface/io/netif.h>
+#include <xen/xenbus/xenbusvar.h>
+
+/*--------------------------- Compile-time Tunables --------------------------*/
 
+/*---------------------------------- Macros ----------------------------------*/
+/**
+ * Custom malloc type for all driver allocations.
+ */
+static MALLOC_DEFINE(M_XENNETBACK, "xnb", "Xen Net Back Driver Data");
 
-#ifdef XEN_NETBACK_DEBUG
-#define DPRINTF(fmt, args...) \
-    printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
+#define        XNB_SG  1       /* netback driver supports feature-sg */
+#define        XNB_GSO_TCPV4 1 /* netback driver supports feature-gso-tcpv4 */
+#define        XNB_RX_COPY 1   /* netback driver supports feature-rx-copy */
+#define        XNB_RX_FLIP 0   /* netback driver does not support feature-rx-flip */
+
+#undef XNB_DEBUG
+#define        XNB_DEBUG /* hardcode on during development */
+
+#ifdef XNB_DEBUG
+#define        DPRINTF(fmt, args...) \
+       printf("xnb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
 #else
-#define DPRINTF(fmt, args...) ((void)0)
+#define        DPRINTF(fmt, args...) do {} while (0)
 #endif
 
-#ifdef XEN_NETBACK_DEBUG_LOTS
-#define DDPRINTF(fmt, args...) \
-    printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
-#define DPRINTF_MBUF(_m) print_mbuf(_m, 0)
-#define DPRINTF_MBUF_LEN(_m, _len) print_mbuf(_m, _len)
-#else
-#define DDPRINTF(fmt, args...) ((void)0)
-#define DPRINTF_MBUF(_m) ((void)0)
-#define DPRINTF_MBUF_LEN(_m, _len) ((void)0)
+/* Default length for stack-allocated grant tables */
+#define        GNTTAB_LEN      (64)
+
+/* Features supported by all backends.  TSO and LRO can be negotiated */
+#define        XNB_CSUM_FEATURES       (CSUM_TCP | CSUM_UDP)
+
+#define        NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
+#define        NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
+
+/**
+ * Two argument version of the standard macro.  Second argument is a tentative
+ * value of req_cons
+ */
+#define        RING_HAS_UNCONSUMED_REQUESTS_2(_r, cons) ({                     \
+       unsigned int req = (_r)->sring->req_prod - cons;                \
+       unsigned int rsp = RING_SIZE(_r) -                              \
+       (cons - (_r)->rsp_prod_pvt);                                    \
+       req < rsp ? req : rsp;                                          \
+})
+
+#define        virt_to_mfn(x) (vtomach(x) >> PAGE_SHIFT)
+#define        virt_to_offset(x) ((x) & (PAGE_SIZE - 1))
+
+/**
+ * Predefined array type of grant table copy descriptors.  Used to pass around
+ * statically allocated memory structures.
+ */
+typedef struct gnttab_copy gnttab_copy_table[GNTTAB_LEN];
+
+/*--------------------------- Forward Declarations ---------------------------*/
+struct xnb_softc;
+struct xnb_pkt;
+
+static void    xnb_attach_failed(struct xnb_softc *xnb,
+                                 int err, const char *fmt, ...)
+                                 __printflike(3,4);
+static int     xnb_shutdown(struct xnb_softc *xnb);
+static int     create_netdev(device_t dev);
+static int     xnb_detach(device_t dev);
+static int     xen_net_read_mac(device_t dev, uint8_t mac[]);
+static int     xnb_ifmedia_upd(struct ifnet *ifp);
+static void    xnb_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr);
+static void    xnb_intr(void *arg);
+static int     xnb_send(netif_rx_back_ring_t *rxb, domid_t otherend,
+                        const struct mbuf *mbufc, gnttab_copy_table gnttab);
+static int     xnb_recv(netif_tx_back_ring_t *txb, domid_t otherend,
+                        struct mbuf **mbufc, struct ifnet *ifnet,
+                        gnttab_copy_table gnttab);
+static int     xnb_ring2pkt(struct xnb_pkt *pkt,
+                            const netif_tx_back_ring_t *tx_ring,
+                            RING_IDX start);
+static void    xnb_txpkt2rsp(const struct xnb_pkt *pkt,
+                             netif_tx_back_ring_t *ring, int error);
+static struct mbuf *xnb_pkt2mbufc(const struct xnb_pkt *pkt, struct ifnet *ifp);
+static int     xnb_txpkt2gnttab(const struct xnb_pkt *pkt,
+                                const struct mbuf *mbufc,
+                                gnttab_copy_table gnttab,
+                                const netif_tx_back_ring_t *txb,
+                                domid_t otherend_id);
+static void    xnb_update_mbufc(struct mbuf *mbufc,
+                                const gnttab_copy_table gnttab, int n_entries);
+static int     xnb_mbufc2pkt(const struct mbuf *mbufc,
+                             struct xnb_pkt *pkt,
+                             RING_IDX start, int space);
+static int     xnb_rxpkt2gnttab(const struct xnb_pkt *pkt,
+                                const struct mbuf *mbufc,
+                                gnttab_copy_table gnttab,
+                                const netif_rx_back_ring_t *rxb,
+                                domid_t otherend_id);
+static int     xnb_rxpkt2rsp(const struct xnb_pkt *pkt,
+                             const gnttab_copy_table gnttab, int n_entries,
+                             netif_rx_back_ring_t *ring);
+static void    xnb_add_mbuf_cksum(struct mbuf *mbufc);
+static void    xnb_stop(struct xnb_softc*);
+static int     xnb_ioctl(struct ifnet*, u_long, caddr_t);
+static void    xnb_start_locked(struct ifnet*);
+static void    xnb_start(struct ifnet*);
+static void    xnb_ifinit_locked(struct xnb_softc*);
+static void    xnb_ifinit(void*);
+#ifdef XNB_DEBUG
+static int     xnb_unit_test_main(SYSCTL_HANDLER_ARGS);
+static int     xnb_dump_rings(SYSCTL_HANDLER_ARGS);
 #endif
+/*------------------------------ Data Structures -----------------------------*/
+
+
+/**
+ * Representation of a xennet packet.  Simplified version of a packet as
+ * stored in the Xen tx ring.  Applicable to both RX and TX packets
+ */
+struct xnb_pkt{
+       /**
+        * Array index of the first data-bearing (i.e., not extra info) entry
+        * for this packet
+        */
+       RING_IDX        car;
+
+       /**
+        * Array index of the second data-bearing entry for this packet.
+        * Invalid if the packet has only one data-bearing entry.  If the
+        * packet has more than two data-bearing entries, then the second
+        * through the last will be sequential modulo the ring size
+        */
+       RING_IDX        cdr;
 
-#define WPRINTF(fmt, args...) \
-    printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
+       /**
+        * Optional extra info.  Only valid if flags contains
+        * NETTXF_extra_info.  Note that extra.type will always be
+        * XEN_NETIF_EXTRA_TYPE_GSO.  Currently, no known netfront or netback
+        * driver will ever set XEN_NETIF_EXTRA_TYPE_MCAST_*
+        */
+       netif_extra_info_t extra;
+
+       /** Size of entire packet in bytes.       */
+       uint16_t        size;
 
-#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
-#define BUG_ON PANIC_IF
+       /** The size of the first entry's data in bytes */
+       uint16_t        car_size;
 
-#define IFNAME(_np) (_np)->ifp->if_xname
+       /**
+        * Either NETTXF_ or NETRXF_ flags.  Note that the flag values are
+        * not the same for TX and RX packets
+        */
+       uint16_t        flags;
 
-#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
-#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
+       /**
+        * The number of valid data-bearing entries (either netif_tx_request's
+        * or netif_rx_response's) in the packet.  If this is 0, it means the
+        * entire packet is invalid.
+        */
+       uint16_t        list_len;
 
-struct ring_ref {
-       vm_offset_t va;
-       grant_handle_t handle;
-       uint64_t bus_addr;
+       /** There was an error processing the packet */
+       uint8_t         error;
 };
 
-typedef struct netback_info {
+/** xnb_pkt method: initialize it */
+static inline void
+xnb_pkt_initialize(struct xnb_pkt *pxnb)
+{
+       bzero(pxnb, sizeof(*pxnb));
+}
 
-       /* Schedule lists */
-       STAILQ_ENTRY(netback_info) next_tx;
-       STAILQ_ENTRY(netback_info) next_rx;
-       int on_tx_sched_list;
-       int on_rx_sched_list;
-
-       struct xenbus_device *xdev;
-       XenbusState frontend_state;
-
-       domid_t domid;
-       int handle;
-       char *bridge;
-
-       int rings_connected;
-       struct ring_ref tx_ring_ref;
-       struct ring_ref rx_ring_ref;
-       netif_tx_back_ring_t tx;
-       netif_rx_back_ring_t rx;
-       evtchn_port_t evtchn;
-       int irq;
-       void *irq_cookie;
+/** xnb_pkt method: mark the packet as valid */
+static inline void
+xnb_pkt_validate(struct xnb_pkt *pxnb)
+{
+       pxnb->error = 0;
+};
 
-       struct ifnet *ifp;
-       int ref_cnt;
+/** xnb_pkt method: mark the packet as invalid */
+static inline void
+xnb_pkt_invalidate(struct xnb_pkt *pxnb)
+{
+       pxnb->error = 1;
+};
 
-       device_t ndev;
-       int attached;
-} netif_t;
-
-
-#define MAX_PENDING_REQS 256
-#define PKT_PROT_LEN 64
-
-static struct {
-       netif_tx_request_t req;
-       netif_t *netif;
-} pending_tx_info[MAX_PENDING_REQS];
-static uint16_t pending_ring[MAX_PENDING_REQS];
-typedef unsigned int PEND_RING_IDX;
-#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
-static PEND_RING_IDX pending_prod, pending_cons;
-#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
-
-static unsigned long mmap_vstart;
-#define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE))
-
-/* Freed TX mbufs get batched on this ring before return to pending_ring. */
-static uint16_t dealloc_ring[MAX_PENDING_REQS];
-static PEND_RING_IDX dealloc_prod, dealloc_cons;
-
-static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
-static mmu_update_t rx_mmu[NET_RX_RING_SIZE];
-static gnttab_transfer_t grant_rx_op[NET_RX_RING_SIZE];
-
-static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
-static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
-static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];
-
-static struct task net_tx_task, net_rx_task;
-static struct callout rx_task_callout;
-
-static STAILQ_HEAD(netback_tx_sched_list, netback_info) tx_sched_list =
-       STAILQ_HEAD_INITIALIZER(tx_sched_list);
-static STAILQ_HEAD(netback_rx_sched_list, netback_info) rx_sched_list =
-       STAILQ_HEAD_INITIALIZER(rx_sched_list);
-static struct mtx tx_sched_list_lock;
-static struct mtx rx_sched_list_lock;
-
-static int vif_unit_maker = 0;
-
-/* Protos */
-static void netback_start(struct ifnet *ifp);
-static int netback_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data);
-static int vif_add_dev(struct xenbus_device *xdev);
-static void disconnect_rings(netif_t *netif);
+/** xnb_pkt method: Check whether the packet is valid */
+static inline int
+xnb_pkt_is_valid(const struct xnb_pkt *pxnb)
+{
+       return (! pxnb->error);
+}
+
+#ifdef XNB_DEBUG
+/** xnb_pkt method: print the packet's contents in human-readable format*/
+static void __unused
+xnb_dump_pkt(const struct xnb_pkt *pkt) {
+       if (pkt == NULL) {
+         DPRINTF("Was passed a null pointer.\n");
+         return;
+       }
+       DPRINTF("pkt address= %p\n", pkt);
+       DPRINTF("pkt->size=%d\n", pkt->size);
+       DPRINTF("pkt->car_size=%d\n", pkt->car_size);
+       DPRINTF("pkt->flags=0x%04x\n", pkt->flags);
+       DPRINTF("pkt->list_len=%d\n", pkt->list_len);
+       /* DPRINTF("pkt->extra");       TODO */
+       DPRINTF("pkt->car=%d\n", pkt->car);
+       DPRINTF("pkt->cdr=%d\n", pkt->cdr);
+       DPRINTF("pkt->error=%d\n", pkt->error);
+}
+#endif /* XNB_DEBUG */
 
-#ifdef XEN_NETBACK_DEBUG_LOTS
-/* Debug code to display the contents of an mbuf */
 static void
-print_mbuf(struct mbuf *m, int max)
+xnb_dump_txreq(RING_IDX idx, const struct netif_tx_request *txreq)
 {
-       int i, j=0;
-       printf("mbuf %08x len = %d", (unsigned int)m, m->m_pkthdr.len);
-       for (; m; m = m->m_next) {
-               unsigned char *d = m->m_data;
-               for (i=0; i < m->m_len; i++) {
-                       if (max && j == max)
-                               break;
-                       if ((j++ % 16) == 0)
-                               printf("\n%04x:", j);
-                       printf(" %02x", d[i]);
-               }
+       if (txreq != NULL) {
+               DPRINTF("netif_tx_request index =%u\n", idx);
+               DPRINTF("netif_tx_request.gref  =%u\n", txreq->gref);
+               DPRINTF("netif_tx_request.offset=%hu\n", txreq->offset);
+               DPRINTF("netif_tx_request.flags =%hu\n", txreq->flags);
+               DPRINTF("netif_tx_request.id    =%hu\n", txreq->id);
+               DPRINTF("netif_tx_request.size  =%hu\n", txreq->size);
        }
-       printf("\n");
 }
-#endif
 
 
-#define MAX_MFN_ALLOC 64
-static unsigned long mfn_list[MAX_MFN_ALLOC];
-static unsigned int alloc_index = 0;
+/**
+ * \brief Configuration data for a shared memory request ring
+ *        used to communicate with the front-end client of
+ *        this driver.
+ */
+struct xnb_ring_config {
+       /**
+        * Runtime structures for ring access.  Unfortunately, TX and RX rings
+        * use different data structures, and that cannot be changed since it
+        * is part of the interdomain protocol.
+        */
+       union{
+               netif_rx_back_ring_t      rx_ring;
+               netif_tx_back_ring_t      tx_ring;
+       } back_ring;
+
+       /**
+        * The device bus address returned by the hypervisor when
+        * mapping the ring and required to unmap it when a connection
+        * is torn down.
+        */
+       uint64_t        bus_addr;
 
-static unsigned long
-alloc_mfn(void)
-{
-       unsigned long mfn = 0;
-       struct xen_memory_reservation reservation = {
-               .extent_start = mfn_list,
-               .nr_extents   = MAX_MFN_ALLOC,
-               .extent_order = 0,
-               .domid        = DOMID_SELF
-       };
-       if ( unlikely(alloc_index == 0) )
-               alloc_index = HYPERVISOR_memory_op(
-                       XENMEM_increase_reservation, &reservation);
-       if ( alloc_index != 0 )
-               mfn = mfn_list[--alloc_index];
-       return mfn;
-}
+       /** The pseudo-physical address where ring memory is mapped.*/
+       uint64_t        gnt_addr;
 
-static unsigned long
-alloc_empty_page_range(unsigned long nr_pages)
+       /** KVA address where ring memory is mapped. */
+       vm_offset_t     va;
+
+       /**
+        * Grant table handles, one per-ring page, returned by the
+        * hypervisor upon mapping of the ring and required to
+        * unmap it when a connection is torn down.
+        */
+       grant_handle_t  handle;
+
+       /** The number of ring pages mapped for the current connection. */
+       unsigned        ring_pages;
+
+       /**
+        * The grant references, one per-ring page, supplied by the
+        * front-end, allowing us to reference the ring pages in the
+        * front-end's domain and to map these pages into our own domain.
+        */
+       grant_ref_t     ring_ref;
+};
+
+/**
+ * Per-instance connection state flags.
+ */
+typedef enum
 {
-       void *pages;
-       int i = 0, j = 0;
-       multicall_entry_t mcl[17];
-       unsigned long mfn_list[16];
-       struct xen_memory_reservation reservation = {
-               .extent_start = mfn_list,
-               .nr_extents   = 0,
-               .address_bits = 0,
-               .extent_order = 0,
-               .domid        = DOMID_SELF
-       };
+       /** Communication with the front-end has been established. */
+       XNBF_RING_CONNECTED    = 0x01,
+
+       /**
+        * Front-end requests exist in the ring and are waiting for
+        * xnb_xen_req objects to free up.
+        */
+       XNBF_RESOURCE_SHORTAGE = 0x02,
 
-       pages = malloc(nr_pages*PAGE_SIZE, M_DEVBUF, M_NOWAIT);
-       if (pages == NULL)
-               return 0;
+       /** Connection teardown has started. */
+       XNBF_SHUTDOWN          = 0x04,
 
-       memset(mcl, 0, sizeof(mcl));
+       /** A thread is already performing shutdown processing. */
+       XNBF_IN_SHUTDOWN       = 0x08
+} xnb_flag_t;
 
-       while (i < nr_pages) {
-               unsigned long va = (unsigned long)pages + (i++ * PAGE_SIZE);
+/**
+ * Types of rings.  Used for array indices and to identify a ring's control
+ * data structure type
+ */
+typedef enum{
+       XNB_RING_TYPE_TX = 0,   /* ID of TX rings, used for array indices */
+       XNB_RING_TYPE_RX = 1,   /* ID of RX rings, used for array indices */
+       XNB_NUM_RING_TYPES
+} xnb_ring_type_t;
 
-               mcl[j].op = __HYPERVISOR_update_va_mapping;
-               mcl[j].args[0] = va;
+/**
+ * Per-instance configuration data.
+ */
+struct xnb_softc {
+       /** NewBus device corresponding to this instance. */
+       device_t                dev;
+
+       /* Media related fields */
+
+       /** Generic network media state */
+       struct ifmedia          sc_media;
+
+       /** Media carrier info */
+       struct ifnet            *xnb_ifp;
+
+       /** Our own private carrier state */
+       unsigned carrier;
+
+       /** Device MAC Address */
+       uint8_t                 mac[ETHER_ADDR_LEN];
+
+       /* Xen related fields */
+
+       /**
+        * \brief The netif protocol abi in effect.
+        *
+        * There are situations where the back and front ends can
+        * have a different, native abi (e.g. intel x86_64 and
+        * 32bit x86 domains on the same machine).  The back-end
+        * always accommodates the front-end's native abi.  That
+        * value is pulled from the XenStore and recorded here.
+        */
+       int                     abi;
 
-               mfn_list[j++] = vtomach(va) >> PAGE_SHIFT;
+       /**
+        * Name of the bridge to which this VIF is connected, if any
+        * This field is dynamically allocated by xenbus and must be free()ed
+        * when no longer needed
+        */
+       char                    *bridge;
 
-               xen_phys_machine[(vtophys(va) >> PAGE_SHIFT)] = INVALID_P2M_ENTRY;
+       /** The interrupt-driven event channel used to signal ring events. */
+       evtchn_port_t           evtchn;
 
-               if (j == 16 || i == nr_pages) {
-                       mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_LOCAL;
+       /** Xen device handle.*/
+       long                    handle;
 
-                       reservation.nr_extents = j;
+       /** IRQ mapping for the communication ring event channel. */
+       int                     irq;
+
+       /**
+        * \brief Cached value of the front-end's domain id.
+        *
+        * This value is used once for each mapped page in
+        * a transaction.  We cache it to avoid incurring the
+        * cost of an ivar access every time this is needed.
+        */
+       domid_t                 otherend_id;
 
-                       mcl[j].op = __HYPERVISOR_memory_op;
-                       mcl[j].args[0] = XENMEM_decrease_reservation;
-                       mcl[j].args[1] =  (unsigned long)&reservation;
-                       
-                       (void)HYPERVISOR_multicall(mcl, j+1);
+       /**
+        * Undocumented frontend feature.  Has something to do with
+        * scatter/gather IO
+        */
+       uint8_t                 can_sg;
+       /** Undocumented frontend feature */
+       uint8_t                 gso;
+       /** Undocumented frontend feature */
+       uint8_t                 gso_prefix;
+       /** Can checksum TCP/UDP over IPv4 */
+       uint8_t                 ip_csum;
+
+       /* Implementation related fields */
+       /**
+        * Preallocated grant table copy descriptor for RX operations.
+        * Access must be protected by rx_lock
+        */
+       gnttab_copy_table       rx_gnttab;
 
-                       mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = 0;
-                       j = 0;
-               }
-       }
+       /**
+        * Preallocated grant table copy descriptor for TX operations.
+        * Access must be protected by tx_lock
+        */
+       gnttab_copy_table       tx_gnttab;
 
-       return (unsigned long)pages;
-}
+#ifdef XENHVM
+       /**
+        * Resource representing allocated physical address space
+        * associated with our per-instance kva region.
+        */
+       struct resource         *pseudo_phys_res;
 
-#ifdef XEN_NETBACK_FIXUP_CSUM
-static void
-fixup_checksum(struct mbuf *m)
-{
-       struct ether_header *eh = mtod(m, struct ether_header *);
-       struct ip *ip = (struct ip *)(eh + 1);
-       int iphlen = ip->ip_hl << 2;
-       int iplen = ntohs(ip->ip_len);
-
-       if ((m->m_pkthdr.csum_flags & CSUM_TCP)) {
-               struct tcphdr *th = (struct tcphdr *)((caddr_t)ip + iphlen);
-               th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
-                       htons(IPPROTO_TCP + (iplen - iphlen)));
-               th->th_sum = in_cksum_skip(m, iplen + sizeof(*eh), sizeof(*eh) + iphlen);
-               m->m_pkthdr.csum_flags &= ~CSUM_TCP;
-#ifdef SCTP
-       } else if (sw_csum & CSUM_SCTP) {
-               sctp_delayed_cksum(m, iphlen);
-               sw_csum &= ~CSUM_SCTP;
-#endif
-       } else {
-               u_short csum;
-               struct udphdr *uh = (struct udphdr *)((caddr_t)ip + iphlen);
-               uh->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
-                       htons(IPPROTO_UDP + (iplen - iphlen)));
-               if ((csum = in_cksum_skip(m, iplen + sizeof(*eh), sizeof(*eh) + iphlen)) == 0)
-                       csum = 0xffff;
-               uh->uh_sum = csum;
-               m->m_pkthdr.csum_flags &= ~CSUM_UDP;
-       }
-}
+       /** Resource id for allocated physical address space. */
+       int                     pseudo_phys_res_id;
 #endif
 
-/* Add the interface to the specified bridge */
-static int
-add_to_bridge(struct ifnet *ifp, char *bridge)
-{
-       struct ifdrv ifd;
-       struct ifbreq ifb;
-       struct ifnet *ifp_bridge = ifunit(bridge);
+       /** Ring mapping and interrupt configuration data. */
+       struct xnb_ring_config  ring_configs[XNB_NUM_RING_TYPES];
 
-       if (!ifp_bridge)
-               return ENOENT;
+       /**
+        * Global pool of kva used for mapping remote domain ring
+        * and I/O transaction data.
+        */
+       vm_offset_t             kva;
 
-       bzero(&ifd, sizeof(ifd));
-       bzero(&ifb, sizeof(ifb));
+       /** Pseudo-physical address corresponding to kva. */
+       uint64_t                gnt_base_addr;
 
-       strcpy(ifb.ifbr_ifsname, ifp->if_xname);
-       strcpy(ifd.ifd_name, ifp->if_xname);
-       ifd.ifd_cmd = BRDGADD;
-       ifd.ifd_len = sizeof(ifb);
-       ifd.ifd_data = &ifb;
+       /** Various configuration and state bit flags. */
+       xnb_flag_t              flags;
 
-       return bridge_ioctl_kern(ifp_bridge, SIOCSDRVSPEC, &ifd);
-       
-}
+       /** Mutex protecting per-instance data in the receive path. */
+       struct mtx              rx_lock;
 
-static int
-netif_create(int handle, struct xenbus_device *xdev, char *bridge)
-{
-       netif_t *netif;
-       struct ifnet *ifp;
+       /** Mutex protecting per-instance data in the softc structure. */
+       struct mtx              sc_lock;
 
-       netif = (netif_t *)malloc(sizeof(*netif), M_DEVBUF, M_NOWAIT | M_ZERO);
-       if (!netif)
-               return ENOMEM;
+       /** Mutex protecting per-instance data in the transmit path. */
+       struct mtx              tx_lock;
 
-       netif->ref_cnt = 1;
-       netif->handle = handle;
-       netif->domid = xdev->otherend_id;
-       netif->xdev = xdev;
-       netif->bridge = bridge;
-       xdev->data = netif;
-
-       /* Set up ifnet structure */
-       ifp = netif->ifp = if_alloc(IFT_ETHER);
-       if (!ifp) {
-               if (bridge)
-                       free(bridge, M_DEVBUF);
-               free(netif, M_DEVBUF);
-               return ENOMEM;
-       }
+       /** The size of the global kva pool. */
+       int                     kva_size;
+};
 
-       ifp->if_softc = netif;
-       if_initname(ifp, "vif",
-               atomic_fetchadd_int(&vif_unit_maker, 1) /* ifno */ );
-       ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX;
-       ifp->if_output = ether_output;
-       ifp->if_start = netback_start;
-       ifp->if_ioctl = netback_ioctl;
-       ifp->if_snd.ifq_maxlen = NET_TX_RING_SIZE - 1;
-       
-       DPRINTF("Created %s for domid=%d handle=%d\n", IFNAME(netif), netif->domid, netif->handle);
+/*---------------------------- Debugging functions ---------------------------*/
+#ifdef XNB_DEBUG
+static void __unused
+xnb_dump_gnttab_copy(const struct gnttab_copy *entry)
+{
+       if (entry == NULL) {
+               printf("NULL grant table pointer\n");
+               return;
+       }
 
-       return 0;
+       if (entry->flags & GNTCOPY_dest_gref)
+               printf("gnttab dest ref=\t%u\n", entry->dest.u.ref);
+       else

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***

----- End forwarded message -----

-- 
Kenneth Merry
k...@kdm.org
_______________________________________________
freebsd-xen@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-xen
To unsubscribe, send any mail to "freebsd-xen-unsubscr...@freebsd.org"

Reply via email to