Thanks, Reyk, for your great work!

Tested, and it works like a charm. I ran some tcpbench tests too and got
~180 Mbit/s between host and VM.

Regards Uwe


On 12 Apr 13:44, Reyk Floeter wrote:
> Hi,
> 
> we want to make it easier to run NAT'ed and auto-configured VMs that
> don't need switches, L2, manual scripts, or any additional servers on
> the host.  I wrote a new mode that just needs vmd, pf, and forwarding.
> 
> I'm looking for feedback, testing, and responses on this list.
> 
> vmd currently supports two modes to configure network interfaces:
> -i/interface: unconfigured tap(4) interfaces for static configuration.
> -n/switch: automatically adds tap(4) interfaces to bridge/switch interfaces.
> 
> The attached diff adds a new mode for dynamic "NAT" interfaces:
> -L/local interface: auto-configure an L3 interface and run built-in DHCP.
> 
> Setting it up is easy:
> 
> 1. Enable forwarding:
> 
>     # sysctl net.inet.ip.forwarding=1
> 
> 2. Add a NAT rule to pf.conf(5) and a redirection for DNS (or run unbound):
> 
>     pass out on egress received-on tap nat-to (egress:0)
>     pass in on tap proto { tcp udp } to 100.64.0.0/10 port domain rdr-to 8.8.8.8
> 
> 3. Now start a new VM with the -L option to add a "local" interface:
> 
>     # vmctl start foo -d foo.img -L -c
> 
> vmd configures a /31 address on the tap(4) interface of the host and
> provides another IP in the same subnet via DHCP (BOOTP) to the VM.
> vmd runs an internal DHCP server that replies with IP, gateway, and
> DNS addresses to the VM - there is no need to run dhcpd!  The built-in
> server only ever responds to the VM on the inside and cannot leak its
> DHCP responses to the outside.
> 
> DHCPDISCOVER on vio0 - interval 1
> BOOTREPLY from 100.64.3.2 (fe:e1:bb:d1:b7:5e)
> bound to 100.64.3.3 -- renewal in 8000 seconds.
> 
> Done.
> 
> This also replaces the "dhcpd on vether0 on switch" approach for
> NAT'ed VMs on laptops that I recommended before.  The switch concept
> itself will still be provided and improved for other use cases.
> 
> The addresses are currently allocated from the RFC6598 100.64.0.0/10
> "IPv4 Prefix for Shared Address Space" to avoid collisions with
> RFC1918 addresses on the host.  I will add a configuration option to
> change the default prefix later.
> 
> The current algorithm to generate the IPs and /31 subnets from the
> prefix can be found in the vm_priv_addr() function below.  The
> packet.c and dhcp.h code is copied from dhcrelay which got improved
> recently.
> 
> Thoughts? OKs?
> 
> Reyk
> 
> Index: usr.sbin/vmd/Makefile
> ===================================================================
> RCS file: /cvs/src/usr.sbin/vmd/Makefile,v
> retrieving revision 1.13
> diff -u -p -u -p -r1.13 Makefile
> --- usr.sbin/vmd/Makefile     1 Mar 2017 18:00:50 -0000       1.13
> +++ usr.sbin/vmd/Makefile     12 Apr 2017 11:16:50 -0000
> @@ -5,7 +5,7 @@
>  PROG=                vmd
>  SRCS=                vmd.c control.c log.c priv.c proc.c config.c vmm.c
>  SRCS+=               vm.c loadfile_elf.c pci.c virtio.c i8259.c mc146818.c
> -SRCS+=               ns8250.c i8253.c vmboot.c ufs.c disklabel.c
> +SRCS+=               ns8250.c i8253.c vmboot.c ufs.c disklabel.c dhcp.c packet.c
>  SRCS+=               parse.y
>  
>  CFLAGS+=     -Wall -I${.CURDIR}
> Index: usr.sbin/vmd/dhcp.c
> ===================================================================
> RCS file: usr.sbin/vmd/dhcp.c
> diff -N usr.sbin/vmd/dhcp.c
> --- /dev/null 1 Jan 1970 00:00:00 -0000
> +++ usr.sbin/vmd/dhcp.c       12 Apr 2017 11:16:50 -0000
> @@ -0,0 +1,163 @@
> +/*   $OpenBSD$       */
> +
> +/*
> + * Copyright (c) 2017 Reyk Floeter <[email protected]>
> + *
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +#include <sys/types.h>
> +#include <sys/socket.h>
> +
> +#include <net/if.h>
> +#include <netinet/in.h>
> +#include <netinet/if_ether.h>
> +
> +#include <stdlib.h>
> +#include <string.h>
> +#include <stddef.h>
> +
> +#include "proc.h"
> +#include "vmd.h"
> +#include "dhcp.h"
> +#include "virtio.h"
> +
> +static const uint8_t broadcast[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
> +
> +/*
> + * Answer a BOOTP/DHCP request sent by the guest on "dev".
> + *
> + * buf/buflen hold one Ethernet frame from the VM.  The frame is answered
> + * only if it is a broadcast from the device's own MAC address, carries UDP
> + * from CLIENT_PORT to SERVER_PORT and contains a plain BOOTREQUEST for
> + * that MAC (no ciaddr, no boot file name, no relay hops).
> + *
> + * On success *obuf points to a freshly calloc()ed frame (Ethernet, IP and
> + * UDP headers followed by the BOOTP reply) and its length is returned; the
> + * buffer is handed over to the caller.
> + *
> + * Returns -1 if the frame is not a request handled here or if
> + * vm_priv_addr() yields no address, and 0 if *obuf is already set or the
> + * reply cannot be built.  *obuf is not modified in either case.
> + */
> +ssize_t
> +dhcp_request(struct vionet_dev *dev, char *buf, size_t buflen, char **obuf)
> +{
> +     unsigned char           *respbuf = NULL;
> +     ssize_t                  offset, respbuflen = 0;
> +     struct packet_ctx        pc;
> +     struct dhcp_packet       req, resp;
> +     struct in_addr           in, mask;
> +     size_t                   reqlen, resplen, o;
> +
> +     if (buflen < BOOTP_MIN_LEN + sizeof(struct ether_header))
> +             return (-1);
> +
> +     memset(&pc, 0, sizeof(pc));
> +     if ((offset = decode_hw_header(buf, buflen, 0, &pc, HTYPE_ETHER)) < 0)
> +             return (-1);
> +
> +     /* Only broadcasts originating from this device are considered. */
> +     if (memcmp(pc.pc_smac, dev->mac, ETHER_ADDR_LEN) != 0 ||
> +         memcmp(pc.pc_dmac, broadcast, ETHER_ADDR_LEN) != 0)
> +             return (-1);
> +
> +     if ((offset = decode_udp_ip_header(buf, buflen, offset, &pc)) < 0)
> +             return (-1);
> +
> +     if (ntohs(ss2sin(&pc.pc_src)->sin_port) != CLIENT_PORT ||
> +         ntohs(ss2sin(&pc.pc_dst)->sin_port) != SERVER_PORT)
> +             return (-1);
> +
> +     /*
> +      * Copy the request, but never more than the structure can hold:
> +      * the guest controls the frame size.
> +      */
> +     reqlen = buflen - (size_t)offset;
> +     if (reqlen > sizeof(req))
> +             reqlen = sizeof(req);
> +     memset(&req, 0, sizeof(req));
> +     memcpy(&req, buf + offset, reqlen);
> +
> +     if (req.op != BOOTREQUEST ||
> +         req.htype != pc.pc_htype ||
> +         req.hlen != ETHER_ADDR_LEN ||
> +         memcmp(dev->mac, req.chaddr, req.hlen) != 0)
> +             return (-1);
> +
> +     /* Ignore unsupported requests for now */
> +     if (req.ciaddr.s_addr != 0 || req.file[0] != '\0' || req.hops != 0)
> +             return (-1);
> +
> +     memset(&resp, 0, sizeof(resp));
> +     resp.op = BOOTREPLY;
> +     resp.htype = req.htype;
> +     resp.hlen = req.hlen;
> +     resp.xid = req.xid;
> +
> +     /* Address offered to the client; also the reply's IP destination. */
> +     if ((in.s_addr = vm_priv_addr(dev->vm_vmid, dev->idx, 1)) == 0)
> +             return (-1);
> +     memcpy(&resp.yiaddr, &in, sizeof(in));
> +     memcpy(&ss2sin(&pc.pc_dst)->sin_addr, &in, sizeof(in));
> +     ss2sin(&pc.pc_dst)->sin_port = htons(CLIENT_PORT);
> +
> +     /* Server address; also the reply's IP source, router and DNS. */
> +     if ((in.s_addr = vm_priv_addr(dev->vm_vmid, dev->idx, 0)) == 0)
> +             return (-1);
> +     memcpy(&resp.siaddr, &in, sizeof(in));
> +     memcpy(&ss2sin(&pc.pc_src)->sin_addr, &in, sizeof(in));
> +     ss2sin(&pc.pc_src)->sin_port = htons(SERVER_PORT);
> +
> +     /* Packet is already allocated */
> +     if (*obuf != NULL)
> +             goto fail;
> +
> +     respbuflen = DHCP_MTU_MAX;
> +     if ((respbuf = calloc(1, respbuflen)) == NULL)
> +             goto fail;
> +
> +     /*
> +      * The reply goes to the device's MAC address and is sent from that
> +      * same address with its last octet incremented.
> +      */
> +     memcpy(&pc.pc_dmac, dev->mac, sizeof(pc.pc_dmac));
> +     memcpy(&resp.chaddr, dev->mac, resp.hlen);
> +     memcpy(&pc.pc_smac, dev->mac, sizeof(pc.pc_smac));
> +     pc.pc_smac[5]++;
> +     if ((offset = assemble_hw_header(respbuf, respbuflen, 0,
> +         &pc, HTYPE_ETHER)) < 0) {
> +             log_debug("%s: assemble_hw_header failed", __func__);
> +             goto fail;
> +     }
> +
> +     /* Add BOOTP Vendor Extensions (DHCP options) */
> +     o = 0;
> +     memcpy(&resp.options,
> +         DHCP_OPTIONS_COOKIE, DHCP_OPTIONS_COOKIE_LEN);
> +     o += DHCP_OPTIONS_COOKIE_LEN;
> +
> +     /* /31 netmask: the subnet holds just the two addresses above. */
> +     resp.options[o++] = DHO_SUBNET_MASK;
> +     resp.options[o++] = sizeof(mask);
> +     mask.s_addr = htonl(0xfffffffe);
> +     memcpy(&resp.options[o], &mask, sizeof(mask));
> +     o += sizeof(mask);
> +
> +     resp.options[o++] = DHO_ROUTERS;
> +     resp.options[o++] = sizeof(in);
> +     memcpy(&resp.options[o], &in, sizeof(in));
> +     o += sizeof(in);
> +
> +     resp.options[o++] = DHO_DOMAIN_NAME_SERVERS;
> +     resp.options[o++] = sizeof(in);
> +     memcpy(&resp.options[o], &in, sizeof(in));
> +     o += sizeof(in);
> +
> +     resp.options[o++] = DHO_END;
> +
> +     /*
> +      * BOOTP uses a 64byte vendor field instead of the DHCP options,
> +      * so pad short replies up to the minimum packet size.
> +      */
> +     resplen = offsetof(struct dhcp_packet, options) + o;
> +     if (resplen < BOOTP_MIN_LEN)
> +             resplen = BOOTP_MIN_LEN;
> +
> +     if ((offset = assemble_udp_ip_header(respbuf, respbuflen, offset, &pc,
> +         (unsigned char *)&resp, resplen)) < 0) {
> +             log_debug("%s: assemble_udp_ip_header failed", __func__);
> +             goto fail;
> +     }
> +
> +     /*
> +      * Copy only the bytes that are sent.  sizeof(resp) plus the
> +      * headers is larger than the DHCP_MTU_MAX sized buffer.
> +      */
> +     if ((size_t)offset + resplen > (size_t)respbuflen)
> +             goto fail;
> +     memcpy(respbuf + offset, &resp, resplen);
> +     respbuflen = offset + resplen;
> +
> +     *obuf = respbuf;
> +     return (respbuflen);
> + fail:
> +     free(respbuf);
> +     return (0);
> +}
> +
> Index: usr.sbin/vmd/dhcp.h
> ===================================================================
> RCS file: usr.sbin/vmd/dhcp.h
> diff -N usr.sbin/vmd/dhcp.h
> --- /dev/null 1 Jan 1970 00:00:00 -0000
> +++ usr.sbin/vmd/dhcp.h       12 Apr 2017 11:16:50 -0000
> @@ -0,0 +1,181 @@
> +/*   $OpenBSD: dhcp.h,v 1.6 2016/12/12 15:41:05 rzalamena Exp $      */
> +
> +/* Protocol structures... */
> +
> +/*
> + * Copyright (c) 1995, 1996 The Internet Software Consortium.
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + *
> + * 1. Redistributions of source code must retain the above copyright
> + *    notice, this list of conditions and the following disclaimer.
> + * 2. Redistributions in binary form must reproduce the above copyright
> + *    notice, this list of conditions and the following disclaimer in the
> + *    documentation and/or other materials provided with the distribution.
> + * 3. Neither the name of The Internet Software Consortium nor the names
> + *    of its contributors may be used to endorse or promote products derived
> + *    from this software without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE INTERNET SOFTWARE CONSORTIUM AND
> + * CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
> + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
> + * DISCLAIMED.  IN NO EVENT SHALL THE INTERNET SOFTWARE CONSORTIUM OR
> + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
> + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
> + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
> + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
> + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> + * SUCH DAMAGE.
> + *
> + * This software has been written for the Internet Software Consortium
> + * by Ted Lemon <[email protected]> in cooperation with Vixie
> + * Enterprises.  To learn more about the Internet Software Consortium,
> + * see ``http://www.vix.com/isc''.  To learn more about Vixie
> + * Enterprises, see ``http://www.vix.com''.
> + */
> +
> +#define DHCP_UDP_OVERHEAD    (20 + /* IP header */           \
> +                              8)   /* UDP header */
> +#define DHCP_SNAME_LEN               64
> +#define DHCP_FILE_LEN                128
> +#define DHCP_FIXED_NON_UDP   236
> +#define DHCP_FIXED_LEN               (DHCP_FIXED_NON_UDP + DHCP_UDP_OVERHEAD)
> +                                             /* Everything but options. */
> +#define DHCP_MTU_MAX         1500
> +#define DHCP_OPTION_LEN              (DHCP_MTU_MAX - DHCP_FIXED_LEN)
> +/* The option/sub-option maximum length. */
> +#define DHCP_OPTION_MAXLEN   255
> +/* The option/sub-option header length. */
> +#define DHCP_OPTION_HDR_LEN  2
> +
> +#define BOOTP_MIN_LEN                300
> +
> +#define      SERVER_PORT             67
> +#define      CLIENT_PORT             68
> +
> +/*
> + * Layout of a BOOTP/DHCP message as it follows the UDP header.  The
> + * members up to and including "file" add up to DHCP_FIXED_NON_UDP (236)
> + * bytes; "options" takes whatever remains of a DHCP_MTU_MAX sized packet
> + * once the fixed part and the IP/UDP headers are accounted for.
> + */
> +struct dhcp_packet {
> +     u_int8_t  op;           /* Message opcode/type */
> +     u_int8_t  htype;        /* Hardware addr type (see net/if_types.h) */
> +     u_int8_t  hlen;         /* Hardware addr length */
> +     u_int8_t  hops;         /* Number of relay agent hops from client */
> +     u_int32_t xid;          /* Transaction ID */
> +     u_int16_t secs;         /* Seconds since client started looking */
> +     u_int16_t flags;        /* Flag bits */
> +     struct in_addr ciaddr;  /* Client IP address (if already in use) */
> +     struct in_addr yiaddr;  /* Client IP address */
> +     struct in_addr siaddr;  /* IP address of next server to talk to */
> +     struct in_addr giaddr;  /* DHCP relay agent IP address */
> +     unsigned char chaddr[16];       /* Client hardware address */
> +     char sname[DHCP_SNAME_LEN];     /* Server name */
> +     char file[DHCP_FILE_LEN];       /* Boot filename */
> +     unsigned char options[DHCP_OPTION_LEN];
> +                             /* Optional parameters
> +                                (actual length dependent on MTU). */
> +};
> +
> +/* BOOTP (rfc951) message types */
> +#define BOOTREQUEST  1
> +#define BOOTREPLY    2
> +
> +/* Possible values for flags field... */
> +#define BOOTP_BROADCAST 32768L
> +
> +/* Possible values for hardware type (htype) field... */
> +#define HTYPE_ETHER          1       /* Ethernet                     */
> +#define HTYPE_IEEE802                6       /* IEEE 802.2 Token Ring...     */
> +#define HTYPE_FDDI           8       /* FDDI...                      */
> +#define HTYPE_IPSEC_TUNNEL   31      /* IPsec Tunnel (RFC3456)       */
> +
> +/* Magic cookie validating dhcp options field (and bootp vendor
> +   extensions field). */
> +#define DHCP_OPTIONS_COOKIE  "\143\202\123\143"
> +#define DHCP_OPTIONS_COOKIE_LEN      4
> +
> +/* DHCP Option codes: */
> +
> +#define DHO_PAD                              0
> +#define DHO_SUBNET_MASK                      1
> +#define DHO_TIME_OFFSET                      2
> +#define DHO_ROUTERS                  3
> +#define DHO_TIME_SERVERS             4
> +#define DHO_NAME_SERVERS             5
> +#define DHO_DOMAIN_NAME_SERVERS              6
> +#define DHO_LOG_SERVERS                      7
> +#define DHO_COOKIE_SERVERS           8
> +#define DHO_LPR_SERVERS                      9
> +#define DHO_IMPRESS_SERVERS          10
> +#define DHO_RESOURCE_LOCATION_SERVERS        11
> +#define DHO_HOST_NAME                        12
> +#define DHO_BOOT_SIZE                        13
> +#define DHO_MERIT_DUMP                       14
> +#define DHO_DOMAIN_NAME                      15
> +#define DHO_SWAP_SERVER                      16
> +#define DHO_ROOT_PATH                        17
> +#define DHO_EXTENSIONS_PATH          18
> +#define DHO_IP_FORWARDING            19
> +#define DHO_NON_LOCAL_SOURCE_ROUTING 20
> +#define DHO_POLICY_FILTER            21
> +#define DHO_MAX_DGRAM_REASSEMBLY     22
> +#define DHO_DEFAULT_IP_TTL           23
> +#define DHO_PATH_MTU_AGING_TIMEOUT   24
> +#define DHO_PATH_MTU_PLATEAU_TABLE   25
> +#define DHO_INTERFACE_MTU            26
> +#define DHO_ALL_SUBNETS_LOCAL                27
> +#define DHO_BROADCAST_ADDRESS                28
> +#define DHO_PERFORM_MASK_DISCOVERY   29
> +#define DHO_MASK_SUPPLIER            30
> +#define DHO_ROUTER_DISCOVERY         31
> +#define DHO_ROUTER_SOLICITATION_ADDRESS      32
> +#define DHO_STATIC_ROUTES            33
> +#define DHO_TRAILER_ENCAPSULATION    34
> +#define DHO_ARP_CACHE_TIMEOUT                35
> +#define DHO_IEEE802_3_ENCAPSULATION  36
> +#define DHO_DEFAULT_TCP_TTL          37
> +#define DHO_TCP_KEEPALIVE_INTERVAL   38
> +#define DHO_TCP_KEEPALIVE_GARBAGE    39
> +#define DHO_NIS_DOMAIN                       40
> +#define DHO_NIS_SERVERS                      41
> +#define DHO_NTP_SERVERS                      42
> +#define DHO_VENDOR_ENCAPSULATED_OPTIONS      43
> +#define DHO_NETBIOS_NAME_SERVERS     44
> +#define DHO_NETBIOS_DD_SERVER                45
> +#define DHO_NETBIOS_NODE_TYPE                46
> +#define DHO_NETBIOS_SCOPE            47
> +#define DHO_FONT_SERVERS             48
> +#define DHO_X_DISPLAY_MANAGER                49
> +#define DHO_DHCP_REQUESTED_ADDRESS   50
> +#define DHO_DHCP_LEASE_TIME          51
> +#define DHO_DHCP_OPTION_OVERLOAD     52
> +#define DHO_DHCP_MESSAGE_TYPE                53
> +#define DHO_DHCP_SERVER_IDENTIFIER   54
> +#define DHO_DHCP_PARAMETER_REQUEST_LIST      55
> +#define DHO_DHCP_MESSAGE             56
> +#define DHO_DHCP_MAX_MESSAGE_SIZE    57
> +#define DHO_DHCP_RENEWAL_TIME                58
> +#define DHO_DHCP_REBINDING_TIME              59
> +#define DHO_DHCP_CLASS_IDENTIFIER    60
> +#define DHO_DHCP_CLIENT_IDENTIFIER   61
> +#define DHO_DHCP_USER_CLASS_ID               77
> +#define DHO_RELAY_AGENT_INFORMATION  82
> +#define DHO_END                              255
> +
> +/* DHCP message types. */
> +#define DHCPDISCOVER 1
> +#define DHCPOFFER    2
> +#define DHCPREQUEST  3
> +#define DHCPDECLINE  4
> +#define DHCPACK              5
> +#define DHCPNAK              6
> +#define DHCPRELEASE  7
> +#define DHCPINFORM   8
> +
> +/* Relay Agent Information sub-options */
> +#define RAI_CIRCUIT_ID       1
> +#define RAI_REMOTE_ID        2
> +#define RAI_AGENT_ID 3
> Index: usr.sbin/vmd/packet.c
> ===================================================================
> RCS file: usr.sbin/vmd/packet.c
> diff -N usr.sbin/vmd/packet.c
> --- /dev/null 1 Jan 1970 00:00:00 -0000
> +++ usr.sbin/vmd/packet.c     12 Apr 2017 11:16:50 -0000
> @@ -0,0 +1,332 @@
> +/*   $OpenBSD: packet.c,v 1.13 2017/02/13 19:15:39 krw Exp $ */
> +
> +/* Packet assembly code, originally contributed by Archie Cobbs. */
> +
> +/*
> + * Copyright (c) 1995, 1996, 1999 The Internet Software Consortium.
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + *
> + * 1. Redistributions of source code must retain the above copyright
> + *    notice, this list of conditions and the following disclaimer.
> + * 2. Redistributions in binary form must reproduce the above copyright
> + *    notice, this list of conditions and the following disclaimer in the
> + *    documentation and/or other materials provided with the distribution.
> + * 3. Neither the name of The Internet Software Consortium nor the names
> + *    of its contributors may be used to endorse or promote products derived
> + *    from this software without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE INTERNET SOFTWARE CONSORTIUM AND
> + * CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
> + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
> + * DISCLAIMED.  IN NO EVENT SHALL THE INTERNET SOFTWARE CONSORTIUM OR
> + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
> + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
> + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
> + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
> + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> + * SUCH DAMAGE.
> + *
> + * This software has been written for the Internet Software Consortium
> + * by Ted Lemon <[email protected]> in cooperation with Vixie
> + * Enterprises.  To learn more about the Internet Software Consortium,
> + * see ``http://www.vix.com/isc''.  To learn more about Vixie
> + * Enterprises, see ``http://www.vix.com''.
> + */
> +
> +#include <sys/types.h>
> +#include <sys/socket.h>
> +
> +#include <arpa/inet.h>
> +
> +#include <net/if.h>
> +#include <net/if_enc.h>
> +
> +#include <netinet/in.h>
> +#include <netinet/ip.h>
> +#include <netinet/udp.h>
> +#include <netinet/if_ether.h>
> +
> +#include <string.h>
> +
> +#include "dhcp.h"
> +#include "vmd.h"
> +#include "proc.h"
> +
> +u_int32_t    checksum(unsigned char *, u_int32_t, u_int32_t);
> +u_int32_t    wrapsum(u_int32_t);
> +
> +/*
> + * Add the nbytes at buf to the running ones' complement sum "sum" and
> + * return the new running sum.  The data is summed as big-endian 16-bit
> + * words; calls can be chained by feeding one result into the next call.
> + * The words are assembled byte by byte, so buf needs no particular
> + * alignment.
> + */
> +u_int32_t
> +checksum(unsigned char *buf, u_int32_t nbytes, u_int32_t sum)
> +{
> +     u_int32_t i, paired = nbytes & ~1U;
> +
> +     /* Checksum all the pairs of bytes first... */
> +     for (i = 0; i < paired; i += 2) {
> +             sum += ((u_int32_t)buf[i] << 8) | buf[i + 1];
> +             if (sum > 0xFFFF)
> +                     sum -= 0xFFFF;
> +     }
> +
> +     /*
> +      * If there's a single byte left over, checksum it, too.
> +      * Network byte order is big-endian, so the remaining byte is
> +      * the high byte.
> +      */
> +     if (i < nbytes) {
> +             sum += buf[i] << 8;
> +             if (sum > 0xFFFF)
> +                     sum -= 0xFFFF;
> +     }
> +
> +     return (sum);
> +}
> +
> +/*
> + * Finish a running sum produced by checksum(): take the ones' complement
> + * of its low 16 bits and return it in network byte order.
> + */
> +u_int32_t
> +wrapsum(u_int32_t sum)
> +{
> +     u_int16_t folded;
> +
> +     folded = (u_int16_t)(~sum & 0xFFFF);
> +     return (htons(folded));
> +}
> +
> +/*
> + * Write an Ethernet header for an IP packet into buf at "offset", using
> + * the source and destination MAC addresses stored in pc.  Only
> + * HTYPE_ETHER is supported.  Returns the offset just past the header,
> + * or -1 if the type is unsupported or the header does not fit in buflen.
> + */
> +ssize_t
> +assemble_hw_header(unsigned char *buf, size_t buflen,
> +    size_t offset, struct packet_ctx *pc, unsigned int intfhtype)
> +{
> +     struct ether_header hdr;
> +
> +     if (intfhtype != HTYPE_ETHER)
> +             return (-1);
> +     if (buflen < offset + ETHER_HDR_LEN)
> +             return (-1);
> +
> +     /* Both addresses are taken verbatim from the packet context. */
> +     memcpy(hdr.ether_dhost, pc->pc_dmac, ETHER_ADDR_LEN);
> +     memcpy(hdr.ether_shost, pc->pc_smac, ETHER_ADDR_LEN);
> +     hdr.ether_type = htons(ETHERTYPE_IP);
> +
> +     memcpy(buf + offset, &hdr, ETHER_HDR_LEN);
> +
> +     return (offset + ETHER_HDR_LEN);
> +}
> +
> +/*
> + * Write an IPv4 header followed by a UDP header into buf at "offset".
> + * Addresses and ports come from pc->pc_src and pc->pc_dst; "data" and
> + * "datalen" describe the UDP payload, which is used for the length
> + * fields and the UDP checksum but is not copied into buf.  Returns the
> + * offset just past the UDP header, or -1 if the two headers do not fit
> + * in buflen.
> + */
> +ssize_t
> +assemble_udp_ip_header(unsigned char *buf, size_t buflen, size_t offset,
> +    struct packet_ctx *pc, unsigned char *data, size_t datalen)
> +{
> +     struct ip        iph;
> +     struct udphdr    uh;
> +     u_int32_t        acc;
> +
> +     if (offset + sizeof(iph) + sizeof(uh) > buflen)
> +             return (-1);
> +
> +     /* IPv4 header without options (5 * 4 = 20 bytes). */
> +     iph.ip_v = 4;
> +     iph.ip_hl = 5;
> +     iph.ip_tos = IPTOS_LOWDELAY;
> +     iph.ip_len = htons(sizeof(iph) + sizeof(uh) + datalen);
> +     iph.ip_id = 0;
> +     iph.ip_off = 0;
> +     iph.ip_ttl = 16;
> +     iph.ip_p = IPPROTO_UDP;
> +     iph.ip_sum = 0;
> +     iph.ip_src.s_addr = ss2sin(&pc->pc_src)->sin_addr.s_addr;
> +     iph.ip_dst.s_addr = ss2sin(&pc->pc_dst)->sin_addr.s_addr;
> +
> +     /* The header checksum is computed with its own field zeroed. */
> +     acc = checksum((unsigned char *)&iph, sizeof(iph), 0);
> +     iph.ip_sum = wrapsum(acc);
> +     memcpy(buf + offset, &iph, sizeof(iph));
> +     offset += sizeof(iph);
> +
> +     uh.uh_sport = ss2sin(&pc->pc_src)->sin_port;
> +     uh.uh_dport = ss2sin(&pc->pc_dst)->sin_port;
> +     uh.uh_ulen = htons(sizeof(uh) + datalen);
> +     uh.uh_sum = 0;
> +
> +     /*
> +      * UDP checksum: protocol and UDP length, then the source and
> +      * destination addresses, then the payload, then the UDP header.
> +      */
> +     acc = IPPROTO_UDP + (u_int32_t)ntohs(uh.uh_ulen);
> +     acc = checksum((unsigned char *)&iph.ip_src,
> +         2 * sizeof(iph.ip_src), acc);
> +     acc = checksum(data, datalen, acc);
> +     acc = checksum((unsigned char *)&uh, sizeof(uh), acc);
> +     uh.uh_sum = wrapsum(acc);
> +
> +     memcpy(buf + offset, &uh, sizeof(uh));
> +     offset += sizeof(uh);
> +
> +     return (offset);
> +}
> +
> +/*
> + * Parse the link-level header found in buf at "offset" and record what
> + * is learned in pc.
> + *
> + * HTYPE_ETHER: the destination and source MAC addresses are copied
> + * into pc.
> + * HTYPE_IPSEC_TUNNEL: the enc header and the outer IP header, which
> + * must carry IPPROTO_IPIP, are skipped and the destination MAC is set
> + * to the broadcast address.
> + *
> + * Returns the offset of the data following the header, or -1 if the
> + * type is unsupported or the buffer is too short.
> + */
> +ssize_t
> +decode_hw_header(unsigned char *buf, size_t buflen,
> +    size_t offset, struct packet_ctx *pc, unsigned int intfhtype)
> +{
> +     struct ip       *outer;
> +     u_int32_t        outer_hlen;
> +
> +     if (intfhtype == HTYPE_IPSEC_TUNNEL) {
> +             if (buflen < offset + ENC_HDRLEN + sizeof(*outer))
> +                     return (-1);
> +             offset += ENC_HDRLEN;
> +
> +             /* Header length is the low nibble, in 32-bit words. */
> +             outer_hlen = (buf[offset] & 0xf) << 2;
> +             if (buflen < offset + outer_hlen)
> +                     return (-1);
> +
> +             /* Encapsulated IP */
> +             outer = (struct ip *)(buf + offset);
> +             if (outer->ip_p != IPPROTO_IPIP)
> +                     return (-1);
> +
> +             offset += outer_hlen;
> +             memset(pc->pc_dmac, 0xff, ETHER_ADDR_LEN);
> +     } else if (intfhtype == HTYPE_ETHER) {
> +             if (buflen < offset + ETHER_HDR_LEN)
> +                     return (-1);
> +
> +             /* Destination address comes first, then the source. */
> +             memcpy(pc->pc_dmac, buf + offset, ETHER_ADDR_LEN);
> +             memcpy(pc->pc_smac, buf + offset + ETHER_ADDR_LEN,
> +                 ETHER_ADDR_LEN);
> +             offset += ETHER_HDR_LEN;
> +     } else
> +             return (-1);
> +
> +     /* Both supported encapsulations are reported as Ethernet. */
> +     pc->pc_htype = ARPHRD_ETHER;
> +     pc->pc_hlen = ETHER_ADDR_LEN;
> +
> +     return (offset);
> +}
> +
> +/*
> + * Validate the IPv4 and UDP headers found in buf at "offset".
> + *
> + * The IP header checksum, the IP and UDP lengths against buflen and,
> + * unless the UDP checksum field is zero, the UDP checksum are verified.
> + * Source and destination addresses and ports are stored in pc->pc_src
> + * and pc->pc_dst; the ports are kept in network byte order.  The
> + * addresses are stored before the later checks run, so pc may be
> + * partially updated when -1 is returned.
> + *
> + * Returns the offset of the UDP payload, or -1 if the packet is
> + * rejected.  Repeated checksum or length failures are reported through
> + * log_info() once they make up more than half of the packets counted.
> + */
> +ssize_t
> +decode_udp_ip_header(unsigned char *buf, size_t buflen,
> +    size_t offset, struct packet_ctx *pc)
> +{
> +     struct ip *ip;
> +     struct udphdr *udp;
> +     unsigned char *data;
> +     u_int32_t ip_len;
> +     u_int32_t sum, usum;
> +     static unsigned int ip_packets_seen;
> +     static unsigned int ip_packets_bad_checksum;
> +     static unsigned int udp_packets_seen;
> +     static unsigned int udp_packets_bad_checksum;
> +     static unsigned int udp_packets_length_checked;
> +     static unsigned int udp_packets_length_overflow;
> +     int len;
> +
> +     /* Assure that an entire IP header is within the buffer. */
> +     if (buflen < offset + sizeof(*ip))
> +             return (-1);
> +     ip_len = (buf[offset] & 0xf) << 2;
> +     if (buflen < offset + ip_len)
> +             return (-1);
> +
> +     ip = (struct ip *)(buf + offset);
> +     ip_packets_seen++;
> +
> +     /* Check the IP header checksum - it should be zero. */
> +     if (wrapsum(checksum(buf + offset, ip_len, 0)) != 0) {
> +             ip_packets_bad_checksum++;
> +             if (ip_packets_seen > 4 && ip_packets_bad_checksum != 0 &&
> +                 (ip_packets_seen / ip_packets_bad_checksum) < 2) {
> +                     log_info("%u bad IP checksums seen in %u packets",
> +                         ip_packets_bad_checksum, ip_packets_seen);
> +                     ip_packets_seen = ip_packets_bad_checksum = 0;
> +             }
> +             return (-1);
> +     }
> +
> +     pc->pc_src.ss_len = sizeof(struct sockaddr_in);
> +     pc->pc_src.ss_family = AF_INET;
> +     memcpy(&ss2sin(&pc->pc_src)->sin_addr, &ip->ip_src,
> +         sizeof(ss2sin(&pc->pc_src)->sin_addr));
> +
> +     pc->pc_dst.ss_len = sizeof(struct sockaddr_in);
> +     pc->pc_dst.ss_family = AF_INET;
> +     memcpy(&ss2sin(&pc->pc_dst)->sin_addr, &ip->ip_dst,
> +         sizeof(ss2sin(&pc->pc_dst)->sin_addr));
> +
> +#ifdef DEBUG
> +     if (buflen != offset + ntohs(ip->ip_len))
> +             log_debug("ip length %d disagrees with bytes received %zu.",
> +                 ntohs(ip->ip_len), buflen - offset);
> +#endif
> +
> +     /* Assure that the entire IP packet is within the buffer. */
> +     if (buflen < offset + ntohs(ip->ip_len))
> +             return (-1);
> +
> +     /* Assure that the UDP header is within the buffer. */
> +     if (buflen < offset + ip_len + sizeof(*udp))
> +             return (-1);
> +     udp = (struct udphdr *)(buf + offset + ip_len);
> +
> +     /* Assure that the entire UDP packet is within the buffer. */
> +     if (buflen < offset + ip_len + ntohs(udp->uh_ulen))
> +             return (-1);
> +     data = buf + offset + ip_len + sizeof(*udp);
> +
> +     /*
> +      * Compute UDP checksums, including the ``pseudo-header'', the
> +      * UDP header and the data. If the UDP checksum field is zero,
> +      * we're not supposed to do a checksum.
> +      */
> +     udp_packets_length_checked++;
> +     len = ntohs(udp->uh_ulen) - sizeof(*udp);
> +     if ((len < 0) || (len + data > buf + buflen)) {
> +             udp_packets_length_overflow++;
> +             if (udp_packets_length_checked > 4 &&
> +                 udp_packets_length_overflow != 0 &&
> +                 (udp_packets_length_checked /
> +                 udp_packets_length_overflow) < 2) {
> +                     log_info("%u udp packets in %u too long - dropped",
> +                         udp_packets_length_overflow,
> +                         udp_packets_length_checked);
> +                     udp_packets_length_overflow =
> +                         udp_packets_length_checked = 0;
> +             }
> +             return (-1);
> +     }
> +     if (len + data != buf + buflen)
> +             log_debug("accepting packet with data after udp payload.");
> +
> +     /*
> +      * The sum is computed with the checksum field zeroed; put the
> +      * original value back afterwards so that the caller's packet is
> +      * left unmodified.
> +      */
> +     usum = udp->uh_sum;
> +     udp->uh_sum = 0;
> +
> +     sum = wrapsum(checksum((unsigned char *)udp, sizeof(*udp),
> +         checksum(data, len, checksum((unsigned char *)&ip->ip_src,
> +         2 * sizeof(ip->ip_src),
> +         IPPROTO_UDP + (u_int32_t)ntohs(udp->uh_ulen)))));
> +
> +     udp->uh_sum = usum;
> +
> +     /*
> +      * Count each checksummed packet exactly once; counting it twice
> +      * would keep the ratio below from ever dropping under 2.
> +      */
> +     udp_packets_seen++;
> +     if (usum && usum != sum) {
> +             udp_packets_bad_checksum++;
> +             if (udp_packets_seen > 4 && udp_packets_bad_checksum != 0 &&
> +                 (udp_packets_seen / udp_packets_bad_checksum) < 2) {
> +                     log_info("%u bad udp checksums in %u packets",
> +                         udp_packets_bad_checksum, udp_packets_seen);
> +                     udp_packets_seen = udp_packets_bad_checksum = 0;
> +             }
> +             return (-1);
> +     }
> +
> +     ss2sin(&pc->pc_src)->sin_port = udp->uh_sport;
> +     ss2sin(&pc->pc_dst)->sin_port = udp->uh_dport;
> +
> +     return (offset + ip_len + sizeof(*udp));
> +}
> Index: usr.sbin/vmd/parse.y
> ===================================================================
> RCS file: /cvs/src/usr.sbin/vmd/parse.y,v
> retrieving revision 1.24
> diff -u -p -u -p -r1.24 parse.y
> --- usr.sbin/vmd/parse.y      6 Apr 2017 21:35:22 -0000       1.24
> +++ usr.sbin/vmd/parse.y      12 Apr 2017 11:16:50 -0000
> @@ -116,10 +116,11 @@ typedef struct {
>  
>  %token       INCLUDE ERROR
>  %token       ADD DISK DOWN GROUP INTERFACE NIFS PATH SIZE SWITCH UP VMID
> -%token       ENABLE DISABLE VM BOOT LLADDR MEMORY OWNER LOCKED
> +%token       ENABLE DISABLE VM BOOT LLADDR MEMORY OWNER LOCKED LOCAL
>  %token       <v.string>      STRING
>  %token  <v.number>   NUMBER
>  %type        <v.number>      disable
> +%type        <v.number>      local
>  %type        <v.number>      locked
>  %type        <v.number>      updown
>  %type        <v.lladdr>      lladdr
> @@ -325,36 +326,38 @@ vm_opts         : disable                       {
>                       free($2);
>                       vmc.vmc_flags |= VMOP_CREATE_DISK;
>               }
> -             | INTERFACE optstring iface_opts_o {
> +             | local INTERFACE optstring iface_opts_o {
>                       unsigned int    i;
>                       char            type[IF_NAMESIZE];
>  
>                       i = vcp_nnics;
>                       if (++vcp_nnics > VMM_MAX_NICS_PER_VM) {
>                               yyerror("too many interfaces: %zu", vcp_nnics);
> -                             free($2);
> +                             free($3);
>                               YYERROR;
>                       }
>  
> -                     if ($2 != NULL) {
> -                             if (strcmp($2, "tap") != 0 &&
> -                                 (priv_getiftype($2, type, NULL) == -1 ||
> +                     if ($1)
> +                             vmc.vmc_ifflags[i] |= VMIFF_LOCAL;
> +                     if ($3 != NULL) {
> +                             if (strcmp($3, "tap") != 0 &&
> +                                 (priv_getiftype($3, type, NULL) == -1 ||
>                                   strcmp(type, "tap") != 0)) {
> -                                     yyerror("invalid interface: %s", $2);
> -                                     free($2);
> +                                     yyerror("invalid interface: %s", $3);
> +                                     free($3);
>                                       YYERROR;
>                               }
>  
> -                             if (strlcpy(vmc.vmc_ifnames[i], $2,
> +                             if (strlcpy(vmc.vmc_ifnames[i], $3,
>                                   sizeof(vmc.vmc_ifnames[i])) >=
>                                   sizeof(vmc.vmc_ifnames[i])) {
>                                       yyerror("interface name too long: %s",
> -                                         $2);
> -                                     free($2);
> +                                         $3);
> +                                     free($3);
>                                       YYERROR;
>                               }
>                       }
> -                     free($2);
> +                     free($3);
>                       vmc.vmc_flags |= VMOP_CREATE_NETWORK;
>               }
>               | BOOT string                   {
> @@ -547,6 +550,10 @@ lladdr           : STRING                        {
>               }
>               ;
>  
> +local                : /* empty */                   { $$ = 0; }
> +             | LOCAL                         { $$ = 1; }
> +             ;
> +
>  locked               : /* empty */                   { $$ = 0; }
>               | LOCKED                        { $$ = 1; }
>               ;
> @@ -616,6 +623,7 @@ lookup(char *s)
>               { "interface",          INTERFACE },
>               { "interfaces",         NIFS },
>               { "lladdr",             LLADDR },
> +             { "local",              LOCAL },
>               { "locked",             LOCKED },
>               { "memory",             MEMORY },
>               { "owner",              OWNER },
> Index: usr.sbin/vmd/priv.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/vmd/priv.c,v
> retrieving revision 1.6
> diff -u -p -u -p -r1.6 priv.c
> --- usr.sbin/vmd/priv.c       2 Mar 2017 07:33:37 -0000       1.6
> +++ usr.sbin/vmd/priv.c       12 Apr 2017 11:16:50 -0000
> @@ -29,6 +29,8 @@
>  #include <netinet/if_ether.h>
>  #include <net/if_bridge.h>
>  
> +#include <arpa/inet.h>
> +
>  #include <errno.h>
>  #include <event.h>
>  #include <fcntl.h>
> @@ -80,6 +82,7 @@ priv_dispatch_parent(int fd, struct priv
>       struct ifreq             ifr;
>       struct ifbreq            ifbr;
>       struct ifgroupreq        ifgr;
> +     struct ifaliasreq        ifra;
>       char                     type[IF_NAMESIZE];
>  
>       switch (imsg->hdr.type) {
> @@ -89,6 +92,7 @@ priv_dispatch_parent(int fd, struct priv
>       case IMSG_VMDOP_PRIV_IFUP:
>       case IMSG_VMDOP_PRIV_IFDOWN:
>       case IMSG_VMDOP_PRIV_IFGROUP:
> +     case IMSG_VMDOP_PRIV_IFADDR:
>               IMSG_SIZE_CHECK(imsg, &vfr);
>               memcpy(&vfr, imsg->data, sizeof(vfr));
>  
> @@ -160,6 +164,25 @@ priv_dispatch_parent(int fd, struct priv
>                   errno != EEXIST)
>                       log_warn("SIOCAIFGROUP");
>               break;
> +     case IMSG_VMDOP_PRIV_IFADDR:
> +             memset(&ifra, 0, sizeof(ifra));
> +
> +             /* Set the interface address */
> +             strlcpy(ifra.ifra_name, vfr.vfr_name, sizeof(ifra.ifra_name));
> +
> +             memcpy(&ifra.ifra_addr, &vfr.vfr_ifra.ifra_addr,
> +                 sizeof(ifra.ifra_addr));
> +             ifra.ifra_addr.sa_family = AF_INET;
> +             ifra.ifra_addr.sa_len = sizeof(struct sockaddr_in);
> +
> +             memcpy(&ifra.ifra_mask, &vfr.vfr_ifra.ifra_mask,
> +                 sizeof(ifra.ifra_mask));
> +             ifra.ifra_mask.sa_family = AF_INET;
> +             ifra.ifra_mask.sa_len = sizeof(struct sockaddr_in);
> +
> +             if (ioctl(env->vmd_fd, SIOCAIFADDR, &ifra) < 0)
> +                     log_warn("SIOCAIFADDR");
> +             break;
>       default:
>               return (-1);
>       }
> @@ -227,6 +250,7 @@ vm_priv_ifconfig(struct privsep *ps, str
>       struct vmd_switch       *vsw;
>       unsigned int             i;
>       struct vmop_ifreq        vfr, vfbr;
> +     struct sockaddr_in      *sin4;
>  
>       for (i = 0; i < VMM_MAX_NICS_PER_VM; i++) {
>               vif = &vm->vm_ifs[i];
> @@ -298,6 +322,27 @@ vm_priv_ifconfig(struct privsep *ps, str
>               proc_compose(ps, PROC_PRIV, (vif->vif_flags & VMIFF_UP) ?
>                   IMSG_VMDOP_PRIV_IFUP : IMSG_VMDOP_PRIV_IFDOWN,
>                   &vfr, sizeof(vfr));
> +
> +             if (vm->vm_params.vmc_ifflags[i] & VMIFF_LOCAL) {
> +                     sin4 = (struct sockaddr_in *)&vfr.vfr_ifra.ifra_mask;
> +                     sin4->sin_family = AF_INET;
> +                     sin4->sin_len = sizeof(*sin4);
> +                     sin4->sin_addr.s_addr = htonl(0xfffffffe);
> +
> +                     sin4 = (struct sockaddr_in *)&vfr.vfr_ifra.ifra_addr;
> +                     sin4->sin_family = AF_INET;
> +                     sin4->sin_len = sizeof(*sin4);
> +                     if ((sin4->sin_addr.s_addr =
> +                         vm_priv_addr(vm->vm_vmid, i, 0)) == 0)
> +                             return (-1);
> +
> +                     log_debug("%s: interface %s address %s/31",
> +                         __func__, vfr.vfr_name,
> +                         inet_ntoa(sin4->sin_addr));
> +
> +                     proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFADDR,
> +                         &vfr, sizeof(vfr));
> +             }
>       }
>  
>       return (0);
> @@ -345,4 +390,47 @@ vm_priv_brconfig(struct privsep *ps, str
>  
>       vsw->sw_running = 1;
>       return (0);
> +}
> +
> +uint32_t
> +vm_priv_addr(uint32_t vmid, int idx, int isvm)
> +{
> +     in_addr_t       prefix, mask, addr;
> +
> +     /*
> +      * 1. Set the address prefix and mask, 100.64.0.0/10 by default.
> +      * XXX make the global prefix configurable.
> +      */
> +     prefix = inet_addr(VMD_DHCP_PREFIX);
> +     mask = prefixlen2mask(VMD_DHCP_PREFIXLEN);
> +
> +     /* 2. Encode the VM ID as a per-VM subnet range N, 100.64.N.0/24. */
> +     addr = vmid << 8;
> +
> +     /*
> +      * 3. Assign a /31 subnet M per VM interface, 100.64.N.M/31.
> +      * Each subnet contains exactly two IP addresses; skip the
> +      * first subnet to avoid a gateway address ending with .0.
> +      */
> +     addr |= (idx + 1) * 2;
> +
> +     /* 4. Use the first address for the gateway, the second for the VM. */
> +     if (isvm)
> +             addr++;
> +
> +     /* 5. Convert to network byte order and add the prefix. */
> +     addr = htonl(addr) | prefix;
> +
> +     /*
> +      * Validate the results:
> +      * - the address should not exceed the prefix (eg. VM ID too high).
> +      * - up to 126 interfaces can be encoded per VM.
> +      */
> +     if (prefix != (addr & mask) || idx >= 0x7f) {
> +             log_warnx("%s: dhcp address range exceeded,"
> +                 " vm id %u interface %d", __func__, vmid, idx);
> +             return (0);
> +     }
> +
> +     return (addr);
>  }
> Index: usr.sbin/vmd/virtio.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/vmd/virtio.c,v
> retrieving revision 1.41
> diff -u -p -u -p -r1.41 virtio.c
> --- usr.sbin/vmd/virtio.c     8 Apr 2017 19:08:18 -0000       1.41
> +++ usr.sbin/vmd/virtio.c     12 Apr 2017 11:16:51 -0000
> @@ -1251,15 +1251,17 @@ vionet_notifyq(struct vionet_dev *dev)
>       uint32_t vr_sz;
>       uint16_t idx, pkt_desc_idx, hdr_desc_idx, dxx;
>       size_t pktsz;
> -     int ret, num_enq, ofs;
> -     char *vr, *pkt;
> +     ssize_t dhcpsz;
> +     int ret, num_enq, ofs, spc;
> +     char *vr, *pkt, *dhcppkt;
>       struct vring_desc *desc, *pkt_desc, *hdr_desc;
>       struct vring_avail *avail;
>       struct vring_used *used;
>       struct ether_header *eh;
>  
> -     vr = pkt = NULL;
> -     ret = 0;
> +     vr = pkt = dhcppkt = NULL;
> +     ret = spc = 0;
> +     dhcpsz = 0;
>  
>       /* Invalid queue? */
>       if (dev->cfg.queue_notify != 1) {
> @@ -1373,8 +1375,13 @@ vionet_notifyq(struct vionet_dev *dev)
>                       log_debug("vionet: wrong source address %s for vm %d",
>                           ether_ntoa((struct ether_addr *)
>                           eh->ether_shost), dev->vm_id);
> +             else if (dev->local && dhcpsz == 0 &&
> +                 (dhcpsz = dhcp_request(dev, pkt, pktsz, &dhcppkt)) != -1) {
> +                     log_debug("vionet: dhcp request,"
> +                         " local response size %zd", dhcpsz);
> +
>               /* XXX signed vs unsigned here, funky cast */
> -             else if (write(dev->fd, pkt, pktsz) != (int)pktsz) {
> +             } else if (write(dev->fd, pkt, pktsz) != (int)pktsz) {
>                       log_warnx("vionet: tx failed writing to tap: "
>                           "%d", errno);
>                       goto out;
> @@ -1398,9 +1405,15 @@ vionet_notifyq(struct vionet_dev *dev)
>               log_warnx("vionet: tx error writing vio ring");
>       }
>  
> +     if (dhcpsz > 0) {
> +             if (vionet_enq_rx(dev, dhcppkt, dhcpsz, &spc))
> +                     ret = 1;
> +     }
> +
>  out:
>       free(vr);
>       free(pkt);
> +     free(dhcppkt);
>  
>       return (ret);
>  }
> @@ -1582,8 +1595,9 @@ vmmci_io(int dir, uint16_t reg, uint32_t
>  }
>  
>  void
> -virtio_init(struct vmop_create_params *vmc, int *child_disks, int *child_taps)
> +virtio_init(struct vmd_vm *vm, int *child_disks, int *child_taps)
>  {
> +     struct vmop_create_params *vmc = &vm->vm_params;
>       struct vm_create_params *vcp = &vmc->vmc_params;
>       static const uint8_t zero_mac[6];
>       uint8_t id;
> @@ -1713,6 +1727,7 @@ virtio_init(struct vmop_create_params *v
>                       vionet[i].fd = child_taps[i];
>                       vionet[i].rx_pending = 0;
>                       vionet[i].vm_id = vcp->vcp_id;
> +                     vionet[i].vm_vmid = vm->vm_vmid;
>                       vionet[i].irq = pci_get_dev_irq(id);
>  
>                       event_set(&vionet[i].event, vionet[i].fd,
> @@ -1747,11 +1762,15 @@ virtio_init(struct vmop_create_params *v
>                       }
>                       vionet[i].lockedmac =
>                           vmc->vmc_ifflags[i] & VMIFF_LOCKED ? 1 : 0;
> +                     vionet[i].local =
> +                         vmc->vmc_ifflags[i] & VMIFF_LOCAL ? 1 : 0;
> +                     vionet[i].idx = i;
>  
> -                     log_debug("%s: vm \"%s\" vio%u lladdr %s%s",
> +                     log_debug("%s: vm \"%s\" vio%u lladdr %s%s%s",
>                           __func__, vcp->vcp_name, i,
>                           ether_ntoa((void *)vionet[i].mac),
> -                         vionet[i].lockedmac ? " (locked)" : "");
> +                         vionet[i].lockedmac ? ", locked" : "",
> +                         vionet[i].local ? ", local" : "");
>               }
>       }
>  
> Index: usr.sbin/vmd/virtio.h
> ===================================================================
> RCS file: /cvs/src/usr.sbin/vmd/virtio.h,v
> retrieving revision 1.14
> diff -u -p -u -p -r1.14 virtio.h
> --- usr.sbin/vmd/virtio.h     27 Mar 2017 00:28:04 -0000      1.14
> +++ usr.sbin/vmd/virtio.h     12 Apr 2017 11:16:51 -0000
> @@ -118,9 +118,13 @@ struct vionet_dev {
>       int fd, rx_added;
>       int rx_pending;
>       uint32_t vm_id;
> +     uint32_t vm_vmid;
>       int irq;
>       uint8_t mac[6];
> +
> +     int idx;
>       int lockedmac;
> +     int local;
>  };
>  
>  struct virtio_net_hdr {
> @@ -154,7 +158,8 @@ struct vmmci_dev {
>       int irq;
>  };
>  
> -void virtio_init(struct vmop_create_params *, int *, int *);
> +/* virtio.c */
> +void virtio_init(struct vmd_vm *, int *, int *);
>  uint32_t vring_size(uint32_t);
>  
>  int virtio_rnd_io(int, uint16_t, uint32_t *, uint8_t *, void *, uint8_t);
> @@ -181,3 +186,6 @@ void vmmci_ack(unsigned int);
>  void vmmci_timeout(int, short, void *);
>  
>  const char *vioblk_cmd_name(uint32_t);
> +
> +/* dhcp.c */
> +ssize_t dhcp_request(struct vionet_dev *, char *, size_t, char **);
> Index: usr.sbin/vmd/vm.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/vmd/vm.c,v
> retrieving revision 1.11
> diff -u -p -u -p -r1.11 vm.c
> --- usr.sbin/vmd/vm.c 27 Mar 2017 00:28:04 -0000      1.11
> +++ usr.sbin/vmd/vm.c 12 Apr 2017 11:16:51 -0000
> @@ -656,7 +656,7 @@ init_emulated_hw(struct vmop_create_para
>       pci_init();
>  
>       /* Initialize virtio devices */
> -     virtio_init(vmc, child_disks, child_taps);
> +     virtio_init(current_vm, child_disks, child_taps);
>  }
>  
>  /*
> Index: usr.sbin/vmd/vm.conf.5
> ===================================================================
> RCS file: /cvs/src/usr.sbin/vmd/vm.conf.5,v
> retrieving revision 1.17
> diff -u -p -u -p -r1.17 vm.conf.5
> --- usr.sbin/vmd/vm.conf.5    25 Mar 2017 16:28:25 -0000      1.17
> +++ usr.sbin/vmd/vm.conf.5    12 Apr 2017 11:16:51 -0000
> @@ -112,7 +112,7 @@ is specified.
>  Do not start this VM.
>  .It Cm disk Ar path
>  Disk image file (may be specified multiple times to add multiple disk images).
> -.It Cm interface Oo name Oc Op Brq ...
> +.It Oo Cm local Oc Cm interface Oo name Oc Op Brq ...
>  Network interface to add to the VM.
>  The optional
>  .Ar name
> @@ -123,6 +123,7 @@ to select the next available
>  interface on the VM host side (the default) or
>  .Ar tapN
>  to select a specific one.
> +.Pp
>  Valid options are:
>  .Bl -tag -width Ds
>  .It Cm group Ar group-name
> @@ -158,6 +159,13 @@ This is the default.
>  .It Cm down
>  Stop the interface from forwarding packets.
>  .El
> +.Pp
> +A
> +.Cm local
> +interface will auto-generate an IPv4 subnet for the interface,
> +configure a gateway address on the VM host side,
> +and run a simple DHCP (BOOTP) server for the VM.
> +This option can be used for layer 3 mode without configuring a switch.
>  .It Cm interfaces Ar count
>  Optional minimum number of network interfaces to add to the VM.
>  If the
> Index: usr.sbin/vmd/vmd.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/vmd/vmd.c,v
> retrieving revision 1.56
> diff -u -p -u -p -r1.56 vmd.c
> --- usr.sbin/vmd/vmd.c        6 Apr 2017 18:07:13 -0000       1.56
> +++ usr.sbin/vmd/vmd.c        12 Apr 2017 11:16:51 -0000
> @@ -1045,3 +1045,15 @@ get_string(uint8_t *ptr, size_t len)
>  
>       return strndup(ptr, i);
>  }
> +
> +uint32_t
> +prefixlen2mask(uint8_t prefixlen)
> +{
> +     if (prefixlen == 0)
> +             return (0);
> +
> +     if (prefixlen > 32)
> +             prefixlen = 32;
> +
> +     return (htonl(0xffffffff << (32 - prefixlen)));
> +}
> Index: usr.sbin/vmd/vmd.h
> ===================================================================
> RCS file: /cvs/src/usr.sbin/vmd/vmd.h,v
> retrieving revision 1.50
> diff -u -p -u -p -r1.50 vmd.h
> --- usr.sbin/vmd/vmd.h        6 Apr 2017 18:07:13 -0000       1.50
> +++ usr.sbin/vmd/vmd.h        12 Apr 2017 11:16:51 -0000
> @@ -23,6 +23,8 @@
>  #include <machine/vmmvar.h>
>  
>  #include <net/if.h>
> +#include <netinet/in.h>
> +#include <netinet/if_ether.h>
>  
>  #include <limits.h>
>  #include <stdio.h>
> @@ -48,6 +50,10 @@
>  #define VMD_SWITCH_TYPE              "bridge"
>  #define VM_DEFAULT_MEMORY    512
>  
> +/* 100.64.0.0/10 from rfc6598 (IPv4 Prefix for Shared Address Space) */
> +#define VMD_DHCP_PREFIX              "100.64.0.0"
> +#define VMD_DHCP_PREFIXLEN   10
> +
>  #ifdef VMD_DEBUG
>  #define dprintf(x...)   do { log_debug(x); } while(0)
>  #else
> @@ -74,6 +80,7 @@ enum imsg_type {
>       IMSG_VMDOP_PRIV_IFUP,
>       IMSG_VMDOP_PRIV_IFDOWN,
>       IMSG_VMDOP_PRIV_IFGROUP,
> +     IMSG_VMDOP_PRIV_IFADDR,
>       IMSG_VMDOP_VM_SHUTDOWN,
>       IMSG_VMDOP_VM_REBOOT
>  };
> @@ -102,6 +109,7 @@ struct vmop_ifreq {
>       uint32_t                 vfr_id;
>       char                     vfr_name[IF_NAMESIZE];
>       char                     vfr_value[VM_NAME_MAX];
> +     struct ifaliasreq        vfr_ifra;
>  };
>  
>  struct vmop_create_params {
> @@ -116,7 +124,8 @@ struct vmop_create_params {
>       unsigned int             vmc_ifflags[VMM_MAX_NICS_PER_VM];
>  #define VMIFF_UP             0x01
>  #define VMIFF_LOCKED         0x02
> -#define VMIFF_OPTMASK                VMIFF_LOCKED
> +#define VMIFF_LOCAL          0x04
> +#define VMIFF_OPTMASK                (VMIFF_LOCKED|VMIFF_LOCAL)
>       char                     vmc_ifnames[VMM_MAX_NICS_PER_VM][IF_NAMESIZE];
>       char                     vmc_ifswitch[VMM_MAX_NICS_PER_VM][VM_NAME_MAX];
>       char                     vmc_ifgroup[VMM_MAX_NICS_PER_VM][IF_NAMESIZE];
> @@ -198,6 +207,38 @@ struct vmd {
>       int                      vmd_ptmfd;
>  };
>  
> +static inline struct sockaddr_in *
> +ss2sin(struct sockaddr_storage *ss)
> +{
> +     return ((struct sockaddr_in *)ss);
> +}
> +
> +static inline struct sockaddr_in6 *
> +ss2sin6(struct sockaddr_storage *ss)
> +{
> +     return ((struct sockaddr_in6 *)ss);
> +}
> +
> +struct packet_ctx {
> +     uint8_t                  pc_htype;
> +     uint8_t                  pc_hlen;
> +     uint8_t                  pc_smac[ETHER_ADDR_LEN];
> +     uint8_t                  pc_dmac[ETHER_ADDR_LEN];
> +
> +     struct sockaddr_storage  pc_src;
> +     struct sockaddr_storage  pc_dst;
> +};
> +
> +/* packet.c */
> +ssize_t       assemble_hw_header(unsigned char *, size_t, size_t,
> +         struct packet_ctx *, unsigned int);
> +ssize_t       assemble_udp_ip_header(unsigned char *, size_t, size_t,
> +         struct packet_ctx *pc, unsigned char *, size_t);
> +ssize_t       decode_hw_header(unsigned char *, size_t, size_t, struct packet_ctx *,
> +         unsigned int);
> +ssize_t       decode_udp_ip_header(unsigned char *, size_t, size_t,
> +         struct packet_ctx *);
> +
>  /* vmd.c */
>  void  vmd_reload(unsigned int, const char *);
>  struct vmd_vm *vm_getbyid(uint32_t);
> @@ -216,6 +257,7 @@ void       vm_closetty(struct vmd_vm *);
>  void  switch_remove(struct vmd_switch *);
>  struct vmd_switch *switch_getbyname(const char *);
>  char *get_string(uint8_t *, size_t);
> +uint32_t prefixlen2mask(uint8_t);
>  
>  /* priv.c */
>  void  priv(struct privsep *, struct privsep_proc *);
> @@ -224,6 +266,7 @@ int        priv_findname(const char *, const c
>  int   priv_validgroup(const char *);
>  int   vm_priv_ifconfig(struct privsep *, struct vmd_vm *);
>  int   vm_priv_brconfig(struct privsep *, struct vmd_switch *);
> +uint32_t vm_priv_addr(uint32_t, int, int);
>  
>  /* vmm.c */
>  void  vmm(struct privsep *, struct privsep_proc *);
> Index: usr.sbin/vmctl/main.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/vmctl/main.c,v
> retrieving revision 1.25
> diff -u -p -u -p -r1.25 main.c
> --- usr.sbin/vmctl/main.c     6 Apr 2017 18:07:13 -0000       1.25
> +++ usr.sbin/vmctl/main.c     12 Apr 2017 11:16:51 -0000
> @@ -65,7 +65,7 @@ struct ctl_command ctl_commands[] = {
>       { "reload",     CMD_RELOAD,     ctl_reload,     "" },
>       { "reset",      CMD_RESET,      ctl_reset,      "[all|vms|switches]" },
>       { "start",      CMD_START,      ctl_start,      "\"name\""
> -         " [-c] [-b image] [-m size]\n"
> +         " [-Lc] [-b image] [-m size]\n"
>           "\t\t[-n switch] [-i count] [-d disk]*" },
>       { "status",     CMD_STATUS,     ctl_status,     "[id]" },
>       { "stop",       CMD_STOP,       ctl_stop,       "id" },
> @@ -539,7 +539,7 @@ ctl_start(struct parse_result *res, int 
>       argc--;
>       argv++;
>  
> -     while ((ch = getopt(argc, argv, "b:cm:n:d:i:")) != -1) {
> +     while ((ch = getopt(argc, argv, "b:cLm:n:d:i:")) != -1) {
>               switch (ch) {
>               case 'b':
>                       if (res->path)
> @@ -551,6 +551,10 @@ ctl_start(struct parse_result *res, int 
>                       break;
>               case 'c':
>                       tty_autoconnect = 1;
> +                     break;
> +             case 'L':
> +                     if (parse_network(res, ".") != 0)
> +                             errx(1, "invalid network: %s", optarg);
>                       break;
>               case 'm':
>                       if (res->size)
> Index: usr.sbin/vmctl/vmctl.8
> ===================================================================
> RCS file: /cvs/src/usr.sbin/vmctl/vmctl.8,v
> retrieving revision 1.27
> diff -u -p -u -p -r1.27 vmctl.8
> --- usr.sbin/vmctl/vmctl.8    25 Mar 2017 16:28:25 -0000      1.27
> +++ usr.sbin/vmctl/vmctl.8    12 Apr 2017 11:16:51 -0000
> @@ -72,8 +72,8 @@ Reset the configured switches.
>  .It Cm reset vms
>  Reset and terminate all VMs.
>  .It Xo Cm start Ar name
> +.Op Fl Lc
>  .Op Fl b Ar path
> -.Op Fl c
>  .Op Fl d Ar path
>  .Op Fl i Ar count
>  .Op Fl m Ar size
> @@ -91,6 +91,12 @@ Automatically connect to the VM console.
>  Disk image file (may be specified multiple times to add multiple disk images).
>  .It Fl i Ar count
>  Number of network interfaces to add to the VM.
> +.It Fl L
> +Add a local network interface.
> +.Xr vmd 8
> +will auto-generate an IPv4 subnet for the interface,
> +configure a gateway address on the VM host side,
> +and run a simple DHCP (BOOTP) server for the VM.
>  .It Fl m Ar size
>  Memory
>  .Ar size
> Index: usr.sbin/vmctl/vmctl.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/vmctl/vmctl.c,v
> retrieving revision 1.29
> diff -u -p -u -p -r1.29 vmctl.c
> --- usr.sbin/vmctl/vmctl.c    6 Apr 2017 18:07:13 -0000       1.29
> +++ usr.sbin/vmctl/vmctl.c    12 Apr 2017 11:16:51 -0000
> @@ -123,8 +123,16 @@ vm_start(uint32_t start_id, const char *
>       for (i = 0 ; i < ndisks; i++)
>               strlcpy(vcp->vcp_disks[i], disks[i], VMM_MAX_PATH_DISK);
>       for (i = 0 ; i < nnics; i++) {
> -             strlcpy(vmc->vmc_ifswitch[i], nics[i], IF_NAMESIZE);
>               vmc->vmc_ifflags[i] = VMIFF_UP;
> +
> +             if (strcmp(".", nics[i]) == 0) {
> +                     /* Add a "local" interface */
> +                     strlcpy(vmc->vmc_ifswitch[i], "", IF_NAMESIZE);
> +                     vmc->vmc_ifflags[i] |= VMIFF_LOCAL;
> +             } else {
> +                     /* Add an interface to a switch */
> +                     strlcpy(vmc->vmc_ifswitch[i], nics[i], IF_NAMESIZE);
> +             }
>       }
>       if (name != NULL)
>               strlcpy(vcp->vcp_name, name, VMM_MAX_NAME_LEN);
> 

-- 

Reply via email to