On Fri, Feb 28, 2014 at 08:28:11AM +0000, Anton Ivanov (antivano) wrote: I didn't check all of Paolo and Eric's comments. Feel free to skip comments that have already been made.
> diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h > index 45588d7..865f1c2 100644 > --- a/include/qemu/sockets.h > +++ b/include/qemu/sockets.h > @@ -79,6 +79,7 @@ int socket_dgram(SocketAddress *remote, SocketAddress > *local, Error **errp); > > /* Old, ipv4 only bits. Don't use for new code. */ > int parse_host_port(struct sockaddr_in *saddr, const char *str); > +int parse_host_port6(struct sockaddr_in6 *saddr, const char *str); > int socket_init(void); > > #endif /* QEMU_SOCKET_H */ Paolo posted ipv4/ipv6 follow-ups and suggestions, so maybe you won't need this in the next version. But right off the bat, adding a utility function to a common source file is a good indicator that this should be a separate patch. Splitting the code up into multiple patches gives you a chance to explain the rationale for one change at a time, aids bisectability, and makes code review easier. > diff --git a/net/Makefile.objs b/net/Makefile.objs > index 4854a14..364c785 100644 > --- a/net/Makefile.objs > +++ b/net/Makefile.objs > @@ -2,6 +2,7 @@ common-obj-y = net.o queue.o checksum.o util.o hub.o > common-obj-y += socket.o > common-obj-y += dump.o > common-obj-y += eth.o > +common-obj-y += l2tpv3.o > common-obj-$(CONFIG_POSIX) += tap.o > common-obj-$(CONFIG_LINUX) += tap-linux.o > common-obj-$(CONFIG_WIN32) += tap-win32.o > diff --git a/net/clients.h b/net/clients.h > index 7793294..ea2a831 100644 > --- a/net/clients.h > +++ b/net/clients.h > @@ -47,6 +47,11 @@ int net_init_tap(const NetClientOptions *opts, const char > *name, > int net_init_bridge(const NetClientOptions *opts, const char *name, > NetClientState *peer); > > +int net_init_raw(const NetClientOptions *opts, const char *name, > + NetClientState *peer); > + Think you already noticed, but the 'raw' net client code slipped into this patch. Please split it into a separate patch since it's not directly part of the L2TPv3 code changes. I suggest making the 'raw' net client a completely separate patch series. Post a separate email thread so it can be reviewed and merged independently of L2TPv3. > +int net_init_l2tpv3(const NetClientOptions *opts, const char *name, > + NetClientState *peer); > #ifdef CONFIG_VDE > int net_init_vde(const NetClientOptions *opts, const char *name, > NetClientState *peer); > diff --git a/net/l2tpv3.c b/net/l2tpv3.c > new file mode 100644 > index 0000000..4b5d8ee > --- /dev/null > +++ b/net/l2tpv3.c > @@ -0,0 +1,630 @@ > +/* > + * QEMU System Emulator > + * > + * Copyright (c) 2003-2008 Fabrice Bellard > + * Copyright (c) 2012 Cisco Systems > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > copy > + * of this software and associated documentation files (the "Software"), to > deal > + * in the Software without restriction, including without limitation the > rights > + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell > + * copies of the Software, and to permit persons to whom the Software is > + * furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > FROM, > + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN > + * THE SOFTWARE. > + */ > + > +#include "net/l2tpv3.h" > +#include "config-host.h" > +#include "config-host.h" > +#include "net/net.h" > +#include "clients.h" > +#include "monitor/monitor.h" > +#include "qemu-common.h" > +#include "qemu/error-report.h" > +#include "qemu/option.h" > +#include "qemu/sockets.h" > +#include "qemu/iov.h" > +#include "qemu/main-loop.h" > +#include <linux/ip.h> Please #include <system/headers.h> before #include "user/headers.h" just to make sure our user headers don't interfere with system headers. For example by defining MIN()/MAX() or other macros. > + > + > + > + > +#define PAGE_SIZE 4096 If you really *need* the page size, please use sysconf(_SC_PAGESIZE). If you just need a big buffer size, please rename this to L2TPV3_BUFSIZE or similar to avoid linking the name to page size. > +#define MAX_L2TPV3_IOVCNT 64 > + > +typedef struct NetL2TPV3State { > + NetClientState nc; > + int fd; > + int state; /* 0 = getting length, 1 = getting data */ > + unsigned int index; > + unsigned int packet_len; state, index, and packet_len are unused. Please remove. > + /* > + these are used for xmit - that happens packet a time > + and for first sign of life packet (easier to parse that once) > + > + */ > + uint8_t * header_buf; > + uint8_t * buf; > + struct iovec * vec; > + /* > + these are used for receive - try to "eat" up to 32 packets at a time It's best to avoid referring explicitly to 32 here. It can easily get out-of-sync with the code. > + */ > + struct mmsghdr * msgvec; > + /* > + contains inet host and port destination if > + connectionless (SOCK_DGRAM). Will be NULL if > + in listen mode waiting for first connect > + > + */ > + struct sockaddr_storage * dgram_dst; > + uint64_t rx_cookie; > + uint64_t tx_cookie; > + uint32_t rx_session; > + uint32_t tx_session; > + uint32_t header_size; > + uint32_t counter; > + uint32_t dst_size; > + uint32_t new_mode; Why is it called "new" mode? > + > + /* offsets */ > + > + uint32_t offset; /* main offset == header offset */ I suggest using a descriptive name. header_offset? > + uint32_t cookie_offset; > + uint32_t counter_offset; > + uint32_t session_offset; > + > +} NetL2TPV3State; > + > +typedef struct NetL2TPV3ListenState { > + NetClientState nc; > + char *model; > + char *name; > + int fd; > +} NetL2TPV3ListenState; Unused, please remove. > +static ssize_t net_l2tpv3_receive_dgram(NetClientState *nc, const uint8_t > *buf, size_t size) > +{ > + NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc); > + > + struct iovec * vec; > + > + struct msghdr message; > + l2tpv3_form_header(s); > + vec = s->vec; > + vec->iov_base = s->header_buf; > + vec->iov_len = s->offset; > + vec++; > + vec->iov_base = (void *) buf; > + vec->iov_len = size; > + > + if (s->dgram_dst) { > + message.msg_name = s->dgram_dst; > + message.msg_namelen = s->dst_size; > + message.msg_iov = (struct iovec *) s->vec; > + message.msg_iovlen = 2; > + message.msg_control = NULL; > + message.msg_controllen = 0; > + message.msg_flags = 0; > + size = sendmsg(s->fd, &message, 0) - s->offset; Explicit sendmesg() -1 return handling would be nice. Also, please use ssize_t (signed) for manipulating the return value instead of size_t (unsigned). What happens if the error was EINTR we can retry. If the error was EAGAIN we should return 0 and wait for the fd to become writable. When it becomes writable we should flush the queue (see qemu_net_queue_flush()). Other net clients already do this, so there is example code to look at. > +static void net_l2tpv3_send(void *opaque) > +{ > + NetL2TPV3State *s = opaque; > + > + int i, count, offset; > + > + struct mmsghdr * msgvec; > + struct iovec * vec; > + > + > + msgvec = s->msgvec; > + offset = s->offset; > + if ((!(s->new_mode & NEW_MODE_IP_VERSION)) && (!(s->new_mode & > NEW_MODE_UDP))){ > + offset += sizeof(struct iphdr); > + } > + count = recvmmsg(s->fd, msgvec, MAX_L2TPV3_IOVCNT/2, MSG_DONTWAIT, NULL); > + for (i=0;i<count;i++) { > + if (msgvec->msg_len > 0) { > + vec = msgvec->msg_hdr.msg_iov; > + if (l2tpv3_verify_header(s, vec->iov_base) == 0) { Header verification should check the size of the packet too. It must be large enough for the L2TPv3 header, otherwise we'd access stale data that happened to be in vec->iov_base... > + //vec->iov_len = offset; /* reset size just in case*/ > + vec++; > + qemu_send_packet(&s->nc, vec->iov_base, msgvec->msg_len - > offset); ...and if the header check happened to pass, the msgvec->msg_len - offset argument value will be negative (int)! > + } else { > + fprintf(stderr, "l2tpv3 header verification failed\n"); > + vec->iov_len = offset; > + vec++; > + } > + //vec->iov_len = PAGE_SIZE; /* reset for next read */ I think it *is* necessary to reset ->iov_len for both msgvec iovecs. > +static int net_l2tpv3_init(NetClientState *peer, > + const char *name, > + const char *src, > + const char *dst, > + const char *new_mode, > + const char *tx_cookie, > + const char *rx_cookie, > + const char *tx_session, > + const char *rx_session > +) > +{ > + NetL2TPV3State *s; > + NetClientState *nc; > + > + int fd, i; > + > + int sock_family, sock_type, sock_proto; > + unsigned int temp; > + > + struct mmsghdr * msgvec; > + struct iovec * iov; > + > + > + struct sockaddr_storage LocalSock; > + > + > + fprintf(stderr, "l2tpv3 user init mode %s\n", new_mode); If you want observability, please use trace events: docs/tracing.txt. You can define trace events in ./trace-events and they can be used with DTrace/SystemTap/LTTng/ftrace/stderr. It's often useful to define trace events for important state transitions in the code (e.g. creation, starting a connection, completing a connection, sending a packet, receiving a packet, disconnecting, etc). > + memset(&LocalSock, 0 , sizeof(LocalSock)); > + > + nc = qemu_new_net_client(&net_l2tpv3_info, peer, "l2tpv3", name); > + > + s = DO_UPCAST(NetL2TPV3State, nc, nc); > + > + s->header_buf = g_malloc(256); Can you use C structs and unions instead of choosing an arbitrary 256-byte size and calculating offsets at runtime? typedef struct { uint16_t flags; uint16_t reserved; uint32_t session_id; union { uint32_t cookie32; uint64_t cookie64; }; } QEMU_PACKED L2TPv3SessionHeader; Or perhaps something like: typedef union { L2TPv3SessionHeader32 session32; L2TPv3SessionHeader64 session64; L2TPv3DataHeader data; } L2TPv3SessionHeader; (I haven't checked which approach is simplest but it would make the code clearer and avoid byte manipulation.) > + if (s->header_buf == NULL) { > + fprintf(stderr, "Failed to allocate header buffer \n"); > + return -1; > + } g_malloc() never fails, it calls abort(3) on out-of-memory. The same applies to other g_malloc() calls in this patch. > + s->buf = g_malloc(PAGE_SIZE); > + > + if (s->buf == NULL) { > + fprintf(stderr, "Failed to allocate packet buffer \n"); > + return -1; > + } This buffer isn't used? > + s->vec = g_malloc(sizeof(struct iovec) * MAX_L2TPV3_IOVCNT);; Slightly more concise way to say the same thing: s->vec = g_new(struct iovec, MAX_L2TPV3_IOVCNT); > + if (dst && (strlen(dst) > 0 )) { > + /* we now allocate it only if it we are not "listening" */ > + s->dgram_dst = malloc(sizeof(struct sockaddr_storage)); Please use g_malloc()/g_new() instead of malloc(3). > + } else { > + sock_type = SOCK_RAW; > + sock_proto = 0x73; Please use the IPPROTO_L2TP constant instead of a magic number. > + for (i=0;i<MAX_L2TPV3_IOVCNT/2;i++) { > + /* we need to allocate these only for the first time and first buffer */ > + msgvec->msg_hdr.msg_name = NULL; > + msgvec->msg_hdr.msg_namelen = 0; > + iov = g_malloc(sizeof(struct iovec) * 2); > + if (iov == NULL) { > + fprintf(stderr, "Failed to allocate receive packet vec \n"); > + return -1; > + } > + msgvec->msg_hdr.msg_iov = iov; > + if ((!(s->new_mode & NEW_MODE_IP_VERSION)) && (!(s->new_mode & > NEW_MODE_UDP))){ > + iov->iov_base = g_malloc(s->offset + sizeof(struct iphdr)); /* fix > for ipv4 raw */; Is there a way to disable the IP header included in received packets? I haven't looked into how IP_HDRINCL works... > + } else { > + iov->iov_base = g_malloc(s->offset); > + } > + > + if (iov->iov_base == NULL) { > + fprintf(stderr, "Failed to allocate receive packet vec \n"); > + return -1; > + } > + if ((!(s->new_mode & NEW_MODE_IP_VERSION)) && (!(s->new_mode & > NEW_MODE_UDP))){ > + iov->iov_len = s->offset + sizeof (struct iphdr); > + } else { > + iov->iov_len = s->offset; > + } I suggest setting iov_len first, and then allocating iov_base using that size. This way you don't duplicate the IPv4 raw check. > + qemu_set_fd_handler(s->fd, net_l2tpv3_send, NULL, s); > + > + if (!s) { > + fprintf (stderr, "l2tpv3_open : failed to set fd handler\n"); > + return -1; > + } > + > + /* this needs fixing too */ Please fix it :) > + > + snprintf(s->nc.info_str, sizeof(s->nc.info_str), > + "l2tpv3: connected"); > + return 0; > + > +} > + > +int net_init_l2tpv3(const NetClientOptions *opts, > + const char *name, > + NetClientState *peer) { > + > + > + const NetdevL2TPv3Options * l2tpv3; > + assert(opts->kind == NET_CLIENT_OPTIONS_KIND_L2TPV3); > + l2tpv3 = opts->l2tpv3; > + > + > + if (!(l2tpv3->has_src && l2tpv3->has_mode && l2tpv3->has_txsession && > l2tpv3->has_rxsession)) { > + error_report("src=, dst=, txsession=, rxsession= and mode= are > required for l2tpv3"); > + return -1; > + } If they are required then I'm not sure why they were declared optional in qapi-schema.json. Drop the '*' in the schema and these fields become mandatory. > + > + > + /* this needs cleaning up for new API */ > + > + if (net_l2tpv3_init( > + peer, > + name, > + l2tpv3->src, > + l2tpv3->dst, > + l2tpv3->mode, > + l2tpv3->txcookie, > + l2tpv3->rxcookie, > + l2tpv3->txsession, > + l2tpv3->rxsession > + > + ) == -1) { > + return -1; > + } > + return 0; > +} > diff --git a/net/l2tpv3.h b/net/l2tpv3.h > new file mode 100644 > index 0000000..822e0b0 > --- /dev/null > +++ b/net/l2tpv3.h > @@ -0,0 +1,65 @@ A header file is not necessary. These constants are internal to the L2TPv3 implementation, please put them in the .c file. > +/* > + * QEMU System Emulator > + * > + * Copyright (c) 2003-2008 Fabrice Bellard > + * Copyright (c) 2012 Cisco Systems > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > copy > + * of this software and associated documentation files (the "Software"), to > deal > + * in the Software without restriction, including without limitation the > rights > + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell > + * copies of the Software, and to permit persons to whom the Software is > + * furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > FROM, > + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN > + * THE SOFTWARE. > + */ > + > +#include "qemu-common.h" > + > +#ifndef QEMU_NET_L2TPV3_H > +#define QEMU_NET_L2TPV3_H > + > +#define L2TPV3_BUFSIZE 2048 Hey, I see you did define an L2TPV3_BUFSIZE at one point in time after all ;-). But it's unused. > + > +#define NEW_MODE_IP_VERSION 1 /* on for v6, off for v4 */ > +#define NEW_MODE_UDP 2 /* on for udp, off for raw ip */ > +#define NEW_MODE_COOKIE 4 /* cookie present */ > +#define NEW_MODE_COOKIE_SIZE 8 /* on for 64 bit */ > +#define NEW_MODE_NO_COUNTER 16 /* DT - no counter */ > + > +/* legacy modes */ > + > +/* mode 0 */ > + > +#define LEGACY_UDP6_64_NO_COUNTER (NEW_MODE_IP_VERSION + NEW_MODE_UDP + > NEW_MODE_COOKIE + NEW_MODE_COOKIE_SIZE + NEW_MODE_NO_COUNTER) > + > +#define LEGACY_MODE0 LEGACY_UDP6_64_NO_COUNTER > + > +/* mode 1 */ > + > +#define LEGACY_IP6_64_NO_COUNTER (NEW_MODE_IP_VERSION + NEW_MODE_COOKIE + > NEW_MODE_COOKIE_SIZE + NEW_MODE_NO_COUNTER) > + > +#define LEGACY_MODE1 LEGACY_IP6_64_NO_COUNTER > + > +/* mode 2 */ > + > +#define LEGACY_UDP4_64_COUNTER (NEW_MODE_COOKIE + NEW_MODE_UDP + > NEW_MODE_COOKIE_SIZE ) > + > +#define LEGACY_MODE2 LEGACY_UDP4_64_COUNTER The legacy mode constants are not used, please remove. > diff --git a/qapi-schema.json b/qapi-schema.json > index 83fa485..192d19c 100644 > --- a/qapi-schema.json > +++ b/qapi-schema.json > @@ -2941,6 +2941,45 @@ > '*udp': 'str' } } > > ## > +# @NetdevL2TPv3Options > +# > +# Connect the VLAN to Ethernet over L2TPv3 Static tunnel The term "VLAN" in QEMU is outdated. QEMU now uses a peer-to-peer topology of NetClientState instances. I suggest "Connect to an Ethernet-over-L2TPv3 static tunnel" > +# > +# @fd: #optional file descriptor of an already opened socket This argument isn't used. > +# > +# @src: source address > +# > +# @dst: #optional destination address > +# > +# @mode: bitmask mode for the tunnel (see l2tpv3.h for actual definition) > +# > +# @txcookie:#optional 32 or 64 bit tx cookie for the tunnel (set mode for > actual type selection) > +# > +# @rxcookie:#optional 32 or 64 bit rx cookie for the tunnel (set mode for > actual type selection) > +# > +# @txsession: tx 32 bit session > +# > +# @rxsession: rx 32 bit session > +# > +# > +# Since 1.0 > +## > +## > +{ 'type': 'NetdevL2TPv3Options', > + 'data': { > + '*fd': 'str', > + '*src': 'str', > + '*dst': 'str', > + '*mode': 'str', > + '*txcookie': 'str', > + '*rxcookie': 'str', > + '*txsession': 'str', > + '*rxsession': 'str' > + > +} } I guess you already got plenty of feedback from Eric and Paolo on how to type these parameters and avoid the bitmask :-).