Hi Xie,

(2014/11/17 15:04), Tetsuya Mukawa wrote:
> Hi Xie,
>
>
> (2014/11/15 10:14), Huawei Xie wrote:
>> implement socket server
>> fd event dispatch mechanism
>> vhost sock  message handling
>> memory map for each region
>> VHOST_USER_SET_VRING_KICK_FD as the indicator that vring is available
>> VHOST_USER_GET_VRING_BASE as the message that vring should be released
>>   
>> The message flow between vhost-user and vhost-cuse is kindof different,
>> which makes virtio-net common message handler layer difficult and 
>> complicated to handle
>> both cases in new_device/destroy_device/memory map/resource cleanup.
>>
>> Will only leave the most common messag handling in virtio-net, and move the
>> control logic to cuse/fuse layer.  
>>
>>
>> Signed-off-by: Huawei Xie <huawei.xie at intel.com>
> Great patch!
> I guess we can start from this patch to implement vhost-user and
> abstraction layer.
>
> I've checked patch.
>
> 1. White space, tab and indent patch.
> I will send patch that clears white space, tab and indent. Could you
> please check it?
> It might be difficult to see the difference, if your editor doesn't show
> a space or tab.
>
> 2. Some files are based on old codes.
> At least, following patch is not included.
> - vhost: fix build without unused result
> Also vhost_rxtx.c isn't probably based on latest code.
>
> 3. Device abstraction layer code
> I will send the device abstraction layer code after this email.
> Anyway, I guess we need to decide whether, or not we still keep
> vhost-cuse code
Additionally, the above patches are based on your RFC patch.

Tetsuya

>
> 4. Multiple devices operation.
> For example, when thread1 opens vhost-user device1 and thread2 opens
> vhost-user device2,
> each thread may want to register own callbacks.
> Current implementation may not allow this.
> I guess we need to eliminate global variables in librte_vhost as much as
> possible.
>
> Thanks,
> Tetsuya
>
>> ---
>>  lib/librte_vhost/Makefile                     |  14 +-
>>  lib/librte_vhost/eventfd_link/eventfd_link.c  |  27 +-
>>  lib/librte_vhost/eventfd_link/eventfd_link.h  |  48 +-
>>  lib/librte_vhost/libvirt/qemu-wrap.py         | 367 ---------------
>>  lib/librte_vhost/rte_virtio_net.h             | 106 ++---
>>  lib/librte_vhost/vhost-cuse/vhost-net-cdev.c  | 436 ++++++++++++++++++
>>  lib/librte_vhost/vhost-cuse/virtio-net-cdev.c | 314 +++++++++++++
>>  lib/librte_vhost/vhost-cuse/virtio-net-cdev.h |  43 ++
>>  lib/librte_vhost/vhost-net-cdev.c             | 389 ----------------
>>  lib/librte_vhost/vhost-net-cdev.h             | 113 -----
>>  lib/librte_vhost/vhost-user/fd_man.c          | 158 +++++++
>>  lib/librte_vhost/vhost-user/fd_man.h          |  31 ++
>>  lib/librte_vhost/vhost-user/vhost-net-user.c  | 417 +++++++++++++++++
>>  lib/librte_vhost/vhost-user/vhost-net-user.h  |  74 +++
>>  lib/librte_vhost/vhost-user/virtio-net-user.c | 208 +++++++++
>>  lib/librte_vhost/vhost-user/virtio-net-user.h |  11 +
>>  lib/librte_vhost/vhost_rxtx.c                 | 625 
>> ++++----------------------
>>  lib/librte_vhost/virtio-net.c                 | 450 ++++---------------
>>  18 files changed, 1939 insertions(+), 1892 deletions(-)
>>  delete mode 100755 lib/librte_vhost/libvirt/qemu-wrap.py
>>  create mode 100644 lib/librte_vhost/vhost-cuse/vhost-net-cdev.c
>>  create mode 100644 lib/librte_vhost/vhost-cuse/virtio-net-cdev.c
>>  create mode 100644 lib/librte_vhost/vhost-cuse/virtio-net-cdev.h
>>  delete mode 100644 lib/librte_vhost/vhost-net-cdev.c
>>  delete mode 100644 lib/librte_vhost/vhost-net-cdev.h
>>  create mode 100644 lib/librte_vhost/vhost-user/fd_man.c
>>  create mode 100644 lib/librte_vhost/vhost-user/fd_man.h
>>  create mode 100644 lib/librte_vhost/vhost-user/vhost-net-user.c
>>  create mode 100644 lib/librte_vhost/vhost-user/vhost-net-user.h
>>  create mode 100644 lib/librte_vhost/vhost-user/virtio-net-user.c
>>  create mode 100644 lib/librte_vhost/vhost-user/virtio-net-user.h
>>
>> diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
>> index c008d64..cb4e172 100644
>> --- a/lib/librte_vhost/Makefile
>> +++ b/lib/librte_vhost/Makefile
>> @@ -34,17 +34,19 @@ include $(RTE_SDK)/mk/rte.vars.mk
>>  # library name
>>  LIB = librte_vhost.a
>>  
>> -CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -D_FILE_OFFSET_BITS=64 -lfuse
>> +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I. -I vhost-user -I vhost-cuse -O3 
>> -D_FILE_OFFSET_BITS=64 -lfuse
>>  LDFLAGS += -lfuse
>>  # all source are stored in SRCS-y
>> -SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost-net-cdev.c virtio-net.c 
>> vhost_rxtx.c
>> +#SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost-cuse/vhost-net-cdev.c 
>> vhost-cuse/virtio-net-cdev.c
>> +
>> +SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost-user/fd_man.c 
>> vhost-user/vhost-net-user.c vhost-user/virtio-net-user.c
>> +
>> +SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += virtio-net.c vhost_rxtx.c
>>  
>>  # install includes
>>  SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h
>>  
>> -# dependencies
>> -DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_eal
>> -DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_ether
>> -DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_mbuf
>> +# this lib needs eal
>> +DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_eal lib/librte_mbuf
>>  
>>  include $(RTE_SDK)/mk/rte.lib.mk
>> diff --git a/lib/librte_vhost/eventfd_link/eventfd_link.c 
>> b/lib/librte_vhost/eventfd_link/eventfd_link.c
>> index 7755dd6..4c9b628 100644
>> --- a/lib/librte_vhost/eventfd_link/eventfd_link.c
>> +++ b/lib/librte_vhost/eventfd_link/eventfd_link.c
>> @@ -13,8 +13,7 @@
>>   *   General Public License for more details.
>>   *
>>   *   You should have received a copy of the GNU General Public License
>> - *   along with this program; if not, write to the Free Software
>> - *   Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 
>> USA.
>> + *   along with this program; If not, see <http://www.gnu.org/licenses/>.
>>   *   The full GNU General Public License is included in this distribution
>>   *   in the file called LICENSE.GPL.
>>   *
>> @@ -78,8 +77,7 @@ eventfd_link_ioctl(struct file *f, unsigned int ioctl, 
>> unsigned long arg)
>>  
>>      switch (ioctl) {
>>      case EVENTFD_COPY:
>> -            if (copy_from_user(&eventfd_copy, argp,
>> -                    sizeof(struct eventfd_copy)))
>> +            if (copy_from_user(&eventfd_copy, argp, sizeof(struct 
>> eventfd_copy)))
>>                      return -EFAULT;
>>  
>>              /*
>> @@ -88,28 +86,28 @@ eventfd_link_ioctl(struct file *f, unsigned int ioctl, 
>> unsigned long arg)
>>              task_target =
>>                      pid_task(find_vpid(eventfd_copy.target_pid), 
>> PIDTYPE_PID);
>>              if (task_target == NULL) {
>> -                    pr_debug("Failed to get mem ctx for target pid\n");
>> +                    printk(KERN_DEBUG "Failed to get mem ctx for target 
>> pid\n");
>>                      return -EFAULT;
>>              }
>>  
>>              files = get_files_struct(current);
>>              if (files == NULL) {
>> -                    pr_debug("Failed to get files struct\n");
>> +                    printk(KERN_DEBUG "Failed to get files struct\n");
>>                      return -EFAULT;
>>              }
>>  
>>              rcu_read_lock();
>>              file = fcheck_files(files, eventfd_copy.source_fd);
>>              if (file) {
>> -                    if (file->f_mode & FMODE_PATH ||
>> -                            !atomic_long_inc_not_zero(&file->f_count))
>> +                    if (file->f_mode & FMODE_PATH
>> +                            || !atomic_long_inc_not_zero(&file->f_count))
>>                              file = NULL;
>>              }
>>              rcu_read_unlock();
>>              put_files_struct(files);
>>  
>>              if (file == NULL) {
>> -                    pr_debug("Failed to get file from source pid\n");
>> +                    printk(KERN_DEBUG "Failed to get file from source 
>> pid\n");
>>                      return 0;
>>              }
>>  
>> @@ -128,25 +126,26 @@ eventfd_link_ioctl(struct file *f, unsigned int ioctl, 
>> unsigned long arg)
>>  
>>              files = get_files_struct(task_target);
>>              if (files == NULL) {
>> -                    pr_debug("Failed to get files struct\n");
>> +                    printk(KERN_DEBUG "Failed to get files struct\n");
>>                      return -EFAULT;
>>              }
>>  
>>              rcu_read_lock();
>>              file = fcheck_files(files, eventfd_copy.target_fd);
>>              if (file) {
>> -                    if (file->f_mode & FMODE_PATH ||
>> -                            !atomic_long_inc_not_zero(&file->f_count))
>> -                                    file = NULL;
>> +                    if (file->f_mode & FMODE_PATH
>> +                            || !atomic_long_inc_not_zero(&file->f_count))
>> +                            file = NULL;
>>              }
>>              rcu_read_unlock();
>>              put_files_struct(files);
>>  
>>              if (file == NULL) {
>> -                    pr_debug("Failed to get file from target pid\n");
>> +                    printk(KERN_DEBUG "Failed to get file from target 
>> pid\n");
>>                      return 0;
>>              }
>>  
>> +
>>              /*
>>               * Install the file struct from the target process into the
>>               * file desciptor of the source process,
>> diff --git a/lib/librte_vhost/eventfd_link/eventfd_link.h 
>> b/lib/librte_vhost/eventfd_link/eventfd_link.h
>> index ea619ec..38052e2 100644
>> --- a/lib/librte_vhost/eventfd_link/eventfd_link.h
>> +++ b/lib/librte_vhost/eventfd_link/eventfd_link.h
>> @@ -1,7 +1,4 @@
>>  /*-
>> - *  This file is provided under a dual BSD/GPLv2 license.  When using or
>> - *  redistributing this file, you may do so under either license.
>> - *
>>   * GPL LICENSE SUMMARY
>>   *
>>   *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
>> @@ -16,61 +13,28 @@
>>   *   General Public License for more details.
>>   *
>>   *   You should have received a copy of the GNU General Public License
>> - *   along with this program; if not, write to the Free Software
>> - *   Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 
>> USA.
>> + *   along with this program; If not, see <http://www.gnu.org/licenses/>.
>>   *   The full GNU General Public License is included in this distribution
>>   *   in the file called LICENSE.GPL.
>>   *
>>   *   Contact Information:
>>   *   Intel Corporation
>> - *
>> - * BSD LICENSE
>> - *
>> - *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
>> - *   All rights reserved.
>> - *
>> - *   Redistribution and use in source and binary forms, with or without
>> - *   modification, are permitted provided that the following conditions
>> - *   are met:
>> - *
>> - *   Redistributions of source code must retain the above copyright
>> - *   notice, this list of conditions and the following disclaimer.
>> - *   Redistributions in binary form must reproduce the above copyright
>> - *   notice, this list of conditions and the following disclaimer in
>> - *   the documentation and/or other materials provided with the
>> - *   distribution.
>> - *   Neither the name of Intel Corporation nor the names of its
>> - *   contributors may be used to endorse or promote products derived
>> - *   from this software without specific prior written permission.
>> - *
>> - *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> - *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> - *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> - *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> - *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> - *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> - *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> - *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> - *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> - *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> - *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> - *
>>   */
>>  
>>  #ifndef _EVENTFD_LINK_H_
>>  #define _EVENTFD_LINK_H_
>>  
>>  /*
>> - * ioctl to copy an fd entry in calling process to an fd in a target process
>> + *  ioctl to copy an fd entry in calling process to an fd in a target 
>> process
>>   */
>>  #define EVENTFD_COPY 1
>>  
>>  /*
>> - * arguements for the EVENTFD_COPY ioctl
>> + *  arguements for the EVENTFD_COPY ioctl
>>   */
>>  struct eventfd_copy {
>> -    unsigned target_fd; /* fd in the target pid */
>> -    unsigned source_fd; /* fd in the calling pid */
>> -    pid_t target_pid; /* pid of the target pid */
>> +    unsigned target_fd; /**< fd in the target pid */
>> +    unsigned source_fd; /**< fd in the calling pid */
>> +    pid_t target_pid;   /**< pid of the target pid */
>>  };
>>  #endif /* _EVENTFD_LINK_H_ */
>> diff --git a/lib/librte_vhost/libvirt/qemu-wrap.py 
>> b/lib/librte_vhost/libvirt/qemu-wrap.py
>> deleted file mode 100755
>> index e2d68a0..0000000
>> --- a/lib/librte_vhost/libvirt/qemu-wrap.py
>> +++ /dev/null
>> @@ -1,367 +0,0 @@
>> -#!/usr/bin/python
>> -#/*
>> -# *   BSD LICENSE
>> -# *
>> -# *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
>> -# *   All rights reserved.
>> -# *
>> -# *   Redistribution and use in source and binary forms, with or without
>> -# *   modification, are permitted provided that the following conditions
>> -# *   are met:
>> -# *
>> -# *     * Redistributions of source code must retain the above copyright
>> -# *       notice, this list of conditions and the following disclaimer.
>> -# *     * Redistributions in binary form must reproduce the above copyright
>> -# *       notice, this list of conditions and the following disclaimer in
>> -# *       the documentation and/or other materials provided with the
>> -# *       distribution.
>> -# *     * Neither the name of Intel Corporation nor the names of its
>> -# *       contributors may be used to endorse or promote products derived
>> -# *       from this software without specific prior written permission.
>> -# *
>> -# *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> -# *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> -# *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> -# *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> -# *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> -# *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> -# *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> -# *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> -# *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> -# *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> -# *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> -# */
>> -
>> -#####################################################################
>> -# This script is designed to modify the call to the QEMU emulator
>> -# to support userspace vhost when starting a guest machine through
>> -# libvirt with vhost enabled. The steps to enable this are as follows
>> -# and should be run as root:
>> -#
>> -# 1. Place this script in a libvirtd's binary search PATH ($PATH)
>> -#    A good location would be in the same directory that the QEMU
>> -#    binary is located
>> -#
>> -# 2. Ensure that the script has the same owner/group and file
>> -#    permissions as the QEMU binary
>> -#
>> -# 3. Update the VM xml file using "virsh edit VM.xml"
>> -#
>> -#    3.a) Set the VM to use the launch script
>> -#
>> -#           Set the emulator path contained in the
>> -#           <emulator><emulator/> tags
>> -#
>> -#           e.g replace <emulator>/usr/bin/qemu-kvm<emulator/>
>> -#        with    <emulator>/usr/bin/qemu-wrap.py<emulator/>
>> -#
>> -#    3.b) Set the VM's device's to use vhost-net offload
>> -#
>> -#           <interface type="network">
>> -#           <model type="virtio"/>
>> -#           <driver name="vhost"/>
>> -#           <interface/>
>> -#
>> -# 4. Enable libvirt to access our userpace device file by adding it to
>> -#    controllers cgroup for libvirtd using the following steps
>> -#
>> -#   4.a) In /etc/libvirt/qemu.conf add/edit the following lines:
>> -#         1) cgroup_controllers = [ ... "devices", ... ]
>> -#             2) clear_emulator_capabilities = 0
>> -#         3) user = "root"
>> -#         4) group = "root"
>> -#         5) cgroup_device_acl = [
>> -#                "/dev/null", "/dev/full", "/dev/zero",
>> -#                "/dev/random", "/dev/urandom",
>> -#                "/dev/ptmx", "/dev/kvm", "/dev/kqemu",
>> -#                "/dev/rtc", "/dev/hpet", "/dev/net/tun",
>> -#                "/dev/<devbase-name>-<index>",
>> -#            ]
>> -#
>> -#   4.b) Disable SELinux or set to permissive mode
>> -#
>> -#   4.c) Mount cgroup device controller
>> -#        "mkdir /dev/cgroup"
>> -#        "mount -t cgroup none /dev/cgroup -o devices"
>> -#
>> -#   4.d) Set hugetlbfs_mount variable - ( Optional )
>> -#        VMs using userspace vhost must use hugepage backed
>> -#        memory. This can be enabled in the libvirt XML
>> -#        config by adding a memory backing section to the
>> -#        XML config e.g.
>> -#             <memoryBacking>
>> -#             <hugepages/>
>> -#             </memoryBacking>
>> -#        This memory backing section should be added after the
>> -#        <memory> and <currentMemory> sections. This will add
>> -#        flags "-mem-prealloc -mem-path <path>" to the QEMU
>> -#        command line. The hugetlbfs_mount variable can be used
>> -#        to override the default <path> passed through by libvirt.
>> -#
>> -#        if "-mem-prealloc" or "-mem-path <path>" are not passed
>> -#        through and a vhost device is detected then these options will
>> -#        be automatically added by this script. This script will detect
>> -#        the system hugetlbfs mount point to be used for <path>. The
>> -#        default <path> for this script can be overidden by the
>> -#        hugetlbfs_dir variable in the configuration section of this script.
>> -#
>> -#
>> -#   4.e) Restart the libvirtd system process
>> -#        e.g. on Fedora "systemctl restart libvirtd.service"
>> -#
>> -#
>> -#   4.f) Edit the Configuration Parameters section of this script
>> -#        to point to the correct emulator location and set any
>> -#        addition options
>> -#
>> -# The script modifies the libvirtd Qemu call by modifying/adding
>> -# options based on the configuration parameters below.
>> -# NOTE:
>> -#     emul_path and us_vhost_path must be set
>> -#     All other parameters are optional
>> -#####################################################################
>> -
>> -
>> -#############################################
>> -# Configuration Parameters
>> -#############################################
>> -#Path to QEMU binary
>> -emul_path = "/usr/local/bin/qemu-system-x86_64"
>> -
>> -#Path to userspace vhost device file
>> -# This filename should match the --dev-basename --dev-index parameters of
>> -# the command used to launch the userspace vhost sample application e.g.
>> -# if the sample app lauch command is:
>> -#    ./build/vhost-switch ..... --dev-basename usvhost --dev-index 1
>> -# then this variable should be set to:
>> -#   us_vhost_path = "/dev/usvhost-1"
>> -us_vhost_path = "/dev/usvhost-1"
>> -
>> -#List of additional user defined emulation options. These options will
>> -#be added to all Qemu calls
>> -emul_opts_user = []
>> -
>> -#List of additional user defined emulation options for vhost only.
>> -#These options will only be added to vhost enabled guests
>> -emul_opts_user_vhost = []
>> -
>> -#For all VHOST enabled VMs, the VM memory is preallocated from hugetlbfs
>> -# Set this variable to one to enable this option for all VMs
>> -use_huge_all = 0
>> -
>> -#Instead of autodetecting, override the hugetlbfs directory by setting
>> -#this variable
>> -hugetlbfs_dir = ""
>> -
>> -#############################################
>> -
>> -
>> -#############################################
>> -# ****** Do Not Modify Below this Line ******
>> -#############################################
>> -
>> -import sys, os, subprocess
>> -
>> -
>> -#List of open userspace vhost file descriptors
>> -fd_list = []
>> -
>> -#additional virtio device flags when using userspace vhost
>> -vhost_flags = [ "csum=off",
>> -                "gso=off",
>> -                "guest_tso4=off",
>> -                "guest_tso6=off",
>> -                "guest_ecn=off"
>> -              ]
>> -
>> -
>> -#############################################
>> -# Find the system hugefile mount point.
>> -# Note:
>> -# if multiple hugetlbfs mount points exist
>> -# then the first one found will be used
>> -#############################################
>> -def find_huge_mount():
>> -
>> -    if (len(hugetlbfs_dir)):
>> -        return hugetlbfs_dir
>> -
>> -    huge_mount = ""
>> -
>> -    if (os.access("/proc/mounts", os.F_OK)):
>> -        f = open("/proc/mounts", "r")
>> -        line = f.readline()
>> -        while line:
>> -            line_split = line.split(" ")
>> -            if line_split[2] == 'hugetlbfs':
>> -                huge_mount = line_split[1]
>> -                break
>> -            line = f.readline()
>> -    else:
>> -        print "/proc/mounts not found"
>> -        exit (1)
>> -
>> -    f.close
>> -    if len(huge_mount) == 0:
>> -        print "Failed to find hugetlbfs mount point"
>> -        exit (1)
>> -
>> -    return huge_mount
>> -
>> -
>> -#############################################
>> -# Get a userspace Vhost file descriptor
>> -#############################################
>> -def get_vhost_fd():
>> -
>> -    if (os.access(us_vhost_path, os.F_OK)):
>> -        fd = os.open( us_vhost_path, os.O_RDWR)
>> -    else:
>> -        print ("US-Vhost file %s not found" %us_vhost_path)
>> -        exit (1)
>> -
>> -    return fd
>> -
>> -
>> -#############################################
>> -# Check for vhostfd. if found then replace
>> -# with our own vhost fd and append any vhost
>> -# flags onto the end
>> -#############################################
>> -def modify_netdev_arg(arg):
>> -    
>> -    global fd_list
>> -    vhost_in_use = 0
>> -    s = ''
>> -    new_opts = []
>> -    netdev_opts = arg.split(",")
>> -
>> -    for opt in netdev_opts:
>> -        #check if vhost is used
>> -        if "vhost" == opt[:5]:
>> -            vhost_in_use = 1
>> -        else:
>> -            new_opts.append(opt)
>> -
>> -    #if using vhost append vhost options
>> -    if vhost_in_use == 1:
>> -        #append vhost on option
>> -        new_opts.append('vhost=on')
>> -        #append vhostfd ption
>> -        new_fd = get_vhost_fd()
>> -        new_opts.append('vhostfd=' + str(new_fd))
>> -        fd_list.append(new_fd)
>> -
>> -    #concatenate all options
>> -    for opt in new_opts:
>> -        if len(s) > 0:
>> -                    s+=','
>> -
>> -        s+=opt
>> -
>> -    return s        
>> -
>> -
>> -#############################################
>> -# Main
>> -#############################################
>> -def main():
>> -
>> -    global fd_list
>> -    global vhost_in_use
>> -    new_args = []
>> -    num_cmd_args = len(sys.argv)
>> -    emul_call = ''
>> -    mem_prealloc_set = 0
>> -    mem_path_set = 0
>> -    num = 0;
>> -
>> -    #parse the parameters
>> -    while (num < num_cmd_args):
>> -        arg = sys.argv[num]
>> -
>> -            #Check netdev +1 parameter for vhostfd
>> -        if arg == '-netdev':
>> -            num_vhost_devs = len(fd_list)
>> -            new_args.append(arg)
>> -
>> -            num+=1
>> -            arg = sys.argv[num]
>> -            mod_arg = modify_netdev_arg(arg)
>> -            new_args.append(mod_arg)
>> -
>> -            #append vhost flags if this is a vhost device
>> -            # and -device is the next arg
>> -            # i.e -device -opt1,-opt2,...,-opt3,%vhost
>> -            if (num_vhost_devs < len(fd_list)):
>> -                num+=1
>> -                arg = sys.argv[num]
>> -                if arg == '-device':
>> -                    new_args.append(arg)
>> -                    num+=1
>> -                    new_arg = sys.argv[num]
>> -                    for flag in vhost_flags:
>> -                        new_arg = ''.join([new_arg,',',flag])
>> -                    new_args.append(new_arg)
>> -                else:
>> -                    new_args.append(arg)
>> -        elif arg == '-mem-prealloc':
>> -            mem_prealloc_set = 1
>> -            new_args.append(arg)
>> -        elif arg == '-mem-path':
>> -            mem_path_set = 1
>> -            new_args.append(arg)
>> -
>> -        else:
>> -            new_args.append(arg)
>> -
>> -        num+=1
>> -
>> -    #Set Qemu binary location
>> -    emul_call+=emul_path
>> -    emul_call+=" "
>> -
>> -    #Add prealloc mem options if using vhost and not already added
>> -    if ((len(fd_list) > 0) and (mem_prealloc_set == 0)):
>> -        emul_call += "-mem-prealloc "
>> -
>> -    #Add mempath mem options if using vhost and not already added
>> -    if ((len(fd_list) > 0) and (mem_path_set == 0)):
>> -        #Detect and add hugetlbfs mount point
>> -        mp = find_huge_mount()
>> -        mp = "".join(["-mem-path ", mp])
>> -        emul_call += mp
>> -        emul_call += " "
>> -
>> -
>> -    #add user options
>> -    for opt in emul_opts_user:
>> -        emul_call += opt
>> -        emul_call += " "
>> -
>> -    #Add add user vhost only options
>> -    if len(fd_list) > 0:
>> -        for opt in emul_opts_user_vhost:
>> -            emul_call += opt
>> -            emul_call += " "
>> -
>> -    #Add updated libvirt options
>> -    iter_args = iter(new_args)
>> -    #skip 1st arg i.e. call to this script
>> -    next(iter_args)
>> -    for arg in iter_args:
>> -        emul_call+=str(arg)
>> -        emul_call+= " "
>> -
>> -    #Call QEMU
>> -    subprocess.call(emul_call, shell=True)
>> -
>> -
>> -    #Close usvhost files
>> -    for fd in fd_list:
>> -        os.close(fd)
>> -
>> -
>> -if __name__ == "__main__":
>> -    main()
>> -
>> diff --git a/lib/librte_vhost/rte_virtio_net.h 
>> b/lib/librte_vhost/rte_virtio_net.h
>> index 00b1328..7a05dab 100644
>> --- a/lib/librte_vhost/rte_virtio_net.h
>> +++ b/lib/librte_vhost/rte_virtio_net.h
>> @@ -34,11 +34,6 @@
>>  #ifndef _VIRTIO_NET_H_
>>  #define _VIRTIO_NET_H_
>>  
>> -/**
>> - * @file
>> - * Interface to vhost net
>> - */
>> -
>>  #include <stdint.h>
>>  #include <linux/virtio_ring.h>
>>  #include <linux/virtio_net.h>
>> @@ -48,66 +43,38 @@
>>  #include <rte_mempool.h>
>>  #include <rte_mbuf.h>
>>  
>> -/* Used to indicate that the device is running on a data core */
>> -#define VIRTIO_DEV_RUNNING 1
>> -
>> -/* Backend value set by guest. */
>> -#define VIRTIO_DEV_STOPPED -1
>> -
>> +#define VIRTIO_DEV_RUNNING 1  /**< Used to indicate that the device is 
>> running on a data core. */
>> +#define VIRTIO_DEV_STOPPED -1 /**< Backend value set by guest. */
>>  
>>  /* Enum for virtqueue management. */
>>  enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
>>  
>> -#define BUF_VECTOR_MAX 256
>> -
>> -/**
>> - * Structure contains buffer address, length and descriptor index
>> - * from vring to do scatter RX.
>> - */
>> -struct buf_vector {
>> -    uint64_t buf_addr;
>> -    uint32_t buf_len;
>> -    uint32_t desc_idx;
>> -};
>> -
>>  /**
>>   * Structure contains variables relevant to RX/TX virtqueues.
>>   */
>>  struct vhost_virtqueue {
>> -    struct vring_desc       *desc;                  /**< Virtqueue 
>> descriptor ring. */
>> -    struct vring_avail      *avail;                 /**< Virtqueue 
>> available ring. */
>> -    struct vring_used       *used;                  /**< Virtqueue used 
>> ring. */
>> -    uint32_t                size;                   /**< Size of descriptor 
>> ring. */
>> -    uint32_t                backend;                /**< Backend value to 
>> determine if device should started/stopped. */
>> -    uint16_t                vhost_hlen;             /**< Vhost header 
>> length (varies depending on RX merge buffers. */
>> -    volatile uint16_t       last_used_idx;          /**< Last index used on 
>> the available ring */
>> -    volatile uint16_t       last_used_idx_res;      /**< Used for multiple 
>> devices reserving buffers. */
>> -    eventfd_t               callfd;                 /**< Currently unused 
>> as polling mode is enabled. */
>> -    eventfd_t               kickfd;                 /**< Used to notify the 
>> guest (trigger interrupt). */
>> -    struct buf_vector       buf_vec[BUF_VECTOR_MAX];        /**< for 
>> scatter RX. */
>> -} __rte_cache_aligned;
>> -
>> -/**
>> - * Device structure contains all configuration information relating to the 
>> device.
>> - */
>> -struct virtio_net {
>> -    struct vhost_virtqueue  *virtqueue[VIRTIO_QNUM];        /**< Contains 
>> all virtqueue information. */
>> -    struct virtio_memory    *mem;           /**< QEMU memory and memory 
>> region information. */
>> -    uint64_t                features;       /**< Negotiated feature set. */
>> -    uint64_t                device_fh;      /**< device identifier. */
>> -    uint32_t                flags;          /**< Device flags. Only used to 
>> check if device is running on data core. */
>> -    void                    *priv;          /**< private context */
>> +    struct vring_desc    *desc;             /**< descriptor ring. */
>> +    struct vring_avail   *avail;            /**< available ring. */
>> +    struct vring_used    *used;             /**< used ring. */
>> +    uint32_t             size;              /**< Size of descriptor ring. */
>> +    uint32_t             backend;           /**< Backend value to determine 
>> if device should be started/stopped. */
>> +    uint16_t             vhost_hlen;        /**< Vhost header length 
>> (varies depending on RX merge buffers. */
>> +    volatile uint16_t    last_used_idx;     /**< Last index used on the 
>> available ring. */
>> +    volatile uint16_t    last_used_idx_res; /**< Used for multiple devices 
>> reserving buffers. */
>> +    eventfd_t            callfd;            /**< Currently unused as 
>> polling mode is enabled. */
>> +    eventfd_t            kickfd;            /**< Used to notify the guest 
>> (trigger interrupt). */
>>  } __rte_cache_aligned;
>>  
>>  /**
>> - * Information relating to memory regions including offsets to addresses in 
>> QEMUs memory file.
>> + * Information relating to memory regions including offsets to
>> + * addresses in QEMUs memory file.
>>   */
>>  struct virtio_memory_regions {
>> -    uint64_t        guest_phys_address;     /**< Base guest physical 
>> address of region. */
>> -    uint64_t        guest_phys_address_end; /**< End guest physical address 
>> of region. */
>> -    uint64_t        memory_size;            /**< Size of region. */
>> -    uint64_t        userspace_address;      /**< Base userspace address of 
>> region. */
>> -    uint64_t        address_offset;         /**< Offset of region for 
>> address translation. */
>> +    uint64_t    guest_phys_address;     /**< Base guest physical address of 
>> region. */
>> +    uint64_t    guest_phys_address_end; /**< End guest physical address of 
>> region. */
>> +    uint64_t    memory_size;            /**< Size of region. */
>> +    uint64_t    userspace_address;      /**< Base userspace address of 
>> region. */
>> +    uint64_t    address_offset;         /**< Offset of region for address 
>> translation. */
>>  };
>>  
>>  
>> @@ -115,21 +82,34 @@ struct virtio_memory_regions {
>>   * Memory structure includes region and mapping information.
>>   */
>>  struct virtio_memory {
>> -    uint64_t        base_address;   /**< Base QEMU userspace address of the 
>> memory file. */
>> -    uint64_t        mapped_address; /**< Mapped address of memory file base 
>> in our applications memory space. */
>> -    uint64_t        mapped_size;    /**< Total size of memory file. */
>> -    uint32_t        nregions;       /**< Number of memory regions. */
>> +    uint64_t    base_address;    /**< Base QEMU userspace address of the 
>> memory file. */
>> +    uint64_t    mapped_address;  /**< Mapped address of memory file base in 
>> our applications memory space. */
>> +    uint64_t    mapped_size;     /**< Total size of memory file. */
>> +    uint32_t    nregions;        /**< Number of memory regions. */
>>      struct virtio_memory_regions      regions[0]; /**< Memory region 
>> information. */
>>  };
>>  
>>  /**
>> + * Device structure contains all configuration information relating to the 
>> device.
>> + */
>> +struct virtio_net {
>> +    struct vhost_virtqueue  *virtqueue[VIRTIO_QNUM]; /**< Contains all 
>> virtqueue information. */
>> +    struct virtio_memory    *mem;                    /**< QEMU memory and 
>> memory region information. */
>> +    uint64_t features;    /**< Negotiated feature set. */
>> +    uint64_t device_fh;   /**< Device identifier. */
>> +    uint32_t flags;       /**< Device flags. Only used to check if device 
>> is running on data core. */
>> +    void     *priv;
>> +} __rte_cache_aligned;
>> +
>> +/**
>>   * Device operations to add/remove device.
>>   */
>>  struct virtio_net_device_ops {
>> -    int (*new_device)(struct virtio_net *); /**< Add device. */
>> -    void (*destroy_device)(volatile struct virtio_net *);   /**< Remove 
>> device. */
>> +    int (*new_device)(struct virtio_net *); /**< Add device. */
>> +    void (*destroy_device)(struct virtio_net *); /**< Remove device. */
>>  };
>>  
>> +
>>  static inline uint16_t __attribute__((always_inline))
>>  rte_vring_available_entries(struct virtio_net *dev, uint16_t queue_id)
>>  {
>> @@ -179,7 +159,7 @@ int rte_vhost_driver_register(const char *dev_name);
>>  
>>  /* Register callbacks. */
>>  int rte_vhost_driver_callback_register(struct virtio_net_device_ops const * 
>> const);
>> -/* Start vhost driver session blocking loop. */
>> +
>>  int rte_vhost_driver_session_start(void);
>>  
>>  /**
>> @@ -192,8 +172,8 @@ int rte_vhost_driver_session_start(void);
>>   * @return
>>   *  num of packets enqueued
>>   */
>> -uint16_t rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
>> -    struct rte_mbuf **pkts, uint16_t count);
>> +uint32_t rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
>> +    struct rte_mbuf **pkts, uint32_t count);
>>  
>>  /**
>>   * This function gets guest buffers from the virtio device TX virtqueue,
>> @@ -206,7 +186,7 @@ uint16_t rte_vhost_enqueue_burst(struct virtio_net *dev, 
>> uint16_t queue_id,
>>   * @return
>>   *  num of packets dequeued
>>   */
>> -uint16_t rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
>> -    struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count);
>> +uint32_t rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
>> +    struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint32_t count);
>>  
>>  #endif /* _VIRTIO_NET_H_ */
>> diff --git a/lib/librte_vhost/vhost-cuse/vhost-net-cdev.c 
>> b/lib/librte_vhost/vhost-cuse/vhost-net-cdev.c
>> new file mode 100644
>> index 0000000..4671643
>> --- /dev/null
>> +++ b/lib/librte_vhost/vhost-cuse/vhost-net-cdev.c
>> @@ -0,0 +1,436 @@
>> +/*-
>> + *   BSD LICENSE
>> + *
>> + *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
>> + *   All rights reserved.
>> + *
>> + *   Redistribution and use in source and binary forms, with or without
>> + *   modification, are permitted provided that the following conditions
>> + *   are met:
>> + *
>> + *     * Redistributions of source code must retain the above copyright
>> + *       notice, this list of conditions and the following disclaimer.
>> + *     * Redistributions in binary form must reproduce the above copyright
>> + *       notice, this list of conditions and the following disclaimer in
>> + *       the documentation and/or other materials provided with the
>> + *       distribution.
>> + *     * Neither the name of Intel Corporation nor the names of its
>> + *       contributors may be used to endorse or promote products derived
>> + *       from this software without specific prior written permission.
>> + *
>> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> + */
>> +
>> +#include <stdint.h>
>> +#include <fuse/cuse_lowlevel.h>
>> +#include <linux/limits.h>
>> +#include <linux/vhost.h>
>> +#include <linux/virtio_net.h>
>> +#include <string.h>
>> +#include <unistd.h>
>> +#include <sys/ioctl.h>
>> +
>> +#include <rte_ethdev.h>
>> +#include <rte_log.h>
>> +#include <rte_string_fns.h>
>> +#include <rte_virtio_net.h>
>> +
>> +#include "virtio-net-cdev.h"
>> +#include "vhost-net.h"
>> +#include "eventfd_link/eventfd_link.h"
>> +
>> +#define FUSE_OPT_DUMMY "\0\0"
>> +#define FUSE_OPT_FORE  "-f\0\0"
>> +#define FUSE_OPT_NOMULTI "-s\0\0"
>> +
>> +static const uint32_t default_major = 231;
>> +static const uint32_t default_minor = 1;
>> +static const char cuse_device_name[] = "/dev/cuse";
>> +static const char default_cdev[] = "vhost-net";
>> +static const char eventfd_cdev[] = "/dev/eventfd-link";
>> +
>> +static struct fuse_session *session;
>> +const struct vhost_net_device_ops const *ops;
>> +
>> +/*
>> + * Returns vhost_device_ctx from given fuse_req_t. The index is populated 
>> later
>> + * when the device is added to the device linked list.
>> + */
>> +static struct vhost_device_ctx
>> +fuse_req_to_vhost_ctx(fuse_req_t req, struct fuse_file_info *fi)
>> +{
>> +    struct vhost_device_ctx ctx;
>> +    struct fuse_ctx const *const req_ctx = fuse_req_ctx(req);
>> +
>> +    ctx.pid = req_ctx->pid;
>> +    ctx.fh = fi->fh;
>> +
>> +    return ctx;
>> +}
>> +
>> +/*
>> + * When the device is created in QEMU it gets initialised here and
>> + * added to the device linked list.
>> + */
>> +static void
>> +vhost_net_open(fuse_req_t req, struct fuse_file_info *fi)
>> +{
>> +    struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
>> +    int err = 0;
>> +
>> +    err = ops->new_device(ctx);
>> +    if (err == -1) {
>> +            fuse_reply_err(req, EPERM);
>> +            return;
>> +    }
>> +
>> +    fi->fh = err;
>> +
>> +    RTE_LOG(INFO, VHOST_CONFIG,
>> +            "(%"PRIu64") Device configuration started\n", fi->fh);
>> +    fuse_reply_open(req, fi);
>> +}
>> +
>> +/*
>> + * When QEMU is shutdown or killed the device gets released.
>> + */
>> +static void
>> +vhost_net_release(fuse_req_t req, struct fuse_file_info *fi)
>> +{
>> +    int err = 0;
>> +    struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
>> +
>> +    ops->destroy_device(ctx);
>> +    RTE_LOG(INFO, VHOST_CONFIG, "(%"PRIu64") Device released\n", ctx.fh);
>> +    fuse_reply_err(req, err);
>> +}
>> +
>> +/*
>> + * Boilerplate code for CUSE IOCTL
>> + * Implicit arguments: ctx, req, result.
>> + */
>> +#define VHOST_IOCTL(func) do {      \
>> +    result = (func)(ctx);   \
>> +    fuse_reply_ioctl(req, result, NULL, 0); \
>> +} while (0)
>> +
>> +/*
>> + * Boilerplate IOCTL RETRY
>> + * Implicit arguments: req.
>> + */
>> +#define VHOST_IOCTL_RETRY(size_r, size_w) do {      \
>> +    struct iovec iov_r = { arg, (size_r) }; \
>> +    struct iovec iov_w = { arg, (size_w) }; \
>> +    fuse_reply_ioctl_retry(req, &iov_r,     \
>> +            (size_r) ? 1 : 0, &iov_w, (size_w) ? 1 : 0);\
>> +} while (0)
>> +
>> +/*
>> + * Boilerplate code for CUSE Read IOCTL
>> + * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
>> + */
>> +#define VHOST_IOCTL_R(type, var, func) do { \
>> +    if (!in_bufsz) {                        \
>> +            VHOST_IOCTL_RETRY(sizeof(type), 0);\
>> +    } else {        \
>> +            (var) = *(const type*)in_buf;   \
>> +            result = func(ctx, &(var));     \
>> +            fuse_reply_ioctl(req, result, NULL, 0);\
>> +    }       \
>> +} while (0)
>> +
>> +/*
>> + * Boilerplate code for CUSE Write IOCTL
>> + * Implicit arguments: ctx, req, result, out_bufsz.
>> + */
>> +#define VHOST_IOCTL_W(type, var, func) do { \
>> +    if (!out_bufsz) {                       \
>> +            VHOST_IOCTL_RETRY(0, sizeof(type));\
>> +    } else {        \
>> +            result = (func)(ctx, &(var));\
>> +            fuse_reply_ioctl(req, result, &(var), sizeof(type));\
>> +    } \
>> +} while (0)
>> +
>> +/*
>> + * Boilerplate code for CUSE Read/Write IOCTL
>> + * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
>> + */
>> +#define VHOST_IOCTL_RW(type1, var1, type2, var2, func) do { \
>> +    if (!in_bufsz) {        \
>> +            VHOST_IOCTL_RETRY(sizeof(type1), sizeof(type2));\
>> +    } else {        \
>> +            (var1) = *(const type1*) (in_buf);      \
>> +            result = (func)(ctx, (var1), &(var2));  \
>> +            fuse_reply_ioctl(req, result, &(var2), sizeof(type2));\
>> +    } \
>> +} while (0)
>> +
>> +/*
>> + * This function uses the eventfd_link kernel module to copy an eventfd file
>> + * descriptor provided by QEMU in to our process space.
>> + */
>> +static int
>> +eventfd_copy(int target_fd, int target_pid)
>> +{
>> +    int eventfd_link, ret;
>> +    struct eventfd_copy eventfd_copy;
>> +    int fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
>> +
>> +    if (fd == -1)
>> +            return -1;
>> +
>> +    /* Open the character device to the kernel module. */
>> +    /* TODO: check this earlier rather than fail until VM boots! */
>> +    eventfd_link = open(eventfd_cdev, O_RDWR);
>> +    if (eventfd_link < 0) {
>> +            RTE_LOG(ERR, VHOST_CONFIG,
>> +                    "eventfd_link module is not loaded\n");
>> +            return -1;
>> +    }
>> +
>> +    eventfd_copy.source_fd = fd;
>> +    eventfd_copy.target_fd = target_fd;
>> +    eventfd_copy.target_pid = target_pid;
>> +    /* Call the IOCTL to copy the eventfd. */
>> +    ret = ioctl(eventfd_link, EVENTFD_COPY, &eventfd_copy);
>> +    close(eventfd_link);
>> +
>> +    if (ret < 0) {
>> +            RTE_LOG(ERR, VHOST_CONFIG,
>> +                    "EVENTFD_COPY ioctl failed\n");
>> +            return -1;
>> +    }
>> +
>> +    return fd;
>> +}
>> +
>> +/*
>> + * The IOCTLs are handled using CUSE/FUSE in userspace. Depending on
>> + * the type of IOCTL a buffer is requested to read or to write. This
>> + * request is handled by FUSE and the buffer is then given to CUSE.
>> + */
>> +static void
>> +vhost_net_ioctl(fuse_req_t req, int cmd, void *arg,
>> +            struct fuse_file_info *fi, __rte_unused unsigned flags,
>> +            const void *in_buf, size_t in_bufsz, size_t out_bufsz)
>> +{
>> +    struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
>> +    struct vhost_vring_file file;
>> +    struct vhost_vring_state state;
>> +    struct vhost_vring_addr addr;
>> +    uint64_t features;
>> +    uint32_t index;
>> +    int result = 0;
>> +
>> +    switch (cmd) {
>> +    case VHOST_NET_SET_BACKEND:
>> +            LOG_DEBUG(VHOST_CONFIG,
>> +                    "(%"PRIu64") IOCTL: VHOST_NET_SET_BACKEND\n", ctx.fh);
>> +            VHOST_IOCTL_R(struct vhost_vring_file, file, ops->set_backend);
>> +            break;
>> +
>> +    case VHOST_GET_FEATURES:
>> +            LOG_DEBUG(VHOST_CONFIG,
>> +                    "(%"PRIu64") IOCTL: VHOST_GET_FEATURES\n", ctx.fh);
>> +            VHOST_IOCTL_W(uint64_t, features, ops->get_features);
>> +            break;
>> +
>> +    case VHOST_SET_FEATURES:
>> +            LOG_DEBUG(VHOST_CONFIG,
>> +                    "(%"PRIu64") IOCTL: VHOST_SET_FEATURES\n", ctx.fh);
>> +            VHOST_IOCTL_R(uint64_t, features, ops->set_features);
>> +            break;
>> +
>> +    case VHOST_RESET_OWNER:
>> +            LOG_DEBUG(VHOST_CONFIG,
>> +                    "(%"PRIu64") IOCTL: VHOST_RESET_OWNER\n", ctx.fh);
>> +            VHOST_IOCTL(ops->reset_owner);
>> +            break;
>> +
>> +    case VHOST_SET_OWNER:
>> +            LOG_DEBUG(VHOST_CONFIG,
>> +                    "(%"PRIu64") IOCTL: VHOST_SET_OWNER\n", ctx.fh);
>> +            VHOST_IOCTL(ops->set_owner);
>> +            break;
>> +
>> +    case VHOST_SET_MEM_TABLE:
>> +            /*TODO fix race condition.*/
>> +            LOG_DEBUG(VHOST_CONFIG,
>> +                    "(%"PRIu64") IOCTL: VHOST_SET_MEM_TABLE\n", ctx.fh);
>> +            static struct vhost_memory mem_temp;
>> +            switch (in_bufsz) {
>> +            case 0:
>> +                    VHOST_IOCTL_RETRY(sizeof(struct vhost_memory), 0);
>> +                    break;
>> +
>> +            case sizeof(struct vhost_memory):
>> +                    mem_temp = *(const struct vhost_memory *) in_buf;
>> +
>> +                    if (mem_temp.nregions > 0) {
>> +                            VHOST_IOCTL_RETRY(sizeof(struct vhost_memory) +
>> +                                    (sizeof(struct vhost_memory_region) *
>> +                                            mem_temp.nregions), 0);
>> +                    } else {
>> +                            result = -1;
>> +                            fuse_reply_ioctl(req, result, NULL, 0);
>> +                    }
>> +                    break;
>> +
>> +            default:
>> +                    result = cuse_set_mem_table(ctx, in_buf,
>> +                            mem_temp.nregions);
>> +                    if (result)
>> +                            fuse_reply_err(req, EINVAL);
>> +                    else
>> +                            fuse_reply_ioctl(req, result, NULL, 0);
>> +            }
>> +            break;
>> +
>> +    case VHOST_SET_VRING_NUM:
>> +            LOG_DEBUG(VHOST_CONFIG,
>> +                    "(%"PRIu64") IOCTL: VHOST_SET_VRING_NUM\n", ctx.fh);
>> +            VHOST_IOCTL_R(struct vhost_vring_state, state, 
>> ops->set_vring_num);
>> +            break;
>> +
>> +    case VHOST_SET_VRING_BASE:
>> +            LOG_DEBUG(VHOST_CONFIG,
>> +                    "(%"PRIu64") IOCTL: VHOST_SET_VRING_BASE\n", ctx.fh);
>> +            VHOST_IOCTL_R(struct vhost_vring_state, state, 
>> ops->set_vring_base);
>> +            break;
>> +
>> +    case VHOST_GET_VRING_BASE:
>> +            LOG_DEBUG(VHOST_CONFIG,
>> +                    "(%"PRIu64") IOCTL: VHOST_GET_VRING_BASE\n", ctx.fh);
>> +            VHOST_IOCTL_RW(uint32_t, index,
>> +                    struct vhost_vring_state, state, ops->get_vring_base);
>> +            break;
>> +
>> +    case VHOST_SET_VRING_ADDR:
>> +            LOG_DEBUG(VHOST_CONFIG,
>> +                    "(%"PRIu64") IOCTL: VHOST_SET_VRING_ADDR\n", ctx.fh);
>> +            VHOST_IOCTL_R(struct vhost_vring_addr, addr, 
>> ops->set_vring_addr);
>> +            break;
>> +
>> +    case VHOST_SET_VRING_KICK:
>> +    case VHOST_SET_VRING_CALL:
>> +            if (!in_buf) {
>> +                    VHOST_IOCTL_RETRY(sizeof(struct vhost_vring_file), 0);
>> +            } else {
>> +                    int fd;
>> +                    file = *(const struct vhost_vring_file *)in_buf;
>> +                    LOG_DEBUG(VHOST_CONFIG, 
>> +                            "kick/call idx:%d fd:%d\n", file.index, 
>> file.fd);
>> +                    if ((fd = eventfd_copy(file.fd, ctx.pid)) < 0){
>> +                            fuse_reply_ioctl(req, -1, NULL, 0);
>> +                    }
>> +                    file.fd = fd;
>> +                    if (cmd == VHOST_SET_VRING_KICK) {
>> +                            VHOST_IOCTL_R(struct vhost_vring_file, file, 
>> ops->set_vring_call);
>> +                    }
>> +                    else { 
>> +                            VHOST_IOCTL_R(struct vhost_vring_file, file, 
>> ops->set_vring_kick);
>> +                    }
>> +            }
>> +            break;
>> +
>> +    default:
>> +            RTE_LOG(ERR, VHOST_CONFIG,
>> +                    "(%"PRIu64") IOCTL: DOESN NOT EXIST\n", ctx.fh);
>> +            result = -1;
>> +            fuse_reply_ioctl(req, result, NULL, 0);
>> +    }
>> +
>> +    if (result < 0)
>> +            LOG_DEBUG(VHOST_CONFIG,
>> +                    "(%"PRIu64") IOCTL: FAIL\n", ctx.fh);
>> +    else
>> +            LOG_DEBUG(VHOST_CONFIG,
>> +                    "(%"PRIu64") IOCTL: SUCCESS\n", ctx.fh);
>> +}
>> +
>> +/*
>> + * Structure handling open, release and ioctl function pointers is 
>> populated.
>> + */
>> +static const struct cuse_lowlevel_ops vhost_net_ops = {
>> +    .open           = vhost_net_open,
>> +    .release        = vhost_net_release,
>> +    .ioctl          = vhost_net_ioctl,
>> +};
>> +
>> +/*
>> + * cuse_info is populated and used to register the cuse device.
>> + * vhost_net_device_ops are also passed when the device is registered in 
>> app.
>> + */
>> +int
>> +rte_vhost_driver_register(const char *dev_name)
>> +{
>> +    struct cuse_info cuse_info;
>> +    char device_name[PATH_MAX] = "";
>> +    char char_device_name[PATH_MAX] = "";
>> +    const char *device_argv[] = { device_name };
>> +
>> +    char fuse_opt_dummy[] = FUSE_OPT_DUMMY;
>> +    char fuse_opt_fore[] = FUSE_OPT_FORE;
>> +    char fuse_opt_nomulti[] = FUSE_OPT_NOMULTI;
>> +    char *fuse_argv[] = {fuse_opt_dummy, fuse_opt_fore, fuse_opt_nomulti};
>> +
>> +    if (access(cuse_device_name, R_OK | W_OK) < 0) {
>> +            RTE_LOG(ERR, VHOST_CONFIG,
>> +                    "char device %s can't be accessed, maybe not exist\n",
>> +                    cuse_device_name);
>> +            return -1;
>> +    }
>> +
>> +    /*
>> +     * The device name is created. This is passed to QEMU so that it can
>> +     * register the device with our application.
>> +     */
>> +    snprintf(device_name, PATH_MAX, "DEVNAME=%s", dev_name);
>> +    snprintf(char_device_name, PATH_MAX, "/dev/%s", dev_name);
>> +
>> +    /* Check if device already exists. */
>> +    if (access(char_device_name, F_OK) != -1) {
>> +            RTE_LOG(ERR, VHOST_CONFIG,
>> +                    "char device %s already exists\n", char_device_name);
>> +            return -1;
>> +    }
>> +
>> +    memset(&cuse_info, 0, sizeof(cuse_info));
>> +    cuse_info.dev_major = default_major;
>> +    cuse_info.dev_minor = default_minor;
>> +    cuse_info.dev_info_argc = 1;
>> +    cuse_info.dev_info_argv = device_argv;
>> +    cuse_info.flags = CUSE_UNRESTRICTED_IOCTL;
>> +
>> +    ops = get_virtio_net_callbacks();
>> +
>> +    session = cuse_lowlevel_setup(3, fuse_argv,
>> +                    &cuse_info, &vhost_net_ops, 0, NULL);
>> +    if (session == NULL)
>> +            return -1;
>> +
>> +    return 0;
>> +}
>> +
>> +/**
>> + * The CUSE session is launched allowing the application to receive open,
>> + * release and ioctl calls.
>> + */
>> +int
>> +rte_vhost_driver_session_start(void)
>> +{
>> +    fuse_session_loop(session);
>> +
>> +    return 0;
>> +}
>> diff --git a/lib/librte_vhost/vhost-cuse/virtio-net-cdev.c 
>> b/lib/librte_vhost/vhost-cuse/virtio-net-cdev.c
>> new file mode 100644
>> index 0000000..5c16aa5
>> --- /dev/null
>> +++ b/lib/librte_vhost/vhost-cuse/virtio-net-cdev.c
>> @@ -0,0 +1,314 @@
>> +/*-
>> + *   BSD LICENSE
>> + *
>> + *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
>> + *   All rights reserved.
>> + *
>> + *   Redistribution and use in source and binary forms, with or without
>> + *   modification, are permitted provided that the following conditions
>> + *   are met:
>> + *
>> + *     * Redistributions of source code must retain the above copyright
>> + *       notice, this list of conditions and the following disclaimer.
>> + *     * Redistributions in binary form must reproduce the above copyright
>> + *       notice, this list of conditions and the following disclaimer in
>> + *       the documentation and/or other materials provided with the
>> + *       distribution.
>> + *     * Neither the name of Intel Corporation nor the names of its
>> + *       contributors may be used to endorse or promote products derived
>> + *       from this software without specific prior written permission.
>> + *
>> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> + */
>> +
>> +#include <stdint.h>
>> +#include <dirent.h>
>> +#include <linux/vhost.h>
>> +#include <linux/virtio_net.h>
>> +#include <fuse/cuse_lowlevel.h>
>> +#include <stddef.h>
>> +#include <string.h>
>> +#include <stdlib.h>
>> +#include <sys/eventfd.h>
>> +#include <sys/mman.h>
>> +#include <sys/types.h>
>> +#include <unistd.h>
>> +#include <errno.h>
>> +
>> +#include <rte_log.h>
>> +
>> +#include "vhost-net.h"
>> +#include "virtio-net-cdev.h"
>> +
>> +extern struct vhost_net_device_ops const *ops;
>> +
>> +/* Line size for reading maps file. */
>> +static const uint32_t BUFSIZE = PATH_MAX;
>> +
>> +/* Size of prot char array in procmap. */
>> +#define PROT_SZ 5
>> +
>> +/* Number of elements in procmap struct. */
>> +#define PROCMAP_SZ 8
>> +
>> +/* Structure containing information gathered from maps file. */
>> +struct procmap {
>> +    uint64_t va_start;      /* Start virtual address in file. */
>> +    uint64_t len;           /* Size of file. */
>> +    uint64_t pgoff;         /* Not used. */
>> +    uint32_t maj;           /* Not used. */
>> +    uint32_t min;           /* Not used. */
>> +    uint32_t ino;           /* Not used. */
>> +    char prot[PROT_SZ];     /* Not used. */
>> +    char fname[PATH_MAX];   /* File name. */
>> +};
>> +
>> +/*
>> + * Locate the file containing QEMU's memory space and
>> + * map it to our address space.
>> + */
>> +static int
>> +host_memory_map(pid_t pid, uint64_t addr,
>> +    uint64_t *mapped_address, uint64_t *mapped_size)
>> +{
>> +    struct dirent *dptr = NULL;
>> +    struct procmap procmap;
>> +    DIR *dp = NULL;
>> +    int fd;
>> +    int i;
>> +    char memfile[PATH_MAX];
>> +    char mapfile[PATH_MAX];
>> +    char procdir[PATH_MAX];
>> +    char resolved_path[PATH_MAX];
>> +    FILE *fmap;
>> +    void *map;
>> +    uint8_t found = 0;
>> +    char line[BUFSIZE];
>> +    char dlm[] = "-   :   ";
>> +    char *str, *sp, *in[PROCMAP_SZ];
>> +    char *end = NULL;
>> +
>> +    /* Path where mem files are located. */
>> +    snprintf(procdir, PATH_MAX, "/proc/%u/fd/", pid);
>> +    /* Maps file used to locate mem file. */
>> +    snprintf(mapfile, PATH_MAX, "/proc/%u/maps", pid);
>> +
>> +    fmap = fopen(mapfile, "r");
>> +    if (fmap == NULL) {
>> +            RTE_LOG(ERR, VHOST_CONFIG,
>> +                    "Failed to open maps file for pid %d\n", pid);
>> +            return -1;
>> +    }
>> +
>> +    /* Read through maps file until we find out base_address. */
>> +    while (fgets(line, BUFSIZE, fmap) != 0) {
>> +            str = line;
>> +            errno = 0;
>> +            /* Split line in to fields. */
>> +            for (i = 0; i < PROCMAP_SZ; i++) {
>> +                    in[i] = strtok_r(str, &dlm[i], &sp);
>> +                    if ((in[i] == NULL) || (errno != 0)) {
>> +                            fclose(fmap);
>> +                            return -1;
>> +                    }
>> +                    str = NULL;
>> +            }
>> +
>> +            /* Convert/Copy each field as needed. */
>> +            procmap.va_start = strtoull(in[0], &end, 16);
>> +            if ((in[0] == '\0') || (end == NULL) || (*end != '\0') ||
>> +                    (errno != 0)) {
>> +                    fclose(fmap);
>> +                    return -1;
>> +            }
>> +
>> +            procmap.len = strtoull(in[1], &end, 16);
>> +            if ((in[1] == '\0') || (end == NULL) || (*end != '\0') ||
>> +                    (errno != 0)) {
>> +                    fclose(fmap);
>> +                    return -1;
>> +            }
>> +
>> +            procmap.pgoff = strtoull(in[3], &end, 16);
>> +            if ((in[3] == '\0') || (end == NULL) || (*end != '\0') ||
>> +                    (errno != 0)) {
>> +                    fclose(fmap);
>> +                    return -1;
>> +            }
>> +
>> +            procmap.maj = strtoul(in[4], &end, 16);
>> +            if ((in[4] == '\0') || (end == NULL) || (*end != '\0') ||
>> +                    (errno != 0)) {
>> +                    fclose(fmap);
>> +                    return -1;
>> +            }
>> +
>> +            procmap.min = strtoul(in[5], &end, 16);
>> +            if ((in[5] == '\0') || (end == NULL) || (*end != '\0') ||
>> +                    (errno != 0)) {
>> +                    fclose(fmap);
>> +                    return -1;
>> +            }
>> +
>> +            procmap.ino = strtoul(in[6], &end, 16);
>> +            if ((in[6] == '\0') || (end == NULL) || (*end != '\0') ||
>> +                    (errno != 0)) {
>> +                    fclose(fmap);
>> +                    return -1;
>> +            }
>> +
>> +            memcpy(&procmap.prot, in[2], PROT_SZ);
>> +            memcpy(&procmap.fname, in[7], PATH_MAX);
>> +
>> +            if (procmap.va_start == addr) {
>> +                    procmap.len = procmap.len - procmap.va_start;
>> +                    found = 1;
>> +                    break;
>> +            }
>> +    }
>> +    fclose(fmap);
>> +
>> +    if (!found) {
>> +            RTE_LOG(ERR, VHOST_CONFIG,
>> +                    "Failed to find memory file in pid %d maps file\n", 
>> pid);
>> +            return -1;
>> +    }
>> +
>> +    /* Find the guest memory file among the process fds. */
>> +    dp = opendir(procdir);
>> +    if (dp == NULL) {
>> +            RTE_LOG(ERR, VHOST_CONFIG,
>> +                    "Cannot open pid %d process directory\n",
>> +                    pid);
>> +            return -1;
>> +
>> +    }
>> +
>> +    found = 0;
>> +
>> +    /* Read the fd directory contents. */
>> +    while (NULL != (dptr = readdir(dp))) {
>> +            snprintf(memfile, PATH_MAX, "/proc/%u/fd/%s",
>> +                            pid, dptr->d_name);
>> +            realpath(memfile, resolved_path);
>> +            if (resolved_path == NULL) {
>> +                    RTE_LOG(ERR, VHOST_CONFIG,
>> +                            "Failed to resolve fd directory\n");
>> +                    closedir(dp);
>> +                    return -1;
>> +            }
>> +            if (strncmp(resolved_path, procmap.fname,
>> +                    strnlen(procmap.fname, PATH_MAX)) == 0) {
>> +                    found = 1;
>> +                    break;
>> +            }
>> +    }
>> +
>> +    closedir(dp);
>> +
>> +    if (found == 0) {
>> +            RTE_LOG(ERR, VHOST_CONFIG,
>> +                    "Failed to find memory file for pid %d\n",
>> +                    pid);
>> +            return -1;
>> +    }
>> +    /* Open the shared memory file and map the memory into this process. */
>> +    fd = open(memfile, O_RDWR);
>> +
>> +    if (fd == -1) {
>> +            RTE_LOG(ERR, VHOST_CONFIG,
>> +                    "Failed to open %s for pid %d\n",
>> +                    memfile, pid);
>> +            return -1;
>> +    }
>> +
>> +    map = mmap(0, (size_t)procmap.len, PROT_READ|PROT_WRITE ,
>> +                    MAP_POPULATE|MAP_SHARED, fd, 0);
>> +    close(fd);
>> +
>> +    if (map == MAP_FAILED) {
>> +            RTE_LOG(ERR, VHOST_CONFIG,
>> +                    "Error mapping the file %s for pid %d\n",
>> +                    memfile, pid);
>> +            return -1;
>> +    }
>> +
>> +    /* Store the memory address and size in the device data structure */
>> +    *mapped_address = (uint64_t)(uintptr_t)map;
>> +    *mapped_size = procmap.len;
>> +
>> +    LOG_DEBUG(VHOST_CONFIG,
>> +            "Mem File: %s->%s - Size: %llu - VA: %p\n",
>> +            memfile, resolved_path,
>> +            (unsigned long long)mapped_size, map);
>> +
>> +    return 0;
>> +}
>> +
>> +int
>> +cuse_set_mem_table(struct vhost_device_ctx ctx, const struct vhost_memory 
>> *mem_regions_addr,
>> +    uint32_t nregions)
>> +{
>> +    uint64_t size = offsetof(struct vhost_memory, regions);
>> +    uint32_t idx;
>> +    struct virtio_memory_regions regions[8]; /* VHOST_MAX_MEMORY_REGIONS */
>> +    struct vhost_memory_region *mem_regions = (void *)(uintptr_t)
>> +                    ((uint64_t)(uintptr_t)mem_regions_addr + size);
>> +    uint64_t base_address = 0, mapped_address, mapped_size;
>> +
>> +    for (idx = 0; idx < nregions; idx++) {
>> +            regions[idx].guest_phys_address =
>> +                    mem_regions[idx].guest_phys_addr;
>> +            regions[idx].guest_phys_address_end =
>> +                    regions[idx].guest_phys_address +
>> +                    mem_regions[idx].memory_size;
>> +            regions[idx].memory_size =
>> +                    mem_regions[idx].memory_size;
>> +            regions[idx].userspace_address =
>> +                    mem_regions[idx].userspace_addr;
>> +
>> +            LOG_DEBUG(VHOST_CONFIG, "REGION: %u - GPA: %p - QEMU VA: %p - 
>> SIZE (%"PRIu64")\n",
>> +                    idx,
>> +                    (void *)(uintptr_t)regions[idx].guest_phys_address,
>> +                    (void *)(uintptr_t)regions[idx].userspace_address,
>> +                    regions[idx].memory_size);
>> +
>> +            /*set the base address mapping*/
>> +            if (regions[idx].guest_phys_address == 0x0) {
>> +                    base_address =
>> +                            regions[idx].userspace_address;
>> +                    /* Map VM memory file */
>> +                    if (host_memory_map(ctx.pid, base_address, 
>> +                            &mapped_address, &mapped_size) != 0) {
>> +                            return -1;
>> +                    }
>> +            }
>> +    }
>> +
>> +    /* Check that we have a valid base address. */
>> +    if (base_address == 0) {
>> +            RTE_LOG(ERR, VHOST_CONFIG, 
>> +                    "Failed to find base address of qemu memory file.\n");
>> +            return -1;
>> +    }
>> +
>> +    for (idx = 0; idx < nregions; idx++) {
>> +            regions[idx].address_offset = 
>> +                    mapped_address - base_address +
>> +                    regions[idx].userspace_address -
>> +                    regions[idx].guest_phys_address;
>> +    }
>> +    
>> +    ops->set_mem_table(ctx, &regions[0], nregions);
>> +    return 0;
>> +}
>> diff --git a/lib/librte_vhost/vhost-cuse/virtio-net-cdev.h 
>> b/lib/librte_vhost/vhost-cuse/virtio-net-cdev.h
>> new file mode 100644
>> index 0000000..6f98ce8
>> --- /dev/null
>> +++ b/lib/librte_vhost/vhost-cuse/virtio-net-cdev.h
>> @@ -0,0 +1,43 @@
>> +/*-
>> + *   BSD LICENSE
>> + *
>> + *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
>> + *   All rights reserved.
>> + *
>> + *   Redistribution and use in source and binary forms, with or without
>> + *   modification, are permitted provided that the following conditions
>> + *   are met:
>> + *
>> + *     * Redistributions of source code must retain the above copyright
>> + *       notice, this list of conditions and the following disclaimer.
>> + *     * Redistributions in binary form must reproduce the above copyright
>> + *       notice, this list of conditions and the following disclaimer in
>> + *       the documentation and/or other materials provided with the
>> + *       distribution.
>> + *     * Neither the name of Intel Corporation nor the names of its
>> + *       contributors may be used to endorse or promote products derived
>> + *       from this software without specific prior written permission.
>> + *
>> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> + */
>> +#ifndef _VIRTIO_NET_CDEV_H
>> +#define _VIRTIO_NET_CDEV_H
>> +#include <stdint.h>
>> +
>> +#include "vhost-net.h"
>> +
>> +int
>> +cuse_set_mem_table(struct vhost_device_ctx ctx, const struct vhost_memory 
>> *mem_regions_addr,
>> +    uint32_t nregions);
>> +
>> +#endif
>> diff --git a/lib/librte_vhost/vhost-net-cdev.c 
>> b/lib/librte_vhost/vhost-net-cdev.c
>> deleted file mode 100644
>> index 57c76cb..0000000
>> --- a/lib/librte_vhost/vhost-net-cdev.c
>> +++ /dev/null
>> @@ -1,389 +0,0 @@
>> -/*-
>> - *   BSD LICENSE
>> - *
>> - *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
>> - *   All rights reserved.
>> - *
>> - *   Redistribution and use in source and binary forms, with or without
>> - *   modification, are permitted provided that the following conditions
>> - *   are met:
>> - *
>> - *     * Redistributions of source code must retain the above copyright
>> - *       notice, this list of conditions and the following disclaimer.
>> - *     * Redistributions in binary form must reproduce the above copyright
>> - *       notice, this list of conditions and the following disclaimer in
>> - *       the documentation and/or other materials provided with the
>> - *       distribution.
>> - *     * Neither the name of Intel Corporation nor the names of its
>> - *       contributors may be used to endorse or promote products derived
>> - *       from this software without specific prior written permission.
>> - *
>> - *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> - *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> - *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> - *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> - *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> - *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> - *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> - *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> - *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> - *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> - *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> - */
>> -
>> -#include <errno.h>
>> -#include <fuse/cuse_lowlevel.h>
>> -#include <linux/limits.h>
>> -#include <linux/vhost.h>
>> -#include <stdint.h>
>> -#include <string.h>
>> -#include <unistd.h>
>> -
>> -#include <rte_ethdev.h>
>> -#include <rte_log.h>
>> -#include <rte_string_fns.h>
>> -#include <rte_virtio_net.h>
>> -
>> -#include "vhost-net-cdev.h"
>> -
>> -#define FUSE_OPT_DUMMY "\0\0"
>> -#define FUSE_OPT_FORE  "-f\0\0"
>> -#define FUSE_OPT_NOMULTI "-s\0\0"
>> -
>> -static const uint32_t default_major = 231;
>> -static const uint32_t default_minor = 1;
>> -static const char cuse_device_name[] = "/dev/cuse";
>> -static const char default_cdev[] = "vhost-net";
>> -
>> -static struct fuse_session *session;
>> -static struct vhost_net_device_ops const *ops;
>> -
>> -/*
>> - * Returns vhost_device_ctx from given fuse_req_t. The index is populated 
>> later
>> - * when the device is added to the device linked list.
>> - */
>> -static struct vhost_device_ctx
>> -fuse_req_to_vhost_ctx(fuse_req_t req, struct fuse_file_info *fi)
>> -{
>> -    struct vhost_device_ctx ctx;
>> -    struct fuse_ctx const *const req_ctx = fuse_req_ctx(req);
>> -
>> -    ctx.pid = req_ctx->pid;
>> -    ctx.fh = fi->fh;
>> -
>> -    return ctx;
>> -}
>> -
>> -/*
>> - * When the device is created in QEMU it gets initialised here and
>> - * added to the device linked list.
>> - */
>> -static void
>> -vhost_net_open(fuse_req_t req, struct fuse_file_info *fi)
>> -{
>> -    struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
>> -    int err = 0;
>> -
>> -    err = ops->new_device(ctx);
>> -    if (err == -1) {
>> -            fuse_reply_err(req, EPERM);
>> -            return;
>> -    }
>> -
>> -    fi->fh = err;
>> -
>> -    RTE_LOG(INFO, VHOST_CONFIG,
>> -            "(%"PRIu64") Device configuration started\n", fi->fh);
>> -    fuse_reply_open(req, fi);
>> -}
>> -
>> -/*
>> - * When QEMU is shutdown or killed the device gets released.
>> - */
>> -static void
>> -vhost_net_release(fuse_req_t req, struct fuse_file_info *fi)
>> -{
>> -    int err = 0;
>> -    struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
>> -
>> -    ops->destroy_device(ctx);
>> -    RTE_LOG(INFO, VHOST_CONFIG, "(%"PRIu64") Device released\n", ctx.fh);
>> -    fuse_reply_err(req, err);
>> -}
>> -
>> -/*
>> - * Boilerplate code for CUSE IOCTL
>> - * Implicit arguments: ctx, req, result.
>> - */
>> -#define VHOST_IOCTL(func) do {      \
>> -    result = (func)(ctx);   \
>> -    fuse_reply_ioctl(req, result, NULL, 0); \
>> -} while (0)
>> -
>> -/*
>> - * Boilerplate IOCTL RETRY
>> - * Implicit arguments: req.
>> - */
>> -#define VHOST_IOCTL_RETRY(size_r, size_w) do {      \
>> -    struct iovec iov_r = { arg, (size_r) }; \
>> -    struct iovec iov_w = { arg, (size_w) }; \
>> -    fuse_reply_ioctl_retry(req, &iov_r,     \
>> -            (size_r) ? 1 : 0, &iov_w, (size_w) ? 1 : 0);\
>> -} while (0)
>> -
>> -/*
>> - * Boilerplate code for CUSE Read IOCTL
>> - * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
>> - */
>> -#define VHOST_IOCTL_R(type, var, func) do { \
>> -    if (!in_bufsz) {        \
>> -            VHOST_IOCTL_RETRY(sizeof(type), 0);\
>> -    } else {        \
>> -            (var) = *(const type*)in_buf;   \
>> -            result = func(ctx, &(var));     \
>> -            fuse_reply_ioctl(req, result, NULL, 0);\
>> -    }       \
>> -} while (0)
>> -
>> -/*
>> - * Boilerplate code for CUSE Write IOCTL
>> - * Implicit arguments: ctx, req, result, out_bufsz.
>> - */
>> -#define VHOST_IOCTL_W(type, var, func) do { \
>> -    if (!out_bufsz) {       \
>> -            VHOST_IOCTL_RETRY(0, sizeof(type));\
>> -    } else {        \
>> -            result = (func)(ctx, &(var));\
>> -            fuse_reply_ioctl(req, result, &(var), sizeof(type));\
>> -    } \
>> -} while (0)
>> -
>> -/*
>> - * Boilerplate code for CUSE Read/Write IOCTL
>> - * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
>> - */
>> -#define VHOST_IOCTL_RW(type1, var1, type2, var2, func) do { \
>> -    if (!in_bufsz) {        \
>> -            VHOST_IOCTL_RETRY(sizeof(type1), sizeof(type2));\
>> -    } else {        \
>> -            (var1) = *(const type1*) (in_buf);      \
>> -            result = (func)(ctx, (var1), &(var2));  \
>> -            fuse_reply_ioctl(req, result, &(var2), sizeof(type2));\
>> -    }       \
>> -} while (0)
>> -
>> -/*
>> - * The IOCTLs are handled using CUSE/FUSE in userspace. Depending on the 
>> type
>> - * of IOCTL a buffer is requested to read or to write. This request is 
>> handled
>> - * by FUSE and the buffer is then given to CUSE.
>> - */
>> -static void
>> -vhost_net_ioctl(fuse_req_t req, int cmd, void *arg,
>> -            struct fuse_file_info *fi, __rte_unused unsigned flags,
>> -            const void *in_buf, size_t in_bufsz, size_t out_bufsz)
>> -{
>> -    struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
>> -    struct vhost_vring_file file;
>> -    struct vhost_vring_state state;
>> -    struct vhost_vring_addr addr;
>> -    uint64_t features;
>> -    uint32_t index;
>> -    int result = 0;
>> -
>> -    switch (cmd) {
>> -    case VHOST_NET_SET_BACKEND:
>> -            LOG_DEBUG(VHOST_CONFIG,
>> -                    "(%"PRIu64") IOCTL: VHOST_NET_SET_BACKEND\n", ctx.fh);
>> -            VHOST_IOCTL_R(struct vhost_vring_file, file, ops->set_backend);
>> -            break;
>> -
>> -    case VHOST_GET_FEATURES:
>> -            LOG_DEBUG(VHOST_CONFIG,
>> -                    "(%"PRIu64") IOCTL: VHOST_GET_FEATURES\n", ctx.fh);
>> -            VHOST_IOCTL_W(uint64_t, features, ops->get_features);
>> -            break;
>> -
>> -    case VHOST_SET_FEATURES:
>> -            LOG_DEBUG(VHOST_CONFIG,
>> -                    "(%"PRIu64") IOCTL: VHOST_SET_FEATURES\n", ctx.fh);
>> -            VHOST_IOCTL_R(uint64_t, features, ops->set_features);
>> -            break;
>> -
>> -    case VHOST_RESET_OWNER:
>> -            LOG_DEBUG(VHOST_CONFIG,
>> -                    "(%"PRIu64") IOCTL: VHOST_RESET_OWNER\n", ctx.fh);
>> -            VHOST_IOCTL(ops->reset_owner);
>> -            break;
>> -
>> -    case VHOST_SET_OWNER:
>> -            LOG_DEBUG(VHOST_CONFIG,
>> -                    "(%"PRIu64") IOCTL: VHOST_SET_OWNER\n", ctx.fh);
>> -            VHOST_IOCTL(ops->set_owner);
>> -            break;
>> -
>> -    case VHOST_SET_MEM_TABLE:
>> -            /*TODO fix race condition.*/
>> -            LOG_DEBUG(VHOST_CONFIG,
>> -                    "(%"PRIu64") IOCTL: VHOST_SET_MEM_TABLE\n", ctx.fh);
>> -            static struct vhost_memory mem_temp;
>> -
>> -            switch (in_bufsz) {
>> -            case 0:
>> -                    VHOST_IOCTL_RETRY(sizeof(struct vhost_memory), 0);
>> -                    break;
>> -
>> -            case sizeof(struct vhost_memory):
>> -                    mem_temp = *(const struct vhost_memory *) in_buf;
>> -
>> -                    if (mem_temp.nregions > 0) {
>> -                            VHOST_IOCTL_RETRY(sizeof(struct vhost_memory) +
>> -                                    (sizeof(struct vhost_memory_region) *
>> -                                            mem_temp.nregions), 0);
>> -                    } else {
>> -                            result = -1;
>> -                            fuse_reply_ioctl(req, result, NULL, 0);
>> -                    }
>> -                    break;
>> -
>> -            default:
>> -                    result = ops->set_mem_table(ctx,
>> -                                    in_buf, mem_temp.nregions);
>> -                    if (result)
>> -                            fuse_reply_err(req, EINVAL);
>> -                    else
>> -                            fuse_reply_ioctl(req, result, NULL, 0);
>> -            }
>> -            break;
>> -
>> -    case VHOST_SET_VRING_NUM:
>> -            LOG_DEBUG(VHOST_CONFIG,
>> -                    "(%"PRIu64") IOCTL: VHOST_SET_VRING_NUM\n", ctx.fh);
>> -            VHOST_IOCTL_R(struct vhost_vring_state, state,
>> -                    ops->set_vring_num);
>> -            break;
>> -
>> -    case VHOST_SET_VRING_BASE:
>> -            LOG_DEBUG(VHOST_CONFIG,
>> -                    "(%"PRIu64") IOCTL: VHOST_SET_VRING_BASE\n", ctx.fh);
>> -            VHOST_IOCTL_R(struct vhost_vring_state, state,
>> -                    ops->set_vring_base);
>> -            break;
>> -
>> -    case VHOST_GET_VRING_BASE:
>> -            LOG_DEBUG(VHOST_CONFIG,
>> -                    "(%"PRIu64") IOCTL: VHOST_GET_VRING_BASE\n", ctx.fh);
>> -            VHOST_IOCTL_RW(uint32_t, index,
>> -                    struct vhost_vring_state, state, ops->get_vring_base);
>> -            break;
>> -
>> -    case VHOST_SET_VRING_ADDR:
>> -            LOG_DEBUG(VHOST_CONFIG,
>> -                    "(%"PRIu64") IOCTL: VHOST_SET_VRING_ADDR\n", ctx.fh);
>> -            VHOST_IOCTL_R(struct vhost_vring_addr, addr,
>> -                    ops->set_vring_addr);
>> -            break;
>> -
>> -    case VHOST_SET_VRING_KICK:
>> -            LOG_DEBUG(VHOST_CONFIG,
>> -                    "(%"PRIu64") IOCTL: VHOST_SET_VRING_KICK\n", ctx.fh);
>> -            VHOST_IOCTL_R(struct vhost_vring_file, file,
>> -                    ops->set_vring_kick);
>> -            break;
>> -
>> -    case VHOST_SET_VRING_CALL:
>> -            LOG_DEBUG(VHOST_CONFIG,
>> -                    "(%"PRIu64") IOCTL: VHOST_SET_VRING_CALL\n", ctx.fh);
>> -            VHOST_IOCTL_R(struct vhost_vring_file, file,
>> -                    ops->set_vring_call);
>> -            break;
>> -
>> -    default:
>> -            RTE_LOG(ERR, VHOST_CONFIG,
>> -                    "(%"PRIu64") IOCTL: DOESN NOT EXIST\n", ctx.fh);
>> -            result = -1;
>> -            fuse_reply_ioctl(req, result, NULL, 0);
>> -    }
>> -
>> -    if (result < 0)
>> -            LOG_DEBUG(VHOST_CONFIG,
>> -                    "(%"PRIu64") IOCTL: FAIL\n", ctx.fh);
>> -    else
>> -            LOG_DEBUG(VHOST_CONFIG,
>> -                    "(%"PRIu64") IOCTL: SUCCESS\n", ctx.fh);
>> -}
>> -
>> -/*
>> - * Structure handling open, release and ioctl function pointers is 
>> populated.
>> - */
>> -static const struct cuse_lowlevel_ops vhost_net_ops = {
>> -    .open           = vhost_net_open,
>> -    .release        = vhost_net_release,
>> -    .ioctl          = vhost_net_ioctl,
>> -};
>> -
>> -/*
>> - * cuse_info is populated and used to register the cuse device.
>> - * vhost_net_device_ops are also passed when the device is registered in 
>> app.
>> - */
>> -int
>> -rte_vhost_driver_register(const char *dev_name)
>> -{
>> -    struct cuse_info cuse_info;
>> -    char device_name[PATH_MAX] = "";
>> -    char char_device_name[PATH_MAX] = "";
>> -    const char *device_argv[] = { device_name };
>> -
>> -    char fuse_opt_dummy[] = FUSE_OPT_DUMMY;
>> -    char fuse_opt_fore[] = FUSE_OPT_FORE;
>> -    char fuse_opt_nomulti[] = FUSE_OPT_NOMULTI;
>> -    char *fuse_argv[] = {fuse_opt_dummy, fuse_opt_fore, fuse_opt_nomulti};
>> -
>> -    if (access(cuse_device_name, R_OK | W_OK) < 0) {
>> -            RTE_LOG(ERR, VHOST_CONFIG,
>> -                    "char device %s can't be accessed, maybe not exist\n",
>> -                    cuse_device_name);
>> -            return -1;
>> -    }
>> -
>> -    /*
>> -     * The device name is created. This is passed to QEMU so that it can
>> -     * register the device with our application.
>> -     */
>> -    snprintf(device_name, PATH_MAX, "DEVNAME=%s", dev_name);
>> -    snprintf(char_device_name, PATH_MAX, "/dev/%s", dev_name);
>> -
>> -    /* Check if device already exists. */
>> -    if (access(char_device_name, F_OK) != -1) {
>> -            RTE_LOG(ERR, VHOST_CONFIG,
>> -                    "char device %s already exists\n", char_device_name);
>> -            return -1;
>> -    }
>> -
>> -    memset(&cuse_info, 0, sizeof(cuse_info));
>> -    cuse_info.dev_major = default_major;
>> -    cuse_info.dev_minor = default_minor;
>> -    cuse_info.dev_info_argc = 1;
>> -    cuse_info.dev_info_argv = device_argv;
>> -    cuse_info.flags = CUSE_UNRESTRICTED_IOCTL;
>> -
>> -    ops = get_virtio_net_callbacks();
>> -
>> -    session = cuse_lowlevel_setup(3, fuse_argv,
>> -                    &cuse_info, &vhost_net_ops, 0, NULL);
>> -    if (session == NULL)
>> -            return -1;
>> -
>> -    return 0;
>> -}
>> -
>> -/**
>> - * The CUSE session is launched allowing the application to receive open,
>> - * release and ioctl calls.
>> - */
>> -int
>> -rte_vhost_driver_session_start(void)
>> -{
>> -    fuse_session_loop(session);
>> -
>> -    return 0;
>> -}
>> diff --git a/lib/librte_vhost/vhost-net-cdev.h 
>> b/lib/librte_vhost/vhost-net-cdev.h
>> deleted file mode 100644
>> index 03a5c57..0000000
>> --- a/lib/librte_vhost/vhost-net-cdev.h
>> +++ /dev/null
>> @@ -1,113 +0,0 @@
>> -/*-
>> - *   BSD LICENSE
>> - *
>> - *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
>> - *   All rights reserved.
>> - *
>> - *   Redistribution and use in source and binary forms, with or without
>> - *   modification, are permitted provided that the following conditions
>> - *   are met:
>> - *
>> - *     * Redistributions of source code must retain the above copyright
>> - *       notice, this list of conditions and the following disclaimer.
>> - *     * Redistributions in binary form must reproduce the above copyright
>> - *       notice, this list of conditions and the following disclaimer in
>> - *       the documentation and/or other materials provided with the
>> - *       distribution.
>> - *     * Neither the name of Intel Corporation nor the names of its
>> - *       contributors may be used to endorse or promote products derived
>> - *       from this software without specific prior written permission.
>> - *
>> - *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> - *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> - *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> - *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> - *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> - *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> - *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> - *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> - *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> - *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> - *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> - */
>> -
>> -#ifndef _VHOST_NET_CDEV_H_
>> -#define _VHOST_NET_CDEV_H_
>> -#include <stdint.h>
>> -#include <stdio.h>
>> -#include <sys/types.h>
>> -#include <unistd.h>
>> -#include <linux/vhost.h>
>> -
>> -#include <rte_log.h>
>> -
>> -/* Macros for printing using RTE_LOG */
>> -#define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
>> -#define RTE_LOGTYPE_VHOST_DATA   RTE_LOGTYPE_USER1
>> -
>> -#ifdef RTE_LIBRTE_VHOST_DEBUG
>> -#define VHOST_MAX_PRINT_BUFF 6072
>> -#define LOG_LEVEL RTE_LOG_DEBUG
>> -#define LOG_DEBUG(log_type, fmt, args...) RTE_LOG(DEBUG, log_type, fmt, 
>> ##args)
>> -#define PRINT_PACKET(device, addr, size, header) do { \
>> -    char *pkt_addr = (char *)(addr); \
>> -    unsigned int index; \
>> -    char packet[VHOST_MAX_PRINT_BUFF]; \
>> -    \
>> -    if ((header)) \
>> -            snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%"PRIu64") Header size 
>> %d: ", (device->device_fh), (size)); \
>> -    else \
>> -            snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%"PRIu64") Packet size 
>> %d: ", (device->device_fh), (size)); \
>> -    for (index = 0; index < (size); index++) { \
>> -            snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), 
>> VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), \
>> -                    "%02hhx ", pkt_addr[index]); \
>> -    } \
>> -    snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), 
>> VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), "\n"); \
>> -    \
>> -    LOG_DEBUG(VHOST_DATA, "%s", packet); \
>> -} while (0)
>> -#else
>> -#define LOG_LEVEL RTE_LOG_INFO
>> -#define LOG_DEBUG(log_type, fmt, args...) do {} while (0)
>> -#define PRINT_PACKET(device, addr, size, header) do {} while (0)
>> -#endif
>> -
>> -
>> -/*
>> - * Structure used to identify device context.
>> - */
>> -struct vhost_device_ctx {
>> -    pid_t           pid;    /* PID of process calling the IOCTL. */
>> -    uint64_t        fh;     /* Populated with fi->fh to track the device 
>> index. */
>> -};
>> -
>> -/*
>> - * Structure contains function pointers to be defined in virtio-net.c. These
>> - * functions are called in CUSE context and are used to configure devices.
>> - */
>> -struct vhost_net_device_ops {
>> -    int (*new_device)(struct vhost_device_ctx);
>> -    void (*destroy_device)(struct vhost_device_ctx);
>> -
>> -    int (*get_features)(struct vhost_device_ctx, uint64_t *);
>> -    int (*set_features)(struct vhost_device_ctx, uint64_t *);
>> -
>> -    int (*set_mem_table)(struct vhost_device_ctx, const void *, uint32_t);
>> -
>> -    int (*set_vring_num)(struct vhost_device_ctx, struct vhost_vring_state 
>> *);
>> -    int (*set_vring_addr)(struct vhost_device_ctx, struct vhost_vring_addr 
>> *);
>> -    int (*set_vring_base)(struct vhost_device_ctx, struct vhost_vring_state 
>> *);
>> -    int (*get_vring_base)(struct vhost_device_ctx, uint32_t, struct 
>> vhost_vring_state *);
>> -
>> -    int (*set_vring_kick)(struct vhost_device_ctx, struct vhost_vring_file 
>> *);
>> -    int (*set_vring_call)(struct vhost_device_ctx, struct vhost_vring_file 
>> *);
>> -
>> -    int (*set_backend)(struct vhost_device_ctx, struct vhost_vring_file *);
>> -
>> -    int (*set_owner)(struct vhost_device_ctx);
>> -    int (*reset_owner)(struct vhost_device_ctx);
>> -};
>> -
>> -
>> -struct vhost_net_device_ops const *get_virtio_net_callbacks(void);
>> -#endif /* _VHOST_NET_CDEV_H_ */
>> diff --git a/lib/librte_vhost/vhost-user/fd_man.c 
>> b/lib/librte_vhost/vhost-user/fd_man.c
>> new file mode 100644
>> index 0000000..c7fd3f2
>> --- /dev/null
>> +++ b/lib/librte_vhost/vhost-user/fd_man.c
>> @@ -0,0 +1,158 @@
>> +#include <stdint.h>
>> +#include <stdio.h>
>> +#include <stdlib.h>
>> +#include <sys/socket.h>
>> +#include <sys/select.h>
>> +#include <sys/time.h>
>> +#include <sys/types.h>
>> +#include <unistd.h>
>> +
>> +#include <rte_log.h>
>> +
>> +#include "fd_man.h"
>> +
>> +/**
>> + * Returns the index in the fdset for a fd.
>> + * If fd is -1, it means to search for a free entry.
>> + * @return
>> + *   Index for the fd, or -1 if fd isn't in the fdset.
>> + */
>> +static int
>> +fdset_find_fd(struct fdset *pfdset, int fd)
>> +{
>> +    int i;
>> +
>> +    for (i = 0; i < pfdset->num && pfdset->fd[i].fd != fd; i++);
>> +            
>> +    return i ==  pfdset->num ? -1 : i;
>> +}
>> +
>> +static int
>> +fdset_find_free_slot(struct fdset *pfdset)
>> +{
>> +    return fdset_find_fd(pfdset, -1);
>> +
>> +}
>> +
>> +static void
>> +fdset_add_fd(struct fdset  *pfdset, int idx, int fd, fd_cb rcb, 
>> +            fd_cb wcb, uint64_t dat)
>> +{
>> +    struct fdentry *pfdentry = &pfdset->fd[idx];
>> +
>> +    pfdentry->fd = fd;
>> +    pfdentry->rcb = rcb;
>> +    pfdentry->wcb = wcb;
>> +    pfdentry->dat = dat;
>> +}
>> +
>> +/**
>> + * Fill the read/write fdset with the fds in the fdset.
>> + * @return
>> + *  the maximum fds filled in the read/write fd_set.
>> + */
>> +static int
>> +fdset_fill(fd_set *rfset, fd_set *wfset, struct fdset *pfdset)
>> +{
>> +    struct fdentry *pfdentry;
>> +    int i, maxfds = -1;
>> +    int num = MAX_FDS;
>> +
>> +    for (i = 0; i < num ; i++) {
>> +            pfdentry = &pfdset->fd[i];
>> +            if (pfdentry->fd != -1) {
>> +                    int added = 0;
>> +                    if (pfdentry->rcb && rfset) {
>> +                            FD_SET(pfdentry->fd, rfset);
>> +                            added = 1;
>> +                    }
>> +                    if (pfdentry->wcb && wfset) {
>> +                            FD_SET(pfdentry->fd, wfset);
>> +                            added = 1;
>> +                    }
>> +                    if (added)
>> +                            maxfds = pfdentry->fd < maxfds ?
>> +                                    maxfds : pfdentry->fd;
>> +            }
>> +    }
>> +    return maxfds;
>> +}
>> +
>> +void
>> +fdset_init(struct fdset *pfdset)
>> +{
>> +    int i;
>> +
>> +    for (i = 0; i < MAX_FDS; i++)
>> +            pfdset->fd[i].fd = -1;
>> +    pfdset->num = MAX_FDS;
>> +
>> +}
>> +
>> +/**
>> + * Register the fd in the fdset with its read/write handler and context.
>> + */
>> +int
>> +fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, uint64_t dat)
>> +{
>> +    int i;
>> +
>> +    if (fd == -1)
>> +            return -1;
>> +
>> +    /* Find a free slot in the list. */
>> +    i = fdset_find_free_slot(pfdset);
>> +    if (i == -1)
>> +            return -2;
>> +
>> +    fdset_add_fd(pfdset, i, fd, rcb, wcb, dat);
>> +
>> +    return 0;
>> +}
>> +
>> +/**
>> + *  Unregister the fd from the fdset.
>> + */
>> +void
>> +fdset_del(struct fdset *pfdset, int fd)
>> +{
>> +    int i;
>> +
>> +    i = fdset_find_fd(pfdset, fd);
>> +    if (i != -1) {
>> +            pfdset->fd[i].fd = -1;
>> +    }
>> +}
>> +
>> +
>> +void
>> +fdset_event_dispatch(struct fdset *pfdset)
>> +{
>> +    fd_set rfds,wfds;
>> +    int i, maxfds;
>> +    struct fdentry *pfdentry;
>> +    int num = MAX_FDS;
>> +
>> +    if (pfdset == NULL)
>> +            return;
>> +    while (1) {
>> +            FD_ZERO(&rfds);
>> +            FD_ZERO(&wfds);
>> +            maxfds = fdset_fill(&rfds, &wfds, pfdset);
>> +            /* fd management runs in one thread */
>> +            if (maxfds == -1) {
>> +                    return;
>> +            }
>> +
>> +            select(maxfds + 1, &rfds, &wfds, NULL, NULL);
>> +
>> +            for (i = 0; i < num; i++) {
>> +                    pfdentry = &pfdset->fd[i];
>> +                    if (FD_ISSET(pfdentry->fd, &rfds)) 
>> +                            pfdentry->rcb(pfdentry->fd, pfdentry->dat);
>> +                    if (FD_ISSET(pfdentry->fd, &wfds))
>> +                            pfdentry->wcb(pfdentry->fd, pfdentry->dat);
>> +            }
>> +            
>> +    }
>> +}
>> diff --git a/lib/librte_vhost/vhost-user/fd_man.h 
>> b/lib/librte_vhost/vhost-user/fd_man.h
>> new file mode 100644
>> index 0000000..57cc81d
>> --- /dev/null
>> +++ b/lib/librte_vhost/vhost-user/fd_man.h
>> @@ -0,0 +1,31 @@
>> +#ifndef _FD_MAN_H_
>> +#define _FD_MAN_H_
>> +#include <stdint.h>
>> +
>> +#define MAX_FDS 1024
>> +
>> +typedef void (*fd_cb)(int fd, uint64_t dat);
>> +
>> +struct fdentry {
>> +    int fd; /* -1 indicates this entry is empty */
>> +    fd_cb rcb; /* callback when this fd is readable. */
>> +    fd_cb wcb; /* callback when this fd is writeable.*/
>> +    uint64_t dat;   /* fd context */
>> +};
>> +
>> +struct fdset {
>> +    struct fdentry fd[MAX_FDS];
>> +    int num;        
>> +};
>> +
>> +
>> +void fdset_init(struct fdset *pfdset);
>> +
>> +int fdset_add(struct fdset *pfdset, int fd, fd_cb rcb,
>> +    fd_cb wcb, uint64_t ctx);
>> +
>> +void fdset_del(struct fdset *pfdset, int fd);
>> +
>> +void fdset_event_dispatch(struct fdset *pfdset);
>> +
>> +#endif
>> diff --git a/lib/librte_vhost/vhost-user/vhost-net-user.c 
>> b/lib/librte_vhost/vhost-user/vhost-net-user.c
>> new file mode 100644
>> index 0000000..34450f4
>> --- /dev/null
>> +++ b/lib/librte_vhost/vhost-user/vhost-net-user.c
>> @@ -0,0 +1,417 @@
>> +/*-
>> + *   BSD LICENSE
>> + *
>> + *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
>> + *   All rights reserved.
>> + *
>> + *   Redistribution and use in source and binary forms, with or without
>> + *   modification, are permitted provided that the following conditions
>> + *   are met:
>> + *
>> + *     * Redistributions of source code must retain the above copyright
>> + *       notice, this list of conditions and the following disclaimer.
>> + *     * Redistributions in binary form must reproduce the above copyright
>> + *       notice, this list of conditions and the following disclaimer in
>> + *       the documentation and/or other materials provided with the
>> + *       distribution.
>> + *     * Neither the name of Intel Corporation nor the names of its
>> + *       contributors may be used to endorse or promote products derived
>> + *       from this software without specific prior written permission.
>> + *
>> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> + */
>> +
>> +#include <stdint.h>
>> +#include <stdio.h>
>> +#include <limits.h>
>> +#include <stdlib.h>
>> +#include <unistd.h>
>> +#include <string.h>
>> +#include <sys/types.h>
>> +#include <sys/socket.h>
>> +#include <sys/un.h>
>> +#include <errno.h>
>> +
>> +#include <rte_log.h>
>> +#include <rte_virtio_net.h>
>> +
>> +#include "fd_man.h"
>> +#include "vhost-net-user.h"
>> +#include "vhost-net.h"
>> +#include "virtio-net-user.h"
>> +
>> +static void vserver_new_vq_conn(int fd, uint64_t data);
>> +static void vserver_message_handler(int fd, uint64_t dat);
>> +const struct vhost_net_device_ops *ops;
>> +
>> +static struct vhost_server *g_vhost_server;
>> +
>> +static const char *vhost_message_str[VHOST_USER_MAX] =
>> +{
>> +    [VHOST_USER_NONE] = "VHOST_USER_NONE",
>> +    [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
>> +    [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES",
>> +    [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER",
>> +    [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER",
>> +    [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE",
>> +    [VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE",
>> +    [VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD",
>> +    [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM",
>> +    [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR",
>> +    [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE",
>> +    [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE",
>> +    [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
>> +    [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
>> +    [VHOST_USER_SET_VRING_ERR]  = "VHOST_USER_SET_VRING_ERR"
>> +};
>> +
>> +/**
>> + * Create a unix domain socket and bind to path.
>> + * @return
>> + *  socket fd or -1 on failure
>> + */
>> +static int
>> +uds_socket(const char *path)
>> +{
>> +    struct sockaddr_un un;
>> +    int sockfd;
>> +    int ret;
>> +
>> +    if (path == NULL)
>> +            return -1;
>> +
>> +    sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
>> +    if (sockfd < 0)
>> +            return -1;
>> +    RTE_LOG(INFO, VHOST_CONFIG, "socket created, fd:%d\n", sockfd);
>> +
>> +    memset(&un, 0, sizeof(un));
>> +    un.sun_family = AF_UNIX;
>> +    snprintf(un.sun_path, sizeof(un.sun_path), "%s", path);
>> +    ret = bind(sockfd, (struct sockaddr *)&un, sizeof(un));
>> +    if (ret == -1)
>> +            goto err;
>> +    RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);
>> +
>> +    ret = listen(sockfd, 1);
>> +    if (ret == -1)
>> +            goto err;
>> +    
>> +    return sockfd;
>> +
>> +err:
>> +    close(sockfd);
>> +    return -1;
>> +}
>> +
>> +
>> +/* return bytes# of read */
>> +static int
>> +read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
>> +{
>> +
>> +    struct iovec  iov;
>> +    struct msghdr msgh = { 0 };
>> +    size_t fdsize = fd_num * sizeof(int);
>> +    char control[CMSG_SPACE(fdsize)];
>> +    struct cmsghdr *cmsg;
>> +    int ret;
>> +
>> +    iov.iov_base = buf;
>> +    iov.iov_len  = buflen;
>> +    
>> +    msgh.msg_iov = &iov;
>> +    msgh.msg_iovlen = 1;
>> +    msgh.msg_control = control;
>> +    msgh.msg_controllen = sizeof(control);
>> +
>> +    ret = recvmsg(sockfd, &msgh, 0);
>> +    if (ret <= 0) {
>> +            RTE_LOG(ERR, VHOST_CONFIG, "%s failed\n", __func__);
>> +            return ret;
>> +    }
>> +    /* ret == buflen */
>> +    if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
>> +            RTE_LOG(ERR, VHOST_CONFIG, "%s failed\n", __func__);
>> +            return -1;
>> +    }
>> +
>> +    for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
>> +            cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
>> +            if ( (cmsg->cmsg_level == SOL_SOCKET) && 
>> +                    (cmsg->cmsg_type == SCM_RIGHTS)) {
>> +                    memcpy(fds, CMSG_DATA(cmsg), fdsize);
>> +                    break;
>> +            }
>> +    }
>> +    return ret;
>> +}
>> +
>> +static int
>> +read_vhost_message(int sockfd, struct VhostUserMsg *msg)
>> +{
>> +    int ret;
>> +
>> +    ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE, 
>> +            msg->fds, VHOST_MEMORY_MAX_NREGIONS);
>> +    if (ret <= 0)
>> +            return ret;
>> +
>> +    if (msg->size) {
>> +            if (msg->size > sizeof(msg->payload)) {
>> +                    RTE_LOG(ERR, VHOST_CONFIG, 
>> +                            "%s: invalid size:%d\n", __func__, msg->size);
>> +                    return -1;
>> +            }
>> +            ret = read(sockfd, &msg->payload, msg->size);
>> +            if (ret == 0)
>> +                    return 0;
>> +            if (ret != (int)msg->size) {
>> +                    printf("read control message failed\n");
>> +                    return -1;
>> +            }
>> +    }
>> +
>> +    return ret; 
>> +}
>> +
>> +static int
>> +send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
>> +{
>> +
>> +    struct iovec iov;
>> +    struct msghdr msgh = { 0 };
>> +    size_t fdsize = fd_num * sizeof(int);
>> +    char control[CMSG_SPACE(fdsize)];
>> +    struct cmsghdr *cmsg;
>> +    int ret;
>> +
>> +    iov.iov_base = buf;
>> +    iov.iov_len = buflen;
>> +    msgh.msg_iov = &iov;
>> +    msgh.msg_iovlen = 1;
>> +    
>> +    if (fds && fd_num > 0) {
>> +            msgh.msg_control = control;
>> +            msgh.msg_controllen = sizeof(control);
>> +            cmsg = CMSG_FIRSTHDR(&msgh);
>> +            cmsg->cmsg_len = CMSG_LEN(fdsize);
>> +            cmsg->cmsg_level = SOL_SOCKET;
>> +            cmsg->cmsg_type = SCM_RIGHTS;
>> +            memcpy(CMSG_DATA(cmsg), fds, fdsize);
>> +    } else {
>> +            msgh.msg_control = NULL;
>> +            msgh.msg_controllen = 0;
>> +    }
>> +
>> +    do {
>> +            ret = sendmsg(sockfd, &msgh, 0);
>> +    } while (ret < 0 && errno == EINTR);
>> +
>> +    if (ret < 0) {
>> +            RTE_LOG(ERR, VHOST_CONFIG,  "sendmsg error\n");
>> +            return -1;
>> +    }
>> +    
>> +    return 0;
>> +}
>> +
>> +static int
>> +send_vhost_message(int sockfd, struct VhostUserMsg *msg)
>> +{
>> +    int ret;
>> +
>> +    msg->flags &= ~VHOST_USER_VERSION_MASK;
>> +        msg->flags |= VHOST_USER_VERSION;
>> +        msg->flags |= VHOST_USER_REPLY_MASK;        
>> +
>> +    ret = send_fd_message(sockfd, (char *)msg, 
>> +            VHOST_USER_HDR_SIZE + msg->size, NULL, 0);
>> +    
>> +    return ret;
>> +}
>> +
>> +/* call back when there is new connection.  */
>> +static void
>> +vserver_new_vq_conn(int fd, uint64_t dat)
>> +{
>> +    struct vhost_server *vserver = (void *)(uintptr_t)dat;
>> +    int conn_fd;
>> +    uint32_t fh;
>> +    struct vhost_device_ctx vdev_ctx = { 0 };
>> +
>> +    conn_fd = accept(fd, NULL, NULL);
>> +    RTE_LOG(INFO, VHOST_CONFIG, 
>> +            "%s: new connection is %d\n", __func__, conn_fd);
>> +    if (conn_fd < 0)
>> +            return;
>> +
>> +    fh = ops->new_device(vdev_ctx);
>> +    RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", fh);
>> +
>> +    fdset_add(&vserver->fdset, 
>> +            conn_fd, vserver_message_handler, NULL, fh);    
>> +}
>> +
>> +/* callback when there is message on the connfd */
>> +static void
>> +vserver_message_handler(int connfd, uint64_t dat)
>> +{
>> +    struct vhost_device_ctx ctx;
>> +    uint32_t fh = (uint32_t)dat;
>> +    struct VhostUserMsg msg;
>> +    uint64_t features;
>> +    int ret;
>> +
>> +    ctx.fh = fh;
>> +    ret = read_vhost_message(connfd, &msg);
>> +    if (ret < 0) {
>> +            printf("vhost read message failed\n");
>> +    
>> +            /*TODO: cleanup */
>> +            close(connfd);
>> +            fdset_del(&g_vhost_server->fdset, connfd);
>> +            ops->destroy_device(ctx);
>> +
>> +            return;
>> +    } else if (ret == 0) {
>> +            /*TODO: cleanup */
>> +            RTE_LOG(INFO, VHOST_CONFIG, 
>> +                    "vhost peer closed\n");
>> +            close(connfd);
>> +            fdset_del(&g_vhost_server->fdset, connfd);
>> +            ops->destroy_device(ctx);
>> +
>> +            return;
>> +    }
>> +    if (msg.request > VHOST_USER_MAX) {
>> +            /*TODO: cleanup */
>> +            RTE_LOG(INFO, VHOST_CONFIG, 
>> +                    "vhost read incorrect message\n");
>> +            close(connfd);
>> +            fdset_del(&g_vhost_server->fdset, connfd);
>> +
>> +            return;
>> +    }
>> +
>> +    RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
>> +            vhost_message_str[msg.request]);
>> +    switch (msg.request) {
>> +    case VHOST_USER_GET_FEATURES:
>> +            ret = ops->get_features(ctx, &features);
>> +            msg.payload.u64 = ret;
>> +            msg.size = sizeof(msg.payload.u64);
>> +            send_vhost_message(connfd, &msg);
>> +            break;
>> +    case VHOST_USER_SET_FEATURES:
>> +            ops->set_features(ctx, &features);
>> +            break;
>> +
>> +    case VHOST_USER_SET_OWNER:
>> +            ops->set_owner(ctx);
>> +            break;
>> +    case VHOST_USER_RESET_OWNER:
>> +            ops->reset_owner(ctx);
>> +            break;
>> +
>> +    case VHOST_USER_SET_MEM_TABLE:
>> +            user_set_mem_table(ctx, &msg);
>> +            break;
>> +
>> +    case VHOST_USER_SET_LOG_BASE:
>> +    case VHOST_USER_SET_LOG_FD:
>> +            RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
>> +            break;
>> +
>> +    case VHOST_USER_SET_VRING_NUM:
>> +            ops->set_vring_num(ctx, &msg.payload.state);
>> +            break;
>> +    case VHOST_USER_SET_VRING_ADDR:
>> +            ops->set_vring_addr(ctx, &msg.payload.addr);
>> +            break;
>> +    case VHOST_USER_SET_VRING_BASE:
>> +            ops->set_vring_base(ctx, &msg.payload.state);
>> +            break;
>> +
>> +    case VHOST_USER_GET_VRING_BASE:
>> +            ret = ops->get_vring_base(ctx, msg.payload.state.index,
>> +                    &msg.payload.state);
>> +            msg.size = sizeof(msg.payload.state);
>> +            send_vhost_message(connfd, &msg);
>> +            break;
>> +
>> +    case VHOST_USER_SET_VRING_KICK:
>> +            user_set_vring_kick(ctx, &msg);
>> +            break;
>> +    case VHOST_USER_SET_VRING_CALL:
>> +            user_set_vring_call(ctx, &msg);
>> +            break;
>> +
>> +    case VHOST_USER_SET_VRING_ERR:
>> +            RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n");
>> +            break;
>> +
>> +    default:
>> +            break;
>> +    
>> +    }
>> +}
>> +
>> +
>> +/**
>> + * Creates and initialise the vhost server.
>> + */
>> +int
>> +rte_vhost_driver_register(const char *path)
>> +{
>> +
>> +    struct vhost_server *vserver;
>> +
>> +    if (g_vhost_server != NULL)
>> +            return -1;
>> +
>> +    vserver = calloc(sizeof(struct vhost_server), 1);
>> +    /*TODO: all allocation is through DPDK memory allocation */
>> +    if (vserver == NULL)
>> +            return -1;
>> +
>> +    fdset_init(&vserver->fdset);
>> +
>> +    unlink(path);
>> +
>> +    vserver->listenfd = uds_socket(path);
>> +    if (vserver->listenfd < 0) {
>> +            free(vserver);
>> +            return -1;
>> +    }
>> +    vserver->path = path;
>> +
>> +    fdset_add(&vserver->fdset, vserver->listenfd,
>> +                    vserver_new_vq_conn, NULL,
>> +                    (uint64_t)(uintptr_t)vserver);
>> +
>> +    ops = get_virtio_net_callbacks();
>> +
>> +    g_vhost_server = vserver;
>> +
>> +    return 0;
>> +}
>> +
>> +
>> +int
>> +rte_vhost_driver_session_start(void)
>> +{
>> +    fdset_event_dispatch(&g_vhost_server->fdset);
>> +    return 0;
>> +}
>> +
>> diff --git a/lib/librte_vhost/vhost-user/vhost-net-user.h 
>> b/lib/librte_vhost/vhost-user/vhost-net-user.h
>> new file mode 100644
>> index 0000000..c9df9fa
>> --- /dev/null
>> +++ b/lib/librte_vhost/vhost-user/vhost-net-user.h
>> @@ -0,0 +1,74 @@
>> +#ifndef _VHOST_NET_USER_H
>> +#define _VHOST_NET_USER_H
>> +#include <stdint.h>
>> +#include <linux/vhost.h>
>> +
>> +#include "fd_man.h"
>> +
>> +struct vhost_server {
>> +    const char *path; /**< The path the uds is bind to. */
>> +    int listenfd;     /**< The listener sockfd. */
>> +    struct fdset fdset; /**< The fd list this vhost server manages. */
>> +};
>> +
>> +/*********** FROM hw/virtio/vhost-user.c 
>> *************************************/
>> +
>> +#define VHOST_MEMORY_MAX_NREGIONS    8
>> +
>> +typedef enum VhostUserRequest {
>> +    VHOST_USER_NONE = 0,
>> +    VHOST_USER_GET_FEATURES = 1,
>> +    VHOST_USER_SET_FEATURES = 2,
>> +    VHOST_USER_SET_OWNER = 3,
>> +    VHOST_USER_RESET_OWNER = 4,
>> +    VHOST_USER_SET_MEM_TABLE = 5,
>> +    VHOST_USER_SET_LOG_BASE = 6,
>> +    VHOST_USER_SET_LOG_FD = 7,
>> +    VHOST_USER_SET_VRING_NUM = 8,
>> +    VHOST_USER_SET_VRING_ADDR = 9,
>> +    VHOST_USER_SET_VRING_BASE = 10,
>> +    VHOST_USER_GET_VRING_BASE = 11,
>> +    VHOST_USER_SET_VRING_KICK = 12,
>> +    VHOST_USER_SET_VRING_CALL = 13,
>> +    VHOST_USER_SET_VRING_ERR = 14,
>> +    VHOST_USER_MAX
>> +} VhostUserRequest;
>> +
>> +typedef struct VhostUserMemoryRegion {
>> +    uint64_t guest_phys_addr;
>> +    uint64_t memory_size;
>> +    uint64_t userspace_addr;
>> +    uint64_t mmap_offset;
>> +} VhostUserMemoryRegion;
>> +
>> +typedef struct VhostUserMemory {
>> +    uint32_t nregions;
>> +    uint32_t padding;
>> +    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
>> +} VhostUserMemory;
>> +
>> +typedef struct VhostUserMsg {
>> +    VhostUserRequest request;
>> +
>> +#define VHOST_USER_VERSION_MASK     (0x3)
>> +#define VHOST_USER_REPLY_MASK       (0x1 << 2)
>> +    uint32_t flags;
>> +    uint32_t size; /* the following payload size */
>> +    union {
>> +#define VHOST_USER_VRING_IDX_MASK   (0xff)
>> +#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
>> +        uint64_t u64;
>> +        struct vhost_vring_state state;
>> +        struct vhost_vring_addr addr;
>> +        VhostUserMemory memory;
>> +    } payload;
>> +     int fds[VHOST_MEMORY_MAX_NREGIONS];
>> +} __attribute__((packed)) VhostUserMsg;
>> +
>> +#define VHOST_USER_HDR_SIZE (intptr_t)(&((VhostUserMsg *)0)->payload.u64)
>> +
>> +/* The version of the protocol we support */
>> +#define VHOST_USER_VERSION    (0x1)
>> +
>> +/*****************************************************************************/
>> +#endif
>> diff --git a/lib/librte_vhost/vhost-user/virtio-net-user.c 
>> b/lib/librte_vhost/vhost-user/virtio-net-user.c
>> new file mode 100644
>> index 0000000..f38e6cc
>> --- /dev/null
>> +++ b/lib/librte_vhost/vhost-user/virtio-net-user.c
>> @@ -0,0 +1,208 @@
>> +/*-
>> + *   BSD LICENSE
>> + *
>> + *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
>> + *   All rights reserved.
>> + *
>> + *   Redistribution and use in source and binary forms, with or without
>> + *   modification, are permitted provided that the following conditions
>> + *   are met:
>> + *
>> + *     * Redistributions of source code must retain the above copyright
>> + *       notice, this list of conditions and the following disclaimer.
>> + *     * Redistributions in binary form must reproduce the above copyright
>> + *       notice, this list of conditions and the following disclaimer in
>> + *       the documentation and/or other materials provided with the
>> + *       distribution.
>> + *     * Neither the name of Intel Corporation nor the names of its
>> + *       contributors may be used to endorse or promote products derived
>> + *       from this software without specific prior written permission.
>> + *
>> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> + */
>> +
>> +#include <stdint.h>
>> +#include <stdio.h>
>> +#include <stdlib.h>
>> +#include <unistd.h>
>> +#include <sys/mman.h>
>> +
>> +#include <rte_log.h>
>> +
>> +#include "virtio-net-user.h"
>> +#include "vhost-net-user.h"
>> +#include "vhost-net.h"
>> +
>> +extern const struct vhost_net_device_ops *ops;
>> +
>> +#if 0
>> +int
>> +user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
>> +{
>> +    unsigned int idx;
>> +    struct VhostUserMemory memory = pmsg->payload.memory;
>> +    struct virtio_memory_regions regions[VHOST_MEMORY_MAX_NREGIONS];
>> +    uint64_t mapped_address, base_address = 0, mem_size = 0;
>> +
>> +    for (idx = 0; idx < memory.nregions; idx++) {
>> +            if (memory.regions[idx].guest_phys_addr == 0)
>> +                    base_address = memory.regions[idx].userspace_addr;
>> +    }
>> +    if (base_address == 0) {
>> +            RTE_LOG(ERR, VHOST_CONFIG,
>> +                    "couldn't find the mem region whose gpa is 0.\n");
>> +            return -1;
>> +    }
>> +
>> +    for (idx = 0; idx < memory.nregions;  idx++) {
>> +            uint64_t size = memory.regions[idx].userspace_addr - 
>> +                    base_address + memory.regions[idx].memory_size;
>> +            if (mem_size < size)
>> +                    mem_size = size;
>> +    }
>> +
>> +    /*
>> +     * here we assume qemu will map only one file for memory allocation,
>> +     * we only use fds[0] with offset 0.
>> +     */
>> +    mapped_address = (uint64_t)(uintptr_t)mmap(NULL, mem_size, 
>> +            PROT_READ | PROT_WRITE, MAP_SHARED, pmsg->fds[0], 0);
>> +
>> +    if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) {
>> +            RTE_LOG(ERR, VHOST_CONFIG, " mmap qemu guest failed.\n");
>> +            return -1;
>> +    }
>> +                    
>> +    for (idx = 0; idx < memory.nregions; idx++) {
>> +            regions[idx].guest_phys_address = 
>> +                    memory.regions[idx].guest_phys_addr;
>> +            regions[idx].guest_phys_address_end = 
>> +                    memory.regions[idx].guest_phys_addr +
>> +                    memory.regions[idx].memory_size;
>> +            regions[idx].memory_size = memory.regions[idx].memory_size;
>> +            regions[idx].userspace_address = 
>> +                    memory.regions[idx].userspace_addr;
>> +
>> +            regions[idx].address_offset = mapped_address - base_address + 
>> +                    regions[idx].userspace_address -
>> +                    regions[idx].guest_phys_address;
>> +            LOG_DEBUG(VHOST_CONFIG, 
>> +                    "REGION: %u - GPA: %p - QEMU VA: %p - SIZE 
>> (%"PRIu64")\n",
>> +                    idx,
>> +                    (void *)(uintptr_t)regions[idx].guest_phys_address,
>> +                    (void *)(uintptr_t)regions[idx].userspace_address,
>> +                     regions[idx].memory_size);
>> +    }
>> +    ops->set_mem_table(ctx, regions, memory.nregions);
>> +    return 0;
>> +}
>> +
>> +#else
>> +
>> +int
>> +user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
>> +{
>> +    unsigned int idx;
>> +    struct VhostUserMemory memory = pmsg->payload.memory;
>> +    struct virtio_memory_regions regions[VHOST_MEMORY_MAX_NREGIONS];
>> +    uint64_t mapped_address, base_address = 0;
>> +
>> +    for (idx = 0; idx < memory.nregions; idx++) {
>> +            if (memory.regions[idx].guest_phys_addr == 0)
>> +                    base_address = memory.regions[idx].userspace_addr;
>> +    }
>> +    if (base_address == 0) {
>> +            RTE_LOG(ERR, VHOST_CONFIG,
>> +                    "couldn't find the mem region whose gpa is 0.\n");
>> +            return -1;
>> +    }
>> +
>> +
>> +    for (idx = 0; idx < memory.nregions; idx++) {
>> +            regions[idx].guest_phys_address = 
>> +                    memory.regions[idx].guest_phys_addr;
>> +            regions[idx].guest_phys_address_end = 
>> +                    memory.regions[idx].guest_phys_addr +
>> +                    memory.regions[idx].memory_size;
>> +            regions[idx].memory_size = memory.regions[idx].memory_size;
>> +            regions[idx].userspace_address = 
>> +                    memory.regions[idx].userspace_addr;
>> +/*
>> +            mapped_address = (uint64_t)(uintptr_t)mmap(NULL, 
>> +                    regions[idx].memory_size, 
>> +                    PROT_READ | PROT_WRITE, MAP_SHARED, 
>> +                    pmsg->fds[idx], 
>> +                    memory.regions[idx].mmap_offset);
>> +*/
>> +
>> +/* This is ugly */
>> +            mapped_address = (uint64_t)(uintptr_t)mmap(NULL, 
>> +                    regions[idx].memory_size +
>> +                            memory.regions[idx].mmap_offset, 
>> +                    PROT_READ | PROT_WRITE, MAP_SHARED, 
>> +                    pmsg->fds[idx], 
>> +                    0);
>> +            printf("mapped to %p\n", (void *)mapped_address);
>> +
>> +            if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) {
>> +                    RTE_LOG(ERR, VHOST_CONFIG, " mmap qemu guest 
>> failed.\n");
>> +                    return -1;
>> +            }
>> +
>> +//          printf("ret=%d\n", munmap((void *)mapped_address, 
>> (regions[idx].memory_size + memory.regions[idx].mmap_offset + 0x3FFFFFFF) & 
>> ~0x3FFFFFFF));
>> +//          printf("unaligned ret=%d\n", munmap((void *)mapped_address, 
>> (regions[idx].memory_size + memory.regions[idx].mmap_offset )  ));
>> +            mapped_address +=  memory.regions[idx].mmap_offset;
>> +
>> +            regions[idx].address_offset = mapped_address -
>> +                    regions[idx].guest_phys_address;
>> +            LOG_DEBUG(VHOST_CONFIG, 
>> +                    "REGION: %u - GPA: %p - QEMU VA: %p - SIZE 
>> (%"PRIu64")\n",
>> +                    idx,
>> +                    (void *)(uintptr_t)regions[idx].guest_phys_address,
>> +                    (void *)(uintptr_t)regions[idx].userspace_address,
>> +                     regions[idx].memory_size);
>> +    }
>> +    ops->set_mem_table(ctx, regions, memory.nregions);
>> +    return 0;
>> +}
>> +
>> +
>> +
>> +
>> +#endif
>> +
>> +
>> +void
>> +user_set_vring_call(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
>> +{
>> +    struct vhost_vring_file file;
>> +
>> +    file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
>> +    file.fd = pmsg->fds[0];
>> +    RTE_LOG(INFO, VHOST_CONFIG, 
>> +            "vring call idx:%d file:%d\n", file.index, file.fd);
>> +    ops->set_vring_call(ctx, &file);
>> +}
>> +
>> +
>> +void
>> +user_set_vring_kick(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
>> +{
>> +    struct vhost_vring_file file;
>> +
>> +    file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
>> +    file.fd = pmsg->fds[0];
>> +    RTE_LOG(INFO, VHOST_CONFIG, 
>> +            "vring kick idx:%d file:%d\n", file.index, file.fd);
>> +    ops->set_vring_kick(ctx, &file);
>> +}
>> diff --git a/lib/librte_vhost/vhost-user/virtio-net-user.h 
>> b/lib/librte_vhost/vhost-user/virtio-net-user.h
>> new file mode 100644
>> index 0000000..0969376
>> --- /dev/null
>> +++ b/lib/librte_vhost/vhost-user/virtio-net-user.h
>> @@ -0,0 +1,11 @@
>> +#ifndef _VIRTIO_NET_USER_H
>> +#define _VIRTIO_NET_USER_H
>> +
>> +#include "vhost-net.h"
>> +#include "vhost-net-user.h"
>> +
>> +int user_set_mem_table(struct vhost_device_ctx, struct VhostUserMsg *);
>> +void user_set_vring_kick(struct vhost_device_ctx, struct VhostUserMsg *);
>> +void user_set_vring_call(struct vhost_device_ctx, struct VhostUserMsg *);
>> +
>> +#endif
>> diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
>> index ccfd82f..8ff0301 100644
>> --- a/lib/librte_vhost/vhost_rxtx.c
>> +++ b/lib/librte_vhost/vhost_rxtx.c
>> @@ -38,19 +38,14 @@
>>  #include <rte_memcpy.h>
>>  #include <rte_virtio_net.h>
>>  
>> -#include "vhost-net-cdev.h"
>> +#include "vhost-net.h"
>>  
>> -#define MAX_PKT_BURST 32
>> +#define VHOST_MAX_PKT_BURST 64
>> +#define VHOST_MAX_MRG_PKT_BURST 64
>>  
>> -/**
>> - * This function adds buffers to the virtio devices RX virtqueue. Buffers 
>> can
>> - * be received from the physical port or from another virtio device. A 
>> packet
>> - * count is returned to indicate the number of packets that are succesfully
>> - * added to the RX queue. This function works when mergeable is disabled.
>> - */
>> -static inline uint32_t __attribute__((always_inline))
>> -virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>> -    struct rte_mbuf **pkts, uint32_t count)
>> +
>> +uint32_t
>> +rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id, struct 
>> rte_mbuf **pkts, uint32_t count)
>>  {
>>      struct vhost_virtqueue *vq;
>>      struct vring_desc *desc;
>> @@ -59,26 +54,23 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>>      struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
>>      uint64_t buff_addr = 0;
>>      uint64_t buff_hdr_addr = 0;
>> -    uint32_t head[MAX_PKT_BURST], packet_len = 0;
>> +    uint32_t head[VHOST_MAX_PKT_BURST], packet_len = 0;
>>      uint32_t head_idx, packet_success = 0;
>> +    uint32_t mergeable, mrg_count = 0;
>>      uint16_t avail_idx, res_cur_idx;
>>      uint16_t res_base_idx, res_end_idx;
>>      uint16_t free_entries;
>>      uint8_t success = 0;
>>  
>> -    LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh);
>> +    LOG_DEBUG(VHOST_DATA, "(%"PRIu64") %s()\n", dev->device_fh, __func__);
>>      if (unlikely(queue_id != VIRTIO_RXQ)) {
>>              LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
>>              return 0;
>>      }
>>  
>>      vq = dev->virtqueue[VIRTIO_RXQ];
>> -    count = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count;
>> -
>> -    /*
>> -     * As many data cores may want access to available buffers,
>> -     * they need to be reserved.
>> -     */
>> +    count = (count > VHOST_MAX_PKT_BURST) ? VHOST_MAX_PKT_BURST : count;
>> +    /* As many data cores may want access to available buffers, they need 
>> to be reserved. */
>>      do {
>>              res_base_idx = vq->last_used_idx_res;
>>              avail_idx = *((volatile uint16_t *)&vq->avail->idx);
>> @@ -93,21 +85,25 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>>  
>>              res_end_idx = res_base_idx + count;
>>              /* vq->last_used_idx_res is atomically updated. */
>> -            /* TODO: Allow to disable cmpset if no concurrency in 
>> application. */
>> +            /* TODO: Allow to disable cmpset if no concurrency in 
>> application */
>>              success = rte_atomic16_cmpset(&vq->last_used_idx_res,
>>                              res_base_idx, res_end_idx);
>> +            /* If there is contention here and failed, try again. */
>>      } while (unlikely(success == 0));
>>      res_cur_idx = res_base_idx;
>>      LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| End Index %d\n",
>> -                    dev->device_fh, res_cur_idx, res_end_idx);
>> +                    dev->device_fh,
>> +                    res_cur_idx, res_end_idx);
>>  
>>      /* Prefetch available ring to retrieve indexes. */
>>      rte_prefetch0(&vq->avail->ring[res_cur_idx & (vq->size - 1)]);
>>  
>> +    /* Check if the VIRTIO_NET_F_MRG_RXBUF feature is enabled. */
>> +    mergeable = dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF);
>> +
>>      /* Retrieve all of the head indexes first to avoid caching issues. */
>>      for (head_idx = 0; head_idx < count; head_idx++)
>> -            head[head_idx] = vq->avail->ring[(res_cur_idx + head_idx) &
>> -                                    (vq->size - 1)];
>> +            head[head_idx] = vq->avail->ring[(res_cur_idx + head_idx) & 
>> (vq->size - 1)];
>>  
>>      /*Prefetch descriptor index. */
>>      rte_prefetch0(&vq->desc[head[packet_success]]);
>> @@ -123,46 +119,57 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t 
>> queue_id,
>>              /* Prefetch buffer address. */
>>              rte_prefetch0((void *)(uintptr_t)buff_addr);
>>  
>> -            /* Copy virtio_hdr to packet and increment buffer address */
>> -            buff_hdr_addr = buff_addr;
>> -            packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
>> -
>> -            /*
>> -             * If the descriptors are chained the header and data are
>> -             * placed in separate buffers.
>> -             */
>> -            if (desc->flags & VRING_DESC_F_NEXT) {
>> -                    desc->len = vq->vhost_hlen;
>> -                    desc = &vq->desc[desc->next];
>> -                    /* Buffer address translation. */
>> -                    buff_addr = gpa_to_vva(dev, desc->addr);
>> -                    desc->len = rte_pktmbuf_data_len(buff);
>> +            if (mergeable && (mrg_count != 0)) {
>> +                    desc->len = packet_len = rte_pktmbuf_data_len(buff);
>>              } else {
>> -                    buff_addr += vq->vhost_hlen;
>> -                    desc->len = packet_len;
>> +                    /* Copy virtio_hdr to packet and increment buffer 
>> address */
>> +                    buff_hdr_addr = buff_addr;
>> +                    packet_len = rte_pktmbuf_data_len(buff) + 
>> vq->vhost_hlen;
>> +
>> +                    /*
>> +                     * If the descriptors are chained the header and data 
>> are placed in
>> +                     * separate buffers.
>> +                     */
>> +                    if (desc->flags & VRING_DESC_F_NEXT) {
>> +                            desc->len = vq->vhost_hlen;
>> +                            desc = &vq->desc[desc->next];
>> +                            /* Buffer address translation. */
>> +                            buff_addr = gpa_to_vva(dev, desc->addr);
>> +                            desc->len = rte_pktmbuf_data_len(buff);
>> +                    } else {
>> +                            buff_addr += vq->vhost_hlen;
>> +                            desc->len = packet_len;
>> +                    }
>>              }
>>  
>> +            VHOST_PRINT_PACKET(dev, (uintptr_t)buff_addr, 
>> rte_pktmbuf_data_len(buff), 0);
>> +
>>              /* Update used ring with desc information */
>> -            vq->used->ring[res_cur_idx & (vq->size - 1)].id =
>> -                                                    head[packet_success];
>> +            vq->used->ring[res_cur_idx & (vq->size - 1)].id = 
>> head[packet_success];
>>              vq->used->ring[res_cur_idx & (vq->size - 1)].len = packet_len;
>>  
>>              /* Copy mbuf data to buffer */
>> -            /* FIXME for sg mbuf and the case that desc couldn't hold the 
>> mbuf data */
>> -            rte_memcpy((void *)(uintptr_t)buff_addr,
>> -                    rte_pktmbuf_mtod(buff, const void *),
>> -                    rte_pktmbuf_data_len(buff));
>> -            PRINT_PACKET(dev, (uintptr_t)buff_addr,
>> -                    rte_pktmbuf_data_len(buff), 0);
>> +            /* TODO fixme for sg mbuf and the case that desc couldn't hold 
>> the mbuf data */
>> +            rte_memcpy((void *)(uintptr_t)buff_addr, (const void 
>> *)buff->pkt.data, rte_pktmbuf_data_len(buff));
>>  
>>              res_cur_idx++;
>>              packet_success++;
>>  
>> -            rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
>> -                    (const void *)&virtio_hdr, vq->vhost_hlen);
>> -
>> -            PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1);
>> -
>> +            /* If mergeable is disabled then a header is required per 
>> buffer. */
>> +            if (!mergeable) {
>> +                    rte_memcpy((void *)(uintptr_t)buff_hdr_addr, (const 
>> void *)&virtio_hdr, vq->vhost_hlen);
>> +                    VHOST_PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, 
>> vq->vhost_hlen, 1);
>> +            } else {
>> +                    mrg_count++;
>> +                    /* Merge buffer can only handle so many buffers at a 
>> time. Tell the guest if this limit is reached. */
>> +                    if ((mrg_count == VHOST_MAX_MRG_PKT_BURST) || 
>> (res_cur_idx == res_end_idx)) {
>> +                            virtio_hdr.num_buffers = mrg_count;
>> +                            LOG_DEBUG(VHOST_DATA, "(%"PRIu64") RX: Num 
>> merge buffers %d\n", dev->device_fh, virtio_hdr.num_buffers);
>> +                            rte_memcpy((void *)(uintptr_t)buff_hdr_addr, 
>> (const void *)&virtio_hdr, vq->vhost_hlen);
>> +                            VHOST_PRINT_PACKET(dev, 
>> (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1);
>> +                            mrg_count = 0;
>> +                    }
>> +            }
>>              if (res_cur_idx < res_end_idx) {
>>                      /* Prefetch descriptor index. */
>>                      rte_prefetch0(&vq->desc[head[packet_success]]);
>> @@ -184,357 +191,18 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t 
>> queue_id,
>>      return count;
>>  }
>>  
>> -static inline uint32_t __attribute__((always_inline))
>> -copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
>> -    uint16_t res_end_idx, struct rte_mbuf *pkt)
>> -{
>> -    uint32_t vec_idx = 0;
>> -    uint32_t entry_success = 0;
>> -    struct vhost_virtqueue *vq;
>> -    /* The virtio_hdr is initialised to 0. */
>> -    struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {
>> -            {0, 0, 0, 0, 0, 0}, 0};
>> -    uint16_t cur_idx = res_base_idx;
>> -    uint64_t vb_addr = 0;
>> -    uint64_t vb_hdr_addr = 0;
>> -    uint32_t seg_offset = 0;
>> -    uint32_t vb_offset = 0;
>> -    uint32_t seg_avail;
>> -    uint32_t vb_avail;
>> -    uint32_t cpy_len, entry_len;
>> -
>> -    if (pkt == NULL)
>> -            return 0;
>> -
>> -    LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| "
>> -            "End Index %d\n",
>> -            dev->device_fh, cur_idx, res_end_idx);
>> -
>> -    /*
>> -     * Convert from gpa to vva
>> -     * (guest physical addr -> vhost virtual addr)
>> -     */
>> -    vq = dev->virtqueue[VIRTIO_RXQ];
>> -    vb_addr =
>> -            gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
>> -    vb_hdr_addr = vb_addr;
>> -
>> -    /* Prefetch buffer address. */
>> -    rte_prefetch0((void *)(uintptr_t)vb_addr);
>> -
>> -    virtio_hdr.num_buffers = res_end_idx - res_base_idx;
>> -
>> -    LOG_DEBUG(VHOST_DATA, "(%"PRIu64") RX: Num merge buffers %d\n",
>> -            dev->device_fh, virtio_hdr.num_buffers);
>>  
>> -    rte_memcpy((void *)(uintptr_t)vb_hdr_addr,
>> -            (const void *)&virtio_hdr, vq->vhost_hlen);
>> -
>> -    PRINT_PACKET(dev, (uintptr_t)vb_hdr_addr, vq->vhost_hlen, 1);
>> -
>> -    seg_avail = rte_pktmbuf_data_len(pkt);
>> -    vb_offset = vq->vhost_hlen;
>> -    vb_avail =
>> -            vq->buf_vec[vec_idx].buf_len - vq->vhost_hlen;
>> -
>> -    entry_len = vq->vhost_hlen;
>> -
>> -    if (vb_avail == 0) {
>> -            uint32_t desc_idx =
>> -                    vq->buf_vec[vec_idx].desc_idx;
>> -            vq->desc[desc_idx].len = vq->vhost_hlen;
>> -
>> -            if ((vq->desc[desc_idx].flags
>> -                    & VRING_DESC_F_NEXT) == 0) {
>> -                    /* Update used ring with desc information */
>> -                    vq->used->ring[cur_idx & (vq->size - 1)].id
>> -                            = vq->buf_vec[vec_idx].desc_idx;
>> -                    vq->used->ring[cur_idx & (vq->size - 1)].len
>> -                            = entry_len;
>> -
>> -                    entry_len = 0;
>> -                    cur_idx++;
>> -                    entry_success++;
>> -            }
>> -
>> -            vec_idx++;
>> -            vb_addr =
>> -                    gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
>> -
>> -            /* Prefetch buffer address. */
>> -            rte_prefetch0((void *)(uintptr_t)vb_addr);
>> -            vb_offset = 0;
>> -            vb_avail = vq->buf_vec[vec_idx].buf_len;
>> -    }
>> -
>> -    cpy_len = RTE_MIN(vb_avail, seg_avail);
>> -
>> -    while (cpy_len > 0) {
>> -            /* Copy mbuf data to vring buffer */
>> -            rte_memcpy((void *)(uintptr_t)(vb_addr + vb_offset),
>> -                    (const void *)(rte_pktmbuf_mtod(pkt, char*) + 
>> seg_offset),
>> -                    cpy_len);
>> -
>> -            PRINT_PACKET(dev,
>> -                    (uintptr_t)(vb_addr + vb_offset),
>> -                    cpy_len, 0);
>> -
>> -            seg_offset += cpy_len;
>> -            vb_offset += cpy_len;
>> -            seg_avail -= cpy_len;
>> -            vb_avail -= cpy_len;
>> -            entry_len += cpy_len;
>> -
>> -            if (seg_avail != 0) {
>> -                    /*
>> -                     * The virtio buffer in this vring
>> -                     * entry reach to its end.
>> -                     * But the segment doesn't complete.
>> -                     */
>> -                    if ((vq->desc[vq->buf_vec[vec_idx].desc_idx].flags &
>> -                            VRING_DESC_F_NEXT) == 0) {
>> -                            /* Update used ring with desc information */
>> -                            vq->used->ring[cur_idx & (vq->size - 1)].id
>> -                                    = vq->buf_vec[vec_idx].desc_idx;
>> -                            vq->used->ring[cur_idx & (vq->size - 1)].len
>> -                                    = entry_len;
>> -                            entry_len = 0;
>> -                            cur_idx++;
>> -                            entry_success++;
>> -                    }
>> -
>> -                    vec_idx++;
>> -                    vb_addr = gpa_to_vva(dev,
>> -                            vq->buf_vec[vec_idx].buf_addr);
>> -                    vb_offset = 0;
>> -                    vb_avail = vq->buf_vec[vec_idx].buf_len;
>> -                    cpy_len = RTE_MIN(vb_avail, seg_avail);
>> -            } else {
>> -                    /*
>> -                     * This current segment complete, need continue to
>> -                     * check if the whole packet complete or not.
>> -                     */
>> -                    pkt = pkt->next;
>> -                    if (pkt != NULL) {
>> -                            /*
>> -                             * There are more segments.
>> -                             */
>> -                            if (vb_avail == 0) {
>> -                                    /*
>> -                                     * This current buffer from vring is
>> -                                     * used up, need fetch next buffer
>> -                                     * from buf_vec.
>> -                                     */
>> -                                    uint32_t desc_idx =
>> -                                            vq->buf_vec[vec_idx].desc_idx;
>> -                                    vq->desc[desc_idx].len = vb_offset;
>> -
>> -                                    if ((vq->desc[desc_idx].flags &
>> -                                            VRING_DESC_F_NEXT) == 0) {
>> -                                            uint16_t wrapped_idx =
>> -                                                    cur_idx & (vq->size - 
>> 1);
>> -                                            /*
>> -                                             * Update used ring with the
>> -                                             * descriptor information
>> -                                             */
>> -                                            vq->used->ring[wrapped_idx].id
>> -                                                    = desc_idx;
>> -                                            vq->used->ring[wrapped_idx].len
>> -                                                    = entry_len;
>> -                                            entry_success++;
>> -                                            entry_len = 0;
>> -                                            cur_idx++;
>> -                                    }
>> -
>> -                                    /* Get next buffer from buf_vec. */
>> -                                    vec_idx++;
>> -                                    vb_addr = gpa_to_vva(dev,
>> -                                            vq->buf_vec[vec_idx].buf_addr);
>> -                                    vb_avail =
>> -                                            vq->buf_vec[vec_idx].buf_len;
>> -                                    vb_offset = 0;
>> -                            }
>> -
>> -                            seg_offset = 0;
>> -                            seg_avail = rte_pktmbuf_data_len(pkt);
>> -                            cpy_len = RTE_MIN(vb_avail, seg_avail);
>> -                    } else {
>> -                            /*
>> -                             * This whole packet completes.
>> -                             */
>> -                            uint32_t desc_idx =
>> -                                    vq->buf_vec[vec_idx].desc_idx;
>> -                            vq->desc[desc_idx].len = vb_offset;
>> -
>> -                            while (vq->desc[desc_idx].flags &
>> -                                    VRING_DESC_F_NEXT) {
>> -                                    desc_idx = vq->desc[desc_idx].next;
>> -                                     vq->desc[desc_idx].len = 0;
>> -                            }
>> -
>> -                            /* Update used ring with desc information */
>> -                            vq->used->ring[cur_idx & (vq->size - 1)].id
>> -                                    = vq->buf_vec[vec_idx].desc_idx;
>> -                            vq->used->ring[cur_idx & (vq->size - 1)].len
>> -                                    = entry_len;
>> -                            entry_len = 0;
>> -                            cur_idx++;
>> -                            entry_success++;
>> -                            seg_avail = 0;
>> -                            cpy_len = RTE_MIN(vb_avail, seg_avail);
>> -                    }
>> -            }
>> -    }
>> -
>> -    return entry_success;
>> -}
>> -
>> -/*
>> - * This function works for mergeable RX.
>> - */
>> -static inline uint32_t __attribute__((always_inline))
>> -virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
>> -    struct rte_mbuf **pkts, uint32_t count)
>> +uint32_t
>> +rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id, struct 
>> rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint32_t count)
>>  {
>> -    struct vhost_virtqueue *vq;
>> -    uint32_t pkt_idx = 0, entry_success = 0;
>> -    uint16_t avail_idx, res_cur_idx;
>> -    uint16_t res_base_idx, res_end_idx;
>> -    uint8_t success = 0;
>> -
>> -    LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_merge_rx()\n",
>> -            dev->device_fh);
>> -    if (unlikely(queue_id != VIRTIO_RXQ)) {
>> -            LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
>> -    }
>> -
>> -    vq = dev->virtqueue[VIRTIO_RXQ];
>> -    count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
>> -
>> -    if (count == 0)
>> -            return 0;
>> -
>> -    for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
>> -            uint32_t secure_len = 0;
>> -            uint16_t need_cnt;
>> -            uint32_t vec_idx = 0;
>> -            uint32_t pkt_len = pkts[pkt_idx]->pkt_len + vq->vhost_hlen;
>> -            uint16_t i, id;
>> -
>> -            do {
>> -                    /*
>> -                     * As many data cores may want access to available
>> -                     * buffers, they need to be reserved.
>> -                     */
>> -                    res_base_idx = vq->last_used_idx_res;
>> -                    res_cur_idx = res_base_idx;
>> -
>> -                    do {
>> -                            avail_idx = *((volatile uint16_t 
>> *)&vq->avail->idx);
>> -                            if (unlikely(res_cur_idx == avail_idx)) {
>> -                                    LOG_DEBUG(VHOST_DATA,
>> -                                            "(%"PRIu64") Failed "
>> -                                            "to get enough desc from "
>> -                                            "vring\n",
>> -                                            dev->device_fh);
>> -                                    return pkt_idx;
>> -                            } else {
>> -                                    uint16_t wrapped_idx =
>> -                                            (res_cur_idx) & (vq->size - 1);
>> -                                    uint32_t idx =
>> -                                            vq->avail->ring[wrapped_idx];
>> -                                    uint8_t next_desc;
>> -
>> -                                    do {
>> -                                            next_desc = 0;
>> -                                            secure_len += vq->desc[idx].len;
>> -                                            if (vq->desc[idx].flags &
>> -                                                    VRING_DESC_F_NEXT) {
>> -                                                    idx = 
>> vq->desc[idx].next;
>> -                                                    next_desc = 1;
>> -                                            }
>> -                                    } while (next_desc);
>> -
>> -                                    res_cur_idx++;
>> -                            }
>> -                    } while (pkt_len > secure_len);
>> -
>> -                    /* vq->last_used_idx_res is atomically updated. */
>> -                    success = rte_atomic16_cmpset(&vq->last_used_idx_res,
>> -                                                    res_base_idx,
>> -                                                    res_cur_idx);
>> -            } while (success == 0);
>> -
>> -            id = res_base_idx;
>> -            need_cnt = res_cur_idx - res_base_idx;
>> -
>> -            for (i = 0; i < need_cnt; i++, id++) {
>> -                    uint16_t wrapped_idx = id & (vq->size - 1);
>> -                    uint32_t idx = vq->avail->ring[wrapped_idx];
>> -                    uint8_t next_desc;
>> -                    do {
>> -                            next_desc = 0;
>> -                            vq->buf_vec[vec_idx].buf_addr =
>> -                                    vq->desc[idx].addr;
>> -                            vq->buf_vec[vec_idx].buf_len =
>> -                                    vq->desc[idx].len;
>> -                            vq->buf_vec[vec_idx].desc_idx = idx;
>> -                            vec_idx++;
>> -
>> -                            if (vq->desc[idx].flags & VRING_DESC_F_NEXT) {
>> -                                    idx = vq->desc[idx].next;
>> -                                    next_desc = 1;
>> -                            }
>> -                    } while (next_desc);
>> -            }
>> -
>> -            res_end_idx = res_cur_idx;
>> -
>> -            entry_success = copy_from_mbuf_to_vring(dev, res_base_idx,
>> -                    res_end_idx, pkts[pkt_idx]);
>> -
>> -            rte_compiler_barrier();
>> -
>> -            /*
>> -             * Wait until it's our turn to add our buffer
>> -             * to the used ring.
>> -             */
>> -            while (unlikely(vq->last_used_idx != res_base_idx))
>> -                    rte_pause();
>> -
>> -            *(volatile uint16_t *)&vq->used->idx += entry_success;
>> -            vq->last_used_idx = res_end_idx;
>> -
>> -            /* Kick the guest if necessary. */
>> -            if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
>> -                    eventfd_write((int)vq->kickfd, 1);
>> -    }
>> -
>> -    return count;
>> -}
>> -
>> -uint16_t
>> -rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
>> -    struct rte_mbuf **pkts, uint16_t count)
>> -{
>> -    if (unlikely(dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)))
>> -            return virtio_dev_merge_rx(dev, queue_id, pkts, count);
>> -    else
>> -            return virtio_dev_rx(dev, queue_id, pkts, count);
>> -}
>> -
>> -uint16_t
>> -rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
>> -    struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
>> -{
>> -    struct rte_mbuf *m, *prev;
>> +    struct rte_mbuf *mbuf;
>>      struct vhost_virtqueue *vq;
>>      struct vring_desc *desc;
>> -    uint64_t vb_addr = 0;
>> -    uint32_t head[MAX_PKT_BURST];
>> +    uint64_t buff_addr = 0;
>> +    uint32_t head[VHOST_MAX_PKT_BURST];
>>      uint32_t used_idx;
>>      uint32_t i;
>> -    uint16_t free_entries, entry_success = 0;
>> +    uint16_t free_entries, packet_success = 0;
>>      uint16_t avail_idx;
>>  
>>      if (unlikely(queue_id != VIRTIO_TXQ)) {
>> @@ -549,8 +217,8 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
>> queue_id,
>>      if (vq->last_used_idx == avail_idx)
>>              return 0;
>>  
>> -    LOG_DEBUG(VHOST_DATA, "%s (%"PRIu64")\n", __func__,
>> -            dev->device_fh);
>> +    LOG_DEBUG(VHOST_DATA, "(%"PRIu64") %s(%d->%d)\n", 
>> +            dev->device_fh, __func__, vq->last_used_idx, avail_idx);
>>  
>>      /* Prefetch available ring to retrieve head indexes. */
>>      rte_prefetch0(&vq->avail->ring[vq->last_used_idx & (vq->size - 1)]);
>> @@ -558,173 +226,68 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, 
>> uint16_t queue_id,
>>      /*get the number of free entries in the ring*/
>>      free_entries = (avail_idx - vq->last_used_idx);
>>  
>> -    free_entries = RTE_MIN(free_entries, count);
>> +    if (free_entries > count)
>> +            free_entries = count;
>>      /* Limit to MAX_PKT_BURST. */
>> -    free_entries = RTE_MIN(free_entries, MAX_PKT_BURST);
>> +    if (free_entries > VHOST_MAX_PKT_BURST)
>> +            free_entries = VHOST_MAX_PKT_BURST;
>>  
>> -    LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n",
>> -                    dev->device_fh, free_entries);
>> +    LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n", 
>> dev->device_fh, free_entries);
>>      /* Retrieve all of the head indexes first to avoid caching issues. */
>>      for (i = 0; i < free_entries; i++)
>>              head[i] = vq->avail->ring[(vq->last_used_idx + i) & (vq->size - 
>> 1)];
>>  
>>      /* Prefetch descriptor index. */
>> -    rte_prefetch0(&vq->desc[head[entry_success]]);
>> +    rte_prefetch0(&vq->desc[head[packet_success]]);
>>      rte_prefetch0(&vq->used->ring[vq->last_used_idx & (vq->size - 1)]);
>>  
>> -    while (entry_success < free_entries) {
>> -            uint32_t vb_avail, vb_offset;
>> -            uint32_t seg_avail, seg_offset;
>> -            uint32_t cpy_len;
>> -            uint32_t seg_num = 0;
>> -            struct rte_mbuf *cur;
>> -            uint8_t alloc_err = 0;
>> -
>> -            desc = &vq->desc[head[entry_success]];
>> +    while (packet_success < free_entries) {
>> +            desc = &vq->desc[head[packet_success]];
>>  
>>              /* Discard first buffer as it is the virtio header */
>>              desc = &vq->desc[desc->next];
>>  
>>              /* Buffer address translation. */
>> -            vb_addr = gpa_to_vva(dev, desc->addr);
>> +            buff_addr = gpa_to_vva(dev, desc->addr);
>>              /* Prefetch buffer address. */
>> -            rte_prefetch0((void *)(uintptr_t)vb_addr);
>> +            rte_prefetch0((void *)(uintptr_t)buff_addr);
>>  
>>              used_idx = vq->last_used_idx & (vq->size - 1);
>>  
>> -            if (entry_success < (free_entries - 1)) {
>> +            if (packet_success < (free_entries - 1)) {
>>                      /* Prefetch descriptor index. */
>> -                    rte_prefetch0(&vq->desc[head[entry_success+1]]);
>> +                    rte_prefetch0(&vq->desc[head[packet_success+1]]);
>>                      rte_prefetch0(&vq->used->ring[(used_idx + 1) & 
>> (vq->size - 1)]);
>>              }
>>  
>>              /* Update used index buffer information. */
>> -            vq->used->ring[used_idx].id = head[entry_success];
>> +            vq->used->ring[used_idx].id = head[packet_success];
>>              vq->used->ring[used_idx].len = 0;
>>  
>> -            vb_offset = 0;
>> -            vb_avail = desc->len;
>> -            /* Allocate an mbuf and populate the structure. */
>> -            m = rte_pktmbuf_alloc(mbuf_pool);
>> -            if (unlikely(m == NULL)) {
>> -                    RTE_LOG(ERR, VHOST_DATA,
>> -                            "Failed to allocate memory for mbuf.\n");
>> -                    return entry_success;
>> +            mbuf = rte_pktmbuf_alloc(mbuf_pool);
>> +            if (unlikely(mbuf == NULL)) {
>> +                    RTE_LOG(ERR, VHOST_DATA, "Failed to allocate memory for 
>> mbuf.\n");
>> +                    return packet_success;
>>              }
>> -            seg_offset = 0;
>> -            seg_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
>> -            cpy_len = RTE_MIN(vb_avail, seg_avail);
>> -
>> -            PRINT_PACKET(dev, (uintptr_t)vb_addr, desc->len, 0);
>> -
>> -            seg_num++;
>> -            cur = m;
>> -            prev = m;
>> -            while (cpy_len != 0) {
>> -                    rte_memcpy((void *)(rte_pktmbuf_mtod(cur, char *) + 
>> seg_offset),
>> -                            (void *)((uintptr_t)(vb_addr + vb_offset)),
>> -                            cpy_len);
>> -
>> -                    seg_offset += cpy_len;
>> -                    vb_offset += cpy_len;
>> -                    vb_avail -= cpy_len;
>> -                    seg_avail -= cpy_len;
>> -
>> -                    if (vb_avail != 0) {
>> -                            /*
>> -                             * The segment reachs to its end,
>> -                             * while the virtio buffer in TX vring has
>> -                             * more data to be copied.
>> -                             */
>> -                            cur->data_len = seg_offset;
>> -                            m->pkt_len += seg_offset;
>> -                            /* Allocate mbuf and populate the structure. */
>> -                            cur = rte_pktmbuf_alloc(mbuf_pool);
>> -                            if (unlikely(cur == NULL)) {
>> -                                    RTE_LOG(ERR, VHOST_DATA, "Failed to "
>> -                                            "allocate memory for mbuf.\n");
>> -                                    rte_pktmbuf_free(m);
>> -                                    alloc_err = 1;
>> -                                    break;
>> -                            }
>> -
>> -                            seg_num++;
>> -                            prev->next = cur;
>> -                            prev = cur;
>> -                            seg_offset = 0;
>> -                            seg_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;
>> -                    } else {
>> -                            if (desc->flags & VRING_DESC_F_NEXT) {
>> -                                    /*
>> -                                     * There are more virtio buffers in
>> -                                     * same vring entry need to be copied.
>> -                                     */
>> -                                    if (seg_avail == 0) {
>> -                                            /*
>> -                                             * The current segment hasn't
>> -                                             * room to accomodate more
>> -                                             * data.
>> -                                             */
>> -                                            cur->data_len = seg_offset;
>> -                                            m->pkt_len += seg_offset;
>> -                                            /*
>> -                                             * Allocate an mbuf and
>> -                                             * populate the structure.
>> -                                             */
>> -                                            cur = 
>> rte_pktmbuf_alloc(mbuf_pool);
>> -                                            if (unlikely(cur == NULL)) {
>> -                                                    RTE_LOG(ERR,
>> -                                                            VHOST_DATA,
>> -                                                            "Failed to "
>> -                                                            "allocate 
>> memory "
>> -                                                            "for mbuf\n");
>> -                                                    rte_pktmbuf_free(m);
>> -                                                    alloc_err = 1;
>> -                                                    break;
>> -                                            }
>> -                                            seg_num++;
>> -                                            prev->next = cur;
>> -                                            prev = cur;
>> -                                            seg_offset = 0;
>> -                                            seg_avail = cur->buf_len - 
>> RTE_PKTMBUF_HEADROOM;
>> -                                    }
>> -
>> -                                    desc = &vq->desc[desc->next];
>> -
>> -                                    /* Buffer address translation. */
>> -                                    vb_addr = gpa_to_vva(dev, desc->addr);
>> -                                    /* Prefetch buffer address. */
>> -                                    rte_prefetch0((void 
>> *)(uintptr_t)vb_addr);
>> -                                    vb_offset = 0;
>> -                                    vb_avail = desc->len;
>> -
>> -                                    PRINT_PACKET(dev, (uintptr_t)vb_addr,
>> -                                            desc->len, 0);
>> -                            } else {
>> -                                    /* The whole packet completes. */
>> -                                    cur->data_len = seg_offset;
>> -                                    m->pkt_len += seg_offset;
>> -                                    vb_avail = 0;
>> -                            }
>> -                    }
>> +            mbuf->pkt.data_len = desc->len;
>> +            mbuf->pkt.pkt_len  = mbuf->pkt.data_len;
>>  
>> -                    cpy_len = RTE_MIN(vb_avail, seg_avail);
>> -            }
>> +            rte_memcpy((void *) mbuf->pkt.data,
>> +                    (const void *) buff_addr, mbuf->pkt.data_len);
>>  
>> -            if (unlikely(alloc_err == 1))
>> -                    break;
>> +            pkts[packet_success] = mbuf;
>>  
>> -            m->nb_segs = seg_num;
>> +            VHOST_PRINT_PACKET(dev, (uintptr_t)buff_addr, desc->len, 0);
>>  
>> -            pkts[entry_success] = m;
>>              vq->last_used_idx++;
>> -            entry_success++;
>> +            packet_success++;
>>      }
>>  
>>      rte_compiler_barrier();
>> -    vq->used->idx += entry_success;
>> +    vq->used->idx += packet_success;
>>      /* Kick guest if required. */
>>      if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
>>              eventfd_write((int)vq->kickfd, 1);
>> -    return entry_success;
>> +
>> +    return packet_success;
>>  }
>> diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
>> index 852b6d1..516e743 100644
>> --- a/lib/librte_vhost/virtio-net.c
>> +++ b/lib/librte_vhost/virtio-net.c
>> @@ -31,17 +31,14 @@
>>   *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>>   */
>>  
>> -#include <dirent.h>
>> -#include <fuse/cuse_lowlevel.h>
>>  #include <linux/vhost.h>
>>  #include <linux/virtio_net.h>
>>  #include <stddef.h>
>>  #include <stdint.h>
>>  #include <stdlib.h>
>> -#include <sys/eventfd.h>
>> -#include <sys/ioctl.h>
>>  #include <sys/mman.h>
>>  #include <unistd.h>
>> +#include <assert.h>
>>  
>>  #include <rte_ethdev.h>
>>  #include <rte_log.h>
>> @@ -49,10 +46,8 @@
>>  #include <rte_memory.h>
>>  #include <rte_virtio_net.h>
>>  
>> -#include "vhost-net-cdev.h"
>> -#include "eventfd_link/eventfd_link.h"
>> -
>> -/*
>> +#include "vhost-net.h"
>> +/**
>>   * Device linked list structure for configuration.
>>   */
>>  struct virtio_net_config_ll {
>> @@ -60,38 +55,15 @@ struct virtio_net_config_ll {
>>      struct virtio_net_config_ll *next;      /* Next dev on linked list.*/
>>  };
>>  
>> -const char eventfd_cdev[] = "/dev/eventfd-link";
>> -
>> -/* device ops to add/remove device to/from data core. */
>> +/* device ops to add/remove device to data core. */
>>  static struct virtio_net_device_ops const *notify_ops;
>> -/* root address of the linked list of managed virtio devices */
>> +/* root address of the linked list in the configuration core. */
>>  static struct virtio_net_config_ll *ll_root;
>>  
>>  /* Features supported by this lib. */
>> -#define VHOST_SUPPORTED_FEATURES ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | \
>> -                              (1ULL << VIRTIO_NET_F_CTRL_RX))
>> +#define VHOST_SUPPORTED_FEATURES (1ULL << VIRTIO_NET_F_MRG_RXBUF)
>>  static uint64_t VHOST_FEATURES = VHOST_SUPPORTED_FEATURES;
>>  
>> -/* Line size for reading maps file. */
>> -static const uint32_t BUFSIZE = PATH_MAX;
>> -
>> -/* Size of prot char array in procmap. */
>> -#define PROT_SZ 5
>> -
>> -/* Number of elements in procmap struct. */
>> -#define PROCMAP_SZ 8
>> -
>> -/* Structure containing information gathered from maps file. */
>> -struct procmap {
>> -    uint64_t va_start;      /* Start virtual address in file. */
>> -    uint64_t len;           /* Size of file. */
>> -    uint64_t pgoff;         /* Not used. */
>> -    uint32_t maj;           /* Not used. */
>> -    uint32_t min;           /* Not used. */
>> -    uint32_t ino;           /* Not used. */
>> -    char prot[PROT_SZ];     /* Not used. */
>> -    char fname[PATH_MAX];   /* File name. */
>> -};
>>  
>>  /*
>>   * Converts QEMU virtual address to Vhost virtual address. This function is
>> @@ -110,199 +82,15 @@ qva_to_vva(struct virtio_net *dev, uint64_t qemu_va)
>>              if ((qemu_va >= region->userspace_address) &&
>>                      (qemu_va <= region->userspace_address +
>>                      region->memory_size)) {
>> -                    vhost_va = dev->mem->mapped_address + qemu_va -
>> -                                    dev->mem->base_address;
>> +                    vhost_va = qemu_va +  region->guest_phys_address + 
>> +                            region->address_offset -
>> +                            region->userspace_address;
>>                      break;
>>              }
>>      }
>>      return vhost_va;
>>  }
>>  
>> -/*
>> - * Locate the file containing QEMU's memory space and
>> - * map it to our address space.
>> - */
>> -static int
>> -host_memory_map(struct virtio_net *dev, struct virtio_memory *mem,
>> -    pid_t pid, uint64_t addr)
>> -{
>> -    struct dirent *dptr = NULL;
>> -    struct procmap procmap;
>> -    DIR *dp = NULL;
>> -    int fd;
>> -    int i;
>> -    char memfile[PATH_MAX];
>> -    char mapfile[PATH_MAX];
>> -    char procdir[PATH_MAX];
>> -    char resolved_path[PATH_MAX];
>> -    char *path = NULL;
>> -    FILE *fmap;
>> -    void *map;
>> -    uint8_t found = 0;
>> -    char line[BUFSIZE];
>> -    char dlm[] = "-   :   ";
>> -    char *str, *sp, *in[PROCMAP_SZ];
>> -    char *end = NULL;
>> -
>> -    /* Path where mem files are located. */
>> -    snprintf(procdir, PATH_MAX, "/proc/%u/fd/", pid);
>> -    /* Maps file used to locate mem file. */
>> -    snprintf(mapfile, PATH_MAX, "/proc/%u/maps", pid);
>> -
>> -    fmap = fopen(mapfile, "r");
>> -    if (fmap == NULL) {
>> -            RTE_LOG(ERR, VHOST_CONFIG,
>> -                    "(%"PRIu64") Failed to open maps file for pid %d\n",
>> -                    dev->device_fh, pid);
>> -            return -1;
>> -    }
>> -
>> -    /* Read through maps file until we find out base_address. */
>> -    while (fgets(line, BUFSIZE, fmap) != 0) {
>> -            str = line;
>> -            errno = 0;
>> -            /* Split line into fields. */
>> -            for (i = 0; i < PROCMAP_SZ; i++) {
>> -                    in[i] = strtok_r(str, &dlm[i], &sp);
>> -                    if ((in[i] == NULL) || (errno != 0)) {
>> -                            fclose(fmap);
>> -                            return -1;
>> -                    }
>> -                    str = NULL;
>> -            }
>> -
>> -            /* Convert/Copy each field as needed. */
>> -            procmap.va_start = strtoull(in[0], &end, 16);
>> -            if ((in[0] == '\0') || (end == NULL) || (*end != '\0') ||
>> -                    (errno != 0)) {
>> -                    fclose(fmap);
>> -                    return -1;
>> -            }
>> -
>> -            procmap.len = strtoull(in[1], &end, 16);
>> -            if ((in[1] == '\0') || (end == NULL) || (*end != '\0') ||
>> -                    (errno != 0)) {
>> -                    fclose(fmap);
>> -                    return -1;
>> -            }
>> -
>> -            procmap.pgoff = strtoull(in[3], &end, 16);
>> -            if ((in[3] == '\0') || (end == NULL) || (*end != '\0') ||
>> -                    (errno != 0)) {
>> -                    fclose(fmap);
>> -                    return -1;
>> -            }
>> -
>> -            procmap.maj = strtoul(in[4], &end, 16);
>> -            if ((in[4] == '\0') || (end == NULL) || (*end != '\0') ||
>> -                    (errno != 0)) {
>> -                    fclose(fmap);
>> -                    return -1;
>> -            }
>> -
>> -            procmap.min = strtoul(in[5], &end, 16);
>> -            if ((in[5] == '\0') || (end == NULL) || (*end != '\0') ||
>> -                    (errno != 0)) {
>> -                    fclose(fmap);
>> -                    return -1;
>> -            }
>> -
>> -            procmap.ino = strtoul(in[6], &end, 16);
>> -            if ((in[6] == '\0') || (end == NULL) || (*end != '\0') ||
>> -                    (errno != 0)) {
>> -                    fclose(fmap);
>> -                    return -1;
>> -            }
>> -
>> -            memcpy(&procmap.prot, in[2], PROT_SZ);
>> -            memcpy(&procmap.fname, in[7], PATH_MAX);
>> -
>> -            if (procmap.va_start == addr) {
>> -                    procmap.len = procmap.len - procmap.va_start;
>> -                    found = 1;
>> -                    break;
>> -            }
>> -    }
>> -    fclose(fmap);
>> -
>> -    if (!found) {
>> -            RTE_LOG(ERR, VHOST_CONFIG,
>> -                    "(%"PRIu64") Failed to find memory file in pid %d maps 
>> file\n",
>> -                    dev->device_fh, pid);
>> -            return -1;
>> -    }
>> -
>> -    /* Find the guest memory file among the process fds. */
>> -    dp = opendir(procdir);
>> -    if (dp == NULL) {
>> -            RTE_LOG(ERR, VHOST_CONFIG,
>> -                    "(%"PRIu64") Cannot open pid %d process directory\n",
>> -                    dev->device_fh, pid);
>> -            return -1;
>> -    }
>> -
>> -    found = 0;
>> -
>> -    /* Read the fd directory contents. */
>> -    while (NULL != (dptr = readdir(dp))) {
>> -            snprintf(memfile, PATH_MAX, "/proc/%u/fd/%s",
>> -                            pid, dptr->d_name);
>> -            path = realpath(memfile, resolved_path);
>> -            if ((path == NULL) && (strlen(resolved_path) == 0)) {
>> -                    RTE_LOG(ERR, VHOST_CONFIG,
>> -                            "(%"PRIu64") Failed to resolve fd directory\n",
>> -                            dev->device_fh);
>> -                    closedir(dp);
>> -                    return -1;
>> -            }
>> -            if (strncmp(resolved_path, procmap.fname,
>> -                    strnlen(procmap.fname, PATH_MAX)) == 0) {
>> -                    found = 1;
>> -                    break;
>> -            }
>> -    }
>> -
>> -    closedir(dp);
>> -
>> -    if (found == 0) {
>> -            RTE_LOG(ERR, VHOST_CONFIG,
>> -                    "(%"PRIu64") Failed to find memory file for pid %d\n",
>> -                    dev->device_fh, pid);
>> -            return -1;
>> -    }
>> -    /* Open the shared memory file and map the memory into this process. */
>> -    fd = open(memfile, O_RDWR);
>> -
>> -    if (fd == -1) {
>> -            RTE_LOG(ERR, VHOST_CONFIG,
>> -                    "(%"PRIu64") Failed to open %s for pid %d\n",
>> -                    dev->device_fh, memfile, pid);
>> -            return -1;
>> -    }
>> -
>> -    map = mmap(0, (size_t)procmap.len, PROT_READ|PROT_WRITE,
>> -            MAP_POPULATE|MAP_SHARED, fd, 0);
>> -    close(fd);
>> -
>> -    if (map == MAP_FAILED) {
>> -            RTE_LOG(ERR, VHOST_CONFIG,
>> -                    "(%"PRIu64") Error mapping the file %s for pid %d\n",
>> -                    dev->device_fh, memfile, pid);
>> -            return -1;
>> -    }
>> -
>> -    /* Store the memory address and size in the device data structure */
>> -    mem->mapped_address = (uint64_t)(uintptr_t)map;
>> -    mem->mapped_size = procmap.len;
>> -
>> -    LOG_DEBUG(VHOST_CONFIG,
>> -            "(%"PRIu64") Mem File: %s->%s - Size: %llu - VA: %p\n",
>> -            dev->device_fh,
>> -            memfile, resolved_path,
>> -            (unsigned long long)mem->mapped_size, map);
>> -
>> -    return 0;
>> -}
>>  
>>  /*
>>   * Retrieves an entry from the devices configuration linked list.
>> @@ -376,7 +164,7 @@ add_config_ll_entry(struct virtio_net_config_ll 
>> *new_ll_dev)
>>      }
>>  
>>  }
>> -
>> +/*TODO dpdk alloc/free if possible */
>>  /*
>>   * Unmap any memory, close any file descriptors and
>>   * free any memory owned by a device.
>> @@ -389,16 +177,17 @@ cleanup_device(struct virtio_net *dev)
>>              munmap((void *)(uintptr_t)dev->mem->mapped_address,
>>                      (size_t)dev->mem->mapped_size);
>>              free(dev->mem);
>> +            dev->mem = NULL;
>>      }
>>  
>>      /* Close any event notifiers opened by device. */
>> -    if (dev->virtqueue[VIRTIO_RXQ]->callfd)
>> +    if (dev->virtqueue[VIRTIO_RXQ]->callfd > 0)
>>              close((int)dev->virtqueue[VIRTIO_RXQ]->callfd);
>> -    if (dev->virtqueue[VIRTIO_RXQ]->kickfd)
>> +    if (dev->virtqueue[VIRTIO_RXQ]->kickfd > 0)
>>              close((int)dev->virtqueue[VIRTIO_RXQ]->kickfd);
>> -    if (dev->virtqueue[VIRTIO_TXQ]->callfd)
>> +    if (dev->virtqueue[VIRTIO_TXQ]->callfd > 0)
>>              close((int)dev->virtqueue[VIRTIO_TXQ]->callfd);
>> -    if (dev->virtqueue[VIRTIO_TXQ]->kickfd)
>> +    if (dev->virtqueue[VIRTIO_TXQ]->kickfd > 0)
>>              close((int)dev->virtqueue[VIRTIO_TXQ]->kickfd);
>>  }
>>  
>> @@ -522,8 +311,8 @@ new_device(struct vhost_device_ctx ctx)
>>  }
>>  
>>  /*
>> - * Function is called from the CUSE release function. This function will
>> - * cleanup the device and remove it from device configuration linked list.
>> + * Function is called from the CUSE release function. This function will 
>> cleanup
>> + * the device and remove it from device configuration linked list.
>>   */
>>  static void
>>  destroy_device(struct vhost_device_ctx ctx)
>> @@ -569,6 +358,7 @@ set_owner(struct vhost_device_ctx ctx)
>>              return -1;
>>  
>>      return 0;
>> +    /* TODO check ctx.fh is meaningfull here */
>>  }
>>  
>>  /*
>> @@ -651,14 +441,12 @@ set_features(struct vhost_device_ctx ctx, uint64_t *pu)
>>   * This includes storing offsets used to translate buffer addresses.
>>   */
>>  static int
>> -set_mem_table(struct vhost_device_ctx ctx, const void *mem_regions_addr,
>> -    uint32_t nregions)
>> +set_mem_table(struct vhost_device_ctx ctx,
>> +    const struct virtio_memory_regions *regions, uint32_t nregions)
>>  {
>>      struct virtio_net *dev;
>> -    struct vhost_memory_region *mem_regions;
>>      struct virtio_memory *mem;
>> -    uint64_t size = offsetof(struct vhost_memory, regions);
>> -    uint32_t regionidx, valid_regions;
>> +    uint32_t regionidx;
>>  
>>      dev = get_device(ctx);
>>      if (dev == NULL)
>> @@ -682,107 +470,24 @@ set_mem_table(struct vhost_device_ctx ctx, const void 
>> *mem_regions_addr,
>>  
>>      mem->nregions = nregions;
>>  
>> -    mem_regions = (void *)(uintptr_t)
>> -                    ((uint64_t)(uintptr_t)mem_regions_addr + size);
>> -
>>      for (regionidx = 0; regionidx < mem->nregions; regionidx++) {
>>              /* Populate the region structure for each region. */
>> -            mem->regions[regionidx].guest_phys_address =
>> -                    mem_regions[regionidx].guest_phys_addr;
>> -            mem->regions[regionidx].guest_phys_address_end =
>> -                    mem->regions[regionidx].guest_phys_address +
>> -                    mem_regions[regionidx].memory_size;
>> -            mem->regions[regionidx].memory_size =
>> -                    mem_regions[regionidx].memory_size;
>> -            mem->regions[regionidx].userspace_address =
>> -                    mem_regions[regionidx].userspace_addr;
>> -
>> -            LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") REGION: %u - GPA: %p - 
>> QEMU VA: %p - SIZE (%"PRIu64")\n", dev->device_fh,
>> -                    regionidx,
>> -                    (void 
>> *)(uintptr_t)mem->regions[regionidx].guest_phys_address,
>> -                    (void 
>> *)(uintptr_t)mem->regions[regionidx].userspace_address,
>> -                    mem->regions[regionidx].memory_size);
>> -
>> -            /*set the base address mapping*/
>> +            mem->regions[regionidx] = regions[regionidx];
>>              if (mem->regions[regionidx].guest_phys_address == 0x0) {
>>                      mem->base_address =
>>                              mem->regions[regionidx].userspace_address;
>> -                    /* Map VM memory file */
>> -                    if (host_memory_map(dev, mem, ctx.pid,
>> -                            mem->base_address) != 0) {
>> -                            free(mem);
>> -                            return -1;
>> -                    }
>> +                    mem->mapped_address = 
>> +                            mem->regions[regionidx].address_offset;
>>              }
>>      }
>>  
>> -    /* Check that we have a valid base address. */
>> -    if (mem->base_address == 0) {
>> -            RTE_LOG(ERR, VHOST_CONFIG, "(%"PRIu64") Failed to find base 
>> address of qemu memory file.\n", dev->device_fh);
>> -            free(mem);
>> -            return -1;
>> -    }
>> -
>> -    /*
>> -     * Check if all of our regions have valid mappings.
>> -     * Usually one does not exist in the QEMU memory file.
>> -     */
>> -    valid_regions = mem->nregions;
>> -    for (regionidx = 0; regionidx < mem->nregions; regionidx++) {
>> -            if ((mem->regions[regionidx].userspace_address <
>> -                    mem->base_address) ||
>> -                    (mem->regions[regionidx].userspace_address >
>> -                    (mem->base_address + mem->mapped_size)))
>> -                            valid_regions--;
>> -    }
>> -
>> -    /*
>> -     * If a region does not have a valid mapping,
>> -     * we rebuild our memory struct to contain only valid entries.
>> -     */
>> -    if (valid_regions != mem->nregions) {
>> -            LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") Not all memory regions 
>> exist in the QEMU mem file. Re-populating mem structure\n",
>> -                    dev->device_fh);
>> -
>> -            /*
>> -             * Re-populate the memory structure with only valid regions.
>> -             * Invalid regions are over-written with memmove.
>> -             */
>> -            valid_regions = 0;
>> -
>> -            for (regionidx = mem->nregions; 0 != regionidx--;) {
>> -                    if ((mem->regions[regionidx].userspace_address <
>> -                            mem->base_address) ||
>> -                            (mem->regions[regionidx].userspace_address >
>> -                            (mem->base_address + mem->mapped_size))) {
>> -                            memmove(&mem->regions[regionidx],
>> -                                    &mem->regions[regionidx + 1],
>> -                                    sizeof(struct virtio_memory_regions) *
>> -                                            valid_regions);
>> -                    } else {
>> -                            valid_regions++;
>> -                    }
>> -            }
>> -    }
>> -    mem->nregions = valid_regions;
>> +    /*TODO addback the logic that remove invalid memory regions */
>>      dev->mem = mem;
>>  
>> -    /*
>> -     * Calculate the address offset for each region.
>> -     * This offset is used to identify the vhost virtual address
>> -     * corresponding to a QEMU guest physical address.
>> -     */
>> -    for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) {
>> -            dev->mem->regions[regionidx].address_offset =
>> -                    dev->mem->regions[regionidx].userspace_address -
>> -                            dev->mem->base_address +
>> -                            dev->mem->mapped_address -
>> -                            dev->mem->regions[regionidx].guest_phys_address;
>> -
>> -    }
>>      return 0;
>>  }
>>  
>> +
>>  /*
>>   * Called from CUSE IOCTL: VHOST_SET_VRING_NUM
>>   * The virtio device sends us the size of the descriptor ring.
>> @@ -896,38 +601,62 @@ get_vring_base(struct vhost_device_ctx ctx, uint32_t 
>> index,
>>      /* State->index refers to the queue index. The txq is 1, rxq is 0. */
>>      state->num = dev->virtqueue[state->index]->last_used_idx;
>>  
>> -    return 0;
>> -}
>> +    if (dev->flags & VIRTIO_DEV_RUNNING) {
>> +            RTE_LOG(INFO, VHOST_CONFIG, 
>> +                    "get_vring_base message is for release\n");
>> +            notify_ops->destroy_device(dev);
>> +            /*
>> +             * sync call.
>> +             * when it returns, it means it si removed from data core.
>> +             */
>> +    }
>> +    /* TODO fix all munmap */
>> +    if (dev->mem) {
>> +            munmap((void *)(uintptr_t)dev->mem->mapped_address,
>> +                    (size_t)dev->mem->mapped_size);
>> +            free(dev->mem);
>> +            dev->mem = NULL;
>> +    }
>>  
>> -/*
>> - * This function uses the eventfd_link kernel module to copy an eventfd file
>> - * descriptor provided by QEMU in to our process space.
>> - */
>> -static int
>> -eventfd_copy(struct virtio_net *dev, struct eventfd_copy *eventfd_copy)
>> -{
>> -    int eventfd_link, ret;
>>  
>> -    /* Open the character device to the kernel module. */
>> -    eventfd_link = open(eventfd_cdev, O_RDWR);
>> -    if (eventfd_link < 0) {
>> -            RTE_LOG(ERR, VHOST_CONFIG,
>> -                    "(%"PRIu64") eventfd_link module is not loaded\n",
>> -                    dev->device_fh);
>> -            return -1;
>> -    }
>> +    if (dev->virtqueue[VIRTIO_RXQ]->callfd > 0)
>> +            close((int)dev->virtqueue[VIRTIO_RXQ]->callfd);
>> +    dev->virtqueue[VIRTIO_RXQ]->callfd = -1;
>> +    if (dev->virtqueue[VIRTIO_TXQ]->callfd > 0)
>> +            close((int)dev->virtqueue[VIRTIO_TXQ]->callfd);
>> +    dev->virtqueue[VIRTIO_TXQ]->callfd = -1;
>> +    /* We don't cleanup callfd here as we willn't get CALLFD again */
>> +    
>> +    dev->virtqueue[VIRTIO_RXQ]->desc = NULL;
>> +    dev->virtqueue[VIRTIO_RXQ]->avail = NULL;
>> +    dev->virtqueue[VIRTIO_RXQ]->used = NULL;
>> +    dev->virtqueue[VIRTIO_RXQ]->last_used_idx = 0;
>> +    dev->virtqueue[VIRTIO_RXQ]->last_used_idx_res = 0;
>> +
>> +    dev->virtqueue[VIRTIO_TXQ]->desc = NULL;
>> +    dev->virtqueue[VIRTIO_TXQ]->avail = NULL;
>> +    dev->virtqueue[VIRTIO_TXQ]->used = NULL;
>> +    dev->virtqueue[VIRTIO_TXQ]->last_used_idx = 0;
>> +    dev->virtqueue[VIRTIO_TXQ]->last_used_idx_res = 0;
>>  
>> -    /* Call the IOCTL to copy the eventfd. */
>> -    ret = ioctl(eventfd_link, EVENTFD_COPY, eventfd_copy);
>> -    close(eventfd_link);
>>  
>> -    if (ret < 0) {
>> -            RTE_LOG(ERR, VHOST_CONFIG,
>> -                    "(%"PRIu64") EVENTFD_COPY ioctl failed\n",
>> -                    dev->device_fh);
>> -            return -1;
>> -    }
>> +    return 0;
>> +}
>>  
>> +static int
>> +virtio_is_ready(struct virtio_net *dev, int index)
>> +{
>> +    struct vhost_virtqueue *vq1, *vq2;
>> +    /* mq support in future.*/
>> +    vq1 = dev->virtqueue[index];
>> +    vq2 = dev->virtqueue[index ^ 1];
>> +    if (vq1 && vq2 && vq1->desc && vq2->desc && 
>> +            (vq1->kickfd > 0) && (vq1->callfd > 0) &&
>> +            (vq2->kickfd > 0) && (vq2->callfd > 0)) {
>> +            LOG_DEBUG(VHOST_CONFIG, "virtio is ready for processing.\n");
>> +            return 1;
>> +    }
>> +    LOG_DEBUG(VHOST_CONFIG, "virtio isn't ready for processing.\n");
>>      return 0;
>>  }
>>  
>> @@ -940,7 +669,6 @@ static int
>>  set_vring_call(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
>>  {
>>      struct virtio_net *dev;
>> -    struct eventfd_copy     eventfd_kick;
>>      struct vhost_virtqueue *vq;
>>  
>>      dev = get_device(ctx);
>> @@ -953,14 +681,7 @@ set_vring_call(struct vhost_device_ctx ctx, struct 
>> vhost_vring_file *file)
>>      if (vq->kickfd)
>>              close((int)vq->kickfd);
>>  
>> -    /* Populate the eventfd_copy structure and call eventfd_copy. */
>> -    vq->kickfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
>> -    eventfd_kick.source_fd = vq->kickfd;
>> -    eventfd_kick.target_fd = file->fd;
>> -    eventfd_kick.target_pid = ctx.pid;
>> -
>> -    if (eventfd_copy(dev, &eventfd_kick))
>> -            return -1;
>> +    vq->kickfd = file->fd;
>>  
>>      return 0;
>>  }
>> @@ -974,7 +695,6 @@ static int
>>  set_vring_kick(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
>>  {
>>      struct virtio_net *dev;
>> -    struct eventfd_copy eventfd_call;
>>      struct vhost_virtqueue *vq;
>>  
>>      dev = get_device(ctx);
>> @@ -986,16 +706,11 @@ set_vring_kick(struct vhost_device_ctx ctx, struct 
>> vhost_vring_file *file)
>>  
>>      if (vq->callfd)
>>              close((int)vq->callfd);
>> +    vq->callfd = file->fd;
>>  
>> -    /* Populate the eventfd_copy structure and call eventfd_copy. */
>> -    vq->callfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
>> -    eventfd_call.source_fd = vq->callfd;
>> -    eventfd_call.target_fd = file->fd;
>> -    eventfd_call.target_pid = ctx.pid;
>> -
>> -    if (eventfd_copy(dev, &eventfd_call))
>> -            return -1;
>> -
>> +    if (virtio_is_ready(dev, file->index) &&
>> +            !(dev->flags & VIRTIO_DEV_RUNNING))
>> +                    notify_ops->new_device(dev);
>>      return 0;
>>  }
>>  
>> @@ -1024,6 +739,7 @@ set_backend(struct vhost_device_ctx ctx, struct 
>> vhost_vring_file *file)
>>       * If the device isn't already running and both backend fds are set,
>>       * we add the device.
>>       */
>> +    LOG_DEBUG(VHOST_CONFIG, "%s %d\n", __func__, file->fd);
>>      if (!(dev->flags & VIRTIO_DEV_RUNNING)) {
>>              if (((int)dev->virtqueue[VIRTIO_TXQ]->backend != 
>> VIRTIO_DEV_STOPPED) &&
>>                      ((int)dev->virtqueue[VIRTIO_RXQ]->backend != 
>> VIRTIO_DEV_STOPPED))

Reply via email to