[Qemu-devel] [PATCH qemu v17 11/12] spapr_pci/spapr_pci_vfio: Support Dynamic DMA Windows (DDW)

2016-06-01 Thread Alexey Kardashevskiy
This adds support for the Dynamic DMA Windows (DDW) option defined by
the SPAPR specification, which allows having additional DMA window(s).

The "ddw" property is enabled by default on a PHB but for compatibility
the pseries-2.5 machine (TODO: update version) and older disable it.
This also creates a single DMA window for the older machines to
maintain backward migration.

This implements DDW for PHBs with emulated and VFIO devices. Host
kernel support is required. The advertised IOMMU page sizes are 4K and
64K; 16M pages are supported but not advertised by default. To enable
them, the user has to specify the "pgsz" property for the PHB and
enable huge pages for RAM.

Existing Linux guests try to create one additional huge DMA window
with 64K or 16MB pages and map the entire guest RAM to it. If this succeeds,
the guest switches to dma_direct_ops and never calls TCE hypercalls
(H_PUT_TCE,...) again. This enables VFIO devices to use the entire RAM
and not waste time on map/unmap later. This adds a "dma64_win_addr"
property which is the bus address of the 64bit window and is by default
set to 0x800... as this is what the modern POWER8 hardware
uses and this allows having emulated and VFIO devices on the same bus.

This adds 4 RTAS handlers:
* ibm,query-pe-dma-window
* ibm,create-pe-dma-window
* ibm,remove-pe-dma-window
* ibm,reset-pe-dma-window
These are registered from type_init() callback.

These RTAS handlers are implemented in a separate file to avoid polluting
spapr_iommu.c with PCI.

This changes sPAPRPHBState::dma_liobn to an array to allow 2 LIOBNs.

Signed-off-by: Alexey Kardashevskiy 
---
Changes:
v17:
* fixed: "query" returned a non-page-shifted value when memory hotplug is
enabled

v16:
* s/dma_liobn/dma_liobn[SPAPR_PCI_DMA_MAX_WINDOWS]/
* s/SPAPR_PCI_LIOBN()/dma_liobn[]/

v15:
* moved page mask filtering to PHB realize(), use "-mempath" to know
if there are huge pages
* fixed error reporting in RTAS handlers
* max window size now accounts for hotpluggable memory boundaries
---
 hw/ppc/Makefile.objs|   1 +
 hw/ppc/spapr.c  |   5 +
 hw/ppc/spapr_pci.c  |  77 +---
 hw/ppc/spapr_rtas_ddw.c | 293 
 include/hw/pci-host/spapr.h |   8 +-
 include/hw/ppc/spapr.h  |  16 ++-
 trace-events|   4 +
 7 files changed, 383 insertions(+), 21 deletions(-)
 create mode 100644 hw/ppc/spapr_rtas_ddw.c

diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
index c1ffc77..986b36f 100644
--- a/hw/ppc/Makefile.objs
+++ b/hw/ppc/Makefile.objs
@@ -7,6 +7,7 @@ obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o 
spapr_rng.o
 ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
 obj-y += spapr_pci_vfio.o
 endif
+obj-$(CONFIG_PSERIES) += spapr_rtas_ddw.o
 # PowerPC 4xx boards
 obj-y += ppc405_boards.o ppc4xx_devs.o ppc405_uc.o ppc440_bamboo.o
 obj-y += ppc4xx_pci.o
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 44e401a..6ddcda9 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2366,6 +2366,11 @@ DEFINE_SPAPR_MACHINE(2_6, "2.6", true);
 .driver   = "spapr-vlan", \
 .property = "use-rx-buffer-pools", \
 .value= "off", \
+}, \
+{\
+.driver   = TYPE_SPAPR_PCI_HOST_BRIDGE,\
+.property = "ddw",\
+.value= stringify(off),\
 },
 
 static void spapr_machine_2_5_instance_options(MachineState *machine)
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 68de523..bcf0360 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -35,6 +35,7 @@
 #include "hw/ppc/spapr.h"
 #include "hw/pci-host/spapr.h"
 #include "exec/address-spaces.h"
+#include "exec/ram_addr.h"
 #include 
 #include "trace.h"
 #include "qemu/error-report.h"
@@ -45,6 +46,7 @@
 #include "hw/ppc/spapr_drc.h"
 #include "sysemu/device_tree.h"
 #include "sysemu/kvm.h"
+#include "sysemu/hostmem.h"
 
 #include "hw/vfio/vfio.h"
 
@@ -1088,7 +1090,7 @@ static void spapr_phb_add_pci_device(sPAPRDRConnector 
*drc,
 int fdt_start_offset = 0, fdt_size;
 
 if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
-sPAPRTCETable *tcet = spapr_tce_find_by_liobn(phb->dma_liobn);
+sPAPRTCETable *tcet = spapr_tce_find_by_liobn(phb->dma_liobn[0]);
 
 spapr_tce_set_need_vfio(tcet, true);
 }
@@ -1310,11 +1312,14 @@ static void spapr_phb_realize(DeviceState *dev, Error 
**errp)
 PCIBus *bus;
 uint64_t msi_window_size = 4096;
 sPAPRTCETable *tcet;
+const unsigned windows_supported =
+sphb->ddw_enabled ? SPAPR_PCI_DMA_MAX_WINDOWS : 1;
 
 if (sphb->index != (uint32_t)-1) {
 hwaddr windows_base;
 
-if ((sphb->buid != (uint64_t)-1) || (sphb->dma_liobn != (uint32_t)-1)
+if ((sphb->buid != (uint64_t)-1) || (sphb->dma_liobn[0] != 
(uint32_t)-1)
+|| ((sphb->dma_liobn[1] != (uint32_t)-1) && (windows_supported > 
1))
 || (sphb->mem_win_addr != (hwaddr)-1)
 || 

[Qemu-devel] [PATCH qemu v17 05/12] spapr_pci: Reset DMA config on PHB reset

2016-06-01 Thread Alexey Kardashevskiy
LoPAPR dictates that during system reset all DMA windows must be removed
and the default DMA32 window must be created; this patch does so.

At the moment there is just one window supported so no change in
behaviour is expected.

Signed-off-by: Alexey Kardashevskiy 
Reviewed-by: David Gibson 
---
Changes:
v17:
* due to " spapr_iommu: Introduce "enabled" state for TCE table" rework,
instead of making spapr_tce_table_disable() public, this just adds it
---
 hw/ppc/spapr_pci.c | 17 +++--
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index a529eff..4a7be4d 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -1310,7 +1310,6 @@ static void spapr_phb_realize(DeviceState *dev, Error 
**errp)
 PCIBus *bus;
 uint64_t msi_window_size = 4096;
 sPAPRTCETable *tcet;
-uint32_t nb_table;
 
 if (sphb->index != (uint32_t)-1) {
 hwaddr windows_base;
@@ -1462,7 +1461,6 @@ static void spapr_phb_realize(DeviceState *dev, Error 
**errp)
 }
 }
 
-nb_table = sphb->dma_win_size >> SPAPR_TCE_PAGE_SHIFT;
 tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn);
 if (!tcet) {
 error_setg(errp, "Unable to create TCE table for %s",
@@ -1473,10 +1471,6 @@ static void spapr_phb_realize(DeviceState *dev, Error 
**errp)
 memory_region_add_subregion_overlap(>iommu_root, 0,
 spapr_tce_get_iommu(tcet), 0);
 
-/* Register default 32bit DMA window */
-spapr_tce_table_enable(tcet, SPAPR_TCE_PAGE_SHIFT, sphb->dma_win_addr,
-   nb_table);
-
 sphb->msi = g_hash_table_new_full(g_int_hash, g_int_equal, g_free, g_free);
 }
 
@@ -1493,6 +1487,17 @@ static int spapr_phb_children_reset(Object *child, void 
*opaque)
 
 static void spapr_phb_reset(DeviceState *qdev)
 {
+sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(qdev);
+sPAPRTCETable *tcet = spapr_tce_find_by_liobn(sphb->dma_liobn);
+
+if (tcet && tcet->nb_table) {
+spapr_tce_table_disable(tcet);
+}
+
+/* Register default 32bit DMA window */
+spapr_tce_table_enable(tcet, SPAPR_TCE_PAGE_SHIFT, sphb->dma_win_addr,
+   sphb->dma_win_size >> SPAPR_TCE_PAGE_SHIFT);
+
 /* Reset the IOMMU state */
 object_child_foreach(OBJECT(qdev), spapr_phb_children_reset, NULL);
 
-- 
2.5.0.rc3




Re: [Qemu-devel] [PATCH v7 04/15] Makefile: Rules for docker testing

2016-06-01 Thread Fam Zheng
On Wed, 06/01 10:18, Paolo Bonzini wrote:
> 
> 
> On 01/06/2016 06:25, Fam Zheng wrote:
> > +# Use a global constant ccache directory to speed up repetitive builds
> > +DOCKER_CCACHE_DIR := /var/tmp/qemu-docker-ccache
> 
> Same here, use $HOME/.cache/qemu-docker-ccache instead.  Do you need a
> mkdir -p?

I think it just works, but I can add one to be safe.

Fam



[Qemu-devel] [PATCH qemu v17 08/12] spapr_pci: Add and export DMA resetting helper

2016-06-01 Thread Alexey Kardashevskiy
This will be later used by the "ibm,reset-pe-dma-window" RTAS handler
which resets the DMA configuration to the defaults.

Signed-off-by: Alexey Kardashevskiy 
Reviewed-by: David Gibson 
---
 hw/ppc/spapr_pci.c  | 10 --
 include/hw/pci-host/spapr.h |  2 ++
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 4a7be4d..68de523 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -1485,9 +1485,8 @@ static int spapr_phb_children_reset(Object *child, void 
*opaque)
 return 0;
 }
 
-static void spapr_phb_reset(DeviceState *qdev)
+void spapr_phb_dma_reset(sPAPRPHBState *sphb)
 {
-sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(qdev);
 sPAPRTCETable *tcet = spapr_tce_find_by_liobn(sphb->dma_liobn);
 
 if (tcet && tcet->nb_table) {
@@ -1497,6 +1496,13 @@ static void spapr_phb_reset(DeviceState *qdev)
 /* Register default 32bit DMA window */
 spapr_tce_table_enable(tcet, SPAPR_TCE_PAGE_SHIFT, sphb->dma_win_addr,
sphb->dma_win_size >> SPAPR_TCE_PAGE_SHIFT);
+}
+
+static void spapr_phb_reset(DeviceState *qdev)
+{
+sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(qdev);
+
+spapr_phb_dma_reset(sphb);
 
 /* Reset the IOMMU state */
 object_child_foreach(OBJECT(qdev), spapr_phb_children_reset, NULL);
diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
index 03ee006..7848366 100644
--- a/include/hw/pci-host/spapr.h
+++ b/include/hw/pci-host/spapr.h
@@ -147,4 +147,6 @@ static inline void spapr_phb_vfio_reset(DeviceState *qdev)
 }
 #endif
 
+void spapr_phb_dma_reset(sPAPRPHBState *sphb);
+
 #endif /* __HW_SPAPR_PCI_H__ */
-- 
2.5.0.rc3




[Qemu-devel] [PATCH qemu v17 02/12] spapr_iommu: Introduce "enabled" state for TCE table

2016-06-01 Thread Alexey Kardashevskiy
Currently TCE tables are created once at start and their sizes never
change. We are going to change that by introducing Dynamic DMA Windows
support, where the DMA configuration may change during guest execution.

This changes spapr_tce_new_table() to create an empty zero-size IOMMU
memory region (IOMMU MR). Only LIOBN is assigned by the time of creation.
It still will be called once at the owner object (VIO or PHB) creation.

This introduces an "enabled" state for TCE table objects, some
helper functions are added:
- spapr_tce_table_enable() receives TCE table parameters, stores in
sPAPRTCETable and allocates a guest view of the TCE table
(in the user space or KVM) and sets the correct size on the IOMMU MR;
- spapr_tce_table_disable() disposes the table and resets the IOMMU MR
size; it is made public as the following DDW code will be using it.

This changes the PHB reset handler to do the default DMA initialization
instead of spapr_phb_realize(). This makes no difference now but
later with more than just one DMA window, we will have to remove them all
and create the default one on a system reset.

No visible change in behaviour is expected except the actual table
will be reallocated every reset. We might optimize this later.

The other way to implement this would be to dynamically create/remove
the TCE table QOM objects but this would make migration impossible
as the migration code expects all QOM objects to exist at the receiver
so we have to have TCE table objects created when migration begins.

Signed-off-by: Alexey Kardashevskiy 
---
Changes:
v17:
* spapr_tce_table_unrealize() calls spapr_tce_table_do_disable() directly
* moved spapr_tce_table_disable() to next patch as it is not used here
* removed @enabled as nb_table indicates already if the table is enabled

v15:
* made adjustments after removing spapr_phb_dma_window_enable()

v14:
* added spapr_tce_table_do_disable(), will make difference in following
patch with fully dynamic table migration
---
 hw/ppc/spapr_iommu.c   | 68 --
 hw/ppc/spapr_pci.c |  8 +++---
 hw/ppc/spapr_vio.c |  8 +++---
 include/hw/ppc/spapr.h |  9 +++
 4 files changed, 56 insertions(+), 37 deletions(-)

diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 96bb018..de63467 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -17,6 +17,7 @@
  * License along with this library; if not, see .
  */
 #include "qemu/osdep.h"
+#include "qemu/error-report.h"
 #include "hw/hw.h"
 #include "qemu/log.h"
 #include "sysemu/kvm.h"
@@ -175,15 +176,9 @@ static int spapr_tce_table_realize(DeviceState *dev)
 sPAPRTCETable *tcet = SPAPR_TCE_TABLE(dev);
 
 tcet->fd = -1;
-tcet->table = spapr_tce_alloc_table(tcet->liobn,
-tcet->page_shift,
-tcet->nb_table,
->fd,
-tcet->need_vfio);
-
+tcet->need_vfio = false;
 memory_region_init_iommu(>iommu, OBJECT(dev), _iommu_ops,
- "iommu-spapr",
- (uint64_t)tcet->nb_table << tcet->page_shift);
+ "iommu-spapr", 0);
 
 QLIST_INSERT_HEAD(_tce_tables, tcet, list);
 
@@ -225,14 +220,10 @@ void spapr_tce_set_need_vfio(sPAPRTCETable *tcet, bool 
need_vfio)
 tcet->table = newtable;
 }
 
-sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn,
-   uint64_t bus_offset,
-   uint32_t page_shift,
-   uint32_t nb_table,
-   bool need_vfio)
+sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn)
 {
 sPAPRTCETable *tcet;
-char tmp[64];
+char tmp[32];
 
 if (spapr_tce_find_by_liobn(liobn)) {
 fprintf(stderr, "Attempted to create TCE table with duplicate"
@@ -240,16 +231,8 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, 
uint32_t liobn,
 return NULL;
 }
 
-if (!nb_table) {
-return NULL;
-}
-
 tcet = SPAPR_TCE_TABLE(object_new(TYPE_SPAPR_TCE_TABLE));
 tcet->liobn = liobn;
-tcet->bus_offset = bus_offset;
-tcet->page_shift = page_shift;
-tcet->nb_table = nb_table;
-tcet->need_vfio = need_vfio;
 
 snprintf(tmp, sizeof(tmp), "tce-table-%x", liobn);
 object_property_add_child(OBJECT(owner), tmp, OBJECT(tcet), NULL);
@@ -259,14 +242,51 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, 
uint32_t liobn,
 return tcet;
 }
 
+void spapr_tce_table_enable(sPAPRTCETable *tcet,
+uint32_t page_shift, uint64_t bus_offset,
+uint32_t nb_table)
+{
+if (tcet->nb_table) {
+error_report("Warning: trying to enable already enabled TCE table");
+return;
+}
+
+

[Qemu-devel] [PATCH qemu v17 12/12] spapr_iommu, vfio, memory: Notify IOMMU about starting/stopping listening

2016-06-01 Thread Alexey Kardashevskiy
The sPAPR TCE tables manage 2 copies when VFIO is using an IOMMU -
a guest view of the table and a hardware TCE table. If there is no VFIO
presence in the address space, then just the guest view is used; in
this case, it is allocated in KVM. However since there is no
support yet for VFIO in KVM TCE hypercalls, when we start using VFIO,
we need to move the guest view from KVM to the userspace; and we need
to do this for every IOMMU on a bus with VFIO devices.

This adds notify_started/notify_stopped callbacks in MemoryRegionIOMMUOps
to notify IOMMU that listeners were set/removed. This allows IOMMU to
take necessary steps before actual notifications happen and do proper
cleanup when the last notifier is removed.

This implements the callbacks for the sPAPR IOMMU - notify_started()
reallocates the guest view in user space, notify_stopped() does
the opposite.

This removes explicit spapr_tce_set_need_vfio() call from PCI hotplug
path as the new callbacks do this better - they notify IOMMU at
the exact moment when the configuration is changed, and this also
includes the case of PCI hot unplug.

This adds MemoryRegion* to memory_region_unregister_iommu_notifier()
as we need iommu_ops to call notify_stopped() and Notifier* does not
store the owner.

Signed-off-by: Alexey Kardashevskiy 
Reviewed-by: David Gibson 
---
Changes:
v17:
* replaced IOMMU users counting with simple QLIST_EMPTY()
* renamed the callbacks
* removed requirement for region_del() to be called on 
memory_listener_unregister()

v16:
* added a use counter in VFIOAddressSpace->VFIOIOMMUMR

v15:
* s/need_vfio/vfio-Users/g
---
 hw/ppc/spapr_iommu.c  | 12 
 hw/ppc/spapr_pci.c|  6 --
 hw/vfio/common.c  |  5 +++--
 include/exec/memory.h |  8 +++-
 memory.c  | 10 +-
 5 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 90a45c0..994a8a0 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -156,6 +156,16 @@ static uint64_t spapr_tce_get_page_sizes(MemoryRegion 
*iommu)
 return 1ULL << tcet->page_shift;
 }
 
+static void spapr_tce_notify_started(MemoryRegion *iommu)
+{
+spapr_tce_set_need_vfio(container_of(iommu, sPAPRTCETable, iommu), true);
+}
+
+static void spapr_tce_notify_stopped(MemoryRegion *iommu)
+{
+spapr_tce_set_need_vfio(container_of(iommu, sPAPRTCETable, iommu), false);
+}
+
 static int spapr_tce_table_post_load(void *opaque, int version_id)
 {
 sPAPRTCETable *tcet = SPAPR_TCE_TABLE(opaque);
@@ -236,6 +246,8 @@ static const VMStateDescription vmstate_spapr_tce_table = {
 static MemoryRegionIOMMUOps spapr_iommu_ops = {
 .translate = spapr_tce_translate_iommu,
 .get_page_sizes = spapr_tce_get_page_sizes,
+.notify_started = spapr_tce_notify_started,
+.notify_stopped = spapr_tce_notify_stopped,
 };
 
 static int spapr_tce_table_realize(DeviceState *dev)
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index bcf0360..06ce902 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -1089,12 +1089,6 @@ static void spapr_phb_add_pci_device(sPAPRDRConnector 
*drc,
 void *fdt = NULL;
 int fdt_start_offset = 0, fdt_size;
 
-if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
-sPAPRTCETable *tcet = spapr_tce_find_by_liobn(phb->dma_liobn[0]);
-
-spapr_tce_set_need_vfio(tcet, true);
-}
-
 fdt = create_device_tree(_size);
 fdt_start_offset = spapr_create_pci_child_dt(phb, pdev, fdt, 0);
 if (!fdt_start_offset) {
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 7f55c26..356640e 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -522,7 +522,8 @@ static void vfio_listener_region_del(MemoryListener 
*listener,
 
 QLIST_FOREACH(giommu, >giommu_list, giommu_next) {
 if (giommu->iommu == section->mr) {
-memory_region_unregister_iommu_notifier(>n);
+memory_region_unregister_iommu_notifier(giommu->iommu,
+>n);
 QLIST_REMOVE(giommu, giommu_next);
 g_free(giommu);
 break;
@@ -1094,7 +1095,7 @@ static void vfio_disconnect_container(VFIOGroup *group)
 QLIST_REMOVE(container, next);
 
 QLIST_FOREACH_SAFE(giommu, >giommu_list, giommu_next, tmp) {
-memory_region_unregister_iommu_notifier(>n);
+memory_region_unregister_iommu_notifier(giommu->iommu, >n);
 QLIST_REMOVE(giommu, giommu_next);
 g_free(giommu);
 }
diff --git a/include/exec/memory.h b/include/exec/memory.h
index bd9625f..f08439b 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -151,6 +151,10 @@ struct MemoryRegionIOMMUOps {
 IOMMUTLBEntry (*translate)(MemoryRegion *iommu, hwaddr addr, bool 
is_write);
 /* Returns supported page sizes */
 uint64_t (*get_page_sizes)(MemoryRegion *iommu);
+

[Qemu-devel] [PATCH qemu v17 10/12] vfio/spapr: Create DMA window dynamically (SPAPR IOMMU v2)

2016-06-01 Thread Alexey Kardashevskiy
The new VFIO_SPAPR_TCE_v2_IOMMU type supports dynamic DMA window management.
This adds the ability for VFIO common code to dynamically allocate/remove
DMA windows in the host kernel when a new VFIO container is added/removed.

This adds VFIO_IOMMU_SPAPR_TCE_CREATE ioctl to vfio_listener_region_add
and adds just created IOMMU into the host IOMMU list; the opposite
action is taken in vfio_listener_region_del.

When creating a new window, this uses a heuristic to decide on the number
of TCE table levels.

This should cause no guest visible change in behavior.

Signed-off-by: Alexey Kardashevskiy 
---
Changes:
v17:
* moved spapr window create/remove helpers to separate file
* added hw_error() if vfio_host_win_del() failed

v16:
* used memory_region_iommu_get_page_sizes() in vfio_listener_region_add()
* enforced no intersections between windows

v14:
* new to the series
---
 hw/vfio/common.c  | 76 +--
 hw/vfio/spapr.c   | 70 +++
 include/hw/vfio/vfio-common.h |  6 
 trace-events  |  2 ++
 4 files changed, 144 insertions(+), 10 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 52b08fd..7f55c26 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -275,6 +275,18 @@ static void vfio_host_win_add(VFIOContainer *container,
 QLIST_INSERT_HEAD(>hostwin_list, hostwin, hostwin_next);
 }
 
+static int vfio_host_win_del(VFIOContainer *container, hwaddr min_iova)
+{
+VFIOHostDMAWindow *hostwin = vfio_host_win_lookup(container, min_iova, 1);
+
+if (!hostwin) {
+return -1;
+}
+QLIST_REMOVE(hostwin, hostwin_next);
+
+return 0;
+}
+
 static bool vfio_listener_skipped_section(MemoryRegionSection *section)
 {
 return (!memory_region_is_ram(section->mr) &&
@@ -388,6 +400,30 @@ static void vfio_listener_region_add(MemoryListener 
*listener,
 }
 end = int128_get64(int128_sub(llend, int128_one()));
 
+if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
+VFIOHostDMAWindow *hostwin;
+hwaddr pgsize = 0;
+
+/* For now intersections are not allowed, we may relax this later */
+QLIST_FOREACH(hostwin, >hostwin_list, hostwin_next) {
+if (ranges_overlap(hostwin->min_iova,
+   hostwin->max_iova - hostwin->min_iova + 1,
+   section->offset_within_address_space,
+   int128_get64(section->size))) {
+goto fail;
+}
+}
+
+ret = vfio_spapr_create_window(container, section, );
+if (ret) {
+goto fail;
+}
+
+vfio_host_win_add(container, section->offset_within_address_space,
+  section->offset_within_address_space +
+  int128_get64(section->size) - 1, pgsize);
+}
+
 if (!vfio_host_win_lookup(container, iova, end)) {
 error_report("vfio: IOMMU container %p can't map guest IOVA region"
  " 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx,
@@ -523,6 +559,18 @@ static void vfio_listener_region_del(MemoryListener 
*listener,
  "0x%"HWADDR_PRIx") = %d (%m)",
  container, iova, int128_get64(llsize), ret);
 }
+
+if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
+vfio_spapr_remove_window(container,
+ section->offset_within_address_space);
+if (vfio_host_win_del(container,
+  section->offset_within_address_space) < 0) {
+hw_error("%s: Cannot delete missing window at %"HWADDR_PRIx,
+ __func__, section->offset_within_address_space);
+}
+
+trace_vfio_spapr_remove_window(section->offset_within_address_space);
+}
 }
 
 static const MemoryListener vfio_memory_listener = {
@@ -960,11 +1008,6 @@ static int vfio_connect_container(VFIOGroup *group, 
AddressSpace *as)
 }
 }
 
-/*
- * This only considers the host IOMMU's 32-bit window.  At
- * some point we need to add support for the optional 64-bit
- * window and dynamic windows
- */
 info.argsz = sizeof(info);
 ret = ioctl(fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, );
 if (ret) {
@@ -973,11 +1016,24 @@ static int vfio_connect_container(VFIOGroup *group, 
AddressSpace *as)
 goto listener_release_exit;
 }
 
-/* The default table uses 4K pages */
-vfio_host_win_add(container, info.dma32_window_start,
-  info.dma32_window_start +
-  info.dma32_window_size - 1,
-  0x1000);
+if (v2) {
+/*
+ * There is a default window in just created container.
+ * To make region_add/del simpler, we better remove this
+ * window now and let those iommu_listener 

[Qemu-devel] [PATCH qemu v17 09/12] vfio: Add host side DMA window capabilities

2016-06-01 Thread Alexey Kardashevskiy
There are going to be multiple IOMMUs per container. This moves
the single host IOMMU parameter set to a list of VFIOHostDMAWindow.

This should cause no behavioral change and will be used later by
the SPAPR TCE IOMMU v2 which will also add a vfio_host_win_del() helper.

Signed-off-by: Alexey Kardashevskiy 
Reviewed-by: David Gibson 
---
Changes:
v17:
* vfio_host_win_add() uses vfio_host_win_lookup() for overlap check and
aborts if any found instead of returning an error (as recovery is not
possible anyway)
* hw_error() when overlapped iommu is detected

v16:
* adjusted commit log with changes from v15

v15:
* s/vfio_host_iommu_add/vfio_host_win_add/
* s/VFIOHostIOMMU/VFIOHostDMAWindow/
---
 hw/vfio/common.c  | 59 +++
 include/hw/vfio/vfio-common.h |  9 +--
 2 files changed, 50 insertions(+), 18 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 770f630..52b08fd 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -29,6 +29,7 @@
 #include "exec/memory.h"
 #include "hw/hw.h"
 #include "qemu/error-report.h"
+#include "qemu/range.h"
 #include "sysemu/kvm.h"
 #ifdef CONFIG_KVM
 #include "linux/kvm.h"
@@ -242,6 +243,38 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr 
iova,
 return -errno;
 }
 
+static VFIOHostDMAWindow *vfio_host_win_lookup(VFIOContainer *container,
+   hwaddr min_iova, hwaddr 
max_iova)
+{
+VFIOHostDMAWindow *hostwin;
+
+QLIST_FOREACH(hostwin, >hostwin_list, hostwin_next) {
+if (hostwin->min_iova <= min_iova && max_iova <= hostwin->max_iova) {
+return hostwin;
+}
+}
+
+return NULL;
+}
+
+static void vfio_host_win_add(VFIOContainer *container,
+ hwaddr min_iova, hwaddr max_iova,
+ uint64_t iova_pgsizes)
+{
+VFIOHostDMAWindow *hostwin;
+
+if (vfio_host_win_lookup(container, min_iova, max_iova)) {
+hw_error("%s: Overlapped IOMMU are not enabled", __func__);
+}
+
+hostwin = g_malloc0(sizeof(*hostwin));
+
+hostwin->min_iova = min_iova;
+hostwin->max_iova = max_iova;
+hostwin->iova_pgsizes = iova_pgsizes;
+QLIST_INSERT_HEAD(>hostwin_list, hostwin, hostwin_next);
+}
+
 static bool vfio_listener_skipped_section(MemoryRegionSection *section)
 {
 return (!memory_region_is_ram(section->mr) &&
@@ -355,7 +388,7 @@ static void vfio_listener_region_add(MemoryListener 
*listener,
 }
 end = int128_get64(int128_sub(llend, int128_one()));
 
-if ((iova < container->min_iova) || (end > container->max_iova)) {
+if (!vfio_host_win_lookup(container, iova, end)) {
 error_report("vfio: IOMMU container %p can't map guest IOVA region"
  " 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx,
  container, iova, end);
@@ -370,10 +403,6 @@ static void vfio_listener_region_add(MemoryListener 
*listener,
 
 trace_vfio_listener_region_add_iommu(iova, end);
 /*
- * FIXME: We should do some checking to see if the
- * capabilities of the host VFIO IOMMU are adequate to model
- * the guest IOMMU
- *
  * FIXME: For VFIO iommu types which have KVM acceleration to
  * avoid bouncing all map/unmaps through qemu this way, this
  * would be the right place to wire that up (tell the KVM
@@ -880,17 +909,14 @@ static int vfio_connect_container(VFIOGroup *group, 
AddressSpace *as)
  * existing Type1 IOMMUs generally support any IOVA we're
  * going to actually try in practice.
  */
-container->min_iova = 0;
-container->max_iova = (hwaddr)-1;
-
-/* Assume just 4K IOVA page size */
-container->iova_pgsizes = 0x1000;
 info.argsz = sizeof(info);
 ret = ioctl(fd, VFIO_IOMMU_GET_INFO, );
 /* Ignore errors */
-if ((ret == 0) && (info.flags & VFIO_IOMMU_INFO_PGSIZES)) {
-container->iova_pgsizes = info.iova_pgsizes;
+if (ret || !(info.flags & VFIO_IOMMU_INFO_PGSIZES)) {
+/* Assume 4k IOVA page size */
+info.iova_pgsizes = 4096;
 }
+vfio_host_win_add(container, 0, (hwaddr)-1, info.iova_pgsizes);
 } else if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_IOMMU) ||
ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_v2_IOMMU)) {
 struct vfio_iommu_spapr_tce_info info;
@@ -946,11 +972,12 @@ static int vfio_connect_container(VFIOGroup *group, 
AddressSpace *as)
 ret = -errno;
 goto listener_release_exit;
 }
-container->min_iova = info.dma32_window_start;
-container->max_iova = container->min_iova + info.dma32_window_size - 1;
 
-/* Assume just 4K IOVA pages for now */
-container->iova_pgsizes = 0x1000;
+/* The default table uses 4K pages */
+vfio_host_win_add(container, 

Re: [Qemu-devel] [PATCH v6 08/11] mptsas: change msi property type

2016-06-01 Thread Markus Armbruster
Cao jin  writes:

>>From uint32 to enum OnOffAuto, and give it a shorter name.
>
> cc: Paolo Bonzini 
> cc: Michael S. Tsirkin 
> cc: Markus Armbruster 
> cc: Marcel Apfelbaum 
>
> Signed-off-by: Cao jin 
> ---
>  hw/scsi/mptsas.c | 4 ++--
>  hw/scsi/mptsas.h | 3 ++-
>  2 files changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/hw/scsi/mptsas.c b/hw/scsi/mptsas.c
> index 1c18c84..afee576 100644
> --- a/hw/scsi/mptsas.c
> +++ b/hw/scsi/mptsas.c
> @@ -1285,7 +1285,7 @@ static void mptsas_scsi_realize(PCIDevice *dev, Error 
> **errp)
>  memory_region_init_io(>diag_io, OBJECT(s), _diag_ops, s,
>"mptsas-diag", 0x1);
>  
> -if (s->msi_available &&
> +if ((s->msi == ON_OFF_AUTO_AUTO || s->msi == ON_OFF_AUTO_ON) &&
>  msi_init(dev, 0, 1, true, false) >= 0) {
>  s->msi_in_use = true;
>  }

Same suggestions as for PATCH 06:

* Use the s->msi != ON_OFF_AUTO_OFF
* Add /* TODO check for errors */ now, drop it when you add the check in
  PATCH 11.

> @@ -1404,7 +1404,7 @@ static const VMStateDescription vmstate_mptsas = {
>  static Property mptsas_properties[] = {
>  DEFINE_PROP_UINT64("sas_address", MPTSASState, sas_addr, 0),
>  /* TODO: test MSI support under Windows */
> -DEFINE_PROP_BIT("msi", MPTSASState, msi_available, 0, true),
> +DEFINE_PROP_ON_OFF_AUTO("msi", MPTSASState, msi, ON_OFF_AUTO_AUTO),
>  DEFINE_PROP_END_OF_LIST(),
>  };
>  
> diff --git a/hw/scsi/mptsas.h b/hw/scsi/mptsas.h
> index 595f81f..0436a33 100644
> --- a/hw/scsi/mptsas.h
> +++ b/hw/scsi/mptsas.h
> @@ -27,7 +27,8 @@ struct MPTSASState {
>  MemoryRegion diag_io;
>  QEMUBH *request_bh;
>  
> -uint32_t msi_available;
> +/* properties */
> +OnOffAuto msi;
>  uint64_t sas_addr;
>  
>  bool msi_in_use;



Re: [Qemu-devel] [RFC PATCH v4 3/3] VFIO Type1 IOMMU: Add support for mediated devices

2016-06-01 Thread Dong Jia
On Wed, 25 May 2016 01:28:17 +0530
Kirti Wankhede  wrote:

> +
> +/*
> + * Pin a set of guest PFNs and return their associated host PFNs for API
> + * supported domain only.
> + * @vaddr [in]: array of guest PFNs
> + * @npage [in]: count of array elements
> + * @prot [in] : protection flags
> + * @pfn_base[out] : array of host PFNs
> + */
> +long vfio_pin_pages(void *iommu_data, dma_addr_t *vaddr, long npage,
> +int prot, dma_addr_t *pfn_base)
> +{
> + struct vfio_iommu *iommu = iommu_data;
> + struct vfio_domain *domain = NULL;
> + int i = 0, ret = 0;
> + long retpage;
> + unsigned long remote_vaddr = 0;
> + dma_addr_t *pfn = pfn_base;
> + struct vfio_dma *dma;
> +
> + if (!iommu || !vaddr || !pfn_base)
> + return -EINVAL;
> +
> + mutex_lock(>lock);
> +
> + if (!iommu->mediated_domain) {
> + ret = -EINVAL;
> + goto pin_done;
> + }
> +
> + domain = iommu->mediated_domain;
> +
> + for (i = 0; i < npage; i++) {
> + struct vfio_pfn *p, *lpfn;
> + unsigned long tpfn;
> + dma_addr_t iova;
> + long pg_cnt = 1;
> +
> + iova = vaddr[i] << PAGE_SHIFT;
Dear Kirti:

Got one question for the vaddr-iova conversion here.
Is this a common rule that can be applied to all architectures?
AFAIK, this is wrong for the s390 case. Or I must be missing something...

If the answer to the above question is 'no', should we introduce a new
argument to pass in the iovas? Say 'dma_addr_t *iova'.

> +
> + dma = vfio_find_dma(iommu, iova, 0 /*  size */);
> + if (!dma) {
> + ret = -EINVAL;
> + goto pin_done;
> + }
> +
> + remote_vaddr = dma->vaddr + iova - dma->iova;
> +
> + retpage = vfio_pin_pages_internal(domain, remote_vaddr,
> +   pg_cnt, prot, );
> + if (retpage <= 0) {
> + WARN_ON(!retpage);
> + ret = (int)retpage;
> + goto pin_done;
> + }
> +
> + pfn[i] = tpfn;
> +
> + /* search if pfn exist */
> + p = vfio_find_pfn(domain, tpfn);
> + if (p) {
> + atomic_inc(>ref_count);
> + continue;
> + }
> +
> + /* add to pfn_list */
> + lpfn = kzalloc(sizeof(*lpfn), GFP_KERNEL);
> + if (!lpfn) {
> + ret = -ENOMEM;
> + goto pin_done;
> + }
> + lpfn->vaddr = remote_vaddr;
> + lpfn->iova = iova;
> + lpfn->pfn = pfn[i];
> + lpfn->npage = 1;
> + lpfn->prot = prot;
> + atomic_inc(>ref_count);
> + vfio_link_pfn(domain, lpfn);
> + }
> +
> + ret = i;
> +
> +pin_done:
> + mutex_unlock(>lock);
> + return ret;
> +}
> +EXPORT_SYMBOL(vfio_pin_pages);



Dong Jia




Re: [Qemu-devel] [PATCH] configure: save git working tree information in "pkgversion"

2016-06-01 Thread Laszlo Ersek
On 05/31/16 19:45, Eric Blake wrote:
> On 05/31/2016 11:01 AM, Laszlo Ersek wrote:
> 
 Grepping git's Documentation/RelNotes/ directory, I find:
 - in "1.6.6.txt": the introduction of --dirty
 - in "1.7.6.4.txt": an apparently important bugfix for --dirty

 Version 1.7.6.4 of git was tagged on Sep 23 2011.

 Does this information help in deciding if we can use --dirty?
>>>
>>> 5 years old sounds new enough for my liking :-)
>>>
>>> I guess we could use --dirty and catch the non-zero exit code and just
>>> re-try without --dirty.
>>
>> But, if we can't use --dirty, I should probably use the plus-sign
>> fallback (we need *something* to mark a dirty state).
>>
>> In which case however, shouldn't we just go with the current patch,
>> which doesn't care about --dirty at all? Otherwise, some build hosts
>> will append "-dirty", and others will append "+".
>>
>> IMO we should either require --dirty, or go with the current patch.
> 
> Gnulib's build-aux/git-version-gen script doesn't yet use --dirty, but
> may be an inspiration for how to generate the same suffix:
> 
> # Test whether to append the "-dirty" suffix only if the version
> # string we're using came from git.  I.e., skip the test if it's "UNKNOWN"
> # or if it came from .tarball-version.
> if test "x$v_from_git" != x; then
>   # Don't declare a version "dirty" merely because a time stamp has changed.
>   git update-index --refresh > /dev/null 2>&1
> 
>   dirty=`exec 2>/dev/null;git diff-index --name-only HEAD` || dirty=
>   case "$dirty" in
>   '') ;;
>   *) # Append the suffix only if there isn't one already.
>   case $v in
> *-dirty) ;;
> *) v="$v-dirty" ;;
>   esac ;;
>   esac
> fi
> 

BTW, my patch has a functionality bug. Consider the case when you change
some of the tracked files, then stage all those changes with "git add",
then *undo* the changes in the working tree only. In this case, my patch
will report "dirty" ("+"), because there will be both staged changes
(relative to the HEAD commit) and working tree changes (relative to the
index). But that's incorrect -- the working tree actually matches the
HEAD commit, so the build qualifies as "clean".

On the other hand, git-diff-index will do the right thing, namely:

   git-diff-index <tree-ish>
   compares the <tree-ish> and the files on the filesystem.

which is exactly right. The index (= the staged changes) is irrelevant
for a build; only the working tree matters.

(Anyway, this is moot now; I'll happily leave it to Fam! :))

Thanks
Laszlo



Re: [Qemu-devel] [PATCH] virtio: move bi-endian target support to a single location

2016-06-01 Thread Paolo Bonzini


On 01/06/2016 04:33, David Gibson wrote:
> On Tue, May 31, 2016 at 03:15:21PM +0200, Paolo Bonzini wrote:
>>
>>
>> On 31/05/2016 15:10, Greg Kurz wrote:
>>> +#if defined(TARGET_PPC64) || defined(TARGET_ARM)
>>> +#define LEGACY_VIRTIO_IS_BIENDIAN 1
>>> +#endif  
>
> These will only be correct if something else includes cpu.h.  Instead of
>>> Unless I missed something, the TARGET_* macros come from the generated
>>> config-target.h header, which is in turn included by qemu/osdep.h and
>>> thus included by most of the code.
>>
>> You're right.  Problems _could_ happen if virtio-access.h is included in
>> a file compiled without -DNEED_CPU_H (i.e. with common-obj-y instead of
>> obj-y) but include/exec/poison.h should take care of that.
>>
> defining this, you should add
>
> #include "cpu.h"
>
> at the top of include/hw/virtio-access.h and leave the definitions in
> target-*/cpu.h.
>
>>> All this bi-endian stuff is really an old-virtio-only thing... it is
>>> only to be used by virtio_access_is_big_endian(). The fact that it
>>> broke silently with your cleanup series is yet another proof that
>>> this workaround is fragile.
>>
>> It is not fragile actually.  cpu.h doesn't exist in common-obj-y, so the
>> TARGET_IS_BIENDIAN define can be safely taken from cpu.h.
>>
>> Anyway because of poison.h your solution isn't fragile either, so
>>
>> Reviewed-by: Paolo Bonzini 
> 
> Should I take this through my tree?

If you don't hear from mst, go ahead.

Paolo



Re: [Qemu-devel] [PATCH v2 0/2] Let PKGVERSION include the "git describe" output

2016-06-01 Thread Laszlo Ersek
On 06/01/16 06:41, Fam Zheng wrote:
> v2: Address Eric's comments:
> "echo -n" -> "printf".
> "-unclean" -> "-dirty".
> 
> Makefile happened to be in my working set because of the docker test work, so 
> I
> went ahead to try this nice feature last evening before going to bed. My
> apologies if we have duplicated work.
> 
> Please review, especially please comment on the redundant version string:
> 
> $ qemu-img --version
> qemu-img version 2.6.50-v2.6.0-603-g684a494, Copyright (c) 2004-2008 Fabrice 
> Bellard
> 
> 
> Fam Zheng (2):
>   Makefile: Add a "FORCE" target
>   Makefile: Derive "PKGVERSION" from "git describe" by default
> 
>  Makefile  | 24 ++--
>  linux-user/main.c |  1 +
>  qemu-img.c|  1 +
>  qmp.c |  1 +
>  scripts/create_config |  4 
>  vl.c  |  1 +
>  6 files changed, 26 insertions(+), 6 deletions(-)
> 

I only skimmed the git commands in patch #2:

git status --> this verifies if we have git at all, and refreshes the
   index if we do, in the same step, so that's good
git describe --> okay
git diff-index --quiet HEAD --> okay

Another advantage of this approach seems to be that it sets the version
at "make", not at "configure", time.

Some comments / questions on the version string:
- What happens if "--with-pkgversion=blah" was passed to ./configure?
  Do I see it right that in that case there's no change in behavior?

- I think the following format would look better (stolen from
  --with-pkgversion):

  2.6.50 (v2.6.0-603-g684a494)

Thanks
Laszlo



[Qemu-devel] [PATCH v8 12/17] net_pkt: Extend packet abstraction as required by e1000e functionality

2016-06-01 Thread Dmitry Fleytman
From: Dmitry Fleytman 

This patch extends the TX/RX packet abstractions with features that will
be used by the e1000e device implementation.

Changes are:

  1. Support iovec lists for RX buffers
  2. Deeper RX packets parsing
  3. Loopback option for TX packets
  4. Extended VLAN headers handling
  5. RSS processing for RX packets

Signed-off-by: Dmitry Fleytman 
Signed-off-by: Leonid Bloch 
Reviewed-by: Michael S. Tsirkin 
---
 hw/net/net_rx_pkt.c| 473 +
 hw/net/net_rx_pkt.h| 193 +++-
 hw/net/net_tx_pkt.c| 204 +
 hw/net/net_tx_pkt.h|  60 ++-
 include/net/checksum.h |   4 +-
 include/net/eth.h  | 150 +++-
 net/checksum.c |   7 +-
 net/eth.c  | 410 +-
 trace-events   |  40 +
 9 files changed, 1336 insertions(+), 205 deletions(-)

diff --git a/hw/net/net_rx_pkt.c b/hw/net/net_rx_pkt.c
index 8a4f29f..1019b50 100644
--- a/hw/net/net_rx_pkt.c
+++ b/hw/net/net_rx_pkt.c
@@ -16,24 +16,16 @@
  */
 
 #include "qemu/osdep.h"
+#include "trace.h"
 #include "net_rx_pkt.h"
-#include "net/eth.h"
-#include "qemu-common.h"
-#include "qemu/iov.h"
 #include "net/checksum.h"
 #include "net/tap.h"
 
-/*
- * RX packet may contain up to 2 fragments - rebuilt eth header
- * in case of VLAN tag stripping
- * and payload received from QEMU - in any case
- */
-#define NET_MAX_RX_PACKET_FRAGMENTS (2)
-
 struct NetRxPkt {
 struct virtio_net_hdr virt_hdr;
-uint8_t ehdr_buf[ETH_MAX_L2_HDR_LEN];
-struct iovec vec[NET_MAX_RX_PACKET_FRAGMENTS];
+uint8_t ehdr_buf[sizeof(struct eth_header)];
+struct iovec *vec;
+uint16_t vec_len_total;
 uint16_t vec_len;
 uint32_t tot_len;
 uint16_t tci;
@@ -46,17 +38,31 @@ struct NetRxPkt {
 bool isip6;
 bool isudp;
 bool istcp;
+
+size_t l3hdr_off;
+size_t l4hdr_off;
+size_t l5hdr_off;
+
+eth_ip6_hdr_info ip6hdr_info;
+eth_ip4_hdr_info ip4hdr_info;
+eth_l4_hdr_info  l4hdr_info;
 };
 
 void net_rx_pkt_init(struct NetRxPkt **pkt, bool has_virt_hdr)
 {
 struct NetRxPkt *p = g_malloc0(sizeof *p);
 p->has_virt_hdr = has_virt_hdr;
+p->vec = NULL;
+p->vec_len_total = 0;
 *pkt = p;
 }
 
 void net_rx_pkt_uninit(struct NetRxPkt *pkt)
 {
+if (pkt->vec_len_total != 0) {
+g_free(pkt->vec);
+}
+
 g_free(pkt);
 }
 
@@ -66,33 +72,88 @@ struct virtio_net_hdr *net_rx_pkt_get_vhdr(struct NetRxPkt 
*pkt)
 return >virt_hdr;
 }
 
-void net_rx_pkt_attach_data(struct NetRxPkt *pkt, const void *data,
-   size_t len, bool strip_vlan)
+static inline void
+net_rx_pkt_iovec_realloc(struct NetRxPkt *pkt,
+int new_iov_len)
+{
+if (pkt->vec_len_total < new_iov_len) {
+g_free(pkt->vec);
+pkt->vec = g_malloc(sizeof(*pkt->vec) * new_iov_len);
+pkt->vec_len_total = new_iov_len;
+}
+}
+
+static void
+net_rx_pkt_pull_data(struct NetRxPkt *pkt,
+const struct iovec *iov, int iovcnt,
+size_t ploff)
+{
+if (pkt->vlan_stripped) {
+net_rx_pkt_iovec_realloc(pkt, iovcnt + 1);
+
+pkt->vec[0].iov_base = pkt->ehdr_buf;
+pkt->vec[0].iov_len = sizeof(pkt->ehdr_buf);
+
+pkt->tot_len =
+iov_size(iov, iovcnt) - ploff + sizeof(struct eth_header);
+
+pkt->vec_len = iov_copy(pkt->vec + 1, pkt->vec_len_total - 1,
+iov, iovcnt, ploff, pkt->tot_len);
+} else {
+net_rx_pkt_iovec_realloc(pkt, iovcnt);
+
+pkt->tot_len = iov_size(iov, iovcnt) - ploff;
+pkt->vec_len = iov_copy(pkt->vec, pkt->vec_len_total,
+iov, iovcnt, ploff, pkt->tot_len);
+}
+
+eth_get_protocols(pkt->vec, pkt->vec_len, >isip4, >isip6,
+  >isudp, >istcp,
+  >l3hdr_off, >l4hdr_off, >l5hdr_off,
+  >ip6hdr_info, >ip4hdr_info, >l4hdr_info);
+
+trace_net_rx_pkt_parsed(pkt->isip4, pkt->isip6, pkt->isudp, pkt->istcp,
+pkt->l3hdr_off, pkt->l4hdr_off, pkt->l5hdr_off);
+}
+
+void net_rx_pkt_attach_iovec(struct NetRxPkt *pkt,
+const struct iovec *iov, int iovcnt,
+size_t iovoff, bool strip_vlan)
 {
 uint16_t tci = 0;
-uint16_t ploff;
+uint16_t ploff = iovoff;
 assert(pkt);
 pkt->vlan_stripped = false;
 
 if (strip_vlan) {
-pkt->vlan_stripped = eth_strip_vlan(data, pkt->ehdr_buf, , );
+pkt->vlan_stripped = eth_strip_vlan(iov, iovcnt, iovoff, pkt->ehdr_buf,
+, );
 }
 
-if (pkt->vlan_stripped) {
-pkt->vec[0].iov_base = pkt->ehdr_buf;
-

Re: [Qemu-devel] [PATCH v6 02/11] fix some coding style problems

2016-06-01 Thread Cao jin



On 06/01/2016 04:09 PM, Markus Armbruster wrote:

Cao jin  writes:


It has:
1. More newlines make the code block well separated.
2. Add more comments for msi_init.
3. Fix a indentation in vmxnet3.c.
4. ioh3420 & xio3130_downstream: put PCI Express capability init function
together, make it more readable.

cc: Dmitry Fleytman 
cc: Jason Wang 
cc: Michael S. Tsirkin 
cc: Markus Armbruster 
cc: Marcel Apfelbaum 

Reviewed-by: Marcel Apfelbaum 
Signed-off-by: Cao jin 

[...]

diff --git a/hw/pci/msi.c b/hw/pci/msi.c
index e0e64c2..97f35c0 100644
--- a/hw/pci/msi.c
+++ b/hw/pci/msi.c
@@ -165,6 +165,23 @@ bool msi_enabled(const PCIDevice *dev)
   PCI_MSI_FLAGS_ENABLE);
  }

+/*
+ * Make PCI device @dev MSI-capable.
+ * Non-zero @offset puts capability MSI at that offset in PCI config
+ * space.
+ * @nr_vectors is the number of MSI vectors (1, 2, 4, 8, 16 or 32).
+ * If @msi64bit, make the device capable of sending a 64-bit message
+ * address.
+ * If @msi_per_vector_mask, make the device support per-vector masking.
+ * @errp is for returning errors.


@errp only appears in PATCH 11.  The easiest fix is to add this comment
only then.



eh...sorry for my carelessness...

--
Yours Sincerely,

Cao jin





Re: [Qemu-devel] [PATCH] fix xen hvm direct kernel boot

2016-06-01 Thread Chun Yan Liu


>>> On 5/31/2016 at 12:10 AM, in message
, Stefano
Stabellini  wrote: 
> On Fri, 29 Apr 2016, Chunyan Liu wrote: 
> > Since commit a1666142: acpi-build: make ROMs RAM blocks resizeable, 
> > xen HVM direct kernel boot failed. Xen HVM direct kernel boot will 
> > insert a linuxboot.bin or multiboot.bin to /genroms, before this 
> > commit, in acpi_setup, for rom linuxboot.bin/multiboot.bin, it 
> > only needs 0x2 size; after the commit, it will reserve x16 
> > size for resize, that is 0x20 size. It causes xen_ram_alloc 
> > failed due to running out of memory. 
> >  
> > To resolve it, either: 
> > 1. keep using original rom size instead of max size, don't reserve x16  
> size. 
> > 2. guest maxmem needs to be increased. (commit c1d322e6 "xen-hvm: increase 
> >maxmem before calling xc_domain_populate_physmap" solved the problem for 
> >a time, by accident. But then it is reverted in commit bb369 due to 
> >other problem.) 
> >  
> > For 2, more discussion is needed about howto. So this patch tries 1, to 
> > use unresizable rom size in xen case in rom_set_mr. 
> >  
> > Signed-off-by: Chunyan Liu  
>  
> Thank you for the patch! 
>  
>  
> >  hw/core/loader.c | 6 +- 
> >  1 file changed, 5 insertions(+), 1 deletion(-) 
> >  
> > diff --git a/hw/core/loader.c b/hw/core/loader.c 
> > index c049957..5150101 100644 
> > --- a/hw/core/loader.c 
> > +++ b/hw/core/loader.c 
> > @@ -55,6 +55,7 @@ 
> >  #include "exec/address-spaces.h" 
> >  #include "hw/boards.h" 
> >  #include "qemu/cutils.h" 
> > +#include "hw/xen/xen.h" 
> >   
> >  #include  
> >   
> > @@ -818,7 +819,10 @@ static void *rom_set_mr(Rom *rom, Object *owner, const 
> >  
> char *name) 
> >  void *data; 
> >   
> >  rom->mr = g_malloc(sizeof(*rom->mr)); 
> > -memory_region_init_resizeable_ram(rom->mr, owner, name, 
> > +if (xen_enabled()) 
> > +memory_region_init_ram(rom->mr, owner, name, rom->datasize,  
> _fatal); 
> > +else 
> > +memory_region_init_resizeable_ram(rom->mr, owner, name, 
> >rom->datasize, rom->romsize, 
> >fw_cfg_resized, 
> >_fatal); 
>  
> Wouldn't it be better to change ram_block_add so that it calls 
> xen_ram_alloc with used_length rather than max_length? 

Seems that's right. But after that change, when doing direct kernel boot,
it reports:
qemu-system-x86_64: /home/cyliu/git/qemu/include/exec/ram_addr.h:48:
ramblock_ptr: Assertion `offset_in_ramblock(block, offset)' failed.

Looking at it.

Chunyan
>  
> I think that on Xen we want to only allocate used_length bytes, but 
> reserve max_length of address space. 
>  
>  
>  





Re: [Qemu-devel] [PATCH v6 06/11] usb xhci: change msi/msix property type

2016-06-01 Thread Markus Armbruster
Cao jin  writes:

> From bit to enum OnOffAuto
>
> cc: Gerd Hoffmann 
> cc: Michael S. Tsirkin 
> cc: Markus Armbruster 
> cc: Marcel Apfelbaum 
>
> Signed-off-by: Cao jin 
> ---
>  hw/usb/hcd-xhci.c | 16 +---
>  1 file changed, 9 insertions(+), 7 deletions(-)
>
> diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c
> index 43ba615..bbe5cca 100644
> --- a/hw/usb/hcd-xhci.c
> +++ b/hw/usb/hcd-xhci.c
> @@ -461,6 +461,8 @@ struct XHCIState {
>  uint32_t numslots;
>  uint32_t flags;
>  uint32_t max_pstreams_mask;
> +OnOffAuto msi;
> +OnOffAuto msix;
>  
>  /* Operational Registers */
>  uint32_t usbcmd;
> @@ -498,9 +500,7 @@ typedef struct XHCIEvRingSeg {
>  } XHCIEvRingSeg;
>  
>  enum xhci_flags {
> -XHCI_FLAG_USE_MSI = 1,
> -XHCI_FLAG_USE_MSI_X,
> -XHCI_FLAG_SS_FIRST,
> +XHCI_FLAG_SS_FIRST = 1,
>  XHCI_FLAG_FORCE_PCIE_ENDCAP,
>  XHCI_FLAG_ENABLE_STREAMS,
>  };
> @@ -3648,10 +3648,12 @@ static void usb_xhci_realize(struct PCIDevice *dev, 
> Error **errp)
>  assert(ret >= 0);
>  }
>  
> -if (xhci_get_flag(xhci, XHCI_FLAG_USE_MSI)) {
> +if (xhci->msi == ON_OFF_AUTO_ON ||
> +xhci->msi == ON_OFF_AUTO_AUTO) {

Easier:
   if (xhci->msi != ON_OFF_AUTO_OFF) {

Hmm, you switch to this simpler conditional in PATCH 11, when you move
this code.  I'd use the simpler conditional from the start.  Since it
doesn't affect the final state, this is a suggestion, not a demand.

>  msi_init(dev, 0x70, xhci->numintrs, true, false);

Shouldn't we check for errors here?  Hmm, you do it in PATCH 11.  Okay,
but I'd add a /* TODO check for errors */ comment here.

>  }
> -if (xhci_get_flag(xhci, XHCI_FLAG_USE_MSI_X)) {
> +if (xhci->msix == ON_OFF_AUTO_ON ||
> +xhci->msix == ON_OFF_AUTO_AUTO) {

Likewise.

>  msix_init(dev, xhci->numintrs,
>>mem, 0, OFF_MSIX_TABLE,
>>mem, 0, OFF_MSIX_PBA,

Likewise.

> @@ -3872,8 +3874,8 @@ static const VMStateDescription vmstate_xhci = {
>  };
>  
>  static Property xhci_properties[] = {
> -DEFINE_PROP_BIT("msi",  XHCIState, flags, XHCI_FLAG_USE_MSI, true),
> -DEFINE_PROP_BIT("msix", XHCIState, flags, XHCI_FLAG_USE_MSI_X, true),
> +DEFINE_PROP_ON_OFF_AUTO("msi", XHCIState, msi, ON_OFF_AUTO_AUTO),
> +DEFINE_PROP_ON_OFF_AUTO("msix", XHCIState, msix, ON_OFF_AUTO_AUTO),
>  DEFINE_PROP_BIT("superspeed-ports-first",
>  XHCIState, flags, XHCI_FLAG_SS_FIRST, true),
>  DEFINE_PROP_BIT("force-pcie-endcap", XHCIState, flags,



[Qemu-devel] [PATCH v8 10/17] net_pkt: Name vmxnet3 packet abstractions more generic

2016-06-01 Thread Dmitry Fleytman
From: Dmitry Fleytman 

This patch drops "vmx" prefix from packet abstractions names
to emphasize the fact they are generic and not tied to any
specific network device.

These abstractions will be reused by e1000e emulation implementation
introduced by following patches so their names need generalization.

This patch (except renamed files, adjusted comments and changes in MAINTAINERS)
was produced by:

git grep -lz 'vmxnet_tx_pkt' | xargs -0 perl -i'' -pE 
"s/vmxnet_tx_pkt/net_tx_pkt/g"
git grep -lz 'vmxnet_rx_pkt' | xargs -0 perl -i'' -pE 
"s/vmxnet_rx_pkt/net_rx_pkt/g"
git grep -lz 'VmxnetTxPkt' | xargs -0 perl -i'' -pE "s/VmxnetTxPkt/NetTxPkt/g"
git grep -lz 'VMXNET_TX_PKT' | xargs -0 perl -i'' -pE 
"s/VMXNET_TX_PKT/NET_TX_PKT/g"
git grep -lz 'VmxnetRxPkt' | xargs -0 perl -i'' -pE "s/VmxnetRxPkt/NetRxPkt/g"
git grep -lz 'VMXNET_RX_PKT' | xargs -0 perl -i'' -pE 
"s/VMXNET_RX_PKT/NET_RX_PKT/g"
sed -ie 's/VMXNET_/NET_/g' hw/net/vmxnet_rx_pkt.c
sed -ie 's/VMXNET_/NET_/g' hw/net/vmxnet_tx_pkt.c

Signed-off-by: Dmitry Fleytman 
Signed-off-by: Leonid Bloch 
Reviewed-by: Michael S. Tsirkin 
---
 MAINTAINERS|   8 +
 hw/net/Makefile.objs   |   2 +-
 hw/net/net_rx_pkt.c| 187 
 hw/net/net_rx_pkt.h| 174 +++
 hw/net/net_tx_pkt.c| 581 +
 hw/net/net_tx_pkt.h| 146 +
 hw/net/vmxnet3.c   |  88 
 hw/net/vmxnet_rx_pkt.c | 187 
 hw/net/vmxnet_rx_pkt.h | 174 ---
 hw/net/vmxnet_tx_pkt.c | 581 -
 hw/net/vmxnet_tx_pkt.h | 146 -
 tests/Makefile |   4 +-
 12 files changed, 1143 insertions(+), 1135 deletions(-)
 create mode 100644 hw/net/net_rx_pkt.c
 create mode 100644 hw/net/net_rx_pkt.h
 create mode 100644 hw/net/net_tx_pkt.c
 create mode 100644 hw/net/net_tx_pkt.h
 delete mode 100644 hw/net/vmxnet_rx_pkt.c
 delete mode 100644 hw/net/vmxnet_rx_pkt.h
 delete mode 100644 hw/net/vmxnet_tx_pkt.c
 delete mode 100644 hw/net/vmxnet_tx_pkt.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 3c949d5..e890849 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -954,6 +954,14 @@ S: Maintained
 F: hw/*/xilinx_*
 F: include/hw/xilinx.h
 
+Network packet abstractions
+M: Dmitry Fleytman 
+S: Maintained
+F: include/net/eth.h
+F: net/eth.c
+F: hw/net/net_rx_pkt*
+F: hw/net/net_tx_pkt*
+
 Vmware
 M: Dmitry Fleytman 
 S: Maintained
diff --git a/hw/net/Makefile.objs b/hw/net/Makefile.objs
index 64d0449..527d264 100644
--- a/hw/net/Makefile.objs
+++ b/hw/net/Makefile.objs
@@ -8,7 +8,7 @@ common-obj-$(CONFIG_PCNET_PCI) += pcnet-pci.o
 common-obj-$(CONFIG_PCNET_COMMON) += pcnet.o
 common-obj-$(CONFIG_E1000_PCI) += e1000.o
 common-obj-$(CONFIG_RTL8139_PCI) += rtl8139.o
-common-obj-$(CONFIG_VMXNET3_PCI) += vmxnet_tx_pkt.o vmxnet_rx_pkt.o
+common-obj-$(CONFIG_VMXNET3_PCI) += net_tx_pkt.o net_rx_pkt.o
 common-obj-$(CONFIG_VMXNET3_PCI) += vmxnet3.o
 
 common-obj-$(CONFIG_SMC91C111) += smc91c111.o
diff --git a/hw/net/net_rx_pkt.c b/hw/net/net_rx_pkt.c
new file mode 100644
index 000..8a4f29f
--- /dev/null
+++ b/hw/net/net_rx_pkt.c
@@ -0,0 +1,187 @@
+/*
+ * QEMU RX packets abstractions
+ *
+ * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
+ *
+ * Developed by Daynix Computing LTD (http://www.daynix.com)
+ *
+ * Authors:
+ * Dmitry Fleytman 
+ * Tamir Shomer 
+ * Yan Vugenfirer 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "net_rx_pkt.h"
+#include "net/eth.h"
+#include "qemu-common.h"
+#include "qemu/iov.h"
+#include "net/checksum.h"
+#include "net/tap.h"
+
+/*
+ * RX packet may contain up to 2 fragments - rebuilt eth header
+ * in case of VLAN tag stripping
+ * and payload received from QEMU - in any case
+ */
+#define NET_MAX_RX_PACKET_FRAGMENTS (2)
+
+struct NetRxPkt {
+struct virtio_net_hdr virt_hdr;
+uint8_t ehdr_buf[ETH_MAX_L2_HDR_LEN];
+struct iovec vec[NET_MAX_RX_PACKET_FRAGMENTS];
+uint16_t vec_len;
+uint32_t tot_len;
+uint16_t tci;
+bool vlan_stripped;
+bool has_virt_hdr;
+eth_pkt_types_e packet_type;
+
+/* Analysis results */
+bool isip4;
+bool isip6;
+bool isudp;
+bool istcp;
+};
+
+void net_rx_pkt_init(struct NetRxPkt **pkt, bool has_virt_hdr)
+{
+struct NetRxPkt *p = g_malloc0(sizeof *p);
+p->has_virt_hdr = has_virt_hdr;
+*pkt = p;
+}
+
+void net_rx_pkt_uninit(struct NetRxPkt *pkt)
+{
+g_free(pkt);
+}
+
+struct virtio_net_hdr *net_rx_pkt_get_vhdr(struct NetRxPkt *pkt)
+{
+assert(pkt);
+return >virt_hdr;
+}
+
+void net_rx_pkt_attach_data(struct NetRxPkt *pkt, const void *data,
+  

Re: [Qemu-devel] [PATCH v6 07/11] intel-hda: change msi property type

2016-06-01 Thread Markus Armbruster
Cao jin  writes:

> From uint32 to enum OnOffAuto.
>
> cc: Gerd Hoffmann 
> cc: Michael S. Tsirkin 
> cc: Markus Armbruster 
> cc: Marcel Apfelbaum 
>
> Signed-off-by: Cao jin 
> ---
>  hw/audio/intel-hda.c | 7 ---
>  1 file changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/hw/audio/intel-hda.c b/hw/audio/intel-hda.c
> index d372d4a..61362e5 100644
> --- a/hw/audio/intel-hda.c
> +++ b/hw/audio/intel-hda.c
> @@ -187,7 +187,7 @@ struct IntelHDAState {
>  
>  /* properties */
>  uint32_t debug;
> -uint32_t msi;
> +OnOffAuto msi;

I'm going to review all uses of this member.

>  bool old_msi_addr;
>  };
>  
> @@ -1142,7 +1142,8 @@ static void intel_hda_realize(PCIDevice *pci, Error 
> **errp)
>  memory_region_init_io(>mmio, OBJECT(d), _hda_mmio_ops, d,
>"intel-hda", 0x4000);
>  pci_register_bar(>pci, 0, 0, >mmio);
> -if (d->msi) {
> +if (d->msi == ON_OFF_AUTO_AUTO ||
> +d->msi == ON_OFF_AUTO_ON) {
>  msi_init(>pci, d->old_msi_addr ? 0x50 : 0x60, 1, true, false);

Same suggestions as for PATCH 06:

* Use the d->msi != ON_OFF_AUTO_OFF
* Add /* TODO check for errors */ now, drop it when you add the check in
  PATCH 11.

>  }
>  
> @@ -1234,7 +1235,7 @@ static const VMStateDescription vmstate_intel_hda = {
>  
>  static Property intel_hda_properties[] = {
>  DEFINE_PROP_UINT32("debug", IntelHDAState, debug, 0),
> -DEFINE_PROP_UINT32("msi", IntelHDAState, msi, 1),
> +DEFINE_PROP_ON_OFF_AUTO("msi", IntelHDAState, msi, ON_OFF_AUTO_AUTO),
>  DEFINE_PROP_BOOL("old_msi_addr", IntelHDAState, old_msi_addr, false),
>  DEFINE_PROP_END_OF_LIST(),
>  };

Not covered:

   static void intel_hda_update_irq(IntelHDAState *d)
   {
-->int msi = d->msi && msi_enabled(&d->pci);
   int level;

   intel_hda_update_int_sts(d);
   if (d->int_sts & (1U << 31) && d->int_ctl & (1U << 31)) {
   level = 1;
   } else {
   level = 0;
   }
   dprint(d, 2, "%s: level %d [%s]\n", __FUNCTION__,
  level, msi ? "msi" : "intx");
   if (msi) {
   if (level) {
   msi_notify(&d->pci, 0);
   }
   } else {
   pci_set_irq(&d->pci, level);
   }
   }

This is wrong, because the meaning of the test changes from

(user didn't specify msi=off) && (guest enabled MSI)

to

(user didn't specify msi=on or msi=off) && (guest enabled MSI)

What about:

   int msi = msi_enabled(&d->pci);

If I understand it correctly, it can only be true when we added MSI
capability, and we do that only when msi=auto (default) or msi=on.



[Qemu-devel] [PATCH v8 15/17] e1000: Move out code that will be reused in e1000e

2016-06-01 Thread Dmitry Fleytman
From: Dmitry Fleytman 

Code that will be shared is moved to separate files.

Signed-off-by: Dmitry Fleytman 
Signed-off-by: Leonid Bloch 
Reviewed-by: Michael S. Tsirkin 
---
 MAINTAINERS|   5 +
 hw/net/Makefile.objs   |   2 +-
 hw/net/e1000.c | 411 +++--
 hw/net/e1000x_common.c | 267 
 hw/net/e1000x_common.h | 213 +
 trace-events   |  13 ++
 6 files changed, 591 insertions(+), 320 deletions(-)
 create mode 100644 hw/net/e1000x_common.c
 create mode 100644 hw/net/e1000x_common.h

diff --git a/MAINTAINERS b/MAINTAINERS
index e890849..ab4e884 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -981,6 +981,11 @@ F: hw/acpi/nvdimm.c
 F: hw/mem/nvdimm.c
 F: include/hw/mem/nvdimm.h
 
+e1000x
+M: Dmitry Fleytman 
+S: Maintained
+F: hw/net/e1000x*
+
 Subsystems
 --
 Audio
diff --git a/hw/net/Makefile.objs b/hw/net/Makefile.objs
index 527d264..bc69948 100644
--- a/hw/net/Makefile.objs
+++ b/hw/net/Makefile.objs
@@ -6,7 +6,7 @@ common-obj-$(CONFIG_NE2000_PCI) += ne2000.o
 common-obj-$(CONFIG_EEPRO100_PCI) += eepro100.o
 common-obj-$(CONFIG_PCNET_PCI) += pcnet-pci.o
 common-obj-$(CONFIG_PCNET_COMMON) += pcnet.o
-common-obj-$(CONFIG_E1000_PCI) += e1000.o
+common-obj-$(CONFIG_E1000_PCI) += e1000.o e1000x_common.o
 common-obj-$(CONFIG_RTL8139_PCI) += rtl8139.o
 common-obj-$(CONFIG_VMXNET3_PCI) += net_tx_pkt.o net_rx_pkt.o
 common-obj-$(CONFIG_VMXNET3_PCI) += vmxnet3.o
diff --git a/hw/net/e1000.c b/hw/net/e1000.c
index 8e79b55..36e3dbe 100644
--- a/hw/net/e1000.c
+++ b/hw/net/e1000.c
@@ -36,7 +36,7 @@
 #include "qemu/iov.h"
 #include "qemu/range.h"
 
-#include "e1000_regs.h"
+#include "e1000x_common.h"
 
 static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
 
@@ -64,11 +64,6 @@ static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);
 #define PNPMMIO_SIZE  0x2
 #define MIN_BUF_SIZE  60 /* Min. octets in an ethernet frame sans FCS */
 
-/* this is the size past which hardware will drop packets when setting LPE=0 */
-#define MAXIMUM_ETHERNET_VLAN_SIZE 1522
-/* this is the size past which hardware will drop packets when setting LPE=1 */
-#define MAXIMUM_ETHERNET_LPE_SIZE 16384
-
 #define MAXIMUM_ETHERNET_HDR_LEN (14+4)
 
 /*
@@ -102,22 +97,9 @@ typedef struct E1000State_st {
 unsigned char vlan[4];
 unsigned char data[0x1];
 uint16_t size;
-unsigned char sum_needed;
 unsigned char vlan_needed;
-uint8_t ipcss;
-uint8_t ipcso;
-uint16_t ipcse;
-uint8_t tucss;
-uint8_t tucso;
-uint16_t tucse;
-uint8_t hdr_len;
-uint16_t mss;
-uint32_t paylen;
+e1000x_txd_props props;
 uint16_t tso_frames;
-char tse;
-int8_t ip;
-int8_t tcp;
-char cptse; // current packet tse bit
 } tx;
 
 struct {
@@ -162,52 +144,19 @@ typedef struct E1000BaseClass {
 #define E1000_DEVICE_GET_CLASS(obj) \
 OBJECT_GET_CLASS(E1000BaseClass, (obj), TYPE_E1000_BASE)
 
-#define defreg(x)x = (E1000_##x>>2)
-enum {
-defreg(CTRL),defreg(EECD),defreg(EERD),defreg(GPRC),
-defreg(GPTC),defreg(ICR), defreg(ICS), defreg(IMC),
-defreg(IMS), defreg(LEDCTL),  defreg(MANC),defreg(MDIC),
-defreg(MPC), defreg(PBA), defreg(RCTL),defreg(RDBAH),
-defreg(RDBAL),   defreg(RDH), defreg(RDLEN),   defreg(RDT),
-defreg(STATUS),  defreg(SWSM),defreg(TCTL),defreg(TDBAH),
-defreg(TDBAL),   defreg(TDH), defreg(TDLEN),   defreg(TDT),
-defreg(TORH),defreg(TORL),defreg(TOTH),defreg(TOTL),
-defreg(TPR), defreg(TPT), defreg(TXDCTL),  defreg(WUFC),
-defreg(RA),  defreg(MTA), defreg(CRCERRS), defreg(VFTA),
-defreg(VET), defreg(RDTR),defreg(RADV),defreg(TADV),
-defreg(ITR), defreg(FCRUC),   defreg(TDFH),defreg(TDFT),
-defreg(TDFHS),   defreg(TDFTS),   defreg(TDFPC),   defreg(RDFH),
-defreg(RDFT),defreg(RDFHS),   defreg(RDFTS),   defreg(RDFPC),
-defreg(IPAV),defreg(WUC), defreg(WUS), defreg(AIT),
-defreg(IP6AT),   defreg(IP4AT),   defreg(FFLT),defreg(FFMT),
-defreg(FFVT),defreg(WUPM),defreg(PBM), defreg(SCC),
-defreg(ECOL),defreg(MCC), defreg(LATECOL), defreg(COLC),
-defreg(DC),  defreg(TNCRS),   defreg(SEC), defreg(CEXTERR),
-defreg(RLEC),defreg(XONRXC),  defreg(XONTXC),  defreg(XOFFRXC),
-defreg(XOFFTXC), defreg(RFC), defreg(RJC), defreg(RNBC),
-defreg(TSCTFC),  defreg(MGTPRC),  defreg(MGTPDC),  defreg(MGTPTC),
-defreg(RUC), defreg(ROC), defreg(GORCL),   defreg(GORCH),
-defreg(GOTCL),   defreg(GOTCH),   defreg(BPRC),defreg(MPRC),
-defreg(TSCTC),   

[Qemu-devel] [PATCH v8 11/17] rtl8139: Move more TCP definitions to common header

2016-06-01 Thread Dmitry Fleytman
From: Dmitry Fleytman 

Signed-off-by: Dmitry Fleytman 
Signed-off-by: Leonid Bloch 
Reviewed-by: Michael S. Tsirkin 
---
 hw/net/rtl8139.c  | 5 -
 include/net/eth.h | 8 
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c
index 1e5ec14..562c1fd 100644
--- a/hw/net/rtl8139.c
+++ b/hw/net/rtl8139.c
@@ -1867,11 +1867,6 @@ static int rtl8139_transmit_one(RTL8139State *s, int 
descriptor)
 return 1;
 }
 
-/* structures and macros for task offloading */
-#define TCP_HEADER_DATA_OFFSET(tcp) (((be16_to_cpu(tcp->th_offset_flags) >> 
12)&0xf) << 2)
-#define TCP_FLAGS_ONLY(flags) ((flags)&0x3f)
-#define TCP_HEADER_FLAGS(tcp) TCP_FLAGS_ONLY(be16_to_cpu(tcp->th_offset_flags))
-
 #define TCP_HEADER_CLEAR_FLAGS(tcp, off) ((tcp)->th_offset_flags &= 
cpu_to_be16(~TCP_FLAGS_ONLY(off)))
 
 /* produces ones' complement sum of data */
diff --git a/include/net/eth.h b/include/net/eth.h
index 18d0be3..5a32259 100644
--- a/include/net/eth.h
+++ b/include/net/eth.h
@@ -67,6 +67,14 @@ typedef struct tcp_header {
 uint16_t th_urp;/* urgent pointer */
 } tcp_header;
 
+#define TCP_FLAGS_ONLY(flags) ((flags) & 0x3f)
+
+#define TCP_HEADER_FLAGS(tcp) \
+TCP_FLAGS_ONLY(be16_to_cpu((tcp)->th_offset_flags))
+
+#define TCP_HEADER_DATA_OFFSET(tcp) \
+(((be16_to_cpu((tcp)->th_offset_flags) >> 12) & 0xf) << 2)
+
 typedef struct udp_header {
 uint16_t uh_sport; /* source port */
 uint16_t uh_dport; /* destination port */
-- 
2.5.5




[Qemu-devel] [PATCH v8 09/17] vmxnet3: Use common MAC address tracing macros

2016-06-01 Thread Dmitry Fleytman
From: Dmitry Fleytman 

Signed-off-by: Dmitry Fleytman 
Signed-off-by: Leonid Bloch 
Reviewed-by: Michael S. Tsirkin 
---
 hw/net/vmxnet3.c  | 8 
 hw/net/vmxnet_debug.h | 3 ---
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index 586e915..200d2ea 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -474,7 +474,7 @@ static void vmxnet3_set_variable_mac(VMXNET3State *s, 
uint32_t h, uint32_t l)
 s->conf.macaddr.a[4] = VMXNET3_GET_BYTE(h, 0);
 s->conf.macaddr.a[5] = VMXNET3_GET_BYTE(h, 1);
 
-VMW_CFPRN("Variable MAC: " VMXNET_MF, VMXNET_MA(s->conf.macaddr.a));
+VMW_CFPRN("Variable MAC: " MAC_FMT, MAC_ARG(s->conf.macaddr.a));
 
 qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
 }
@@ -1219,7 +1219,7 @@ static void vmxnet3_reset_interrupt_states(VMXNET3State 
*s)
 static void vmxnet3_reset_mac(VMXNET3State *s)
 {
 memcpy(>conf.macaddr.a, >perm_mac.a, sizeof(s->perm_mac.a));
-VMW_CFPRN("MAC address set to: " VMXNET_MF, VMXNET_MA(s->conf.macaddr.a));
+VMW_CFPRN("MAC address set to: " MAC_FMT, MAC_ARG(s->conf.macaddr.a));
 }
 
 static void vmxnet3_deactivate_device(VMXNET3State *s)
@@ -1301,7 +1301,7 @@ static void vmxnet3_update_mcast_filters(VMXNET3State *s)
 cpu_physical_memory_read(mcast_list_pa, s->mcast_list, list_bytes);
 VMW_CFPRN("Current multicast list len is %d:", s->mcast_list_len);
 for (i = 0; i < s->mcast_list_len; i++) {
-VMW_CFPRN("\t" VMXNET_MF, VMXNET_MA(s->mcast_list[i].a));
+VMW_CFPRN("\t" MAC_FMT, MAC_ARG(s->mcast_list[i].a));
 }
 }
 }
@@ -2102,7 +2102,7 @@ static void vmxnet3_net_init(VMXNET3State *s)
 
 s->link_status_and_speed = VMXNET3_LINK_SPEED | VMXNET3_LINK_STATUS_UP;
 
-VMW_CFPRN("Permanent MAC: " VMXNET_MF, VMXNET_MA(s->perm_mac.a));
+VMW_CFPRN("Permanent MAC: " MAC_FMT, MAC_ARG(s->perm_mac.a));
 
 s->nic = qemu_new_nic(_vmxnet3_info, >conf,
   object_get_typename(OBJECT(s)),
diff --git a/hw/net/vmxnet_debug.h b/hw/net/vmxnet_debug.h
index 96495db..5aab00b 100644
--- a/hw/net/vmxnet_debug.h
+++ b/hw/net/vmxnet_debug.h
@@ -142,7 +142,4 @@
 } \
 } while (0)
 
-#define VMXNET_MF   "%02X:%02X:%02X:%02X:%02X:%02X"
-#define VMXNET_MA(a)(a)[0], (a)[1], (a)[2], (a)[3], (a)[4], (a)[5]
-
 #endif /* _QEMU_VMXNET3_DEBUG_H  */
-- 
2.5.5




[Qemu-devel] [PATCH v8 08/17] net: Add macros for MAC address tracing

2016-06-01 Thread Dmitry Fleytman
From: Dmitry Fleytman 

These macros will be used by future commits introducing
e1000e device emulation and by vmxnet3 tracing code.

Signed-off-by: Dmitry Fleytman 
Signed-off-by: Leonid Bloch 
Reviewed-by: Michael S. Tsirkin 
---
 include/net/net.h | 5 +
 1 file changed, 5 insertions(+)

diff --git a/include/net/net.h b/include/net/net.h
index 73e4c46..129d46b 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -9,6 +9,11 @@
 #include "migration/vmstate.h"
 #include "qapi-types.h"
 
+#define MAC_FMT "%02X:%02X:%02X:%02X:%02X:%02X"
+#define MAC_ARG(x) ((uint8_t *)(x))[0], ((uint8_t *)(x))[1], \
+   ((uint8_t *)(x))[2], ((uint8_t *)(x))[3], \
+   ((uint8_t *)(x))[4], ((uint8_t *)(x))[5]
+
 #define MAX_QUEUE_NUM 1024
 
 /* Maximum GSO packet size (64k) plus plenty of room for
-- 
2.5.5




[Qemu-devel] [PATCH v8 07/17] net: Introduce Toeplitz hash calculator

2016-06-01 Thread Dmitry Fleytman
From: Dmitry Fleytman 

Signed-off-by: Dmitry Fleytman 
Signed-off-by: Leonid Bloch 
Reviewed-by: Michael S. Tsirkin 
---
 include/net/checksum.h | 45 +
 1 file changed, 45 insertions(+)

diff --git a/include/net/checksum.h b/include/net/checksum.h
index 7de1acb..dd8b4f6 100644
--- a/include/net/checksum.h
+++ b/include/net/checksum.h
@@ -18,6 +18,7 @@
 #ifndef QEMU_NET_CHECKSUM_H
 #define QEMU_NET_CHECKSUM_H
 
+#include "qemu/bswap.h"
 struct iovec;
 
 uint32_t net_checksum_add_cont(int len, uint8_t *buf, int seq);
@@ -50,4 +51,48 @@ uint32_t net_checksum_add_iov(const struct iovec *iov,
   const unsigned int iov_cnt,
   uint32_t iov_off, uint32_t size);
 
+typedef struct toeplitz_key_st {
+uint32_t leftmost_32_bits;
+uint8_t *next_byte;
+} net_toeplitz_key;
+
+static inline
+void net_toeplitz_key_init(net_toeplitz_key *key, uint8_t *key_bytes)
+{
+key->leftmost_32_bits = be32_to_cpu(*(uint32_t *)key_bytes);
+key->next_byte = key_bytes + sizeof(uint32_t);
+}
+
+static inline
+void net_toeplitz_add(uint32_t *result,
+  uint8_t *input,
+  uint32_t len,
+  net_toeplitz_key *key)
+{
+register uint32_t accumulator = *result;
+register uint32_t leftmost_32_bits = key->leftmost_32_bits;
+register uint32_t byte;
+
+for (byte = 0; byte < len; byte++) {
+register uint8_t input_byte = input[byte];
+register uint8_t key_byte = *(key->next_byte++);
+register uint8_t bit;
+
+for (bit = 0; bit < 8; bit++) {
+if (input_byte & (1 << 7)) {
+accumulator ^= leftmost_32_bits;
+}
+
+leftmost_32_bits =
+(leftmost_32_bits << 1) | ((key_byte & (1 << 7)) >> 7);
+
+input_byte <<= 1;
+key_byte <<= 1;
+}
+}
+
+key->leftmost_32_bits = leftmost_32_bits;
+*result = accumulator;
+}
+
 #endif /* QEMU_NET_CHECKSUM_H */
-- 
2.5.5




[Qemu-devel] [PATCH v8 14/17] e1000_regs: Add definitions for Intel 82574-specific bits

2016-06-01 Thread Dmitry Fleytman
From: Dmitry Fleytman 

Signed-off-by: Dmitry Fleytman 
Signed-off-by: Leonid Bloch 
Reviewed-by: Michael S. Tsirkin 
---
 hw/net/e1000_regs.h | 345 +++-
 1 file changed, 342 insertions(+), 3 deletions(-)

diff --git a/hw/net/e1000_regs.h b/hw/net/e1000_regs.h
index 1c40244..d62b3fa 100644
--- a/hw/net/e1000_regs.h
+++ b/hw/net/e1000_regs.h
@@ -85,6 +85,7 @@
 #define E1000_DEV_ID_82573E  0x108B
 #define E1000_DEV_ID_82573E_IAMT 0x108C
 #define E1000_DEV_ID_82573L  0x109A
+#define E1000_DEV_ID_82574L  0x10D3
 #define E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3 0x10B5
 #define E1000_DEV_ID_80003ES2LAN_COPPER_DPT 0x1096
 #define E1000_DEV_ID_80003ES2LAN_SERDES_DPT 0x1098
@@ -104,6 +105,7 @@
 #define E1000_PHY_ID2_82544x 0xC30
 #define E1000_PHY_ID2_8254xx_DEFAULT 0xC20 /* 82540x, 82545x, and 82546x */
 #define E1000_PHY_ID2_82573x 0xCC0
+#define E1000_PHY_ID2_82574x 0xCB1
 
 /* Register Set. (82543, 82544)
  *
@@ -135,8 +137,11 @@
 #define E1000_ITR  0x000C4  /* Interrupt Throttling Rate - RW */
 #define E1000_ICS  0x000C8  /* Interrupt Cause Set - WO */
 #define E1000_IMS  0x000D0  /* Interrupt Mask Set - RW */
+#define E1000_EIAC 0x000DC  /* Ext. Interrupt Auto Clear - RW */
 #define E1000_IMC  0x000D8  /* Interrupt Mask Clear - WO */
 #define E1000_IAM  0x000E0  /* Interrupt Acknowledge Auto Mask */
+#define E1000_IVAR 0x000E4  /* Interrupt Vector Allocation Register - RW */
+#define E1000_EITR 0x000E8  /* Extended Interrupt Throttling Rate - RW */
 #define E1000_RCTL 0x00100  /* RX Control - RW */
 #define E1000_RDTR10x02820  /* RX Delay Timer (1) - RW */
 #define E1000_RDBAL1   0x02900  /* RX Descriptor Base Address Low (1) - RW */
@@ -145,6 +150,7 @@
 #define E1000_RDH1 0x02910  /* RX Descriptor Head (1) - RW */
 #define E1000_RDT1 0x02918  /* RX Descriptor Tail (1) - RW */
 #define E1000_FCTTV0x00170  /* Flow Control Transmit Timer Value - RW */
+#define E1000_FCRTV0x05F40  /* Flow Control Refresh Timer Value - RW */
 #define E1000_TXCW 0x00178  /* TX Configuration Word - RW */
 #define E1000_RXCW 0x00180  /* RX Configuration Word - RO */
 #define E1000_TCTL 0x00400  /* TX Control - RW */
@@ -161,6 +167,10 @@
 #define E1000_PBM  0x1  /* Packet Buffer Memory - RW */
 #define E1000_PBS  0x01008  /* Packet Buffer Size - RW */
 #define E1000_EEMNGCTL 0x01010  /* MNG EEprom Control */
+#define E1000_EEMNGDATA0x01014 /* MNG EEPROM Read/Write data */
+#define E1000_FLMNGCTL 0x01018 /* MNG Flash Control */
+#define E1000_FLMNGDATA0x0101C /* MNG FLASH Read data */
+#define E1000_FLMNGCNT 0x01020 /* MNG FLASH Read Counter */
 #define E1000_FLASH_UPDATES 1000
 #define E1000_EEARBC   0x01024  /* EEPROM Auto Read Bus Control */
 #define E1000_FLASHT   0x01028  /* FLASH Timer Register */
@@ -169,9 +179,12 @@
 #define E1000_FLSWDATA 0x01034  /* FLASH data register */
 #define E1000_FLSWCNT  0x01038  /* FLASH Access Counter */
 #define E1000_FLOP 0x0103C  /* FLASH Opcode Register */
+#define E1000_FLOL 0x01050  /* FEEP Auto Load */
 #define E1000_ERT  0x02008  /* Early Rx Threshold - RW */
 #define E1000_FCRTL0x02160  /* Flow Control Receive Threshold Low - RW */
+#define E1000_FCRTL_A  0x00168  /* Alias to FCRTL */
 #define E1000_FCRTH0x02168  /* Flow Control Receive Threshold High - RW */
+#define E1000_FCRTH_A  0x00160  /* Alias to FCRTH */
 #define E1000_PSRCTL   0x02170  /* Packet Split Receive Control - RW */
 #define E1000_RDBAL0x02800  /* RX Descriptor Base Address Low - RW */
 #define E1000_RDBAH0x02804  /* RX Descriptor Base Address High - RW */
@@ -179,11 +192,17 @@
 #define E1000_RDH  0x02810  /* RX Descriptor Head - RW */
 #define E1000_RDT  0x02818  /* RX Descriptor Tail - RW */
 #define E1000_RDTR 0x02820  /* RX Delay Timer - RW */
+#define E1000_RDTR_A   0x00108  /* Alias to RDTR */
 #define E1000_RDBAL0   E1000_RDBAL /* RX Desc Base Address Low (0) - RW */
+#define E1000_RDBAL0_A 0x00110 /* Alias to RDBAL0 */
 #define E1000_RDBAH0   E1000_RDBAH /* RX Desc Base Address High (0) - RW */
+#define E1000_RDBAH0_A 0x00114 /* Alias to RDBAH0 */
 #define E1000_RDLEN0   E1000_RDLEN /* RX Desc Length (0) - RW */
+#define E1000_RDLEN0_A 0x00118 /* Alias to RDLEN0 */
 #define E1000_RDH0 E1000_RDH   /* RX Desc Head (0) - RW */
+#define E1000_RDH0_A   0x00120 /* Alias to RDH0 */
 #define E1000_RDT0 E1000_RDT   /* RX Desc Tail (0) - RW */
+#define E1000_RDT0_A   0x00128 /* Alias to RDT0 */
 #define E1000_RDTR0E1000_RDTR  /* RX Delay Timer (0) - RW */
 #define E1000_RXDCTL   0x02828  /* RX Descriptor Control queue 0 - RW */
 #define E1000_RXDCTL1  0x02928  /* RX Descriptor Control queue 1 - RW */
@@ -192,22 +211,33 @@
 #define E1000_RAID   

[Qemu-devel] [PATCH v8 17/17] e1000e: Introduce qtest for e1000e device

2016-06-01 Thread Dmitry Fleytman
From: Dmitry Fleytman 

Signed-off-by: Dmitry Fleytman 
Signed-off-by: Leonid Bloch 
Reviewed-by: Michael S. Tsirkin 
---
 tests/Makefile  |   3 +
 tests/e1000e-test.c | 479 
 2 files changed, 482 insertions(+)
 create mode 100644 tests/e1000e-test.c

diff --git a/tests/Makefile b/tests/Makefile
index c79691a..a3e20e3 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -142,6 +142,8 @@ gcov-files-virtio-y += $(gcov-files-virtioserial-y)
 
 check-qtest-pci-y += tests/e1000-test$(EXESUF)
 gcov-files-pci-y += hw/net/e1000.c
+check-qtest-pci-y += tests/e1000e-test$(EXESUF)
+gcov-files-pci-y += hw/net/e1000e.c hw/net/e1000e_core.c
 check-qtest-pci-y += tests/rtl8139-test$(EXESUF)
 gcov-files-pci-y += hw/net/rtl8139.c
 check-qtest-pci-y += tests/pcnet-test$(EXESUF)
@@ -551,6 +553,7 @@ tests/i440fx-test$(EXESUF): tests/i440fx-test.o 
$(libqos-pc-obj-y)
 tests/q35-test$(EXESUF): tests/q35-test.o $(libqos-pc-obj-y)
 tests/fw_cfg-test$(EXESUF): tests/fw_cfg-test.o $(libqos-pc-obj-y)
 tests/e1000-test$(EXESUF): tests/e1000-test.o
+tests/e1000e-test$(EXESUF): tests/e1000e-test.o $(libqos-pc-obj-y)
 tests/rtl8139-test$(EXESUF): tests/rtl8139-test.o $(libqos-pc-obj-y)
 tests/pcnet-test$(EXESUF): tests/pcnet-test.o
 tests/eepro100-test$(EXESUF): tests/eepro100-test.o
diff --git a/tests/e1000e-test.c b/tests/e1000e-test.c
new file mode 100644
index 000..dbf4859
--- /dev/null
+++ b/tests/e1000e-test.c
@@ -0,0 +1,479 @@
+ /*
+ * QTest testcase for e1000e NIC
+ *
+ * Copyright (c) 2015 Ravello Systems LTD (http://ravellosystems.com)
+ * Developed by Daynix Computing LTD (http://www.daynix.com)
+ *
+ * Authors:
+ * Dmitry Fleytman 
+ * Leonid Bloch 
+ * Yan Vugenfirer 
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#include "qemu/osdep.h"
+#include 
+#include "libqtest.h"
+#include "qemu-common.h"
+#include "libqos/pci-pc.h"
+#include "qemu/sockets.h"
+#include "qemu/iov.h"
+#include "qemu/bitops.h"
+#include "libqos/malloc.h"
+#include "libqos/malloc-pc.h"
+#include "libqos/malloc-generic.h"
+
+#define E1000E_IMS  (0x00d0)
+
+#define E1000E_STATUS   (0x0008)
+#define E1000E_STATUS_LU BIT(1)
+#define E1000E_STATUS_ASDV1000 BIT(9)
+
+#define E1000E_CTRL (0x)
+#define E1000E_CTRL_RESET BIT(26)
+
+#define E1000E_RCTL (0x0100)
+#define E1000E_RCTL_EN  BIT(1)
+#define E1000E_RCTL_UPE BIT(3)
+#define E1000E_RCTL_MPE BIT(4)
+
+#define E1000E_RFCTL (0x5008)
+#define E1000E_RFCTL_EXTEN  BIT(15)
+
+#define E1000E_TCTL (0x0400)
+#define E1000E_TCTL_EN  BIT(1)
+
+#define E1000E_CTRL_EXT (0x0018)
+#define E1000E_CTRL_EXT_DRV_LOADBIT(28)
+#define E1000E_CTRL_EXT_TXLSFLOWBIT(22)
+
+#define E1000E_RX0_MSG_ID   (0)
+#define E1000E_TX0_MSG_ID   (1)
+#define E1000E_OTHER_MSG_ID (2)
+
+#define E1000E_IVAR (0x00E4)
+#define E1000E_IVAR_TEST_CFG((E1000E_RX0_MSG_ID << 0)| BIT(3)  | \
+ (E1000E_TX0_MSG_ID << 8)| BIT(11) | \
+ (E1000E_OTHER_MSG_ID << 16) | BIT(19) | \
+ BIT(31))
+
+#define E1000E_RING_LEN (0x1000)
+#define E1000E_TXD_LEN  (16)
+#define E1000E_RXD_LEN  (16)
+
+#define E1000E_TDBAL(0x3800)
+#define E1000E_TDBAH(0x3804)
+#define E1000E_TDLEN(0x3808)
+#define E1000E_TDH  (0x3810)
+#define E1000E_TDT  (0x3818)
+
+#define E1000E_RDBAL(0x2800)
+#define E1000E_RDBAH(0x2804)
+#define E1000E_RDLEN(0x2808)
+#define E1000E_RDH  (0x2810)
+#define E1000E_RDT  (0x2818)
+
+typedef struct e1000e_device {
+QPCIDevice *pci_dev;
+void *mac_regs;
+
+uint64_t tx_ring;
+uint64_t rx_ring;
+} e1000e_device;
+
+static int test_sockets[2];
+static QGuestAllocator *test_alloc;
+static QPCIBus *test_bus;
+
+static void e1000e_pci_foreach_callback(QPCIDevice *dev, int devfn, void *data)
+{
+*(QPCIDevice **) data = dev;
+}
+
+static QPCIDevice *e1000e_device_find(QPCIBus *bus)
+{
+static const int e1000e_vendor_id = 0x8086;
+static const int e1000e_dev_id = 0x10D3;
+
+QPCIDevice 

[Qemu-devel] [PATCH v8 05/17] pcie: Introduce function for DSN capability creation

2016-06-01 Thread Dmitry Fleytman
From: Dmitry Fleytman 

Signed-off-by: Dmitry Fleytman 
Signed-off-by: Leonid Bloch 
Reviewed-by: Michael S. Tsirkin 
---
 hw/pci/pcie.c | 10 ++
 include/hw/pci/pcie.h |  1 +
 2 files changed, 11 insertions(+)

diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 24cfc3b..9599fde 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -695,3 +695,13 @@ void pcie_ari_init(PCIDevice *dev, uint16_t offset, 
uint16_t nextfn)
 offset, PCI_ARI_SIZEOF);
 pci_set_long(dev->config + offset + PCI_ARI_CAP, (nextfn & 0xff) << 8);
 }
+
+void pcie_dev_ser_num_init(PCIDevice *dev, uint16_t offset, uint64_t ser_num)
+{
+static const int pci_dsn_ver = 1;
+static const int pci_dsn_cap = 4;
+
+pcie_add_capability(dev, PCI_EXT_CAP_ID_DSN, pci_dsn_ver, offset,
+PCI_EXT_CAP_DSN_SIZEOF);
+pci_set_quad(dev->config + offset + pci_dsn_cap, ser_num);
+}
diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
index cbbf0c5..056d25e 100644
--- a/include/hw/pci/pcie.h
+++ b/include/hw/pci/pcie.h
@@ -119,6 +119,7 @@ void pcie_add_capability(PCIDevice *dev,
  uint16_t offset, uint16_t size);
 
 void pcie_ari_init(PCIDevice *dev, uint16_t offset, uint16_t nextfn);
+void pcie_dev_ser_num_init(PCIDevice *dev, uint16_t offset, uint64_t ser_num);
 
 extern const VMStateDescription vmstate_pcie_device;
 
-- 
2.5.5




[Qemu-devel] [PATCH v8 04/17] pcie: Add support for PCIe CAP v1

2016-06-01 Thread Dmitry Fleytman
From: Dmitry Fleytman 

Added support for PCIe CAP v1, while reusing some of the existing v2
infrastructure.

Signed-off-by: Dmitry Fleytman 
Signed-off-by: Leonid Bloch 
Reviewed-by: Michael S. Tsirkin 
---
 hw/pci/pcie.c  | 84 --
 include/hw/pci/pcie.h  |  4 +++
 include/hw/pci/pcie_regs.h |  5 +--
 3 files changed, 73 insertions(+), 20 deletions(-)

diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 728386a..24cfc3b 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -43,26 +43,15 @@
 /***
  * pci express capability helper functions
  */
-int pcie_cap_init(PCIDevice *dev, uint8_t offset, uint8_t type, uint8_t port)
-{
-int pos;
-uint8_t *exp_cap;
-
-assert(pci_is_express(dev));
-
-pos = pci_add_capability(dev, PCI_CAP_ID_EXP, offset,
- PCI_EXP_VER2_SIZEOF);
-if (pos < 0) {
-return pos;
-}
-dev->exp.exp_cap = pos;
-exp_cap = dev->config + pos;
 
+static void
+pcie_cap_v1_fill(uint8_t *exp_cap, uint8_t port, uint8_t type, uint8_t version)
+{
 /* capability register
-   interrupt message number defaults to 0 */
+interrupt message number defaults to 0 */
 pci_set_word(exp_cap + PCI_EXP_FLAGS,
  ((type << PCI_EXP_FLAGS_TYPE_SHIFT) & PCI_EXP_FLAGS_TYPE) |
- PCI_EXP_FLAGS_VER2);
+ version);
 
 /* device capability register
  * table 7-12:
@@ -81,7 +70,27 @@ int pcie_cap_init(PCIDevice *dev, uint8_t offset, uint8_t 
type, uint8_t port)
 
 pci_set_word(exp_cap + PCI_EXP_LNKSTA,
  PCI_EXP_LNK_MLW_1 | PCI_EXP_LNK_LS_25 |PCI_EXP_LNKSTA_DLLLA);
+}
+
+int pcie_cap_init(PCIDevice *dev, uint8_t offset, uint8_t type, uint8_t port)
+{
+/* PCIe cap v2 init */
+int pos;
+uint8_t *exp_cap;
+
+assert(pci_is_express(dev));
+
+pos = pci_add_capability(dev, PCI_CAP_ID_EXP, offset, PCI_EXP_VER2_SIZEOF);
+if (pos < 0) {
+return pos;
+}
+dev->exp.exp_cap = pos;
+exp_cap = dev->config + pos;
+
+/* Filling values common with v1 */
+pcie_cap_v1_fill(exp_cap, port, type, PCI_EXP_FLAGS_VER2);
 
+/* Filling v2 specific values */
 pci_set_long(exp_cap + PCI_EXP_DEVCAP2,
  PCI_EXP_DEVCAP2_EFF | PCI_EXP_DEVCAP2_EETLPP);
 
@@ -89,7 +98,29 @@ int pcie_cap_init(PCIDevice *dev, uint8_t offset, uint8_t 
type, uint8_t port)
 return pos;
 }
 
-int pcie_endpoint_cap_init(PCIDevice *dev, uint8_t offset)
+int pcie_cap_v1_init(PCIDevice *dev, uint8_t offset, uint8_t type,
+ uint8_t port)
+{
+/* PCIe cap v1 init */
+int pos;
+uint8_t *exp_cap;
+
+assert(pci_is_express(dev));
+
+pos = pci_add_capability(dev, PCI_CAP_ID_EXP, offset, PCI_EXP_VER1_SIZEOF);
+if (pos < 0) {
+return pos;
+}
+dev->exp.exp_cap = pos;
+exp_cap = dev->config + pos;
+
+pcie_cap_v1_fill(exp_cap, port, type, PCI_EXP_FLAGS_VER1);
+
+return pos;
+}
+
+static int
+pcie_endpoint_cap_common_init(PCIDevice *dev, uint8_t offset, uint8_t cap_size)
 {
 uint8_t type = PCI_EXP_TYPE_ENDPOINT;
 
@@ -102,7 +133,19 @@ int pcie_endpoint_cap_init(PCIDevice *dev, uint8_t offset)
 type = PCI_EXP_TYPE_RC_END;
 }
 
-return pcie_cap_init(dev, offset, type, 0);
+return (cap_size == PCI_EXP_VER1_SIZEOF)
+? pcie_cap_v1_init(dev, offset, type, 0)
+: pcie_cap_init(dev, offset, type, 0);
+}
+
+int pcie_endpoint_cap_init(PCIDevice *dev, uint8_t offset)
+{
+return pcie_endpoint_cap_common_init(dev, offset, PCI_EXP_VER2_SIZEOF);
+}
+
+int pcie_endpoint_cap_v1_init(PCIDevice *dev, uint8_t offset)
+{
+return pcie_endpoint_cap_common_init(dev, offset, PCI_EXP_VER1_SIZEOF);
 }
 
 void pcie_cap_exit(PCIDevice *dev)
@@ -110,6 +153,11 @@ void pcie_cap_exit(PCIDevice *dev)
 pci_del_capability(dev, PCI_CAP_ID_EXP, PCI_EXP_VER2_SIZEOF);
 }
 
+void pcie_cap_v1_exit(PCIDevice *dev)
+{
+pci_del_capability(dev, PCI_CAP_ID_EXP, PCI_EXP_VER1_SIZEOF);
+}
+
 uint8_t pcie_cap_get_type(const PCIDevice *dev)
 {
 uint32_t pos = dev->exp.exp_cap;
diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
index b48a7a2..cbbf0c5 100644
--- a/include/hw/pci/pcie.h
+++ b/include/hw/pci/pcie.h
@@ -80,8 +80,12 @@ struct PCIExpressDevice {
 
 /* PCI express capability helper functions */
 int pcie_cap_init(PCIDevice *dev, uint8_t offset, uint8_t type, uint8_t port);
+int pcie_cap_v1_init(PCIDevice *dev, uint8_t offset,
+ uint8_t type, uint8_t port);
 int pcie_endpoint_cap_init(PCIDevice *dev, uint8_t offset);
 void pcie_cap_exit(PCIDevice *dev);
+int pcie_endpoint_cap_v1_init(PCIDevice *dev, uint8_t offset);
+void pcie_cap_v1_exit(PCIDevice *dev);
 uint8_t pcie_cap_get_type(const PCIDevice *dev);
 void 

[Qemu-devel] [PATCH v8 13/17] vmxnet3: Use pci_dma_* API instead of cpu_physical_memory_*

2016-06-01 Thread Dmitry Fleytman
From: Dmitry Fleytman 

This makes the device and the network packet
abstractions ready for IOMMU.

Signed-off-by: Dmitry Fleytman 
Signed-off-by: Leonid Bloch 
Reviewed-by: Michael S. Tsirkin 
---
 hw/net/net_tx_pkt.c | 16 +++-
 hw/net/net_tx_pkt.h |  5 +++--
 hw/net/vmxnet3.c| 51 ++-
 3 files changed, 44 insertions(+), 28 deletions(-)

diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c
index a64f51c..e4478be 100644
--- a/hw/net/net_tx_pkt.c
+++ b/hw/net/net_tx_pkt.c
@@ -20,6 +20,7 @@
 #include "net/checksum.h"
 #include "net/tap.h"
 #include "net/net.h"
+#include "hw/pci/pci.h"
 
 enum {
 NET_TX_PKT_VHDR_FRAG = 0,
@@ -30,6 +31,8 @@ enum {
 
 /* TX packet private context */
 struct NetTxPkt {
+PCIDevice *pci_dev;
+
 struct virtio_net_hdr virt_hdr;
 bool has_virt_hdr;
 
@@ -54,11 +57,13 @@ struct NetTxPkt {
 bool is_loopback;
 };
 
-void net_tx_pkt_init(struct NetTxPkt **pkt, uint32_t max_frags,
-bool has_virt_hdr)
+void net_tx_pkt_init(struct NetTxPkt **pkt, PCIDevice *pci_dev,
+uint32_t max_frags, bool has_virt_hdr)
 {
 struct NetTxPkt *p = g_malloc0(sizeof *p);
 
+p->pci_dev = pci_dev;
+
 p->vec = g_malloc((sizeof *p->vec) *
 (max_frags + NET_TX_PKT_PL_START_FRAG));
 
@@ -383,7 +388,8 @@ bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, 
hwaddr pa,
 ventry = >raw[pkt->raw_frags];
 mapped_len = len;
 
-ventry->iov_base = cpu_physical_memory_map(pa, _len, false);
+ventry->iov_base = pci_dma_map(pkt->pci_dev, pa,
+   _len, DMA_DIRECTION_TO_DEVICE);
 
 if ((ventry->iov_base != NULL) && (len == mapped_len)) {
 ventry->iov_len = mapped_len;
@@ -444,8 +450,8 @@ void net_tx_pkt_reset(struct NetTxPkt *pkt)
 assert(pkt->raw);
 for (i = 0; i < pkt->raw_frags; i++) {
 assert(pkt->raw[i].iov_base);
-cpu_physical_memory_unmap(pkt->raw[i].iov_base, pkt->raw[i].iov_len,
-  false, pkt->raw[i].iov_len);
+pci_dma_unmap(pkt->pci_dev, pkt->raw[i].iov_base, pkt->raw[i].iov_len,
+  DMA_DIRECTION_TO_DEVICE, 0);
 }
 pkt->raw_frags = 0;
 
diff --git a/hw/net/net_tx_pkt.h b/hw/net/net_tx_pkt.h
index e49772d..07b9a20 100644
--- a/hw/net/net_tx_pkt.h
+++ b/hw/net/net_tx_pkt.h
@@ -31,11 +31,12 @@ struct NetTxPkt;
  * Init function for tx packet functionality
  *
  * @pkt:packet pointer
+ * @pci_dev:PCI device processing this packet
  * @max_frags:  max tx ip fragments
  * @has_virt_hdr:   device uses virtio header.
  */
-void net_tx_pkt_init(struct NetTxPkt **pkt, uint32_t max_frags,
-bool has_virt_hdr);
+void net_tx_pkt_init(struct NetTxPkt **pkt, PCIDevice *pci_dev,
+uint32_t max_frags, bool has_virt_hdr);
 
 /**
  * Clean all tx packet resources.
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index 33cd07d..16645e6 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -802,7 +802,9 @@ vmxnet3_pop_rxc_descr(VMXNET3State *s, int qidx, uint32_t 
*descr_gen)
 hwaddr daddr =
 vmxnet3_ring_curr_cell_pa(>rxq_descr[qidx].comp_ring);
 
-cpu_physical_memory_read(daddr, , sizeof(struct Vmxnet3_RxCompDesc));
+pci_dma_read(PCI_DEVICE(s), daddr,
+ , sizeof(struct Vmxnet3_RxCompDesc));
+
 ring_gen = vmxnet3_ring_curr_gen(>rxq_descr[qidx].comp_ring);
 
 if (rxcd.gen != ring_gen) {
@@ -1023,10 +1025,11 @@ nocsum:
 }
 
 static void
-vmxnet3_physical_memory_writev(const struct iovec *iov,
-   size_t start_iov_off,
-   hwaddr target_addr,
-   size_t bytes_to_copy)
+vmxnet3_pci_dma_writev(PCIDevice *pci_dev,
+   const struct iovec *iov,
+   size_t start_iov_off,
+   hwaddr target_addr,
+   size_t bytes_to_copy)
 {
 size_t curr_off = 0;
 size_t copied = 0;
@@ -1036,9 +1039,9 @@ vmxnet3_physical_memory_writev(const struct iovec *iov,
 size_t chunk_len =
 MIN((curr_off + iov->iov_len) - start_iov_off, bytes_to_copy);
 
-cpu_physical_memory_write(target_addr + copied,
-  iov->iov_base + start_iov_off - curr_off,
-  chunk_len);
+pci_dma_write(pci_dev, target_addr + copied,
+  iov->iov_base + start_iov_off - curr_off,
+  chunk_len);
 
 copied += chunk_len;
 start_iov_off += chunk_len;
@@ -1088,15 +1091,15 @@ vmxnet3_indicate_packet(VMXNET3State *s)
 }
 
 chunk_size = MIN(bytes_left, rxd.len);
-vmxnet3_physical_memory_writev(data, bytes_copied,
-   le64_to_cpu(rxd.addr), 

[Qemu-devel] [PATCH v8 03/17] pci: Introduce define for PM capability version 1.1

2016-06-01 Thread Dmitry Fleytman
From: Dmitry Fleytman 

Signed-off-by: Dmitry Fleytman 
Signed-off-by: Leonid Bloch 
Reviewed-by: Michael S. Tsirkin 
---
 include/hw/pci/pci_regs.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/hw/pci/pci_regs.h b/include/hw/pci/pci_regs.h
index ba8cbe9..7a83142 100644
--- a/include/hw/pci/pci_regs.h
+++ b/include/hw/pci/pci_regs.h
@@ -1 +1,3 @@
 #include "standard-headers/linux/pci_regs.h"
+
+#define  PCI_PM_CAP_VER_1_1 0x0002  /* PCI PM spec ver. 1.1 */
-- 
2.5.5




[Qemu-devel] [PATCH v8 00/17] Introduce Intel 82574 GbE Controller Emulation (e1000e)

2016-06-01 Thread Dmitry Fleytman
Hello All,

This is v8 of e1000e series.

For convenience, the same patches are available at:
https://github.com/daynix/qemu-e1000e/tree/e1000e-submit-v8

Series
Reviewed-by: Michael S. Tsirkin 

Best regards,
Dmitry.

Changes since v7:

1. Fixed broken ETH macros reported by Jason
2. Rebase to the latest master

Changes since v6:

1. Comments added and commit message extended for pci_[set|get]_quad() patch
2. Fixes a few styling issues reported by Michael
3. Rebase to the latest master

Changes since v5:

1. Fixed build failure on old clang versions
2. Added patch that fixes unaligned access in pci_[set|get]_quad()
3. Rebased to the latest master

Changes since v4:

1. Rebased to the latest master (2.6.0+)

Changes since v3:

1. Various code fixes as suggested by Jason and Michael
2. Rebased to the latest master

Changes since v2:

1. Interrupt storm on latest Linux kernels fixed
2. Device unit test added
3. Introduced code sharing between e1000 and e1000e
4. Various code fixes as suggested by Jason
5. Rebased to the latest master

Changes since v1:

1. PCI_PM_CAP_VER_1_1 is defined now in include/hw/pci/pci_regs.h and
   not in include/standard-headers/linux/pci_regs.h.
2. Changes in naming and extra comments in hw/pci/pcie.c and in
   include/hw/pci/pcie.h.
3. Defining pci_dsn_ver and pci_dsn_cap static const variables in
   hw/pci/pcie.c, instead of PCI_DSN_VER and PCI_DSN_CAP symbolic
   constants in include/hw/pci/pcie_regs.h.
4. Changing the vmxnet3_device_serial_num function in hw/net/vmxnet3.c
   to avoid the cast when it is called.
5. Avoiding a preceding underscore in all the e1000e-related names.
6. Minor style changes.

===

Hello All,

This series contains the final code of the e1000e device emulation that we
have developed. Please review, and consider acceptance of these patches
to the upstream QEMU repository.

The code stability was verified by various traffic tests using Fedora 22
Linux, and Windows Server 2012R2 guests. Also, Microsoft Hardware
Certification Kit (HCK) tests were run on a Windows Server 2012R2 guest.

There was a discussion on the possibility of code sharing between the
e1000e, and the existing e1000 devices. We have reviewed the final code
for parts that may be shared between this device and the currently
available e1000 emulation. The device specifications are very different,
and almost no registers or functions were left as is from e1000. The ring
descriptor structures were changed as well, by the
introduction of extended and PS descriptors, as well as additional bits.

Additional differences stem from the fact that the e1000e device re-uses
network packet abstractions introduced by the vmxnet3 device, while the
e1000 has its own code for packet handling. BTW, it may be worth reusing
those abstractions in e1000 as well. (Following these changes the
vmxnet3 device was successfully tested for possible regressions.)

There are a few minor parts that may be shared, e.g. the default
register handlers, and the ring management functions. The total amount
of shared lines will be about 100--150, so we're not sure it is worth
the bother and the risk of breaking e1000, which is a good,
old, and stable device.

Currently, the e1000e code is stand alone w.r.t. e1000.

Please share your thoughts.

Thanks in advance,
Dmitry.

Changes since RFCv2:

1. Device functionality verified using Microsoft Hardware Certification Test 
Kit (HCK)
2. Introduced a number of performance improvements
3. The code was cleaned, and rebased to the latest master
4. Patches verified with checkpatch.pl

===

Changes since RFCv1:

1. Added support for all the device features:
  - Interrupt moderation.
  - RSS.
  - Multiqueue.
2. Simulated exact PCI/PCIe configuration space layout.
3. Made fixes needed to pass Microsoft's HW certification tests (HCK).

This series is still an RFC, because the following tasks are not done yet:

1. See which code can be shared between this device and the existing e1000 
device.
2. Rebase patches to the latest master (current base is v2.3.0).

Please share your thoughts,
Thanks, Dmitry.

===

Hello qemu-devel,

This patch series is an RFC for the new networking device emulation
we're developing for QEMU.

This new device emulates the Intel 82574 GbE Controller and works
with unmodified Intel e1000e drivers from the Linux/Windows kernels.

The status of the current series is "Functional Device Ready, work
on Extended Features in Progress".

More precisely, these patches represent a functional device, which
is recognized by the standard Intel drivers, and is able to transfer
TX/RX packets with CSO/TSO offloads, according to the spec.

Extended features not supported yet (work in progress):
  1. TX/RX Interrupt moderation mechanisms
  2. RSS
  3. Full-featured multi-queue (use of multiqueued network backend)

Also, there will be some code refactoring and performance
optimization efforts.

This 

[Qemu-devel] [PATCH v8 02/17] msix: make msix_clr_pending() visible for clients

2016-06-01 Thread Dmitry Fleytman
From: Dmitry Fleytman 

This function will be used by e1000e device code.

Signed-off-by: Dmitry Fleytman 
Signed-off-by: Leonid Bloch 
Reviewed-by: Michael S. Tsirkin 
---
 hw/pci/msix.c | 2 +-
 include/hw/pci/msix.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/pci/msix.c b/hw/pci/msix.c
index b75f0e9..0ec1cb1 100644
--- a/hw/pci/msix.c
+++ b/hw/pci/msix.c
@@ -72,7 +72,7 @@ void msix_set_pending(PCIDevice *dev, unsigned int vector)
 *msix_pending_byte(dev, vector) |= msix_pending_mask(vector);
 }
 
-static void msix_clr_pending(PCIDevice *dev, int vector)
+void msix_clr_pending(PCIDevice *dev, int vector)
 {
 *msix_pending_byte(dev, vector) &= ~msix_pending_mask(vector);
 }
diff --git a/include/hw/pci/msix.h b/include/hw/pci/msix.h
index 72e5f93..048a29d 100644
--- a/include/hw/pci/msix.h
+++ b/include/hw/pci/msix.h
@@ -29,6 +29,7 @@ int msix_present(PCIDevice *dev);
 
 bool msix_is_masked(PCIDevice *dev, unsigned vector);
 void msix_set_pending(PCIDevice *dev, unsigned vector);
+void msix_clr_pending(PCIDevice *dev, int vector);
 
 int msix_vector_use(PCIDevice *dev, unsigned vector);
 void msix_vector_unuse(PCIDevice *dev, unsigned vector);
-- 
2.5.5




[Qemu-devel] [PATCH v8 06/17] vmxnet3: Use generic function for DSN capability definition

2016-06-01 Thread Dmitry Fleytman
From: Dmitry Fleytman 

Signed-off-by: Dmitry Fleytman 
Signed-off-by: Leonid Bloch 
Reviewed-by: Michael S. Tsirkin 
---
 hw/net/vmxnet3.c | 12 +---
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index 20f26b7..586e915 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -2255,9 +2255,9 @@ static const MemoryRegionOps b1_ops = {
 },
 };
 
-static uint8_t *vmxnet3_device_serial_num(VMXNET3State *s)
+static uint64_t vmxnet3_device_serial_num(VMXNET3State *s)
 {
-static uint64_t dsn_payload;
+uint64_t dsn_payload;
 uint8_t *dsnp = (uint8_t *)_payload;
 
 dsnp[0] = 0xfe;
@@ -2268,7 +2268,7 @@ static uint8_t *vmxnet3_device_serial_num(VMXNET3State *s)
 dsnp[5] = s->conf.macaddr.a[1];
 dsnp[6] = s->conf.macaddr.a[2];
 dsnp[7] = 0xff;
-return dsnp;
+return dsn_payload;
 }
 
 static void vmxnet3_pci_realize(PCIDevice *pci_dev, Error **errp)
@@ -2313,10 +2313,8 @@ static void vmxnet3_pci_realize(PCIDevice *pci_dev, 
Error **errp)
 pcie_endpoint_cap_init(pci_dev, VMXNET3_EXP_EP_OFFSET);
 }
 
-pcie_add_capability(pci_dev, PCI_EXT_CAP_ID_DSN, 0x1,
-VMXNET3_DSN_OFFSET, PCI_EXT_CAP_DSN_SIZEOF);
-memcpy(pci_dev->config + VMXNET3_DSN_OFFSET + 4,
-   vmxnet3_device_serial_num(s), sizeof(uint64_t));
+pcie_dev_ser_num_init(pci_dev, VMXNET3_DSN_OFFSET,
+  vmxnet3_device_serial_num(s));
 }
 
 register_savevm(dev, "vmxnet3-msix", -1, 1,
-- 
2.5.5




[Qemu-devel] [PATCH v8 01/17] pci: fix unaligned access in pci_xxx_quad()

2016-06-01 Thread Dmitry Fleytman
From: Dmitry Fleytman 

Replace legacy cpu_to_le64w()/le64_to_cpup()
calls with stq_le_p()/ldq_le_p().

Motivation for this modification is that
follow up patches add utility function
pcie_dev_ser_num_init() for PCIe DSN
capability creation which uses
pci_set_quad() with a misaligned offset.

Signed-off-by: Dmitry Fleytman 
Signed-off-by: Leonid Bloch 
Reviewed-by: Michael S. Tsirkin 
---
 include/hw/pci/pci.h | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index ef6ba51..4420f47 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -465,16 +465,23 @@ pci_get_long(const uint8_t *config)
 return ldl_le_p(config);
 }
 
+/*
+ * PCI capabilities and/or their fields
+ * are generally DWORD aligned only so
+ * mechanism used by pci_set/get_quad()
+ * must be tolerant to unaligned pointers
+ *
+ */
 static inline void
 pci_set_quad(uint8_t *config, uint64_t val)
 {
-cpu_to_le64w((uint64_t *)config, val);
+stq_le_p(config, val);
 }
 
 static inline uint64_t
 pci_get_quad(const uint8_t *config)
 {
-return le64_to_cpup((const uint64_t *)config);
+return ldq_le_p(config);
 }
 
 static inline void
-- 
2.5.5




Re: [Qemu-devel] [PATCH 4/6 Resend] Vhost-pci RFC: Detailed Description in the Virtio Specification Format

2016-06-01 Thread Xiao Guangrong



On 05/29/2016 04:11 PM, Wei Wang wrote:

Signed-off-by: Wei Wang 
---
  Details | 324 
  1 file changed, 324 insertions(+)
  create mode 100644 Details

diff --git a/Details b/Details
new file mode 100644
index 000..4ea2252
--- /dev/null
+++ b/Details
@@ -0,0 +1,324 @@
+1 Device ID
+TBD
+
+2 Virtqueues
+0 controlq
+
+3 Feature Bits
+3.1 Local Feature Bits
+Currently no local feature bits are defined, so the standard virtio feature
+bits negotiation will always be successful and complete.
+
+3.2 Remote Feature Bits
+The remote feature bits are obtained from the frontend virtio device and
+negotiated with the vhost-pci driver via the controlq. The negotiation steps
+are described in 4.5 Device Initialization.
+
+4 Device Configuration Layout
+struct vhost_pci_config {
+   #define VHOST_PCI_CONTROLQ_MEMORY_INFO_ACK 0
+   #define VHOST_PCI_CONTROLQ_DEVICE_INFO_ACK 1
+   #define VHOST_PCI_CONTROLQ_FEATURE_BITS_ACK 2
+   u32 ack_type;
+   u32 ack_device_type;
+   u64 ack_device_id;
+   union {
+   #define VHOST_PCI_CONTROLQ_ACK_ADD_DONE 0
+   #define VHOST_PCI_CONTROLQ_ACK_ADD_FAIL 1
+   #define VHOST_PCI_CONTROLQ_ACK_DEL_DONE 2
+   #define VHOST_PCI_CONTROLQ_ACK_DEL_FAIL 3
+   u64 ack_memory_info;
+   u64 ack_device_info;
+   u64 ack_feature_bits;
+   };
+};


Do you need to write all 4 of these fields to ack the operation? That seems
inefficient, and it is not flexible if the driver needs to
offer more data to the device in the future. Can we dedicate a
vq for this purpose?

BTW, the current approach cannot handle the case where there are multiple
requests of the same kind in the control queue, e.g., if there are two
memory-add requests in the control queue.


+
+The configuration fields are currently used for the vhost-pci driver to
+acknowledge to the vhost-pci device after it receives controlq messages.
+
+4.5 Device Initialization
+When a device VM boots, it creates a vhost-pci server socket.
+
+When a virtio device on the driver VM is created with specifying the use of a
+vhost-pci device as a backend, a client socket is created and connected to the
+corresponding vhost-pci server for message exchanges.
+
+The messages passed to the vhost-pci server are preceded by the following
+header:
+struct vhost_pci_socket_hdr {
+   #define VHOST_PCI_SOCKET_MEMORY_INFO 0
+   #define VHOST_PCI_SOCKET_MEMORY_INFO_ACK 1
+   #define VHOST_PCI_SOCKET_DEVICE_INFO 2
+   #define VHOST_PCI_SOCKET_DEVICE_INFO_ACK 3
+   #define VHOST_PCI_SOCKET_FEATURE_BITS 4
+   #define VHOST_PCI_SOCKET_FEATURE_BITS_ACK 5
+   u16 msg_type;
+   u16 msg_version;
+   u32 msg_len;
+   u64 qemu_pid;
+};
+
+The payload of the above message types can be constructed using the structures
+below:
+/* VHOST_PCI_SOCKET_MEMORY_INFO message */
+struct vhost_pci_socket_memory_info {
+   #define VHOST_PCI_ADD_MEMORY 0
+   #define VHOST_PCI_DEL_MEMORY 1
+   u16 ops;
+   u32 nregions;
+   struct vhost_pci_memory_region {
+   int fd;
+   u64 guest_phys_addr;
+   u64 memory_size;
+   u64 mmap_offset;
+   } regions[VHOST_PCI_MAX_NREGIONS];
+};
+
+/* VHOST_PCI_SOCKET_DEVICE_INFO message */
+struct vhost_pci_device_info {
+   #define VHOST_PCI_ADD_FRONTEND_DEVICE 0
+   #define VHOST_PCI_DEL_FRONTEND_DEVICE 1
+   u16ops;
+   u32nvirtq;
+   #define VHOST_PCI_FRONTEND_DEVICE_NET 1
+   #define VHOST_PCI_FRONTEND_DEVICE_BLK 2
+   #define VHOST_PCI_FRONTEND_DEVICE_CONSOLE 3
+   #define VHOST_PCI_FRONTEND_DEVICE_ENTROPY 4
+   #define VHOST_PCI_FRONTEND_DEVICE_BALLOON 5
+   #define VHOST_PCI_FRONTEND_DEVICE_SCSI 8
+   u32device_type;
+   u64device_id;
+   struct virtq exotic_virtq[VHOST_PCI_MAX_NVIRTQ];
+};
+The device_id field identifies the device. For example, it can be used to
+store a MAC address if the device_type is VHOST_PCI_FRONTEND_DEVICE_NET.
+
+/* VHOST_PCI_SOCKET_FEATURE_BITS message*/
+struct vhost_pci_feature_bits {
+   u64 feature_bits;
+};


We not only have 'socket feature bits' but also the feature bits for each
virtio device plugged in on the side of the vhost-pci device.

E.g.: if there are two virtio devices (e.g., a NIC and a BLK) and both of them
need to communicate directly with another VM, the feature bits of these two
devices need to be negotiated with that VM separately. And you cannot put
these feature bits in the vhost_pci_device_info struct, as its vq is not
created at that time.


+
+/* VHOST_PCI_SOCKET_xx_ACK messages */
+struct vhost_pci_socket_ack {
+   #define VHOST_PCI_SOCKET_ACK_ADD_DONE 0
+   #define VHOST_PCI_SOCKET_ACK_ADD_FAIL 1
+   #define VHOST_PCI_SOCKET_ACK_DEL_DONE 2
+   #define VHOST_PCI_SOCKET_ACK_DEL_FAIL 3
+   u64 ack;
+};
+
+The 

Re: [Qemu-devel] [PATCH v7 04/15] Makefile: Rules for docker testing

2016-06-01 Thread Paolo Bonzini


On 01/06/2016 06:25, Fam Zheng wrote:
> +# Use a global constant ccache directory to speed up repetitive builds
> +DOCKER_CCACHE_DIR := /var/tmp/qemu-docker-ccache

Same here, use $HOME/.cache/qemu-docker-ccache instead.  Do you need a
mkdir -p?

Paolo



Re: [Qemu-devel] [PATCH v7 04/15] Makefile: Rules for docker testing

2016-06-01 Thread Paolo Bonzini


On 01/06/2016 06:25, Fam Zheng wrote:
> +CUR_TIME := $(shell date +%Y-%m-%d-%H.%M.%S.)
> +# Makes the definition constant after the first expansion
> +DOCKER_SRC_COPY = $(eval DOCKER_SRC_COPY := 
> /tmp/docker-src.$(CUR_TIME))$(DOCKER_SRC_COPY)
> +
> +$(DOCKER_SRC_COPY):
> + @mkdir $@
> + $(call make-archive-maybe, $(SRC_PATH), $@/qemu.tgz)
> + $(call make-archive-maybe, $(SRC_PATH)/dtc, $@/dtc.tgz)
> + $(call make-archive-maybe, $(SRC_PATH)/pixman, $@/pixman.tgz)
> + $(call quiet-command, cp $(SRC_PATH)/tests/docker/run $@/run, \
> + "  COPY RUNNER")
> +
> +docker-qemu-src: $(DOCKER_SRC_COPY)

Do not use /tmp, instead place it under the current directory.

You can do the change and send a pull request! :)

Paolo



Re: [Qemu-devel] [PATCH v6 04/11] megasas: Fix

2016-06-01 Thread Markus Armbruster
Title "megasas: Fix" is no good, possibly an editing accident.  Suggest
something like "megasas: Fix check for msi_init() failure"

Cao jin  writes:

> msi_init returns non-zero value on both failure and success.
>
> cc: Hannes Reinecke 
> cc: Paolo Bonzini 
> cc: Marcel Apfelbaum 
>
> Reviewed-by: Marcel Apfelbaum 
> Signed-off-by: Cao jin 
> ---
>  hw/scsi/megasas.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c
> index a63a581..56fb645 100644
> --- a/hw/scsi/megasas.c
> +++ b/hw/scsi/megasas.c
> @@ -2348,7 +2348,7 @@ static void megasas_scsi_realize(PCIDevice *dev, Error 
> **errp)
>"megasas-queue", 0x4);
>  
>  if (megasas_use_msi(s) &&
> -msi_init(dev, 0x50, 1, true, false)) {
> +msi_init(dev, 0x50, 1, true, false) < 0) {
>  s->flags &= ~MEGASAS_MASK_USE_MSI;
>  }
>  if (megasas_use_msix(s) &&



Re: [Qemu-devel] [PATCH qemu v16 04/19] vmstate: Define VARRAY with VMS_ALLOC

2016-06-01 Thread Paolo Bonzini


On 01/06/2016 04:29, Alexey Kardashevskiy wrote:
> On 27/05/16 17:54, Alexey Kardashevskiy wrote:
>> On 04/05/16 16:52, Alexey Kardashevskiy wrote:
>>> This allows dynamic allocation for migrating arrays.
>>>
>>> Already existing VMSTATE_VARRAY_UINT32 requires an array to be
>>> pre-allocated, however there are cases when the size is not known in
>>> advance and there is no real need to enforce it.
>>>
>>> This defines another variant of VMSTATE_VARRAY_UINT32 with VMS_ALLOC
>>> flag which tells the receiving side to allocate memory for the array
>>> before receiving the data.
>>>
>>> The first user of it is a dynamic DMA window which existence and size
>>> are totally dynamic.
>>>
>>> Signed-off-by: Alexey Kardashevskiy 
>>> Reviewed-by: David Gibson 
>>> Reviewed-by: Thomas Huth 
>>
>>
>> In what tree is this going to go? pseries? Or migration?
> 
> Anyone?

Go ahead, include it.

Paolo



Re: [Qemu-devel] [PATCH] block: Drop bdrv_ioctl_bh_cb

2016-06-01 Thread Paolo Bonzini


On 01/06/2016 03:52, Fam Zheng wrote:
> Similar to the "!drv || !drv->bdrv_aio_ioctl" case above, here it is
> okay to set co.ret and return. As pointed out by Paolo, a BH will be
> created as necessary by the caller (bdrv_co_maybe_schedule_bh).
> Besides, as pointed out by Kevin, "data" was leaked before.
> 
> Reported-by: Kevin Wolf 
> Reported-by: Paolo Bonzini 
> Signed-off-by: Fam Zheng 
> ---
>  block/io.c | 20 ++--
>  1 file changed, 2 insertions(+), 18 deletions(-)
> 
> diff --git a/block/io.c b/block/io.c
> index 2d832aa..c32f5b7 100644
> --- a/block/io.c
> +++ b/block/io.c
> @@ -2309,19 +2309,6 @@ int bdrv_discard(BlockDriverState *bs, int64_t 
> sector_num, int nb_sectors)
>  return rwco.ret;
>  }
>  
> -typedef struct {
> -CoroutineIOCompletion *co;
> -QEMUBH *bh;
> -} BdrvIoctlCompletionData;
> -
> -static void bdrv_ioctl_bh_cb(void *opaque)
> -{
> -BdrvIoctlCompletionData *data = opaque;
> -
> -bdrv_co_io_em_complete(data->co, -ENOTSUP);
> -qemu_bh_delete(data->bh);
> -}
> -
>  static int bdrv_co_do_ioctl(BlockDriverState *bs, int req, void *buf)
>  {
>  BlockDriver *drv = bs->drv;
> @@ -2339,11 +2326,8 @@ static int bdrv_co_do_ioctl(BlockDriverState *bs, int 
> req, void *buf)
>  
>  acb = drv->bdrv_aio_ioctl(bs, req, buf, bdrv_co_io_em_complete, );
>  if (!acb) {
> -BdrvIoctlCompletionData *data = g_new(BdrvIoctlCompletionData, 1);
> -data->bh = aio_bh_new(bdrv_get_aio_context(bs),
> -bdrv_ioctl_bh_cb, data);
> -data->co = 
> -qemu_bh_schedule(data->bh);
> +co.ret = -ENOTSUP;
> +goto out;
>  }
>  qemu_coroutine_yield();
>  out:
> 

Reviewed-by: Paolo Bonzini 



Re: [Qemu-devel] [PATCH v6 02/11] fix some coding style problems

2016-06-01 Thread Markus Armbruster
Cao jin  writes:

> It has:
> 1. More newlines make the code block well separated.
> 2. Add more comments for msi_init.
> 3. Fix a indentation in vmxnet3.c.
> 4. ioh3420 & xio3130_downstream: put PCI Express capability init function
>together, make it more readable.
>
> cc: Dmitry Fleytman 
> cc: Jason Wang 
> cc: Michael S. Tsirkin 
> cc: Markus Armbruster 
> cc: Marcel Apfelbaum 
>
> Reviewed-by: Marcel Apfelbaum 
> Signed-off-by: Cao jin 
[...]
> diff --git a/hw/pci/msi.c b/hw/pci/msi.c
> index e0e64c2..97f35c0 100644
> --- a/hw/pci/msi.c
> +++ b/hw/pci/msi.c
> @@ -165,6 +165,23 @@ bool msi_enabled(const PCIDevice *dev)
>   PCI_MSI_FLAGS_ENABLE);
>  }
>  
> +/*
> + * Make PCI device @dev MSI-capable.
> + * Non-zero @offset puts capability MSI at that offset in PCI config
> + * space.
> + * @nr_vectors is the number of MSI vectors (1, 2, 4, 8, 16 or 32).
> + * If @msi64bit, make the device capable of sending a 64-bit message
> + * address.
> + * If @msi_per_vector_mask, make the device support per-vector masking.
> + * @errp is for returning errors.

@errp only appears in PATCH 11.  The easiest fix is to add this comment
only then.

> + * Return the offset of capability MSI in config space on success,
> + * set @errp and return -errno on error.
> + *
> + * -ENOTSUP means lacking msi support for a msi-capable platform.
> + * -EINVAL means capability overlap, happens when @offset is non-zero,
> + *  also means a programming error, except device assignment, which can check
> + *  if a real HW is broken.
> + */
>  int msi_init(struct PCIDevice *dev, uint8_t offset,
>   unsigned int nr_vectors, bool msi64bit, bool 
> msi_per_vector_mask)
>  {



Re: [Qemu-devel] [PATCH] host-utils: Prefer 'false' for bool type

2016-06-01 Thread Michael Tokarev
31.05.2016 21:33, Eric Blake wrote:
> Mixing '0' and 'bool' looks stupid.

Applied to -trivial, thanks!

/mjt



Re: [Qemu-devel] [PATCH] docs/multi-thread-compression: Fix wrong command string

2016-06-01 Thread Michael Tokarev
23.05.2016 12:43, Wei Jiangang wrote:
> s/info_migrate_capabilities/info migrate_capabilities

Applied to -trivial, thanks!

/mjt



Re: [Qemu-devel] [PATCH] docs: Fix a couple of typos in throttle.txt

2016-06-01 Thread Michael Tokarev
30.05.2016 13:00, Alberto Garcia wrote:
> On Mon 30 May 2016 08:49:18 AM CEST, Changlong Xie wrote:
 - Water leaks from the bucket at a rate of 100 IOPS.
 - Water can be added to the bucket at a rate of 2000 IOPS.
 - The size of the bucket is 2000 x 60 = 120000
 -  - If 'iops-total-max-length' is unset then the bucket size is 100.
 +  - If 'iops-total-max' is unset then the bucket size is 100.
>>
>> Sorry to bother you, but why is the bucket size 100 rather than 100 x 60?
> 
> Oh, that's because 'iops-total-max-length' can only be set if
> 'iops-total-max' is set as well. It's explained earlier in the document,
> maybe I should make it clear there as well.
> 
> Michael, shall I send a new patch on top of my previous one or can the
> previous one be replaced?

I fixed it in the original commit, thanks!

/mjt



[Qemu-devel] [PATCH] hw/char: QOM'ify escc.c (fix)

2016-06-01 Thread xiaoqiang zhao
The previous commit e7c9136977cb99c6eb52c9139f7b8d8b5fa87db9
(hw/char: QOM'ify escc.c) causes qemu-system-ppc/ppc64
OpenBIOS to freeze on startup. This commit fixes it.

Signed-off-by: xiaoqiang zhao 
---
 hw/char/escc.c | 12 +++-
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/hw/char/escc.c b/hw/char/escc.c
index 8e6a7df..31a5f90 100644
--- a/hw/char/escc.c
+++ b/hw/char/escc.c
@@ -989,18 +989,13 @@ static void escc_init1(Object *obj)
 SysBusDevice *dev = SYS_BUS_DEVICE(obj);
 unsigned int i;
 
-s->chn[0].disabled = s->disabled;
-s->chn[1].disabled = s->disabled;
 for (i = 0; i < 2; i++) {
 sysbus_init_irq(dev, >chn[i].irq);
 s->chn[i].chn = 1 - i;
-s->chn[i].clock = s->frequency / 2;
 }
 s->chn[0].otherchn = >chn[1];
 s->chn[1].otherchn = >chn[0];
 
-memory_region_init_io(>mmio, obj, _mem_ops, s, "escc",
-  ESCC_SIZE << s->it_shift);
 sysbus_init_mmio(dev, >mmio);
 }
 
@@ -1009,8 +1004,15 @@ static void escc_realize(DeviceState *dev, Error **errp)
 ESCCState *s = ESCC(dev);
 unsigned int i;
 
+s->chn[0].disabled = s->disabled;
+s->chn[1].disabled = s->disabled;
+
+memory_region_init_io(>mmio, OBJECT(dev), _mem_ops, s, "escc",
+  ESCC_SIZE << s->it_shift);
+
 for (i = 0; i < 2; i++) {
 if (s->chn[i].chr) {
+s->chn[i].clock = s->frequency / 2;
 qemu_chr_add_handlers(s->chn[i].chr, serial_can_receive,
   serial_receive1, serial_event, >chn[i]);
 }
-- 
2.1.4





Re: [Qemu-devel] [PATCH v2 3/3] qapi: Fix memleak in string visitors on int lists

2016-06-01 Thread Markus Armbruster
Eric Blake  writes:

> Commit 7f8f9ef1 introduced the ability to store a list of
> integers as a sorted list of ranges, but when merging ranges,
> it leaks one or more ranges.  It was also using range_get_last()
> incorrectly within range_compare() (a range is a start/end pair,
> but range_get_last() is for start/len pairs), and will also
> mishandle a range ending in UINT64_MAX (remember, we document
> that no range covers 2**64 bytes, but that ranges that end on
> UINT64_MAX have end < begin).
>
> The whole merge algorithm was rather complex, and included
> unnecessary passes over data within glib functions, and enough
> indirection to make it hard to easily plug the data leaks.
> Since we are already hard-coding things to a list of ranges,
> just rewrite the thing to open-code the traversal and
> comparisons, by making the range_compare() helper function give
> us an answer that is easier to use, at which point we avoid the
> need to pass any callbacks to g_list_*(). Then by reusing
> range_extend() instead of duplicating effort with range_merge(),
> we cover the corner cases correctly.
>
> Drop the now-unused range_merge() and ranges_can_merge().
>
> Doing this lets test-string-{input,output}-visitor pass under
> valgrind without leaks.
>
> Signed-off-by: Eric Blake 
> ---
>  util/range.c | 75 
> +++-
>  1 file changed, 29 insertions(+), 46 deletions(-)
>
> diff --git a/util/range.c b/util/range.c
> index dd46092..56e6baf 100644
> --- a/util/range.c
> +++ b/util/range.c
> @@ -28,65 +28,48 @@
>   *   - this can not represent a full 0 to ~0x0LL range.
>   */
>
> -/* 0,1 can merge with 1,2 but don't overlap */
> -static bool ranges_can_merge(Range *range1, Range *range2)
> +/* Return -1 if @a < @b, 1 if greater, and 0 if they touch or overlap. */
> +static inline int range_compare(Range *a, Range *b)
>  {
> -return !(range1->end < range2->begin || range2->end < range1->begin);
> -}
> -
> -static void range_merge(Range *range1, Range *range2)
> -{
> -if (range1->end < range2->end) {
> -range1->end = range2->end;
> -}
> -if (range1->begin > range2->begin) {
> -range1->begin = range2->begin;
> -}
> -}
> -
> -static gint range_compare(gconstpointer a, gconstpointer b)
> -{
> -Range *ra = (Range *)a, *rb = (Range *)b;
> -if (ra->begin == rb->begin && ra->end == rb->end) {
> -return 0;
> -} else if (range_get_last(ra->begin, ra->end) <
> -   range_get_last(rb->begin, rb->end)) {
> +/* Zero a->end is 2**64, and therefore not less than any b->begin */
> +if (a->end && a->end < b->begin) {
>  return -1;
> -} else {
> +}
> +if (b->end && a->begin > b->end) {
>  return 1;
>  }
> +return 0;
>  }
>
> +/* Insert @data into @list of ranges; caller no longer owns @data */
>  GList *range_list_insert(GList *list, Range *data)
>  {
> -GList *l, *next = NULL;
> -Range *r, *nextr;
> +GList *l;
>
> -if (!list) {
> -list = g_list_insert_sorted(list, data, range_compare);
> -return list;
> +/* Range lists require no empty ranges */
> +assert(data->begin < data->end || (data->begin && !data->end));

Consider { begin = 0, end = 0 }.

Since zero @end means 2^64, this encodes the (non-empty) range
0..2^64-1.

range.h's comment

 * Notes:
 *   - ranges must not wrap around 0, but can include the last byte ~0x0LL.
 *   - this can not represent a full 0 to ~0x0LL range.

appears to be wrong.  The actual limitation is "can't represent ranges
wrapping around zero, and can't represent the empty range starting at
zero."  Would you like to correct it?

I'm afraid range.h is too clever by half.

> +
> +for (l = list; l && range_compare(l->data, data) < 0; l = l->next) {
> +/* Skip all list elements strictly less than data */
>  }

Let's put the comment before the loop.  It describes the whole loop.
Also makes the emptiness of the body more obvious.

>
> -nextr = data;
> -l = list;
> -while (l && l != next && nextr) {
> -r = l->data;
> -if (ranges_can_merge(r, nextr)) {
> -range_merge(r, nextr);
> -l = g_list_remove_link(l, next);
> -next = g_list_next(l);
> -if (next) {
> -nextr = next->data;
> -} else {
> -nextr = NULL;
> -}
> -} else {
> -l = g_list_next(l);
> -}
> +if (!l || range_compare(l->data, data) > 0) {
> +/* Rest of the list (if any) is strictly greater than @data */
> +return g_list_insert_before(list, l, data);
>  }
>
> -if (!l) {
> -list = g_list_insert_sorted(list, data, range_compare);
> +/* Current list element overlaps @data, merge the two */
> +range_extend(l->data, data);
> +g_free(data);
> +
> +/* Merge any subsequent list elements that now also overlap 

Re: [Qemu-devel] [PATCH] block/raw-posix: Fix error_report of mounting message

2016-06-01 Thread Fam Zheng
On Wed, 06/01 15:08, Wei Jiangang wrote:
> Use a single error_printf to replace triple error_report.
> 
> Signed-off-by: Wei Jiangang 
> ---
>  block/raw-posix.c | 10 +-
>  1 file changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/block/raw-posix.c b/block/raw-posix.c
> index a4f5a1b..141b01a 100644
> --- a/block/raw-posix.c
> +++ b/block/raw-posix.c
> @@ -2061,11 +2061,11 @@ static bool setup_cdrom(char *bsd_path, Error **errp)
>  /* Prints directions on mounting and unmounting a device */
>  static void print_unmounting_directions(const char *file_name)
>  {
> -error_report("If device %s is mounted on the desktop, unmount"
> - " it first before using it in QEMU", file_name);
> -error_report("Command to unmount device: diskutil unmountDisk %s",
> - file_name);
> -error_report("Command to mount device: diskutil mountDisk %s", 
> file_name);
> +error_printf("If device %s is mounted on the desktop, unmount"
> +" it first before using it in QEMU\n"
> +"Command to unmount device: diskutil unmountDisk %s\n"
> +"Command to mount device: diskutil mountDisk %s\n",
> +file_name, file_name, file_name);

I'd say it's less readable with a worse alignment to the parameters.

Fam



[Qemu-devel] [PATCH] block/raw-posix: Fix error_report of mounting message

2016-06-01 Thread Wei Jiangang
Use a single error_printf to replace triple error_report.

Signed-off-by: Wei Jiangang 
---
 block/raw-posix.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/block/raw-posix.c b/block/raw-posix.c
index a4f5a1b..141b01a 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -2061,11 +2061,11 @@ static bool setup_cdrom(char *bsd_path, Error **errp)
 /* Prints directions on mounting and unmounting a device */
 static void print_unmounting_directions(const char *file_name)
 {
-error_report("If device %s is mounted on the desktop, unmount"
- " it first before using it in QEMU", file_name);
-error_report("Command to unmount device: diskutil unmountDisk %s",
- file_name);
-error_report("Command to mount device: diskutil mountDisk %s", file_name);
+error_printf("If device %s is mounted on the desktop, unmount"
+" it first before using it in QEMU\n"
+"Command to unmount device: diskutil unmountDisk %s\n"
+"Command to mount device: diskutil mountDisk %s\n",
+file_name, file_name, file_name);
 }
 
 #endif /* defined(__APPLE__) && defined(__MACH__) */
-- 
1.9.3






Re: [Qemu-devel] [PULL 03/31] hw/char: QOM'ify escc.c

2016-06-01 Thread xiaoqiang zhao



在 2016年06月01日 15:04, Mark Cave-Ayland 写道:

On 01/06/16 04:06, xiaoqiang zhao wrote:


在 2016年6月1日,06:13,Mark Cave-Ayland  写道:


On 27/05/16 11:06, Paolo Bonzini wrote:

From: xiaoqiang zhao 

* Drop the old SysBus init function and use instance_init
* Call qemu_chr_add_handlers in the realize callback

Signed-off-by: xiaoqiang zhao 
Message-Id: <1464158344-12266-2-git-send-email-zxq_yx_...@163.com>
Signed-off-by: Paolo Bonzini 
---
hw/char/escc.c | 30 +++---
1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/hw/char/escc.c b/hw/char/escc.c
index 7bf09a0..8e6a7df 100644
--- a/hw/char/escc.c
+++ b/hw/char/escc.c
@@ -983,9 +983,10 @@ void slavio_serial_ms_kbd_init(hwaddr base, qemu_irq irq,
 sysbus_mmio_map(s, 0, base);
}

-static int escc_init1(SysBusDevice *dev)
+static void escc_init1(Object *obj)
{
-ESCCState *s = ESCC(dev);
+ESCCState *s = ESCC(obj);
+SysBusDevice *dev = SYS_BUS_DEVICE(obj);
 unsigned int i;

 s->chn[0].disabled = s->disabled;
@@ -994,17 +995,26 @@ static int escc_init1(SysBusDevice *dev)
 sysbus_init_irq(dev, >chn[i].irq);
 s->chn[i].chn = 1 - i;
 s->chn[i].clock = s->frequency / 2;
-if (s->chn[i].chr) {
-qemu_chr_add_handlers(s->chn[i].chr, serial_can_receive,
-  serial_receive1, serial_event, >chn[i]);
-}
 }
 s->chn[0].otherchn = >chn[1];
 s->chn[1].otherchn = >chn[0];

-memory_region_init_io(>mmio, OBJECT(s), _mem_ops, s, "escc",
+memory_region_init_io(>mmio, obj, _mem_ops, s, "escc",
   ESCC_SIZE << s->it_shift);
 sysbus_init_mmio(dev, >mmio);
+}
+
+static void escc_realize(DeviceState *dev, Error **errp)
+{
+ESCCState *s = ESCC(dev);
+unsigned int i;
+
+for (i = 0; i < 2; i++) {
+if (s->chn[i].chr) {
+qemu_chr_add_handlers(s->chn[i].chr, serial_can_receive,
+  serial_receive1, serial_event, >chn[i]);
+}
+}

 if (s->chn[0].type == mouse) {
 qemu_add_mouse_event_handler(sunmouse_event, >chn[0], 0,
@@ -1014,8 +1024,6 @@ static int escc_init1(SysBusDevice *dev)
 s->chn[1].hs = qemu_input_handler_register((DeviceState *)(>chn[1]),
_handler);
 }
-
-return 0;
}

static Property escc_properties[] = {
@@ -1032,10 +1040,9 @@ static Property escc_properties[] = {
static void escc_class_init(ObjectClass *klass, void *data)
{
 DeviceClass *dc = DEVICE_CLASS(klass);
-SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);

-k->init = escc_init1;
 dc->reset = escc_reset;
+dc->realize = escc_realize;
 dc->vmsd = _escc;
 dc->props = escc_properties;
 set_bit(DEVICE_CATEGORY_INPUT, dc->categories);
@@ -1045,6 +1052,7 @@ static const TypeInfo escc_info = {
 .name  = TYPE_ESCC,
 .parent= TYPE_SYS_BUS_DEVICE,
 .instance_size = sizeof(ESCCState),
+.instance_init = escc_init1,
 .class_init= escc_class_init,
};

Unfortunately this patch causes OpenBIOS to freeze on startup under
qemu-system-ppc (presumably as there is a problem accessing the serial
port). You can reproduce this by starting qemu-system-ppc with no
parameters against the commits below:

Bad : e7c9136977cb99c6eb52c9139f7b8d8b5fa87db9
Good: b138e654a0525f009e7e7c96fc67d74baf3e011b

Note that you'll currently need to use the above two hashes to reproduce
the issue against git master as another regression has just crept in.


ATB,

Mark.



Mark:
Sorry for the inconvenience. This problem is due to an incorrect property 
value in the realize stage. I have fixed this and the test passes.

No problem.


Paolo:  Do i need to send a new version or just this one?

The original patch has already applied to git master, so please send
your fix as a separate patch.


ATB,

Mark.


Ok, coming soon ;-)




Re: [Qemu-devel] [PULL 03/31] hw/char: QOM'ify escc.c

2016-06-01 Thread Mark Cave-Ayland
On 01/06/16 04:06, xiaoqiang zhao wrote:

>> 在 2016年6月1日,06:13,Mark Cave-Ayland  写道:
>>
>>> On 27/05/16 11:06, Paolo Bonzini wrote:
>>>
>>> From: xiaoqiang zhao 
>>>
>>> * Drop the old SysBus init function and use instance_init
>>> * Call qemu_chr_add_handlers in the realize callback
>>>
>>> Signed-off-by: xiaoqiang zhao 
>>> Message-Id: <1464158344-12266-2-git-send-email-zxq_yx_...@163.com>
>>> Signed-off-by: Paolo Bonzini 
>>> ---
>>> hw/char/escc.c | 30 +++---
>>> 1 file changed, 19 insertions(+), 11 deletions(-)
>>>
>>> diff --git a/hw/char/escc.c b/hw/char/escc.c
>>> index 7bf09a0..8e6a7df 100644
>>> --- a/hw/char/escc.c
>>> +++ b/hw/char/escc.c
>>> @@ -983,9 +983,10 @@ void slavio_serial_ms_kbd_init(hwaddr base, qemu_irq 
>>> irq,
>>> sysbus_mmio_map(s, 0, base);
>>> }
>>>
>>> -static int escc_init1(SysBusDevice *dev)
>>> +static void escc_init1(Object *obj)
>>> {
>>> -ESCCState *s = ESCC(dev);
>>> +ESCCState *s = ESCC(obj);
>>> +SysBusDevice *dev = SYS_BUS_DEVICE(obj);
>>> unsigned int i;
>>>
>>> s->chn[0].disabled = s->disabled;
>>> @@ -994,17 +995,26 @@ static int escc_init1(SysBusDevice *dev)
>>> sysbus_init_irq(dev, >chn[i].irq);
>>> s->chn[i].chn = 1 - i;
>>> s->chn[i].clock = s->frequency / 2;
>>> -if (s->chn[i].chr) {
>>> -qemu_chr_add_handlers(s->chn[i].chr, serial_can_receive,
>>> -  serial_receive1, serial_event, 
>>> >chn[i]);
>>> -}
>>> }
>>> s->chn[0].otherchn = >chn[1];
>>> s->chn[1].otherchn = >chn[0];
>>>
>>> -memory_region_init_io(>mmio, OBJECT(s), _mem_ops, s, "escc",
>>> +memory_region_init_io(>mmio, obj, _mem_ops, s, "escc",
>>>   ESCC_SIZE << s->it_shift);
>>> sysbus_init_mmio(dev, >mmio);
>>> +}
>>> +
>>> +static void escc_realize(DeviceState *dev, Error **errp)
>>> +{
>>> +ESCCState *s = ESCC(dev);
>>> +unsigned int i;
>>> +
>>> +for (i = 0; i < 2; i++) {
>>> +if (s->chn[i].chr) {
>>> +qemu_chr_add_handlers(s->chn[i].chr, serial_can_receive,
>>> +  serial_receive1, serial_event, 
>>> >chn[i]);
>>> +}
>>> +}
>>>
>>> if (s->chn[0].type == mouse) {
>>> qemu_add_mouse_event_handler(sunmouse_event, >chn[0], 0,
>>> @@ -1014,8 +1024,6 @@ static int escc_init1(SysBusDevice *dev)
>>> s->chn[1].hs = qemu_input_handler_register((DeviceState 
>>> *)(>chn[1]),
>>>_handler);
>>> }
>>> -
>>> -return 0;
>>> }
>>>
>>> static Property escc_properties[] = {
>>> @@ -1032,10 +1040,9 @@ static Property escc_properties[] = {
>>> static void escc_class_init(ObjectClass *klass, void *data)
>>> {
>>> DeviceClass *dc = DEVICE_CLASS(klass);
>>> -SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
>>>
>>> -k->init = escc_init1;
>>> dc->reset = escc_reset;
>>> +dc->realize = escc_realize;
>>> dc->vmsd = _escc;
>>> dc->props = escc_properties;
>>> set_bit(DEVICE_CATEGORY_INPUT, dc->categories);
>>> @@ -1045,6 +1052,7 @@ static const TypeInfo escc_info = {
>>> .name  = TYPE_ESCC,
>>> .parent= TYPE_SYS_BUS_DEVICE,
>>> .instance_size = sizeof(ESCCState),
>>> +.instance_init = escc_init1,
>>> .class_init= escc_class_init,
>>> };
>>
>> Unfortunately this patch causes OpenBIOS to freeze on startup under
>> qemu-system-ppc (presumably as there is a problem accessing the serial
>> port). You can reproduce this by starting qemu-system-ppc with no
>> parameters against the commits below:
>>
>> Bad : e7c9136977cb99c6eb52c9139f7b8d8b5fa87db9
>> Good: b138e654a0525f009e7e7c96fc67d74baf3e011b
>>
>> Note that you'll currently need to use the above two hashes to reproduce
>> the issue against git master as another regression has just crept in.
>>
>>
>> ATB,
>>
>> Mark.
>>
>>
> 
> Mark:
>Sorry for the inconvenience. This problem is due to an incorrect property 
> value in the realize stage. I have fixed this and the test passes.

No problem.

> Paolo:  Do i need to send a new version or just this one?

The original patch has already applied to git master, so please send
your fix as a separate patch.


ATB,

Mark.




Re: [Qemu-devel] [Qemu-ppc] [PULL 04/12] ppc: tlbie, tlbia and tlbisync are HV only

2016-06-01 Thread Mark Cave-Ayland
On 01/06/16 03:15, David Gibson wrote:

> On Tue, May 31, 2016 at 11:28:49PM +0100, Mark Cave-Ayland wrote:
>> On 31/05/16 01:41, David Gibson wrote:
>>
>>> From: Benjamin Herrenschmidt 
>>>
>>> Not that anything remotely recent supports tlbia but ...
>>>
>>> Signed-off-by: Benjamin Herrenschmidt 
>>> Signed-off-by: David Gibson 
>>> ---
>>>  target-ppc/translate.c | 6 +++---
>>>  1 file changed, 3 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
>>> index dfd3010..690ffd2 100644
>>> --- a/target-ppc/translate.c
>>> +++ b/target-ppc/translate.c
>>> @@ -4858,7 +4858,7 @@ static void gen_tlbie(DisasContext *ctx)
>>>  #if defined(CONFIG_USER_ONLY)
>>>  gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
>>>  #else
>>> -if (unlikely(ctx->pr)) {
>>> +if (unlikely(ctx->pr || !ctx->hv)) {
>>>  gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
>>>  return;
>>>  }
>>> @@ -4879,7 +4879,7 @@ static void gen_tlbsync(DisasContext *ctx)
>>>  #if defined(CONFIG_USER_ONLY)
>>>  gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
>>>  #else
>>> -if (unlikely(ctx->pr)) {
>>> +if (unlikely(ctx->pr || !ctx->hv)) {
>>>  gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
>>>  return;
>>>  }
>>> @@ -4898,7 +4898,7 @@ static void gen_slbia(DisasContext *ctx)
>>>  #if defined(CONFIG_USER_ONLY)
>>>  gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
>>>  #else
>>> -if (unlikely(ctx->pr)) {
>>> +if (unlikely(ctx->pr || !ctx->hv)) {
>>>  gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
>>>  return;
>>>  }
>>
>> Unfortunately this patch breaks qemu-system-ppc for both g3beige and
>> mac99 under TCG causing a freeze in OpenBIOS when starting
>> qemu-system-ppc with no parameters.
> 
> Bother, sorry.
> 
> I think this is because I applied this without the patch that treats
> machines with no hypervisor mode (e.g. Apples) as always being in
> hypervisor mode.

No problem, I can cope for a couple of days or so.

>> Note that there is also another regression that has recently landed in
>> git master so you'll also need to revert
>> e7c9136977cb99c6eb52c9139f7b8d8b5fa87db9 in order to get back to a
>> functioning OpenBIOS.
> 
> I'd prefer to see it fixed rather than just reverted..

Looks like the original author has found the bug, so there should be a
fix coming up for this soon (I only included it here in case you needed
an explicit test case).


ATB,

Mark.




Re: [Qemu-devel] [PATCH v6 00/11] Add param Error ** for msi_init()

2016-06-01 Thread Cao jin

ping again...
because I got a lot of " was undeliverable." feedback.

On 05/24/2016 12:04 PM, Cao jin wrote:

v6 changelog:
1. make "assert ENOSPC" the 1st one in the series, and remove ENOSPC line
of comments of msi_init(). also fix to other minor comments.
2. Because the semantics change, add a hint message for devices that have an
msi/msix property, to help old machine users know what to do
3. update pci_bridge_dev behaviour, because it has a non-msi variant, but it could
never fall back to INTx before the patch. Make it behave like the others
4. mptsas: forgot to assign s->msi_in_use before, now do it.

About test: Only compiled every patch.

cc: Gerd Hoffmann 
cc: John Snow 
cc: Dmitry Fleytman 
cc: Jason Wang 
cc: Michael S. Tsirkin 
cc: Hannes Reinecke 
cc: Paolo Bonzini 
cc: Alex Williamson 
cc: Markus Armbruster 
cc: Marcel Apfelbaum 

Cao jin (11):
   pci core: assert ENOSPC when add capability
   fix some coding style problems
   change pvscsi_init_msi() type to void
   megasas: Fix
   mptsas: change .realize function name
   usb xhci: change msi/msix property type
   intel-hda: change msi property type
   mptsas: change msi property type
   megasas: change msi/msix property type
   pci bridge dev: change msi property type
   pci: Convert msi_init() to Error and fix callers to check it

  hw/audio/intel-hda.c   | 23 ++
  hw/ide/ich.c   | 17 +-
  hw/net/vmxnet3.c   | 44 ++
  hw/pci-bridge/ioh3420.c| 12 --
  hw/pci-bridge/pci_bridge_dev.c | 31 +---
  hw/pci-bridge/xio3130_downstream.c | 11 +++--
  hw/pci-bridge/xio3130_upstream.c   |  8 ++-
  hw/pci/msi.c   | 25 ++--
  hw/pci/pci.c   |  6 ++---
  hw/scsi/megasas.c  | 48 +-
  hw/scsi/mptsas.c   | 32 ++---
  hw/scsi/mptsas.h   |  3 ++-
  hw/scsi/vmw_pvscsi.c   | 10 
  hw/usb/hcd-xhci.c  | 33 +++---
  hw/vfio/pci.c  |  7 --
  include/hw/pci/msi.h   |  3 ++-
  16 files changed, 209 insertions(+), 104 deletions(-)



--
Yours Sincerely,

Cao jin





Re: [Qemu-devel] [PATCH v7 12/17] net_pkt: Extend packet abstraction as required by e1000e functionality

2016-06-01 Thread Dmitry Fleytman

> On 1 Jun 2016, at 07:25 AM, Jason Wang  wrote:
> 
> 
> 
> On 2016年05月31日 15:20, Dmitry Fleytman wrote:
>> From: Dmitry Fleytman 
>> 
>> This patch extends the TX/RX packet abstractions with features that will
>> be used by the e1000e device implementation.
>> 
>> Changes are:
>> 
>>   1. Support iovec lists for RX buffers
>>   2. Deeper RX packets parsing
>>   3. Loopback option for TX packets
>>   4. Extended VLAN headers handling
>>   5. RSS processing for RX packets
>> 
>> Signed-off-by: Dmitry Fleytman 
>> Signed-off-by: Leonid Bloch 
>> ---
>>  hw/net/net_rx_pkt.c| 473 
>> +
>>  hw/net/net_rx_pkt.h| 193 +++-
>>  hw/net/net_tx_pkt.c| 204 +
>>  hw/net/net_tx_pkt.h|  60 ++-
>>  include/net/checksum.h |   4 +-
>>  include/net/eth.h  | 153 +++-
>>  net/checksum.c |   7 +-
>>  net/eth.c  | 410 +-
>>  trace-events   |  40 +
>>  9 files changed, 1336 insertions(+), 208 deletions(-)
> 
> [...]
> 
>>  struct udp_hdr {
>>uint16_t uh_sport;   /* source port */
>>uint16_t uh_dport;   /* destination port */
>> @@ -169,19 +194,22 @@ struct tcp_hdr {
>>  #define PKT_GET_IP_HDR(p) \
>>  ((struct ip_header *)(((uint8_t *)(p)) + eth_get_l2_hdr_length(p)))
>>  #define IP_HDR_GET_LEN(p) \
>> -struct ip_header *)p)->ip_ver_len & 0x0F) << 2)
>> +struct ip_header *)(p))->ip_ver_len & 0x0F) << 2)
>>  #define PKT_GET_IP_HDR_LEN(p) \
>>  (IP_HDR_GET_LEN(PKT_GET_IP_HDR(p)))
>>  #define PKT_GET_IP6_HDR(p)\
>>  ((struct ip6_header *) (((uint8_t *)(p)) + eth_get_l2_hdr_length(p)))
>>  #define IP_HEADER_VERSION(ip) \
>> -((ip->ip_ver_len >> 4)&0xf)
>> +(((ip)->ip_ver_len >> 4) & 0xf)
>> +#define IP4_IS_FRAGMENT(ip) \
>> +((be16_to_cpu((ip)->ip_off) & (IP_OFFMASK | IP_MF)) != 0)
>>#define ETH_P_IP  (0x0800)  /* Internet Protocol 
>> packet  */
>>  #define ETH_P_ARP (0x0806)  /* Address Resolution 
>> packet */
>>  #define ETH_P_IPV6(0x86dd)
>>  #define ETH_P_VLAN(0x8100)
>>  #define ETH_P_DVLAN   (0x88a8)
>> +#define ETH_P_UNKNOWN (0x)
>>  #define VLAN_VID_MASK 0x0fff
>>  #define IP_HEADER_VERSION_4   (4)
>>  #define IP_HEADER_VERSION_6   (6)
>> @@ -258,15 +286,25 @@ get_eth_packet_type(const struct eth_header *ehdr)
>>  }
>>static inline uint32_t
>> -eth_get_l2_hdr_length(const void *p)
>> +eth_get_l2_hdr_length(const struct iovec *iov, int iovcnt)
>>  {
> 
> Looks like this change breaks the above PKT_GET_IP_HDR and PKT_GET_IP6_HDR.
> This will be a problem, e.g. the ENET series depends on this.
> 
> A solution is to keep the current eth_get_l2_hdr_length() and call it in a new
> helper, e.g. eth_get_l2_hdr_length_iov().

Right, sending fixed series.
Thanks!

> 
>> -uint16_t proto = be16_to_cpu(PKT_GET_ETH_HDR(p)->h_proto);
>> -struct vlan_header *hvlan = PKT_GET_VLAN_HDR(p);
>> +uint8_t p[sizeof(struct eth_header) + sizeof(struct vlan_header)];
>> +size_t copied = iov_to_buf(iov, iovcnt, 0, p, ARRAY_SIZE(p));
>> +uint16_t proto;
>> +struct vlan_header *hvlan;
>> +
>> +if (copied < ARRAY_SIZE(p)) {
>> +return copied;
>> +}
>> +
>> +proto = be16_to_cpu(PKT_GET_ETH_HDR(p)->h_proto);
>> +hvlan = PKT_GET_VLAN_HDR(p);
>> +
>>  switch (proto) {
>>  case ETH_P_VLAN:
>>  return sizeof(struct eth_header) + sizeof(struct vlan_header);
>>  case ETH_P_DVLAN:
>> -if (hvlan->h_proto == ETH_P_VLAN) {
>> +if (be16_to_cpu(hvlan->h_proto) == ETH_P_VLAN) {
>>  return sizeof(struct eth_header) + 2 * sizeof(struct 
>> vlan_header);
>>  } else {
>>  return sizeof(struct eth_header) + sizeof(struct vlan_header);
>> @@ -290,51 +328,67 @@ eth_get_pkt_tci(const void *p)
>>  }
>>  }
>>  
> 
> [...]




Re: [Qemu-devel] [PATCH] ui: sdl2: Fix crash with -nodefaults -sdl

2016-06-01 Thread Gerd Hoffmann
On Di, 2016-05-31 at 22:49 +0100, Peter Maydell wrote:
> On 31 May 2016 at 21:56, Cole Robinson  wrote:
> > $ ./x86_64-softmmu/qemu-system-x86_64 -nodefaults -sdl
> > Segmentation fault (core dumped)
> >
> > 0  0x559631af in sdl_display_init (ds=, 
> > full_screen=0, no_frame=) at ui/sdl2.c:822
> > 1  0x556c8a9a in main (argc=, argv=, 
> > envp=) at vl.c:4527
> >
> > Setting the window icon assumes there's always an SDL output window
> > available, which isn't the case when there's no video device,
> > like via -nodefaults. So don't try to set a window icon when we don't
> > have any outputs.
> 
> Presumably we also crash for boards like the arm 'virt'
> which just don't have a display device at all...

There are still the vc's for monitor and serial.

cheers,
  Gerd




Re: [Qemu-devel] [PATCH] ui: sdl2: Fix crash with -nodefaults -sdl

2016-06-01 Thread Gerd Hoffmann
On Di, 2016-05-31 at 16:56 -0400, Cole Robinson wrote:
> $ ./x86_64-softmmu/qemu-system-x86_64 -nodefaults -sdl
> Segmentation fault (core dumped)
> 
> 0  0x559631af in sdl_display_init (ds=, full_screen=0, 
> no_frame=) at ui/sdl2.c:822
> 1  0x556c8a9a in main (argc=, argv=, 
> envp=) at vl.c:4527
> 
> Setting the window icon assumes there's always an SDL output window
> available, which isn't the case when there's no video device,
> like via -nodefaults. So don't try to set a window icon when we don't
> have any outputs.

Hmm, I guess we can skip pretty much all of the init in case there are
no outputs:

@@ -794,6 +794,9 @@ void sdl_display_init(DisplayState *ds, int
full_screen, int no_frame)
 }
 }
 sdl2_num_outputs = i;
+if (sdl2_num_outputs == 0) {
+return;
+}
 sdl2_console = g_new0(struct sdl2_console, sdl2_num_outputs);
 for (i = 0; i < sdl2_num_outputs; i++) {
 QemuConsole *con = qemu_console_lookup_by_index(i);


Maybe even move up the loop counting the outputs, so we can skip the
SDL_Init() call too.  We don't get an empty window then.

cheers,
  Gerd




[Qemu-devel] [PATCH v3] vnc: add configurable keyboard delay

2016-06-01 Thread Gerd Hoffmann
Limits the rate kbd events from the vnc server are forwarded to the
guest, so input devices which are typically low-bandwidth can keep
up even on bulky input.

v2: update documentation too.
v3: spell fixes.

Signed-off-by: Gerd Hoffmann 
---
 qemu-options.hx |  8 
 ui/vnc.c| 13 +++--
 ui/vnc.h|  1 +
 3 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/qemu-options.hx b/qemu-options.hx
index 6106520..9f33361 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1410,6 +1410,14 @@ everybody else.  'ignore' completely ignores the shared 
flag and
 allows everybody connect unconditionally.  Doesn't conform to the rfb
 spec but is traditional QEMU behavior.
 
+@item key-delay-ms
+
+Set keyboard delay, for key down and key up events, in milliseconds.
+Default is 1.  Keyboards are low-bandwidth devices, so this slowdown
+can help the device and guest to keep up and not lose events in case
+events are arriving in bulk.  Possible causes for the latter are flaky
+network connections, or scripts for automated testing.
+
 @end table
 ETEXI
 
diff --git a/ui/vnc.c b/ui/vnc.c
index d2ebf1f..ea3b3d4 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -1629,6 +1629,7 @@ static void reset_keys(VncState *vs)
 for(i = 0; i < 256; i++) {
 if (vs->modifiers_state[i]) {
 qemu_input_event_send_key_number(vs->vd->dcl.con, i, false);
+qemu_input_event_send_key_delay(vs->vd->key_delay_ms);
 vs->modifiers_state[i] = 0;
 }
 }
@@ -1638,9 +1639,9 @@ static void press_key(VncState *vs, int keysym)
 {
 int keycode = keysym2scancode(vs->vd->kbd_layout, keysym) & 
SCANCODE_KEYMASK;
 qemu_input_event_send_key_number(vs->vd->dcl.con, keycode, true);
-qemu_input_event_send_key_delay(0);
+qemu_input_event_send_key_delay(vs->vd->key_delay_ms);
 qemu_input_event_send_key_number(vs->vd->dcl.con, keycode, false);
-qemu_input_event_send_key_delay(0);
+qemu_input_event_send_key_delay(vs->vd->key_delay_ms);
 }
 
 static int current_led_state(VncState *vs)
@@ -1792,6 +1793,7 @@ static void do_key_event(VncState *vs, int down, int 
keycode, int sym)
 
 if (qemu_console_is_graphic(NULL)) {
 qemu_input_event_send_key_number(vs->vd->dcl.con, keycode, down);
+qemu_input_event_send_key_delay(vs->vd->key_delay_ms);
 } else {
 bool numlock = vs->modifiers_state[0x45];
 bool control = (vs->modifiers_state[0x1d] ||
@@ -1913,6 +1915,7 @@ static void vnc_release_modifiers(VncState *vs)
 continue;
 }
 qemu_input_event_send_key_number(vs->vd->dcl.con, keycode, false);
+qemu_input_event_send_key_delay(vs->vd->key_delay_ms);
 }
 }
 
@@ -3249,6 +3252,9 @@ static QemuOptsList qemu_vnc_opts = {
 .name = "lock-key-sync",
 .type = QEMU_OPT_BOOL,
 },{
+.name = "key-delay-ms",
+.type = QEMU_OPT_NUMBER,
+},{
 .name = "sasl",
 .type = QEMU_OPT_BOOL,
 },{
@@ -3486,6 +3492,7 @@ void vnc_display_open(const char *id, Error **errp)
 #endif
 int acl = 0;
 int lock_key_sync = 1;
+int key_delay_ms;
 
 if (!vs) {
 error_setg(errp, "VNC display not active");
@@ -3604,6 +3611,7 @@ void vnc_display_open(const char *id, Error **errp)
 
 reverse = qemu_opt_get_bool(opts, "reverse", false);
 lock_key_sync = qemu_opt_get_bool(opts, "lock-key-sync", true);
+key_delay_ms = qemu_opt_get_number(opts, "key-delay-ms", 1);
 sasl = qemu_opt_get_bool(opts, "sasl", false);
 #ifndef CONFIG_VNC_SASL
 if (sasl) {
@@ -3735,6 +3743,7 @@ void vnc_display_open(const char *id, Error **errp)
 }
 #endif
 vs->lock_key_sync = lock_key_sync;
+vs->key_delay_ms = key_delay_ms;
 
 device_id = qemu_opt_get(opts, "display");
 if (device_id) {
diff --git a/ui/vnc.h b/ui/vnc.h
index 81a3261..6568bca 100644
--- a/ui/vnc.h
+++ b/ui/vnc.h
@@ -155,6 +155,7 @@ struct VncDisplay
 DisplayChangeListener dcl;
 kbd_layout_t *kbd_layout;
 int lock_key_sync;
+int key_delay_ms;
 QemuMutex mutex;
 
 QEMUCursor *cursor;
-- 
1.8.3.1




Re: [Qemu-devel] [PATCH 2/2] qapi: Fix memleak in string visitors on int lists

2016-06-01 Thread Markus Armbruster
[I accidentally sent this just to Eric, resending to list...]

Eric Blake  writes:

> Commit 7f8f9ef1 introduced the ability to store a list of
> integers as a sorted list of ranges, but when merging ranges,
> it leaks one or more ranges.  It was also using range_get_last()
> incorrectly within range_compare() (a range is a start/end pair,
> but range_get_last() is for start/len pairs), and will also
> mishandle a range ending in UINT64_MAX (remember, we document
> that no range covers 2**64 bytes, but that ranges that end on
> UINT64_MAX have end < begin).
>
> The whole merge algorithm was rather complex, especially since
> we are hard-coding things to a list of ranges; so just rewrite
> the thing to open-code the traversal and comparisons, making
> the range_compare() helper function give us a nicer answer,
> avoiding the need to pass any callbacks to g_list_*(). And
> reusing range_extend() ensures we cover the corner cases
> correctly.
>
> Drop the now-unused range_merge() and ranges_can_merge().
>
> Doing this lets test-string-{input,output}-visitor pass under
> valgrind without leaks.
>
> CC: qemu-sta...@nongnu.org
> Signed-off-by: Eric Blake 
> ---
>  include/qemu/range.h | 78 
> +---
>  1 file changed, 31 insertions(+), 47 deletions(-)
>
> diff --git a/include/qemu/range.h b/include/qemu/range.h
> index 4a4801b..9955cca 100644
> --- a/include/qemu/range.h
> +++ b/include/qemu/range.h
> @@ -59,67 +59,51 @@ static inline int ranges_overlap(uint64_t first1, 
> uint64_t len1,
>  return !(last2 < first1 || last1 < first2);
>  }
>
> -/* 0,1 can merge with 1,2 but don't overlap */
> -static inline bool ranges_can_merge(Range *range1, Range *range2)
> +/* Return -1 if @a < b, 1 if greater, and 0 if they overlap. */
> +static inline int range_compare(Range *a, Range *b)
>  {
> -return !(range1->end < range2->begin || range2->end < range1->begin);
> -}
> -
> -static inline void range_merge(Range *range1, Range *range2)
> -{
> -if (range1->end < range2->end) {
> -range1->end = range2->end;
> -}
> -if (range1->begin > range2->begin) {
> -range1->begin = range2->begin;
> -}
> -}
> -
> -static inline gint range_compare(gconstpointer a, gconstpointer b)
> -{
> -Range *ra = (Range *)a, *rb = (Range *)b;
> -if (ra->begin == rb->begin && ra->end == rb->end) {
> -return 0;
> -} else if (range_get_last(ra->begin, ra->end) <
> -   range_get_last(rb->begin, rb->end)) {
> +if (a->end && a->end < b->begin) {

This gave me pause.  It's owed to Range's subtle semantics.  Zero @begin
means zero, but zero @end means 2^64!  Zero a->end cannot be less than
any b->begin, so this conditional computes "a's end < b's begin", or in
other words "a ends before b".  Correct.

>  return -1;
> -} else {
> +}
> +if (b->end && a->begin > b->end) {
>  return 1;
>  }
> +return 0;
>  }
>
> +/* Insert @data into @list of ranges; caller no longer owns @data */
>  static inline GList *range_list_insert(GList *list, Range *data)
>  {
> -GList *l, *next = NULL;
> -Range *r, *nextr;
> +GList *l = list;
>
> -if (!list) {
> -list = g_list_insert_sorted(list, data, range_compare);
> -return list;
> +/* Range lists require no empty ranges */
> +assert(data->begin || data->end);

Uh, wouldn't { begin = 1, end = 1 } be empty, too?  

Do you mean assert(data->begin < data->end || !data->end)?

> +
> +/* Skip all list elements strictly less than data */
> +while (l && range_compare(l->data, data) < 0) {
> +l = l->next;
> +}

Recommend

   for (l = list; l && range_compare(l->data, data) < 0; l = l->next)
   ;

> +
> +/* If no list, or rest of list exceeds data, insert data and done */

I understand what you mean, but "exceeds" seems less than clear.
Perhaps: "If the rest of the list (if any) is strictly greater than
@data".

> +if (!l || range_compare(l->data, data) > 0) {
> +return g_list_insert_before(list, l, data);
>  }
>
> -nextr = data;
> -l = list;
> -while (l && l != next && nextr) {
> -r = l->data;
> -if (ranges_can_merge(r, nextr)) {
> -range_merge(r, nextr);
> -l = g_list_remove_link(l, next);
> -next = g_list_next(l);
> -if (next) {
> -nextr = next->data;
> -} else {
> -nextr = NULL;
> -}
> -} else {
> -l = g_list_next(l);
> +/* Merge data into current list element */

Suggest: /* Current list element overlaps @data, merge the two */

> +range_extend(l->data, data);
> +g_free(data);
> +
> +/* Merge any subsequent list elements that now also overlap */
> +while (l->next && range_compare(l->data, l->next->data) == 0) {
> +range_extend(l->data, l->next->data);
> +

<    1   2   3