[Qemu-devel] [RFC PATCH V9 2/7] colo-base: add colo-base to define and handle packet

2016-07-21 Thread Zhang Chen
COLO-base is used by colo-compare and filter-rewriter.
It lets them share common data structures, such as the net packet,
and other common functions.

Signed-off-by: Zhang Chen 
Signed-off-by: Li Zhijian 
Signed-off-by: Wen Congyang 
---
 net/Makefile.objs  |   1 +
 net/colo-base.c|  74 +
 net/colo-base.h|  38 +
 net/colo-compare.c | 119 -
 trace-events   |   3 ++
 5 files changed, 233 insertions(+), 2 deletions(-)
 create mode 100644 net/colo-base.c
 create mode 100644 net/colo-base.h

diff --git a/net/Makefile.objs b/net/Makefile.objs
index ba92f73..119589f 100644
--- a/net/Makefile.objs
+++ b/net/Makefile.objs
@@ -17,3 +17,4 @@ common-obj-y += filter.o
 common-obj-y += filter-buffer.o
 common-obj-y += filter-mirror.o
 common-obj-y += colo-compare.o
+common-obj-y += colo-base.o
diff --git a/net/colo-base.c b/net/colo-base.c
new file mode 100644
index 000..f5d5de9
--- /dev/null
+++ b/net/colo-base.c
@@ -0,0 +1,74 @@
+/*
+ * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
+ * (a.k.a. Fault Tolerance or Continuous Replication)
+ *
+ * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
+ * Copyright (c) 2016 FUJITSU LIMITED
+ * Copyright (c) 2016 Intel Corporation
+ *
+ * Author: Zhang Chen 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "net/colo-base.h"
+
+int parse_packet_early(Packet *pkt)
+{
+int network_length;
+uint8_t *data = pkt->data;
+uint16_t l3_proto;
+ssize_t l2hdr_len = eth_get_l2_hdr_length(data);
+
+if (pkt->size < ETH_HLEN) {
+error_report("pkt->size < ETH_HLEN");
+return 1;
+}
+pkt->network_layer = data + ETH_HLEN;
+l3_proto = eth_get_l3_proto(data, l2hdr_len);
+if (l3_proto != ETH_P_IP) {
+return 1;
+}
+
+network_length = pkt->ip->ip_hl * 4;
+if (pkt->size < ETH_HLEN + network_length) {
+error_report("pkt->size < network_layer + network_length");
+return 1;
+}
+pkt->transport_layer = pkt->network_layer + network_length;
+if (!pkt->transport_layer) {
+error_report("pkt->transport_layer is valid");
+return 1;
+}
+
+return 0;
+}
+
+Packet *packet_new(const void *data, int size)
+{
+Packet *pkt = g_slice_new(Packet);
+
+pkt->data = g_memdup(data, size);
+pkt->size = size;
+
+return pkt;
+}
+
+void packet_destroy(void *opaque, void *user_data)
+{
+Packet *pkt = opaque;
+
+g_free(pkt->data);
+g_slice_free(Packet, pkt);
+}
+
+/*
+ * Clear hashtable, stop this hash growing really huge
+ */
+void connection_hashtable_reset(GHashTable *connection_track_table)
+{
+g_hash_table_remove_all(connection_track_table);
+}
diff --git a/net/colo-base.h b/net/colo-base.h
new file mode 100644
index 000..48835e7
--- /dev/null
+++ b/net/colo-base.h
@@ -0,0 +1,38 @@
+/*
+ * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
+ * (a.k.a. Fault Tolerance or Continuous Replication)
+ *
+ * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
+ * Copyright (c) 2016 FUJITSU LIMITED
+ * Copyright (c) 2016 Intel Corporation
+ *
+ * Author: Zhang Chen 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_COLO_BASE_H
+#define QEMU_COLO_BASE_H
+
+#include "slirp/slirp.h"
+#include "qemu/jhash.h"
+
+#define HASHTABLE_MAX_SIZE 16384
+
+typedef struct Packet {
+void *data;
+union {
+uint8_t *network_layer;
+struct ip *ip;
+};
+uint8_t *transport_layer;
+int size;
+} Packet;
+
+int parse_packet_early(Packet *pkt);
+void connection_hashtable_reset(GHashTable *connection_track_table);
+Packet *packet_new(const void *data, int size);
+void packet_destroy(void *opaque, void *user_data);
+
+#endif /* QEMU_COLO_BASE_H */
diff --git a/net/colo-compare.c b/net/colo-compare.c
index 0402958..7c52cc8 100644
--- a/net/colo-compare.c
+++ b/net/colo-compare.c
@@ -27,13 +27,38 @@
 #include "sysemu/char.h"
 #include "qemu/sockets.h"
 #include "qapi-visit.h"
+#include "net/colo-base.h"
+#include "trace.h"
 
 #define TYPE_COLO_COMPARE "colo-compare"
 #define COLO_COMPARE(obj) \
 OBJECT_CHECK(CompareState, (obj), TYPE_COLO_COMPARE)
 
 #define COMPARE_READ_LEN_MAX NET_BUFSIZE
+#define MAX_QUEUE_SIZE 1024
 
+/*
+  + CompareState ++
+  |   |
+  +---+   +---+ +---+
+  |conn list  +--->conn   +->conn   |
+  +---+   +---+ +---+
+  |   | |   | |  |
+  

[Qemu-devel] [RFC PATCH V9 5/7] qemu-char: Add qemu_chr_add_handlers_full() for GMaincontext

2016-07-21 Thread Zhang Chen
Add the qemu_chr_add_handlers_full() API. We can use
this API to pass in a GMainContext, so that the handlers run
in that context rather than in the main loop.
This follows comments from Daniel P. Berrange.

Cc: Daniel P . Berrange 
Cc: Paolo Bonzini 

Signed-off-by: Zhang Chen 
Signed-off-by: Li Zhijian 
Signed-off-by: Wen Congyang 
---
 include/sysemu/char.h |  11 -
 qemu-char.c   | 119 +++---
 2 files changed, 84 insertions(+), 46 deletions(-)

diff --git a/include/sysemu/char.h b/include/sysemu/char.h
index 307fd8f..86888bc 100644
--- a/include/sysemu/char.h
+++ b/include/sysemu/char.h
@@ -65,7 +65,8 @@ struct CharDriverState {
 int (*chr_sync_read)(struct CharDriverState *s,
  const uint8_t *buf, int len);
 GSource *(*chr_add_watch)(struct CharDriverState *s, GIOCondition cond);
-void (*chr_update_read_handler)(struct CharDriverState *s);
+void (*chr_update_read_handler_full)(struct CharDriverState *s,
+ GMainContext *context);
 int (*chr_ioctl)(struct CharDriverState *s, int cmd, void *arg);
 int (*get_msgfds)(struct CharDriverState *s, int* fds, int num);
 int (*set_msgfds)(struct CharDriverState *s, int *fds, int num);
@@ -388,6 +389,14 @@ void qemu_chr_add_handlers(CharDriverState *s,
IOEventHandler *fd_event,
void *opaque);
 
+/* This API can make handler run in the context what you pass to. */
+void qemu_chr_add_handlers_full(CharDriverState *s,
+IOCanReadHandler *fd_can_read,
+IOReadHandler *fd_read,
+IOEventHandler *fd_event,
+void *opaque,
+GMainContext *context);
+
 void qemu_chr_be_generic_open(CharDriverState *s);
 void qemu_chr_accept_input(CharDriverState *s);
 int qemu_chr_add_client(CharDriverState *s, int fd);
diff --git a/qemu-char.c b/qemu-char.c
index b597ee1..0a45c9e 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -448,11 +448,12 @@ void qemu_chr_fe_printf(CharDriverState *s, const char 
*fmt, ...)
 
 static void remove_fd_in_watch(CharDriverState *chr);
 
-void qemu_chr_add_handlers(CharDriverState *s,
-   IOCanReadHandler *fd_can_read,
-   IOReadHandler *fd_read,
-   IOEventHandler *fd_event,
-   void *opaque)
+void qemu_chr_add_handlers_full(CharDriverState *s,
+IOCanReadHandler *fd_can_read,
+IOReadHandler *fd_read,
+IOEventHandler *fd_event,
+void *opaque,
+GMainContext *context)
 {
 int fe_open;
 
@@ -466,8 +467,9 @@ void qemu_chr_add_handlers(CharDriverState *s,
 s->chr_read = fd_read;
 s->chr_event = fd_event;
 s->handler_opaque = opaque;
-if (fe_open && s->chr_update_read_handler)
-s->chr_update_read_handler(s);
+if (fe_open && s->chr_update_read_handler_full) {
+s->chr_update_read_handler_full(s, context);
+}
 
 if (!s->explicit_fe_open) {
 qemu_chr_fe_set_open(s, fe_open);
@@ -480,6 +482,16 @@ void qemu_chr_add_handlers(CharDriverState *s,
 }
 }
 
+void qemu_chr_add_handlers(CharDriverState *s,
+   IOCanReadHandler *fd_can_read,
+   IOReadHandler *fd_read,
+   IOEventHandler *fd_event,
+   void *opaque)
+{
+qemu_chr_add_handlers_full(s, fd_can_read, fd_read,
+   fd_event, opaque, NULL);
+}
+
 static int null_chr_write(CharDriverState *chr, const uint8_t *buf, int len)
 {
 return len;
@@ -717,7 +729,8 @@ static void mux_chr_event(void *opaque, int event)
 mux_chr_send_event(d, i, event);
 }
 
-static void mux_chr_update_read_handler(CharDriverState *chr)
+static void mux_chr_update_read_handler_full(CharDriverState *chr,
+ GMainContext *context)
 {
 MuxDriver *d = chr->opaque;
 
@@ -731,8 +744,10 @@ static void mux_chr_update_read_handler(CharDriverState 
*chr)
 d->chr_event[d->mux_cnt] = chr->chr_event;
 /* Fix up the real driver with mux routines */
 if (d->mux_cnt == 0) {
-qemu_chr_add_handlers(d->drv, mux_chr_can_read, mux_chr_read,
-  mux_chr_event, chr);
+qemu_chr_add_handlers_full(d->drv, mux_chr_can_read,
+   mux_chr_read,
+   mux_chr_event,
+   chr, context);
 }
 if (d->focus != -1) {
 mux_chr_send_event(d, d->focus, CHR_EVENT_MUX_OUT);
@@ -813,7 +828,7 @@ 

[Qemu-devel] [RFC PATCH V9 6/7] colo-compare: introduce packet comparison thread

2016-07-21 Thread Zhang Chen
If the primary packet is the same as the secondary packet,
we send the primary packet and drop the secondary
packet; otherwise we notify the COLO frame to do a checkpoint.
If a primary packet arrives but the secondary packet does not,
then after REGULAR_PACKET_CHECK_MS milliseconds we mark
the primary packet as old_packet and do a checkpoint.

Signed-off-by: Zhang Chen 
Signed-off-by: Li Zhijian 
Signed-off-by: Wen Congyang 
---
 net/colo-base.c|   1 +
 net/colo-base.h|   3 +
 net/colo-compare.c | 212 +
 trace-events   |   2 +
 4 files changed, 218 insertions(+)

diff --git a/net/colo-base.c b/net/colo-base.c
index 7e91dec..eb1b631 100644
--- a/net/colo-base.c
+++ b/net/colo-base.c
@@ -132,6 +132,7 @@ Packet *packet_new(const void *data, int size)
 
 pkt->data = g_memdup(data, size);
 pkt->size = size;
+pkt->creation_ms = qemu_clock_get_ms(QEMU_CLOCK_HOST);
 
 return pkt;
 }
diff --git a/net/colo-base.h b/net/colo-base.h
index 0505608..06d6dca 100644
--- a/net/colo-base.h
+++ b/net/colo-base.h
@@ -17,6 +17,7 @@
 
 #include "slirp/slirp.h"
 #include "qemu/jhash.h"
+#include "qemu/timer.h"
 
 #define HASHTABLE_MAX_SIZE 16384
 
@@ -28,6 +29,8 @@ typedef struct Packet {
 };
 uint8_t *transport_layer;
 int size;
+/* Time of packet creation, in wall clock ms */
+int64_t creation_ms;
 } Packet;
 
 typedef struct ConnectionKey {
diff --git a/net/colo-compare.c b/net/colo-compare.c
index 5f87710..e020edc 100644
--- a/net/colo-compare.c
+++ b/net/colo-compare.c
@@ -36,6 +36,8 @@
 
 #define COMPARE_READ_LEN_MAX NET_BUFSIZE
 #define MAX_QUEUE_SIZE 1024
+/* TODO: Should be configurable */
+#define REGULAR_PACKET_CHECK_MS 3000
 
 /*
   + CompareState ++
@@ -83,6 +85,10 @@ typedef struct CompareState {
 GQueue unprocessed_connections;
 /* proxy current hash size */
 uint32_t hashtable_size;
+/* compare thread, a thread for each NIC */
+QemuThread thread;
+/* Timer used on the primary to find packets that are never matched */
+QEMUTimer *timer;
 } CompareState;
 
 typedef struct CompareClass {
@@ -170,6 +176,112 @@ static int packet_enqueue(CompareState *s, int mode)
 return 0;
 }
 
+/*
+ * The IP packets sent by primary and secondary
+ * will be compared in here
+ * TODO support ip fragment, Out-Of-Order
+ * return:0  means packet same
+ *> 0 || < 0 means packet different
+ */
+static int colo_packet_compare(Packet *ppkt, Packet *spkt)
+{
+trace_colo_compare_ip_info(ppkt->size, inet_ntoa(ppkt->ip->ip_src),
+   inet_ntoa(ppkt->ip->ip_dst), spkt->size,
+   inet_ntoa(spkt->ip->ip_src),
+   inet_ntoa(spkt->ip->ip_dst));
+
+if (ppkt->size == spkt->size) {
+return memcmp(ppkt->data, spkt->data, spkt->size);
+} else {
+return -1;
+}
+}
+
+static int colo_packet_compare_all(Packet *spkt, Packet *ppkt)
+{
+trace_colo_compare_main("compare all");
+return colo_packet_compare(ppkt, spkt);
+}
+
+static void colo_old_packet_check_one(void *opaque_packet,
+  void *opaque_found)
+{
+int64_t now;
+bool *found_old = (bool *)opaque_found;
+Packet *ppkt = (Packet *)opaque_packet;
+
+if (*found_old) {
+/* Someone found an old packet earlier in the queue */
+return;
+}
+
+now = qemu_clock_get_ms(QEMU_CLOCK_HOST);
+if ((now - ppkt->creation_ms) > REGULAR_PACKET_CHECK_MS) {
+trace_colo_old_packet_check_found(ppkt->creation_ms);
+*found_old = true;
+}
+}
+
+static void colo_old_packet_check_one_conn(void *opaque,
+   void *user_data)
+{
+bool found_old = false;
+Connection *conn = opaque;
+
+g_queue_foreach(&conn->primary_list, colo_old_packet_check_one,
+&found_old);
+if (found_old) {
+/* do checkpoint will flush old packet */
+/* TODO: colo_notify_checkpoint();*/
+}
+}
+
+/*
+ * Look for old packets that the secondary hasn't matched,
+ * if we have some then we have to checkpoint to wake
+ * the secondary up.
+ */
+static void colo_old_packet_check(void *opaque)
+{
+CompareState *s = opaque;
+
+g_queue_foreach(&s->conn_list, colo_old_packet_check_one_conn, NULL);
+}
+
+/*
+ * called from the compare thread on the primary
+ * for compare connection
+ */
+static void colo_compare_connection(void *opaque, void *user_data)
+{
+CompareState *s = user_data;
+Connection *conn = opaque;
+Packet *pkt = NULL;
+GList *result = NULL;
+int ret;
+
+while (!g_queue_is_empty(&conn->primary_list) &&
+   !g_queue_is_empty(&conn->secondary_list)) {
+pkt = g_queue_pop_tail(&conn->primary_list);
+result = g_queue_find_custom(&conn->secondary_list,
+  pkt, (GCompareFunc)colo_packet_compare_all);
+
+if (result) {
+  

[Qemu-devel] [RFC PATCH V9 7/7] colo-compare: add TCP, UDP, ICMP packet comparison

2016-07-21 Thread Zhang Chen
We add TCP, UDP and ICMP packet comparison to replace
plain IP packet comparison. This increases the
accuracy of the packet comparison, which means
fewer checkpoints and more efficiency.

Signed-off-by: Zhang Chen 
Signed-off-by: Li Zhijian 
Signed-off-by: Wen Congyang 
---
 net/colo-compare.c | 174 +++--
 trace-events   |   4 ++
 2 files changed, 174 insertions(+), 4 deletions(-)

diff --git a/net/colo-compare.c b/net/colo-compare.c
index e020edc..c7bb5f7 100644
--- a/net/colo-compare.c
+++ b/net/colo-compare.c
@@ -18,6 +18,7 @@
 #include "qapi/qmp/qerror.h"
 #include "qapi/error.h"
 #include "net/net.h"
+#include "net/eth.h"
 #include "net/vhost_net.h"
 #include "qom/object_interfaces.h"
 #include "qemu/iov.h"
@@ -197,9 +198,158 @@ static int colo_packet_compare(Packet *ppkt, Packet *spkt)
 }
 }
 
-static int colo_packet_compare_all(Packet *spkt, Packet *ppkt)
+/*
+ * called from the compare thread on the primary
+ * for compare tcp packet
+ * compare_tcp copied from Dr. David Alan Gilbert's branch
+ */
+static int colo_packet_compare_tcp(Packet *spkt, Packet *ppkt)
+{
+struct tcphdr *ptcp, *stcp;
+int res;
+char *sdebug, *ddebug;
+
+trace_colo_compare_main("compare tcp");
+if (ppkt->size != spkt->size) {
+if (trace_event_get_state(TRACE_COLO_COMPARE_MISCOMPARE)) {
+trace_colo_compare_main("pkt size not same");
+}
+return -1;
+}
+
+ptcp = (struct tcphdr *)ppkt->transport_layer;
+stcp = (struct tcphdr *)spkt->transport_layer;
+
+if (ptcp->th_seq != stcp->th_seq) {
+if (trace_event_get_state(TRACE_COLO_COMPARE_MISCOMPARE)) {
+trace_colo_compare_main("pkt tcp seq not same");
+}
+return -1;
+}
+
+/*
+ * The 'identification' field in the IP header is *very* random
+ * it almost never matches.  Fudge this by ignoring differences in
+ * unfragmented packets; they'll normally sort themselves out if different
+ * anyway, and it should recover at the TCP level.
+ * An alternative would be to get both the primary and secondary to rewrite
+ * somehow; but that would need some sync traffic to sync the state
+ */
+if (ntohs(ppkt->ip->ip_off) & IP_DF) {
+spkt->ip->ip_id = ppkt->ip->ip_id;
+/* and the sum will be different if the IDs were different */
+spkt->ip->ip_sum = ppkt->ip->ip_sum;
+}
+
+res = memcmp(ppkt->data + ETH_HLEN, spkt->data + ETH_HLEN,
+(spkt->size - ETH_HLEN));
+
+if (res != 0 && trace_event_get_state(TRACE_COLO_COMPARE_MISCOMPARE)) {
+sdebug = strdup(inet_ntoa(ppkt->ip->ip_src));
+ddebug = strdup(inet_ntoa(ppkt->ip->ip_dst));
+fprintf(stderr, "%s: src/dst: %s/%s p: seq/ack=%u/%u"
+" s: seq/ack=%u/%u res=%d flags=%x/%x\n", __func__,
+   sdebug, ddebug,
+   ntohl(ptcp->th_seq), ntohl(ptcp->th_ack),
+   ntohl(stcp->th_seq), ntohl(stcp->th_ack),
+   res, ptcp->th_flags, stcp->th_flags);
+
+trace_colo_compare_tcp_miscompare("Primary len", ppkt->size);
+qemu_hexdump((char *)ppkt->data, stderr, "colo-compare", ppkt->size);
+trace_colo_compare_tcp_miscompare("Secondary len", spkt->size);
+qemu_hexdump((char *)spkt->data, stderr, "colo-compare", spkt->size);
+
+g_free(sdebug);
+g_free(ddebug);
+}
+
+return res;
+}
+
+/*
+ * called from the compare thread on the primary
+ * for compare udp packet
+ */
+static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt)
+{
+int ret;
+
+trace_colo_compare_main("compare udp");
+ret = colo_packet_compare(ppkt, spkt);
+
+if (ret) {
+trace_colo_compare_udp_miscompare("primary pkt size", ppkt->size);
+qemu_hexdump((char *)ppkt->data, stderr, "colo-compare", ppkt->size);
+trace_colo_compare_udp_miscompare("Secondary pkt size", spkt->size);
+qemu_hexdump((char *)spkt->data, stderr, "colo-compare", spkt->size);
+}
+
+return ret;
+}
+
+/*
+ * called from the compare thread on the primary
+ * for compare icmp packet
+ */
+static int colo_packet_compare_icmp(Packet *spkt, Packet *ppkt)
 {
-trace_colo_compare_main("compare all");
+int network_length;
+struct icmp *icmp_ppkt, *icmp_spkt;
+
+trace_colo_compare_main("compare icmp");
+network_length = ppkt->ip->ip_hl * 4;
+if (ppkt->size != spkt->size ||
+ppkt->size < network_length + ETH_HLEN) {
+return -1;
+}
+icmp_ppkt = (struct icmp *)(ppkt->data + network_length + ETH_HLEN);
+icmp_spkt = (struct icmp *)(spkt->data + network_length + ETH_HLEN);
+
+if ((icmp_ppkt->icmp_type == icmp_spkt->icmp_type) &&
+(icmp_ppkt->icmp_code == icmp_spkt->icmp_code)) {
+if (icmp_ppkt->icmp_type == ICMP_REDIRECT) {
+if (icmp_ppkt->icmp_gwaddr.s_addr !=
+ 

[Qemu-devel] [RFC PATCH V9 4/7] colo-compare: track connection and enqueue packet

2016-07-21 Thread Zhang Chen
In this patch we use the kernel jhash table to track
connections, and then enqueue net packets like this:

+ CompareState ++
|   |
+---+   +---+ +---+
|conn list  +--->conn   +->conn   |
+---+   +---+ +---+
|   | |   | |  |
+---+ +---v+  +---v++---v+ +---v+
  |primary |  |secondary|primary | |secondary
  |packet  |  |packet  +|packet  | |packet  +
  ++  ++++ ++
  |   | |  |
  +---v+  +---v++---v+ +---v+
  |primary |  |secondary|primary | |secondary
  |packet  |  |packet  +|packet  | |packet  +
  ++  ++++ ++
  |   | |  |
  +---v+  +---v++---v+ +---v+
  |primary |  |secondary|primary | |secondary
  |packet  |  |packet  +|packet  | |packet  +
  ++  ++++ ++

We use conn_list to record connection info.
When we want to enqueue a packet, we first get the
connection from connection_track_table, then push
the packet onto the g_queue (pri/sec) of its own conn.

Signed-off-by: Zhang Chen 
Signed-off-by: Li Zhijian 
Signed-off-by: Wen Congyang 
---
 net/colo-base.c| 108 +
 net/colo-base.h|  30 +++
 net/colo-compare.c |  70 +-
 3 files changed, 198 insertions(+), 10 deletions(-)

diff --git a/net/colo-base.c b/net/colo-base.c
index f5d5de9..7e91dec 100644
--- a/net/colo-base.c
+++ b/net/colo-base.c
@@ -16,6 +16,29 @@
 #include "qemu/error-report.h"
 #include "net/colo-base.h"
 
+uint32_t connection_key_hash(const void *opaque)
+{
+const ConnectionKey *key = opaque;
+uint32_t a, b, c;
+
+/* Jenkins hash */
+a = b = c = JHASH_INITVAL + sizeof(*key);
+a += key->src.s_addr;
+b += key->dst.s_addr;
+c += (key->src_port | key->dst_port << 16);
+__jhash_mix(a, b, c);
+
+a += key->ip_proto;
+__jhash_final(a, b, c);
+
+return c;
+}
+
+int connection_key_equal(const void *key1, const void *key2)
+{
+return memcmp(key1, key2, sizeof(ConnectionKey)) == 0;
+}
+
 int parse_packet_early(Packet *pkt)
 {
 int network_length;
@@ -47,6 +70,62 @@ int parse_packet_early(Packet *pkt)
 return 0;
 }
 
+void fill_connection_key(Packet *pkt, ConnectionKey *key)
+{
+uint32_t tmp_ports;
+
+key->ip_proto = pkt->ip->ip_p;
+
+switch (key->ip_proto) {
+case IPPROTO_TCP:
+case IPPROTO_UDP:
+case IPPROTO_DCCP:
+case IPPROTO_ESP:
+case IPPROTO_SCTP:
+case IPPROTO_UDPLITE:
+tmp_ports = *(uint32_t *)(pkt->transport_layer);
+key->src = pkt->ip->ip_src;
+key->dst = pkt->ip->ip_dst;
+key->src_port = ntohs(tmp_ports & 0xffff);
+key->dst_port = ntohs(tmp_ports >> 16);
+break;
+case IPPROTO_AH:
+tmp_ports = *(uint32_t *)(pkt->transport_layer + 4);
+key->src = pkt->ip->ip_src;
+key->dst = pkt->ip->ip_dst;
+key->src_port = ntohs(tmp_ports & 0xffff);
+key->dst_port = ntohs(tmp_ports >> 16);
+break;
+default:
+key->src_port = 0;
+key->dst_port = 0;
+break;
+}
+}
+
+Connection *connection_new(ConnectionKey *key)
+{
+Connection *conn = g_slice_new(Connection);
+
+conn->ip_proto = key->ip_proto;
+conn->processing = false;
+g_queue_init(&conn->primary_list);
+g_queue_init(&conn->secondary_list);
+
+return conn;
+}
+
+void connection_destroy(void *opaque)
+{
+Connection *conn = opaque;
+
+g_queue_foreach(&conn->primary_list, packet_destroy, NULL);
+g_queue_free(&conn->primary_list);
+g_queue_foreach(&conn->secondary_list, packet_destroy, NULL);
+g_queue_free(&conn->secondary_list);
+g_slice_free(Connection, conn);
+}
+
 Packet *packet_new(const void *data, int size)
 {
 Packet *pkt = g_slice_new(Packet);
@@ -72,3 +151,32 @@ void connection_hashtable_reset(GHashTable 
*connection_track_table)
 {
 g_hash_table_remove_all(connection_track_table);
 }
+
+/* if not found, create a new connection and add to hash table */
+Connection *connection_get(GHashTable *connection_track_table,
+   ConnectionKey *key,
+   uint32_t *hashtable_size)
+{
+Connection *conn = g_hash_table_lookup(connection_track_table, key);
+
+if (conn == NULL) {
+ConnectionKey *new_key = g_memdup(key, sizeof(*key));
+
+conn = connection_new(key);
+
+(*hashtable_size) += 1;
+if 

[Qemu-devel] [RFC PATCH V9 1/7] colo-compare: introduce colo compare initialization

2016-07-21 Thread Zhang Chen
This is a COLO net ASCII figure:

 Primary qemu   
Secondary qemu
+--+   
++
| +-+  |   |  
+---+ |
| | |  |   |  | 
  | |
| |guest|  |   |  | 
   guest  | |
| | |  |   |  | 
  | |
| +---^--+--+  |   |  
+-+++ |
| |  | |   |
^|  |
| |  | |   |
||  |
| |  +--+  |
||  |
|netfilter|  |   | ||  |   
netfilter||  |
| +--+ ---+||  |  
+---+ |
| |   |  |   ||||  |  | 
||  filter excute order   | |
| |   |  |   ||||  |  | 
|| +--->  | |
| |   |  |   ||||  |  | 
||   TCP  | |
| | +-+--+--+ +--v-+  | ++ ||  |  | 
++  +---++---v+rewriter++  ++ | |
| | |   | ||  | || ||  |  | |   
 |  ||  |  || | |
| | |  filter   | |   filter   +>   colo <+ +>  
filter   +--> adjust |   adjust +-->   filter   | | |
| | |  mirror   | | redirector |  | |  compare   | |  ||  | | 
redirector |  | ack|   seq|  | redirector | | |
| | |   | ||  | || |  ||  | |   
 |  ||  |  || | |
| | +^--+ ++  | +-+--+ |  ||  | 
++  ++--+  +---++ | |
| |  | tx rx  |   ||  ||  | 
   txall   |  rx  | |
| |  ||   ||  ||  
+---+ |
| |  ||   ||  ||
   ||
| |  |   filter excute order  |   ||  ||
   ||
| |  |  +---> |   ||  
++|
| +---+   ||   |
|
||||   |
|
+--+   
++
 |guest receive   |guest send
 ||
++v+
|  |
  NOTE: filter direction is rx/tx/all
| tap  |
  rx:receive packets sent to the netdev
|  |
  tx:receive packets sent by the netdev
+--+

In COLO-compare:
Packets coming from the primary char indev will be sent to outdev.
Packets coming from the secondary char dev will be dropped.
colo-compare needs two input chardevs and one output chardev:
primary_in=chardev1-id
secondary_in=chardev2-id
outdev=chardev3-id

usage:

primary:
-netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown
-device 

[Qemu-devel] [RFC PATCH V9 3/7] Jhash: add linux kernel jhashtable in qemu

2016-07-21 Thread Zhang Chen
Jhash is used by colo-compare and filter-rewriter
to save and look up net connection info.

Signed-off-by: Zhang Chen 
Signed-off-by: Li Zhijian 
Signed-off-by: Wen Congyang 
---
 include/qemu/jhash.h | 61 
 1 file changed, 61 insertions(+)
 create mode 100644 include/qemu/jhash.h

diff --git a/include/qemu/jhash.h b/include/qemu/jhash.h
new file mode 100644
index 000..0fcd875
--- /dev/null
+++ b/include/qemu/jhash.h
@@ -0,0 +1,61 @@
+/* jhash.h: Jenkins hash support.
+  *
+  * Copyright (C) 2006. Bob Jenkins (bob_jenk...@burtleburtle.net)
+  *
+  * http://burtleburtle.net/bob/hash/
+  *
+  * These are the credits from Bob's sources:
+  *
+  * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
+  *
+  * These are functions for producing 32-bit hashes for hash table lookup.
+  * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
+  * are externally useful functions.  Routines to test the hash are
+included
+  * if SELF_TEST is defined.  You can use this free for any purpose.
+It's in
+  * the public domain.  It has no warranty.
+  *
+  * Copyright (C) 2009-2010 Jozsef Kadlecsik (kad...@blackhole.kfki.hu)
+  *
+  * I've modified Bob's hash to be useful in the Linux kernel, and
+  * any bugs present are my fault.
+  * Jozsef
+  */
+
+#ifndef QEMU_JHASH_H__
+#define QEMU_JHASH_H__
+
+#include "qemu/bitops.h"
+
+/*
+ * hashtable relation copy from linux kernel jhash
+ */
+
+/* __jhash_mix -- mix 3 32-bit values reversibly. */
+#define __jhash_mix(a, b, c)\
+{   \
+a -= c;  a ^= rol32(c, 4);  c += b; \
+b -= a;  b ^= rol32(a, 6);  a += c; \
+c -= b;  c ^= rol32(b, 8);  b += a; \
+a -= c;  a ^= rol32(c, 16); c += b; \
+b -= a;  b ^= rol32(a, 19); a += c; \
+c -= b;  c ^= rol32(b, 4);  b += a; \
+}
+
+/* __jhash_final - final mixing of 3 32-bit values (a,b,c) into c */
+#define __jhash_final(a, b, c)  \
+{   \
+c ^= b; c -= rol32(b, 14);  \
+a ^= c; a -= rol32(c, 11);  \
+b ^= a; b -= rol32(a, 25);  \
+c ^= b; c -= rol32(b, 16);  \
+a ^= c; a -= rol32(c, 4);   \
+b ^= a; b -= rol32(a, 14);  \
+c ^= b; c -= rol32(b, 24);  \
+}
+
+/* An arbitrary initial parameter */
+#define JHASH_INITVAL   0xdeadbeef
+
+#endif /* QEMU_JHASH_H__ */
-- 
2.7.4






[Qemu-devel] [RFC PATCH V9 0/7] Introduce COLO-compare

2016-07-21 Thread Zhang Chen
COLO-compare is a part of the COLO project. It is used
to compare network packets to help COLO decide
whether to do a checkpoint.

The full version in this github:
https://github.com/zhangckid/qemu/tree/colo-v2.7-proxy-mode-compare-with-colo-base-jul22


v9:
 p5:
  - use chr_update_read_handler_full() replace
the chr_update_read_handler()
  - use io_watch_poll_prepare_full() replace
the io_watch_poll_prepare()
  - use io_watch_poll_funcs_full replace
the io_watch_poll_funcs
  - avoid code duplication

v8:
 p5:
  - add new patch:
qemu-char: Add qemu_chr_add_handlers_full() for GMaincontext

v7:
 p5:
   - add [PATCH]qemu-char: Fix context for g_source_attach()
 in this patch series.

v6: 
 p6:
   - add more commit log.
   - fix icmp comparison to compare all packet.

 p5:
   - add more comments in commit log.
   - change REGULAR_CHECK_MS to REGULAR_PACKET_CHECK_MS
   - make check old packet independent to compare thread
   - remove thread_status

 p4:
   - change this patch only about
 Connection and ConnectionKey.
   - add some comments in commit log.
   - remove mode in fill_connection_key().
   - fix some comments and bug.
   - move colo_conn_state to patch of
 "work with colo-frame"
   - remove conn_list_lock.
   - add MAX_QUEUE_SIZE; if primary_list or
 secondary_list is bigger than MAX_QUEUE_SIZE
 we will drop the packet.

 p3:
   - add new independent kernel jhash patch.

 p2:
   - add new independent colo-base patch.

 p1:
   - add a ascii figure and some comments to explain it
   - move trace.h to p2
   - move QTAILQ_HEAD(, CompareState) net_compares to
 patch of "work with colo-frame"
   - add some comments in qemu-option.hx


v5:
 p3:
- comments from Jason:
  we poll and handle the chardev in the compare thread.
  This way, there's no need for extra
  synchronization with the main loop.
  This depends on another patch:
  qemu-char: Fix context for g_source_attach()
- remove QemuEvent
 p2:
- remove conn->list_lock
 p1:
- move compare_pri/sec_chr_in to p3
- move compare_chr_send to p2

v4:
 p4:
- add some comments
- fix some trace-events
- fix tcp compare error
 p3:
- add rcu_read_lock().
- fix trace name
- fix jason's other comments
- rebase some Dave's branch function
 p2:
- colo_compare_connection(): change g_queue_push_head() to
  g_queue_push_tail() to match the sorted order.
- remove pkt->s
- move data structure to colo-base.h
- add colo-base.c reuse codes for filter-rewriter
- add some filter-rewriter needs struct
- depends on previous SocketReadState patch
 p1:
- except move qemu_chr_add_handlers()
  to colo thread
- remove class_finalize
- remove secondary arp codes
- depends on previous SocketReadState patch

v3:
  - rebase colo-compare to colo-frame v2.7
  - fix most of Dave's comments
(except RCU)
  - add TCP,UDP,ICMP and other packet comparison
  - add trace-event
  - add some comments
  - other bug fix
  - add RFC index
  - add usage in patch 1/4

v2:
  - add jhash.h

v1:
  - initial patch


Zhang Chen (7):
  colo-compare: introduce colo compare initialization
  colo-base: add colo-base to define and handle packet
  Jhash: add linux kernel jhashtable in qemu
  colo-compare: track connection and enqueue packet
  qemu-char: Add qemu_chr_add_handlers_full() for GMaincontext
  colo-compare: introduce packet comparison thread
  colo-compare: add TCP,UDP,ICMP packet comparison

 include/qemu/jhash.h  |  61 
 include/sysemu/char.h |  11 +-
 net/Makefile.objs |   2 +
 net/colo-base.c   | 183 
 net/colo-base.h   |  71 +
 net/colo-compare.c| 765 ++
 qemu-char.c   | 119 +---
 qemu-options.hx   |  38 +++
 trace-events  |   9 +
 vl.c  |   3 +-
 10 files changed, 1215 insertions(+), 47 deletions(-)
 create mode 100644 include/qemu/jhash.h
 create mode 100644 net/colo-base.c
 create mode 100644 net/colo-base.h
 create mode 100644 net/colo-compare.c

-- 
2.7.4






Re: [Qemu-devel] [RFC v1 13/13] target-ppc: introduce opc4 for Expanded Opcode

2016-07-21 Thread Nikunj A Dadhania
David Gibson  writes:

> [ Unknown signature status ]
> On Mon, Jul 18, 2016 at 10:35:17PM +0530, Nikunj A Dadhania wrote:
>> ISA 3.0 has introduced EO - Expanded Opcode. Introduce third level
>> indirect opcode table and corresponding parsing routines.
>> 
>> EO (11:12) Expanded opcode field
>> Formats: XX1
>> 
>> EO (11:15) Expanded opcode field
>> Formats: VX, X, XX2
>> 
>> Signed-off-by: Nikunj A Dadhania 
>> ---
>>  target-ppc/translate.c  |  73 +--
>>  target-ppc/translate_init.c | 103 
>> 
>>  2 files changed, 136 insertions(+), 40 deletions(-)
>> 
>> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
>> index 6c5a4a6..733d68d 100644
>> --- a/target-ppc/translate.c
>> +++ b/target-ppc/translate.c
>> @@ -40,6 +40,7 @@
>>  /* Include definitions for instructions classes and implementations flags */
>>  //#define PPC_DEBUG_DISAS
>>  //#define DO_PPC_STATISTICS
>> +//#define PPC_DUMP_CPU
>>  
>>  #ifdef PPC_DEBUG_DISAS
>>  #  define LOG_DISAS(...) qemu_log_mask(CPU_LOG_TB_IN_ASM, ## __VA_ARGS__)
>> @@ -367,12 +368,15 @@ GEN_OPCODE2(name, onam, opc1, opc2, opc3, inval, type, 
>> PPC_NONE)
>>  #define GEN_HANDLER2_E(name, onam, opc1, opc2, opc3, inval, type, type2)
>>   \
>>  GEN_OPCODE2(name, onam, opc1, opc2, opc3, inval, type, type2)
>>  
>> +#define GEN_HANDLER_E_2(name, opc1, opc2, opc3, opc4, inval, type, type2)   
>>   \
>> +GEN_OPCODE3(name, opc1, opc2, opc3, opc4, inval, type, type2)
>> +
>>  typedef struct opcode_t {
>> -unsigned char opc1, opc2, opc3;
>> +unsigned char opc1, opc2, opc3, opc4;
>>  #if HOST_LONG_BITS == 64 /* Explicitly align to 64 bits */
>> -unsigned char pad[5];
>> +unsigned char pad[4];
>>  #else
>> -unsigned char pad[1];
>> +unsigned char pad[4]; /* 4-byte pad to maintain pad in opcode table */
>
> IIUC the point here is to align entries to the wordsize.  If the
> worsize is 32-bit you shouldn't need any extra padding here.

You are right. The reason I added this here is to keep the code
clean in the GEN_OPCODEx

#define GEN_OPCODE(name, op1, op2, op3, op4, invl, _typ, _typ2)   \
{  \
.opc1 = op1,   \
.opc2 = op2,   \
.opc3 = op3,   \
.opc4 = 0xff,  \
#if HOST_LONG_BITS == 64   \
.pad  = { 0, },\
#endif \
.handler = {   \
.inval1  = invl,   \
.type = _typ,  \
.type2 = _typ2,\
.handler = _##name,\
.oname = stringify(name),  \
}, \
.oname = stringify(name),  \
}

I am fine with both approaches, but thought of the current one as
cleaner, although we would waste 4 bytes per opcode in the 32-bit case.

Regards
Nikunj




Re: [Qemu-devel] [RFC v4] virtio-crypto specification

2016-07-21 Thread Zeng, Xin
On Friday, July 22, 2016 10:53 AM Gonglei (Arei) wrote:
> 
> Hi Xin,
> 
> Thank you so much for your great comments.
> I mostly agree with you, except for some trivial details.
> Please see my below replies.
> 
> And I'll submit V5 next week, and you can finish the asym algos parts if you
> like.
> Let's co-work to finish the virtio-crypto spec, shall we?
> 

That's great. 

> Regards,
> -Gonglei
> 
> 
> > -Original Message-
> > From: Zeng, Xin [mailto:xin.z...@intel.com]
> > Sent: Friday, July 22, 2016 8:48 AM
> > To: Gonglei (Arei); virtio-...@lists.oasis-open.org; qemu-
> de...@nongnu.org
> > Cc: Hanweidong (Randy); Stefan Hajnoczi; Cornelia Huck; m...@redhat.com;
> > Lingli Deng; Jani Kokkonen; Luonengjun; Huangpeng (Peter); Zhoujian (jay,
> > Euler); chenshanxi 00222737; 'Ola liljed...@arm.com'; Varun Sethi
> > Subject: RE: [RFC v4] virtio-crypto specification
> >
> > On Sunday, June 26, 2016 5:35 PM, Gonglei (Arei) Wrote:
> > > Hi all,
> > >
> > > This is the specification (version 4) about a new virtio crypto device.
> > >
> >
> > In general, our comments around this proposal are listed below:
> > 1. Suggest to introduce crypto services into virtio crypto device. The
> services
> > currently defined are CIPHER, MAC, HASH, AEAD, KDF, ASYM, PRIMITIVE.
> 
> Yes, I agree, whether DRBG/NDRBG are included in PRIMITIVE service or
> not?
> If not, we'd better add another separate service.

Yes, I think we can add these two into PRIMITIVE services.

> 
> > 2. Suggest to define a unified crypto request format that is consisted of
> > general header + service specific request,  Where 'general header' is for 
> > all
> > crypto request,  'service specific request' is composed of
> > operation parameter + input data + output data in generally.
> > operation parameter is algorithm-specific parameters,
> > input data is the data should be operated ,
> > output data is the "operation result + result buffer".
> >
> It makes sense. Good.
> 
> > #define VIRTIO_CRYPTO_OPCODE(service, op)   (((service)<<8) | (op))
> > struct virtio_crypto_op_header {
> > #define VIRTIO_CRYPTO_CIPHER_ENCRYPT
> > VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_CIPHER, 0x00)
> > #define VIRTIO_CRYPTO_CIPHER_DECRYPT
> > VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_CIPHER, 0x01)
> > #define VIRTIO_CRYPTO_HASH
> > VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_HASH, 0x00)
> > #define VIRTIO_CRYPTO_MAC
> > VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_MAC, 0x00)
> > #define VIRTIO_CRYPTO_KDF
> > VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_KDF, 0x00)
> > #define VIRTIO_CRYPTO_ASYM_KEY_GEN
> > VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_ASYM, 0x00)
> > #define VIRTIO_CRYPTO_ASYM_KEY_EXCHG
> > VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_ASYM, 0x01)
> > #define VIRTIO_CRYPTO_ASYM_SIGN
> > VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_ASYM, 0x02)
> > #define VIRTIO_CRYPTO_ASYM_VERIFY
> > VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_ASYM, 0x03)
> > #define VIRTIO_CRYPTO_ASYM_ENCRYPT
> > VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_ASYM, 0x04)
> > #define VIRTIO_CRYPTO_ASYM_DECRYPT
> > VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_ASYM, 0x05)
> > #define VIRTIO_CRYPTO_AEAD_ENCRYPT
> > VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_AEAD, 0x00)
> > #define VIRTIO_CRYPTO_AEAD_DECRYPT
> > VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_AEAD, 0x01)
> > #define VIRTIO_CRYPTO_PRIMITIVE
> > VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_PRIMITIVE, 0x00)
> > u32 opcode;
> > u8 algo; /*service-specific algorithms*/
> > u8 flag; /*control flag*/
> 
> We'd better add a U64 session_id property here for service-specific
> algorithms.
> 

Can we put session_id into the parameter field inside the service-specific request?
For ASYM service, it doesn't need session_id.
And for HASH service, it might not need a session_id as well.

> > };
> >
> > Take rsa_sign_request as example,
> > A rsa sign service specific request is defined as:
> > struct virtio_crypto_asym_rsa_sign_req{
> > struct virtio_crypto_rsa_sign_para parameter;
> > struct virtio_crypto_rsa_sign_input idata;
> > struct virtio_crypto_rsa_sign_output odata;
> > };
> >
> > A complete crypto service request is defined as:
> > struct virtio_crypto_op_data_req {
> >struct virtio_crypto_op_header header;
> >   union {
> >struct virtio_crypto_asym_rsa_sign_req
> > rsa_sign_req;
> >/*other service request*/
> >   }u;
> > };
> >
> I wanted to do this in fact. ;)
> 
> > More detailed comments are embedded below:
> >
> > > Changes from v3:
> > >  - Don't use enum is the spec but macros in specific structures. [Michael 
> > > &
> > > Stefan]
> > >  - Add two complete structures for session creation and closing, so that
> > >   the spec is clear on how to lay out the request.  [Stefan]
> > >  - Definite the crypto operation request with assigned structure, in this
> way,
> > >   each data request only occupies *one entry* of the Vring descriptor
> table,
> > >   which *improves* the 

Re: [Qemu-devel] [RFC v1 10/13] target-ppc: add setb instruction

2016-07-21 Thread Nikunj A Dadhania
David Gibson  writes:

> [ Unknown signature status ]
> On Mon, Jul 18, 2016 at 10:35:14PM +0530, Nikunj A Dadhania wrote:
>> From: Vivek Andrew Sha 
>> 
>> Returns:
>>   -1 if bit 0 of CR field is set
>>   0  if bit 1 of CR field is set
>>   1  otherwise.
>
> Um.. that description is pretty inadequate.  Returns where? Which CR
> field?

I will update; it's encoded in the opcode.

Regards
Nikunj




Re: [Qemu-devel] [RFC v1 05/13] target-ppc: add modulo word operations

2016-07-21 Thread Nikunj A Dadhania
David Gibson  writes:

> [ Unknown signature status ]
> On Mon, Jul 18, 2016 at 10:35:09PM +0530, Nikunj A Dadhania wrote:
>> Adding following instructions:
>> 
>> moduw: Modulo Unsigned Word
>> modsw: Modulo Signed Word
>> 
>> Signed-off-by: Nikunj A Dadhania 
>
> As rth has already mentioned this many branches probably means this
> wants a helper.
>
>> ---
>>  target-ppc/translate.c | 48 
>>  1 file changed, 48 insertions(+)
>> 
>> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
>> index d44f7af..487dd94 100644
>> --- a/target-ppc/translate.c
>> +++ b/target-ppc/translate.c
>> @@ -1178,6 +1178,52 @@ GEN_DIVE(divde, divde, 0);
>>  GEN_DIVE(divdeo, divde, 1);
>>  #endif
>>  
>> +static inline void gen_op_arith_modw(DisasContext *ctx, TCGv ret, TCGv arg1,
>> + TCGv arg2, int sign)
>> +{
>> +TCGLabel *l1 = gen_new_label();
>> +TCGLabel *l2 = gen_new_label();
>> +TCGv_i32 t0 = tcg_temp_local_new_i32();
>> +TCGv_i32 t1 = tcg_temp_local_new_i32();
>> +TCGv_i32 t2 = tcg_temp_local_new_i32();
>> +
>> +tcg_gen_trunc_tl_i32(t0, arg1);
>> +tcg_gen_trunc_tl_i32(t1, arg2);
>> +tcg_gen_brcondi_i32(TCG_COND_EQ, t1, 0, l1);

Result for:
 % 0 and ...

>> +if (sign) {
>> +TCGLabel *l3 = gen_new_label();
>> +tcg_gen_brcondi_i32(TCG_COND_NE, t1, -1, l3);
>> +tcg_gen_brcondi_i32(TCG_COND_EQ, t0, INT32_MIN, l1);
>> +gen_set_label(l3);
>
> It's not really clear to me what the logic above is doing.

... For signed case
0x8000_0000 % -1

Is undefined, addressing those cases.

>
>> +tcg_gen_rem_i32(t2, t0, t1);
>> +} else {
>> +tcg_gen_remu_i32(t2, t0, t1);
>> +}
>> +tcg_gen_br(l2);
>> +gen_set_label(l1);
>> +if (sign) {
>> +tcg_gen_sari_i32(t2, t0, 31);
>
> AFAICT this sets t2 to either 0 or -1 depending on the sign of t0,
> which seems like an odd thing to do.

Extending the sign later ...

>> +} else {
>> +tcg_gen_movi_i32(t2, 0);
>> +}
>> +gen_set_label(l2);
>> +tcg_gen_extu_i32_tl(ret, t2);

... Here.

Regards
Nikunj




Re: [Qemu-devel] [RFC v1 05/13] target-ppc: add modulo word operations

2016-07-21 Thread David Gibson
On Mon, Jul 18, 2016 at 10:35:09PM +0530, Nikunj A Dadhania wrote:
> Adding following instructions:
> 
> moduw: Modulo Unsigned Word
> modsw: Modulo Signed Word
> 
> Signed-off-by: Nikunj A Dadhania 

As rth has already mentioned this many branches probably means this
wants a helper.

> ---
>  target-ppc/translate.c | 48 
>  1 file changed, 48 insertions(+)
> 
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index d44f7af..487dd94 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -1178,6 +1178,52 @@ GEN_DIVE(divde, divde, 0);
>  GEN_DIVE(divdeo, divde, 1);
>  #endif
>  
> +static inline void gen_op_arith_modw(DisasContext *ctx, TCGv ret, TCGv arg1,
> + TCGv arg2, int sign)
> +{
> +TCGLabel *l1 = gen_new_label();
> +TCGLabel *l2 = gen_new_label();
> +TCGv_i32 t0 = tcg_temp_local_new_i32();
> +TCGv_i32 t1 = tcg_temp_local_new_i32();
> +TCGv_i32 t2 = tcg_temp_local_new_i32();
> +
> +tcg_gen_trunc_tl_i32(t0, arg1);
> +tcg_gen_trunc_tl_i32(t1, arg2);
> +tcg_gen_brcondi_i32(TCG_COND_EQ, t1, 0, l1);
> +if (sign) {
> +TCGLabel *l3 = gen_new_label();
> +tcg_gen_brcondi_i32(TCG_COND_NE, t1, -1, l3);
> +tcg_gen_brcondi_i32(TCG_COND_EQ, t0, INT32_MIN, l1);
> +gen_set_label(l3);

It's not really clear to me what the logic above is doing.

> +tcg_gen_rem_i32(t2, t0, t1);
> +} else {
> +tcg_gen_remu_i32(t2, t0, t1);
> +}
> +tcg_gen_br(l2);
> +gen_set_label(l1);
> +if (sign) {
> +tcg_gen_sari_i32(t2, t0, 31);

AFAICT this sets t2 to either 0 or -1 depending on the sign of t0,
which seems like an odd thing to do.

> +} else {
> +tcg_gen_movi_i32(t2, 0);
> +}
> +gen_set_label(l2);
> +tcg_gen_extu_i32_tl(ret, t2);
> +tcg_temp_free_i32(t0);
> +tcg_temp_free_i32(t1);
> +tcg_temp_free_i32(t2);
> +}
> +
> +#define GEN_INT_ARITH_MODW(name, opc3, sign)\
> +static void glue(gen_, name)(DisasContext *ctx) \
> +{   \
> +gen_op_arith_modw(ctx, cpu_gpr[rD(ctx->opcode)],\
> +  cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)],   \
> +  sign);\
> +}
> +
> +GEN_INT_ARITH_MODW(modsw, 0x18, 1);
> +GEN_INT_ARITH_MODW(moduw, 0x08, 0);
> +
>  /* mulhw  mulhw. */
>  static void gen_mulhw(DisasContext *ctx)
>  {
> @@ -10244,6 +10290,8 @@ GEN_HANDLER_E(divwe, 0x1F, 0x0B, 0x0D, 0, PPC_NONE, 
> PPC2_DIVE_ISA206),
>  GEN_HANDLER_E(divweo, 0x1F, 0x0B, 0x1D, 0, PPC_NONE, PPC2_DIVE_ISA206),
>  GEN_HANDLER_E(divweu, 0x1F, 0x0B, 0x0C, 0, PPC_NONE, PPC2_DIVE_ISA206),
>  GEN_HANDLER_E(divweuo, 0x1F, 0x0B, 0x1C, 0, PPC_NONE, PPC2_DIVE_ISA206),
> +GEN_HANDLER_E(modsw, 0x1F, 0x0B, 0x18, 0x0001, PPC_NONE, PPC2_ISA300),
> +GEN_HANDLER_E(moduw, 0x1F, 0x0B, 0x08, 0x0001, PPC_NONE, PPC2_ISA300),
>  
>  #if defined(TARGET_PPC64)
>  #undef GEN_INT_ARITH_DIVD

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [Qemu-devel] [RFC v1 10/13] target-ppc: add setb instruction

2016-07-21 Thread David Gibson
On Mon, Jul 18, 2016 at 10:35:14PM +0530, Nikunj A Dadhania wrote:
> From: Vivek Andrew Sha 
> 
> Returns:
>   -1 if bit 0 of CR field is set
>   0  if bit 1 of CR field is set
>   1  otherwise.

Um.. that description is pretty inadequate.  Returns where? Which CR
field?

> Signed-off-by: Vivek Andrew Sha 
> [ reworded commit, used 32bit ops as crf is 32bits ]
> Signed-off-by: Nikunj A Dadhania 
> ---
>  target-ppc/translate.c | 30 ++
>  1 file changed, 30 insertions(+)
> 
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index 8f7ff49..9464942 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -4879,6 +4879,35 @@ static void gen_mtspr(DisasContext *ctx)
>  }
>  }
>  
> +#if defined(TARGET_PPC64)
> +/* setb */
> +static void gen_setb(DisasContext *ctx)
> +{
> +TCGLabel *l1 = gen_new_label();
> +TCGLabel *l2 = gen_new_label();
> +TCGLabel *out = gen_new_label();
> +TCGv_i32 t0 = tcg_temp_local_new_i32();
> +TCGv_i64 ret = tcg_temp_local_new_i64();
> +int crf = crfS(ctx->opcode);
> +
> +tcg_gen_andi_i32(t0, cpu_crf[crf], 0x3);
> +tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l1);
> +tcg_gen_andi_i32(t0, cpu_crf[crf], 0x1);
> +tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 1, l2);
> +tcg_gen_movi_i64(cpu_gpr[rD(ctx->opcode)], 1);
> +tcg_gen_br(out);
> +gen_set_label(l2);
> +tcg_gen_movi_i64(cpu_gpr[rD(ctx->opcode)], -1);
> +tcg_gen_br(out);
> +gen_set_label(l1);
> +tcg_gen_movi_i64(cpu_gpr[rD(ctx->opcode)], 0);
> +gen_set_label(out);
> +
> +tcg_temp_free_i32(t0);
> +tcg_temp_free_i64(ret);
> +}
> +#endif
> +
>  /*** Cache management  
> ***/
>  
>  /* dcbf */
> @@ -10195,6 +10224,7 @@ GEN_HANDLER(mftb, 0x1F, 0x13, 0x0B, 0x0001, 
> PPC_MFTB),
>  GEN_HANDLER(mtcrf, 0x1F, 0x10, 0x04, 0x0801, PPC_MISC),
>  #if defined(TARGET_PPC64)
>  GEN_HANDLER(mtmsrd, 0x1F, 0x12, 0x05, 0x001EF801, PPC_64B),
> +GEN_HANDLER_E(setb, 0x1F, 0x00, 0x04, 0x0003F801, PPC_NONE, PPC2_ISA300),
>  #endif
>  GEN_HANDLER(mtmsr, 0x1F, 0x12, 0x04, 0x001EF801, PPC_MISC),
>  GEN_HANDLER(mtspr, 0x1F, 0x13, 0x0E, 0x, PPC_MISC),

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [Qemu-devel] [RFC v1 13/13] target-ppc: introduce opc4 for Expanded Opcode

2016-07-21 Thread David Gibson
On Mon, Jul 18, 2016 at 10:35:17PM +0530, Nikunj A Dadhania wrote:
> ISA 3.0 has introduced EO - Expanded Opcode. Introduce third level
> indirect opcode table and corresponding parsing routines.
> 
> EO (11:12) Expanded opcode field
> Formats: XX1
> 
> EO (11:15) Expanded opcode field
> Formats: VX, X, XX2
> 
> Signed-off-by: Nikunj A Dadhania 
> ---
>  target-ppc/translate.c  |  73 +--
>  target-ppc/translate_init.c | 103 
> 
>  2 files changed, 136 insertions(+), 40 deletions(-)
> 
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index 6c5a4a6..733d68d 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -40,6 +40,7 @@
>  /* Include definitions for instructions classes and implementations flags */
>  //#define PPC_DEBUG_DISAS
>  //#define DO_PPC_STATISTICS
> +//#define PPC_DUMP_CPU
>  
>  #ifdef PPC_DEBUG_DISAS
>  #  define LOG_DISAS(...) qemu_log_mask(CPU_LOG_TB_IN_ASM, ## __VA_ARGS__)
> @@ -367,12 +368,15 @@ GEN_OPCODE2(name, onam, opc1, opc2, opc3, inval, type, 
> PPC_NONE)
>  #define GEN_HANDLER2_E(name, onam, opc1, opc2, opc3, inval, type, type2) 
>  \
>  GEN_OPCODE2(name, onam, opc1, opc2, opc3, inval, type, type2)
>  
> +#define GEN_HANDLER_E_2(name, opc1, opc2, opc3, opc4, inval, type, type2)
>  \
> +GEN_OPCODE3(name, opc1, opc2, opc3, opc4, inval, type, type2)
> +
>  typedef struct opcode_t {
> -unsigned char opc1, opc2, opc3;
> +unsigned char opc1, opc2, opc3, opc4;
>  #if HOST_LONG_BITS == 64 /* Explicitly align to 64 bits */
> -unsigned char pad[5];
> +unsigned char pad[4];
>  #else
> -unsigned char pad[1];
> +unsigned char pad[4]; /* 4-byte pad to maintain pad in opcode table */

IIUC the point here is to align entries to the wordsize.  If the
wordsize is 32-bit you shouldn't need any extra padding here.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [Qemu-devel] [RFC v1 09/13] target-ppc: add cmpeqb instruction

2016-07-21 Thread David Gibson
On Mon, Jul 18, 2016 at 10:35:13PM +0530, Nikunj A Dadhania wrote:
> From: Swapnil Bokade 
> 
> Search a byte in the stream of 8bytes provided in the register
> 
> Signed-off-by: Sandipan Das 
> [ Modified the logic to use lesser temporaries ]
> Signed-off-by: Nikunj A Dadhania 

rth's reference may obsolete the suggestions below.

> ---
>  target-ppc/translate.c | 27 +++
>  1 file changed, 27 insertions(+)
> 
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index a57f7dd..8f7ff49 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -856,6 +856,32 @@ static void gen_cmprb(DisasContext *ctx)
>  tcg_temp_free(src2hi);
>  }
>  
> +/* cmpeqb */
> +static void gen_cmpeqb(DisasContext *ctx)
> +{
> +TCGLabel *l1 = gen_new_label();
> +TCGLabel *l2 = gen_new_label();
> +TCGv src1 = tcg_temp_local_new();
> +TCGv t0 = tcg_temp_local_new();
> +TCGv arg1 = cpu_gpr[rB(ctx->opcode)];
> +int i;
> +
> +tcg_gen_andi_tl(src1, cpu_gpr[rA(ctx->opcode)], 0xFF);
> +for (i = 0; i < 64; i += 8) {
> +tcg_gen_shri_tl(t0, arg1, i);
> +tcg_gen_andi_tl(t0, t0, 0xFF);

Shifting direct from the original arg each time seems awkward when you
can just shift a working reg by 8 bits each loop.  I suspect that
could save you a temporary.

> +tcg_gen_brcond_tl(TCG_COND_EQ, src1, t0, l1);
> +}
> +tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)], 0);
> +tcg_gen_br(l2);
> +gen_set_label(l1);
> +/* Set match bit, i.e. CRF_GT */
> +tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)], 1 << CRF_GT);
> +gen_set_label(l2);

You should only need one branch at most, either by initializing the
CRF first, or by moving a variable to it.

> +tcg_temp_free(src1);
> +tcg_temp_free(t0);
> +}
> +
>  /* isel (PowerPC 2.03 specification) */
>  static void gen_isel(DisasContext *ctx)
>  {
> @@ -10040,6 +10066,7 @@ GEN_HANDLER(cmp, 0x1F, 0x00, 0x00, 0x0040, 
> PPC_INTEGER),
>  GEN_HANDLER(cmpi, 0x0B, 0xFF, 0xFF, 0x0040, PPC_INTEGER),
>  GEN_HANDLER(cmpl, 0x1F, 0x00, 0x01, 0x0040, PPC_INTEGER),
>  GEN_HANDLER(cmpli, 0x0A, 0xFF, 0xFF, 0x0040, PPC_INTEGER),
> +GEN_HANDLER_E(cmpeqb, 0x1F, 0x00, 0x07, 0x0060, PPC_NONE, PPC2_ISA300),
>  GEN_HANDLER_E(cmpb, 0x1F, 0x1C, 0x0F, 0x0001, PPC_NONE, PPC2_ISA205),
>  GEN_HANDLER_E(cmprb, 0x1F, 0x00, 0x06, 0x0041, PPC_NONE, PPC2_ISA300),
>  GEN_HANDLER(isel, 0x1F, 0x0F, 0xFF, 0x0001, PPC_ISEL),

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


[Qemu-devel] [PATCH] linux-user: correctly pack target_epoll_event for i386 target

2016-07-21 Thread Icenowy Zheng
According to comments in /usr/include/linux/eventpoll.h, x86_64 has
the same memory layout of struct target_epoll_event as i386. So on an
aligned host, if x86_64 needs to be packed, i386 needs to be packed as well.

This has been tested with an i386 guest on an arm host: without the
patch, wineserver crashes (core).

Signed-off-by: Icenowy Zheng 
---
 linux-user/syscall_defs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h
index b43966e..7380bf5 100644
--- a/linux-user/syscall_defs.h
+++ b/linux-user/syscall_defs.h
@@ -2547,7 +2547,7 @@ struct target_mq_attr {
 #define FUTEX_CMD_MASK  ~(FUTEX_PRIVATE_FLAG | FUTEX_CLOCK_REALTIME)
 
 #ifdef CONFIG_EPOLL
-#if defined(TARGET_X86_64)
+#if defined(TARGET_X86_64) || defined(TARGET_I386)
 #define TARGET_EPOLL_PACKED QEMU_PACKED
 #else
 #define TARGET_EPOLL_PACKED
-- 
2.9.0




Re: [Qemu-devel] [RFC v4] virtio-crypto specification

2016-07-21 Thread Zeng, Xin
On Sunday, June 26, 2016 5:35 PM, Gonglei (Arei) Wrote:
> Hi all,
> 
> This is the specification (version 4) about a new virtio crypto device.
> 

In general, our comments around this proposal are listed below:
1. Suggest to introduce crypto services into virtio crypto device. The services 
currently defined are CIPHER, MAC, HASH, AEAD, KDF, ASYM, PRIMITIVE.
2. Suggest to define a unified crypto request format that consists of a
general header + service specific request,  Where 'general header' is for all 
crypto request,  'service specific request' is composed of 
operation parameter + input data + output data in generally. 
operation parameter is algorithm-specific parameters, 
input data is the data to be operated on,
output data is the "operation result + result buffer".

#define VIRTIO_CRYPTO_OPCODE(service, op)   (((service)<<8) | (op))
struct virtio_crypto_op_header {
#define VIRTIO_CRYPTO_CIPHER_ENCRYPT
VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_CIPHER, 0x00)
#define VIRTIO_CRYPTO_CIPHER_DECRYPT
VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_CIPHER, 0x01)
#define VIRTIO_CRYPTO_HASH  
VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_HASH, 0x00)
#define VIRTIO_CRYPTO_MAC   
VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_MAC, 0x00)
#define VIRTIO_CRYPTO_KDF   
VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_KDF, 0x00)
#define VIRTIO_CRYPTO_ASYM_KEY_GEN  
VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_ASYM, 0x00)
#define VIRTIO_CRYPTO_ASYM_KEY_EXCHG
VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_ASYM, 0x01)
#define VIRTIO_CRYPTO_ASYM_SIGN 
VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_ASYM, 0x02)
#define VIRTIO_CRYPTO_ASYM_VERIFY   
VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_ASYM, 0x03)
#define VIRTIO_CRYPTO_ASYM_ENCRYPT  
VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_ASYM, 0x04)
#define VIRTIO_CRYPTO_ASYM_DECRYPT  
VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_ASYM, 0x05)
#define VIRTIO_CRYPTO_AEAD_ENCRYPT  
VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_AEAD, 0x00)
#define VIRTIO_CRYPTO_AEAD_DECRYPT  
VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_AEAD, 0x01)
#define VIRTIO_CRYPTO_PRIMITIVE 
VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_PRIMITIVE, 0x00)
u32 opcode; 
u8 algo; /*service-specific algorithms*/
u8 flag; /*control flag*/
};

Take rsa_sign_request as example,
A rsa sign service specific request is defined as:
struct virtio_crypto_asym_rsa_sign_req{
struct virtio_crypto_rsa_sign_para parameter;
struct virtio_crypto_rsa_sign_input idata;
struct virtio_crypto_rsa_sign_output odata; 
};

A complete crypto service request is defined as:
struct virtio_crypto_op_data_req {
   struct virtio_crypto_op_header header;
  union {
   struct virtio_crypto_asym_rsa_sign_req  rsa_sign_req;
   /*other service request*/
  }u;
};

More detailed comments are embedded below:

> Changes from v3:
>  - Don't use enum in the spec but macros in specific structures. [Michael &
> Stefan]
>  - Add two complete structures for session creation and closing, so that
>   the spec is clear on how to lay out the request.  [Stefan]
>  - Define the crypto operation request with assigned structure, in this way,
>   each data request only occupies *one entry* of the Vring descriptor table,
>   which *improves* the *throughput* of data transferring.
> 
> Changes from v2:
>  - Reserve virtio device ID 20 for crypto device. [Cornelia]
>  - Drop all feature bits, those capabilities are offered by the device all the
> time.  [Stefan & Cornelia]
>  - Add a new section 1.4.2 for driver requirements. [Stefan]
>  - Use definite type definition instead of enum type in some structure.
> [Stefan]
>  - Add virtio_crypto_cipher_alg definition. [Stefan]
>  - Add a "Device requirements" section as using MUST. [Stefan]
>  - Some grammar nits fixes and typo fixes. [Stefan & Cornelia]
>  - Add one VIRTIO_CRYPTO_S_STARTED status for the driver as the flag of
> virtio-crypto device started and can work now.
> 
> Great thanks for Stefan and Cornelia!
> 
> Changes from v1:
>  - Drop the feature bit definition for each algorithm, and using config space
> instead  [Cornelia]
>  - Add multiqueue support and add corresponding feature bit
>  - Update Encryption process and header definition
>  - Add session operation process and add corresponding header description
>  - Other better description in order to fit for virtio spec  [Michael]
>  - Some other trivial fixes.
> 
> If you have any comments, please let me know, thanks :)
> 
> 
> Virtio-crypto device Spec
>  

Re: [Qemu-devel] [PATCH 0/8] Fix migration issues with arbitrary cpu-hot(un)plug

2016-07-21 Thread David Gibson
On Fri, Jul 22, 2016 at 12:56:26AM +0300, Michael S. Tsirkin wrote:
> On Thu, Jul 21, 2016 at 05:54:31PM +0200, Igor Mammedov wrote:
> > Series fixes migration issues caused by unstable cpu_index which depended
> > on order cpus were created/destroyed. It follows David's idea to make
> > cpu_index assignable by selected boards if board supports cpu-hotplug
> > with device_add and needs stable cpu_index/'migration id' but leaves
> > behaviour of the same as before for users that don't care about
> > cpu-hot(un)plug making changes low-risk.
> > 
> > tested with:
> >   SRC -snapshot -enable-kvm -smp 1,maxcpus=3 -m 256M guest.img -monitor 
> > stdio \
> >-device qemu64-x86_64-cpu,id=cpudel,apic-id=1 \
> >-device qemu64-x86_64-cpu,apic-id=2 
> >   (qemu) device_del cpudel
> >   (qemu) stop
> >   (qemu) migrate "exec:gzip -c > STATEFILE.gz"
> >   
> >   DST -snapshot -enable-kvm -smp 1,maxcpus=3 -m 256M guest.img -monitor 
> > stdio \
> >   -device qemu64-x86_64-cpu,apic-id=2 \
> >   -incoming "exec: gzip -c -d STATEFILE.gz"
> > 
> > git tree to test with:
> >  https://github.com/imammedo/qemu cpu-index-stable
> >  to view
> >  https://github.com/imammedo/qemu/commits/cpu-index-stable
> 
> For PC bits:
> 
> Reviewed-by: Michael S. Tsirkin 
> 
> This would be nice to have in 2.7.

I agree.  Despite the lateness, I think this will avoid substantial
future pain.

> Who's reviewing/merging the rest? Eduardo?

I've reviewed.  I could merge through my tree if we don't have a
better option, but merging pc specific pieces through the ppc tree
would seem odd.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [Qemu-devel] [RFC 0/2] cpu-add compatibility for query-hotpluggable-cpus implementations

2016-07-21 Thread David Gibson
On Tue, Jul 19, 2016 at 09:58:59AM +0530, Bharata B Rao wrote:
> On Mon, Jul 18, 2016 at 06:20:35PM +0200, Igor Mammedov wrote:
> > On Mon, 18 Jul 2016 17:06:18 +0200
> > Peter Krempa  wrote:
> > 
> > > On Mon, Jul 18, 2016 at 19:19:18 +1000, David Gibson wrote:
> > > > I'm not entirely sure if this is a good idea, and if it is whether
> > > > this is a good approach to it.  But I'd like to discuss it and see if
> > > > anyone has better ideas.
> > > > 
> > > > As you may know we've hit a bunch of complications with cpu_index
> > > > which will impose some limitations with what we can do with the new
> > > > query-hotpluggable-cpus interface, and we've run out of time to
> > > > address these in qemu-2.7.
> > > >
> > > > At the same time we're hitting complications with the fact that the
> > > > new qemu interface requires a new libvirt interface to use properly,
> > > > and that has follow on effects further up the stack.  
> > > 
> > > The libvirt interface is basically now depending on adding a working
> > > implementation for qemu or a different hypervisor. APIs without
> > > implementation are not accepted upstream.
> > > 
> > > It looks like there are the following problems which make the above
> > > hard:
> > > 
> > > First of the problem is the missing link between the NUMA topology
> > > (currently configured via 'cpu id' which is not linked in any way to the
> > > query-hotpluggable-cpus entries). This basically means that I'll have to
> > > re-implement the qemu numbering scheme and hope that it doesn't change
> > > until a better approach is added.
> > with current 'in order' plug/unplug limitation behavior is the same as
> > for cpu-add (wrt x86) so device_add could be used as direct replacement
> > of cpu-add in NUMA case.
> > 
> > Numa node to CPU in query-hotpluggable-cpus is a missing part
> > but once numa mapping for hotplugged CPUs (which is broken now) is fixed
> > (fix https://lists.gnu.org/archive/html/qemu-devel/2016-07/msg00595.html)
> > I'll be ready to extend x86.query-hotpluggable-cpus with numa mapping
> > that -numa cpus=1,2,3... happened to configure.
> > (note: that device_add cpu,node=X that doesn't match whatever has been
> > configured with -numa cpus=... will raise an error, as numa configuration
> > is static and fixed at VM creation time, meaning that "node" option
> > in query-hotpluggable-cpus is optional and only to inform users to
> > which node cpu belongs)
> > 
> > > Secondly from my understanding of the current state it's impossible to
> > > select an arbitrary cpu to hotplug but they need to happen 'in order' of
> > > the cpu id pointed out above (which is not accessible). The grand plan
> > > is to allow adding the cpus in any order. This makes the feature look
> > > like a proof of concept rather than something useful.
> 
> > having out-of-order plug/unplug would be nice but that wasn't
> > the grand plan. Main reason is to replace cpu-add with 'device_add cpu' and
> > on top of that provide support for 'device_del cpu' instead of adding 
> > cpu-del
> > command.
> > And as result of migration to device_add to avoid changing -smp to match
> > present cpus count on target and reuse the same interface as other devices.
> > 
> > We can still pick 'out of order' device_add cpu using migration_id patch
> > and revert in-order limit patch. It would work for x86,
> > but I think there were issues with SPAPR, that's why I'm in favor of
> > in-order limit approach.
> 
> Not that the migration_id patch doesn't work for sPAPR, but it was felt
> that having too many IDs (cpu_dt_id, arch_id, migration_id) is not
> good/idea/preferable and could cause confusion.

I was also concerned that adding another id would be yet another layer
of things we needed to maintain compatibility with in future.

> I am not clear as to why limiting the out-of-order hotplug is a show
> stopper for libvirt actually. Isn't that how it is for cpu-add currently ?
> 
> Regards,
> Bharata.
> 

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


[Qemu-devel] [liqiang...@360.cn: 答复: Host memory leakage in QEMU xhci device emulation]

2016-07-21 Thread Michael S. Tsirkin
- Forwarded message from 李强  -

Date: Fri, 22 Jul 2016 03:48:38 +
From: 李强 
To: "Michael S. Tsirkin" 
Subject: 答复: Host memory leakage  in QEMU  xhci device emulation
Message-ID: <143c0afc63fc204cb0c55bb88f3a8abb0181d...@ex02.corp.qihoo.net>
In-Reply-To: <20160722063935-mutt-send-email-...@kernel.org>

Right, a user can hotplug a lot of devices.
It is a bug not a security issue. Just publish it.

> -邮件原件-
> 发件人: Michael S. Tsirkin [mailto:m...@redhat.com]
> 发送时间: 2016年7月22日 11:42
> 收件人: 李强
> 抄送: pmato...@redhat.com; sstabell...@kernel.org; secal...@redhat.com;
> mdr...@linux.vnet.ibm.com
> 主题: Re: Host memory leakage in QEMU xhci device emulation
> 
> On Fri, Jul 22, 2016 at 02:42:56AM +, 李强 wrote:
> > Hi,
> >
> >
> >
> > I found a host memory leakage issue in QEMU hcd-xhci device emulation.
> >
> >
> >
> > DESCRIPTION
> >
> > _
> >
> >
> >
> > In usb_xhci_realize() function, it calls msix_init() which requires
> > memory malloc. In usb_xhci_exit(), it doesn't call the corresponding
> > function
> > msix_uninit() to free the memory.
> >
> > This will cause host memory leakage if a malicious user hotplugs and unplugs
> > the xhci device.
> 
> Thanks for the report.
> 
> I would say whoever can hotplug devices can just hotplug a huge number of
> these until you run out of memory.
> So I don't think it's a security vulnerability, but we should fix it.
> 
> Do you agree? If yes we can publish this on the qemu mailing list.
> 
> 
> >
> >
> > code from hw/usb/hcd-xhci.c
> >
> > static void usb_xhci_realize(struct PCIDevice *dev, Error **errp)
> >
> > {
> >
> > int i, ret;
> >
> >
> >
> > if (xhci_get_flag(xhci, XHCI_FLAG_USE_MSI_X)) {
> >
> > msix_init(dev, xhci->numintrs,
> >
> >   >mem, 0, OFF_MSIX_TABLE,
> >
> >   >mem, 0, OFF_MSIX_PBA,
> >
> >   0x90);
> >
> > }
> >
> > }
> >
> >
> >
> > static void usb_xhci_exit(PCIDevice *dev)
> >
> > {
> >
> > int i;
> >
> > XHCIState *xhci = XHCI(dev);
> >
> >
> >
> > trace_usb_xhci_exit();
> >
> >
> >
> > for (i = 0; i < xhci->numslots; i++) {
> >
> > xhci_disable_slot(xhci, i + 1);
> >
> > }
> >
> >
> >
> > if (xhci->mfwrap_timer) {
> >
> > timer_del(xhci->mfwrap_timer);
> >
> > timer_free(xhci->mfwrap_timer);
> >
> > xhci->mfwrap_timer = NULL;
> >
> > }
> >
> >
> >
> > /* destroy msix memory region *///here just destroy the memory
> > region, doesn’t free the msix memory
> >
> > if (dev->msix_table && dev->msix_pba
> >
> > && dev->msix_entry_used) {
> >
> > memory_region_del_subregion(&xhci->mem,
> > &dev->msix_table_mmio);
> >
> > memory_region_del_subregion(&xhci->mem,
> > &dev->msix_pba_mmio);
> >
> > }
> >
> >
> >
> > usb_bus_release(&xhci->bus);
> >
> > }
> >
> >
> >
> > DEBUG AND TEST INFO
> >
> > _
> >
> >
> >
> > Breakpoint 1, usb_xhci_realize (dev=0x592e7c20,
> > errp=0x7fffc5f8) at hw/
> > usb/hcd-xhci.c:3652
> >
> > 3652 msix_init(dev, xhci->numintrs,
> >
> > (gdb) p dev->msix_table
> >
> > $1 = (uint8_t *) 0x0
> >
> > (gdb) p dev->msix_pba
> >
> > $2 = (uint8_t *) 0x0
> >
> > (gdb) p dev->msix_entry_used
> >
> > $3 = (unsigned int *) 0x0
> >
> > (gdb) n
> >
> > [Thread 0x7fffe72f7700 (LWP 49433) exited]
> >
> > 3657 }
> >
> > (gdb) p dev->msix_table
> >
> > $4 = (uint8_t *) 0x5823a480 ""
> >
> > (gdb) p dev->msix_pba
> >
> > $5 = (uint8_t *) 0x5823a590 ""
> >
> > (gdb) p dev->msix_entry_used
> >
> > $6 = (unsigned int *) 0x5823a5b0
> >
> > (gdb) c
> >
> > Continuing.
> >
> >
> >
> > Program received signal SIGPIPE, Broken pipe.
> >
> > 0x769ed78d in sendmsg () at
> > ../sysdeps/unix/syscall-template.S:81
> >
> > 81 T_PSEUDO (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS)
> >
> > (gdb) i b
> >
> > Num Type   Disp Enb AddressWhat
> >
> > 1   breakpoint keep y   0x559c6a68 in
> usb_xhci_realize at hw/
> > usb/hcd-xhci.c:3652
> >
> >  breakpoint already hit 1 time
> >
> > 2   breakpoint keep y   0x559c6c5e in usb_xhci_exit
> at hw/usb/
> > hcd-xhci.c:3686
> >
> > (gdb) d 1
> >
> > (gdb) c
> >
> > Continuing.
> >
> > [New Thread 0x7fffe72f7700 (LWP 49451)]
> >
> > [Switching to Thread 0x7fffe5a04700 (LWP 49408)]
> >
> >
> >
> > Breakpoint 2, usb_xhci_exit (dev=0x592e7c20) at
> > hw/usb/hcd-xhci.c:3687
> >
> > 3687 if (dev->msix_table && dev->msix_pba
> >
> > (gdb) p dev->msix_table
> >
> > $7 = (uint8_t *) 0x5823a480 "\f\020\340\376"
> >
> > (gdb) p dev->msix_pba
> >
> > $8 = (uint8_t *) 0x5823a590 ""
> >
> > (gdb) p dev->msix_entry_used
> >
> > $9 = (unsigned int *) 0x5823a5b0
> >
> > (gdb) n
> >
> > 3688 && dev->msix_entry_used) {
> >
> > (gdb) n
> >
> > 3689 memory_region_del_subregion(>mem,
> >
> > msix_table_mmio);
> >
> > (gdb) awatch *(int*)0x5823a480  

[Qemu-devel] [Bug 1605443] Re: QEMU epoll for i386-linux-user on arm host is broken in 2.6

2016-07-21 Thread Icenowy Zheng
Oh I have sent a patch to qemu-devel mailing list...

(maybe the mail is rejected, as I'm using Yandex mail service...)

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1605443

Title:
  QEMU epoll for i386-linux-user on arm host is broken in 2.6

Status in QEMU:
  Confirmed

Bug description:
  I'm trying to get wine running on qemu-i386 on arm.

  I found that 2.5.1 is OK, but 2.6 is not.

  By bisecting, I found commit 928bed6a057cedd6110e634865e021a24029785a
  is the problem.

  I reverted this commit, and then epoll is OK now.

  It seems that the commit broke epoll of qemu-i386 on arm.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1605443/+subscriptions



Re: [Qemu-devel] [PATCH] checkpatch: fix break by renaming README

2016-07-21 Thread Peter Xu
On Thu, Jul 21, 2016 at 08:37:01AM -0600, Eric Blake wrote:
> On 07/21/2016 04:15 AM, Peter Xu wrote:
> > Without this, we cannot run checkpatch.pl under QEMU root directory.
> > 
> > Signed-off-by: Peter Xu 
> > ---
> >  scripts/checkpatch.pl | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> NACK; see
> https://lists.gnu.org/archive/html/qemu-devel/2016-07/msg04850.html for
> a more complete solution

Yeah it's already reverted and it's working in master. Thanks for the
pointer. :)

-- peterx



Re: [Qemu-devel] [PATCH 6/8] spapr: init CPUState->cpu_index with index relative to core-id

2016-07-21 Thread David Gibson
On Thu, Jul 21, 2016 at 05:54:37PM +0200, Igor Mammedov wrote:
> It will ensure that cpu_index for a given cpu stays the same
> regardless of the order cpus have been created/deleted and so
> it would be possible to migrate a QEMU instance with CPUs created
> out of order.
> 
> Signed-off-by: Igor Mammedov 

So, this isn't quite right (it wasn't right in my version either).

The problem occurs when smp_threads < kvmppc_smt_threads().  That is,
when the requested threads-per-core is less than the hardware's
maximum number of threads-per-core.

The core-id values are assigned essentially as i *
kvmppc_smt_threads(), meaning the patch below will leave gaps in the
cpu_index values and the last ones will exceed max_cpus, causing other
problems.

What I'm not sure about is whether the right way to fix this is to
change the core-id values, or to calculate the cpu_index from the
existing core-id values.

> ---
>  hw/ppc/spapr_cpu_core.c | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
> index 4bfc96b..f68e88d 100644
> --- a/hw/ppc/spapr_cpu_core.c
> +++ b/hw/ppc/spapr_cpu_core.c
> @@ -309,9 +309,13 @@ static void spapr_cpu_core_realize(DeviceState *dev, 
> Error **errp)
>  sc->threads = g_malloc0(size * cc->nr_threads);
>  for (i = 0; i < cc->nr_threads; i++) {
>  char id[32];
> +CPUState *cs;
> +
>  obj = sc->threads + i * size;
>  
>  object_initialize(obj, size, typename);
> +cs = CPU(obj);
> +cs->cpu_index = cc->core_id + i;
>  snprintf(id, sizeof(id), "thread[%d]", i);
>  object_property_add_child(OBJECT(sc), id, obj, _err);
>  if (local_err) {

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH v3 1/2] qdev: ignore GlobalProperty.errp for hotplugged devices

2016-07-21 Thread David Gibson
On Fri, Jul 22, 2016 at 01:01:26AM +0200, Greg Kurz wrote:
> This patch ensures QEMU won't terminate while hotplugging a device if the
> global property cannot be set and errp points to error_fatal or error_abort.
> 
> While here, it also fixes indentation of the typename argument.
> 
> Suggested-by: Eduardo Habkost 
> Signed-off-by: Greg Kurz 

This seems kind of bogus to me - we have this whole infrastructure for
handling errors, and here we throw it away.

It seems like the right solution would be to make the caller in the
hotplug case *not* use error_abort or error_fatal, and instead get the
error propagated back to the monitor which will display it.

> ---
>  hw/core/qdev-properties.c |4 ++--
>  include/hw/qdev-core.h|4 +++-
>  2 files changed, 5 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c
> index 14e544ab17d2..311af6da7684 100644
> --- a/hw/core/qdev-properties.c
> +++ b/hw/core/qdev-properties.c
> @@ -1084,7 +1084,7 @@ int qdev_prop_check_globals(void)
>  }
>  
>  static void qdev_prop_set_globals_for_type(DeviceState *dev,
> -const char *typename)
> +   const char *typename)
>  {
>  GList *l;
>  
> @@ -1100,7 +1100,7 @@ static void qdev_prop_set_globals_for_type(DeviceState 
> *dev,
>  if (err != NULL) {
>  error_prepend(, "can't apply global %s.%s=%s: ",
>prop->driver, prop->property, prop->value);
> -if (prop->errp) {
> +if (!dev->hotplugged && prop->errp) {
>  error_propagate(prop->errp, err);
>  } else {
>  assert(prop->user_provided);
> diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
> index 1d1f8612a9b8..4b4b33bec885 100644
> --- a/include/hw/qdev-core.h
> +++ b/include/hw/qdev-core.h
> @@ -261,7 +261,9 @@ struct PropertyInfo {
>   * @used: Set to true if property was used when initializing a device.
>   * @errp: Error destination, used like first argument of error_setg()
>   *in case property setting fails later. If @errp is NULL, we
> - *print warnings instead of ignoring errors silently.
> + *print warnings instead of ignoring errors silently. For
> + *hotplugged devices, errp is always ignored and warnings are
> + *printed instead.
>   */
>  typedef struct GlobalProperty {
>  const char *driver;
> 

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH 5/8] pc: init CPUState->cpu_index with index in possible_cpus[]

2016-07-21 Thread David Gibson
On Thu, Jul 21, 2016 at 05:54:36PM +0200, Igor Mammedov wrote:
> It will ensure that cpu_index for a given cpu stays the same
> regardless of the order cpus have been created/deleted.
> 
> No compat code is needed as for initial cpus index in
> possible_cpus[] matches cpu_index that's been auto-allocated
> in cpu_exec_init().
> 
> The same applies for hotplug with cpu-add command if cpus are
> added sequentially in increasing order as 'id' matches cpu_index.
> 
> If cpu-add had been used for creating out-of-order cpus,
> that created an unmigratable instance since it was not possible
> to start the target with the same cpu_index using the old way
> of migrating an instance with hotplugged cpus:
> 
> * source QEMU with CLI (-smp 1,maxcpus=3 and cpu-add id=2)
>   following set of cpu_index is allocated [0, 1] with
>   apics set [0, 2] respectively
> * target QEMU is started with CLI -smp 2,maxcpus=3
>   resulting in set of cpu_index [0, 1] but with
>   set of apics [0, 1] which doesn't match source.
> 
> So we don't need compat code in this case as it's never worked
> and newly added device_add support would use stable cpu_index
> set by machine to begin with, so it won't have above limitation
> and source QEMU could be migrated to destination regardless
> of the order cpus were created.
> 
> Signed-off-by: Igor Mammedov 

Reviewed-by: David Gibson 

> ---
>  hw/i386/pc.c | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index ac7a4d5..316fb43 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -1872,6 +1872,7 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev,
>  DeviceState *dev, Error **errp)
>  {
>  int idx;
> +CPUState *cs;
>  CPUArchId *cpu_slot;
>  X86CPUTopoInfo topo;
>  X86CPU *cpu = X86_CPU(dev);
> @@ -1972,6 +1973,9 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev,
>  return;
>  }
>  cpu->thread_id = topo.smt_id;
> +
> +cs = CPU(cpu);
> +cs->cpu_index = idx;
>  }
>  
>  static void pc_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev,

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH 4/8] qdev: fix object reference leak in case device.realize() fails

2016-07-21 Thread David Gibson
On Thu, Jul 21, 2016 at 05:54:35PM +0200, Igor Mammedov wrote:
> If a device doesn't have a parent assigned before its realize
> is called, device_set_realized() will implicitly set the parent
> to '/machine/unattached'.
> 
> However device_set_realized() may fail after that point at
> several other points leaving not realized object dangling
> in '/machine/unattached' and as result caller of
> 
>   obj = object_new()
> obj->ref == 1
>   object_property_set_bool(obj,..., true, "realized",...)
> obj->ref == 2
>   if (fail)
>   object_unref(obj);
>   obj->ref == 1
> 
> will get object leak instead of expected object destruction.
> 
> Fix it by making device_set_realized() clean up after itself
> in case of failure.
> 
> Signed-off-by: Igor Mammedov 

Reviewed-by: David Gibson 

> ---
>  hw/core/qdev.c | 8 +++-
>  1 file changed, 7 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/core/qdev.c b/hw/core/qdev.c
> index 6680089..ee4a083 100644
> --- a/hw/core/qdev.c
> +++ b/hw/core/qdev.c
> @@ -885,6 +885,8 @@ static void device_set_realized(Object *obj, bool value, 
> Error **errp)
>  HotplugHandler *hotplug_ctrl;
>  BusState *bus;
>  Error *local_err = NULL;
> +bool unattached_parent = false;
> +static int unattached_count;
>  
>  if (dev->hotplugged && !dc->hotpluggable) {
>  error_setg(errp, QERR_DEVICE_NO_HOTPLUG, object_get_typename(obj));
> @@ -893,12 +895,12 @@ static void device_set_realized(Object *obj, bool 
> value, Error **errp)
>  
>  if (value && !dev->realized) {
>  if (!obj->parent) {
> -static int unattached_count;
>  gchar *name = g_strdup_printf("device[%d]", unattached_count++);
>  
>  object_property_add_child(container_get(qdev_get_machine(),
>  "/unattached"),
>name, obj, _abort);
> +unattached_parent = true;
>  g_free(name);
>  }
>  
> @@ -987,6 +989,10 @@ post_realize_fail:
>  
>  fail:
>  error_propagate(errp, local_err);
> +if (unattached_parent) {
> +object_unparent(OBJECT(dev));
> +unattached_count--;
> +}
>  }
>  
>  static bool device_get_hotpluggable(Object *obj, Error **errp)

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH 1/8] exec: reduce CONFIG_USER_ONLY ifdeffenery

2016-07-21 Thread David Gibson
On Thu, Jul 21, 2016 at 05:54:32PM +0200, Igor Mammedov wrote:
> Signed-off-by: Igor Mammedov 

Reviewed-by: David Gibson 

I think this is long overdue.

> ---
>  bsd-user/qemu.h |  2 --
>  include/exec/exec-all.h | 12 
>  linux-user/qemu.h   |  2 --
>  exec.c  | 17 +++--
>  4 files changed, 15 insertions(+), 18 deletions(-)
> 
> diff --git a/bsd-user/qemu.h b/bsd-user/qemu.h
> index 6ccc544..2b2b918 100644
> --- a/bsd-user/qemu.h
> +++ b/bsd-user/qemu.h
> @@ -209,8 +209,6 @@ abi_long target_mremap(abi_ulong old_addr, abi_ulong 
> old_size,
> abi_ulong new_addr);
>  int target_msync(abi_ulong start, abi_ulong len, int flags);
>  extern unsigned long last_brk;
> -void cpu_list_lock(void);
> -void cpu_list_unlock(void);
>  #if defined(CONFIG_USE_NPTL)
>  void mmap_fork_start(void);
>  void mmap_fork_end(int child);
> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> index acda7b6..d008296 100644
> --- a/include/exec/exec-all.h
> +++ b/include/exec/exec-all.h
> @@ -56,6 +56,18 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
>target_ulong pc, target_ulong cs_base,
>uint32_t flags,
>int cflags);
> +#if defined(CONFIG_USER_ONLY)
> +void cpu_list_lock(void);
> +void cpu_list_unlock(void);
> +#else
> +static inline void cpu_list_unlock(void)
> +{
> +}
> +static inline void cpu_list_lock(void)
> +{
> +}
> +#endif
> +
>  void cpu_exec_init(CPUState *cpu, Error **errp);
>  void QEMU_NORETURN cpu_loop_exit(CPUState *cpu);
>  void QEMU_NORETURN cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc);
> diff --git a/linux-user/qemu.h b/linux-user/qemu.h
> index cdf23a7..bef465d 100644
> --- a/linux-user/qemu.h
> +++ b/linux-user/qemu.h
> @@ -419,8 +419,6 @@ int target_msync(abi_ulong start, abi_ulong len, int 
> flags);
>  extern unsigned long last_brk;
>  extern abi_ulong mmap_next_start;
>  abi_ulong mmap_find_vma(abi_ulong, abi_ulong);
> -void cpu_list_lock(void);
> -void cpu_list_unlock(void);
>  void mmap_fork_start(void);
>  void mmap_fork_end(int child);
>  
> diff --git a/exec.c b/exec.c
> index 60cf46a..2f57c62 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -642,23 +642,17 @@ void cpu_exec_exit(CPUState *cpu)
>  {
>  CPUClass *cc = CPU_GET_CLASS(cpu);
>  
> -#if defined(CONFIG_USER_ONLY)
>  cpu_list_lock();
> -#endif
>  if (cpu->cpu_index == -1) {
>  /* cpu_index was never allocated by this @cpu or was already freed. 
> */
> -#if defined(CONFIG_USER_ONLY)
>  cpu_list_unlock();
> -#endif
>  return;
>  }
>  
>  QTAILQ_REMOVE(, cpu, node);
>  cpu_release_index(cpu);
>  cpu->cpu_index = -1;
> -#if defined(CONFIG_USER_ONLY)
>  cpu_list_unlock();
> -#endif
>  
>  if (cc->vmsd != NULL) {
>  vmstate_unregister(NULL, cc->vmsd, cpu);
> @@ -670,7 +664,7 @@ void cpu_exec_exit(CPUState *cpu)
>  
>  void cpu_exec_init(CPUState *cpu, Error **errp)
>  {
> -CPUClass *cc = CPU_GET_CLASS(cpu);
> +CPUClass *cc ATTRIBUTE_UNUSED = CPU_GET_CLASS(cpu);
>  Error *local_err = NULL;
>  
>  cpu->as = NULL;
> @@ -694,22 +688,17 @@ void cpu_exec_init(CPUState *cpu, Error **errp)
>  object_ref(OBJECT(cpu->memory));
>  #endif
>  
> -#if defined(CONFIG_USER_ONLY)
>  cpu_list_lock();
> -#endif
>  cpu->cpu_index = cpu_get_free_index(_err);
>  if (local_err) {
>  error_propagate(errp, local_err);
> -#if defined(CONFIG_USER_ONLY)
>  cpu_list_unlock();
> -#endif
>  return;
>  }
>  QTAILQ_INSERT_TAIL(, cpu, node);
> -#if defined(CONFIG_USER_ONLY)
> -(void) cc;
>  cpu_list_unlock();
> -#else
> +
> +#ifndef CONFIG_USER_ONLY
>  if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
>  vmstate_register(NULL, cpu->cpu_index, _cpu_common, cpu);
>  }

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH 2/8] exec: don't use cpu_index to detect if cpu_exec_init()'s been called for cpu

2016-07-21 Thread David Gibson
On Thu, Jul 21, 2016 at 05:54:33PM +0200, Igor Mammedov wrote:
> Instead use QTAIL's tqe_prev field to detect if cpu's been
> placed in list by cpu_exec_init() which is always set if
> QTAIL element is in list.
> 
> Fixes SIGSEGV on failure path in case cpu_index is assigned
> by board and cpu.realize() fails before cpu_exec_init() is called.
> 
> In follow up patches, cpu_index will be assigned by boards that
> support cpu hot(un)plug and need stable cpu_index that doesn't
> depend on order cpus are created/removed.
> 
> Signed-off-by: Igor Mammedov 
> Reported-by: David Gibson 

Looks correct, although I wonder a bit about changing QTAILQ_REMOVE()
for everyone for the sake of this one use case.

> ---
>  include/qemu/queue.h | 2 ++
>  exec.c   | 4 ++--
>  2 files changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/include/qemu/queue.h b/include/qemu/queue.h
> index c2b6c81..2c2c74b 100644
> --- a/include/qemu/queue.h
> +++ b/include/qemu/queue.h
> @@ -407,6 +407,7 @@ struct {  
>   \
>  else\
>  (head)->tqh_last = (elm)->field.tqe_prev;   \
>  *(elm)->field.tqe_prev = (elm)->field.tqe_next; \
> +(elm)->field.tqe_prev = NULL;   \
>  } while (/*CONSTCOND*/0)
>  
>  #define QTAILQ_FOREACH(var, head, field)\
> @@ -430,6 +431,7 @@ struct {  
>   \
>  #define QTAILQ_EMPTY(head)   ((head)->tqh_first == NULL)
>  #define QTAILQ_FIRST(head)   ((head)->tqh_first)
>  #define QTAILQ_NEXT(elm, field)  ((elm)->field.tqe_next)
> +#define QTAILQ_IN_USE(elm, field)((elm)->field.tqe_prev)
>  
>  #define QTAILQ_LAST(head, headname) \
>  (*(((struct headname *)((head)->tqh_last))->tqh_last))
> diff --git a/exec.c b/exec.c
> index 2f57c62..8c5da32 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -643,8 +643,8 @@ void cpu_exec_exit(CPUState *cpu)
>  CPUClass *cc = CPU_GET_CLASS(cpu);
>  
>  cpu_list_lock();
> -if (cpu->cpu_index == -1) {
> -/* cpu_index was never allocated by this @cpu or was already freed. 
> */
> +if (!QTAILQ_IN_USE(cpu, node)) {
> +/* there is nothing to undo since cpu_exec_init() hasn't been called 
> */
>  cpu_list_unlock();
>  return;
>  }

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH 3/8] exec: set cpu_index only if it's been explictly set

2016-07-21 Thread David Gibson
On Thu, Jul 21, 2016 at 05:54:34PM +0200, Igor Mammedov wrote:
> It keeps the legacy behavior for all users that don't care
> about stable cpu_index value, but would allow boards that
> would support device_add/device_del to set stable cpu_index
> that won't depend on order in which cpus are created/destroyed.
> 
> While at that simplify cpu_get_free_index() as cpu_index
> generated by USER_ONLY and softmmu variants is the same
> since none of the users support cpu-remove so far except
> of not yet released spapr/x86 device_add/del. which
> will be altered in follow up patches to provide stable
> cpu_index.
> 
> Signed-off-by: Igor Mammedov 

It looks like the 1-line description needs a "not" in it somewhere,
but otherwise:

Reviewed-by: David Gibson 

> ---
>  include/qom/cpu.h |  2 ++
>  exec.c| 44 ++--
>  qom/cpu.c |  2 +-
>  3 files changed, 9 insertions(+), 39 deletions(-)
> 
> diff --git a/include/qom/cpu.h b/include/qom/cpu.h
> index cbcd64c..ce0c406 100644
> --- a/include/qom/cpu.h
> +++ b/include/qom/cpu.h
> @@ -883,4 +883,6 @@ extern const struct VMStateDescription vmstate_cpu_common;
>  .offset = 0,\
>  }
>  
> +#define UNASSIGNED_CPU_INDEX -1
> +
>  #endif
> diff --git a/exec.c b/exec.c
> index 8c5da32..8e8416b 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -598,30 +598,7 @@ AddressSpace *cpu_get_address_space(CPUState *cpu, int 
> asidx)
>  }
>  #endif
>  
> -#ifndef CONFIG_USER_ONLY
> -static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
> -
> -static int cpu_get_free_index(Error **errp)
> -{
> -int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);
> -
> -if (cpu >= MAX_CPUMASK_BITS) {
> -error_setg(errp, "Trying to use more CPUs than max of %d",
> -   MAX_CPUMASK_BITS);
> -return -1;
> -}
> -
> -bitmap_set(cpu_index_map, cpu, 1);
> -return cpu;
> -}
> -
> -static void cpu_release_index(CPUState *cpu)
> -{
> -bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
> -}
> -#else
> -
> -static int cpu_get_free_index(Error **errp)
> +static int cpu_get_free_index(void)
>  {
>  CPUState *some_cpu;
>  int cpu_index = 0;
> @@ -632,12 +609,6 @@ static int cpu_get_free_index(Error **errp)
>  return cpu_index;
>  }
>  
> -static void cpu_release_index(CPUState *cpu)
> -{
> -return;
> -}
> -#endif
> -
>  void cpu_exec_exit(CPUState *cpu)
>  {
>  CPUClass *cc = CPU_GET_CLASS(cpu);
> @@ -650,8 +621,7 @@ void cpu_exec_exit(CPUState *cpu)
>  }
>  
>  QTAILQ_REMOVE(, cpu, node);
> -cpu_release_index(cpu);
> -cpu->cpu_index = -1;
> +cpu->cpu_index = UNASSIGNED_CPU_INDEX;
>  cpu_list_unlock();
>  
>  if (cc->vmsd != NULL) {
> @@ -665,7 +635,7 @@ void cpu_exec_exit(CPUState *cpu)
>  void cpu_exec_init(CPUState *cpu, Error **errp)
>  {
>  CPUClass *cc ATTRIBUTE_UNUSED = CPU_GET_CLASS(cpu);
> -Error *local_err = NULL;
> +Error *local_err ATTRIBUTE_UNUSED = NULL;
>  
>  cpu->as = NULL;
>  cpu->num_ases = 0;
> @@ -689,11 +659,9 @@ void cpu_exec_init(CPUState *cpu, Error **errp)
>  #endif
>  
>  cpu_list_lock();
> -cpu->cpu_index = cpu_get_free_index(_err);
> -if (local_err) {
> -error_propagate(errp, local_err);
> -cpu_list_unlock();
> -return;
> +if (cpu->cpu_index == UNASSIGNED_CPU_INDEX) {
> +cpu->cpu_index = cpu_get_free_index();
> +assert(cpu->cpu_index != UNASSIGNED_CPU_INDEX);
>  }
>  QTAILQ_INSERT_TAIL(, cpu, node);
>  cpu_list_unlock();
> diff --git a/qom/cpu.c b/qom/cpu.c
> index 42b5631..2553247 100644
> --- a/qom/cpu.c
> +++ b/qom/cpu.c
> @@ -340,7 +340,7 @@ static void cpu_common_initfn(Object *obj)
>  CPUState *cpu = CPU(obj);
>  CPUClass *cc = CPU_GET_CLASS(obj);
>  
> -cpu->cpu_index = -1;
> +cpu->cpu_index = UNASSIGNED_CPU_INDEX;
>  cpu->gdb_num_regs = cpu->gdb_num_g_regs = cc->gdb_num_core_regs;
>  qemu_mutex_init(>work_mutex);
>  QTAILQ_INIT(>breakpoints);

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH v12 13/27] intel_iommu: Add support for PCI MSI remap

2016-07-21 Thread Peter Xu
On Thu, Jul 21, 2016 at 08:45:30PM +0300, Michael S. Tsirkin wrote:
> On Thu, Jul 14, 2016 at 01:56:22PM +0800, Peter Xu wrote:
> > This patch enables interrupt remapping for PCI devices.
> > 
> > To play the trick, one memory region "iommu_ir" is added as child region
> > of the original iommu memory region, covering range 0xfeeX (which is
> > the address range for APIC). All the writes to this range will be taken
> > as MSI, and translation is carried out only when IR is enabled.
> > 
> > Idea suggested by Paolo Bonzini.
> > 
> > Signed-off-by: Peter Xu 
> > ---
> >  hw/i386/intel_iommu.c  | 241 
> > +
> >  hw/i386/intel_iommu_internal.h |   2 +
> >  include/hw/i386/intel_iommu.h  |  66 +++
> >  3 files changed, 309 insertions(+)
> > 
> > diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> > index 6a6cb3b..3d1b15d 100644
> > --- a/hw/i386/intel_iommu.c
> > +++ b/hw/i386/intel_iommu.c
> > @@ -1982,6 +1982,242 @@ static Property vtd_properties[] = {
> >  DEFINE_PROP_END_OF_LIST(),
> >  };
> >  
> > +/* Read IRTE entry with specific index */
> > +static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
> > +VTD_IRTE *entry)
> > +{
> > +dma_addr_t addr = 0x00;
> > +
> > +addr = iommu->intr_root + index * sizeof(*entry);
> > +if (dma_memory_read(_space_memory, addr, entry,
> > +sizeof(*entry))) {
> > +VTD_DPRINTF(GENERAL, "error: fail to access IR root at 0x%"PRIx64
> > +" + %"PRIu16, iommu->intr_root, index);
> > +return -VTD_FR_IR_ROOT_INVAL;
> > +}
> > +
> > +if (!entry->present) {
> > +VTD_DPRINTF(GENERAL, "error: present flag not set in IRTE"
> > +" entry index %u value 0x%"PRIx64 " 0x%"PRIx64,
> > +index, le64_to_cpu(entry->data[1]),
> > +le64_to_cpu(entry->data[0]));
> > +return -VTD_FR_IR_ENTRY_P;
> > +}
> > +
> > +if (entry->__reserved_0 || entry->__reserved_1 || \
> > +entry->__reserved_2) {
> > +VTD_DPRINTF(GENERAL, "error: IRTE entry index %"PRIu16
> > +" reserved fields non-zero: 0x%"PRIx64 " 0x%"PRIx64,
> > +index, le64_to_cpu(entry->data[1]),
> > +le64_to_cpu(entry->data[0]));
> > +return -VTD_FR_IR_IRTE_RSVD;
> > +}
> > +
> > +/*
> > + * TODO: Check Source-ID corresponds to SVT (Source Validation
> > + * Type) bits
> > + */
> > +
> > +return 0;
> > +}
> > +
> > +/* Fetch IRQ information of specific IR index */
> > +static int vtd_remap_irq_get(IntelIOMMUState *iommu, uint16_t index, 
> > VTDIrq *irq)
> > +{
> > +VTD_IRTE irte;
> > +int ret = 0;
> > +
> > +bzero(, sizeof(irte));
> > +
> > +ret = vtd_irte_get(iommu, index, );
> > +if (ret) {
> > +return ret;
> > +}
> > +
> > +irq->trigger_mode = irte.trigger_mode;
> > +irq->vector = irte.vector;
> > +irq->delivery_mode = irte.delivery_mode;
> > +/* Not support EIM yet: please refer to vt-d 9.10 DST bits */
> > +#define  VTD_IR_APIC_DEST_MASK (0xff00ULL)
> > +#define  VTD_IR_APIC_DEST_SHIFT(8)
> > +irq->dest = (le32_to_cpu(irte.dest_id) & VTD_IR_APIC_DEST_MASK) >> \
> > +VTD_IR_APIC_DEST_SHIFT;
> > +irq->dest_mode = irte.dest_mode;
> > +irq->redir_hint = irte.redir_hint;
> > +
> > +VTD_DPRINTF(IR, "remapping interrupt index %d: trig:%u,vec:%u,"
> > +"deliver:%u,dest:%u,dest_mode:%u", index,
> > +irq->trigger_mode, irq->vector, irq->delivery_mode,
> > +irq->dest, irq->dest_mode);
> > +
> > +return 0;
> > +}
> > +
> > +/* Generate one MSI message from VTDIrq info */
> > +static void vtd_generate_msi_message(VTDIrq *irq, MSIMessage *msg_out)
> > +{
> > +VTD_MSIMessage msg = {};
> > +
> > +/* Generate address bits */
> > +msg.dest_mode = irq->dest_mode;
> > +msg.redir_hint = irq->redir_hint;
> > +msg.dest = irq->dest;
> > +msg.__addr_head = cpu_to_le32(0xfee);
> > +/* Keep this from original MSI address bits */
> > +msg.__not_used = irq->msi_addr_last_bits;
> > +
> > +/* Generate data bits */
> > +msg.vector = irq->vector;
> > +msg.delivery_mode = irq->delivery_mode;
> > +msg.level = 1;
> > +msg.trigger_mode = irq->trigger_mode;
> > +
> > +msg_out->address = msg.msi_addr;
> > +msg_out->data = msg.msi_data;
> > +}
> > +
> > +/* Interrupt remapping for MSI/MSI-X entry */
> > +static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu,
> > +   MSIMessage *origin,
> > +   MSIMessage *translated)
> > +{
> > +int ret = 0;
> > +VTD_IR_MSIAddress addr;
> > +uint16_t index;
> > +VTDIrq irq = {0};
> > +
> > +assert(origin && translated);
> > +
> > +if (!iommu || !iommu->intr_enabled) {
> > 

[Qemu-devel] [Bug 1605443] Re: QEMU epoll for i386-linux-user on arm host is broken in 2.6

2016-07-21 Thread pranith
** Changed in: qemu
   Status: New => Confirmed

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1605443

Title:
  QEMU epoll for i386-linux-user on arm host is broken in 2.6

Status in QEMU:
  Confirmed

Bug description:
  I'm trying to get wine running on qemu-i386 on arm.

  I found that 2.5.1 is OK, but 2.6 is not.

  By bisecting, I found commit 928bed6a057cedd6110e634865e021a24029785a
  is the problem.

  I reverted this commit, and then epoll is OK now.

  It seems that the commit broke epoll of qemu-i386 on arm.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1605443/+subscriptions



Re: [Qemu-devel] [RFC v4] virtio-crypto specification

2016-07-21 Thread Gonglei (Arei)
Hi Xin,

Thank you so much for your great comments.
I mostly agree with you, except for some trivial details.
Please see my replies below.

And I'll submit V5 next week, and you can finish the asym algos parts if you 
like.
Let's co-work to finish the virtio-crypto spec, shall we?

Regards,
-Gonglei


> -Original Message-
> From: Zeng, Xin [mailto:xin.z...@intel.com]
> Sent: Friday, July 22, 2016 8:48 AM
> To: Gonglei (Arei); virtio-...@lists.oasis-open.org; qemu-devel@nongnu.org
> Cc: Hanweidong (Randy); Stefan Hajnoczi; Cornelia Huck; m...@redhat.com;
> Lingli Deng; Jani Kokkonen; Luonengjun; Huangpeng (Peter); Zhoujian (jay,
> Euler); chenshanxi 00222737; 'Ola liljed...@arm.com'; Varun Sethi
> Subject: RE: [RFC v4] virtio-crypto specification
> 
> On Sunday, June 26, 2016 5:35 PM, Gonglei (Arei) Wrote:
> > Hi all,
> >
> > This is the specification (version 4) about a new virtio crypto device.
> >
> 
> In general, our comments around this proposal are listed below:
> 1. Suggest to introduce crypto services into virtio crypto device. The 
> services
> currently defined are CIPHER, MAC, HASH, AEAD, KDF, ASYM, PRIMITIVE.

Yes, I agree. Are DRBG/NDRBG included in the PRIMITIVE service or not?
If not, we'd better add a separate service for them.

> 2. Suggest defining a unified crypto request format that consists of
> general header + service specific request, where 'general header' is for all
> crypto requests, and 'service specific request' is generally composed of
> operation parameter + input data + output data.
> operation parameter is the algorithm-specific parameters,
> input data is the data to be operated on,
> output data is the "operation result + result buffer".
> 
It makes sense. Good.

> #define VIRTIO_CRYPTO_OPCODE(service, op)   (((service)<<8) | (op))
> struct virtio_crypto_op_header {
> #define VIRTIO_CRYPTO_CIPHER_ENCRYPT
>   VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_CIPHER, 0x00)
> #define VIRTIO_CRYPTO_CIPHER_DECRYPT
>   VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_CIPHER, 0x01)
> #define VIRTIO_CRYPTO_HASH
>   VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_HASH, 0x00)
> #define VIRTIO_CRYPTO_MAC
> VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_MAC, 0x00)
> #define VIRTIO_CRYPTO_KDF
>   VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_KDF, 0x00)
> #define VIRTIO_CRYPTO_ASYM_KEY_GEN
>   VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_ASYM, 0x00)
> #define VIRTIO_CRYPTO_ASYM_KEY_EXCHG
>   VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_ASYM, 0x01)
> #define VIRTIO_CRYPTO_ASYM_SIGN
>   VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_ASYM, 0x02)
> #define VIRTIO_CRYPTO_ASYM_VERIFY
>   VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_ASYM, 0x03)
> #define VIRTIO_CRYPTO_ASYM_ENCRYPT
>   VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_ASYM, 0x04)
> #define VIRTIO_CRYPTO_ASYM_DECRYPT
>   VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_ASYM, 0x05)
> #define VIRTIO_CRYPTO_AEAD_ENCRYPT
>   VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_AEAD, 0x00)
> #define VIRTIO_CRYPTO_AEAD_DECRYPT
>   VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_AEAD, 0x01)
> #define VIRTIO_CRYPTO_PRIMITIVE
>   VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_S_PRIMITIVE, 0x00)
>   u32 opcode;
>   u8 algo; /*service-specific algorithms*/
>   u8 flag; /*control flag*/

We'd better add a U64 session_id property here for service-specific algorithms.

> };
> 
> Take rsa_sign_request as example,
> A rsa sign service specific request is defined as:
> struct virtio_crypto_asym_rsa_sign_req{
>   struct virtio_crypto_rsa_sign_para parameter;
>   struct virtio_crypto_rsa_sign_input idata;
>   struct virtio_crypto_rsa_sign_output odata;
> };
> 
> A complete crypto service request is defined as:
> struct virtio_crypto_op_data_req {
>struct virtio_crypto_op_header header;
>   union {
>struct virtio_crypto_asym_rsa_sign_req
> rsa_sign_req;
>/*other service request*/
>   }u;
> };
> 
I wanted to do this in fact. ;) 

> More detailed comments are embedded below:
> 
> > Changes from v3:
> >  - Don't use enum in the spec but macros in specific structures. [Michael &
> > Stefan]
> >  - Add two complete structures for session creation and closing, so that
> >   the spec is clear on how to lay out the request.  [Stefan]
> >  - Define the crypto operation request with assigned structure, in this 
> > way,
> >   each data request only occupies *one entry* of the Vring descriptor table,
> >   which *improves* the *throughput* of data transferring.
> >
> > Changes from v2:
> >  - Reserve virtio device ID 20 for crypto device. [Cornelia]
> >  - Drop all feature bits, those capabilities are offered by the device all 
> > the
> > time.  [Stefan & Cornelia]
> >  - Add a new section 1.4.2 for driver requirements. [Stefan]
> >  - Use definite type definition instead of enum type in some structure.
> > [Stefan]
> >  - Add virtio_crypto_cipher_alg definition. [Stefan]
> >  - Add a "Device requirements" section as using MUST. [Stefan]

[Qemu-devel] [Bug 1605443] [NEW] QEMU epoll for i386-linux-user on arm host is broken in 2.6

2016-07-21 Thread Icenowy Zheng
Public bug reported:

I'm trying to get wine running on qemu-i386 on arm.

I found that 2.5.1 is OK, but 2.6 is not.

By bisecting, I found commit 928bed6a057cedd6110e634865e021a24029785a is
the problem.

I reverted this commit, and then epoll is OK now.

It seems that the commit broke epoll of qemu-i386 on arm.

** Affects: qemu
 Importance: Undecided
 Status: New

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1605443

Title:
  QEMU epoll for i386-linux-user on arm host is broken in 2.6

Status in QEMU:
  New

Bug description:
  I'm trying to get wine running on qemu-i386 on arm.

  I found that 2.5.1 is OK, but 2.6 is not.

  By bisecting, I found commit 928bed6a057cedd6110e634865e021a24029785a
  is the problem.

  I reverted this commit, and then epoll is OK now.

  It seems that the commit broke epoll of qemu-i386 on arm.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1605443/+subscriptions



Re: [Qemu-devel] [PULL v3 00/55] pc, pci, virtio: new features, cleanups, fixes

2016-07-21 Thread Fam Zheng
On Thu, 07/21 11:45, Peter Maydell wrote:
> On 21 July 2016 at 11:36, Peter Xu  wrote:
> > On Thu, Jul 21, 2016 at 10:20:38AM +0100, Peter Maydell wrote:
> >> Fails to build:
> >>
> >> /home/petmay01/linaro/qemu-for-merges/hw/i386/intel_iommu.c: In
> >> function ‘vtd_remap_irq_get’:
> >> /home/petmay01/linaro/qemu-for-merges/hw/i386/intel_iommu.c:2092:5:
> >> error: missing braces around initializer [-Werror=missing-braces]
> >>  VTD_IRTE irte = { 0 };
> >>  ^
> >> /home/petmay01/linaro/qemu-for-merges/hw/i386/intel_iommu.c:2092:5:
> >> error: (near initialization for ‘irte.’)
> >> [-Werror=missing-braces]
> 
> > Is this a compiler specific issue? Since this can pass compile test on
> > my machine (both local build, or make docker-test). Do you have any
> > suggestion on how we can avoid this kind of errors in the future?
> 
> It failed on several of my test builds, not just one, but these
> things are tricky to avoid if they don't happen on all compilers.
> In this case I think it is a compiler bug:
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53119
> so you'll only see it with an older compiler.

Could you name the distro and gcc version? If it's worth to keep the buggy
compiler happy, it probably can be added as a docker test. :)

Fam



Re: [Qemu-devel] [PATCH v6 11/16] block: simplify drive-backup

2016-07-21 Thread Eric Blake
On 07/21/2016 01:40 PM, Denis V. Lunev wrote:
> From: Pavel Butsykin 
> 
> Now that we can support boxed commands, use it to greatly reduce the
> number of parameters (and likelihood of getting out of sync) when
> adjusting drive-backup parameters..

s/.././

> 
> Signed-off-by: Pavel Butsykin 
> Reviewed-by: Stefan Hajnoczi 
> Signed-off-by: Denis V. Lunev 
> CC: Jeff Cody 
> CC: Markus Armbruster 
> CC: Eric Blake 
> CC: John Snow 
> CC: Stefan Hajnoczi 
> CC: Kevin Wolf 
> ---
>  blockdev.c   | 115 
> +--
>  hmp.c|  29 -
>  qapi/block-core.json |   3 +-
>  3 files changed, 58 insertions(+), 89 deletions(-)
> 

> +++ b/hmp.c
> @@ -1109,8 +1109,24 @@ void hmp_drive_backup(Monitor *mon, const QDict *qdict)
>  const char *format = qdict_get_try_str(qdict, "format");
>  bool reuse = qdict_get_try_bool(qdict, "reuse", false);
>  bool full = qdict_get_try_bool(qdict, "full", false);
> -enum NewImageMode mode;
>  Error *err = NULL;
> +DriveBackup backup = {
> +.device = (char *)device,
> +.target = (char *)filename,
> +.has_format = !!format,
> +.format = (char *)format,
> +.sync = full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP,
> +.has_mode = true,
> +.mode = reuse ? NEW_IMAGE_MODE_EXISTING : 
> NEW_IMAGE_MODE_ABSOLUTE_PATHS,

Good.

> +.has_speed = false,
> +.speed = 0,
> +.has_bitmap = false,
> +.bitmap = NULL,
> +.has_on_source_error = false,
> +.on_source_error = 0,
> +.has_on_target_error = false,
> +.on_target_error = 0,

I'd drop these. C guarantees that they'll be zero-initialized, and
unless HMP has a way to set them to non-default values, it's not worth
wasting the lines of code.

With those fixups,
Reviewed-by: Eric Blake 

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [Qemu-trivial] [PATCH] configure: Rename CONFIG_QGA_NTDDDISK into CONFIG_QGA_NTDDSCSI

2016-07-21 Thread Michael Roth
Quoting Michael Roth (2016-06-30 14:04:15)
> Quoting Thomas Huth (2016-06-10 10:25:54)
> > There is no CONFIG_QGA_NTDDDISK define used anywhere in the QEMU
> > sources. Looking at the changelog and qga/commands-win32.c, it
> > seems like this should be called CONFIG_QGA_NTDDSCSI instead.
> > 
> > Signed-off-by: Thomas Huth 
> 
> Thanks, applied to qga tree:
>   https://github.com/mdroth/qemu/commits/qga
> 
> Although this did indeed uncover a bug in guest-get-fsinfo.
> Should have a fix soon, but will want that in place before
> I send a pull.

So, that fix turned into a fairly heavy rework of how we map
PCI devices to filesystems in the current w32 implementation
of guest-get-fsinfo and I still don't quite have it working.
It probably won't make 2.7 at this point.

Unfortunately, as things stand, enabling the disk info
support on w32 via your patch causes the entire command to
fail, which would be a regression from the current support
level.

So for now I think the most expedient thing to do would be
to hold off on this patch till 2.8, at which point we can
apply it along with the disk info fixes I'm working on.

> 
> > ---
> >  configure | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> > 
> > diff --git a/configure b/configure
> > index 8c2f90b..55019d6 100755
> > --- a/configure
> > +++ b/configure
> > @@ -4965,7 +4965,7 @@ if test "$mingw32" = "yes" ; then
> >  echo "WIN_SDK=\"$win_sdk\"" >> $config_host_mak
> >fi
> >if test "$guest_agent_ntddscsi" = "yes" ; then
> > -echo "CONFIG_QGA_NTDDDISK=y" >> $config_host_mak
> > +echo "CONFIG_QGA_NTDDSCSI=y" >> $config_host_mak
> >fi
> >if test "$guest_agent_msi" = "yes"; then
> >  echo "QEMU_GA_MSI_ENABLED=yes" >> $config_host_mak  
> > -- 
> > 1.8.3.1
> > 
> 
> 




Re: [Qemu-devel] [PATCH v6 02/16] block: Convert bdrv_pwrite_compressed() to BdrvChild

2016-07-21 Thread Eric Blake
On 07/21/2016 01:40 PM, Denis V. Lunev wrote:
> From: Pavel Butsykin 
> 
> Signed-off-by: Pavel Butsykin 
> Signed-off-by: Denis V. Lunev 
> CC: Jeff Cody 
> CC: Markus Armbruster 
> CC: Eric Blake 
> CC: John Snow 
> CC: Stefan Hajnoczi 
> CC: Kevin Wolf 
> ---
>  block/block-backend.c | 2 +-
>  block/io.c| 3 ++-
>  include/block/block.h | 2 +-
>  3 files changed, 4 insertions(+), 3 deletions(-)

Reviewed-by: Eric Blake 

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [PATCH v6 01/16] block: switch blk_write_compressed() to byte-based interface

2016-07-21 Thread Eric Blake
On 07/21/2016 01:40 PM, Denis V. Lunev wrote:
> From: Pavel Butsykin 
> 
> This is a preparatory patch, which continues the general trend of the
> transition to the byte-based interfaces. bdrv_check_request() and
> blk_check_requestis no longer used, so we can remove.

s/requestis/request() are/
s/remove./remove them./

> 
> Signed-off-by: Pavel Butsykin 
> Reviewed-by: Stefan Hajnoczi 
> Signed-off-by: Denis V. Lunev 
> CC: Jeff Cody 
> CC: Markus Armbruster 
> CC: Eric Blake 
> CC: John Snow 
> CC: Stefan Hajnoczi 
> CC: Kevin Wolf 
> ---
>  block/block-backend.c  | 23 ---
>  block/io.c | 22 +++---
>  include/block/block.h  |  4 ++--
>  include/sysemu/block-backend.h |  4 ++--
>  qemu-img.c |  6 --
>  qemu-io-cmds.c |  2 +-
>  6 files changed, 20 insertions(+), 41 deletions(-)
> 

Reviewed-by: Eric Blake 

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature


[Qemu-devel] [PATCH v3 1/2] qdev: ignore GlobalProperty.errp for hotplugged devices

2016-07-21 Thread Greg Kurz
This patch ensures QEMU won't terminate while hotplugging a device if the
global property cannot be set and errp points to error_fatal or error_abort.

While here, it also fixes indentation of the typename argument.

Suggested-by: Eduardo Habkost 
Signed-off-by: Greg Kurz 
---
 hw/core/qdev-properties.c |4 ++--
 include/hw/qdev-core.h|4 +++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c
index 14e544ab17d2..311af6da7684 100644
--- a/hw/core/qdev-properties.c
+++ b/hw/core/qdev-properties.c
@@ -1084,7 +1084,7 @@ int qdev_prop_check_globals(void)
 }
 
 static void qdev_prop_set_globals_for_type(DeviceState *dev,
-const char *typename)
+   const char *typename)
 {
 GList *l;
 
@@ -1100,7 +1100,7 @@ static void qdev_prop_set_globals_for_type(DeviceState 
*dev,
 if (err != NULL) {
             error_prepend(&err, "can't apply global %s.%s=%s: ",
   prop->driver, prop->property, prop->value);
-if (prop->errp) {
+if (!dev->hotplugged && prop->errp) {
 error_propagate(prop->errp, err);
 } else {
 assert(prop->user_provided);
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index 1d1f8612a9b8..4b4b33bec885 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -261,7 +261,9 @@ struct PropertyInfo {
  * @used: Set to true if property was used when initializing a device.
  * @errp: Error destination, used like first argument of error_setg()
  *in case property setting fails later. If @errp is NULL, we
- *print warnings instead of ignoring errors silently.
+ *print warnings instead of ignoring errors silently. For
+ *hotplugged devices, errp is always ignored and warnings are
+ *printed instead.
  */
 typedef struct GlobalProperty {
 const char *driver;




[Qemu-devel] [PATCH v3 2/2] vl: exit if a bad property value is passed to -global

2016-07-21 Thread Greg Kurz
When passing '-global driver=host-powerpc64-cpu,property=compat,value=foo'
on the command line, without this patch, we get the following warning per
device (which means many lines if the guest has many cpus):

qemu-system-ppc64: Warning: can't apply global host-powerpc64-cpu.compat=foo:
Invalid compatibility mode "foo"

... and QEMU continues execution, ignoring the property.

With this patch, we get a single line:

qemu-system-ppc64: can't apply global host-powerpc64-cpu.compat=foo:
Invalid compatibility mode "foo"

... and QEMU exits.

The previous behavior is kept for hotplugged devices since we don't want
QEMU to exit when doing device_add.

Reviewed-by: David Gibson 
Signed-off-by: Greg Kurz 
---
v3: - set directly the global property errp to &error_fatal
---
 vl.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/vl.c b/vl.c
index a455947b4f32..e7c2c628de29 100644
--- a/vl.c
+++ b/vl.c
@@ -2922,6 +2922,7 @@ static int global_init_func(void *opaque, QemuOpts *opts, 
Error **errp)
 g->property = qemu_opt_get(opts, "property");
 g->value= qemu_opt_get(opts, "value");
 g->user_provided = true;
+g->errp = &error_fatal;
 qdev_prop_register_global(g);
 return 0;
 }




[Qemu-devel] [PATCH v3 0/2] improve error handling of global properties

2016-07-21 Thread Greg Kurz
As suggested by Eduardo, this series splits the error handling of global
properties into two separate patches.

---

Greg Kurz (2):
  qdev: ignore GlobalProperty.errp for hotplugged devices
  vl: exit if a bad property value is passed to -global


 hw/core/qdev-properties.c |4 ++--
 include/hw/qdev-core.h|4 +++-
 vl.c  |1 +
 3 files changed, 6 insertions(+), 3 deletions(-)

--
Greg




Re: [Qemu-devel] [PATCH v2 09/12] qapi: remove the "middle" mode

2016-07-21 Thread Eric Blake
On 07/21/2016 08:00 AM, marcandre.lur...@redhat.com wrote:
> From: Marc-André Lureau 
> 
> Now that the register function is always generated, we can
> remove the so-called "middle" mode from the generator script.
> 
> Signed-off-by: Marc-André Lureau 
> ---
>  scripts/qapi-commands.py | 29 +
>  1 file changed, 5 insertions(+), 24 deletions(-)
> 
> diff --git a/scripts/qapi-commands.py b/scripts/qapi-commands.py
> index a06a2c4..4754ae0 100644
> --- a/scripts/qapi-commands.py
> +++ b/scripts/qapi-commands.py
> @@ -84,17 +84,8 @@ static void qmp_marshal_output_%(c_name)s(%(c_type)s 
> ret_in, QObject **ret_out,
>  
>  
>  def gen_marshal_proto(name):
> -ret = 'void qmp_marshal_%s(QDict *args, QObject **ret, Error **errp)' % 
> c_name(name)
> -if not middle_mode:
> -ret = 'static ' + ret
> -return ret
> -
> -
> -def gen_marshal_decl(name):
> -return mcgen('''
> -%(proto)s;
> -''',
> - proto=gen_marshal_proto(name))
> +return 'static void qmp_marshal_%s' % c_name(name) + \
> +'(QDict *args, QObject **ret, Error **errp)'

I'm wondering if this should be:

return mcgen('''
static void qmp_marshal_%(c_name)s(QDict *args, QObject **ret, Error **errp)
''',
 c_name=c_name(name))

for consistency with our other code (I'm not sure why we weren't already
using mcgen(), though).

Otherwise, nice to see it go!

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [PATCH v2 08/12] build-sys: remove qmp-commands-old.h

2016-07-21 Thread Eric Blake
On 07/21/2016 08:00 AM, marcandre.lur...@redhat.com wrote:
> From: Marc-André Lureau 
> 
> Signed-off-by: Marc-André Lureau 
> ---
>  Makefile.target | 7 ++-
>  1 file changed, 2 insertions(+), 5 deletions(-)
> 

Reviewed-by: Eric Blake 

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [PATCH v2 07/12] monitor: implement 'qmp_query_commands' without qmp_cmds

2016-07-21 Thread Eric Blake
On 07/21/2016 08:00 AM, marcandre.lur...@redhat.com wrote:
> From: Marc-André Lureau 
> 
> So we can get rid of the static qmp_cmds table.
> 
> Signed-off-by: Marc-André Lureau 
> ---
>  monitor.c | 32 ++--
>  1 file changed, 14 insertions(+), 18 deletions(-)
> 

Reviewed-by: Eric Blake 

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [PATCH V3] hw/virtio-pci: fix virtio behaviour

2016-07-21 Thread Michael S. Tsirkin
On Thu, Jul 21, 2016 at 11:58:52PM +0200, Gerd Hoffmann wrote:
>   Hi,
> 
> > Actually this can still break existing scripts:
> > stick a device on express bus but add disable-modern=on
> > Gave you a legacy device previously but it no longer does.
> 
> Unlikely to happen in practice because there is little reason to use
> disable-modern=on in 2.6 & older because that is the default ...

Good point, I forgot.

> Still we can default to legacy=yes in case disable-modern=on +
> disable-legacy=auto.

Given the above I'm not sure it's worth it. I'll leave it to Marcel
to decide.

>  And throw an error in case both modern and legacy
> are explicitly disabled (as already suggested elsewhere in this thread).
> 
> cheers,
>   Gerd



Re: [Qemu-devel] [PATCH V3] hw/virtio-pci: fix virtio behaviour

2016-07-21 Thread Gerd Hoffmann
  Hi,

> Actually this can still break existing scripts:
> stick a device on express bus but add disable-modern=on
> Gave you a legacy device previously but it no longer does.

Unlikely to happen in practice because there is little reason to use
disable-modern=on in 2.6 & older because that is the default ...

Still we can default to legacy=yes in case disable-modern=on +
disable-legacy=auto.  And throw an error in case both modern and legacy
are explicitly disabled (as already suggested elsewhere in this thread).

cheers,
  Gerd




Re: [Qemu-devel] [PATCH 0/8] Fix migration issues with arbitrary cpu-hot(un)plug

2016-07-21 Thread Michael S. Tsirkin
On Thu, Jul 21, 2016 at 05:54:31PM +0200, Igor Mammedov wrote:
> Series fixes migration issues caused by unstable cpu_index which depended
> on order cpus were created/destroyed. It follows David's idea to make
> cpu_index assignable by selected boards if board supports cpu-hotplug
> with device_add and needs stable cpu_index/'migration id' but leaves
> behaviour the same as before for users that don't care about
> cpu-hot(un)plug making changes low-risk.
> 
> tested with:
>   SRC -snapshot -enable-kvm -smp 1,maxcpus=3 -m 256M guest.img -monitor stdio 
> \
>-device qemu64-x86_64-cpu,id=cpudel,apic-id=1 \
>-device qemu64-x86_64-cpu,apic-id=2 
>   (qemu) device_del cpudel
>   (qemu) stop
>   (qemu) migrate "exec:gzip -c > STATEFILE.gz"
>   
>   DST -snapshot -enable-kvm -smp 1,maxcpus=3 -m 256M guest.img -monitor stdio 
> \
>   -device qemu64-x86_64-cpu,apic-id=2 \
>   -incoming "exec: gzip -c -d STATEFILE.gz"
> 
> git tree to test with:
>  https://github.com/imammedo/qemu cpu-index-stable
>  to view
>  https://github.com/imammedo/qemu/commits/cpu-index-stable

For PC bits:

Reviewed-by: Michael S. Tsirkin 

This would be nice to have in 2.7.

Who's reviewing/merging the rest? Eduardo?


> CC: Paolo Bonzini 
> CC: Peter Crosthwaite 
> CC: Richard Henderson 
> CC: Eduardo Habkost 
> CC: "Michael S. Tsirkin" 
> CC: David Gibson 
> CC: Alexander Graf 
> CC: Riku Voipio 
> CC: Bharata B Rao 
> CC: qemu-...@nongnu.org
> 
> David Gibson (1):
>   Revert "spapr: Ensure CPU cores are added contiguously and removed in
> LIFO order"
> 
> Igor Mammedov (7):
>   exec: reduce CONFIG_USER_ONLY ifdeffenery
>   exec: don't use cpu_index to detect if cpu_exec_init()'s been called
> for cpu
>   exec: set cpu_index only if it's been explictly set
>   qdev: fix object reference leak in case device.realize() fails
>   pc: init CPUState->cpu_index with index in possible_cpus[]
>   spapr: init CPUState->cpu_index with index relative to core-id
>   Revert "pc: Enforce adding CPUs contiguously and removing them in
> opposite order"
> 
>  bsd-user/qemu.h |  2 --
>  include/exec/exec-all.h | 12 +
>  include/qemu/queue.h|  2 ++
>  include/qom/cpu.h   |  2 ++
>  linux-user/qemu.h   |  2 --
>  exec.c  | 65 
> +
>  hw/core/qdev.c  |  8 +-
>  hw/i386/pc.c| 38 +++--
>  hw/ppc/spapr_cpu_core.c | 25 ---
>  qom/cpu.c   |  2 +-
>  10 files changed, 44 insertions(+), 114 deletions(-)
> 
> -- 
> 2.7.4



Re: [Qemu-devel] [PATCH v4] virtio-pci: error out when both legacy and modern modes are disabled

2016-07-21 Thread Greg Kurz
On Thu, 21 Jul 2016 23:21:16 +0200
Greg Kurz  wrote:

> From: Greg Kurz 
> 
> Without presuming if we got there because of a user mistake or some
> more subtle bug in the tooling, it really does not make sense to
> implement a non-functional device.
> 
> Signed-off-by: Greg Kurz 
> Reviewed-by: Marcel Apfelbaum 
> Signed-off-by: Greg Kurz 
> ---
> v4: - rephrased error message and provide a hint to the user
> - split string literals to stay below 80 characters
> - added Marcel's R-b tag
> ---

Marcel,

I see that Michael has comments on your patch. If you feel this patch is 
valuable
for 2.7, please consider carrying and pushing it, as I'm about to take a 1-month
leave.

Thanks.

--
Greg

>  hw/virtio/virtio-pci.c |8 
>  1 file changed, 8 insertions(+)
> 
> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> index 755f9218b77d..72c4b392ffda 100644
> --- a/hw/virtio/virtio-pci.c
> +++ b/hw/virtio/virtio-pci.c
> @@ -1842,6 +1842,14 @@ static void virtio_pci_dc_realize(DeviceState *qdev, 
> Error **errp)
>  VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev);
>  PCIDevice *pci_dev = &proxy->pci_dev;
>  
> +if (!(virtio_pci_modern(proxy) || virtio_pci_legacy(proxy))) {
> +error_setg(errp, "device cannot work when both modern and legacy 
> modes"
> +   " are disabled");
> +error_append_hint(errp, "Set either disable-modern or disable-legacy"
> +  " to off\n");
> +return;
> +}
> +
>  if (!(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_PCIE) &&
>  virtio_pci_modern(proxy)) {
>  pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
> 
> 




Re: [Qemu-devel] [PATCH V3] hw/virtio-pci: fix virtio behaviour

2016-07-21 Thread Michael S. Tsirkin
On Wed, Jul 20, 2016 at 06:28:21PM +0300, Marcel Apfelbaum wrote:
> Enable transitional virtio devices by default.
> Enable virtio-1.0 for devices plugged into
> PCIe ports (Root ports or Downstream ports).
> 
> Using the virtio-1 mode will remove the limitation
> of the number of devices that can be attached to a machine
> by removing the need for the IO BAR.
> 
> Signed-off-by: Marcel Apfelbaum 
> ---
> 
> Hi,
> 
> v2 -> v3:
>   - Various code tweaks to simplify if statements (Michael)
>   - Enable virtio modern by default (Gerd and Cornelia)
>   - Replace virtio flags with actual fields (Gerd)
>   - Wrappers for more readable code
> 
> v1 -> v2:
>   - Stick to existing defaults for old machine types (Michael S. Tsirkin)

Actually this can still break existing scripts:
stick a device on express bus but add disable-modern=on
Gave you a legacy device previously but it no longer does.

> If everyone agrees, I am thinking about getting it into 2.7
> to avoid the ~15 virtio devices limitation per machine.
> 
> My tests were limited to checking all possible disable-* configurations (and 
> make check for all archs)
> 
> Thanks,
> Marcel
> 
>  hw/display/virtio-gpu-pci.c |  4 +---
>  hw/display/virtio-vga.c |  4 +---
>  hw/virtio/virtio-pci.c  | 34 ++
>  hw/virtio/virtio-pci.h  | 21 +
>  include/hw/compat.h |  8 
>  5 files changed, 45 insertions(+), 26 deletions(-)
> 
> diff --git a/hw/display/virtio-gpu-pci.c b/hw/display/virtio-gpu-pci.c
> index a71b230..34a724c 100644
> --- a/hw/display/virtio-gpu-pci.c
> +++ b/hw/display/virtio-gpu-pci.c
> @@ -30,9 +30,7 @@ static void virtio_gpu_pci_realize(VirtIOPCIProxy 
> *vpci_dev, Error **errp)
>  int i;
>  
>  qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus));
> -/* force virtio-1.0 */
> -vpci_dev->flags &= ~VIRTIO_PCI_FLAG_DISABLE_MODERN;
> -vpci_dev->flags |= VIRTIO_PCI_FLAG_DISABLE_LEGACY;
> +virtio_pci_force_virtio_1(vpci_dev);
>  object_property_set_bool(OBJECT(vdev), true, "realized", errp);
>  
>  for (i = 0; i < g->conf.max_outputs; i++) {
> diff --git a/hw/display/virtio-vga.c b/hw/display/virtio-vga.c
> index 315b7fc..5b510a1 100644
> --- a/hw/display/virtio-vga.c
> +++ b/hw/display/virtio-vga.c
> @@ -134,9 +134,7 @@ static void virtio_vga_realize(VirtIOPCIProxy *vpci_dev, 
> Error **errp)
>  
>  /* init virtio bits */
>  qdev_set_parent_bus(DEVICE(g), BUS(&vpci_dev->bus));
> -/* force virtio-1.0 */
> -vpci_dev->flags &= ~VIRTIO_PCI_FLAG_DISABLE_MODERN;
> -vpci_dev->flags |= VIRTIO_PCI_FLAG_DISABLE_LEGACY;
> +virtio_pci_force_virtio_1(vpci_dev);
>  object_property_set_bool(OBJECT(g), true, "realized", &err);
>  if (err) {
>  error_propagate(errp, err);
> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> index 2b34b43..11cd634 100644
> --- a/hw/virtio/virtio-pci.c
> +++ b/hw/virtio/virtio-pci.c
> @@ -161,7 +161,7 @@ static bool virtio_pci_modern_state_needed(void *opaque)
>  {
>  VirtIOPCIProxy *proxy = opaque;
>  
> -return !(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_MODERN);
> +return virtio_pci_modern(proxy);
>  }
>  
>  static const VMStateDescription vmstate_virtio_pci_modern_state = {
> @@ -300,8 +300,8 @@ static int virtio_pci_ioeventfd_assign(DeviceState *d, 
> EventNotifier *notifier,
>  VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
>  VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
>  VirtQueue *vq = virtio_get_queue(vdev, n);
> -bool legacy = !(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_LEGACY);
> -bool modern = !(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_MODERN);
> +bool legacy = virtio_pci_legacy(proxy);
> +bool modern = virtio_pci_modern(proxy);
>  bool fast_mmio = kvm_ioeventfd_any_length_enabled();
>  bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY;
>  MemoryRegion *modern_mr = &proxy->notify.mr;
> @@ -1576,8 +1576,8 @@ static void virtio_pci_device_plugged(DeviceState *d, 
> Error **errp)
>  {
>  VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
>  VirtioBusState *bus = &proxy->bus;
> -bool legacy = !(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_LEGACY);
> -bool modern = !(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_MODERN);
> +bool legacy = virtio_pci_legacy(proxy);
> +bool modern = virtio_pci_modern(proxy);
>  bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY;
>  uint8_t *config;
>  uint32_t size;
> @@ -1696,7 +1696,7 @@ static void virtio_pci_device_plugged(DeviceState *d, 
> Error **errp)
>  static void virtio_pci_device_unplugged(DeviceState *d)
>  {
>  VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
> -bool modern = !(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_MODERN);
> +bool modern = virtio_pci_modern(proxy);
>  bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY;
>  
>  virtio_pci_stop_ioeventfd(proxy);
> @@ -1716,6 +1716,8 @@ static void 

[Qemu-devel] [PATCH v4] virtio-pci: error out when both legacy and modern modes are disabled

2016-07-21 Thread Greg Kurz
From: Greg Kurz 

Without presuming if we got there because of a user mistake or some
more subtle bug in the tooling, it really does not make sense to
implement a non-functional device.

Signed-off-by: Greg Kurz 
Reviewed-by: Marcel Apfelbaum 
Signed-off-by: Greg Kurz 
---
v4: - rephrased error message and provide a hint to the user
- split string literals to stay below 80 characters
- added Marcel's R-b tag
---
 hw/virtio/virtio-pci.c |8 
 1 file changed, 8 insertions(+)

diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 755f9218b77d..72c4b392ffda 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1842,6 +1842,14 @@ static void virtio_pci_dc_realize(DeviceState *qdev, 
Error **errp)
 VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev);
 PCIDevice *pci_dev = &proxy->pci_dev;
 
+if (!(virtio_pci_modern(proxy) || virtio_pci_legacy(proxy))) {
+error_setg(errp, "device cannot work when both modern and legacy modes"
+   " are disabled");
+error_append_hint(errp, "Set either disable-modern or disable-legacy"
+  " to off\n");
+return;
+}
+
 if (!(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_PCIE) &&
 virtio_pci_modern(proxy)) {
 pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;




Re: [Qemu-devel] [PATCH v2 03/12] monitor: register gen:false commands manually

2016-07-21 Thread Eric Blake
On 07/21/2016 08:00 AM, marcandre.lur...@redhat.com wrote:
> From: Marc-André Lureau 
> 
> Since a few commands are using 'gen': false, they are not registered
> automatically by the generator. Register manually instead.
> 
> This is in preparation for removal of qapi 'middle' mode generation.
> 
> Signed-off-by: Marc-André Lureau 
> Reviewed-by: Eric Blake 
> ---
>  monitor.c | 13 +
>  1 file changed, 13 insertions(+)
> 

> +static void qmp_init_marshal(void)
> +{
> +qmp_register_command("query-qmp-schema", qmp_query_qmp_schema,
> + QCO_NO_OPTIONS);
> +qmp_register_command("device_add", qmp_device_add,
> + QCO_NO_OPTIONS);
> +qmp_register_command("netdev_add", qmp_netdev_add,
> + QCO_NO_OPTIONS);

Of course, if my netdev_add patch goes in first (currently on Markus'
qapi-not-next branch, because we decided it was too risky for 2.7 hard
freeze), then this last line is not needed.

R-b still stands.

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [PATCH v2 06/12] monitor: remove mhandler.cmd_new

2016-07-21 Thread Eric Blake
On 07/21/2016 08:00 AM, marcandre.lur...@redhat.com wrote:
> From: Marc-André Lureau 
> 
> This is no longer necessary, now that middle mode has been removed.
> 
> Signed-off-by: Marc-André Lureau 
> ---
>  monitor.c |  13 +--
>  docs/writing-qmp-commands.txt |   8 +-
>  hmp-commands-info.hx  | 118 
>  hmp-commands.hx   | 208 
> +-
>  4 files changed, 170 insertions(+), 177 deletions(-)
> 

> +++ b/docs/writing-qmp-commands.txt

> @@ -459,7 +457,6 @@ The last step is to add the correspoding entry in the 
> qmp-commands.hx file:
>  {

Might be worth s/correspoding/corresponding/ while touching this file.

Reviewed-by: Eric Blake 

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [PATCH] test: port postcopy test to ppc64

2016-07-21 Thread Thomas Huth
On 21.07.2016 15:46, Laurent Vivier wrote:
> 
> 
> On 21/07/2016 15:16, Thomas Huth wrote:
>> On 21.07.2016 12:12, Laurent Vivier wrote:
>>> As userfaultfd syscall is available on powerpc, migration
>>> postcopy can be used.
>>
>> Good idea to add this test for PPC, too!
> 
> The idea comes from David Gilbert :)
> Thanks David!
> 
>>
>>> This patch adds the support needed to test this on powerpc,
>>> instead of using a bootsector to run code to modify memory,
>>> we use a FORTH script in "boot-command" property.
>>>
>>> As spapr machine doesn't support "-prom-env" argument
>>> (the nvram is initialized by SLOF and not by QEMU),
>>> "boot-command" is provided to SLOF via a file mapped nvram
>>> (with "-drive file=...,if=pflash")
>>
>> I wonder whether we could easily add support for the "-prom-env"
>> parameter for the sPAPR machine, too, since the NVRAM layout seems to be
>> pretty much the same as on the old CHRP Mac machines...?
> 
> The only thing we need to have "-prom-env" is to initialize the nvram
> partitions in QEMU if they don't exist.
> 
> I didn't follow this way because this work is normally to be done by the
> firmware not by QEMU. If we do it in QEMU, SLOF will not do it, and we
> can't be sure we are doing the things well, as SLOF does.

I'm slightly familiar with the NVRAM code in SLOF ... shouldn't be too
hard to get a similar layout done with QEMU, especially if we can re-use
some of the OpenBIOS NVRAM functions in QEMU.

> On Mac, it is another story: originally, firmware was OpenHackware,
> not OpenBIOS, and OH was only here to allow to boot the kernel nothing
> more (it was really a hack: there was no Forth interpreter [oh, my god!]).
> So all the important stuffs were done by QEMU, like to initialize the
> NVRAM, the PCI bus, ... and this allows to set values in the environment
> variables.
> 
> But if you think it is viable, it should be really easy to do (like
> cut'n'paste from OpenBIOS ABI)...

Having -prom-env for SLOF, too, would be really posh, so I think I'll
have a try when I got some spare minutes ... hmm, my TODO list seems to
keep rather growing than shrinking ... maybe I'm doing something wrong ;-)

 Thomas




Re: [Qemu-devel] [PATCH v2] test: port postcopy test to ppc64

2016-07-21 Thread Thomas Huth
On 21.07.2016 18:47, Laurent Vivier wrote:
> As userfaultfd syscall is available on powerpc, migration
> postcopy can be used.
> 
> This patch adds the support needed to test this on powerpc,
> instead of using a bootsector to run code to modify memory,
> we use a FORTH script in "boot-command" property.
> 
> As spapr machine doesn't support "-prom-env" argument
> (the nvram is initialized by SLOF and not by QEMU),
> "boot-command" is provided to SLOF via a file mapped nvram
> (with "-drive file=...,if=pflash")
> 
> Signed-off-by: Laurent Vivier 
> ---
> v2: move FORTH script directly in sprintf()
> use openbios_firmware_abi.h
> remove useless "default" case
> 
>  tests/Makefile.include |   1 +
>  tests/postcopy-test.c  | 116 
> +
>  2 files changed, 98 insertions(+), 19 deletions(-)
> 
> diff --git a/tests/Makefile.include b/tests/Makefile.include
> index e7e50d6..e2d1885 100644
> --- a/tests/Makefile.include
> +++ b/tests/Makefile.include
> @@ -268,6 +268,7 @@ check-qtest-sparc-y += tests/prom-env-test$(EXESUF)
>  #check-qtest-sparc64-y += tests/prom-env-test$(EXESUF)
>  check-qtest-microblazeel-y = $(check-qtest-microblaze-y)
>  check-qtest-xtensaeb-y = $(check-qtest-xtensa-y)
> +check-qtest-ppc64-y += tests/postcopy-test$(EXESUF)
>  
>  check-qtest-generic-y += tests/qom-test$(EXESUF)
>  
> diff --git a/tests/postcopy-test.c b/tests/postcopy-test.c
> index 16465ab..229e9e9 100644
> --- a/tests/postcopy-test.c
> +++ b/tests/postcopy-test.c
> @@ -18,6 +18,9 @@
>  #include "qemu/sockets.h"
>  #include "sysemu/char.h"
>  #include "sysemu/sysemu.h"
> +#include "hw/nvram/openbios_firmware_abi.h"
> +
> +#define MIN_NVRAM_SIZE 8192 /* from spapr_nvram.c */
>  
>  const unsigned start_address = 1024 * 1024;
>  const unsigned end_address = 100 * 1024 * 1024;
> @@ -122,6 +125,44 @@ unsigned char bootsect[] = {
>0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x55, 0xaa
>  };
>  
> +static void init_bootfile_x86(const char *bootpath)
> +{
> +FILE *bootfile = fopen(bootpath, "wb");
> +
> +g_assert_cmpint(fwrite(bootsect, 512, 1, bootfile), ==, 1);
> +fclose(bootfile);
> +}
> +
> +static void init_bootfile_ppc(const char *bootpath)
> +{
> +FILE *bootfile;
> +char buf[MIN_NVRAM_SIZE];
> +struct OpenBIOS_nvpart_v1 *header = (struct OpenBIOS_nvpart_v1 *)buf;
> +
> +memset(buf, 0, MIN_NVRAM_SIZE);
> +
> +/* Create a "common" partition in nvram to store boot-command property */
> +
> +header->signature = OPENBIOS_PART_SYSTEM;
> +memcpy(header->name, "common", 6);
> +OpenBIOS_finish_partition(header, MIN_NVRAM_SIZE);
> +
> +/* FW_MAX_SIZE is 4MB, but slof.bin is only 900KB,
> + * so let's modify memory between 1MB and 100MB
> + * to do like PC bootsector
> + */
> +
> +sprintf(buf + 16,
> +"boot-command=hex .\" _\" begin %x %x do i c@ 1 + i c! 1000 +loop "
> +".\" B\" 0 until", end_address, start_address);
> +
> +/* Write partition to the NVRAM file */
> +
> +bootfile = fopen(bootpath, "wb");
> +g_assert_cmpint(fwrite(buf, MIN_NVRAM_SIZE, 1, bootfile), ==, 1);
> +fclose(bootfile);
> +}
> +
>  /*
>   * Wait for some output in the serial output file,
>   * we get an 'A' followed by an endless string of 'B's
> @@ -131,10 +172,29 @@ static void wait_for_serial(const char *side)
>  {
>  char *serialpath = g_strdup_printf("%s/%s", tmpfs, side);
>  FILE *serialfile = fopen(serialpath, "r");
> +const char *arch = qtest_get_arch();
> +int started = (strcmp(side, "src_serial") == 0 &&
> +   strcmp(arch, "ppc64") == 0) ? 0 : 1;
>  
>  do {
>  int readvalue = fgetc(serialfile);
>  
> +if (!started) {
> +/* SLOF prints its banner before starting test,
> + * to ignore it, mark the start of the test with '_',
> + * ignore all characters until this marker
> + */
> +switch (readvalue) {
> +case '_':
> +started = 1;
> +break;
> +case EOF:
> +fseek(serialfile, 0, SEEK_SET);
> +usleep(1000);
> +break;
> +}
> +continue;
> +}
>  switch (readvalue) {
>  case 'A':
>  /* Fine */
> @@ -147,6 +207,8 @@ static void wait_for_serial(const char *side)
>  return;
>  
>  case EOF:
> +started = (strcmp(side, "src_serial") == 0 &&
> +   strcmp(arch, "ppc64") == 0) ? 0 : 1;
>  fseek(serialfile, 0, SEEK_SET);
>  usleep(1000);
>  break;
> @@ -295,32 +357,48 @@ static void test_migrate(void)
>  char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
>  QTestState *global = global_qtest, *from, *to;
>  unsigned char dest_byte_a, dest_byte_b, dest_byte_c, dest_byte_d;
> -gchar *cmd;
> +gchar *cmd, *cmd_src, 

Re: [Qemu-devel] [PATCH v2 05/12] monitor: register the qapi generated commands

2016-07-21 Thread Eric Blake
On 07/21/2016 08:00 AM, marcandre.lur...@redhat.com wrote:
> From: Marc-André Lureau 
> 
> Stop using the so-called 'middle' mode. Instead, use qmp_find_command()
> from generated qapi commands registry.
> 
> Note: this commit requires a 'make clean' prior to make, since the
> generated files do not depend on Makefile (due to a cyclic rule
> introduced in 4115852bb0).
> 
> Signed-off-by: Marc-André Lureau 
> ---
>  monitor.c   |  15 --
>  Makefile|   2 +-
>  qmp-commands.hx | 143 
> 
>  vl.c|   1 +
>  4 files changed, 13 insertions(+), 148 deletions(-)
> 

Reviewed-by: Eric Blake 

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [PATCH v2 02/12] qapi-schema: add 'device_add'

2016-07-21 Thread Eric Blake
On 07/21/2016 08:00 AM, marcandre.lur...@redhat.com wrote:
> From: Marc-André Lureau 
> 
> Even though device_add is not fully qapi'fied, we may add it to the json
> schema with 'gen': false, so registration and documentation can be
> generated.
> 
> Signed-off-by: Marc-André Lureau 
> ---
>  qapi-schema.json | 29 +
>  1 file changed, 29 insertions(+)


> +++ b/qapi-schema.json
> @@ -2200,6 +2200,35 @@
>  ##
>  { 'command': 'xen-set-global-dirty-log', 'data': { 'enable': 'bool' } }
>  
> +##
> +# @device_add:
> +#
> +# @driver: the name of the new device's driver
> +# @bus: #optional the device's parent bus (device tree path)
> +# @id: the device's ID, must be unique
> +# @props: #optional a dictionary of properties to be passed to the backend
> +#
> +# Add a device.
> +#
> +# Notes:
> +# 1. For detailed information about this command, please refer to the
> +#'docs/qdev-device-use.txt' file.
> +#
> +# 2. It's possible to list device properties by running QEMU with the
> +#"-device DEVICE,help" command-line argument, where DEVICE is the
> +#device's name
> +#
> +# Example:
> +#
> +# -> { "execute": "device_add",
> +#  "arguments": { "driver": "e1000", "id": "net1" } }

Is it worth an example that includes 'bus' and/or 'props'?

> +# <- { "return": {} }
> +#
> +# Since: 0.13
> +##
> +{ 'command': 'device_add',
> +  'data': {'driver': 'str', 'id': 'str'}, 'gen': false }

The documentation mentions fields not listed here, but the 'gen':false
explains why. We may yet get device_add QAPIfied for 2.8, but there's
nothing wrong with documenting things now.

Reviewed-by: Eric Blake 


-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [PATCH V3] hw/virtio-pci: fix virtio behaviour

2016-07-21 Thread Michael S. Tsirkin
On Wed, Jul 20, 2016 at 06:28:21PM +0300, Marcel Apfelbaum wrote:
> Enable transitional virtio devices by default.
> Enable virtio-1.0 for devices plugged into

disable legacy is better, I agree.

> PCIe ports (Root ports or Downstream ports).
> 
> Using the virtio-1 mode will remove the limitation
> of the number of devices that can be attached to a machine
> by removing the need for the IO BAR.
> 
> Signed-off-by: Marcel Apfelbaum 

I think you also want to add some comment with a description explaining
*why* you are disabling legacy for these specific devices.


> ---
> 
> Hi,
> 
> v2 -> v3:
>   - Various code tweaks to simplify if statements (Michael)
>   - Enable virtio modern by default (Gerd and Cornelia)
>   - Replace virtio flags with actual fields (Gerd)
>   - Wrappers for more readable code
> 
> v1 -> v2:
>   - Stick to existing defaults for old machine types (Michael S. Tsirkin)
> 
> If everyone agrees, I am thinking about getting it into 2.7
> to avoid the ~15 virtio devices limitation per machine.
> 
> My tests were limited to checking all possible disable-* configurations (and 
> make check for all archs)
> 
> Thanks,
> Marcel
> 
>  hw/display/virtio-gpu-pci.c |  4 +---
>  hw/display/virtio-vga.c |  4 +---
>  hw/virtio/virtio-pci.c  | 34 ++
>  hw/virtio/virtio-pci.h  | 21 +
>  include/hw/compat.h |  8 
>  5 files changed, 45 insertions(+), 26 deletions(-)
> 
> diff --git a/hw/display/virtio-gpu-pci.c b/hw/display/virtio-gpu-pci.c
> index a71b230..34a724c 100644
> --- a/hw/display/virtio-gpu-pci.c
> +++ b/hw/display/virtio-gpu-pci.c
> @@ -30,9 +30,7 @@ static void virtio_gpu_pci_realize(VirtIOPCIProxy 
> *vpci_dev, Error **errp)
>  int i;
>  
>  qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus));
> -/* force virtio-1.0 */
> -vpci_dev->flags &= ~VIRTIO_PCI_FLAG_DISABLE_MODERN;
> -vpci_dev->flags |= VIRTIO_PCI_FLAG_DISABLE_LEGACY;
> +virtio_pci_force_virtio_1(vpci_dev);
>  object_property_set_bool(OBJECT(vdev), true, "realized", errp);
>  
>  for (i = 0; i < g->conf.max_outputs; i++) {
> diff --git a/hw/display/virtio-vga.c b/hw/display/virtio-vga.c
> index 315b7fc..5b510a1 100644
> --- a/hw/display/virtio-vga.c
> +++ b/hw/display/virtio-vga.c
> @@ -134,9 +134,7 @@ static void virtio_vga_realize(VirtIOPCIProxy *vpci_dev, 
> Error **errp)
>  
>  /* init virtio bits */
>  qdev_set_parent_bus(DEVICE(g), BUS(&vpci_dev->bus));
> -/* force virtio-1.0 */
> -vpci_dev->flags &= ~VIRTIO_PCI_FLAG_DISABLE_MODERN;
> -vpci_dev->flags |= VIRTIO_PCI_FLAG_DISABLE_LEGACY;
> +virtio_pci_force_virtio_1(vpci_dev);
>  object_property_set_bool(OBJECT(g), true, "realized", &err);
>  if (err) {
>  error_propagate(errp, err);
> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> index 2b34b43..11cd634 100644
> --- a/hw/virtio/virtio-pci.c
> +++ b/hw/virtio/virtio-pci.c
> @@ -161,7 +161,7 @@ static bool virtio_pci_modern_state_needed(void *opaque)
>  {
>  VirtIOPCIProxy *proxy = opaque;
>  
> -return !(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_MODERN);
> +return virtio_pci_modern(proxy);
>  }
>  
>  static const VMStateDescription vmstate_virtio_pci_modern_state = {
> @@ -300,8 +300,8 @@ static int virtio_pci_ioeventfd_assign(DeviceState *d, 
> EventNotifier *notifier,
>  VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
>  VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
>  VirtQueue *vq = virtio_get_queue(vdev, n);
> -bool legacy = !(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_LEGACY);
> -bool modern = !(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_MODERN);
> +bool legacy = virtio_pci_legacy(proxy);
> +bool modern = virtio_pci_modern(proxy);
>  bool fast_mmio = kvm_ioeventfd_any_length_enabled();
>  bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY;
>  MemoryRegion *modern_mr = &proxy->notify.mr;
> @@ -1576,8 +1576,8 @@ static void virtio_pci_device_plugged(DeviceState *d, 
> Error **errp)
>  {
>  VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
>  VirtioBusState *bus = &proxy->bus;
> -bool legacy = !(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_LEGACY);
> -bool modern = !(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_MODERN);
> +bool legacy = virtio_pci_legacy(proxy);
> +bool modern = virtio_pci_modern(proxy);
>  bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY;
>  uint8_t *config;
>  uint32_t size;
> @@ -1696,7 +1696,7 @@ static void virtio_pci_device_plugged(DeviceState *d, 
> Error **errp)
>  static void virtio_pci_device_unplugged(DeviceState *d)
>  {
>  VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
> -bool modern = !(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_MODERN);
> +bool modern = virtio_pci_modern(proxy);
>  bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY;
>  
>  virtio_pci_stop_ioeventfd(proxy);
> @@ -1716,6 +1716,8 @@ static void 

Re: [Qemu-devel] [PULL v5 00/57] pc, pci, virtio: new features, cleanups, fixes

2016-07-21 Thread Peter Maydell
On 21 July 2016 at 18:50, Michael S. Tsirkin  wrote:
> The following changes since commit 5d3217340adcb6c4f0e4af5d2b865331eb2ff63d:
>
>   disas: Fix ATTRIBUTE_UNUSED define clash with ALSA headers (2016-07-19 
> 16:40:39 +0100)
>
> are available in the git repository at:
>
>   git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream
>
> for you to fetch changes up to bc38ee10fc26338e21c01485540f815be1f3db28:
>
>   intel_iommu: avoid unnamed fields (2016-07-21 20:44:20 +0300)
>
> 
> pc, pci, virtio: new features, cleanups, fixes
>
> - interrupt remapping for intel iommus
> - a bunch of virtio cleanups
> - fixes all over the place
>
> Signed-off-by: Michael S. Tsirkin 

Applied to master; thanks for working through all the respins.

-- PMM



[Qemu-devel] [PATCH v6 07/16] qcow: add qcow_co_pwritev_compressed

2016-07-21 Thread Denis V. Lunev
From: Pavel Butsykin 

Added implementation of the qcow_co_pwritev_compressed function that
will allow us to safely use compressed writes for the qcow from running
VMs.

Signed-off-by: Pavel Butsykin 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Denis V. Lunev 
CC: Jeff Cody 
CC: Markus Armbruster 
CC: Eric Blake 
CC: John Snow 
CC: Stefan Hajnoczi 
CC: Kevin Wolf 
---
 block/qcow.c | 109 +++
 1 file changed, 42 insertions(+), 67 deletions(-)

diff --git a/block/qcow.c b/block/qcow.c
index 0c7b75b..e1d335d 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -913,75 +913,42 @@ static int qcow_make_empty(BlockDriverState *bs)
 return 0;
 }
 
-typedef struct QcowWriteCo {
-BlockDriverState *bs;
-int64_t sector_num;
-const uint8_t *buf;
-int nb_sectors;
-int ret;
-} QcowWriteCo;
-
-static void qcow_write_co_entry(void *opaque)
-{
-QcowWriteCo *co = opaque;
-QEMUIOVector qiov;
-
-struct iovec iov = (struct iovec) {
-.iov_base   = (uint8_t*) co->buf,
-.iov_len= co->nb_sectors * BDRV_SECTOR_SIZE,
-};
-qemu_iovec_init_external(&qiov, &iov, 1);
-
-co->ret = qcow_co_writev(co->bs, co->sector_num, co->nb_sectors, &qiov);
-}
-
-/* Wrapper for non-coroutine contexts */
-static int qcow_write(BlockDriverState *bs, int64_t sector_num,
-  const uint8_t *buf, int nb_sectors)
-{
-Coroutine *co;
-AioContext *aio_context = bdrv_get_aio_context(bs);
-QcowWriteCo data = {
-.bs = bs,
-.sector_num = sector_num,
-.buf= buf,
-.nb_sectors = nb_sectors,
-.ret= -EINPROGRESS,
-};
-co = qemu_coroutine_create(qcow_write_co_entry, &data);
-qemu_coroutine_enter(co);
-while (data.ret == -EINPROGRESS) {
-aio_poll(aio_context, true);
-}
-return data.ret;
-}
-
 /* XXX: put compressed sectors first, then all the cluster aligned
tables to avoid losing bytes in alignment */
-static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors)
+static coroutine_fn int
+qcow_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
+   uint64_t bytes, QEMUIOVector *qiov)
 {
 BDRVQcowState *s = bs->opaque;
+QEMUIOVector hd_qiov;
+struct iovec iov;
 z_stream strm;
 int ret, out_len;
-uint8_t *out_buf;
+uint8_t *buf, *out_buf;
 uint64_t cluster_offset;
 
-if (nb_sectors != s->cluster_sectors) {
+if (bytes != s->cluster_size) {
 ret = -EINVAL;
 
 /* Zero-pad last write if image size is not cluster aligned */
-if (sector_num + nb_sectors == bs->total_sectors &&
-nb_sectors < s->cluster_sectors) {
+if (offset + bytes == bs->total_sectors << BDRV_SECTOR_BITS &&
+bytes < s->cluster_size)
+{
 uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size);
 memset(pad_buf, 0, s->cluster_size);
-memcpy(pad_buf, buf, nb_sectors * BDRV_SECTOR_SIZE);
-ret = qcow_write_compressed(bs, sector_num,
-pad_buf, s->cluster_sectors);
+qemu_iovec_to_buf(qiov, 0, pad_buf, s->cluster_size);
+iov = (struct iovec) {
+.iov_base   = pad_buf,
+.iov_len= s->cluster_size,
+};
+qemu_iovec_init_external(&hd_qiov, &iov, 1);
+ret = qcow_co_pwritev_compressed(bs, offset, bytes, &hd_qiov);
 qemu_vfree(pad_buf);
 }
 return ret;
 }
+buf = qemu_blockalign(bs, s->cluster_size);
+qemu_iovec_to_buf(qiov, 0, buf, qiov->size);
 
 out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
 
@@ -1012,27 +979,35 @@ static int qcow_write_compressed(BlockDriverState *bs, 
int64_t sector_num,
 
 if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
 /* could not compress: write normal cluster */
-ret = qcow_write(bs, sector_num, buf, s->cluster_sectors);
-if (ret < 0) {
-goto fail;
-}
-} else {
-cluster_offset = get_cluster_offset(bs, sector_num << 9, 2,
-out_len, 0, 0);
-if (cluster_offset == 0) {
-ret = -EIO;
-goto fail;
-}
-
-cluster_offset &= s->cluster_offset_mask;
-ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len);
+ret = qcow_co_writev(bs, offset >> BDRV_SECTOR_BITS,
+ bytes >> BDRV_SECTOR_BITS, qiov);
 if (ret < 0) {
 goto fail;
 }
+goto success;
+}
+qemu_co_mutex_lock(&s->lock);
+

Re: [Qemu-devel] [PATCH v3] virtio-pci: error out when both legacy and modern modes are disabled

2016-07-21 Thread Eric Blake
On 07/21/2016 11:43 AM, Greg Kurz wrote:
> From: Greg Kurz 
> 
> Without presuming if we got there because of a user mistake or some
> more subtle bug in the tooling, it really does not make sense to
> implement a non-functional device.
> 
> Signed-off-by: Greg Kurz 
> Signed-off-by: Greg Kurz 
> ---
> v3: - rebased on top of:
> https://lists.gnu.org/archive/html/qemu-devel/2016-07/msg04744.html
> - use virtio_pci_legacy/modern helpers
> - rephrased error message to be shorter and use the on/off logic
> 
> Marcel, this still results in > 80 char line in the code but I'd rather not
> split it to ease grepping, nor shorten the message even more to keep it
> meaningful.
> ---
>  hw/virtio/virtio-pci.c |5 +
>  1 file changed, 5 insertions(+)
> 
> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> index 755f9218b77d..1f5f00a50a0b 100644
> --- a/hw/virtio/virtio-pci.c
> +++ b/hw/virtio/virtio-pci.c
> @@ -1842,6 +1842,11 @@ static void virtio_pci_dc_realize(DeviceState *qdev, 
> Error **errp)
>  VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev);
>  PCIDevice *pci_dev = &proxy->pci_dev;
>  
> +if (!(virtio_pci_modern(proxy) || virtio_pci_legacy(proxy))) {
> +error_setg(errp, "device cannot work when both disable-modern and 
> disable-legacy are set to on.");

The phrase passed to error_setg() should not end in '.'

You can also split the string literal, to keep the line length of the
source under 80 (the long error message is less problematic).



-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature


[Qemu-devel] [PATCH v6 13/16] drive-backup: added support for data compression

2016-07-21 Thread Denis V. Lunev
From: Pavel Butsykin 

The idea is simple - backup is "written-once" data. It is written block
by block and it is large enough. It would be nice to save storage
space and compress it.

The patch adds a flag to the qmp/hmp drive-backup command which enables
block compression. Compression should be implemented in the format driver
to enable this feature.

There are some limitations of the format driver to allow compressed writes.
We can write data only once. Though for backup this is perfectly fine.
These limitations are maintained by the driver and the error will be
reported if we are doing something wrong.

Signed-off-by: Pavel Butsykin 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Denis V. Lunev 
CC: Jeff Cody 
CC: Markus Armbruster 
CC: Eric Blake 
CC: John Snow 
CC: Stefan Hajnoczi 
CC: Kevin Wolf 
---
 block/backup.c| 12 +++-
 blockdev.c|  9 ++---
 hmp-commands.hx   |  8 +---
 hmp.c |  3 +++
 include/block/block_int.h |  1 +
 qapi/block-core.json  |  5 -
 qmp-commands.hx   |  5 -
 7 files changed, 34 insertions(+), 9 deletions(-)

diff --git a/block/backup.c b/block/backup.c
index 2c05323..bb3bb9a 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -47,6 +47,7 @@ typedef struct BackupBlockJob {
 uint64_t sectors_read;
 unsigned long *done_bitmap;
 int64_t cluster_size;
+bool compress;
 NotifierWithReturn before_write;
 QLIST_HEAD(, CowRequest) inflight_reqs;
 } BackupBlockJob;
@@ -154,7 +155,8 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
bounce_qiov.size, BDRV_REQ_MAY_UNMAP);
 } else {
 ret = blk_co_pwritev(job->target, start * job->cluster_size,
- bounce_qiov.size, &bounce_qiov, 0);
+ bounce_qiov.size, &bounce_qiov,
+ job->compress ? BDRV_REQ_WRITE_COMPRESSED : 
0);
 }
 if (ret < 0) {
 trace_backup_do_cow_write_fail(job, start, ret);
@@ -477,6 +479,7 @@ static void coroutine_fn backup_run(void *opaque)
 void backup_start(const char *job_id, BlockDriverState *bs,
   BlockDriverState *target, int64_t speed,
   MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap,
+  bool compress,
   BlockdevOnError on_source_error,
   BlockdevOnError on_target_error,
   BlockCompletionFunc *cb, void *opaque,
@@ -507,6 +510,12 @@ void backup_start(const char *job_id, BlockDriverState *bs,
 return;
 }
 
+if (compress && target->drv->bdrv_co_pwritev_compressed == NULL) {
+error_setg(errp, "Compression is not supported for this drive %s",
+   bdrv_get_device_name(target));
+return;
+}
+
 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
 return;
 }
@@ -555,6 +564,7 @@ void backup_start(const char *job_id, BlockDriverState *bs,
 job->sync_mode = sync_mode;
 job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_INCREMENTAL ?
sync_bitmap : NULL;
+job->compress = compress;
 
 /* If there is no backing file on the target, we cannot rely on COW if our
  * backup cluster size is smaller than the target cluster size. Even for
diff --git a/blockdev.c b/blockdev.c
index 0c5ea25..587d76b 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3154,6 +3154,9 @@ static void do_drive_backup(DriveBackup *backup, 
BlockJobTxn *txn, Error **errp)
 if (!backup->has_job_id) {
 backup->job_id = NULL;
 }
+if (!backup->has_compress) {
+backup->compress = false;
+}
 
 blk = blk_by_name(backup->device);
 if (!blk) {
@@ -3242,8 +3245,8 @@ static void do_drive_backup(DriveBackup *backup, 
BlockJobTxn *txn, Error **errp)
 }
 
 backup_start(backup->job_id, bs, target_bs, backup->speed, backup->sync,
- bmap, backup->on_source_error, backup->on_target_error,
- block_job_cb, bs, txn, &local_err);
+ bmap, backup->compress, backup->on_source_error,
+ backup->on_target_error, block_job_cb, bs, txn, &local_err);
 bdrv_unref(target_bs);
 if (local_err != NULL) {
 error_propagate(errp, local_err);
@@ -3317,7 +3320,7 @@ void do_blockdev_backup(BlockdevBackup *backup, 
BlockJobTxn *txn, Error **errp)
 }
 }
 backup_start(backup->job_id, bs, target_bs, backup->speed, backup->sync,
- NULL, backup->on_source_error, backup->on_target_error,
+ NULL, false, backup->on_source_error, backup->on_target_error,
  block_job_cb, bs, txn, &local_err);
 if (local_err != NULL) {
 

[Qemu-devel] [PATCH v6 00/16] backup compression

2016-07-21 Thread Denis V. Lunev
The idea is simple - backup is "written-once" data. It is written block
by block and it is large enough. It would be nice to save storage
space and compress it.

These patches add the ability to compress data during backup. This
functionality is implemented by means of adding options to the qmp/hmp
commands(drive-backup, blockdev-backup). The implementation is quite
simple, because the responsibility for data compression is imposed on the
format driver.

Changes from v1:
- added unittest for backup compression (12, 13)

Changes from v2:
- implemented a new .bdrv_co_write_compressed interface to replace the
  old .bdrv_write_compressed (2,3,4,5,6)

Changes from v3:
- added the byte-based interfaces:
  blk_pwrite_compressed()/blk_co_pwritev_compressed() (1, 7)
- fix drive/blockdev-backup documentation (10, 11)

Changes from v4:
- added assert that offset and count are aligned (1)
- reuse RwCo and bdrv_co_pwritev() for write compressed (2)
- converted interfaces to byte-based for format drivers (2, 3, 5, 6)
- move an unrelated cleanup in a separate patches (4, 7)
- turn on dirty_bitmaps for the compressed writes (9)
- added simplify drive/blockdev-backup by using the boxed commands (10, 11)
- reworded drive/blockdev-backup documentation about compression (12, 13)
- fix s/bakup/backup/ (14)

Changes from v5:
- rebased on master
- fix grammar (5, 8)

Signed-off-by: Pavel Butsykin 
Signed-off-by: Denis V. Lunev 
CC: Jeff Cody 
CC: Markus Armbruster 
CC: Eric Blake 
CC: John Snow 
CC: Stefan Hajnoczi 
CC: Kevin Wolf 

Pavel Butsykin (16):
  block: switch blk_write_compressed() to byte-based interface
  block: Convert bdrv_pwrite_compressed() to BdrvChild
  block/io: reuse bdrv_co_pwritev() for write compressed
  qcow2: add qcow2_co_pwritev_compressed
  qcow2: cleanup qcow2_co_pwritev_compressed to avoid the recursion
  vmdk: add vmdk_co_pwritev_compressed
  qcow: add qcow_co_pwritev_compressed
  qcow: cleanup qcow_co_pwritev_compressed to avoid the recursion
  block: remove BlockDriver.bdrv_write_compressed
  block/io: turn on dirty_bitmaps for the compressed writes
  block: simplify drive-backup
  block: simplify blockdev-backup
  drive-backup: added support for data compression
  blockdev-backup: added support for data compression
  qemu-iotests: test backup compression in 055
  qemu-iotests: add vmdk for test backup compression in 055

 block/backup.c |  12 ++-
 block/block-backend.c  |  27 +-
 block/io.c |  48 --
 block/qcow.c   | 113 +---
 block/qcow2.c  | 128 ++-
 block/vmdk.c   |  55 ++--
 blockdev.c | 193 ++---
 hmp-commands.hx|   8 +-
 hmp.c  |  32 ---
 include/block/block.h  |   5 +-
 include/block/block_int.h  |   5 +-
 include/sysemu/block-backend.h |   4 +-
 qapi/block-core.json   |  18 +++-
 qemu-img.c |   8 +-
 qemu-io-cmds.c |   2 +-
 qmp-commands.hx|   9 +-
 tests/qemu-iotests/055 | 118 +
 tests/qemu-iotests/055.out |   4 +-
 tests/qemu-iotests/iotests.py  |  10 +--
 19 files changed, 374 insertions(+), 425 deletions(-)

-- 
2.5.0




Re: [Qemu-devel] [PATCH v2] virtio-pci: error out when both legacy and modern modes are disabled

2016-07-21 Thread Eric Blake
On 07/21/2016 09:52 AM, Greg Kurz wrote:
> From: Greg Kurz 
> 
> Without presuming if we got there because of a user mistake or some
> more subtle bug in the tooling, it really does not make sense to
> implement a non-functional device.
> 
> Signed-off-by: Greg Kurz 
> Signed-off-by: Greg Kurz 
> ---
> v2: - error out at realize time as suggested by Connie
> - updated title and changelog
> ---
>  hw/virtio/virtio-pci.c |6 ++
>  1 file changed, 6 insertions(+)
> 
> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> index f0677b73d860..8d707aac0c21 100644
> --- a/hw/virtio/virtio-pci.c
> +++ b/hw/virtio/virtio-pci.c
> @@ -1838,6 +1838,12 @@ static void virtio_pci_dc_realize(DeviceState *qdev, 
> Error **errp)
>  VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev);
>  PCIDevice *pci_dev = &proxy->pci_dev;
>  
> +if (proxy->flags & VIRTIO_PCI_FLAG_DISABLE_LEGACY &&
> +proxy->flags & VIRTIO_PCI_FLAG_DISABLE_MODERN) {
> +error_setg(errp, "device is unserviceable when both legacy and 
> modern modes are disabled. At least one of the disable-modern or 
> disable-legacy properties should be set to false.");

Too long. error_setg() should be a single phrase, with no trailing '.'.
 If you need to add additional information, error_append_hint() is how
you add the second sentence.

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature


[Qemu-devel] [PATCH v6 11/16] block: simplify drive-backup

2016-07-21 Thread Denis V. Lunev
From: Pavel Butsykin 

Now that we can support boxed commands, use it to greatly reduce the
number of parameters (and likelihood of getting out of sync) when
adjusting drive-backup parameters.

Signed-off-by: Pavel Butsykin 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Denis V. Lunev 
CC: Jeff Cody 
CC: Markus Armbruster 
CC: Eric Blake 
CC: John Snow 
CC: Stefan Hajnoczi 
CC: Kevin Wolf 
---
 blockdev.c   | 115 +--
 hmp.c|  29 -
 qapi/block-core.json |   3 +-
 3 files changed, 58 insertions(+), 89 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index eafeba9..e29147a 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1838,17 +1838,8 @@ typedef struct DriveBackupState {
 BlockJob *job;
 } DriveBackupState;
 
-static void do_drive_backup(const char *job_id, const char *device,
-const char *target, bool has_format,
-const char *format, enum MirrorSyncMode sync,
-bool has_mode, enum NewImageMode mode,
-bool has_speed, int64_t speed,
-bool has_bitmap, const char *bitmap,
-bool has_on_source_error,
-BlockdevOnError on_source_error,
-bool has_on_target_error,
-BlockdevOnError on_target_error,
-BlockJobTxn *txn, Error **errp);
+static void do_drive_backup(DriveBackup *backup, BlockJobTxn *txn,
+Error **errp);
 
 static void drive_backup_prepare(BlkActionState *common, Error **errp)
 {
@@ -1878,16 +1869,7 @@ static void drive_backup_prepare(BlkActionState *common, 
Error **errp)
 bdrv_drained_begin(blk_bs(blk));
 state->bs = blk_bs(blk);
 
-do_drive_backup(backup->has_job_id ? backup->job_id : NULL,
-backup->device, backup->target,
-backup->has_format, backup->format,
-backup->sync,
-backup->has_mode, backup->mode,
-backup->has_speed, backup->speed,
-backup->has_bitmap, backup->bitmap,
-backup->has_on_source_error, backup->on_source_error,
-backup->has_on_target_error, backup->on_target_error,
-common->block_job_txn, &local_err);
+do_drive_backup(backup, common->block_job_txn, &local_err);
 if (local_err) {
 error_propagate(errp, local_err);
 return;
@@ -3155,17 +3137,7 @@ out:
 aio_context_release(aio_context);
 }
 
-static void do_drive_backup(const char *job_id, const char *device,
-const char *target, bool has_format,
-const char *format, enum MirrorSyncMode sync,
-bool has_mode, enum NewImageMode mode,
-bool has_speed, int64_t speed,
-bool has_bitmap, const char *bitmap,
-bool has_on_source_error,
-BlockdevOnError on_source_error,
-bool has_on_target_error,
-BlockdevOnError on_target_error,
-BlockJobTxn *txn, Error **errp)
+static void do_drive_backup(DriveBackup *backup, BlockJobTxn *txn, Error 
**errp)
 {
 BlockBackend *blk;
 BlockDriverState *bs;
@@ -3178,23 +3150,26 @@ static void do_drive_backup(const char *job_id, const 
char *device,
 int flags;
 int64_t size;
 
-if (!has_speed) {
-speed = 0;
+if (!backup->has_speed) {
+backup->speed = 0;
 }
-if (!has_on_source_error) {
-on_source_error = BLOCKDEV_ON_ERROR_REPORT;
+if (!backup->has_on_source_error) {
+backup->on_source_error = BLOCKDEV_ON_ERROR_REPORT;
 }
-if (!has_on_target_error) {
-on_target_error = BLOCKDEV_ON_ERROR_REPORT;
+if (!backup->has_on_target_error) {
+backup->on_target_error = BLOCKDEV_ON_ERROR_REPORT;
+}
+if (!backup->has_mode) {
+backup->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
 }
-if (!has_mode) {
-mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
+if (!backup->has_job_id) {
+backup->job_id = NULL;
 }
 
-blk = blk_by_name(device);
+blk = blk_by_name(backup->device);
 if (!blk) {
 error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
-  "Device '%s' not found", device);
+  "Device '%s' not found", backup->device);
 return;
 }
 
@@ -3204,13 +3179,14 @@ static void do_drive_backup(const char *job_id, const 
char *device,
 /* Although backup_run has this check too, we need to use bs->drv 

[Qemu-devel] [PATCH v6 15/16] qemu-iotests: test backup compression in 055

2016-07-21 Thread Denis V. Lunev
From: Pavel Butsykin 

Added cases to check the backup compression out of qcow2, raw in qcow2
on drive-backup and blockdev-backup.

Signed-off-by: Pavel Butsykin 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Denis V. Lunev 
CC: Jeff Cody 
CC: Markus Armbruster 
CC: Eric Blake 
CC: John Snow 
CC: Stefan Hajnoczi 
CC: Kevin Wolf 
---
 tests/qemu-iotests/055| 97 +++
 tests/qemu-iotests/055.out|  4 +-
 tests/qemu-iotests/iotests.py | 10 ++---
 3 files changed, 104 insertions(+), 7 deletions(-)

diff --git a/tests/qemu-iotests/055 b/tests/qemu-iotests/055
index c8e3578..be81a42 100755
--- a/tests/qemu-iotests/055
+++ b/tests/qemu-iotests/055
@@ -451,5 +451,102 @@ class TestSingleTransaction(iotests.QMPTestCase):
 self.assert_qmp(result, 'error/class', 'GenericError')
 self.assert_no_active_block_jobs()
 
+
+class TestDriveCompression(iotests.QMPTestCase):
+image_len = 64 * 1024 * 1024 # MB
+outfmt = 'qcow2'
+
+def setUp(self):
+# Write data to the image so we can compare later
+qemu_img('create', '-f', iotests.imgfmt, test_img, 
str(TestDriveCompression.image_len))
+qemu_io('-f', iotests.imgfmt, '-c', 'write -P0x11 0 64k', test_img)
+qemu_io('-f', iotests.imgfmt, '-c', 'write -P0x00 64k 128k', test_img)
+qemu_io('-f', iotests.imgfmt, '-c', 'write -P0x22 162k 32k', test_img)
+qemu_io('-f', iotests.imgfmt, '-c', 'write -P0x33 67043328 64k', 
test_img)
+
+qemu_img('create', '-f', TestDriveCompression.outfmt, 
blockdev_target_img,
+ str(TestDriveCompression.image_len))
+self.vm = 
iotests.VM().add_drive(test_img).add_drive(blockdev_target_img,
+ 
format=TestDriveCompression.outfmt)
+self.vm.launch()
+
+def tearDown(self):
+self.vm.shutdown()
+os.remove(test_img)
+os.remove(blockdev_target_img)
+try:
+os.remove(target_img)
+except OSError:
+pass
+
+def do_test_compress_complete(self, cmd, **args):
+self.assert_no_active_block_jobs()
+
+result = self.vm.qmp(cmd, device='drive0', sync='full', compress=True, 
**args)
+self.assert_qmp(result, 'return', {})
+
+self.wait_until_completed()
+
+self.vm.shutdown()
+self.assertTrue(iotests.compare_images(test_img, blockdev_target_img,
+   iotests.imgfmt, 
TestDriveCompression.outfmt),
+'target image does not match source after backup')
+
+def test_complete_compress_drive_backup(self):
+self.do_test_compress_complete('drive-backup', 
target=blockdev_target_img, mode='existing')
+
+def test_complete_compress_blockdev_backup(self):
+self.do_test_compress_complete('blockdev-backup', target='drive1')
+
+def do_test_compress_cancel(self, cmd, **args):
+self.assert_no_active_block_jobs()
+
+result = self.vm.qmp(cmd, device='drive0', sync='full', compress=True, 
**args)
+self.assert_qmp(result, 'return', {})
+
+event = self.cancel_and_wait()
+self.assert_qmp(event, 'data/type', 'backup')
+
+def test_compress_cancel_drive_backup(self):
+self.do_test_compress_cancel('drive-backup', 
target=blockdev_target_img, mode='existing')
+
+def test_compress_cancel_blockdev_backup(self):
+self.do_test_compress_cancel('blockdev-backup', target='drive1')
+
+def do_test_compress_pause(self, cmd, **args):
+self.assert_no_active_block_jobs()
+
+self.vm.pause_drive('drive0')
+result = self.vm.qmp(cmd, device='drive0', sync='full', compress=True, 
**args)
+self.assert_qmp(result, 'return', {})
+
+result = self.vm.qmp('block-job-pause', device='drive0')
+self.assert_qmp(result, 'return', {})
+
+self.vm.resume_drive('drive0')
+time.sleep(1)
+result = self.vm.qmp('query-block-jobs')
+offset = self.dictpath(result, 'return[0]/offset')
+
+time.sleep(1)
+result = self.vm.qmp('query-block-jobs')
+self.assert_qmp(result, 'return[0]/offset', offset)
+
+result = self.vm.qmp('block-job-resume', device='drive0')
+self.assert_qmp(result, 'return', {})
+
+self.wait_until_completed()
+
+self.vm.shutdown()
+self.assertTrue(iotests.compare_images(test_img, blockdev_target_img,
+   iotests.imgfmt, 
TestDriveCompression.outfmt),
+'target image does not match source after backup')
+
+def test_compress_pause_drive_backup(self):
+self.do_test_compress_pause('drive-backup', 
target=blockdev_target_img, 

[Qemu-devel] [PATCH v6 14/16] blockdev-backup: added support for data compression

2016-07-21 Thread Denis V. Lunev
From: Pavel Butsykin 

The idea is simple - backup is "written-once" data. It is written block
by block and it is large enough. It would be nice to save storage
space and compress it.

Signed-off-by: Pavel Butsykin 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Denis V. Lunev 
CC: Jeff Cody 
CC: Markus Armbruster 
CC: Eric Blake 
CC: John Snow 
CC: Stefan Hajnoczi 
CC: Kevin Wolf 
---
 blockdev.c   | 7 +--
 qapi/block-core.json | 4 
 qmp-commands.hx  | 4 +++-
 3 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 587d76b..89c403f 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3287,6 +3287,9 @@ void do_blockdev_backup(BlockdevBackup *backup, 
BlockJobTxn *txn, Error **errp)
 if (!backup->has_job_id) {
 backup->job_id = NULL;
 }
+if (!backup->has_compress) {
+backup->compress = false;
+}
 
 blk = blk_by_name(backup->device);
 if (!blk) {
@@ -3320,8 +3323,8 @@ void do_blockdev_backup(BlockdevBackup *backup, 
BlockJobTxn *txn, Error **errp)
 }
 }
 backup_start(backup->job_id, bs, target_bs, backup->speed, backup->sync,
- NULL, false, backup->on_source_error, backup->on_target_error,
- block_job_cb, bs, txn, &local_err);
+ NULL, backup->compress, backup->on_source_error,
+ backup->on_target_error, block_job_cb, bs, txn, &local_err);
 if (local_err != NULL) {
 error_propagate(errp, local_err);
 }
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 6d98da7..7f3424b 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -935,6 +935,9 @@
 # @speed: #optional the maximum speed, in bytes per second. The default is 0,
 # for unlimited.
 #
+# @compress: #optional true to compress data, if the target format supports it.
+#(default: false) (since 2.7)
+#
 # @on-source-error: #optional the action to take on an error on the source,
 #   default 'report'.  'stop' and 'enospc' can only be used
 #   if the block device supports io-status (see BlockInfo).
@@ -953,6 +956,7 @@
   'data': { '*job-id': 'str', 'device': 'str', 'target': 'str',
 'sync': 'MirrorSyncMode',
 '*speed': 'int',
+'*compress': 'bool',
 '*on-source-error': 'BlockdevOnError',
 '*on-target-error': 'BlockdevOnError' } }
 
diff --git a/qmp-commands.hx b/qmp-commands.hx
index ff72194..d06ced5 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -1275,7 +1275,7 @@ EQMP
 
 {
 .name   = "blockdev-backup",
-.args_type  = "job-id:s?,sync:s,device:B,target:B,speed:i?,"
+.args_type  = 
"job-id:s?,sync:s,device:B,target:B,speed:i?,compress:b?,"
   "on-source-error:s?,on-target-error:s?",
 .mhandler.cmd_new = qmp_marshal_blockdev_backup,
 },
@@ -1299,6 +1299,8 @@ Arguments:
   sectors allocated in the topmost image, or "none" to only replicate
   new I/O (MirrorSyncMode).
 - "speed": the maximum speed, in bytes per second (json-int, optional)
+- "compress": true to compress data, if the target format supports it.
+  (json-bool, optional, default false)
 - "on-source-error": the action to take on an error on the source, default
  'report'.  'stop' and 'enospc' can only be used
  if the block device supports io-status.
-- 
2.5.0




[Qemu-devel] [PATCH v6 16/16] qemu-iotests: add vmdk for test backup compression in 055

2016-07-21 Thread Denis V. Lunev
From: Pavel Butsykin 

The vmdk format supports compression, so add it to the backup
compression test.

Signed-off-by: Pavel Butsykin 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Denis V. Lunev 
CC: Jeff Cody 
CC: Markus Armbruster 
CC: Eric Blake 
CC: John Snow 
CC: Stefan Hajnoczi 
CC: Kevin Wolf 
---
 tests/qemu-iotests/055 | 57 ++
 1 file changed, 39 insertions(+), 18 deletions(-)

diff --git a/tests/qemu-iotests/055 b/tests/qemu-iotests/055
index be81a42..cf5a423 100755
--- a/tests/qemu-iotests/055
+++ b/tests/qemu-iotests/055
@@ -454,7 +454,8 @@ class TestSingleTransaction(iotests.QMPTestCase):
 
 class TestDriveCompression(iotests.QMPTestCase):
 image_len = 64 * 1024 * 1024 # MB
-outfmt = 'qcow2'
+fmt_supports_compression = [{'type': 'qcow2', 'args': ()},
+{'type': 'vmdk', 'args': ('-o', 
'subformat=streamOptimized')}]
 
 def setUp(self):
 # Write data to the image so we can compare later
@@ -464,12 +465,6 @@ class TestDriveCompression(iotests.QMPTestCase):
 qemu_io('-f', iotests.imgfmt, '-c', 'write -P0x22 162k 32k', test_img)
 qemu_io('-f', iotests.imgfmt, '-c', 'write -P0x33 67043328 64k', 
test_img)
 
-qemu_img('create', '-f', TestDriveCompression.outfmt, 
blockdev_target_img,
- str(TestDriveCompression.image_len))
-self.vm = 
iotests.VM().add_drive(test_img).add_drive(blockdev_target_img,
- 
format=TestDriveCompression.outfmt)
-self.vm.launch()
-
 def tearDown(self):
 self.vm.shutdown()
 os.remove(test_img)
@@ -479,7 +474,18 @@ class TestDriveCompression(iotests.QMPTestCase):
 except OSError:
 pass
 
-def do_test_compress_complete(self, cmd, **args):
+def do_prepare_drives(self, fmt, args):
+self.vm = iotests.VM().add_drive(test_img)
+
+qemu_img('create', '-f', fmt, blockdev_target_img,
+ str(TestDriveCompression.image_len), *args)
+self.vm.add_drive(blockdev_target_img, format=fmt)
+
+self.vm.launch()
+
+def do_test_compress_complete(self, cmd, format, **args):
+self.do_prepare_drives(format['type'], format['args'])
+
 self.assert_no_active_block_jobs()
 
 result = self.vm.qmp(cmd, device='drive0', sync='full', compress=True, 
**args)
@@ -489,16 +495,21 @@ class TestDriveCompression(iotests.QMPTestCase):
 
 self.vm.shutdown()
 self.assertTrue(iotests.compare_images(test_img, blockdev_target_img,
-   iotests.imgfmt, 
TestDriveCompression.outfmt),
+   iotests.imgfmt, format['type']),
 'target image does not match source after backup')
 
 def test_complete_compress_drive_backup(self):
-self.do_test_compress_complete('drive-backup', 
target=blockdev_target_img, mode='existing')
+for format in TestDriveCompression.fmt_supports_compression:
+self.do_test_compress_complete('drive-backup', format,
+   target=blockdev_target_img, 
mode='existing')
 
 def test_complete_compress_blockdev_backup(self):
-self.do_test_compress_complete('blockdev-backup', target='drive1')
+for format in TestDriveCompression.fmt_supports_compression:
+self.do_test_compress_complete('blockdev-backup', format, 
target='drive1')
+
+def do_test_compress_cancel(self, cmd, format, **args):
+self.do_prepare_drives(format['type'], format['args'])
 
-def do_test_compress_cancel(self, cmd, **args):
 self.assert_no_active_block_jobs()
 
 result = self.vm.qmp(cmd, device='drive0', sync='full', compress=True, 
**args)
@@ -507,13 +518,20 @@ class TestDriveCompression(iotests.QMPTestCase):
 event = self.cancel_and_wait()
 self.assert_qmp(event, 'data/type', 'backup')
 
+self.vm.shutdown()
+
 def test_compress_cancel_drive_backup(self):
-self.do_test_compress_cancel('drive-backup', 
target=blockdev_target_img, mode='existing')
+for format in TestDriveCompression.fmt_supports_compression:
+self.do_test_compress_cancel('drive-backup', format,
+ target=blockdev_target_img, 
mode='existing')
 
 def test_compress_cancel_blockdev_backup(self):
-self.do_test_compress_cancel('blockdev-backup', target='drive1')
+   for format in TestDriveCompression.fmt_supports_compression:
+self.do_test_compress_cancel('blockdev-backup', format, 
target='drive1')
+
+def do_test_compress_pause(self, cmd, format, **args):

[Qemu-devel] [PATCH 4/4] block: Cater to iscsi with non-power-of-2 discard

2016-07-21 Thread Eric Blake
Dell Equallogic iSCSI SANs have a very unusual advertised geometry:

$ iscsi-inq -e 1 -c $((0xb0)) iscsi://XXX/0
wsnz:0
maximum compare and write length:1
optimal transfer length granularity:0
maximum transfer length:0
optimal transfer length:0
maximum prefetch xdread xdwrite transfer length:0
maximum unmap lba count:30720
maximum unmap block descriptor count:2
optimal unmap granularity:30720
ugavalid:1
unmap granularity alignment:0
maximum write same length:30720

which says that both the maximum and the optimal discard size
is 15M.  It is not immediately apparent if the device allows
discard requests not aligned to the optimal size, nor if it
allows discards at a finer granularity than the optimal size.

I tried to find details in the SCSI Commands Reference Manual
Rev. A on what valid values of maximum and optimal sizes are
permitted, but while that document mentions a "Block Limits
VPD Page", I couldn't actually find documentation of that page
or what values it would have, or if a SCSI device has an
advertisement of its minimal unmap granularity.  So it is not
obvious to me whether the Dell Equallogic device is compliant
with the SCSI specification.

Fortunately, it is easy enough to support non-power-of-2 sizing,
even if it means we are less efficient than truly possible when
targeting that device (for example, it means that we refuse to
unmap anything that is not a multiple of 15M and aligned to a
15M boundary, even if the device truly does support a smaller
granularity where unmapping actually works).

Reported-by: Peter Lieven 
Signed-off-by: Eric Blake 

---
Help in locating the actual specs on what SCSI requires for
page 0xb0 would be nice. But this should at least avoid the
assertion failures that Peter is hitting.  I was able to
test this patch using NBD on a hacked up qemu where I made
block/nbd.c report the same block limits, and could confirm
the assert under qemu-io 'w -z 0 40m' and 'discard 0 40m'
pre-patch, as well as the post-patch behavior of splitting
things to 15M alignment ('discard 1M 15M' becomes a no-op
because it is not aligned).  But obviously it needs to be
tested on the actual iscsi SAN that triggered the original
report.
---
 include/block/block_int.h | 37 -
 block/io.c| 15 +--
 2 files changed, 29 insertions(+), 23 deletions(-)

diff --git a/include/block/block_int.h b/include/block/block_int.h
index 1fe0fd9..47665be 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -330,36 +330,39 @@ typedef struct BlockLimits {
  * otherwise. */
 uint32_t request_alignment;

-/* maximum number of bytes that can be discarded at once (since it
- * is signed, it must be < 2G, if set), should be multiple of
+/* Maximum number of bytes that can be discarded at once (since it
+ * is signed, it must be < 2G, if set). Must be multiple of
  * pdiscard_alignment, but need not be power of 2. May be 0 if no
  * inherent 32-bit limit */
 int32_t max_pdiscard;

-/* optimal alignment for discard requests in bytes, must be power
- * of 2, less than max_pdiscard if that is set, and multiple of
- * bl.request_alignment. May be 0 if bl.request_alignment is good
- * enough */
+/* Optimal alignment for discard requests in bytes. A power of 2
+ * is best but not mandatory.  Must be a multiple of
+ * bl.request_alignment, and must be less than max_pdiscard if
+ * that is set. May be 0 if bl.request_alignment is good enough */
 uint32_t pdiscard_alignment;

-/* maximum number of bytes that can zeroized at once (since it is
- * signed, it must be < 2G, if set), should be multiple of
+/* Maximum number of bytes that can zeroized at once (since it is
+ * signed, it must be < 2G, if set). Must be multiple of
  * pwrite_zeroes_alignment. May be 0 if no inherent 32-bit limit */
 int32_t max_pwrite_zeroes;

-/* optimal alignment for write zeroes requests in bytes, must be
- * power of 2, less than max_pwrite_zeroes if that is set, and
- * multiple of bl.request_alignment. May be 0 if
- * bl.request_alignment is good enough */
+/* Optimal alignment for write zeroes requests in bytes. A power
+ * of 2 is best but not mandatory.  Must be a multiple of
+ * bl.request_alignment, and must be less than max_pwrite_zeroes
+ * if that is set. May be 0 if bl.request_alignment is good
+ * enough */
 uint32_t pwrite_zeroes_alignment;

-/* optimal transfer length in bytes (must be power of 2, and
- * multiple of bl.request_alignment), or 0 if no preferred size */
+/* Optimal transfer length in bytes.  A power of 2 is best but not
+ * mandatory.  Must be a multiple of bl.request_alignment, or 0 if
+ * no preferred size */
 uint32_t opt_transfer;

-/* maximal transfer length in bytes (need not be power of 2, but
- * should be multiple of 

Re: [Qemu-devel] [PATCH 24/37] pc: keep gsi reference

2016-07-21 Thread Eduardo Habkost
On Thu, Jul 21, 2016 at 02:28:33PM -0400, Marc-André Lureau wrote:
> Hi
> 
> - Original Message -
> > On Thu, Jul 21, 2016 at 01:27:35PM -0400, Marc-André Lureau wrote:
> > > Hi
> > > 
> > > - Original Message -
> > > > On Tue, Jul 19, 2016 at 12:54:19PM +0400, marcandre.lur...@redhat.com
> > > > wrote:
> > > > > From: Marc-André Lureau 
> > > > > 
> > > > > Further cleanup would need to call qemu_free_irq() at the appropriate
> > > > > time, but for now this silences ASAN about direct leaks.
> > > > > 
> > > > > Signed-off-by: Marc-André Lureau 
> > > > 
> > > > Is there a way to make ASAN happy without having to add a field
> > > > to MachineState that we're not going to use for anything?
> > > 
> > > Well, the plan is rather to release it when no longer needed.
> > > Would it be fine to call qemu_free_irqs() in
> > > machine_finalize()?
> > 
> > It would be fine, I guess, but it looks pointless if we have lots
> > of other resources allocated during PC machine initialization
> > that are never released.
> 
> The main point, right now, is to have no direct leaks when
> running ASAN or valgrind, as they hide new introduced leaks
> that may be much worse. (it would also be good if we had no
> indirect leaks either, as this may also grow over time)

I see. And you don't need to release it on finalize to reach that
goal, right? In this case I don't think we need the extra work.

(In case my previous message was unclear, I believe the field
will be useful, even if we don't release anything on finalize.)

> 
> > But, see additional comment below:
> > 
> > > 
> > > > 
> > > > > ---
> > > > >  hw/i386/pc_piix.c   | 1 +
> > > > >  hw/i386/pc_q35.c| 1 +
> > > > >  include/hw/boards.h | 1 +
> > > > >  3 files changed, 3 insertions(+)
> > > > > 
> > > > > diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> > > > > index a07dc81..b2db274 100644
> > > > > --- a/hw/i386/pc_piix.c
> > > > > +++ b/hw/i386/pc_piix.c
> > > > > @@ -190,6 +190,7 @@ static void pc_init1(MachineState *machine,
> > > > >  } else {
> > > > >  gsi = qemu_allocate_irqs(gsi_handler, gsi_state,
> > > > >  GSI_NUM_PINS);
> > > > >  }
> > > > > +machine->gsi = gsi;
> > > > >  
> > > > >  if (pcmc->pci_enabled) {
> > > > >  pci_bus = i440fx_init(host_type,
> > > > > diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
> > > > > index c5e8367..5dfb14f 100644
> > > > > --- a/hw/i386/pc_q35.c
> > > > > +++ b/hw/i386/pc_q35.c
> > > > > @@ -158,6 +158,7 @@ static void pc_q35_init(MachineState *machine)
> > > > >  } else {
> > > > >  gsi = qemu_allocate_irqs(gsi_handler, gsi_state,
> > > > >  GSI_NUM_PINS);
> > > > >  }
> > > > > +machine->gsi = gsi;
> > > > >  
> > > > >  /* create pci host bus */
> > > > >  q35_host = Q35_HOST_DEVICE(qdev_create(NULL,
> > > > >  TYPE_Q35_HOST_DEVICE));
> > > > > diff --git a/include/hw/boards.h b/include/hw/boards.h
> > > > > index e46a744..289ba52 100644
> > > > > --- a/include/hw/boards.h
> > > > > +++ b/include/hw/boards.h
> > > > > @@ -139,6 +139,7 @@ struct MachineState {
> > > > >  /*< private >*/
> > > > >  Object parent_obj;
> > > > >  Notifier sysbus_notifier;
> > > > > +qemu_irq *gsi;
> > 
> > If this is used only by PC, doesn't it belong to PCMachineState?
> 
> right, i'll try to put it there
> 
> > Anyway, the new field would be very useful to help reduce the
> > number of parameters of PC initialization functions (by making
> > them just get a PCMachineState* argument). I would go even
> 
> Which functions do you have in mind?

i440fx_init(), pc_basic_device_init(), maybe others.

> 
> > further and remove the local 'gsi' variable and replace it with
> > 'pcms->gsi' everywhere.
> 
> ok, why not.

-- 
Eduardo



[Qemu-devel] [PATCH v6 02/16] block: Convert bdrv_pwrite_compressed() to BdrvChild

2016-07-21 Thread Denis V. Lunev
From: Pavel Butsykin 

Signed-off-by: Pavel Butsykin 
Signed-off-by: Denis V. Lunev 
CC: Jeff Cody 
CC: Markus Armbruster 
CC: Eric Blake 
CC: John Snow 
CC: Stefan Hajnoczi 
CC: Kevin Wolf 
---
 block/block-backend.c | 2 +-
 block/io.c| 3 ++-
 include/block/block.h | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index 8f38ab4..4bfc2eb 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1477,7 +1477,7 @@ int blk_pwrite_compressed(BlockBackend *blk, int64_t 
offset, const void *buf,
 return ret;
 }
 
-return bdrv_pwrite_compressed(blk_bs(blk), offset, buf, count);
+return bdrv_pwrite_compressed(blk->root, offset, buf, count);
 }
 
 int blk_truncate(BlockBackend *blk, int64_t offset)
diff --git a/block/io.c b/block/io.c
index e9f35c6..1503e09 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1867,9 +1867,10 @@ int bdrv_is_allocated_above(BlockDriverState *top,
 return 0;
 }
 
-int bdrv_pwrite_compressed(BlockDriverState *bs, int64_t offset,
+int bdrv_pwrite_compressed(BdrvChild *child, int64_t offset,
const void *buf, int bytes)
 {
+BlockDriverState *bs = child->bs;
 BlockDriver *drv = bs->drv;
 int ret;
 
diff --git a/include/block/block.h b/include/block/block.h
index b4a97f2..7bb5ddb 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -399,7 +399,7 @@ const char *bdrv_get_node_name(const BlockDriverState *bs);
 const char *bdrv_get_device_name(const BlockDriverState *bs);
 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs);
 int bdrv_get_flags(BlockDriverState *bs);
-int bdrv_pwrite_compressed(BlockDriverState *bs, int64_t offset,
+int bdrv_pwrite_compressed(BdrvChild *child, int64_t offset,
const void *buf, int bytes);
 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs);
-- 
2.5.0




[Qemu-devel] [PATCH v6 12/16] block: simplify blockdev-backup

2016-07-21 Thread Denis V. Lunev
From: Pavel Butsykin 

Now that we can support boxed commands, use it to greatly reduce the
number of parameters (and likelihood of getting out of sync) when
adjusting blockdev-backup parameters.

Signed-off-by: Pavel Butsykin 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Denis V. Lunev 
CC: Jeff Cody 
CC: Markus Armbruster 
CC: Eric Blake 
CC: John Snow 
CC: Stefan Hajnoczi 
CC: Kevin Wolf 
---
 blockdev.c   | 70 +---
 qapi/block-core.json |  6 -
 2 files changed, 27 insertions(+), 49 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index e29147a..0c5ea25 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1906,14 +1906,8 @@ typedef struct BlockdevBackupState {
 AioContext *aio_context;
 } BlockdevBackupState;
 
-static void do_blockdev_backup(const char *job_id, const char *device,
-   const char *target, enum MirrorSyncMode sync,
-   bool has_speed, int64_t speed,
-   bool has_on_source_error,
-   BlockdevOnError on_source_error,
-   bool has_on_target_error,
-   BlockdevOnError on_target_error,
-   BlockJobTxn *txn, Error **errp);
+static void do_blockdev_backup(BlockdevBackup *backup, BlockJobTxn *txn,
+   Error **errp);
 
 static void blockdev_backup_prepare(BlkActionState *common, Error **errp)
 {
@@ -1953,12 +1947,7 @@ static void blockdev_backup_prepare(BlkActionState 
*common, Error **errp)
 state->bs = blk_bs(blk);
 bdrv_drained_begin(state->bs);
 
-do_blockdev_backup(backup->has_job_id ? backup->job_id : NULL,
-   backup->device, backup->target, backup->sync,
-   backup->has_speed, backup->speed,
-   backup->has_on_source_error, backup->on_source_error,
-   backup->has_on_target_error, backup->on_target_error,
-   common->block_job_txn, &local_err);
+do_blockdev_backup(backup, common->block_job_txn, &local_err);
 if (local_err) {
 error_propagate(errp, local_err);
 return;
@@ -3275,14 +3264,7 @@ BlockDeviceInfoList *qmp_query_named_block_nodes(Error 
**errp)
 return bdrv_named_nodes_list(errp);
 }
 
-void do_blockdev_backup(const char *job_id, const char *device,
-const char *target, enum MirrorSyncMode sync,
- bool has_speed, int64_t speed,
- bool has_on_source_error,
- BlockdevOnError on_source_error,
- bool has_on_target_error,
- BlockdevOnError on_target_error,
- BlockJobTxn *txn, Error **errp)
+void do_blockdev_backup(BlockdevBackup *backup, BlockJobTxn *txn, Error **errp)
 {
 BlockBackend *blk;
 BlockDriverState *bs;
@@ -3290,19 +3272,22 @@ void do_blockdev_backup(const char *job_id, const char 
*device,
 Error *local_err = NULL;
 AioContext *aio_context;
 
-if (!has_speed) {
-speed = 0;
+if (!backup->has_speed) {
+backup->speed = 0;
 }
-if (!has_on_source_error) {
-on_source_error = BLOCKDEV_ON_ERROR_REPORT;
+if (!backup->has_on_source_error) {
+backup->on_source_error = BLOCKDEV_ON_ERROR_REPORT;
 }
-if (!has_on_target_error) {
-on_target_error = BLOCKDEV_ON_ERROR_REPORT;
+if (!backup->has_on_target_error) {
+backup->on_target_error = BLOCKDEV_ON_ERROR_REPORT;
+}
+if (!backup->has_job_id) {
+backup->job_id = NULL;
 }
 
-blk = blk_by_name(device);
+blk = blk_by_name(backup->device);
 if (!blk) {
-error_setg(errp, "Device '%s' not found", device);
+error_setg(errp, "Device '%s' not found", backup->device);
 return;
 }
 
@@ -3310,12 +3295,12 @@ void do_blockdev_backup(const char *job_id, const char 
*device,
 aio_context_acquire(aio_context);
 
 if (!blk_is_available(blk)) {
-error_setg(errp, "Device '%s' has no medium", device);
+error_setg(errp, "Device '%s' has no medium", backup->device);
 goto out;
 }
 bs = blk_bs(blk);
 
-target_bs = bdrv_lookup_bs(target, target, errp);
+target_bs = bdrv_lookup_bs(backup->target, backup->target, errp);
 if (!target_bs) {
 goto out;
 }
@@ -3331,8 +3316,9 @@ void do_blockdev_backup(const char *job_id, const char 
*device,
 goto out;
 }
 }
-backup_start(job_id, bs, target_bs, speed, sync, NULL, on_source_error,
- on_target_error, block_job_cb, bs, txn, &local_err);
+backup_start(backup->job_id, bs, target_bs, backup->speed, 

[Qemu-devel] [PATCH v6 06/16] vmdk: add vmdk_co_pwritev_compressed

2016-07-21 Thread Denis V. Lunev
From: Pavel Butsykin 

Added implementation of the vmdk_co_pwritev_compressed function that
will allow us to safely use compressed writes for the vmdk from running
VMs.

Signed-off-by: Pavel Butsykin 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Denis V. Lunev 
CC: Jeff Cody 
CC: Markus Armbruster 
CC: Eric Blake 
CC: John Snow 
CC: Stefan Hajnoczi 
CC: Kevin Wolf 
---
 block/vmdk.c | 55 +--
 1 file changed, 5 insertions(+), 50 deletions(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index 46d474e..a11c27a 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -1645,56 +1645,11 @@ vmdk_co_pwritev(BlockDriverState *bs, uint64_t offset, 
uint64_t bytes,
 return ret;
 }
 
-typedef struct VmdkWriteCompressedCo {
-BlockDriverState *bs;
-int64_t sector_num;
-const uint8_t *buf;
-int nb_sectors;
-int ret;
-} VmdkWriteCompressedCo;
-
-static void vmdk_co_write_compressed(void *opaque)
-{
-VmdkWriteCompressedCo *co = opaque;
-QEMUIOVector local_qiov;
-uint64_t offset = co->sector_num * BDRV_SECTOR_SIZE;
-uint64_t bytes = co->nb_sectors * BDRV_SECTOR_SIZE;
-
-struct iovec iov = (struct iovec) {
-.iov_base   = (uint8_t*) co->buf,
-.iov_len= bytes,
-};
-qemu_iovec_init_external(&local_qiov, &iov, 1);
-
-co->ret = vmdk_pwritev(co->bs, offset, bytes, &local_qiov, false, false);
-}
-
-static int vmdk_write_compressed(BlockDriverState *bs,
- int64_t sector_num,
- const uint8_t *buf,
- int nb_sectors)
+static int coroutine_fn
+vmdk_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
+   uint64_t bytes, QEMUIOVector *qiov)
 {
-BDRVVmdkState *s = bs->opaque;
-
-if (s->num_extents == 1 && s->extents[0].compressed) {
-Coroutine *co;
-AioContext *aio_context = bdrv_get_aio_context(bs);
-VmdkWriteCompressedCo data = {
-.bs = bs,
-.sector_num = sector_num,
-.buf= buf,
-.nb_sectors = nb_sectors,
-.ret= -EINPROGRESS,
-};
-co = qemu_coroutine_create(vmdk_co_write_compressed, &data);
-qemu_coroutine_enter(co);
-while (data.ret == -EINPROGRESS) {
-aio_poll(aio_context, true);
-}
-return data.ret;
-} else {
-return -ENOTSUP;
-}
+return vmdk_co_pwritev(bs, offset, bytes, qiov, 0);
 }
 
 static int coroutine_fn vmdk_co_pwrite_zeroes(BlockDriverState *bs,
@@ -2393,7 +2348,7 @@ static BlockDriver bdrv_vmdk = {
 .bdrv_reopen_prepare  = vmdk_reopen_prepare,
 .bdrv_co_preadv   = vmdk_co_preadv,
 .bdrv_co_pwritev  = vmdk_co_pwritev,
-.bdrv_write_compressed= vmdk_write_compressed,
+.bdrv_co_pwritev_compressed   = vmdk_co_pwritev_compressed,
 .bdrv_co_pwrite_zeroes= vmdk_co_pwrite_zeroes,
 .bdrv_close   = vmdk_close,
 .bdrv_create  = vmdk_create,
-- 
2.5.0




[Qemu-devel] [PATCH v6 01/16] block: switch blk_write_compressed() to byte-based interface

2016-07-21 Thread Denis V. Lunev
From: Pavel Butsykin 

This is a preparatory patch, which continues the general trend of the
transition to the byte-based interfaces. bdrv_check_request() and
blk_check_request() are no longer used, so we can remove them.

Signed-off-by: Pavel Butsykin 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Denis V. Lunev 
CC: Jeff Cody 
CC: Markus Armbruster 
CC: Eric Blake 
CC: John Snow 
CC: Stefan Hajnoczi 
CC: Kevin Wolf 
---
 block/block-backend.c  | 23 ---
 block/io.c | 22 +++---
 include/block/block.h  |  4 ++--
 include/sysemu/block-backend.h |  4 ++--
 qemu-img.c |  6 --
 qemu-io-cmds.c |  2 +-
 6 files changed, 20 insertions(+), 41 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index effa038..8f38ab4 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -727,21 +727,6 @@ static int blk_check_byte_request(BlockBackend *blk, 
int64_t offset,
 return 0;
 }
 
-static int blk_check_request(BlockBackend *blk, int64_t sector_num,
- int nb_sectors)
-{
-if (sector_num < 0 || sector_num > INT64_MAX / BDRV_SECTOR_SIZE) {
-return -EIO;
-}
-
-if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) {
-return -EIO;
-}
-
-return blk_check_byte_request(blk, sector_num * BDRV_SECTOR_SIZE,
-  nb_sectors * BDRV_SECTOR_SIZE);
-}
-
 int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
unsigned int bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags)
@@ -1484,15 +1469,15 @@ int coroutine_fn blk_co_pwrite_zeroes(BlockBackend 
*blk, int64_t offset,
   flags | BDRV_REQ_ZERO_WRITE);
 }
 
-int blk_write_compressed(BlockBackend *blk, int64_t sector_num,
- const uint8_t *buf, int nb_sectors)
+int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
+  int count)
 {
-int ret = blk_check_request(blk, sector_num, nb_sectors);
+int ret = blk_check_byte_request(blk, offset, count);
 if (ret < 0) {
 return ret;
 }
 
-return bdrv_write_compressed(blk_bs(blk), sector_num, buf, nb_sectors);
+return bdrv_pwrite_compressed(blk_bs(blk), offset, buf, count);
 }
 
 int blk_truncate(BlockBackend *blk, int64_t offset)
diff --git a/block/io.c b/block/io.c
index 7323f0f..e9f35c6 100644
--- a/block/io.c
+++ b/block/io.c
@@ -540,17 +540,6 @@ static int bdrv_check_byte_request(BlockDriverState *bs, 
int64_t offset,
 return 0;
 }
 
-static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
-  int nb_sectors)
-{
-if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
-return -EIO;
-}
-
-return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
-   nb_sectors * BDRV_SECTOR_SIZE);
-}
-
 typedef struct RwCo {
 BdrvChild *child;
 int64_t offset;
@@ -1878,8 +1867,8 @@ int bdrv_is_allocated_above(BlockDriverState *top,
 return 0;
 }
 
-int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
-  const uint8_t *buf, int nb_sectors)
+int bdrv_pwrite_compressed(BlockDriverState *bs, int64_t offset,
+   const void *buf, int bytes)
 {
 BlockDriver *drv = bs->drv;
 int ret;
@@ -1890,14 +1879,17 @@ int bdrv_write_compressed(BlockDriverState *bs, int64_t 
sector_num,
 if (!drv->bdrv_write_compressed) {
 return -ENOTSUP;
 }
-ret = bdrv_check_request(bs, sector_num, nb_sectors);
+ret = bdrv_check_byte_request(bs, offset, bytes);
 if (ret < 0) {
 return ret;
 }
 
 assert(QLIST_EMPTY(>dirty_bitmaps));
+assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
 
-return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
+return drv->bdrv_write_compressed(bs, offset >> BDRV_SECTOR_BITS, buf,
+  bytes >> BDRV_SECTOR_BITS);
 }
 
 typedef struct BdrvVmstateCo {
diff --git a/include/block/block.h b/include/block/block.h
index 11c162d..b4a97f2 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -399,8 +399,8 @@ const char *bdrv_get_node_name(const BlockDriverState *bs);
 const char *bdrv_get_device_name(const BlockDriverState *bs);
 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs);
 int bdrv_get_flags(BlockDriverState *bs);
-int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
-  const uint8_t *buf, int nb_sectors);
+int 

[Qemu-devel] [PATCH v6 09/16] block: remove BlockDriver.bdrv_write_compressed

2016-07-21 Thread Denis V. Lunev
From: Pavel Butsykin 

There are no block drivers left that implement the old
.bdrv_write_compressed interface, so it can be removed. Also now we have
no need to use the bdrv_pwrite_compressed function and we can remove it
entirely.

Signed-off-by: Pavel Butsykin 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Denis V. Lunev 
CC: Jeff Cody 
CC: Markus Armbruster 
CC: Eric Blake 
CC: John Snow 
CC: Stefan Hajnoczi 
CC: Kevin Wolf 
---
 block/block-backend.c |  8 ++--
 block/io.c| 31 ---
 include/block/block.h |  2 --
 include/block/block_int.h |  3 ---
 qemu-img.c|  2 +-
 5 files changed, 3 insertions(+), 43 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index 4bfc2eb..53f7971 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1472,12 +1472,8 @@ int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, 
int64_t offset,
 int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
   int count)
 {
-int ret = blk_check_byte_request(blk, offset, count);
-if (ret < 0) {
-return ret;
-}
-
-return bdrv_pwrite_compressed(blk->root, offset, buf, count);
+return blk_prw(blk, offset, (void *) buf, count, blk_write_entry,
+   BDRV_REQ_WRITE_COMPRESSED);
 }
 
 int blk_truncate(BlockBackend *blk, int64_t offset)
diff --git a/block/io.c b/block/io.c
index 7fad5b7..60922ed 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1886,37 +1886,6 @@ int bdrv_is_allocated_above(BlockDriverState *top,
 return 0;
 }
 
-int bdrv_pwrite_compressed(BdrvChild *child, int64_t offset,
-   const void *buf, int bytes)
-{
-BlockDriverState *bs = child->bs;
-BlockDriver *drv = bs->drv;
-QEMUIOVector qiov;
-struct iovec iov;
-
-if (!drv) {
-return -ENOMEDIUM;
-}
-if (drv->bdrv_write_compressed) {
-int ret = bdrv_check_byte_request(bs, offset, bytes);
-if (ret < 0) {
-return ret;
-}
-assert(QLIST_EMPTY(>dirty_bitmaps));
-assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
-assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
-return drv->bdrv_write_compressed(bs, offset >> BDRV_SECTOR_BITS, buf,
-  bytes >> BDRV_SECTOR_BITS);
-}
-iov = (struct iovec) {
-.iov_base = (void *)buf,
-.iov_len = bytes,
-};
-qemu_iovec_init_external(, , 1);
-
-return bdrv_prwv_co(child, offset, , true, BDRV_REQ_WRITE_COMPRESSED);
-}
-
 typedef struct BdrvVmstateCo {
 BlockDriverState*bs;
 QEMUIOVector*qiov;
diff --git a/include/block/block.h b/include/block/block.h
index d8dacd2..7edce5c 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -400,8 +400,6 @@ const char *bdrv_get_node_name(const BlockDriverState *bs);
 const char *bdrv_get_device_name(const BlockDriverState *bs);
 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs);
 int bdrv_get_flags(BlockDriverState *bs);
-int bdrv_pwrite_compressed(BdrvChild *child, int64_t offset,
-   const void *buf, int bytes);
 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs);
 void bdrv_round_sectors_to_clusters(BlockDriverState *bs,
diff --git a/include/block/block_int.h b/include/block/block_int.h
index d2673a1..378c966 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -204,9 +204,6 @@ struct BlockDriver {
 bool has_variable_length;
 int64_t (*bdrv_get_allocated_file_size)(BlockDriverState *bs);
 
-int (*bdrv_write_compressed)(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors);
-
 int coroutine_fn (*bdrv_co_pwritev_compressed)(BlockDriverState *bs,
 uint64_t offset, uint64_t bytes, QEMUIOVector *qiov);
 
diff --git a/qemu-img.c b/qemu-img.c
index d5676a5..014c408 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -2034,7 +2034,7 @@ static int img_convert(int argc, char **argv)
 const char *preallocation =
 qemu_opt_get(opts, BLOCK_OPT_PREALLOC);
 
-if (!drv->bdrv_write_compressed && !drv->bdrv_co_pwritev_compressed) {
+if (!drv->bdrv_co_pwritev_compressed) {
 error_report("Compression not supported for this file format");
 ret = -1;
 goto out;
-- 
2.5.0




[Qemu-devel] [PATCH v6 08/16] qcow: cleanup qcow_co_pwritev_compressed to avoid the recursion

2016-07-21 Thread Denis V. Lunev
From: Pavel Butsykin 

Now that the function uses a vector instead of a buffer, there is no
need to use recursive code.

Signed-off-by: Pavel Butsykin 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Denis V. Lunev 
CC: Jeff Cody 
CC: Markus Armbruster 
CC: Eric Blake 
CC: John Snow 
CC: Stefan Hajnoczi 
CC: Kevin Wolf 
---
 block/qcow.c | 24 +++-
 1 file changed, 7 insertions(+), 17 deletions(-)

diff --git a/block/qcow.c b/block/qcow.c
index e1d335d..20d2e15 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -927,27 +927,17 @@ qcow_co_pwritev_compressed(BlockDriverState *bs, uint64_t 
offset,
 uint8_t *buf, *out_buf;
 uint64_t cluster_offset;
 
+buf = qemu_blockalign(bs, s->cluster_size);
 if (bytes != s->cluster_size) {
-ret = -EINVAL;
-
-/* Zero-pad last write if image size is not cluster aligned */
-if (offset + bytes == bs->total_sectors << BDRV_SECTOR_BITS &&
-bytes < s->cluster_size)
+if (bytes > s->cluster_size ||
+offset + bytes != bs->total_sectors << BDRV_SECTOR_BITS)
 {
-uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size);
-memset(pad_buf, 0, s->cluster_size);
-qemu_iovec_to_buf(qiov, 0, pad_buf, s->cluster_size);
-iov = (struct iovec) {
-.iov_base   = pad_buf,
-.iov_len= s->cluster_size,
-};
-qemu_iovec_init_external(_qiov, , 1);
-ret = qcow_co_pwritev_compressed(bs, offset, bytes, _qiov);
-qemu_vfree(pad_buf);
+qemu_vfree(buf);
+return -EINVAL;
 }
-return ret;
+/* Zero-pad last write if image size is not cluster aligned */
+memset(buf + bytes, 0, s->cluster_size - bytes);
 }
-buf = qemu_blockalign(bs, s->cluster_size);
 qemu_iovec_to_buf(qiov, 0, buf, qiov->size);
 
 out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
-- 
2.5.0




[Qemu-devel] [PATCH v6 05/16] qcow2: cleanup qcow2_co_pwritev_compressed to avoid the recursion

2016-07-21 Thread Denis V. Lunev
From: Pavel Butsykin 

Now that the function uses a vector instead of a buffer, there is no
need to use recursive code.

Signed-off-by: Pavel Butsykin 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Denis V. Lunev 
CC: Jeff Cody 
CC: Markus Armbruster 
CC: Eric Blake 
CC: John Snow 
CC: Stefan Hajnoczi 
CC: Kevin Wolf 
---
 block/qcow2.c | 24 +++-
 1 file changed, 7 insertions(+), 17 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index b5c69df..01bc003 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -2554,27 +2554,17 @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, 
uint64_t offset,
 return bdrv_truncate(bs->file->bs, cluster_offset);
 }
 
+buf = qemu_blockalign(bs, s->cluster_size);
 if (bytes != s->cluster_size) {
-ret = -EINVAL;
-
-/* Zero-pad last write if image size is not cluster aligned */
-if (offset + bytes == bs->total_sectors << BDRV_SECTOR_BITS &&
-bytes < s->cluster_size)
+if (bytes > s->cluster_size ||
+offset + bytes != bs->total_sectors << BDRV_SECTOR_BITS)
 {
-uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size);
-memset(pad_buf, 0, s->cluster_size);
-qemu_iovec_to_buf(qiov, 0, pad_buf, s->cluster_size);
-iov = (struct iovec) {
-.iov_base   = pad_buf,
-.iov_len= s->cluster_size,
-};
-qemu_iovec_init_external(_qiov, , 1);
-ret = qcow2_co_pwritev_compressed(bs, offset, bytes, _qiov);
-qemu_vfree(pad_buf);
+qemu_vfree(buf);
+return -EINVAL;
 }
-return ret;
+/* Zero-pad last write if image size is not cluster aligned */
+memset(buf + bytes, 0, s->cluster_size - bytes);
 }
-buf = qemu_blockalign(bs, s->cluster_size);
 qemu_iovec_to_buf(qiov, 0, buf, s->cluster_size);
 
 out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
-- 
2.5.0




[Qemu-devel] [PATCH v6 03/16] block/io: reuse bdrv_co_pwritev() for write compressed

2016-07-21 Thread Denis V. Lunev
From: Pavel Butsykin 

For bdrv_pwrite_compressed(), most of the coroutine-creating code is
duplicated in bdrv_prwv_co(). So we can just add a flag
(BDRV_REQ_WRITE_COMPRESSED) and use bdrv_prwv_co() as the generic path.
In the end we get a coroutine-oriented function for compressed writes by
using bdrv_co_pwritev/blk_co_pwritev with the BDRV_REQ_WRITE_COMPRESSED flag.

Signed-off-by: Pavel Butsykin 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Denis V. Lunev 
CC: Jeff Cody 
CC: Markus Armbruster 
CC: Eric Blake 
CC: John Snow 
CC: Stefan Hajnoczi 
CC: Kevin Wolf 
---
 block/io.c| 56 +--
 include/block/block.h |  3 ++-
 include/block/block_int.h |  3 +++
 qemu-img.c|  2 +-
 4 files changed, 46 insertions(+), 18 deletions(-)

diff --git a/block/io.c b/block/io.c
index 1503e09..7fad5b7 100644
--- a/block/io.c
+++ b/block/io.c
@@ -886,6 +886,20 @@ emulate_flags:
 return ret;
 }
 
+static int coroutine_fn
+bdrv_driver_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
+   uint64_t bytes, QEMUIOVector *qiov)
+{
+BlockDriver *drv = bs->drv;
+
+if (!drv->bdrv_co_pwritev_compressed) {
+return -ENOTSUP;
+}
+
+assert(QLIST_EMPTY(>dirty_bitmaps));
+return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov);
+}
+
 static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
 int64_t offset, unsigned int bytes, QEMUIOVector *qiov)
 {
@@ -1554,9 +1568,14 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
 bytes = ROUND_UP(bytes, align);
 }
 
-ret = bdrv_aligned_pwritev(bs, , offset, bytes, align,
-   use_local_qiov ? _qiov : qiov,
-   flags);
+if (flags & BDRV_REQ_WRITE_COMPRESSED) {
+ret = bdrv_driver_pwritev_compressed(
+bs, offset, bytes, use_local_qiov ? _qiov : qiov);
+} else {
+ret = bdrv_aligned_pwritev(bs, , offset, bytes, align,
+   use_local_qiov ? _qiov : qiov,
+   flags);
+}
 
 fail:
 
@@ -1872,25 +1891,30 @@ int bdrv_pwrite_compressed(BdrvChild *child, int64_t 
offset,
 {
 BlockDriverState *bs = child->bs;
 BlockDriver *drv = bs->drv;
-int ret;
+QEMUIOVector qiov;
+struct iovec iov;
 
 if (!drv) {
 return -ENOMEDIUM;
 }
-if (!drv->bdrv_write_compressed) {
-return -ENOTSUP;
-}
-ret = bdrv_check_byte_request(bs, offset, bytes);
-if (ret < 0) {
-return ret;
+if (drv->bdrv_write_compressed) {
+int ret = bdrv_check_byte_request(bs, offset, bytes);
+if (ret < 0) {
+return ret;
+}
+assert(QLIST_EMPTY(>dirty_bitmaps));
+assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+return drv->bdrv_write_compressed(bs, offset >> BDRV_SECTOR_BITS, buf,
+  bytes >> BDRV_SECTOR_BITS);
 }
+iov = (struct iovec) {
+.iov_base = (void *)buf,
+.iov_len = bytes,
+};
+qemu_iovec_init_external(, , 1);
 
-assert(QLIST_EMPTY(>dirty_bitmaps));
-assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
-assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
-
-return drv->bdrv_write_compressed(bs, offset >> BDRV_SECTOR_BITS, buf,
-  bytes >> BDRV_SECTOR_BITS);
+return bdrv_prwv_co(child, offset, , true, BDRV_REQ_WRITE_COMPRESSED);
 }
 
 typedef struct BdrvVmstateCo {
diff --git a/include/block/block.h b/include/block/block.h
index 7bb5ddb..d8dacd2 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -65,9 +65,10 @@ typedef enum {
 BDRV_REQ_MAY_UNMAP  = 0x4,
 BDRV_REQ_NO_SERIALISING = 0x8,
 BDRV_REQ_FUA= 0x10,
+BDRV_REQ_WRITE_COMPRESSED   = 0x20,
 
 /* Mask of valid flags */
-BDRV_REQ_MASK   = 0x1f,
+BDRV_REQ_MASK   = 0x3f,
 } BdrvRequestFlags;
 
 typedef struct BlockSizes {
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 1fe0fd9..d2673a1 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -207,6 +207,9 @@ struct BlockDriver {
 int (*bdrv_write_compressed)(BlockDriverState *bs, int64_t sector_num,
  const uint8_t *buf, int nb_sectors);
 
+int coroutine_fn (*bdrv_co_pwritev_compressed)(BlockDriverState *bs,
+uint64_t offset, uint64_t bytes, QEMUIOVector *qiov);
+
 int (*bdrv_snapshot_create)(BlockDriverState *bs,
 QEMUSnapshotInfo *sn_info);
 int (*bdrv_snapshot_goto)(BlockDriverState 

[Qemu-devel] [PATCH 2/4] nbd: Limit nbdflags to 16 bits

2016-07-21 Thread Eric Blake
Rather than asserting that nbdflags is within range, just give
it the correct type to begin with :)  nbdflags corresponds to
the per-export portion of NBD Protocol "transmission flags", which
is 16 bits in response to NBD_OPT_EXPORT_NAME and NBD_OPT_GO.

Furthermore, upstream NBD has never passed the global flags to
the kernel via ioctl(NBD_SET_FLAGS) (the ioctl was first
introduced in NBD 2.9.22; then a latent bug in NBD 3.1 actually
tried to OR the global flags with the transmission flags, with
the disaster that the addition of NBD_FLAG_NO_ZEROES in 3.9
caused all earlier NBD 3.x clients to treat every export as
read-only; NBD 3.10 and later intentionally clip things to 16
bits to pass only transmission flags).  Qemu should follow suit,
since the current two global flags (NBD_FLAG_FIXED_NEWSTYLE
and NBD_FLAG_NO_ZEROES) have no impact on the kernel's behavior
during transmission.

CC: qemu-sta...@nongnu.org
Signed-off-by: Eric Blake 

---
v1: extract from larger series
previously 3/14 of v5 NBD write zeroes series
v4: rebase, cc qemu-stable
v3: expand scope of patch
---
 block/nbd-client.h  |  2 +-
 include/block/nbd.h |  6 +++---
 nbd/client.c| 28 +++-
 nbd/server.c| 10 --
 qemu-nbd.c  |  4 ++--
 5 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/block/nbd-client.h b/block/nbd-client.h
index fa9817b..044aca4 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -20,7 +20,7 @@
 typedef struct NbdClientSession {
 QIOChannelSocket *sioc; /* The master data channel */
 QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */
-uint32_t nbdflags;
+uint16_t nbdflags;
 off_t size;

 CoMutex send_mutex;
diff --git a/include/block/nbd.h b/include/block/nbd.h
index cb91820..1897557 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -90,11 +90,11 @@ ssize_t nbd_wr_syncv(QIOChannel *ioc,
  size_t niov,
  size_t length,
  bool do_read);
-int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint32_t *flags,
+int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags,
   QCryptoTLSCreds *tlscreds, const char *hostname,
   QIOChannel **outioc,
   off_t *size, Error **errp);
-int nbd_init(int fd, QIOChannelSocket *sioc, uint32_t flags, off_t size);
+int nbd_init(int fd, QIOChannelSocket *sioc, uint16_t flags, off_t size);
 ssize_t nbd_send_request(QIOChannel *ioc, struct nbd_request *request);
 ssize_t nbd_receive_reply(QIOChannel *ioc, struct nbd_reply *reply);
 int nbd_client(int fd);
@@ -104,7 +104,7 @@ typedef struct NBDExport NBDExport;
 typedef struct NBDClient NBDClient;

 NBDExport *nbd_export_new(BlockBackend *blk, off_t dev_offset, off_t size,
-  uint32_t nbdflags, void (*close)(NBDExport *),
+  uint16_t nbdflags, void (*close)(NBDExport *),
   Error **errp);
 void nbd_export_close(NBDExport *exp);
 void nbd_export_get(NBDExport *exp);
diff --git a/nbd/client.c b/nbd/client.c
index 78a7195..a92f1e2 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -408,7 +408,7 @@ static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
 }


-int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint32_t *flags,
+int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags,
   QCryptoTLSCreds *tlscreds, const char *hostname,
   QIOChannel **outioc,
   off_t *size, Error **errp)
@@ -468,7 +468,6 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char 
*name, uint32_t *flags,
 uint32_t opt;
 uint32_t namesize;
 uint16_t globalflags;
-uint16_t exportflags;
 bool fixedNewStyle = false;

 if (read_sync(ioc, , sizeof(globalflags)) !=
@@ -477,7 +476,6 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char 
*name, uint32_t *flags,
 goto fail;
 }
 globalflags = be16_to_cpu(globalflags);
-*flags = globalflags << 16;
 TRACE("Global flags are %" PRIx32, globalflags);
 if (globalflags & NBD_FLAG_FIXED_NEWSTYLE) {
 fixedNewStyle = true;
@@ -545,17 +543,15 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char 
*name, uint32_t *flags,
 goto fail;
 }
 *size = be64_to_cpu(s);
-TRACE("Size is %" PRIu64, *size);

-if (read_sync(ioc, , sizeof(exportflags)) !=
-sizeof(exportflags)) {
+if (read_sync(ioc, flags, sizeof(*flags)) != sizeof(*flags)) {
 error_setg(errp, "Failed to read export flags");
 goto fail;
 }
-exportflags = be16_to_cpu(exportflags);
-*flags |= exportflags;
-TRACE("Export flags are %" PRIx16, exportflags);
+be16_to_cpus(flags);

[Qemu-devel] [PATCH v6 04/16] qcow2: add qcow2_co_pwritev_compressed

2016-07-21 Thread Denis V. Lunev
From: Pavel Butsykin 

Add an implementation of qcow2_co_pwritev_compressed() that will allow
us to safely use compressed writes to qcow2 images from running
VMs.

Signed-off-by: Pavel Butsykin 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Denis V. Lunev 
CC: Jeff Cody 
CC: Markus Armbruster 
CC: Eric Blake 
CC: John Snow 
CC: Stefan Hajnoczi 
CC: Kevin Wolf 
---
 block/qcow2.c | 124 +++---
 1 file changed, 50 insertions(+), 74 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index d620d0a..b5c69df 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -2533,84 +2533,49 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t 
offset)
 return 0;
 }
 
-typedef struct Qcow2WriteCo {
-BlockDriverState *bs;
-int64_t sector_num;
-const uint8_t *buf;
-int nb_sectors;
-int ret;
-} Qcow2WriteCo;
-
-static void qcow2_write_co_entry(void *opaque)
-{
-Qcow2WriteCo *co = opaque;
-QEMUIOVector qiov;
-uint64_t offset = co->sector_num * BDRV_SECTOR_SIZE;
-uint64_t bytes = co->nb_sectors * BDRV_SECTOR_SIZE;
-
-struct iovec iov = (struct iovec) {
-.iov_base   = (uint8_t*) co->buf,
-.iov_len= bytes,
-};
-qemu_iovec_init_external(, , 1);
-
-co->ret = qcow2_co_pwritev(co->bs, offset, bytes, , 0);
-}
-
-/* Wrapper for non-coroutine contexts */
-static int qcow2_write(BlockDriverState *bs, int64_t sector_num,
-   const uint8_t *buf, int nb_sectors)
-{
-Coroutine *co;
-AioContext *aio_context = bdrv_get_aio_context(bs);
-Qcow2WriteCo data = {
-.bs = bs,
-.sector_num = sector_num,
-.buf= buf,
-.nb_sectors = nb_sectors,
-.ret= -EINPROGRESS,
-};
-co = qemu_coroutine_create(qcow2_write_co_entry, );
-qemu_coroutine_enter(co);
-while (data.ret == -EINPROGRESS) {
-aio_poll(aio_context, true);
-}
-return data.ret;
-}
-
 /* XXX: put compressed sectors first, then all the cluster aligned
tables to avoid losing bytes in alignment */
-static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
-  const uint8_t *buf, int nb_sectors)
+static coroutine_fn int
+qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
+uint64_t bytes, QEMUIOVector *qiov)
 {
 BDRVQcow2State *s = bs->opaque;
+QEMUIOVector hd_qiov;
+struct iovec iov;
 z_stream strm;
 int ret, out_len;
-uint8_t *out_buf;
+uint8_t *buf, *out_buf;
 uint64_t cluster_offset;
 
-if (nb_sectors == 0) {
+if (bytes == 0) {
 /* align end of file to a sector boundary to ease reading with
sector based I/Os */
 cluster_offset = bdrv_getlength(bs->file->bs);
 return bdrv_truncate(bs->file->bs, cluster_offset);
 }
 
-if (nb_sectors != s->cluster_sectors) {
+if (bytes != s->cluster_size) {
 ret = -EINVAL;
 
 /* Zero-pad last write if image size is not cluster aligned */
-if (sector_num + nb_sectors == bs->total_sectors &&
-nb_sectors < s->cluster_sectors) {
+if (offset + bytes == bs->total_sectors << BDRV_SECTOR_BITS &&
+bytes < s->cluster_size)
+{
 uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size);
 memset(pad_buf, 0, s->cluster_size);
-memcpy(pad_buf, buf, nb_sectors * BDRV_SECTOR_SIZE);
-ret = qcow2_write_compressed(bs, sector_num,
- pad_buf, s->cluster_sectors);
+qemu_iovec_to_buf(qiov, 0, pad_buf, s->cluster_size);
+iov = (struct iovec) {
+.iov_base   = pad_buf,
+.iov_len= s->cluster_size,
+};
+qemu_iovec_init_external(_qiov, , 1);
+ret = qcow2_co_pwritev_compressed(bs, offset, bytes, _qiov);
 qemu_vfree(pad_buf);
 }
 return ret;
 }
+buf = qemu_blockalign(bs, s->cluster_size);
+qemu_iovec_to_buf(qiov, 0, buf, s->cluster_size);
 
 out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
 
@@ -2641,33 +2606,44 @@ static int qcow2_write_compressed(BlockDriverState *bs, 
int64_t sector_num,
 
 if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
 /* could not compress: write normal cluster */
-ret = qcow2_write(bs, sector_num, buf, s->cluster_sectors);
+ret = qcow2_co_pwritev(bs, offset, bytes, qiov, 0);
 if (ret < 0) {
 goto fail;
 }
-} else {
-cluster_offset = qcow2_alloc_compressed_cluster_offset(bs,
-sector_num << 9, out_len);
-if (!cluster_offset) {
-  

[Qemu-devel] [PATCH v6 10/16] block/io: turn on dirty_bitmaps for the compressed writes

2016-07-21 Thread Denis V. Lunev
From: Pavel Butsykin 

The assert was previously added by:

  commit 1755da16e32c15b22a521e8a38539e4b5cf367f3
  Author: Paolo Bonzini 
  Date:   Thu Oct 18 16:49:18 2012 +0200
  block: introduce new dirty bitmap functionality

Now that the compressed write always runs in a coroutine and the dirty
bits are set after the write, we can allow dirty_bitmaps to be used
together with compressed writes again.

Signed-off-by: Pavel Butsykin 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Denis V. Lunev 
CC: Jeff Cody 
CC: Markus Armbruster 
CC: Eric Blake 
CC: John Snow 
CC: Stefan Hajnoczi 
CC: Kevin Wolf 
---
 block/io.c | 14 +-
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/block/io.c b/block/io.c
index 60922ed..bc9eee5 100644
--- a/block/io.c
+++ b/block/io.c
@@ -896,7 +896,6 @@ bdrv_driver_pwritev_compressed(BlockDriverState *bs, 
uint64_t offset,
 return -ENOTSUP;
 }
 
-assert(QLIST_EMPTY(>dirty_bitmaps));
 return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov);
 }
 
@@ -1317,6 +1316,8 @@ static int coroutine_fn 
bdrv_aligned_pwritev(BlockDriverState *bs,
 } else if (flags & BDRV_REQ_ZERO_WRITE) {
 bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO);
 ret = bdrv_co_do_pwrite_zeroes(bs, offset, bytes, flags);
+} else if (flags & BDRV_REQ_WRITE_COMPRESSED) {
+ret = bdrv_driver_pwritev_compressed(bs, offset, bytes, qiov);
 } else if (bytes <= max_transfer) {
 bdrv_debug_event(bs, BLKDBG_PWRITEV);
 ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, flags);
@@ -1568,14 +1569,9 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
 bytes = ROUND_UP(bytes, align);
 }
 
-if (flags & BDRV_REQ_WRITE_COMPRESSED) {
-ret = bdrv_driver_pwritev_compressed(
-bs, offset, bytes, use_local_qiov ? _qiov : qiov);
-} else {
-ret = bdrv_aligned_pwritev(bs, , offset, bytes, align,
-   use_local_qiov ? _qiov : qiov,
-   flags);
-}
+ret = bdrv_aligned_pwritev(bs, , offset, bytes, align,
+   use_local_qiov ? _qiov : qiov,
+   flags);
 
 fail:
 
-- 
2.5.0




[Qemu-devel] [PATCH for-2.7 0/4] NBD and block alignment fixes

2016-07-21 Thread Eric Blake
This series extracts a couple of bug fixes that should be included
in 2.7, out of my earlier v5 NBD series [1] that was deemed too
large and too late.  Then it tackles the promised regression fix
reported by Peter for Dell Equallogic iSCSI SANs with their unusual
non-power-of-2 unmap granularity.

[1] https://lists.gnu.org/archive/html/qemu-devel/2016-07/msg04053.html

The earlier series had a couple other patches that are
borderline bug fixes, but I think they can wait for 2.8,
as follows:

https://lists.gnu.org/archive/html/qemu-devel/2016-07/msg04054.html
8/14 nbd: Let server know when client gives up
Servers already have to deal with clients like qemu 2.6 that
don't give this notification, so it doesn't hurt to keep 2.7
in that same situation.

https://lists.gnu.org/archive/html/qemu-devel/2016-07/msg04064.html
12/14 nbd: Improve server handling of shutdown requests
Clients already have to deal with servers like qemu 2.6 that
don't reply to NBD_OPT_ABORT, so it doesn't hurt to keep 2.7
in that same situation.

Also available as a tag at:
git fetch git://repo.or.cz/qemu/ericb.git nbd-fixes-v1

Eric Blake (4):
  nbd: Fix bad flag detection on server
  nbd: Limit nbdflags to 16 bits
  osdep: Document differences in rounding macros
  block: Cater to iscsi with non-power-of-2 discard

 block/nbd-client.h|  2 +-
 include/block/block_int.h | 37 -
 include/block/nbd.h   |  6 +++---
 include/qemu/osdep.h  |  6 +-
 block/io.c| 15 +--
 nbd/client.c  | 28 +++-
 nbd/server.c  | 13 ++---
 qemu-nbd.c|  4 ++--
 8 files changed, 61 insertions(+), 50 deletions(-)

-- 
2.5.5




[Qemu-devel] [PATCH 3/4] osdep: Document differences in rounding macros

2016-07-21 Thread Eric Blake
Make it obvious which macros are safe in which situations.

Useful since QEMU_ALIGN_UP and ROUND_UP both purport to do
the same thing, but differ on whether the alignment must be
a power of 2.

Signed-off-by: Eric Blake 
---
 include/qemu/osdep.h | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index fbb8759..9991fb0 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -158,7 +158,8 @@ extern int daemon(int, int);
 /* Round number down to multiple */
 #define QEMU_ALIGN_DOWN(n, m) ((n) / (m) * (m))

-/* Round number up to multiple */
+/* Round number up to multiple. Safe when m is not a power of 2 (see
+ * ROUND_UP for a faster version when a power of 2 is guaranteed) */
 #define QEMU_ALIGN_UP(n, m) QEMU_ALIGN_DOWN((n) + (m) - 1, (m))

 /* Check if n is a multiple of m */
@@ -175,6 +176,9 @@ extern int daemon(int, int);
 /* Check if pointer p is n-bytes aligned */
 #define QEMU_PTR_IS_ALIGNED(p, n) QEMU_IS_ALIGNED((uintptr_t)(p), (n))

+/* Round number up to multiple. Requires that d be a power of 2 (see
+ * QEMU_ALIGN_UP for a safer but slower version on arbitrary
+ * numbers) */
 #ifndef ROUND_UP
 #define ROUND_UP(n,d) (((n) + (d) - 1) & -(d))
 #endif
-- 
2.5.5




[Qemu-devel] [PATCH 1/4] nbd: Fix bad flag detection on server

2016-07-21 Thread Eric Blake
Commit ab7c548e added a check for invalid flags, but used an
early return on error instead of properly going through the
cleanup label.

Signed-off-by: Eric Blake 

---
v1: extract from larger series
previously 1/14 of v5 NBD write zeroes series
v4: new patch
---
 nbd/server.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/nbd/server.c b/nbd/server.c
index 29e2099..3c1e2b3 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -1057,7 +1057,8 @@ static ssize_t nbd_co_receive_request(NBDRequest *req,
 if (request->type & ~NBD_CMD_MASK_COMMAND & ~NBD_CMD_FLAG_FUA) {
 LOG("unsupported flags (got 0x%x)",
 request->type & ~NBD_CMD_MASK_COMMAND);
-return -EINVAL;
+rc = -EINVAL;
+goto out;
 }

 rc = 0;
-- 
2.5.5




Re: [Qemu-devel] [PULL 0/4] Block patches

2016-07-21 Thread Peter Maydell
On 20 July 2016 at 22:16, Eric Blake  wrote:
> On 07/20/2016 10:05 AM, Peter Maydell wrote:
>> On 19 July 2016 at 23:47, Max Reitz  wrote:
>>> The following changes since commit 5d3217340adcb6c4f0e4af5d2b865331eb2ff63d:
>>>
>>>   disas: Fix ATTRIBUTE_UNUSED define clash with ALSA headers (2016-07-19 
>>> 16:40:39 +0100)
>>>
>>> are available in the git repository at:
>>>
>>>   git://github.com/XanClic/qemu.git tags/pull-block-2016-07-20
>>>
>>> for you to fetch changes up to bafea5b7c26dd14895f7be64685a12645a75f4cf:
>>>
>>>   block: export LUKS specific data to qemu-img info (2016-07-20 00:34:03 
>>> +0200)
>>>
>>> 
>>> Block patches for master
>>>
>>> 
>>
>> Fails to build on everything:
>>
>>   GEN   qapi-visit.h
>> In file included from /Users/pm215/src/qemu-for-merges/qapi-schema.json:9:
>> /Users/pm215/src/qemu-for-merges/qapi/crypto.json:299: Union
>> 'QCryptoBlockInfo' data missing 'qcow' branch
>
> Aha. Cause is two branches developed in parallel; commit d0b18239 forces
> all branches of a flat union to be listed (to avoid an abort() if the
> user passes a branch that was not listed); solution is to expand the
> crypto.json addition to cover all branches, even if it means an empty
> type for the branches that have no additional data.

I'm just processing the last other outstanding pullreq now,
so unless a respin of this arrives by tomorrow lunchtime UK
time it's going to miss rc0, I think.

thanks
-- PMM



Re: [Qemu-devel] [PATCH v3] virtio-pci: error out when both legacy and modern modes are disabled

2016-07-21 Thread Marcel Apfelbaum

On 07/21/2016 08:43 PM, Greg Kurz wrote:

From: Greg Kurz 

Without presuming if we got there because of a user mistake or some
more subtle bug in the tooling, it really does not make sense to
implement a non-functional device.

Signed-off-by: Greg Kurz 
Signed-off-by: Greg Kurz 
---
v3: - rebased on top of:
 https://lists.gnu.org/archive/html/qemu-devel/2016-07/msg04744.html
 - use virtio_pci_legacy/modern helpers
 - rephrased error message to be shorter and use the on/off logic

Marcel, this still results in > 80 char line in the code but I'd rather not
split it to ease grepping, nor shorten the message even more to keep it
meaningful.
---
  hw/virtio/virtio-pci.c |5 +
  1 file changed, 5 insertions(+)

diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 755f9218b77d..1f5f00a50a0b 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1842,6 +1842,11 @@ static void virtio_pci_dc_realize(DeviceState *qdev, 
Error **errp)
  VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev);
  PCIDevice *pci_dev = &proxy->pci_dev;

+if (!(virtio_pci_modern(proxy) || virtio_pci_legacy(proxy))) {


Hi Greg,
Thanks for rebasing it.

A minor thing: disable-legacy is now auto/on/off.
If the user sets [disable-legacy=auto, disable-modern=on], the device
will not pass this test, but it is possible that legacy mode
will be enabled later on:
See virtio_pci_realize:
   if (proxy->disable_legacy == ON_OFF_AUTO_AUTO) {
  proxy->disable_legacy = pcie_port ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
   }

On the other hand, if the user sets disable-modern=on, it is reasonable to ask
him to set disable-legacy to off.


Reviewed-by: Marcel Apfelbaum 

Thanks,
Marcel




+error_setg(errp, "device cannot work when both disable-modern and 
disable-legacy are set to on.");
+return;
+}
+
  if (!(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_PCIE) &&
  virtio_pci_modern(proxy)) {
  pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;






[Qemu-devel] [PULL v5 49/57] virtio-balloon: Wrap in vmstate

2016-07-21 Thread Michael S. Tsirkin
From: "Dr. David Alan Gilbert" 

Forcibly convert it to a vmstate wrapper;  proper conversion
comes later.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Cornelia Huck 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/virtio/virtio-balloon.c | 19 +--
 1 file changed, 5 insertions(+), 14 deletions(-)

diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index 1a22e6d..5af429a 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -396,11 +396,6 @@ static void virtio_balloon_to_target(void *opaque, 
ram_addr_t target)
 trace_virtio_balloon_to_target(target, dev->num_pages);
 }
 
-static void virtio_balloon_save(QEMUFile *f, void *opaque)
-{
-virtio_save(VIRTIO_DEVICE(opaque), f);
-}
-
 static void virtio_balloon_save_device(VirtIODevice *vdev, QEMUFile *f)
 {
 VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
@@ -409,12 +404,9 @@ static void virtio_balloon_save_device(VirtIODevice *vdev, 
QEMUFile *f)
 qemu_put_be32(f, s->actual);
 }
 
-static int virtio_balloon_load(QEMUFile *f, void *opaque, int version_id)
+static int virtio_balloon_load(QEMUFile *f, void *opaque, size_t size)
 {
-if (version_id != 1)
-return -EINVAL;
-
-return virtio_load(VIRTIO_DEVICE(opaque), f, version_id);
+return virtio_load(VIRTIO_DEVICE(opaque), f, 1);
 }
 
 static int virtio_balloon_load_device(VirtIODevice *vdev, QEMUFile *f,
@@ -454,9 +446,6 @@ static void virtio_balloon_device_realize(DeviceState *dev, 
Error **errp)
 s->svq = virtio_add_queue(vdev, 128, virtio_balloon_receive_stats);
 
 reset_stats(s);
-
-register_savevm(dev, "virtio-balloon", -1, 1,
-virtio_balloon_save, virtio_balloon_load, s);
 }
 
 static void virtio_balloon_device_unrealize(DeviceState *dev, Error **errp)
@@ -466,7 +455,6 @@ static void virtio_balloon_device_unrealize(DeviceState 
*dev, Error **errp)
 
 balloon_stats_destroy_timer(s);
 qemu_remove_balloon_handler(s);
-unregister_savevm(dev, "virtio-balloon", s);
 virtio_cleanup(vdev);
 }
 
@@ -493,6 +481,8 @@ static void virtio_balloon_instance_init(Object *obj)
 NULL, s, NULL);
 }
 
+VMSTATE_VIRTIO_DEVICE(balloon, 1, virtio_balloon_load, virtio_vmstate_save);
+
 static Property virtio_balloon_properties[] = {
 DEFINE_PROP_BIT("deflate-on-oom", VirtIOBalloon, host_features,
 VIRTIO_BALLOON_F_DEFLATE_ON_OOM, false),
@@ -505,6 +495,7 @@ static void virtio_balloon_class_init(ObjectClass *klass, 
void *data)
 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
 
 dc->props = virtio_balloon_properties;
+dc->vmsd = &vmstate_virtio_balloon;
 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
 vdc->realize = virtio_balloon_device_realize;
 vdc->unrealize = virtio_balloon_device_unrealize;
-- 
MST




[Qemu-devel] [PULL v5 47/57] virtio-blk: Wrap in vmstate

2016-07-21 Thread Michael S. Tsirkin
From: "Dr. David Alan Gilbert" 

Forcibly convert it to a vmstate wrapper;  proper conversion
comes later.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Cornelia Huck 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/block/virtio-blk.c | 16 ++--
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 728b278..475a822 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -798,7 +798,7 @@ static void virtio_blk_set_status(VirtIODevice *vdev, 
uint8_t status)
 }
 }
 
-static void virtio_blk_save(QEMUFile *f, void *opaque)
+static void virtio_blk_save(QEMUFile *f, void *opaque, size_t size)
 {
 VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
 
@@ -823,15 +823,12 @@ static void virtio_blk_save_device(VirtIODevice *vdev, 
QEMUFile *f)
 qemu_put_sbyte(f, 0);
 }
 
-static int virtio_blk_load(QEMUFile *f, void *opaque, int version_id)
+static int virtio_blk_load(QEMUFile *f, void *opaque, size_t size)
 {
 VirtIOBlock *s = opaque;
 VirtIODevice *vdev = VIRTIO_DEVICE(s);
 
-if (version_id != 2)
-return -EINVAL;
-
-return virtio_load(vdev, f, version_id);
+return virtio_load(vdev, f, 2);
 }
 
 static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f,
@@ -880,7 +877,6 @@ static void virtio_blk_device_realize(DeviceState *dev, 
Error **errp)
 VirtIOBlock *s = VIRTIO_BLK(dev);
 VirtIOBlkConf *conf = &s->conf;
 Error *err = NULL;
-static int virtio_blk_id;
 unsigned i;
 
 if (!conf->conf.blk) {
@@ -924,8 +920,6 @@ static void virtio_blk_device_realize(DeviceState *dev, 
Error **errp)
 }
 
 s->change = qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s);
-register_savevm(dev, "virtio-blk", virtio_blk_id++, 2,
-virtio_blk_save, virtio_blk_load, s);
 blk_set_dev_ops(s->blk, &virtio_block_ops, s);
 blk_set_guest_block_size(s->blk, s->conf.conf.logical_block_size);
 
@@ -940,7 +934,6 @@ static void virtio_blk_device_unrealize(DeviceState *dev, 
Error **errp)
 virtio_blk_data_plane_destroy(s->dataplane);
 s->dataplane = NULL;
 qemu_del_vm_change_state_handler(s->change);
-unregister_savevm(dev, "virtio-blk", s);
 blockdev_mark_auto_del(s->blk);
 virtio_cleanup(vdev);
 }
@@ -958,6 +951,8 @@ static void virtio_blk_instance_init(Object *obj)
   DEVICE(obj), NULL);
 }
 
+VMSTATE_VIRTIO_DEVICE(blk, 2, virtio_blk_load, virtio_blk_save);
+
 static Property virtio_blk_properties[] = {
 DEFINE_BLOCK_PROPERTIES(VirtIOBlock, conf.conf),
 DEFINE_BLOCK_ERROR_PROPERTIES(VirtIOBlock, conf.conf),
@@ -979,6 +974,7 @@ static void virtio_blk_class_init(ObjectClass *klass, void 
*data)
 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
 
 dc->props = virtio_blk_properties;
+dc->vmsd = &vmstate_virtio_blk;
 set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
 vdc->realize = virtio_blk_device_realize;
 vdc->unrealize = virtio_blk_device_unrealize;
-- 
MST




[Qemu-devel] [PULL v5 46/57] virtio-scsi: Wrap in vmstate

2016-07-21 Thread Michael S. Tsirkin
From: "Dr. David Alan Gilbert" 

Forcibly convert it to a vmstate wrapper;  proper conversion
comes later.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Cornelia Huck 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/scsi/virtio-scsi.c | 21 ++---
 1 file changed, 6 insertions(+), 15 deletions(-)

diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index 88d4bf0..ce57ef6 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -663,22 +663,17 @@ static void virtio_scsi_reset(VirtIODevice *vdev)
 /* The device does not have anything to save beyond the virtio data.
  * Request data is saved with callbacks from SCSI devices.
  */
-static void virtio_scsi_save(QEMUFile *f, void *opaque)
+static void virtio_scsi_save(QEMUFile *f, void *opaque, size_t size)
 {
 VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
 virtio_save(vdev, f);
 }
 
-static int virtio_scsi_load(QEMUFile *f, void *opaque, int version_id)
+static int virtio_scsi_load(QEMUFile *f, void *opaque, size_t size)
 {
 VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
-int ret;
 
-ret = virtio_load(vdev, f, version_id);
-if (ret) {
-return ret;
-}
-return 0;
+return virtio_load(vdev, f, 1);
 }
 
 void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev,
@@ -862,7 +857,6 @@ static void virtio_scsi_device_realize(DeviceState *dev, 
Error **errp)
 {
 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
 VirtIOSCSI *s = VIRTIO_SCSI(dev);
-static int virtio_scsi_id;
 Error *err = NULL;
 
 virtio_scsi_common_realize(dev, &err, virtio_scsi_handle_ctrl,
@@ -885,9 +879,6 @@ static void virtio_scsi_device_realize(DeviceState *dev, 
Error **errp)
 return;
 }
 }
-
-register_savevm(dev, "virtio-scsi", virtio_scsi_id++, 1,
-virtio_scsi_save, virtio_scsi_load, s);
 }
 
 static void virtio_scsi_instance_init(Object *obj)
@@ -911,9 +902,6 @@ void virtio_scsi_common_unrealize(DeviceState *dev, Error 
**errp)
 
 static void virtio_scsi_device_unrealize(DeviceState *dev, Error **errp)
 {
-VirtIOSCSI *s = VIRTIO_SCSI(dev);
-
-unregister_savevm(dev, "virtio-scsi", s);
 virtio_scsi_common_unrealize(dev, errp);
 }
 
@@ -930,6 +918,8 @@ static Property virtio_scsi_properties[] = {
 DEFINE_PROP_END_OF_LIST(),
 };
 
+VMSTATE_VIRTIO_DEVICE(scsi, 1, virtio_scsi_load, virtio_scsi_save);
+
 static void virtio_scsi_common_class_init(ObjectClass *klass, void *data)
 {
 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
@@ -946,6 +936,7 @@ static void virtio_scsi_class_init(ObjectClass *klass, void 
*data)
 HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass);
 
 dc->props = virtio_scsi_properties;
+dc->vmsd = _virtio_scsi;
 set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
 vdc->realize = virtio_scsi_device_realize;
 vdc->unrealize = virtio_scsi_device_unrealize;
-- 
MST




[Qemu-devel] [PULL v5 38/57] virtio: Introduce virtio_add_queue_aio

2016-07-21 Thread Michael S. Tsirkin
From: Fam Zheng 

Using this function instead of virtio_add_queue marks the vq as aio
based. This differentiation will be useful in later patches.

Distinguish between virtqueue processing in the iohandler context and main loop
AioContext.  iohandler context is isolated from AioContexts and therefore does
not run during aio_poll().

Signed-off-by: Fam Zheng 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
Reviewed-by: Cornelia Huck 
Reviewed-by: Stefan Hajnoczi 
Acked-by: Paolo Bonzini 
---
 include/hw/virtio/virtio.h |  3 +++
 hw/virtio/virtio.c | 38 ++
 2 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 3670829..7a82f79 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -143,6 +143,9 @@ typedef void (*VirtIOHandleOutput)(VirtIODevice *, 
VirtQueue *);
 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
 VirtIOHandleOutput handle_output);
 
+VirtQueue *virtio_add_queue_aio(VirtIODevice *vdev, int queue_size,
+VirtIOHandleOutput handle_output);
+
 void virtio_del_queue(VirtIODevice *vdev, int n);
 
 void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num);
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 2cc68d24..2fbed0c 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -97,6 +97,7 @@ struct VirtQueue
 uint16_t vector;
 VirtIOHandleOutput handle_output;
 VirtIOHandleOutput handle_aio_output;
+bool use_aio;
 VirtIODevice *vdev;
 EventNotifier guest_notifier;
 EventNotifier host_notifier;
@@ -1130,8 +1131,9 @@ void virtio_queue_set_vector(VirtIODevice *vdev, int n, 
uint16_t vector)
 }
 }
 
-VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
-VirtIOHandleOutput handle_output)
+static VirtQueue *virtio_add_queue_internal(VirtIODevice *vdev, int queue_size,
+VirtIOHandleOutput handle_output,
+bool use_aio)
 {
 int i;
 
@@ -1148,10 +1150,28 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int 
queue_size,
 vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
 vdev->vq[i].handle_output = handle_output;
 vdev->vq[i].handle_aio_output = NULL;
+vdev->vq[i].use_aio = use_aio;
 
 return &vdev->vq[i];
 }
 
+/* Add a virt queue and mark AIO.
+ * An AIO queue will use the AioContext based event interface instead of the
+ * default IOHandler and EventNotifier interface.
+ */
+VirtQueue *virtio_add_queue_aio(VirtIODevice *vdev, int queue_size,
+VirtIOHandleOutput handle_output)
+{
+return virtio_add_queue_internal(vdev, queue_size, handle_output, true);
+}
+
+/* Add a normal virt queue (on the contrary to the AIO version above. */
+VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
+VirtIOHandleOutput handle_output)
+{
+return virtio_add_queue_internal(vdev, queue_size, handle_output, false);
+}
+
 void virtio_del_queue(VirtIODevice *vdev, int n)
 {
 if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
@@ -1830,11 +1850,21 @@ static void 
virtio_queue_host_notifier_read(EventNotifier *n)
 void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign,
bool set_handler)
 {
+AioContext *ctx = qemu_get_aio_context();
 if (assign && set_handler) {
-event_notifier_set_handler(&vq->host_notifier, true,
+if (vq->use_aio) {
+aio_set_event_notifier(ctx, &vq->host_notifier, true,
virtio_queue_host_notifier_read);
+} else {
+event_notifier_set_handler(&vq->host_notifier, true,
+   virtio_queue_host_notifier_read);
+}
 } else {
-event_notifier_set_handler(&vq->host_notifier, true, NULL);
+if (vq->use_aio) {
+aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL);
+} else {
+event_notifier_set_handler(&vq->host_notifier, true, NULL);
+}
 }
 if (!assign) {
 /* Test and clear notifier before after disabling event,
-- 
MST




[Qemu-devel] [PULL v5 35/57] kvm-all: add trace events for kvm irqchip ops

2016-07-21 Thread Michael S. Tsirkin
From: Peter Xu 

These will help us monitor irqchip route activities more easily.

Signed-off-by: Peter Xu 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 kvm-all.c| 5 +
 trace-events | 3 +++
 2 files changed, 8 insertions(+)

diff --git a/kvm-all.c b/kvm-all.c
index 3764ba9..ef81ca5 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1048,6 +1048,7 @@ void kvm_irqchip_commit_routes(KVMState *s)
 int ret;
 
 s->irq_routes->flags = 0;
+trace_kvm_irqchip_commit_routes();
 ret = kvm_vm_ioctl(s, KVM_SET_GSI_ROUTING, s->irq_routes);
 assert(ret == 0);
 }
@@ -1271,6 +1272,8 @@ int kvm_irqchip_add_msi_route(KVMState *s, int vector, 
PCIDevice *dev)
 return -EINVAL;
 }
 
+trace_kvm_irqchip_add_msi_route(virq);
+
 kvm_add_routing_entry(s, &kroute);
 kvm_arch_add_msi_route_post(&kroute, vector, dev);
 kvm_irqchip_commit_routes(s);
@@ -1301,6 +1304,8 @@ int kvm_irqchip_update_msi_route(KVMState *s, int virq, 
MSIMessage msg,
 return -EINVAL;
 }
 
+trace_kvm_irqchip_update_msi_route(virq);
+
 return kvm_update_routing_entry(s, &kroute);
 }
 
diff --git a/trace-events b/trace-events
index 4767059..52c6a6c 100644
--- a/trace-events
+++ b/trace-events
@@ -118,6 +118,9 @@ kvm_run_exit(int cpu_index, uint32_t reason) "cpu_index %d, 
reason %d"
 kvm_device_ioctl(int fd, int type, void *arg) "dev fd %d, type 0x%x, arg %p"
 kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve 
ONEREG %" PRIu64 " from KVM: %s"
 kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set 
ONEREG %" PRIu64 " to KVM: %s"
+kvm_irqchip_commit_routes(void) ""
+kvm_irqchip_add_msi_route(int virq) "Adding MSI route virq=%d"
+kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d"
 
 # TCG related tracing (mostly disabled by default)
 # cpu-exec.c
-- 
MST




Re: [Qemu-devel] [PATCH v2] test: port postcopy test to ppc64

2016-07-21 Thread Dr. David Alan Gilbert
* Laurent Vivier (lviv...@redhat.com) wrote:
> As userfaultfd syscall is available on powerpc, migration
> postcopy can be used.
> 
> This patch adds the support needed to test this on powerpc,
> instead of using a bootsector to run code to modify memory,
> we use a FORTH script in "boot-command" property.
> 
> As spapr machine doesn't support "-prom-env" argument
> (the nvram is initialized by SLOF and not by QEMU),
> "boot-command" is provided to SLOF via a file mapped nvram
> (with "-drive file=...,if=pflash")
> 
> Signed-off-by: Laurent Vivier 

Thanks for doing this!

> ---
> v2: move FORTH script directly in sprintf()
> use openbios_firmware_abi.h
> remove useless "default" case
> 
>  tests/Makefile.include |   1 +
>  tests/postcopy-test.c  | 116 
> +
>  2 files changed, 98 insertions(+), 19 deletions(-)
> 
> diff --git a/tests/Makefile.include b/tests/Makefile.include
> index e7e50d6..e2d1885 100644
> --- a/tests/Makefile.include
> +++ b/tests/Makefile.include
> @@ -268,6 +268,7 @@ check-qtest-sparc-y += tests/prom-env-test$(EXESUF)
>  #check-qtest-sparc64-y += tests/prom-env-test$(EXESUF)
>  check-qtest-microblazeel-y = $(check-qtest-microblaze-y)
>  check-qtest-xtensaeb-y = $(check-qtest-xtensa-y)
> +check-qtest-ppc64-y += tests/postcopy-test$(EXESUF)
>  
>  check-qtest-generic-y += tests/qom-test$(EXESUF)
>  
> diff --git a/tests/postcopy-test.c b/tests/postcopy-test.c
> index 16465ab..229e9e9 100644
> --- a/tests/postcopy-test.c
> +++ b/tests/postcopy-test.c
> @@ -18,6 +18,9 @@
>  #include "qemu/sockets.h"
>  #include "sysemu/char.h"
>  #include "sysemu/sysemu.h"
> +#include "hw/nvram/openbios_firmware_abi.h"
> +
> +#define MIN_NVRAM_SIZE 8192 /* from spapr_nvram.c */
>  
>  const unsigned start_address = 1024 * 1024;
>  const unsigned end_address = 100 * 1024 * 1024;
> @@ -122,6 +125,44 @@ unsigned char bootsect[] = {
>0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x55, 0xaa
>  };
>  
> +static void init_bootfile_x86(const char *bootpath)
> +{
> +FILE *bootfile = fopen(bootpath, "wb");
> +
> +g_assert_cmpint(fwrite(bootsect, 512, 1, bootfile), ==, 1);
> +fclose(bootfile);
> +}
> +
> +static void init_bootfile_ppc(const char *bootpath)
> +{
> +FILE *bootfile;
> +char buf[MIN_NVRAM_SIZE];
> +struct OpenBIOS_nvpart_v1 *header = (struct OpenBIOS_nvpart_v1 *)buf;
> +
> +memset(buf, 0, MIN_NVRAM_SIZE);
> +
> +/* Create a "common" partition in nvram to store boot-command property */
> +
> +header->signature = OPENBIOS_PART_SYSTEM;
> +memcpy(header->name, "common", 6);
> +OpenBIOS_finish_partition(header, MIN_NVRAM_SIZE);
> +
> +/* FW_MAX_SIZE is 4MB, but slof.bin is only 900KB,
> + * so let's modify memory between 1MB and 100MB
> + * to do like PC bootsector
> + */
> +
> +sprintf(buf + 16,
> +"boot-command=hex .\" _\" begin %x %x do i c@ 1 + i c! 1000 
> +loop "
> +".\" B\" 0 until", end_address, start_address);

Very nice; took me a while to decode but yes I think that's doing
the same as my x86.

Dave

> +/* Write partition to the NVRAM file */
> +
> +bootfile = fopen(bootpath, "wb");
> +g_assert_cmpint(fwrite(buf, MIN_NVRAM_SIZE, 1, bootfile), ==, 1);
> +fclose(bootfile);
> +}
> +
>  /*
>   * Wait for some output in the serial output file,
>   * we get an 'A' followed by an endless string of 'B's
> @@ -131,10 +172,29 @@ static void wait_for_serial(const char *side)
>  {
>  char *serialpath = g_strdup_printf("%s/%s", tmpfs, side);
>  FILE *serialfile = fopen(serialpath, "r");
> +const char *arch = qtest_get_arch();
> +int started = (strcmp(side, "src_serial") == 0 &&
> +   strcmp(arch, "ppc64") == 0) ? 0 : 1;
>  
>  do {
>  int readvalue = fgetc(serialfile);
>  
> +if (!started) {
> +/* SLOF prints its banner before starting test,
> + * to ignore it, mark the start of the test with '_',
> + * ignore all characters until this marker
> + */
> +switch (readvalue) {
> +case '_':
> +started = 1;
> +break;
> +case EOF:
> +fseek(serialfile, 0, SEEK_SET);
> +usleep(1000);
> +break;
> +}
> +continue;
> +}
>  switch (readvalue) {
>  case 'A':
>  /* Fine */
> @@ -147,6 +207,8 @@ static void wait_for_serial(const char *side)
>  return;
>  
>  case EOF:
> +started = (strcmp(side, "src_serial") == 0 &&
> +   strcmp(arch, "ppc64") == 0) ? 0 : 1;
>  fseek(serialfile, 0, SEEK_SET);
>  usleep(1000);
>  break;
> @@ -295,32 +357,48 @@ static void test_migrate(void)
>  char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
>  QTestState *global = global_qtest, *from, *to;
>  

Re: [Qemu-devel] [PATCH 24/37] pc: keep gsi reference

2016-07-21 Thread Eduardo Habkost
On Thu, Jul 21, 2016 at 01:27:35PM -0400, Marc-André Lureau wrote:
> Hi
> 
> - Original Message -
> > On Tue, Jul 19, 2016 at 12:54:19PM +0400, marcandre.lur...@redhat.com wrote:
> > > From: Marc-André Lureau 
> > > 
> > > Further cleanup would need to call qemu_free_irq() at the appropriate
> > > time, but for now this silences ASAN about direct leaks.
> > > 
> > > Signed-off-by: Marc-André Lureau 
> > 
> > Is there a way to make ASAN happy without having to add a field
> > to MachineState that we're not going to use for anything?
> 
> Well, the plan is rather to release it when no longer needed.
> Would it be fine to call qemu_free_irqs() in
> machine_finalize()?

It would be fine, I guess, but it looks pointless if we have lots
of other resources allocated during PC machine initialization
that are never released.

But, see additional comment below:

> 
> > 
> > > ---
> > >  hw/i386/pc_piix.c   | 1 +
> > >  hw/i386/pc_q35.c| 1 +
> > >  include/hw/boards.h | 1 +
> > >  3 files changed, 3 insertions(+)
> > > 
> > > diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> > > index a07dc81..b2db274 100644
> > > --- a/hw/i386/pc_piix.c
> > > +++ b/hw/i386/pc_piix.c
> > > @@ -190,6 +190,7 @@ static void pc_init1(MachineState *machine,
> > >  } else {
> > >  gsi = qemu_allocate_irqs(gsi_handler, gsi_state, GSI_NUM_PINS);
> > >  }
> > > +machine->gsi = gsi;
> > >  
> > >  if (pcmc->pci_enabled) {
> > >  pci_bus = i440fx_init(host_type,
> > > diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
> > > index c5e8367..5dfb14f 100644
> > > --- a/hw/i386/pc_q35.c
> > > +++ b/hw/i386/pc_q35.c
> > > @@ -158,6 +158,7 @@ static void pc_q35_init(MachineState *machine)
> > >  } else {
> > >  gsi = qemu_allocate_irqs(gsi_handler, gsi_state, GSI_NUM_PINS);
> > >  }
> > > +machine->gsi = gsi;
> > >  
> > >  /* create pci host bus */
> > >  q35_host = Q35_HOST_DEVICE(qdev_create(NULL, TYPE_Q35_HOST_DEVICE));
> > > diff --git a/include/hw/boards.h b/include/hw/boards.h
> > > index e46a744..289ba52 100644
> > > --- a/include/hw/boards.h
> > > +++ b/include/hw/boards.h
> > > @@ -139,6 +139,7 @@ struct MachineState {
> > >  /*< private >*/
> > >  Object parent_obj;
> > >  Notifier sysbus_notifier;
> > > +qemu_irq *gsi;

If this is used only by PC, doesn't it belong to PCMachineState?

Anyway, the new field would be very useful to help reduce the
number of parameters of PC initialization functions (by making
them just get a PCMachineState* argument). I would go even
further and remove the local 'gsi' variable and replace it with
'pcms->gsi' everywhere.

-- 
Eduardo



Re: [Qemu-devel] [PATCH 24/37] pc: keep gsi reference

2016-07-21 Thread Marc-André Lureau
Hi

- Original Message -
> On Thu, Jul 21, 2016 at 01:27:35PM -0400, Marc-André Lureau wrote:
> > Hi
> > 
> > - Original Message -
> > > On Tue, Jul 19, 2016 at 12:54:19PM +0400, marcandre.lur...@redhat.com
> > > wrote:
> > > > From: Marc-André Lureau 
> > > > 
> > > > Further cleanup would need to call qemu_free_irq() at the appropriate
> > > > time, but for now this silences ASAN about direct leaks.
> > > > 
> > > > Signed-off-by: Marc-André Lureau 
> > > 
> > > Is there a way to make ASAN happy without having to add a field
> > > to MachineState that we're not going to use for anything?
> > 
> > Well, the plan is rather to release it when no longer needed.
> > Would it be fine to call qemu_free_irqs() in
> > machine_finalize()?
> 
> It would be fine, I guess, but it looks pointless if we have lots
> of other resources allocated during PC machine initialization
> that are never released.

The main point, right now, is to have no direct leaks when running ASAN or
valgrind, as they hide newly introduced leaks that may be much worse. (It would
also be good if we had no indirect leaks either, as these may also grow over
time.)

> But, see additional comment below:
> 
> > 
> > > 
> > > > ---
> > > >  hw/i386/pc_piix.c   | 1 +
> > > >  hw/i386/pc_q35.c| 1 +
> > > >  include/hw/boards.h | 1 +
> > > >  3 files changed, 3 insertions(+)
> > > > 
> > > > diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> > > > index a07dc81..b2db274 100644
> > > > --- a/hw/i386/pc_piix.c
> > > > +++ b/hw/i386/pc_piix.c
> > > > @@ -190,6 +190,7 @@ static void pc_init1(MachineState *machine,
> > > >  } else {
> > > >  gsi = qemu_allocate_irqs(gsi_handler, gsi_state,
> > > >  GSI_NUM_PINS);
> > > >  }
> > > > +machine->gsi = gsi;
> > > >  
> > > >  if (pcmc->pci_enabled) {
> > > >  pci_bus = i440fx_init(host_type,
> > > > diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
> > > > index c5e8367..5dfb14f 100644
> > > > --- a/hw/i386/pc_q35.c
> > > > +++ b/hw/i386/pc_q35.c
> > > > @@ -158,6 +158,7 @@ static void pc_q35_init(MachineState *machine)
> > > >  } else {
> > > >  gsi = qemu_allocate_irqs(gsi_handler, gsi_state,
> > > >  GSI_NUM_PINS);
> > > >  }
> > > > +machine->gsi = gsi;
> > > >  
> > > >  /* create pci host bus */
> > > >  q35_host = Q35_HOST_DEVICE(qdev_create(NULL,
> > > >  TYPE_Q35_HOST_DEVICE));
> > > > diff --git a/include/hw/boards.h b/include/hw/boards.h
> > > > index e46a744..289ba52 100644
> > > > --- a/include/hw/boards.h
> > > > +++ b/include/hw/boards.h
> > > > @@ -139,6 +139,7 @@ struct MachineState {
> > > >  /*< private >*/
> > > >  Object parent_obj;
> > > >  Notifier sysbus_notifier;
> > > > +qemu_irq *gsi;
> 
> If this is used only by PC, doesn't it belong to PCMachineState?

right, i'll try to put it there

> Anyway, the new field would be very useful to help reduce the
> number of parameters of PC initialization functions (by making
> them just get a PCMachineState* argument). I would go even

Which functions do you have in mind?

> further and remove the local 'gsi' variable and replace it with
> 'pcms->gsi' everywhere.

ok, why not.



[Qemu-devel] [PULL v5 33/57] kvm-irqchip: do explicit commit when update irq

2016-07-21 Thread Michael S. Tsirkin
From: Peter Xu 

In the past, we did a gsi route commit for each irqchip route
update. This is not efficient if we are updating lots of routes at the
same time. This patch removes the committing phase in
kvm_irqchip_update_msi_route(). Instead, we do an explicit commit after all
routes are updated.

Signed-off-by: Peter Xu 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 include/sysemu/kvm.h | 2 +-
 hw/i386/kvm/pci-assign.c | 2 ++
 hw/misc/ivshmem.c| 1 +
 hw/vfio/pci.c| 1 +
 hw/virtio/virtio-pci.c   | 1 +
 kvm-all.c| 2 --
 kvm-stub.c   | 4 
 target-i386/kvm.c| 1 +
 8 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 0a16e0e..c9c2436 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -371,7 +371,6 @@ int kvm_set_irq(KVMState *s, int irq, int level);
 int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg);
 
 void kvm_irqchip_add_irq_route(KVMState *s, int gsi, int irqchip, int pin);
-void kvm_irqchip_commit_routes(KVMState *s);
 
 void kvm_put_apic_state(DeviceState *d, struct kvm_lapic_state *kapic);
 void kvm_get_apic_state(DeviceState *d, struct kvm_lapic_state *kapic);
@@ -494,6 +493,7 @@ static inline void cpu_synchronize_post_init(CPUState *cpu)
 int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev);
 int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg,
  PCIDevice *dev);
+void kvm_irqchip_commit_routes(KVMState *s);
 void kvm_irqchip_release_virq(KVMState *s, int virq);
 
 int kvm_irqchip_add_adapter_route(KVMState *s, AdapterInfo *adapter);
diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c
index 334dfc4..8238fbc 100644
--- a/hw/i386/kvm/pci-assign.c
+++ b/hw/i386/kvm/pci-assign.c
@@ -1015,6 +1015,7 @@ static void assigned_dev_update_msi_msg(PCIDevice 
*pci_dev)
 
 kvm_irqchip_update_msi_route(kvm_state, assigned_dev->msi_virq[0],
  msi_get_message(pci_dev, 0), pci_dev);
+kvm_irqchip_commit_routes(kvm_state);
 }
 
 static bool assigned_dev_msix_masked(MSIXTableEntry *entry)
@@ -1602,6 +1603,7 @@ static void assigned_dev_msix_mmio_write(void *opaque, 
hwaddr addr,
 if (ret) {
 error_report("Error updating irq routing entry (%d)", ret);
 }
+kvm_irqchip_commit_routes(kvm_state);
 }
 }
 }
diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c
index 023da84..40a2ebc 100644
--- a/hw/misc/ivshmem.c
+++ b/hw/misc/ivshmem.c
@@ -322,6 +322,7 @@ static int ivshmem_vector_unmask(PCIDevice *dev, unsigned 
vector,
 if (ret < 0) {
 return ret;
 }
+kvm_irqchip_commit_routes(kvm_state);
 
 return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, v->virq);
 }
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 87a6f05..7bfa17c 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -458,6 +458,7 @@ static void vfio_update_kvm_msi_virq(VFIOMSIVector *vector, 
MSIMessage msg,
  PCIDevice *pdev)
 {
 kvm_irqchip_update_msi_route(kvm_state, vector->virq, msg, pdev);
+kvm_irqchip_commit_routes(kvm_state);
 }
 
 static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index cbdfd59..f0677b7 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -842,6 +842,7 @@ static int virtio_pci_vq_vector_unmask(VirtIOPCIProxy 
*proxy,
 if (ret < 0) {
 return ret;
 }
+kvm_irqchip_commit_routes(kvm_state);
 }
 }
 
diff --git a/kvm-all.c b/kvm-all.c
index ca30a58..3764ba9 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1094,8 +1094,6 @@ static int kvm_update_routing_entry(KVMState *s,
 
 *entry = *new_entry;
 
-kvm_irqchip_commit_routes(s);
-
 return 0;
 }
 
diff --git a/kvm-stub.c b/kvm-stub.c
index 982e590..64e23f6 100644
--- a/kvm-stub.c
+++ b/kvm-stub.c
@@ -135,6 +135,10 @@ int kvm_irqchip_update_msi_route(KVMState *s, int virq, 
MSIMessage msg,
 return -ENOSYS;
 }
 
+void kvm_irqchip_commit_routes(KVMState *s)
+{
+}
+
 int kvm_irqchip_add_adapter_route(KVMState *s, AdapterInfo *adapter)
 {
 return -ENOSYS;
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 61f57f9..0a09be6 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -3427,6 +3427,7 @@ static void kvm_update_msi_routes_all(void *private, bool 
global,
 kvm_irqchip_update_msi_route(kvm_state, entry->virq,
  msg, entry->dev);
 }
+kvm_irqchip_commit_routes(kvm_state);
 trace_kvm_x86_update_msi_routes(cnt);
 }
 
-- 
MST




Re: [Qemu-devel] [PATCH 26/37] sd: free timer

2016-07-21 Thread Andrew Baumann
> From: Marc-André Lureau [mailto:marcandre.lur...@gmail.com]
> Sent: Thursday, 21 July 2016 4:15
> Hi Andrew,
> 
> Since you introduced the timer, could you review this patch?
> 
> thanks
> 
> 
> -- Forwarded message --
> From:  
> Date: Tue, Jul 19, 2016 at 12:54 PM
> Subject: [Qemu-devel] [PATCH 26/37] sd: free timer
> To: qemu-devel@nongnu.org
> Cc: Marc-André Lureau 
> 
> 
> From: Marc-André Lureau 
> 
> Free the timer allocated in instance_init.
> 
> Signed-off-by: Marc-André Lureau 
> ---
>  hw/sd/sd.c | 9 +
>  1 file changed, 9 insertions(+)
> 
> diff --git a/hw/sd/sd.c b/hw/sd/sd.c
> index 87c6dc1..8e88e83 100644
> --- a/hw/sd/sd.c
> +++ b/hw/sd/sd.c
> @@ -1876,6 +1876,14 @@ static void sd_instance_init(Object *obj)
>  sd->ocr_power_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
> sd_ocr_powerup, sd);  }
> 
> +static void sd_instance_finalize(Object *obj) {
> +SDState *sd = SD_CARD(obj);
> +
> +timer_del(sd->ocr_power_timer);
> +timer_free(sd->ocr_power_timer);
> +}
> +
>  static void sd_realize(DeviceState *dev, Error **errp)  {
>  SDState *sd = SD_CARD(dev);
> @@ -1927,6 +1935,7 @@ static const TypeInfo sd_info = {
>  .class_size = sizeof(SDCardClass),
>  .class_init = sd_class_init,
>  .instance_init = sd_instance_init,
> +.instance_finalize = sd_instance_finalize,
>  };
> 
>  static void sd_register_types(void)

Thanks for the fix. This was based on some other timer code I found in the tree 
that was evidently also leaky (I don't remember where unfortunately).

One thing: are you sure it is safe to call timer_del() again if the timer may 
already have been deleted? It looks that way from the implementation, but the 
header comment isn't explicit.

Otherwise,
Reviewed-by: Andrew Baumann 

Cheers,
Andrew


[Qemu-devel] [PULL v5 31/57] kvm-irqchip: i386: add hook for add/remove virq

2016-07-21 Thread Michael S. Tsirkin
From: Peter Xu 

Add two hooks that are called when MSI routes are added or removed. There
are two kinds of MSI routes:

- in kvm_irqchip_add_irq_route(): before assigning IRQFD. Used by
  vhost, vfio, etc.

- in kvm_irqchip_send_msi(): when sending direct MSI message, if
  direct MSI not allowed, we will first create one MSI route entry
  in the kernel, then trigger it.

This patch only hooks the first one (the irqfd case). We do not need to
take care of the second one, since it is only used by QEMU userspace
(kvm-apic) and its messages are always translated at the time they are
triggered. For the first one, however, we need to record the routes so
that we can notify the kernel when cache invalidation happens.

Also, we do not hook IOAPIC msi routes (we have explicit notifier for
IOAPIC to keep its cache updated). We only need to care about irqfd
users.

Signed-off-by: Peter Xu 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 include/sysemu/kvm.h |  6 ++
 kvm-all.c|  2 ++
 target-arm/kvm.c | 11 +++
 target-i386/kvm.c| 48 
 target-mips/kvm.c| 11 +++
 target-ppc/kvm.c | 11 +++
 target-s390x/kvm.c   | 11 +++
 target-i386/trace-events |  2 ++
 8 files changed, 102 insertions(+)

diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index e5d90bd..0a16e0e 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -359,6 +359,12 @@ void kvm_arch_init_irq_routing(KVMState *s);
 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
  uint64_t address, uint32_t data, PCIDevice *dev);
 
+/* Notify arch about newly added MSI routes */
+int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
+int vector, PCIDevice *dev);
+/* Notify arch about released MSI routes */
+int kvm_arch_release_virq_post(int virq);
+
 int kvm_arch_msi_data_to_gsi(uint32_t data);
 
 int kvm_set_irq(KVMState *s, int irq, int level);
diff --git a/kvm-all.c b/kvm-all.c
index d94c0e4..69ff658 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1133,6 +1133,7 @@ void kvm_irqchip_release_virq(KVMState *s, int virq)
 }
 }
 clear_gsi(s, virq);
+kvm_arch_release_virq_post(virq);
 }
 
 static unsigned int kvm_hash_msi(uint32_t data)
@@ -1281,6 +1282,7 @@ int kvm_irqchip_add_msi_route(KVMState *s, int vector, 
PCIDevice *dev)
 }
 
 kvm_add_routing_entry(s, &kroute);
+kvm_arch_add_msi_route_post(&kroute, vector, dev);
 kvm_irqchip_commit_routes(s);
 
 return virq;
diff --git a/target-arm/kvm.c b/target-arm/kvm.c
index 5c2bd7a..dbe393c 100644
--- a/target-arm/kvm.c
+++ b/target-arm/kvm.c
@@ -622,6 +622,17 @@ int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry 
*route,
 return 0;
 }
 
+int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
+int vector, PCIDevice *dev)
+{
+return 0;
+}
+
+int kvm_arch_release_virq_post(int virq)
+{
+return 0;
+}
+
 int kvm_arch_msi_data_to_gsi(uint32_t data)
 {
 return (data - 32) & 0xffff;
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index f574513..8875034 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -3400,6 +3400,54 @@ int kvm_arch_fixup_msi_route(struct 
kvm_irq_routing_entry *route,
 return 0;
 }
 
+typedef struct MSIRouteEntry MSIRouteEntry;
+
+struct MSIRouteEntry {
+PCIDevice *dev; /* Device pointer */
+int vector; /* MSI/MSIX vector index */
+int virq;   /* Virtual IRQ index */
+QLIST_ENTRY(MSIRouteEntry) list;
+};
+
+/* List of used GSI routes */
+static QLIST_HEAD(, MSIRouteEntry) msi_route_list = \
+QLIST_HEAD_INITIALIZER(msi_route_list);
+
+int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
+int vector, PCIDevice *dev)
+{
+MSIRouteEntry *entry;
+
+if (!dev) {
+/* These are (possibly) IOAPIC routes only used for split
+ * kernel irqchip mode, while what we are housekeeping are
+ * PCI devices only. */
+return 0;
+}
+
+entry = g_new0(MSIRouteEntry, 1);
+entry->dev = dev;
+entry->vector = vector;
+entry->virq = route->gsi;
+QLIST_INSERT_HEAD(&msi_route_list, entry, list);
+
+trace_kvm_x86_add_msi_route(route->gsi);
+return 0;
+}
+
+int kvm_arch_release_virq_post(int virq)
+{
+MSIRouteEntry *entry, *next;
+QLIST_FOREACH_SAFE(entry, &msi_route_list, list, next) {
+if (entry->virq == virq) {
+trace_kvm_x86_remove_msi_route(virq);
+QLIST_REMOVE(entry, list);
+break;
+}
+}
+return 0;
+}
+
 int kvm_arch_msi_data_to_gsi(uint32_t data)
 {
 abort();
diff --git a/target-mips/kvm.c b/target-mips/kvm.c
index 

[Qemu-devel] [PULL v5 30/57] kvm-irqchip: simplify kvm_irqchip_add_msi_route

2016-07-21 Thread Michael S. Tsirkin
From: Peter Xu 

Change the original MSIMessage parameter of kvm_irqchip_add_msi_route
into the vector number. The vector index provides more information than
the MSIMessage, and we can easily retrieve the MSIMessage from the vector.
This avoids fetching the MSIMessage every time before adding MSI routes.

Meanwhile, the vector info will be used in the coming patches to further
enable gsi route update notifications.

Signed-off-by: Peter Xu 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 include/sysemu/kvm.h | 13 -
 hw/i386/kvm/pci-assign.c |  8 ++--
 hw/misc/ivshmem.c|  3 +--
 hw/vfio/pci.c| 11 +--
 hw/virtio/virtio-pci.c   |  9 +++--
 kvm-all.c| 18 --
 kvm-stub.c   |  2 +-
 target-i386/kvm.c|  3 +--
 8 files changed, 41 insertions(+), 26 deletions(-)

diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index ad6f837..e5d90bd 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -474,7 +474,18 @@ static inline void cpu_synchronize_post_init(CPUState *cpu)
 }
 }
 
-int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg, PCIDevice *dev);
+/**
+ * kvm_irqchip_add_msi_route - Add MSI route for specific vector
+ * @s:  KVM state
+ * @vector: which vector to add. This can be either MSI/MSIX
+ *  vector. The function will automatically detect whether
+ *  MSI/MSIX is enabled, and fetch corresponding MSI
+ *  message.
+ * @dev:Owner PCI device to add the route. If @dev is specified
+ *  as @NULL, an empty MSI message will be inited.
+ * @return: virq (>=0) when success, errno (<0) when failed.
+ */
+int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev);
 int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg,
  PCIDevice *dev);
 void kvm_irqchip_release_virq(KVMState *s, int virq);
diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c
index 1a429e5..334dfc4 100644
--- a/hw/i386/kvm/pci-assign.c
+++ b/hw/i386/kvm/pci-assign.c
@@ -974,10 +974,9 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev)
 }
 
 if (ctrl_byte & PCI_MSI_FLAGS_ENABLE) {
-MSIMessage msg = msi_get_message(pci_dev, 0);
 int virq;
 
-virq = kvm_irqchip_add_msi_route(kvm_state, msg, pci_dev);
+virq = kvm_irqchip_add_msi_route(kvm_state, 0, pci_dev);
 if (virq < 0) {
 perror("assigned_dev_update_msi: kvm_irqchip_add_msi_route");
 return;
@@ -1042,7 +1041,6 @@ static int assigned_dev_update_msix_mmio(PCIDevice 
*pci_dev)
 uint16_t entries_nr = 0;
 int i, r = 0;
 MSIXTableEntry *entry = adev->msix_table;
-MSIMessage msg;
 
 /* Get the usable entry number for allocating */
 for (i = 0; i < adev->msix_max; i++, entry++) {
@@ -1079,9 +1077,7 @@ static int assigned_dev_update_msix_mmio(PCIDevice 
*pci_dev)
 continue;
 }
 
-msg.address = entry->addr_lo | ((uint64_t)entry->addr_hi << 32);
-msg.data = entry->data;
-r = kvm_irqchip_add_msi_route(kvm_state, msg, pci_dev);
+r = kvm_irqchip_add_msi_route(kvm_state, i, pci_dev);
 if (r < 0) {
 return r;
 }
diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c
index 7e7c843..023da84 100644
--- a/hw/misc/ivshmem.c
+++ b/hw/misc/ivshmem.c
@@ -441,13 +441,12 @@ static void ivshmem_add_kvm_msi_virq(IVShmemState *s, int 
vector,
  Error **errp)
 {
 PCIDevice *pdev = PCI_DEVICE(s);
-MSIMessage msg = msix_get_message(pdev, vector);
 int ret;
 
 IVSHMEM_DPRINTF("ivshmem_add_kvm_msi_virq vector:%d\n", vector);
 assert(!s->msi_vectors[vector].pdev);
 
-ret = kvm_irqchip_add_msi_route(kvm_state, msg, pdev);
+ret = kvm_irqchip_add_msi_route(kvm_state, vector, pdev);
 if (ret < 0) {
 error_setg(errp, "kvm_irqchip_add_msi_route failed");
 return;
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index c8436a1..87a6f05 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -417,11 +417,11 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool 
msix)
 }
 
 static void vfio_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector,
-  MSIMessage *msg, bool msix)
+  int vector_n, bool msix)
 {
 int virq;
 
-if ((msix && vdev->no_kvm_msix) || (!msix && vdev->no_kvm_msi) || !msg) {
+if ((msix && vdev->no_kvm_msix) || (!msix && vdev->no_kvm_msi)) {
 return;
 }
 
@@ -429,7 +429,7 @@ static void vfio_add_kvm_msi_virq(VFIOPCIDevice *vdev, 
VFIOMSIVector *vector,
 return;
 }
 
-virq = kvm_irqchip_add_msi_route(kvm_state, *msg, &vdev->pdev);
+virq = kvm_irqchip_add_msi_route(kvm_state, vector_n, &vdev->pdev);
 

[Qemu-devel] [PULL v5 57/57] intel_iommu: avoid unnamed fields

2016-07-21 Thread Michael S. Tsirkin
Avoid unnamed fields for portability.
Also, rename VTD_IRTE to VTD_IR_TableEntry for coding
style compliance.

Signed-off-by: Michael S. Tsirkin 
---
 include/hw/i386/intel_iommu.h |  8 
 hw/i386/intel_iommu.c | 42 +-
 2 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 2eba7ed..a42dbd7 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -59,7 +59,7 @@ typedef struct IntelIOMMUState IntelIOMMUState;
 typedef struct VTDAddressSpace VTDAddressSpace;
 typedef struct VTDIOTLBEntry VTDIOTLBEntry;
 typedef struct VTDBus VTDBus;
-typedef union VTD_IRTE VTD_IRTE;
+typedef union VTD_IR_TableEntry VTD_IR_TableEntry;
 typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress;
 typedef struct VTDIrq VTDIrq;
 typedef struct VTD_MSIMessage VTD_MSIMessage;
@@ -120,7 +120,7 @@ enum {
 };
 
 /* Interrupt Remapping Table Entry Definition */
-union VTD_IRTE {
+union VTD_IR_TableEntry {
 struct {
 #ifdef HOST_WORDS_BIGENDIAN
 uint32_t dest_id:32; /* Destination ID */
@@ -159,7 +159,7 @@ union VTD_IRTE {
 uint64_t sid_vtype:2;/* Source-ID Validation Type */
 uint64_t __reserved_2:44;/* Reserved 2 */
 #endif
-} QEMU_PACKED;
+} QEMU_PACKED irte;
 uint64_t data[2];
 };
 
@@ -184,7 +184,7 @@ union VTD_IR_MSIAddress {
 uint32_t index_l:15; /* Interrupt index bit 14-0 */
 uint32_t __head:12;  /* Should always be: 0x0fee */
 #endif
-} QEMU_PACKED;
+} QEMU_PACKED addr;
 uint32_t data;
 };
 
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 0e139d1..28c31a2 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -2010,7 +2010,7 @@ static Property vtd_properties[] = {
 
 /* Read IRTE entry with specific index */
 static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
-VTD_IRTE *entry, uint16_t sid)
+VTD_IR_TableEntry *entry, uint16_t sid)
 {
 static const uint16_t vtd_svt_mask[VTD_SQ_MAX] = \
 {0xffff, 0xfffb, 0xfff9, 0xfff8};
@@ -2026,7 +2026,7 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t 
index,
 return -VTD_FR_IR_ROOT_INVAL;
 }
 
-if (!entry->present) {
+if (!entry->irte.present) {
 VTD_DPRINTF(GENERAL, "error: present flag not set in IRTE"
 " entry index %u value 0x%"PRIx64 " 0x%"PRIx64,
 index, le64_to_cpu(entry->data[1]),
@@ -2034,8 +2034,8 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t 
index,
 return -VTD_FR_IR_ENTRY_P;
 }
 
-if (entry->__reserved_0 || entry->__reserved_1 || \
-entry->__reserved_2) {
+if (entry->irte.__reserved_0 || entry->irte.__reserved_1 ||
+entry->irte.__reserved_2) {
 VTD_DPRINTF(GENERAL, "error: IRTE entry index %"PRIu16
 " reserved fields non-zero: 0x%"PRIx64 " 0x%"PRIx64,
 index, le64_to_cpu(entry->data[1]),
@@ -2045,14 +2045,14 @@ static int vtd_irte_get(IntelIOMMUState *iommu, 
uint16_t index,
 
 if (sid != X86_IOMMU_SID_INVALID) {
 /* Validate IRTE SID */
-source_id = le32_to_cpu(entry->source_id);
-switch (entry->sid_vtype) {
+source_id = le32_to_cpu(entry->irte.source_id);
+switch (entry->irte.sid_vtype) {
 case VTD_SVT_NONE:
 VTD_DPRINTF(IR, "No SID validation for IRTE index %d", index);
 break;
 
 case VTD_SVT_ALL:
-mask = vtd_svt_mask[entry->sid_q];
+mask = vtd_svt_mask[entry->irte.sid_q];
 if ((source_id & mask) != (sid & mask)) {
 VTD_DPRINTF(GENERAL, "SID validation for IRTE index "
 "%d failed (reqid 0x%04x sid 0x%04x)", index,
@@ -2075,7 +2075,7 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t 
index,
 
 default:
 VTD_DPRINTF(GENERAL, "Invalid SVT bits (0x%x) in IRTE index "
-"%d", entry->sid_vtype, index);
+"%d", entry->irte.sid_vtype, index);
 /* Take this as verification failure. */
 return -VTD_FR_IR_SID_ERR;
 break;
@@ -2089,7 +2089,7 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t 
index,
 static int vtd_remap_irq_get(IntelIOMMUState *iommu, uint16_t index,
  VTDIrq *irq, uint16_t sid)
 {
-VTD_IRTE irte = {};
+VTD_IR_TableEntry irte = {};
 int ret = 0;
 
 ret = vtd_irte_get(iommu, index, &irte, sid);
@@ -2097,18 +2097,18 @@ static int vtd_remap_irq_get(IntelIOMMUState *iommu, 
uint16_t index,
 return ret;
 }
 
-irq->trigger_mode = irte.trigger_mode;
-irq->vector = irte.vector;
-irq->delivery_mode = irte.delivery_mode;
-irq->dest = le32_to_cpu(irte.dest_id);
+irq->trigger_mode = 

[Qemu-devel] [PULL v5 28/57] intel_iommu: Add support for Extended Interrupt Mode

2016-07-21 Thread Michael S. Tsirkin
From: Jan Kiszka 

As neither QEMU nor KVM supports more than 255 CPUs so far, this is
simple: we only need to switch the destination ID translation in
vtd_remap_irq_get if EIME is set.

Once CFI support is there, it will have to take EIM into account as
well. So far, nothing to do for this.

This patch allows x2APIC to be used in split irqchip mode of KVM.

Signed-off-by: Jan Kiszka 
[use le32_to_cpu() to retrieve dest_id]
Signed-off-by: Peter Xu 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/i386/intel_iommu_internal.h |  2 ++
 include/hw/i386/intel_iommu.h  |  1 +
 hw/i386/intel_iommu.c  | 16 +---
 3 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 10c20fe..72b0114 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -176,6 +176,7 @@
 
 /* IRTA_REG */
 #define VTD_IRTA_ADDR_MASK  (VTD_HAW_MASK ^ 0xfffULL)
+#define VTD_IRTA_EIME   (1ULL << 11)
 #define VTD_IRTA_SIZE_MASK  (0xfULL)
 
 /* ECAP_REG */
@@ -184,6 +185,7 @@
 #define VTD_ECAP_QI (1ULL << 1)
 /* Interrupt Remapping support */
 #define VTD_ECAP_IR (1ULL << 3)
+#define VTD_ECAP_EIM(1ULL << 4)
 
 /* CAP_REG */
 /* (offset >> 4) << 24 */
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index e048ced..745b4e7 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -271,6 +271,7 @@ struct IntelIOMMUState {
 bool intr_enabled;  /* Whether guest enabled IR */
 dma_addr_t intr_root;   /* Interrupt remapping table pointer */
 uint32_t intr_size; /* Number of IR table entries */
+bool intr_eime; /* Extended interrupt mode enabled */
 };
 
 /* Find the VTD Address space associated with the given bus pointer,
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 2acec85..a605b58 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -916,6 +916,7 @@ static void vtd_interrupt_remap_table_setup(IntelIOMMUState 
*s)
 value = vtd_get_quad_raw(s, DMAR_IRTA_REG);
 s->intr_size = 1UL << ((value & VTD_IRTA_SIZE_MASK) + 1);
 s->intr_root = value & VTD_IRTA_ADDR_MASK;
+s->intr_eime = value & VTD_IRTA_EIME;
 
 /* Notify global invalidation */
 vtd_iec_notify_all(s, true, 0, 0);
@@ -2058,11 +2059,13 @@ static int vtd_remap_irq_get(IntelIOMMUState *iommu, 
uint16_t index, VTDIrq *irq
 irq->trigger_mode = irte.trigger_mode;
 irq->vector = irte.vector;
 irq->delivery_mode = irte.delivery_mode;
-/* Not support EIM yet: please refer to vt-d 9.10 DST bits */
+irq->dest = le32_to_cpu(irte.dest_id);
+if (!iommu->intr_eime) {
 #define  VTD_IR_APIC_DEST_MASK (0xff00ULL)
 #define  VTD_IR_APIC_DEST_SHIFT(8)
-irq->dest = (le32_to_cpu(irte.dest_id) & VTD_IR_APIC_DEST_MASK) >> \
-VTD_IR_APIC_DEST_SHIFT;
+irq->dest = (irq->dest & VTD_IR_APIC_DEST_MASK) >>
+VTD_IR_APIC_DEST_SHIFT;
+}
 irq->dest_mode = irte.dest_mode;
 irq->redir_hint = irte.redir_hint;
 
@@ -2312,7 +2315,7 @@ static void vtd_init(IntelIOMMUState *s)
 s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO;
 
 if (x86_iommu->intr_supported) {
-s->ecap |= VTD_ECAP_IR;
+s->ecap |= VTD_ECAP_IR | VTD_ECAP_EIM;
 }
 
 vtd_reset_context_cache(s);
@@ -2366,10 +2369,9 @@ static void vtd_init(IntelIOMMUState *s)
 vtd_define_quad(s, DMAR_FRCD_REG_0_2, 0, 0, 0x8000000000000000ULL);
 
 /*
- * Interrupt remapping registers, not support extended interrupt
- * mode for now.
+ * Interrupt remapping registers.
  */
-vtd_define_quad(s, DMAR_IRTA_REG, 0, 0xfffffffffffff00fULL, 0);
+vtd_define_quad(s, DMAR_IRTA_REG, 0, 0xfffffffffffff80fULL, 0);
 }
 
 /* Should not reset address_spaces when reset because devices will still use
-- 
MST




  1   2   3   4   5   >