[Qemu-devel] [PATCH] qmp: fix typo in input-send-event examples

2014-11-25 Thread Amos Kong
Lack of two closed bracket in json commands.

Signed-off-by: Amos Kong ak...@redhat.com
---
 qmp-commands.hx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/qmp-commands.hx b/qmp-commands.hx
index 8812401..bb2e380 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -3818,13 +3818,13 @@ Press left mouse button.
 - { execute: input-send-event,
 arguments: { console: 0,
events: [ { type: btn,
-data : { down: true, button: Left } } } }
+data : { down: true, button: Left } } ] } }
 - { return: {} }
 
 - { execute: input-send-event,
 arguments: { console: 0,
events: [ { type: btn,
-data : { down: false, button: Left } } } }
+data : { down: false, button: Left } } ] } }
 - { return: {} }
 
 Example (2):
-- 
1.9.3




Re: [Qemu-devel] [PATCH] input: move input-send-event into experimental namespace

2014-11-25 Thread Markus Armbruster
Gerd Hoffmann kra...@redhat.com writes:

 Ongoing discussions on how we are going to specify the console,
 so tag the command as experiemntal so we can refine things in
 the 2.3 development cycle.

 Signed-off-by: Gerd Hoffmann kra...@redhat.com
 ---
  qmp-commands.hx | 12 ++--

Don't you need to patch qapi-schema.json, too?

Do we actually explain x- means experimental anywhere?



[Qemu-devel] [PATCH for-2.3 0/5] aio: Support epoll by introducing qemu_poll abstraction

2014-11-25 Thread Fam Zheng
ppoll(2) doesn't scale as well as epoll: The elapsed time of the syscall is
linear to the number of fd's we poll, which hurts performance a bit when the
number of devices are many, or when a virtio device registers many virtqueues
(virtio-serial, for instance).

To show some data from my test on current master:

 - As a base point (10~20 fd's), it takes 22000 ns for each qemu_poll_ns.
 - Add 10 virtio-serial, which adds some 6 hundreds of fd's in the main loop.
   The time spent in qemu_poll_ns goes up to 75000 ns.

This series introduces qemu_poll, which is implemented  with g_poll and epoll,
decided at configure time with CONFIG_EPOLL.

After this change, the times to do the same thing with qemu_poll (more
precisely, with a sequence of qemu_poll_set_fds(), qemu_poll(),
qemu_poll_get_events() followed by syncing back to gpollfds), are reduced to
21000 ns and 25000 ns, respectively.

We are still not O(1) because as a transition, the qemu_poll_set_fds before
qemu_poll is not optimized out yet.

Fam

Fam Zheng (5):
  poll: Introduce QEMU Poll API
  posix-aio: Use QEMU poll interface
  poll: Add epoll implementation for qemu_poll
  main-loop: Replace qemu_poll_ns with qemu_poll
  tests: Add test case for qemu_poll

 Makefile.objs   |   2 +
 aio-posix.c |  52 -
 async.c |   5 +-
 include/block/aio.h |   7 +-
 include/qemu/poll.h |  40 +++
 include/qemu/timer.h|  13 ---
 include/qemu/typedefs.h |   2 +
 main-loop.c |  35 ++-
 poll-glib.c | 130 +++
 poll-linux.c| 216 ++
 qemu-timer.c|  21 
 tests/Makefile  |   2 +
 tests/test-poll.c   | 272 
 13 files changed, 723 insertions(+), 74 deletions(-)
 create mode 100644 include/qemu/poll.h
 create mode 100644 poll-glib.c
 create mode 100644 poll-linux.c
 create mode 100644 tests/test-poll.c

-- 
1.9.3




[Qemu-devel] [PATCH for-2.3 3/5] poll: Add epoll implementation for qemu_poll

2014-11-25 Thread Fam Zheng
This implements qemu_poll with epoll. The only complex part is
qemu_poll_set_fds, which will sync up epollfd with epoll_ctl by
computing the symmetric difference of previous and new fds.

Signed-off-by: Fam Zheng f...@redhat.com
---
 Makefile.objs |   4 +-
 poll-linux.c  | 216 ++
 2 files changed, 219 insertions(+), 1 deletion(-)
 create mode 100644 poll-linux.c

diff --git a/Makefile.objs b/Makefile.objs
index 57184eb..ea86cb8 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -9,7 +9,9 @@ util-obj-y = util/ qobject/ qapi/ qapi-types.o qapi-visit.o 
qapi-event.o
 block-obj-y = async.o thread-pool.o
 block-obj-y += nbd.o block.o blockjob.o
 block-obj-y += main-loop.o iohandler.o qemu-timer.o
-block-obj-$(CONFIG_POSIX) += aio-posix.o poll-glib.o
+block-obj-$(CONFIG_POSIX) += aio-posix.o
+block-obj-$(CONFIG_EPOLL) += poll-linux.o
+block-obj-$(if $(CONFIG_EPOLL),n,$(CONFIG_POSIX)) += poll-glib.o
 block-obj-$(CONFIG_WIN32) += aio-win32.o
 block-obj-y += block/
 block-obj-y += qemu-io-cmds.o
diff --git a/poll-linux.c b/poll-linux.c
new file mode 100644
index 000..a5fc201
--- /dev/null
+++ b/poll-linux.c
@@ -0,0 +1,216 @@
+/*
+ * epoll implementation for QEMU Poll API
+ *
+ * Copyright Red Hat, Inc. 2014
+ *
+ * Authors:
+ *   Fam Zheng f...@redhat.com
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include sys/epoll.h
+#include glib.h
+#include qemu-common.h
+#include qemu/timer.h
+#include qemu/poll.h
+
+struct QEMUPoll {
+int epollfd;
+struct epoll_event *events;
+int max_events;
+int nevents;
+GHashTable *fds;
+};
+
+QEMUPoll *qemu_poll_new(void)
+{
+int epollfd;
+QEMUPoll *qpoll = g_new0(QEMUPoll, 1);
+epollfd = epoll_create1(EPOLL_CLOEXEC);
+if (epollfd  0) {
+perror(epoll_create1:);
+abort();
+}
+qpoll-epollfd = epollfd;
+qpoll-max_events = 1;
+g_free(qpoll-events);
+qpoll-events = g_new(struct epoll_event, 1);
+qpoll-fds = g_hash_table_new_full(g_int_hash, g_int_equal,
+   NULL, g_free);
+return qpoll;
+}
+
+void qemu_poll_free(QEMUPoll *qpoll)
+{
+g_free(qpoll-events);
+g_hash_table_destroy(qpoll-fds);
+close(qpoll-epollfd);
+g_free(qpoll);
+}
+
+int qemu_poll(QEMUPoll *qpoll, int64_t timeout_ns)
+{
+int r;
+r = epoll_wait(qpoll-epollfd,
+   qpoll-events,
+   qpoll-max_events,
+   qemu_timeout_ns_to_ms(timeout_ns));
+qpoll-nevents = r;
+return r;
+}
+
+static inline uint32_t epoll_event_from_gio_events(int gio_events)
+{
+
+return (gio_events  G_IO_IN  ? EPOLLIN : 0) |
+   (gio_events  G_IO_OUT ? EPOLLOUT : 0) |
+   (gio_events  G_IO_ERR ? EPOLLERR : 0) |
+   (gio_events  G_IO_HUP ? EPOLLHUP : 0);
+}
+
+
+/* Add an fd to poll. Return -EEXIST if fd already registered. */
+int qemu_poll_add(QEMUPoll *qpoll, int fd, int gio_events, void *opaque)
+{
+int ret;
+struct epoll_event ev;
+QEMUPollEvent *e;
+
+ev.events = epoll_event_from_gio_events(gio_events);
+ev.data.fd = fd;
+ret = epoll_ctl(qpoll-epollfd, EPOLL_CTL_ADD, fd, ev);
+if (ret) {
+ret = -errno;
+return ret;
+}
+qpoll-max_events++;
+qpoll-events = g_renew(struct epoll_event,
+qpoll-events,
+qpoll-max_events);
+/* Shouldn't get here if the fd is already added since epoll_ctl would
+ * return -EEXIST, assert to be sure */
+assert(g_hash_table_lookup(qpoll-fds, fd) == NULL);
+e = g_new0(QEMUPollEvent, 1);
+e-fd = fd;
+e-events = gio_events;
+e-opaque = opaque;
+g_hash_table_insert(qpoll-fds, e-fd, e);
+return ret;
+}
+
+/* Delete a previously added fd. Return -ENOENT if fd not registered. */
+int qemu_poll_del(QEMUPoll *qpoll, int fd)
+{
+int ret;
+
+if (!g_hash_table_lookup(qpoll-fds, fd)) {
+ret = -ENOENT;
+goto out;
+}
+ret = epoll_ctl(qpoll-epollfd, EPOLL_CTL_DEL, fd, NULL);
+if (ret) {
+ret = -errno;
+goto out;
+}
+qpoll-max_events--;
+qpoll-events = g_renew(struct epoll_event,
+qpoll-events,
+qpoll-max_events);
+out:
+g_hash_table_remove(qpoll-fds, fd);
+return ret;
+}
+
+static void qemu_poll_copy_fd(gpointer key, gpointer value, gpointer user_data)
+{
+GHashTable *dst = user_data;
+QEMUPollEvent *event, *copy;
+
+event = value;
+copy = g_new(QEMUPollEvent, 1);
+*copy = *event;
+g_hash_table_insert(dst, copy-fd, copy);
+}
+
+static void qemu_poll_del_fd(gpointer key, gpointer value, gpointer user_data)
+{
+QEMUPoll *qpoll = user_data;
+QEMUPollEvent *event = value;
+
+qemu_poll_del(qpoll, event-fd);
+}
+
+int qemu_poll_set_fds(QEMUPoll *qpoll, GPollFD *fds, int 

[Qemu-devel] [PATCH for-2.3 4/5] main-loop: Replace qemu_poll_ns with qemu_poll

2014-11-25 Thread Fam Zheng
qemu_poll_set_fds + qemu_poll does the same, and when epoll is
available, it is faster.

Signed-off-by: Fam Zheng f...@redhat.com
---
 include/qemu/timer.h | 13 -
 main-loop.c  | 35 ++-
 qemu-timer.c | 21 -
 3 files changed, 30 insertions(+), 39 deletions(-)

diff --git a/include/qemu/timer.h b/include/qemu/timer.h
index 5f5210d..cd3371d 100644
--- a/include/qemu/timer.h
+++ b/include/qemu/timer.h
@@ -646,19 +646,6 @@ void timer_put(QEMUFile *f, QEMUTimer *ts);
 int qemu_timeout_ns_to_ms(int64_t ns);
 
 /**
- * qemu_poll_ns:
- * @fds: Array of file descriptors
- * @nfds: number of file descriptors
- * @timeout: timeout in nanoseconds
- *
- * Perform a poll like g_poll but with a timeout in nanoseconds.
- * See g_poll documentation for further details.
- *
- * Returns: number of fds ready
- */
-int qemu_poll_ns(GPollFD *fds, guint nfds, int64_t timeout);
-
-/**
  * qemu_soonest_timeout:
  * @timeout1: first timeout in nanoseconds (or -1 for infinite)
  * @timeout2: second timeout in nanoseconds (or -1 for infinite)
diff --git a/main-loop.c b/main-loop.c
index 981bcb5..b70f929 100644
--- a/main-loop.c
+++ b/main-loop.c
@@ -29,6 +29,7 @@
 #include slirp/libslirp.h
 #include qemu/main-loop.h
 #include block/aio.h
+#include qemu/poll.h
 
 #ifndef _WIN32
 
@@ -130,6 +131,7 @@ void qemu_notify_event(void)
 }
 
 static GArray *gpollfds;
+static QEMUPoll *qpoll;
 
 int qemu_init_main_loop(Error **errp)
 {
@@ -150,6 +152,7 @@ int qemu_init_main_loop(Error **errp)
 return -EMFILE;
 }
 gpollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD));
+qpoll = qemu_poll_new();
 src = aio_get_g_source(qemu_aio_context);
 g_source_attach(src, NULL);
 g_source_unref(src);
@@ -207,6 +210,8 @@ static int os_host_main_loop_wait(int64_t timeout)
 {
 int ret;
 static int spin_counter;
+QEMUPollEvent events[1024];
+int r, i;
 
 glib_pollfds_fill(timeout);
 
@@ -236,7 +241,17 @@ static int os_host_main_loop_wait(int64_t timeout)
 spin_counter++;
 }
 
-ret = qemu_poll_ns((GPollFD *)gpollfds-data, gpollfds-len, timeout);
+qemu_poll_set_fds(qpoll, (GPollFD *)gpollfds-data, gpollfds-len);
+ret = qemu_poll(qpoll, timeout);
+if (ret  0) {
+ret = MIN(ret, sizeof(events) / sizeof(QEMUPollEvent));
+r = qemu_poll_get_events(qpoll, events, ret);
+for (i = 0; i  r; i++) {
+GPollFD *fd = events[i].opaque;
+fd-revents = events[i].revents;
+}
+ret = r;
+}
 
 if (timeout) {
 qemu_mutex_lock_iothread();
@@ -389,9 +404,11 @@ static int os_host_main_loop_wait(int64_t timeout)
 {
 GMainContext *context = g_main_context_default();
 GPollFD poll_fds[1024 * 2]; /* this is probably overkill */
+QEMUPollEvent poll_events[1024 * 2];
 int select_ret = 0;
-int g_poll_ret, ret, i, n_poll_fds;
+int nevents, ret, i, n_poll_fds;
 PollingEntry *pe;
+QEMUPollEvent *events;
 WaitObjects *w = wait_objects;
 gint poll_timeout;
 int64_t poll_timeout_ns;
@@ -441,10 +458,18 @@ static int os_host_main_loop_wait(int64_t timeout)
 poll_timeout_ns = qemu_soonest_timeout(poll_timeout_ns, timeout);
 
 qemu_mutex_unlock_iothread();
-g_poll_ret = qemu_poll_ns(poll_fds, n_poll_fds + w-num, poll_timeout_ns);
+
+qemu_poll_set_fds(qpoll, (GPollFD *)poll_fds, n_poll_fds + w-num)
+nevents = qemu_poll(qpoll, poll_timeout_ns);
 
 qemu_mutex_lock_iothread();
-if (g_poll_ret  0) {
+if (nevents  0) {
+r = qemu_poll_get_events(qpoll, poll_events, nevents);
+for (i = 0; i  r; i++) {
+GPollFD *fd = poll_events[i].opaque;
+fd-revents = poll_events[i].revents;
+}
+
 for (i = 0; i  w-num; i++) {
 w-revents[i] = poll_fds[n_poll_fds + i].revents;
 }
@@ -459,7 +484,7 @@ static int os_host_main_loop_wait(int64_t timeout)
 g_main_context_dispatch(context);
 }
 
-return select_ret || g_poll_ret;
+return select_ret || nevents;
 }
 #endif
 
diff --git a/qemu-timer.c b/qemu-timer.c
index 00a5d35..4500235 100644
--- a/qemu-timer.c
+++ b/qemu-timer.c
@@ -303,27 +303,6 @@ int qemu_timeout_ns_to_ms(int64_t ns)
 return (int) ms;
 }
 
-
-/* qemu implementation of g_poll which uses a nanosecond timeout but is
- * otherwise identical to g_poll
- */
-int qemu_poll_ns(GPollFD *fds, guint nfds, int64_t timeout)
-{
-#ifdef CONFIG_PPOLL
-if (timeout  0) {
-return ppoll((struct pollfd *)fds, nfds, NULL, NULL);
-} else {
-struct timespec ts;
-ts.tv_sec = timeout / 10LL;
-ts.tv_nsec = timeout % 10LL;
-return ppoll((struct pollfd *)fds, nfds, ts, NULL);
-}
-#else
-return g_poll(fds, nfds, qemu_timeout_ns_to_ms(timeout));
-#endif
-}
-
-
 void timer_init(QEMUTimer *ts,
 QEMUTimerList *timer_list, int scale,
 QEMUTimerCB 

[Qemu-devel] [PATCH for-2.3 5/5] tests: Add test case for qemu_poll

2014-11-25 Thread Fam Zheng
Signed-off-by: Fam Zheng f...@redhat.com
---
 tests/Makefile|   2 +
 tests/test-poll.c | 272 ++
 2 files changed, 274 insertions(+)
 create mode 100644 tests/test-poll.c

diff --git a/tests/Makefile b/tests/Makefile
index 16f0e4c..79f399d 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -41,6 +41,7 @@ check-unit-$(CONFIG_POSIX) += tests/test-rfifolock$(EXESUF)
 check-unit-y += tests/test-throttle$(EXESUF)
 gcov-files-test-aio-$(CONFIG_WIN32) = aio-win32.c
 gcov-files-test-aio-$(CONFIG_POSIX) = aio-posix.c
+check-unit-y += tests/test-poll$(EXESUF)
 check-unit-y += tests/test-thread-pool$(EXESUF)
 gcov-files-test-thread-pool-y = thread-pool.c
 gcov-files-test-hbitmap-y = util/hbitmap.c
@@ -241,6 +242,7 @@ tests/check-qjson$(EXESUF): tests/check-qjson.o 
libqemuutil.a libqemustub.a
 tests/check-qom-interface$(EXESUF): tests/check-qom-interface.o 
$(qom-core-obj) libqemuutil.a libqemustub.a
 tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(block-obj-y) 
libqemuutil.a libqemustub.a
 tests/test-aio$(EXESUF): tests/test-aio.o $(block-obj-y) libqemuutil.a 
libqemustub.a
+tests/test-poll$(EXESUF): tests/test-poll.o $(block-obj-y) libqemuutil.a 
libqemustub.a
 tests/test-rfifolock$(EXESUF): tests/test-rfifolock.o libqemuutil.a 
libqemustub.a
 tests/test-throttle$(EXESUF): tests/test-throttle.o $(block-obj-y) 
libqemuutil.a libqemustub.a
 tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(block-obj-y) 
libqemuutil.a libqemustub.a
diff --git a/tests/test-poll.c b/tests/test-poll.c
new file mode 100644
index 000..75074d8
--- /dev/null
+++ b/tests/test-poll.c
@@ -0,0 +1,272 @@
+/*
+ * QTest testcase for QEMU poll
+ *
+ * Copyright Red Hat, Inc. 2014
+ *
+ * Authors:
+ *  Fam Zheng f...@redhat.com
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include glib.h
+#include qemu/poll.h
+#include qemu/event_notifier.h
+
+static EventNotifier *poll_add_one(QEMUPoll *qpoll)
+{
+EventNotifier *e = g_new(EventNotifier, 1);
+
+event_notifier_init(e, false);
+qemu_poll_add(qpoll, event_notifier_get_fd(e),
+  G_IO_IN,
+  NULL);
+return e;
+}
+
+static int poll_del_one(QEMUPoll *qpoll, EventNotifier *e)
+{
+int r = qemu_poll_del(qpoll, event_notifier_get_fd(e));
+event_notifier_cleanup(e);
+g_free(e);
+return r;
+}
+
+static void test_poll_single(void)
+{
+QEMUPoll *qpoll;
+EventNotifier *e;
+int r, i;
+
+qpoll = qemu_poll_new();
+g_assert(qpoll);
+
+e = poll_add_one(qpoll);
+
+/* Write some data and poll */
+event_notifier_set(e);
+r = qemu_poll(qpoll, 0);
+g_assert_cmpint(r, ==, 1);
+
+/* Clear data and poll */
+r = event_notifier_test_and_clear(e);
+g_assert(r);
+r = qemu_poll(qpoll, 0);
+g_assert_cmpint(r, ==, 0);
+
+/* Write a lot of data and poll */
+for (i = 0; i  1; i++) {
+event_notifier_set(e);
+}
+r = qemu_poll(qpoll, 0);
+g_assert_cmpint(r, ==, 1);
+
+/* Clear data and poll again */
+r = event_notifier_test_and_clear(e);
+g_assert(r);
+r = qemu_poll(qpoll, 0);
+g_assert_cmpint(r, ==, 0);
+
+/* Clean up */
+poll_del_one(qpoll, e);
+qemu_poll_free(qpoll);
+}
+
+static void test_poll_multiple(void)
+{
+QEMUPoll *qpoll;
+const int N = 32;
+EventNotifier *e[N];
+QEMUPollEvent events[N];
+int r, s, i;
+
+qpoll = qemu_poll_new();
+g_assert(qpoll);
+
+for (i = 0; i  N; i++) {
+e[i] = poll_add_one(qpoll);
+}
+
+/* Write some data for each and poll */
+for (i = 0; i  N; i++) {
+event_notifier_set(e[i]);
+}
+r = qemu_poll(qpoll, 0);
+g_assert_cmpint(r, ==, N);
+
+/* Clear data and poll */
+for (i = 0; i  N; i++) {
+r = event_notifier_test_and_clear(e[i]);
+g_assert(r);
+}
+r = qemu_poll(qpoll, 0);
+g_assert_cmpint(r, ==, 0);
+
+/* Write some data for first half and poll */
+for (i = 0; i  N / 2; i++) {
+event_notifier_set(e[i]);
+}
+r = qemu_poll(qpoll, 0);
+g_assert_cmpint(r, ==, N / 2);
+s = qemu_poll_get_events(qpoll, events, N);
+g_assert_cmpint(s, ==, r);
+for (i = 0; i  s; i++) {
+g_assert(events[i].revents  G_IO_IN);
+}
+
+/* Clean up */
+for (i = 0; i  N; i++) {
+poll_del_one(qpoll, e[i]);
+}
+qemu_poll_free(qpoll);
+}
+
+static void test_poll_add_del(void)
+{
+QEMUPoll *qpoll;
+EventNotifier *e1, *e2;
+QEMUPollEvent events[2];
+int r;
+
+qpoll = qemu_poll_new();
+g_assert(qpoll);
+
+e1 = poll_add_one(qpoll);
+e2 = poll_add_one(qpoll);
+
+/* Write some data for each and poll */
+event_notifier_set(e1);
+event_notifier_set(e2);
+r = qemu_poll(qpoll, 0);
+g_assert_cmpint(r, ==, 2);
+
+/* Clear e1 and poll */
+r = 

[Qemu-devel] [PATCH for-2.3 2/5] posix-aio: Use QEMU poll interface

2014-11-25 Thread Fam Zheng
The AIO handler list is only modified by aio_set_fd_handler, so we can
easily add del poll fd there. Initialize a QEMUPoll and keep track of
all the fds, so we don't need to rebuild a GPollFD array for g_poll in
aio_poll.

Signed-off-by: Fam Zheng f...@redhat.com
---
 aio-posix.c | 52 +---
 async.c |  5 +++--
 include/block/aio.h |  7 +--
 3 files changed, 29 insertions(+), 35 deletions(-)

diff --git a/aio-posix.c b/aio-posix.c
index d3ac06e..32323b3 100644
--- a/aio-posix.c
+++ b/aio-posix.c
@@ -17,6 +17,7 @@
 #include block/block.h
 #include qemu/queue.h
 #include qemu/sockets.h
+#include qemu/poll.h
 
 struct AioHandler
 {
@@ -55,6 +56,7 @@ void aio_set_fd_handler(AioContext *ctx,
 /* Are we deleting the fd handler? */
 if (!io_read  !io_write) {
 if (node) {
+qemu_poll_del(ctx-qpoll, fd);
 g_source_remove_poll(ctx-source, node-pfd);
 
 /* If the lock is held, just mark the node as deleted */
@@ -71,13 +73,15 @@ void aio_set_fd_handler(AioContext *ctx,
 }
 }
 } else {
-if (node == NULL) {
+if (node) {
+/* Remove the old */
+qemu_poll_del(ctx-qpoll, fd);
+g_source_remove_poll(ctx-source, node-pfd);
+} else {
 /* Alloc and insert if it's not already there */
 node = g_malloc0(sizeof(AioHandler));
 node-pfd.fd = fd;
 QLIST_INSERT_HEAD(ctx-aio_handlers, node, node);
-
-g_source_add_poll(ctx-source, node-pfd);
 }
 /* Update handler with latest information */
 node-io_read = io_read;
@@ -87,6 +91,8 @@ void aio_set_fd_handler(AioContext *ctx,
 
 node-pfd.events = (io_read ? G_IO_IN | G_IO_HUP | G_IO_ERR : 0);
 node-pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0);
+qemu_poll_add(ctx-qpoll, fd, node-pfd.events, node);
+g_source_add_poll(ctx-source, node-pfd);
 }
 
 aio_notify(ctx);
@@ -191,6 +197,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
 AioHandler *node;
 bool was_dispatching;
 int ret;
+int i;
 bool progress;
 
 was_dispatching = ctx-dispatching;
@@ -208,38 +215,21 @@ bool aio_poll(AioContext *ctx, bool blocking)
  */
 aio_set_dispatching(ctx, !blocking);
 
-ctx-walking_handlers++;
-
-g_array_set_size(ctx-pollfds, 0);
-
-/* fill pollfds */
-QLIST_FOREACH(node, ctx-aio_handlers, node) {
-node-pollfds_idx = -1;
-if (!node-deleted  node-pfd.events) {
-GPollFD pfd = {
-.fd = node-pfd.fd,
-.events = node-pfd.events,
-};
-node-pollfds_idx = ctx-pollfds-len;
-g_array_append_val(ctx-pollfds, pfd);
-}
-}
-
-ctx-walking_handlers--;
-
 /* wait until next event */
-ret = qemu_poll_ns((GPollFD *)ctx-pollfds-data,
- ctx-pollfds-len,
- blocking ? aio_compute_timeout(ctx) : 0);
+ret = qemu_poll(ctx-qpoll, blocking ? aio_compute_timeout(ctx) : 0);
 
 /* if we have any readable fds, dispatch event */
 if (ret  0) {
-QLIST_FOREACH(node, ctx-aio_handlers, node) {
-if (node-pollfds_idx != -1) {
-GPollFD *pfd = g_array_index(ctx-pollfds, GPollFD,
-  node-pollfds_idx);
-node-pfd.revents = pfd-revents;
-}
+int r;
+g_array_set_size(ctx-events, ret);
+r = qemu_poll_get_events(ctx-qpoll,
+(QEMUPollEvent *)ctx-events-data,
+ret);
+assert(r == ret);
+for (i = 0; i  r; i++) {
+QEMUPollEvent *e = g_array_index(ctx-events, QEMUPollEvent, i);
+node = e-opaque;
+node-pfd.revents = e-revents;
 }
 }
 
diff --git a/async.c b/async.c
index 6e1b282..443a674 100644
--- a/async.c
+++ b/async.c
@@ -27,6 +27,7 @@
 #include block/thread-pool.h
 #include qemu/main-loop.h
 #include qemu/atomic.h
+#include qemu/poll.h
 
 /***/
 /* bottom halves (can be seen as timers which expire ASAP) */
@@ -232,7 +233,6 @@ aio_ctx_finalize(GSource *source)
 event_notifier_cleanup(ctx-notifier);
 rfifolock_destroy(ctx-lock);
 qemu_mutex_destroy(ctx-bh_lock);
-g_array_free(ctx-pollfds, TRUE);
 timerlistgroup_deinit(ctx-tlg);
 }
 
@@ -300,10 +300,11 @@ AioContext *aio_context_new(Error **errp)
 error_setg_errno(errp, -ret, Failed to initialize event notifier);
 return NULL;
 }
+ctx-qpoll = qemu_poll_new();
+ctx-events = g_array_new(FALSE, FALSE, sizeof(QEMUPollEvent));
 aio_set_event_notifier(ctx, ctx-notifier,
(EventNotifierHandler *)
event_notifier_test_and_clear);
-ctx-pollfds = 

Re: [Qemu-devel] [PATCH] input: move input-send-event into experimental namespace

2014-11-25 Thread Amos Kong
On Tue, Nov 25, 2014 at 09:06:34AM +0100, Markus Armbruster wrote:
 Gerd Hoffmann kra...@redhat.com writes:
 
  Ongoing discussions on how we are going to specify the console,
  so tag the command as experiemntal so we can refine things in
  the 2.3 development cycle.
 
  Signed-off-by: Gerd Hoffmann kra...@redhat.com
  ---
   qmp-commands.hx | 12 ++--
 
 Don't you need to patch qapi-schema.json, too?

Not necessary in function level.
 
 Do we actually explain x- means experimental anywhere?

What's the official way to make command experimental?

Quote from qapi-schema.json:
| # Notes: This command is experimental and may change
| syntax in future releases.

-- 
Amos.


signature.asc
Description: Digital signature


Re: [Qemu-devel] [BUGFIX][PATCH for 2.2 1/1] hw/ide/core.c: Prevent SIGSEGV during migration

2014-11-25 Thread Dr. David Alan Gilbert
* Don Slutz (dsl...@verizon.com) wrote:
 On 11/21/14 05:49, Dr. David Alan Gilbert wrote:
 * Markus Armbruster (arm...@redhat.com) wrote:
 Don Slutzdsl...@verizon.com  writes:
 
 On 11/19/14 07:29, Markus Armbruster wrote:
 Don Slutzdsl...@verizon.com  writes:
 
 The other callers to blk_set_enable_write_cache() in this file
 already check for s-blk == NULL.
 
 Signed-off-by: Don Slutzdsl...@verizon.com
 ---
 
 I think this is a bugfix that should be back ported to stable
 releases.
 
 I also think this should be done in xen's copy of QEMU for 4.5 with
 back port(s) to active stable releases.
 
 Note: In 2.1 and earlier the routine is
 bdrv_set_enable_write_cache(); variable is s-bs.
 Got a reproducer?
 yes.  Migrating a guest from xen 4.2 or 4.3 to xen 4.4 (or 4.5-unstable) on
 CentOS 6.3 with xen_emul_unplug=unnecessary and no cdrom defined.
 
 
 I'm asking because I believe s-identify_set implies s-blk.
 s-identify_set is initialized to zero, and gets set to non-zero exactly
 on the first successful IDENTIFY DEVICE or IDENTIFY PACKET DEVICE, in
 ide_identify(), ide_atapi_identify() or ide_cfata_identify(),
 respectively.  Only called via cmd_identify() / cmd_identify_packet()
 via ide_exec_cmd().  The latter immediately fails when !s-blk:
 
   s = idebus_active_if(bus);
   /* ignore commands to non existent slave */
   if (s != bus-ifs  !s-blk) {
   return;
   }
 I do think that you are right.  I have now spent more time on why I am
 seeing this.
 
 
 Even if I'm right, your patch is fine, because it makes this spot more
 obviously correct, and consistent with the other uses of
 blk_set_enable_write_cache().  The case for stable is weak, though.
 
 I had not fully tracked down what is happening before sending the bugfix.
 I have now done more debugging, and have tracked it down to xen 4.4
 now using -nodefaults with QEMU.
 
 I needed to add output to QEMU to track this down because I have long
 command lines...
 
 (all I get for ps -ef):
 [...]
 Which is missing that option.
 
 The ide that was aborting in this case is the cdrom at hdc that is added
 if you do not specify -nodefaults.
 
 Since this is a changed machine config, I am no longer as sure as what
 versions this needs to be in.
 
 If I put my QEMU hat on, it does not look like a back port is needed.
 However
 for xen it would be nice.
 
 I do not know how the QEMU community feels about migration from a config
 without -nodefaults to one with -nodefaults as the only difference.
 So you have a CD-ROM on the source, but not on the destination?
 
 Yes, QEMU in the source has an empty CD-ROM, but not on the destination.
 xen
 does not know that QEMU added a CD-ROM drive in hdc by default.
 
 That can't work.  I guess it broke for you in an unusual way (target
 crashes) rather than the usual way (target rejects migration data for a
 device it doesn't have) due to our convoluted IDE data structures.  With
 your patch applied it should break the usual way.  Does it?
 
 Nope. It does not break at all.  The migration works.  The target accepts
 the hdc data.
 It looks like to me that all 4 IDE state data is sent.
 
 
 Management tools should use -nodefaults.  But if it mixes default and
 -nodefaults in migration, recreating the stuff it got by default but
 doesn't get with -nodefaults is its own responsibility.
 
 Yes. Since xen did not ask for a CD-ROM, it did not expect to need to create
 it.   xen was fixed to use -nodefaults.
 
 Well, mostly - we wouldn't expect a migration to work if the source/dest
 didn't match exactly; but QEMU shouldn't seg.
 
 Well, this change prevents a seg.  So you are in favor of having this
 backported?

Yes.

Dave

 
 
-Don Slutz
 
 Dave
 
 --
 Dr. David Alan Gilbert /dgilb...@redhat.com  / Manchester, UK
 
--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK



Re: [Qemu-devel] [PATCH 1/2] qemu-iotests: Speed up make check-block

2014-11-25 Thread Max Reitz

On 2014-10-28 at 07:45, Fam Zheng wrote:

Using /tmp (usually mounted as tmpfs) and cache=writeback, the quick
group can be quicker.

On my laptop (Lenovo T430s with Fedora 20), this reduces the time from
50s to 30s.

Signed-off-by: Fam Zheng f...@redhat.com
---
  tests/qemu-iotests-quick.sh | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/qemu-iotests-quick.sh b/tests/qemu-iotests-quick.sh
index 12af731..0b54dbf 100755
--- a/tests/qemu-iotests-quick.sh
+++ b/tests/qemu-iotests-quick.sh
@@ -3,6 +3,6 @@
  cd tests/qemu-iotests
  
  ret=0

-./check -T -qcow2 -g quick || ret=1
+TEST_DIR=/tmp/qemu-iotests-quick-$$ ./check -T -qcow2 -g quick -c writeback || 
ret=1


There are (at least) two tests which don't work with -c writeback (026 
and 039), one of them is in the quick group (039). Why not use -c 
writethrough? It doesn't make any difference on tmpfs anyway  (we can't 
omit it because that will break 091).


And thank you, I didn't know about TEST_DIR. I always built the full 
qemu in /tmp. *g*


Max


  exit $ret





Re: [Qemu-devel] [PATCH 2/2] tests/Makefile: Add check-block to make check

2014-11-25 Thread Max Reitz

On 2014-10-28 at 07:45, Fam Zheng wrote:

Signed-off-by: Fam Zheng f...@redhat.com
---
  tests/Makefile | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/Makefile b/tests/Makefile
index 16f0e4c..f430b18 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -463,7 +463,7 @@ check-qapi-schema: $(patsubst %,check-%, 
$(check-qapi-schema-y))
  check-qtest: $(patsubst %,check-qtest-%, $(QTEST_TARGETS))
  check-unit: $(patsubst %,check-%, $(check-unit-y))
  check-block: $(patsubst %,check-%, $(check-block-y))
-check: check-qapi-schema check-unit check-qtest
+check: check-qapi-schema check-unit check-qtest check-block
  check-clean:
$(MAKE) -C tests/tcg clean
rm -rf $(check-unit-y) tests/*.o $(QEMU_IOTESTS_HELPERS-y)


Even regardless of patch 1:

Reviewed-by: Max Reitz mre...@redhat.com

But we must fix 039 first (I remember some other test failing sometimes, 
but I can't remember which).




Re: [Qemu-devel] [PATCH 0/2] tests: Add check-block to make check

2014-11-25 Thread Max Reitz

On 2014-11-25 at 08:30, Markus Armbruster wrote:

Markus Armbruster arm...@redhat.com writes:


Fam Zheng f...@redhat.com writes:


qemu-iotests contains useful tests that have a nice coverage of block layer
code. Adding check-block (which calls tests/qemu-iotests-quick.sh) to make
check is good for developers' self-testing.

With the first patch, this set takes a half minute on my laptop. If
-j option
is used, it only takes a few more seconds than what we have now.

Different data point: elderly machine, spinning rust, /tmp is tmpfs, no
-j: elapsed time increases from ~2 to ~3 minutes.

I'm very much in favour of actually running the tests we have.

Running all the block tests for all the formats would be too slow, and
that's why you run just the quick group, and only for qcow2.  Quick
enough?

Any ideas on speeding it up further?  Trimming image sizes, perhaps?

What are the slowest tests in the quick group?  Why are they slow?  How
are tests selected for the quick group anyway?


Last time I updated the associations it was Whatever runs in under five 
seconds on my HDD.


Max



Re: [Qemu-devel] [PATCH 1/2] qemu-iotests: Speed up make check-block

2014-11-25 Thread Kevin Wolf
Am 25.11.2014 um 10:12 hat Max Reitz geschrieben:
 On 2014-10-28 at 07:45, Fam Zheng wrote:
 Using /tmp (usually mounted as tmpfs) and cache=writeback, the quick
 group can be quicker.
 
 On my laptop (Lenovo T430s with Fedora 20), this reduces the time from
 50s to 30s.
 
 Signed-off-by: Fam Zheng f...@redhat.com
 ---
   tests/qemu-iotests-quick.sh | 2 +-
   1 file changed, 1 insertion(+), 1 deletion(-)
 
 diff --git a/tests/qemu-iotests-quick.sh b/tests/qemu-iotests-quick.sh
 index 12af731..0b54dbf 100755
 --- a/tests/qemu-iotests-quick.sh
 +++ b/tests/qemu-iotests-quick.sh
 @@ -3,6 +3,6 @@
   cd tests/qemu-iotests
   ret=0
 -./check -T -qcow2 -g quick || ret=1
 +TEST_DIR=/tmp/qemu-iotests-quick-$$ ./check -T -qcow2 -g quick -c writeback 
 || ret=1
 
 There are (at least) two tests which don't work with -c writeback
 (026 and 039), one of them is in the quick group (039). Why not use
 -c writethrough? It doesn't make any difference on tmpfs anyway  (we
 can't omit it because that will break 091).

Why use any -c? The default is the fast option writeback, and for those
test cases that don't support writeback, something working is chosen
instead.

Kevin



Re: [Qemu-devel] [PATCH 1/2] qemu-iotests: Speed up make check-block

2014-11-25 Thread Max Reitz

On 2014-11-25 at 10:21, Kevin Wolf wrote:

Am 25.11.2014 um 10:12 hat Max Reitz geschrieben:

On 2014-10-28 at 07:45, Fam Zheng wrote:

Using /tmp (usually mounted as tmpfs) and cache=writeback, the quick
group can be quicker.

On my laptop (Lenovo T430s with Fedora 20), this reduces the time from
50s to 30s.

Signed-off-by: Fam Zheng f...@redhat.com
---
  tests/qemu-iotests-quick.sh | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/qemu-iotests-quick.sh b/tests/qemu-iotests-quick.sh
index 12af731..0b54dbf 100755
--- a/tests/qemu-iotests-quick.sh
+++ b/tests/qemu-iotests-quick.sh
@@ -3,6 +3,6 @@
  cd tests/qemu-iotests
  ret=0
-./check -T -qcow2 -g quick || ret=1
+TEST_DIR=/tmp/qemu-iotests-quick-$$ ./check -T -qcow2 -g quick -c writeback || 
ret=1

There are (at least) two tests which don't work with -c writeback
(026 and 039), one of them is in the quick group (039). Why not use
-c writethrough? It doesn't make any difference on tmpfs anyway  (we
can't omit it because that will break 091).

Why use any -c? The default is the fast option writeback, and for those
test cases that don't support writeback, something working is chosen
instead.


Because that breaks 091.

Max



Re: [Qemu-devel] [PATCH 1/2] qemu-iotests: Speed up make check-block

2014-11-25 Thread Kevin Wolf
Am 25.11.2014 um 10:21 hat Max Reitz geschrieben:
 On 2014-11-25 at 10:21, Kevin Wolf wrote:
 Am 25.11.2014 um 10:12 hat Max Reitz geschrieben:
 On 2014-10-28 at 07:45, Fam Zheng wrote:
 Using /tmp (usually mounted as tmpfs) and cache=writeback, the quick
 group can be quicker.
 
 On my laptop (Lenovo T430s with Fedora 20), this reduces the time from
 50s to 30s.
 
 Signed-off-by: Fam Zheng f...@redhat.com
 ---
   tests/qemu-iotests-quick.sh | 2 +-
   1 file changed, 1 insertion(+), 1 deletion(-)
 
 diff --git a/tests/qemu-iotests-quick.sh b/tests/qemu-iotests-quick.sh
 index 12af731..0b54dbf 100755
 --- a/tests/qemu-iotests-quick.sh
 +++ b/tests/qemu-iotests-quick.sh
 @@ -3,6 +3,6 @@
   cd tests/qemu-iotests
   ret=0
 -./check -T -qcow2 -g quick || ret=1
 +TEST_DIR=/tmp/qemu-iotests-quick-$$ ./check -T -qcow2 -g quick -c 
 writeback || ret=1
 There are (at least) two tests which don't work with -c writeback
 (026 and 039), one of them is in the quick group (039). Why not use
 -c writethrough? It doesn't make any difference on tmpfs anyway  (we
 can't omit it because that will break 091).
 Why use any -c? The default is the fast option writeback, and for those
 test cases that don't support writeback, something working is chosen
 instead.
 
 Because that breaks 091.

That's unfortunate. I wish tmpfs supported O_DIRECT...

But let's just remove it from quick then - it doesn't really matter if
it doesn't run because of the cache mode or because we didn't include it
in the group.

-c writethrough is okay as long as you really have tmpfs on your /tmp,
but it really hurts when you don't (and I for one don't, standard RHEL 7
installation).

Kevin



Re: [Qemu-devel] [PATCH] qmp: fix typo in input-send-event examples

2014-11-25 Thread Markus Armbruster
Amos Kong ak...@redhat.com writes:

 Lack of two closed bracket in json commands.

 Signed-off-by: Amos Kong ak...@redhat.com

Reviewed-by: Markus Armbruster arm...@redhat.com



Re: [Qemu-devel] [PATCH 1/2] qemu-iotests: Speed up make check-block

2014-11-25 Thread Max Reitz

On 2014-11-25 at 10:30, Kevin Wolf wrote:

Am 25.11.2014 um 10:21 hat Max Reitz geschrieben:

On 2014-11-25 at 10:21, Kevin Wolf wrote:

Am 25.11.2014 um 10:12 hat Max Reitz geschrieben:

On 2014-10-28 at 07:45, Fam Zheng wrote:

Using /tmp (usually mounted as tmpfs) and cache=writeback, the quick
group can be quicker.

On my laptop (Lenovo T430s with Fedora 20), this reduces the time from
50s to 30s.

Signed-off-by: Fam Zheng f...@redhat.com
---
  tests/qemu-iotests-quick.sh | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/qemu-iotests-quick.sh b/tests/qemu-iotests-quick.sh
index 12af731..0b54dbf 100755
--- a/tests/qemu-iotests-quick.sh
+++ b/tests/qemu-iotests-quick.sh
@@ -3,6 +3,6 @@
  cd tests/qemu-iotests
  ret=0
-./check -T -qcow2 -g quick || ret=1
+TEST_DIR=/tmp/qemu-iotests-quick-$$ ./check -T -qcow2 -g quick -c writeback || 
ret=1

There are (at least) two tests which don't work with -c writeback
(026 and 039), one of them is in the quick group (039). Why not use
-c writethrough? It doesn't make any difference on tmpfs anyway  (we
can't omit it because that will break 091).

Why use any -c? The default is the fast option writeback, and for those
test cases that don't support writeback, something working is chosen
instead.

Because that breaks 091.

That's unfortunate. I wish tmpfs supported O_DIRECT...

But let's just remove it from quick then - it doesn't really matter if
it doesn't run because of the cache mode or because we didn't include it
in the group.


Fine with me.

Max


-c writethrough is okay as long as you really have tmpfs on your /tmp,
but it really hurts when you don't (and I for one don't, standard RHEL 7
installation).




Re: [Qemu-devel] [PATCH 1/2] qemu-iotests: Speed up make check-block

2014-11-25 Thread Fam Zheng
On Tue, 11/25 10:31, Max Reitz wrote:
 On 2014-11-25 at 10:30, Kevin Wolf wrote:
 Am 25.11.2014 um 10:21 hat Max Reitz geschrieben:
 On 2014-11-25 at 10:21, Kevin Wolf wrote:
 Am 25.11.2014 um 10:12 hat Max Reitz geschrieben:
 On 2014-10-28 at 07:45, Fam Zheng wrote:
 Using /tmp (usually mounted as tmpfs) and cache=writeback, the quick
 group can be quicker.
 
 On my laptop (Lenovo T430s with Fedora 20), this reduces the time from
 50s to 30s.
 
 Signed-off-by: Fam Zheng f...@redhat.com
 ---
   tests/qemu-iotests-quick.sh | 2 +-
   1 file changed, 1 insertion(+), 1 deletion(-)
 
 diff --git a/tests/qemu-iotests-quick.sh b/tests/qemu-iotests-quick.sh
 index 12af731..0b54dbf 100755
 --- a/tests/qemu-iotests-quick.sh
 +++ b/tests/qemu-iotests-quick.sh
 @@ -3,6 +3,6 @@
   cd tests/qemu-iotests
   ret=0
 -./check -T -qcow2 -g quick || ret=1
 +TEST_DIR=/tmp/qemu-iotests-quick-$$ ./check -T -qcow2 -g quick -c 
 writeback || ret=1
 There are (at least) two tests which don't work with -c writeback
 (026 and 039), one of them is in the quick group (039). Why not use
 -c writethrough? It doesn't make any difference on tmpfs anyway  (we
 can't omit it because that will break 091).
 Why use any -c? The default is the fast option writeback, and for those
 test cases that don't support writeback, something working is chosen
 instead.
 Because that breaks 091.
 That's unfortunate. I wish tmpfs supported O_DIRECT...

Indeed unfortunate!

 
 But let's just remove it from quick then - it doesn't really matter if
 it doesn't run because of the cache mode or because we didn't include it
 in the group.

Will do it when respin.

 -c writethrough is okay as long as you really have tmpfs on your /tmp,
 but it really hurts when you don't (and I for one don't, standard RHEL 7
 installation).
 

Oh, do you have any better idea than hardcoding to /tmp then?

Fam




Re: [Qemu-devel] [BUGFIX][PATCH for 2.2 v6 1/1] -machine vmport=auto: Fix handling of VMWare ioport emulation for xen

2014-11-25 Thread Paolo Bonzini


On 21/11/2014 17:18, Don Slutz wrote:
 c/s 9b23cfb76b3a5e9eb5cc899eaf2f46bc46d33ba4
 
 or
 
 c/s b154537ad07598377ebf98252fb7d2aff127983b
 
 moved the testing of xen_enabled() from pc_init1() to
 pc_machine_initfn().
 
 xen_enabled() does not return the correct value in
 pc_machine_initfn().
 
 Changed vmport from a bool to an enum.  Added the value auto to do
 the old way.  Move check of xen_enabled() back to pc_init1().
 
 Signed-off-by: Don Slutz dsl...@verizon.com
 Acked-by: Eric Blake ebl...@redhat.com
 ---
 v6:
 
   Added the sentence: Move check of xen_enabled() back to pc_init1().
   to the commit message.
 
   I considered folding pc_machine_set_vmport and
   pc_machine_get_vmport into 1 routine.  Desided that for 2.2 I
   would keep the extra safety of passing a copy of vmport in the get
   routine.  I do not think is is needed but without a comment
   about this I took the simpler change.
 
   Eduardo Habkost:
 Use visit_type_OnOffAuto not visit_type_enum.
   Done -- Needed to add #include qapi-visit.h
 Adjust usage of visit_type_OnOffAuto to avoid undefined
 values.
   Done
 Change the property from str to OnOffAuto
   Done.
 
 v5:
   Eduardo Habkost:
 What about changing pc_machine-vmport here instead of using a
 no_vmport variable, so the actual vmport configuration may be
 queried by anybody later using the QOM property? It would even
 make the code shorter.
   Done.
   Eric Blake:
 I've only reviewed the qapi/common.json and qemu-options.hx
 files for QMP interface (and will leave the rest of the patch to
 others), but I'm okay with the changes to those files. I guess
 that means no R-b, since I didn't do a full review, so here's a
 weaker acked-by.
 
 v4:
   Michael S. Tsirkin, Eric Blake, Eduardo Habkost:
 Rename vmport to OnOffAuto and move to qapi/common.json
   Eduardo Habkost:
 Simpler convert of enum to no_vmport.
   Michael S. Tsirkin:
 Add assert for ON_OFF_AUTO_MAX.
 
  hw/i386/pc.c | 22 +-
  hw/i386/pc_piix.c|  7 ++-
  hw/i386/pc_q35.c |  7 ++-
  include/hw/i386/pc.h |  2 +-
  qapi/common.json | 15 +++
  qemu-options.hx  |  8 +---
  vl.c |  2 +-
  7 files changed, 47 insertions(+), 16 deletions(-)
 
 diff --git a/hw/i386/pc.c b/hw/i386/pc.c
 index 1205db8..10c1fa5 100644
 --- a/hw/i386/pc.c
 +++ b/hw/i386/pc.c
 @@ -61,6 +61,7 @@
  #include hw/mem/pc-dimm.h
  #include trace.h
  #include qapi/visitor.h
 +#include qapi-visit.h
  
  /* debug PC/ISA interrupts */
  //#define DEBUG_IRQ
 @@ -1711,18 +1712,21 @@ static void pc_machine_set_max_ram_below_4g(Object 
 *obj, Visitor *v,
  pcms-max_ram_below_4g = value;
  }
  
 -static bool pc_machine_get_vmport(Object *obj, Error **errp)
 +static void pc_machine_get_vmport(Object *obj, Visitor *v, void *opaque,
 +  const char *name, Error **errp)
  {
  PCMachineState *pcms = PC_MACHINE(obj);
 +OnOffAuto vmport = pcms-vmport;
  
 -return pcms-vmport;
 +visit_type_OnOffAuto(v, vmport, name, errp);
  }
  
 -static void pc_machine_set_vmport(Object *obj, bool value, Error **errp)
 +static void pc_machine_set_vmport(Object *obj, Visitor *v, void *opaque,
 +  const char *name, Error **errp)
  {
  PCMachineState *pcms = PC_MACHINE(obj);
  
 -pcms-vmport = value;
 +visit_type_OnOffAuto(v, pcms-vmport, name, errp);
  }
  
  static void pc_machine_initfn(Object *obj)
 @@ -1737,11 +1741,11 @@ static void pc_machine_initfn(Object *obj)
  pc_machine_get_max_ram_below_4g,
  pc_machine_set_max_ram_below_4g,
  NULL, NULL, NULL);
 -pcms-vmport = !xen_enabled();
 -object_property_add_bool(obj, PC_MACHINE_VMPORT,
 - pc_machine_get_vmport,
 - pc_machine_set_vmport,
 - NULL);
 +pcms-vmport = ON_OFF_AUTO_AUTO;
 +object_property_add(obj, PC_MACHINE_VMPORT, OnOffAuto,
 +pc_machine_get_vmport,
 +pc_machine_set_vmport,
 +NULL, NULL, NULL);
  }
  
  static void pc_machine_class_init(ObjectClass *oc, void *data)
 diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
 index 7bb97a4..fffaab7 100644
 --- a/hw/i386/pc_piix.c
 +++ b/hw/i386/pc_piix.c
 @@ -234,9 +234,14 @@ static void pc_init1(MachineState *machine,
  
  pc_vga_init(isa_bus, pci_enabled ? pci_bus : NULL);
  
 +assert(pc_machine-vmport != ON_OFF_AUTO_MAX);
 +if (pc_machine-vmport == ON_OFF_AUTO_AUTO) {
 +pc_machine-vmport = xen_enabled() ? ON_OFF_AUTO_OFF : 
 ON_OFF_AUTO_ON;
 +}
 +
  /* init basic PC hardware */
  pc_basic_device_init(isa_bus, gsi, rtc_state, floppy,
 - !pc_machine-vmport, 0x4);
 + (pc_machine-vmport != ON_OFF_AUTO_ON), 

Re: [Qemu-devel] [PATCH for-2.2] fw_cfg: fix boot order bug when dynamically modified via QOM

2014-11-25 Thread Paolo Bonzini


On 25/11/2014 05:38, arei.gong...@huawei.com wrote:
 From: Gonglei arei.gong...@huawei.com
 
 When we dynamically modify boot order, the length of
 boot order will be changed, but we don't update
 s-files-f[i].size with new length. This casuse
 seabios read a wrong vale of qemu cfg file about
 bootorder.
 
 Cc: Gerd Hoffmann kra...@redhat.com
 Cc: Paolo Bonzini pbonz...@redhat.com
 Signed-off-by: Gonglei arei.gong...@huawei.com
 ---
  hw/nvram/fw_cfg.c | 7 +--
  1 file changed, 5 insertions(+), 2 deletions(-)
 
 diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c
 index e7ed27e..a7122ee 100644
 --- a/hw/nvram/fw_cfg.c
 +++ b/hw/nvram/fw_cfg.c
 @@ -523,6 +523,7 @@ void *fw_cfg_modify_file(FWCfgState *s, const char 
 *filename,
  void *data, size_t len)
  {
  int i, index;
 +void *ptr = NULL;
  
  assert(s-files);
  
 @@ -531,8 +532,10 @@ void *fw_cfg_modify_file(FWCfgState *s, const char 
 *filename,
  
  for (i = 0; i  index; i++) {
  if (strcmp(filename, s-files-f[i].name) == 0) {
 -return fw_cfg_modify_bytes_read(s, FW_CFG_FILE_FIRST + i,
 - data, len);
 +ptr = fw_cfg_modify_bytes_read(s, FW_CFG_FILE_FIRST + i,
 +   data, len);
 +s-files-f[i].size   = cpu_to_be32(len);
 +return ptr;
  }
  }
  /* add new one */
 

Applied, thanks.

Paolo



Re: [Qemu-devel] [PATCH 1/2] qemu-iotests: Speed up make check-block

2014-11-25 Thread Kevin Wolf
Am 25.11.2014 um 10:44 hat Fam Zheng geschrieben:
 On Tue, 11/25 10:31, Max Reitz wrote:
  On 2014-11-25 at 10:30, Kevin Wolf wrote:
  Am 25.11.2014 um 10:21 hat Max Reitz geschrieben:
  On 2014-11-25 at 10:21, Kevin Wolf wrote:
  Am 25.11.2014 um 10:12 hat Max Reitz geschrieben:
  On 2014-10-28 at 07:45, Fam Zheng wrote:
  Using /tmp (usually mounted as tmpfs) and cache=writeback, the quick
  group can be quicker.
  
  On my laptop (Lenovo T430s with Fedora 20), this reduces the time from
  50s to 30s.
  
  Signed-off-by: Fam Zheng f...@redhat.com
  ---
tests/qemu-iotests-quick.sh | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
  
  diff --git a/tests/qemu-iotests-quick.sh b/tests/qemu-iotests-quick.sh
  index 12af731..0b54dbf 100755
  --- a/tests/qemu-iotests-quick.sh
  +++ b/tests/qemu-iotests-quick.sh
  @@ -3,6 +3,6 @@
cd tests/qemu-iotests
ret=0
  -./check -T -qcow2 -g quick || ret=1
  +TEST_DIR=/tmp/qemu-iotests-quick-$$ ./check -T -qcow2 -g quick -c 
  writeback || ret=1
  There are (at least) two tests which don't work with -c writeback
  (026 and 039), one of them is in the quick group (039). Why not use
  -c writethrough? It doesn't make any difference on tmpfs anyway  (we
  can't omit it because that will break 091).
  Why use any -c? The default is the fast option writeback, and for those
  test cases that don't support writeback, something working is chosen
  instead.
  Because that breaks 091.
  That's unfortunate. I wish tmpfs supported O_DIRECT...
 
 Indeed unfortunate!
 
  
  But let's just remove it from quick then - it doesn't really matter if
  it doesn't run because of the cache mode or because we didn't include it
  in the group.
 
 Will do it when respin.
 
  -c writethrough is okay as long as you really have tmpfs on your /tmp,
  but it really hurts when you don't (and I for one don't, standard RHEL 7
  installation).
  
 
 Oh, do you have any better idea than hardcoding to /tmp then?

I think /tmp is fine. It helps those of us who do have a tmpfs there,
and it shouldn't hurt the rest at least. Just don't combine it with
writethrough, but leave out -c.

Kevin



Re: [Qemu-devel] [BUGFIX][PATCH for 2.2 v6 1/1] -machine vmport=auto: Fix handling of VMWare ioport emulation for xen

2014-11-25 Thread Fabio Fantoni

On Fri, Nov 21, 2014 at 11:18:52AM -0500, Don Slutz wrote:
/  c/s 9b23cfb76b3a5e9eb5cc899eaf2f46bc46d33ba4/
/  /
/  or/
/  /
/  c/s b154537ad07598377ebf98252fb7d2aff127983b/
/  /
/  moved the testing of xen_enabled() from pc_init1() to/
/  pc_machine_initfn()./
/  /
/  xen_enabled() does not return the correct value in/
/  pc_machine_initfn()./
/  /
/  Changed vmport from a bool to an enum.  Added the value auto to do/
/  the old way.  Move check of xen_enabled() back to pc_init1()./
/  /
/  Signed-off-by: Don Slutz address@hidden/
/  Acked-by: Eric Blake address@hidden/

Reviewed-by: Eduardo Habkost address@hidden

Thanks for your patience with the many requests for changes.

--
Eduardo


This patch need more testing before applying it?
If yes I'll do a fast test ASAP.

Thanks for any reply.



[Qemu-devel] [PATCH 0/3] iotests: Fix test 039

2014-11-25 Thread Max Reitz
Test 039 used to fail because qemu-io -c abort may generate core dumps
even with ulimit -c 0 (and the output then contains (core dumped)).
Fix this by adding an option to the abort command which allows
specifying a signal number to raise(). Using SIGKILL for example does
not result in a core dump, but it still badly crashes qemu-io (as
desired).

I am sending this series because we need all tests to work before adding
the check-block target to make check (which we will hopefully do
soon).


Max Reitz (3):
  qemu-io: Let -c abort raise any signal
  iotests: Filter for Killed in qemu-io output
  iotests: Fix test 039

 qemu-io-cmds.c   | 59 ++--
 tests/qemu-iotests/039   | 12 
 tests/qemu-iotests/039.out   |  6 ++--
 tests/qemu-iotests/common.filter |  2 +-
 4 files changed, 67 insertions(+), 12 deletions(-)

-- 
1.9.3




[Qemu-devel] [PATCH 1/3] qemu-io: Let -c abort raise any signal

2014-11-25 Thread Max Reitz
abort() has the sometimes undesirable side-effect of generating a core
dump. If that is not needed, SIGKILL has the same effect of abruptly
crash qemu; without a core dump.

Therefore, this patch allows to use the qemu-io abort command to raise
any signal.

Signed-off-by: Max Reitz mre...@redhat.com
---
 qemu-io-cmds.c | 59 +++---
 1 file changed, 56 insertions(+), 3 deletions(-)

diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
index d94fb1e..5d39cf4 100644
--- a/qemu-io-cmds.c
+++ b/qemu-io-cmds.c
@@ -2036,18 +2036,71 @@ static const cmdinfo_t wait_break_cmd = {
.oneline= waits for the suspension of a request,
 };
 
-static int abort_f(BlockDriverState *bs, int argc, char **argv)
+
+static void abort_help(void)
 {
-abort();
+printf(
+\n
+ simulates a program crash\n
+\n
+ Invokes abort(), or raise(signal) if a signal number is specified.\n
+ -S, -- number of the signal to raise()\n
+\n);
 }
 
+static int abort_f(BlockDriverState *bs, int argc, char **argv);
+
 static const cmdinfo_t abort_cmd = {
.name   = abort,
.cfunc  = abort_f,
+   .argmin = 0,
+   .argmax = 2,
.flags  = CMD_NOFILE_OK,
-   .oneline= simulate a program crash using abort(3),
+   .args   = [-S signal],
+   .oneline= simulate a program crash,
+   .help   = abort_help,
 };
 
+static int abort_f(BlockDriverState *bs, int argc, char **argv)
+{
+int c;
+int sig = -1;
+
+while ((c = getopt(argc, argv, S:)) != EOF) {
+switch (c) {
+case 'S':
+sig = cvtnum(optarg);
+if (sig  0) {
+printf(non-numeric signal number argument -- %s\n, 
optarg);
+return 0;
+}
+break;
+
+default:
+return qemuio_command_usage(abort_cmd);
+}
+}
+
+if (optind != argc) {
+return qemuio_command_usage(abort_cmd);
+}
+
+if (sig  0) {
+abort();
+} else {
+/* While abort() does flush all open streams, using raise() to kill 
this
+ * process does not necessarily. At least stdout and stderr (although
+ * the latter should be non-buffered anyway) should be flushed, though.
+ */
+fflush(stdout);
+fflush(stderr);
+
+raise(sig);
+/* raise() may return */
+return 0;
+}
+}
+
 static void sleep_cb(void *opaque)
 {
 bool *expired = opaque;
-- 
1.9.3




[Qemu-devel] [PATCH 3/3] iotests: Fix test 039

2014-11-25 Thread Max Reitz
Test 039 used qemu-io -c abort for simulating a qemu crash; however,
abort() generally results in a core dump and ulimit -c 0 is no reliable
way of preventing that. Use abort -S 9 instead to have it crash
without a core dump.

Signed-off-by: Max Reitz mre...@redhat.com
---
 tests/qemu-iotests/039 | 12 +++-
 tests/qemu-iotests/039.out |  6 +++---
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/tests/qemu-iotests/039 b/tests/qemu-iotests/039
index 84c9167..813822c 100755
--- a/tests/qemu-iotests/039
+++ b/tests/qemu-iotests/039
@@ -47,9 +47,11 @@ _supported_os Linux
 _default_cache_mode writethrough
 _supported_cache_modes writethrough
 
-_no_dump_exec()
+_subshell_exec()
 {
-(ulimit -c 0; exec $@)
+# Executing crashing commands in a subshell prevents information like the
+# Killed line from being lost
+(exec $@)
 }
 
 size=128M
@@ -72,7 +74,7 @@ echo == Creating a dirty image file ==
 IMGOPTS=compat=1.1,lazy_refcounts=on
 _make_test_img $size
 
-_no_dump_exec $QEMU_IO -c write -P 0x5a 0 512 -c abort $TEST_IMG 21 | 
_filter_qemu_io
+_subshell_exec $QEMU_IO -c write -P 0x5a 0 512 -c abort -S 9 $TEST_IMG 
21 | _filter_qemu_io
 
 # The dirty bit must be set
 $PYTHON qcow2.py $TEST_IMG dump-header | grep incompatible_features
@@ -105,7 +107,7 @@ echo == Opening a dirty image read/write should repair it 
==
 IMGOPTS=compat=1.1,lazy_refcounts=on
 _make_test_img $size
 
-_no_dump_exec $QEMU_IO -c write -P 0x5a 0 512 -c abort $TEST_IMG 21 | 
_filter_qemu_io
+_subshell_exec $QEMU_IO -c write -P 0x5a 0 512 -c abort -S 9 $TEST_IMG 
21 | _filter_qemu_io
 
 # The dirty bit must be set
 $PYTHON qcow2.py $TEST_IMG dump-header | grep incompatible_features
@@ -121,7 +123,7 @@ echo == Creating an image file with lazy_refcounts=off ==
 IMGOPTS=compat=1.1,lazy_refcounts=off
 _make_test_img $size
 
-_no_dump_exec $QEMU_IO -c write -P 0x5a 0 512 -c abort $TEST_IMG 21 | 
_filter_qemu_io
+_subshell_exec $QEMU_IO -c write -P 0x5a 0 512 -c abort -S 9 $TEST_IMG 
21 | _filter_qemu_io
 
 # The dirty bit must not be set since lazy_refcounts=off
 $PYTHON qcow2.py $TEST_IMG dump-header | grep incompatible_features
diff --git a/tests/qemu-iotests/039.out b/tests/qemu-iotests/039.out
index 0adf153..35a04bd 100644
--- a/tests/qemu-iotests/039.out
+++ b/tests/qemu-iotests/039.out
@@ -11,7 +11,7 @@ No errors were found on the image.
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 
 wrote 512/512 bytes at offset 0
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-./039: Aborted ( ulimit -c 0; exec $@ )
+./039: Killed  ( exec $@ )
 incompatible_features 0x1
 ERROR cluster 5 refcount=0 reference=1
 ERROR OFLAG_COPIED data cluster: l2_entry=8005 refcount=0
@@ -46,7 +46,7 @@ read 512/512 bytes at offset 0
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 
 wrote 512/512 bytes at offset 0
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-./039: Aborted ( ulimit -c 0; exec $@ )
+./039: Killed  ( exec $@ )
 incompatible_features 0x1
 ERROR cluster 5 refcount=0 reference=1
 Rebuilding refcount structure
@@ -60,7 +60,7 @@ incompatible_features 0x0
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 
 wrote 512/512 bytes at offset 0
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-./039: Aborted ( ulimit -c 0; exec $@ )
+./039: Killed  ( exec $@ )
 incompatible_features 0x0
 No errors were found on the image.
 
-- 
1.9.3




[Qemu-devel] [PATCH 2/3] iotests: Filter for Killed in qemu-io output

2014-11-25 Thread Max Reitz
_filter_qemu_io already filters out the process ID when qemu-io is
aborted; the same should be done if it is aborted.

Signed-off-by: Max Reitz mre...@redhat.com
---
 tests/qemu-iotests/common.filter | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter
index dfb9726..6c14590 100644
--- a/tests/qemu-iotests/common.filter
+++ b/tests/qemu-iotests/common.filter
@@ -150,7 +150,7 @@ _filter_win32()
 _filter_qemu_io()
 {
 _filter_win32 | sed -e s/[0-9]* ops\; [0-9/:. sec]* ([0-9/.inf]* 
[EPTGMKiBbytes]*\/sec and [0-9/.inf]* ops\/sec)/X ops\; XX:XX:XX.X (XXX 
YYY\/sec and XXX ops\/sec)/ \
--e s/: line [0-9][0-9]*:  *[0-9][0-9]*\( Aborted\)/:\1/ \
+-e s/: line [0-9][0-9]*:  *[0-9][0-9]*\( Aborted\| Killed\)/:\1/ \
 -e s/qemu-io //g
 }
 
-- 
1.9.3




Re: [Qemu-devel] [PATCH 1/3] s390: Add PCI bus support

2014-11-25 Thread Frank Blaschka
On Tue, Nov 18, 2014 at 06:00:40PM +0100, Alexander Graf wrote:
 
 
 On 18.11.14 13:50, Frank Blaschka wrote:
  On Mon, Nov 10, 2014 at 04:14:16PM +0100, Alexander Graf wrote:
 
 
  On 10.11.14 15:20, Frank Blaschka wrote:
  From: Frank Blaschka frank.blasc...@de.ibm.com
 
  This patch implements a pci bus for s390x together with infrastructure
  to generate and handle hotplug events, to configure/unconfigure via
  sclp instruction, to do iommu translations and provide s390 support for
  MSI/MSI-X notification processing.
 
  Signed-off-by: Frank Blaschka frank.blasc...@de.ibm.com
 
 [...]
 
  diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
  new file mode 100644
  index 000..f2fa6ba
  --- /dev/null
  +++ b/hw/s390x/s390-pci-bus.c
  @@ -0,0 +1,485 @@
  +/*
  + * s390 PCI BUS
  + *
  + * Copyright 2014 IBM Corp.
  + * Author(s): Frank Blaschka frank.blasc...@de.ibm.com
  + *Hong Bo Li lih...@cn.ibm.com
  + *Yi Min Zhao zyi...@cn.ibm.com
  + *
  + * This work is licensed under the terms of the GNU GPL, version 2 or (at
  + * your option) any later version. See the COPYING file in the top-level
  + * directory.
  + */
  +
  +#include hw/pci/pci.h
  +#include hw/pci/pci_bus.h
  +#include hw/s390x/css.h
  +#include hw/s390x/sclp.h
  +#include hw/pci/msi.h
  +#include qemu/error-report.h
  +#include s390-pci-bus.h
  +
  +/* #define DEBUG_S390PCI_BUS */
  +#ifdef DEBUG_S390PCI_BUS
  +#define DPRINTF(fmt, ...) \
  +do { fprintf(stderr, S390pci-bus:  fmt, ## __VA_ARGS__); } while 
  (0)
  +#else
  +#define DPRINTF(fmt, ...) \
  +do { } while (0)
  +#endif
  +
  +static const unsigned long be_to_le = BITS_PER_LONG - 1;
  +static QTAILQ_HEAD(, SeiContainer) pending_sei =
  +QTAILQ_HEAD_INITIALIZER(pending_sei);
  +static QTAILQ_HEAD(, S390PCIBusDevice) device_list =
  +QTAILQ_HEAD_INITIALIZER(device_list);
 
  Please get rid of all statics ;). All state has to live in objects.
 
  
  be_to_le was misleading and unnecesary will remove this one but
  static QTAILQ_HEAD seems to be a common practice for list anchors.
  If you really want me to change this do you have any prefered way,
  or can you point me to some code doing this?
 
 For PCI devices, I don't think you need a list at all. Your PHB device
 should already have a proper qbus that knows about all its child devices.

OK

 
 As for pending_sei, what is this about?


This is a queue to store events (StoreEventInformation) used for hotplug
support. In case a device is pluged/unpluged an event is stored to this queue
and the guest is notified. Then the guest pick up the event information via
chsc instruction.
 
  
  +
  +int chsc_sei_nt2_get_event(void *res)
 
 [...]
 
  +
  +int chsc_sei_nt2_get_event(void *res);
  +int chsc_sei_nt2_have_event(void);
  +void s390_pci_sclp_configure(int configure, SCCB *sccb);
  +S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx);
  +S390PCIBusDevice *s390_pci_find_dev_by_fh(uint32_t fh);
  +S390PCIBusDevice *s390_pci_find_dev_by_fid(uint32_t fid);
 
  I think it makes sense to pass the PHB device as parameter on these.
  Don't assume you only have one.
  
  We need to lookup our device mainly in the instruction handlers and there
  we do not have a PHB available.
 
 Then have a way to find your PHB - either put a variable into the
 machine object, or find it by path via QOM tree lookups. Maybe we need
 multiple PHBs, identified by part of the ID? I know too little about the
 way PCI works on s390x to really tell.
 
 Again, are there specs?


Yes there are, but unfortunately they are not public.
 
  Also having one list for our S390PCIBusDevices
  devices does not prevent us from supporting more PHBs.
  
 
  +void s390_pci_bus_init(void);
  +uint64_t s390_pci_get_table_origin(uint64_t iota);
  +uint64_t s390_guest_io_table_walk(uint64_t guest_iota,
  +  uint64_t guest_dma_address);
 
  Why are these exported?
 
  +
  +#endif
  diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
  index bc4dc2a..2e25834 100644
  --- a/hw/s390x/s390-virtio-ccw.c
  +++ b/hw/s390x/s390-virtio-ccw.c
  @@ -18,6 +18,7 @@
   #include css.h
   #include virtio-ccw.h
   #include qemu/config-file.h
  +#include s390-pci-bus.h
   
   #define TYPE_S390_CCW_MACHINE   s390-ccw-machine
   
  @@ -127,6 +128,8 @@ static void ccw_init(MachineState *machine)
 machine-initrd_filename, s390-ccw.img);
   s390_flic_init();
   
  +s390_pci_bus_init();
 
  Please just inline that function here.
 
  
  What do you mean by just inline?
 
 The contents of the s390_pci_bus_init() function should just be standing
 right here. There's no value in creating a public wrapper function for
 initialization. We only did this back in the old days before qdev was
 around, because initialization was difficult back then and some devices
 didn't make the jump to get rid of their public init functions.
 
  
  +
   /* register 

Re: [Qemu-devel] [RFC v2 3/4] hw/pci-host: Add a generic PCI host controller for virtual platforms

2014-11-25 Thread Claudio Fontana
On 24.11.2014 15:57, alvise rigo wrote:
 Hi Claudio,
 
 Thank you for your review. Please see my in-line comments.
 
 On Mon, Nov 24, 2014 at 11:34 AM, Claudio Fontana
 claudio.font...@huawei.com wrote:
 On 21.11.2014 19:07, Alvise Rigo wrote:
 Add a generic PCI host controller for virtual platforms, based on the
 previous work by Rob Herring:
 http://lists.gnu.org/archive/html/qemu-devel/2014-06/msg03482.html

 Would it not be better to add rob herring's signoff, and then the explanation
 of what you changed from his patch, followed by your signoff?
 Or did you change so much that you had to redo the original work by Rob 
 Herring
 from scratch?
 
 It was actually difficult to create some meaningful patches over the
 initial ones.
 It's sure that for a final version these details will be properly
 addressed (signoff and diff over the original patches if possible).
 


 The controller creates a PCI bus for PCI devices; it is intended to
 receive from the platform all the needed information to initiate the
 memory regions expected by the PCI system. Due to this dependence, a
 header file is used to define the structure that the platform has to
 fill with the data needed by the controller. The device provides also a
 routine for the device tree node generation, which mostly has to take
 care of the interrupt-map property generation. This property is fetched
 by the guest operating system to map any PCI interrupt to the interrupt
 controller.

 For the time being, the device expects a GIC v2 to be used
 by the guest.

 That's a pretty big limitation for a generic controller.
 If it's only about the size of a parent interrupt cell or something like
 that, why don't we provide it as part of the platform description that
 you pass to the machinery anyway?
 
 Yes we can, however being totally interrupt controller independent
 means that the platform has to fully provide the way to describe a
 parent interrupt.
 For the time being, we use a description compatible with GICv2 (and
 v3), to cover the use case mach-virt + generic-pci (this is why I also
 called the interrupt specific structures gic*).
 If we really need to be more general, I can see two solutions: the
 first one is to find a way to pass all the necessary interrupt
 controller information to the device machinery (phandle, #cells,
 interrupt number, etc.).
 I don't know how such a solution could get complicated in the future,
 with some other virt-platform that requires a complicated interrupt
 mapping for example.
 The second would be to move all the device node generation back to the
 platform, making its code even more crowded.
 Are there other solutions that I'm missing?
 

 Only mach-virt has been used to test the controller.

 Signed-off-by: Alvise Rigo a.r...@virtualopensystems.com
 ---
  hw/pci-host/Makefile.objs |   2 +-
  hw/pci-host/generic-pci.c | 280 
 ++
  include/hw/pci-host/generic-pci.h |  74 ++
  3 files changed, 355 insertions(+), 1 deletion(-)
  create mode 100644 hw/pci-host/generic-pci.c
  create mode 100644 include/hw/pci-host/generic-pci.h

 diff --git a/hw/pci-host/Makefile.objs b/hw/pci-host/Makefile.objs
 index bb65f9c..8ef9fac 100644
 --- a/hw/pci-host/Makefile.objs
 +++ b/hw/pci-host/Makefile.objs
 @@ -1,4 +1,4 @@
 -common-obj-y += pam.o
 +common-obj-y += pam.o generic-pci.o

  # PPC devices
  common-obj-$(CONFIG_PREP_PCI) += prep.o
 diff --git a/hw/pci-host/generic-pci.c b/hw/pci-host/generic-pci.c
 new file mode 100644
 index 000..9c90263
 --- /dev/null
 +++ b/hw/pci-host/generic-pci.c
 @@ -0,0 +1,280 @@
 +/*
 + * Generic PCI host controller
 + *
 + * Copyright (c) 2014 Linaro, Ltd.
 + * Author: Rob Herring rob.herr...@linaro.org
 + *
 + * Based on ARM Versatile PCI controller (hw/pci-host/versatile.c):
 + * Copyright (c) 2006-2009 CodeSourcery.
 + * Written by Paul Brook
 + *
 + * This code is licensed under the LGPL.
 + */
 +
 +#include hw/sysbus.h
 +#include hw/pci-host/generic-pci.h
 +#include exec/address-spaces.h
 +#include sysemu/device_tree.h
 +
 +static const VMStateDescription pci_generic_host_vmstate = {
 +.name = generic-host-pci,
 +.version_id = 1,
 +.minimum_version_id = 1,
 +};
 +
 +static void pci_cam_config_write(void *opaque, hwaddr addr,
 + uint64_t val, unsigned size)
 +{

 I would add a comment:
 /* the MemoryRegionOps require uint64_t val, but we can only do 32bit */

 there are already asserts in pci_host.c, so that should be sufficient.
 
 Agreed.
 

 +PCIGenState *s = opaque;
 +pci_data_write(s-pci_bus, addr, val, size);
 +}
 +
 +static uint64_t pci_cam_config_read(void *opaque, hwaddr addr, unsigned 
 size)

 /* same here */

 +{
 +PCIGenState *s = opaque;
 +uint32_t val;
 +val = pci_data_read(s-pci_bus, addr, size);
 +return val;
 +}
 +
 +static const MemoryRegionOps pci_vpb_config_ops = {
 +.read = pci_cam_config_read,
 +.write = pci_cam_config_write,
 +

Re: [Qemu-devel] [RFC v2 0/4] Add Generic PCI host device update

2014-11-25 Thread alvise rigo
On Mon, Nov 24, 2014 at 4:50 PM, Claudio Fontana
claudio.font...@huawei.com wrote:
 Another general question about this series use:

 why do all these other devices that are unrelated to the virt platform show 
 up?

There are two devices added to the platform by default in mach-virt.
Look at the end of create_pci_host() in virt.c, you will find that a
pci-ohci and a lsi53c895a device are created.

 Here I am running on the guest with just virtio-net, virtio-blk and 
 virtio-rng:

 (qemu) info pci
   Bus  0, device   0, function 0:
 Class 2880: PCI device 1b36:1234
   id 
   Bus  0, device   1, function 0:
 USB controller: PCI device 106b:003f
   IRQ 0.
   BAR0: 32 bit memory at 0x [0x00fe].
   id 
   Bus  0, device   2, function 0:
 SCSI controller: PCI device 1000:0012
   IRQ 0.
   BAR0: I/O at 0x [0x00fe].
   BAR1: 32 bit memory at 0x [0x03fe].
   BAR2: 32 bit memory at 0x [0x1ffe].
   id 
   Bus  0, device   3, function 0:
 SCSI controller: PCI device 1af4:1001
   IRQ 0.
   BAR0: I/O at 0x0100 [0x013f].
   id blk0
   Bus  0, device   4, function 0:
 Class 0255: PCI device 1af4:1005
   IRQ 0.
   BAR0: I/O at 0x0140 [0x015f].
   id 
   Bus  0, device   5, function 0:
 Ethernet controller: PCI device 1af4:1000
   IRQ 0.
   BAR0: I/O at 0x0160 [0x017f].
   BAR6: 32 bit memory at 0x [0x0003fffe].
   id 

 Also what is the BAR6 for the virtio-net device? I am struggling to 
 understand where it is coming from...

I think it has to do with the efi-virtio.rom which is supplied to the
virtio-net-pci device.
At the end of pci_add_option_rom() in hw/pci/pci.c you will see the
sixth bar registered.

alvise


 Thanks,

 Claudio

 On 21.11.2014 19:07, Alvise Rigo wrote:
 This patch series is based on the previous work [1] and [2] by Rob
 Herring and on [3] by myself.  For sake of readability and since this is
 still a RFC, these patches come as a stand alone work, so there's no
 need to apply first [1][2][3].  it tries to enhance this work on these
 points:

 Improvements from v1:

 - The code should be general enough to allow the use of the controller
   with other platforms, not only with mach-virt.  The only assumption
   made is that a GIC v2 is used at guest side (the interrupt-map
   property describes the parent interrupts using the three cells
   format).
 - The interrupt-map node generation has been enhanced in the following
   aspects:
   - support of multi-function PCI device has been added
   - a PCI device can now use an interrupt pin different from #INTA

 Since some other works like [4] require to modify the device tree only
 when all the devices have been instantiated, the PATCH[1/4] proposes a
 solution for mach-virt to allow multiple agents (e.g., generic-pci,
 VFIO) to modify the device tree. The approach in simple: a global list
 is kept to store all the routines that perform the modification of the
 device tree. Eventually, when the machine is completed, all these
 routines are sequentially executed and the kernel is loaded to the guest
 by mean of a machine_init_done_notifier.
 In the context of this patch, here are some questions:
 Rather than postponing the arm_load_kernel call as this patch does,
 should we use instead the modify_dtb call provided by arm_boot_info to
 modify the device tree?
 If so, shouldn't modify_dtb be used to modify only *user* provided
 device trees?

 This work has been tested attaching several PCI devices to the mach-virt
 platform using an ARMv7 CPU. The tested devices are: virtio-blk-pci,
 virtio-net-pci, lsi53c895a and pci-ohci (all attached at the same time).

 TODO:
 - Add MSI, MSI-X support
 - PCI-E support. Due to a lack of devices, this part is a bit hard to
   accomplish at the moment.

 Thank you, alvise

 [1]
 [Qemu-devel] [RFC PATCH 1/2] hw/pci-host: add a generic PCI host
 http://lists.gnu.org/archive/html/qemu-devel/2014-06/msg03482.html
 [2]
 [Qemu-devel] [RFC PATCH 2/2] hw/arm/virt: Add generic PCI host device
 http://lists.gnu.org/archive/html/qemu-devel/2014-06/msg03483.html
 [3]
 [Qemu-devel] [RFC PATCH 0/8] Add Generic PCI host device update
 https://lists.gnu.org/archive/html/qemu-devel/2014-07/msg01957.html
 [4]
 http://lists.nongnu.org/archive/html/qemu-devel/2014-10/msg03816.html

 Alvise Rigo (4):
   hw/arm/virt: Allow multiple agents to modify dt
   hw/arm/virt: find_machine_info: handle NULL value
   hw/pci-host: Add a generic PCI host controller for virtual platforms
   hw/arm/virt: Add generic-pci device support

  hw/arm/virt.c | 114 +++-
  hw/pci-host/Makefile.objs |   2 +-
  hw/pci-host/generic-pci.c | 280 
 ++
  include/hw/pci-host/generic-pci.h |  74 ++
  4 files changed, 468 insertions(+), 2 deletions(-)
  create mode 100644 hw/pci-host/generic-pci.c
  

Re: [Qemu-devel] [PATCH 2/2] vga: flip qemu 2.2 pc machine types from cirrus to stdvga

2014-11-25 Thread Michael S. Tsirkin
On Mon, Nov 24, 2014 at 01:18:59PM +0100, Gerd Hoffmann wrote:
   Hi,
 
This is not new - it was disabled with -vga std previously -
but poses a bigger problem now it's the default?
Thoughts?  Something we can fix for 2.2?
   
   Which windows version is this?
   
   I'm wondering why windows handles stdvga different from cirrus.  Recent
   windows versions don't ship cirrus drivers any more, so windows uses the
   vgabios to drive the card in both cases ...
   
   cheers,
 Gerd
   
  
  I tested with XP. I can try others if you like.
 
 Yep.  WinXP was the last release shipping with cirrus drivers, so, yes,
 it'll be a change there.  On anything more recent I'd expect you don't
 see a difference in behavior between cirrus and stdvga.
 
 WinXP is out of support though, and I see little reason to care too much
 here.  Especially as this is only about picking a default, not about
 dropping cirrus support.  If you need it it is still there, and machine
 types for 2.1  older continue to default to cirrus.
 
 cheers,
   Gerd

I checked windows 7, and I see a problem with it, as well:
with -vga cirrus, hybernate is enabled, with new default,
it is disabled :(

-- 
MST



Re: [Qemu-devel] [PATCH 00/13] linux-aio/virtio-scsi: support AioContext wide IO submission as batch

2014-11-25 Thread Paolo Bonzini


On 22/11/2014 13:33, Ming Lei wrote:
  these patches are interesting.  I would like to compare them with the
  opposite approach (and, I think, more similar to your old work) where
  the qemu_laio_state API is moved entirely into AioContext, with lazy
  allocation (reference-counted too, probably).
 
 Yes, it can be done in that way, but the feature is linux native aio
 specific, so it might not be good to put it into AioContext.

I think it's not a problem as long as the eventfd and io queue is
created lazily.  My main issue with these series is that
aio_attach_aio_bs() (and detach) feels like a very ad hoc API.  Adding
io queue support directly in AioContext sounds better.

 Basically most of the implementation should be same, and the
 difference should be where the io queue is put.

Yes, the change is not big.

Paolo




Re: [Qemu-devel] [PATCH v2 00/12] qcow2: Add new overlap check functions

2014-11-25 Thread Max Reitz

On 2014-11-24 at 16:56, Max Reitz wrote:

RAM usage
=

So I have looked at my 2 GB image above, and the list uses 40 kB, which
may or may not be too much (sounds completely fine to me for an image
with 512 byte clusters); but it is a least a number I can use for
testing the following theoretical inspection:


(I'm in the process of some kind of self-review right now)

Wrong, it's 371 kB after patch 1 is fixed.

[snip]


Let's test that for the above image, which has a disk size of 266 MB:


Except the disk size doesn't matter; the image was created with 
preallocation=metadata, therefore all the metadata for a 2 GB virtual 
image is there. Let's check the file length: 2190559232, two percent 
above 2 GB. Sounds reasonable.


For that file length, we actually have:

40 * 2190559232 / (512 * 512) = 326 kB

Hm, okay, so it doesn't work so well. The good message is: I know why. 
In the calculation given here, I omitted the size of 
Qcow2MetadataWindow; for every WINDOW_SIZE (= WS) clusters, there is one 
such object. Let's include it in the calculation 
(sizeof(Qcow2MetadataWindow) is 40 on my x64 system):


40 * IS / (CS * CS) + 40 * IS / (CS * WS)
= 40 * IS / CS * (1 / CS + 1 / WS)

Okay, something else I forgot? There is the Qcow2MetadataList object 
itself; but we have it only once, so let's omit that. Then there is an 
integer array with an entry per cache entry and the cache itself; 
qcow2_create_empty_metadata_list() limits the cache size so that it that 
integer array and the cached bitmaps will not surpass the given byte 
size (currently 64 kB), so I'll just omit it as well (it's constant and 
can easily be adjusted).


So, with the above term we have:

40 * 2190559232 / 512 * (1 / 512 + 1 / 4096) = 367 kB

Much better.


40 * 266M / (512 * 512) = 42 kB

Great! It works.


So, now let's set CS to 64 kB, because that is basically the only
cluster size we really care about. For a 1 TB image, we need 10 kB for
the list. Sounds great to me. For a 1 PB image, we will need 10 MB. Fair
enough. (Note that you don't need 10 MB of RAM to create a 1 PB image;
you only need that once the disk size of the image has reached 1 PB).

And 1 TB with 512 byte clusters? 160 MB. Urgh, that is a lot. But then
again, you can switch off the overlap check with overlap-check=off; and
trying to actually use a 1 TB image with 512 byte clusters is crazy in
itself (have you tried just creating one without preallocation? It takes
forever). So I can live with that.


And with the fixed term:

1 TB / 64 kB: 170 kB
1 PB / 64 kB: 170 MB

1 TB / 512 B: 180 MB

The actually problematic 512 B cluster version actually doesn't get so 
much worse (because 1 / 4096  1 / 512; whereas 1 / 4096  1 / 65536, 
which is why fixing the term has a much higher impact on greater cluster 
sizes).


But for the default of 64 kB, the size basically explodes. We now can 
either choose to ignore that fact (17x is a lot, but using more than 1 
MB starting from 6 TB still sounds fine to me) or increase WINDOW_SIZE 
(to a maximum of 65536, which would reduce the RAM usage to 20 kB for a 
1 TB image and 20 MB for a 1 PB image), which would probably somewhat 
limit performance in the conversion case, but since I haven't seen any 
issues for WINDOW_SIZE = 4096, I don't think it should make a huge 
difference. But as a side effect, we will want to increase the cache 
size, because with the current default of 64 kB, we will have only one 
cached bitmap; but we probably want to have at least two, maybe four if 
possible. But 256 kB does not sound too bad either.



tl;dr
=

* CPU usage at runtime decreased by 150 to 275 percent on
   overlap-check-heavy tasks
* No additional performance problems at loading time (in theory has the
   same runtime complexity as a single overlap check right now; in
   practice I could not find any problems)
* Decent RAM usage (40 kB for a 1 TB image with 64 kB clusters; 40 MB
   for a 1 PB image etc. pp.)


I'm not sure why I wrote 40 kB and 40 MB here; it was 10 kB and 10 MB.

Anyway, now it's 170 kB for a 1 TB image and 170 MB for a 1 PB image.

Max



Re: [Qemu-devel] [PATCH v2 01/12] qcow2: Add new overlap check functions

2014-11-25 Thread Max Reitz

On 2014-11-24 at 16:56, Max Reitz wrote:

The existing qcow2 metadata overlap detection function used existing
structures to determine the location of the image metadata, from plain
fields such as l1_table_offset and l1_size in the BDRVQcowState, over
image structures in memory such as the L1 table for the L2 tables'
positions, or it even read the required data directly from disk for
every requested check, such as the snapshot L1 tables for the inactive
L2 tables' positions.

These new functions instead keep a dedicated structure for keeping track
of the metadata positions in memory. It consists of two parts: First,
there is one structure which is basically a list of all metadata
structures. Each entry has a bitmask of types (because some metadata
structures may actually overlap, such as active and inactive L2 tables),
a number of clusters occupied and the offset from the previous entry in
clusters. This structure requires relatively few memory, but checking a
certain point may take relatively long. Each entry is called a
fragment.

Therefore, there is another representation which is a bitmap, or rather
a bytemap, of metadata types. The previously described list is split
into multiple windows with each describing a constant number of clusters
(WINDOW_SIZE). If the list is to be queried or changed, the respective
window is selected in constant time and the bitmap is generated from the
fragments belonging to the window. This bitmap can then be queried in
constant time and easily be changed.

Because the bitmap representation requires more memory, it is only used
as a cache. Whenever a window is removed from the cache, the fragment
list will be rebuilt from the bitmap if the latter has been modified.
Therefore, the fragment list is only used as the background
representation to save memory, whereas the bitmap is used whenever
possible.

Regarding the size of the fragment list in memory: As one refcount block
can handle cluster_size / 2 entries and one L2 table can handle
cluster_size / 8 entries, for a qcow2 image with the standard cluster
size of 64 kB, there is a ratio of data to metadata of about 1/6000
(1/32768 refblocks and 1/8192 L2 tables) if one ignores the fact that
not every cluster requires an L2 table entry. The refcount table and the
L1 table is generally negligible. At the worst, each metadata cluster
requires its own entry in the fragment list; each entry takes up four
bytes, therefore, at the worst, the fragment list should take up (for an
image with 64 kB clusters) (4 B) / (64 kB * 6000) of the image size,
which is about 1.e-8 (i.e., 11 kB for a 1 TB image, or 11 MB for a 1 PB
image).

Signed-off-by: Max Reitz mre...@redhat.com
---
  block/Makefile.objs   |   3 +-
  block/qcow2-overlap.c | 404 ++
  block/qcow2.h |  13 ++
  3 files changed, 419 insertions(+), 1 deletion(-)
  create mode 100644 block/qcow2-overlap.c

diff --git a/block/qcow2-overlap.c b/block/qcow2-overlap.c
new file mode 100644
index 000..9f3f2aa
--- /dev/null
+++ b/block/qcow2-overlap.c


[snip]


+
+/**
+ * Destroys the cached window bitmap. If it has been modified, the fragment 
list
+ * will be rebuilt accordingly.
+ */
+static void destroy_window_bitmap(Qcow2MetadataList *mdl,
+  Qcow2MetadataWindow *window)
+{
+if (!window-bitmap) {
+return;
+}
+
+if (window-bitmap_modified) {
+int bitmap_i, fragment_i = 0;
+QCow2MetadataOverlap current_types = 0;
+int current_nb_clusters = 0;
+
+/* Rebuild the fragment list; the case bitmap_i == WINDOW_SIZE is for
+ * entering the last fragment at the bitmap end */
+
+for (bitmap_i = 0; bitmap_i = WINDOW_SIZE; bitmap_i++) {
+/* Qcow2MetadataFragment::nb_clusters is a uint8_t, so
+ * current_nb_clusters may not exceed 255 */
+if (bitmap_i  WINDOW_SIZE 
+current_types == window-bitmap[bitmap_i] 
+current_nb_clusters  255)
+{
+current_nb_clusters++;
+} else {
+if (current_types  current_nb_clusters) {
+if (fragment_i = window-fragments_array_size) {
+window-fragments_array_size =
+3 * window-fragments_array_size / 2 + 1;
+
+/* new_nb_fragments should be small enough, and there 
is
+ * nothing we can do on failure anyway, so do not use
+ * g_try_renew() here */
+window-fragments =
+g_renew(Qcow2MetadataFragment, window-fragments,
+window-fragments_array_size);
+}
+
+window-fragments[fragment_i++] = (Qcow2MetadataFragment){
+.types  = current_types,
+.nb_clusters= current_nb_clusters,
+ 

Re: [Qemu-devel] [PULL v2 00/15] pc, pci, misc bugfixes

2014-11-25 Thread Peter Maydell
On 24 November 2014 at 19:30, Michael S. Tsirkin m...@redhat.com wrote:
 The following changes since commit 0e88f478508b566152c6681f4889ed9830a2c0a5:

   Merge remote-tracking branch 'remotes/stefanha/tags/net-pull-request' into 
 staging (2014-11-21 14:15:37 +)

 are available in the git repository at:

   git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream

 for you to fetch changes up to dd0247e09a542d2a7ba6e390c70b5616edb9ec56:

   pc: acpi: mark all possible CPUs as enabled in SRAT (2014-11-24 20:57:11 
 +0200)

 
 pc, pci, misc bugfixes

 A bunch of bugfixes for 2.2.

 Signed-off-by: Michael S. Tsirkin m...@redhat.com

 

Applied, thanks.

-- PMM



Re: [Qemu-devel] [PATCH v2 2/6] geometry: Detect blocksize via ioctls in separate static functions

2014-11-25 Thread Stefan Hajnoczi
On Fri, Nov 21, 2014 at 11:17:02AM +0100, Christian Borntraeger wrote:
 Am 19.11.2014 um 11:17 schrieb Ekaterina Tumanova:
  Move the IOCTL calls that detect logical blocksize from raw_probe_alignment
  into separate function (probe_logical_blocksize).
  Introduce function which detect physical blocksize via IOCTL
  (probe_physical_blocksize).
  Both functions will be used in the next patch.
  
  Signed-off-by: Ekaterina Tumanova tuman...@linux.vnet.ibm.com
 
 From what I can tell this should be a no-op for raw_probe_alignment.
 
 probe_physical_blocksize looks also good. When this patch is applied 
 stand-alone,
 gcc will complain about a defined but unused function, though.
 
 So we might want to move this function into patch 3 or just add an 
 __attribute__((unused))
 here (and remove that in patch 3). Or just leave it as is.

Please move probe_physical_blocksize() to Patch 3 because some QEMU
builds default to -Werror where this patch causes a build failure.

(In order for git-bisect(1) to work patches must not break the build.)

Stefan


pgpo4bEkT9sy2.pgp
Description: PGP signature


Re: [Qemu-devel] [BUGFIX][PATCH for 2.2 1/1] hw/ide/core.c: Prevent SIGSEGV during migration

2014-11-25 Thread Markus Armbruster
Dr. David Alan Gilbert dgilb...@redhat.com writes:

 * Don Slutz (dsl...@verizon.com) wrote:
 On 11/21/14 05:49, Dr. David Alan Gilbert wrote:
 * Markus Armbruster (arm...@redhat.com) wrote:
[...]
 Management tools should use -nodefaults.  But if it mixes default and
 -nodefaults in migration, recreating the stuff it got by default but
 doesn't get with -nodefaults is its own responsibility.
 
 Yes. Since xen did not ask for a CD-ROM, it did not expect to need to create
 it.   xen was fixed to use -nodefaults.
 
 Well, mostly - we wouldn't expect a migration to work if the source/dest
 didn't match exactly; but QEMU shouldn't seg.
 
 Well, this change prevents a seg.  So you are in favor of having this
 backported?

 Yes.

I gladly defer to Dave's judgement here.



Re: [Qemu-devel] File too large error from qemu-img snapshot (was Re: AW: Bug Repoting Directions Request)

2014-11-25 Thread Kevin Wolf
Am 22.11.2014 um 18:02 hat Prof. Dr. Michael Schefczyk geschrieben:
 Dear All,
 
 after some trying, my impression is that the following steps do work with 
 plain Centos 7:
 
 virsh snapshot-create-as VM backsnap
 qemu-img convert -f qcow2 -s backsnap -O qcow2 VM.img backup.img
 virsh snapshot-delete VM backsnap
 
 Am I on the right track with these commands?

I won't tell you that you're bound to break your image with this
sequence, because chances are that you won't break it. (In practice,
this happens to work in most cases, because the essential snapshot
metadata generally isn't written to without explicit user actions on
that snapshot.)

But you're violating the rule of one writer or many readers, so as far
as qemu is concerned, we won't be careful to avoid breaking this setup.
Don't blame us if it fails one day.

With the QMP solutions (either drive-backup or snapshot/commit) you're
using official APIs, so I'd encourage you to use these.

Kevin


pgpzsx9nPqmaF.pgp
Description: PGP signature


Re: [Qemu-devel] File too large error from qemu-img snapshot (was Re: AW: Bug Repoting Directions Request)

2014-11-25 Thread Prof. Dr. Michael Schefczyk
Dear Kevin,

thank you very much! For the moment, I did stabilize my systems with these 
commands. For a next step, will explore three further options, which should all 
solve the issue better:

- move to ovirt altogether - probably as foolproof as needed at my skill level

- install ovirt's qemu-kvm-rhev to use the more extensive options

- move to QMP

Thanks again also to Paolo and Eric for providing substantial insights!

Regards,

Michael

-UrsprĂĽngliche Nachricht-
Von: Kevin Wolf [mailto:kw...@redhat.com] 
Gesendet: Dienstag, 25. November 2014 12:34
An: Prof. Dr. Michael Schefczyk
Cc: Eric Blake; Paolo Bonzini; qemu-devel
Betreff: Re: [Qemu-devel] File too large error from qemu-img snapshot (was 
Re: AW: Bug Repoting Directions Request)

Am 22.11.2014 um 18:02 hat Prof. Dr. Michael Schefczyk geschrieben:
 Dear All,
 
 after some trying, my impression is that the following steps do work with 
 plain Centos 7:
 
 virsh snapshot-create-as VM backsnap
 qemu-img convert -f qcow2 -s backsnap -O qcow2 VM.img backup.img virsh 
 snapshot-delete VM backsnap
 
 Am I on the right track with these commands?

I won't tell you that you're bound to break your image with this sequence, 
because chances are that you won't break it. (In practice, this happens to work 
in most cases, because the essential snapshot metadata generally isn't written 
to without explicit user actions on that snapshot.)

But you're violating the rule of one writer or many readers, so as far as 
qemu is concerned, we won't be careful to avoid breaking this setup.
Don't blame us if it fails one day.

With the QMP solutions (either drive-backup or snapshot/commit) you're using 
official APIs, so I'd encourage you to use these.

Kevin


smime.p7s
Description: S/MIME Cryptographic Signature


[Qemu-devel] [Bug 1368815] Re: qemu-img convert intermittently corrupts output images

2014-11-25 Thread Kevin Wolf
** Description changed:

  ==
- Impact: occasional qcow2 corruption
+ Impact: occasional image corruption (any format on local filesystem)
  Test case: see the qemu-img command below
  Regression potential: this cherrypicks a patch from upstream to a 
not-insignificantly older qemu source tree.  While the cherrypick seems sane, 
it's possible that there are subtle interactions with the other delta.  I'd 
really like for a full qa-regression-test qemu testcase to be run against this 
package.
  ==
  
  -- Found in releases qemu-2.0.0, qemu-2.0.2, qemu-2.1.0. Tested on
  Ubuntu 14.04 using Ext4 filesystems.
  
  The command
  
-   qemu-img convert -O raw inputimage.qcow2 outputimage.raw
+   qemu-img convert -O raw inputimage.qcow2 outputimage.raw
  
  intermittently creates corrupted output images, when the input image is
  not yet fully synchronized to disk. While the issue has actually been
  discovered in operation of of OpenStack nova, it can be reproduced
  easily on command line using
  
-   cat $SRC_PATH  $TMP_PATH  $QEMU_IMG_PATH convert -O raw $TMP_PATH
+   cat $SRC_PATH  $TMP_PATH  $QEMU_IMG_PATH convert -O raw $TMP_PATH
  $DST_PATH  cksum $DST_PATH
  
  on filesystems exposing this behavior. (The difficult part of this
  exercise is to prepare a filesystem to reliably trigger this race. On my
  test machine some filesystems are affected while other aren't, and
  unfortunately I haven't found the relevant difference between them, yet.
  Possible it's timing issues completely out of userspace control ...)
  
  The root cause, however, is the same as in
  
-   http://lists.gnu.org/archive/html/coreutils/2011-04/msg00069.html
+   http://lists.gnu.org/archive/html/coreutils/2011-04/msg00069.html
  
  and it can be solved the same way as suggested in
  
-   http://lists.gnu.org/archive/html/coreutils/2011-04/msg00102.html
+   http://lists.gnu.org/archive/html/coreutils/2011-04/msg00102.html
  
  In qemu, file block/raw-posix.c use the FIEMAP_FLAG_SYNC, i.e change
  
- f.fm.fm_flags = 0;
+ f.fm.fm_flags = 0;
  
  to
  
- f.fm.fm_flags = FIEMAP_FLAG_SYNC;
+ f.fm.fm_flags = FIEMAP_FLAG_SYNC;
  
  As discussed in the thread mentioned above, retrieving a page cache
  coherent map of file extents is possible only after fsync on that file.
  
  See also
  
-   https://bugs.launchpad.net/nova/+bug/1350766
+   https://bugs.launchpad.net/nova/+bug/1350766
  
  In that bug report filed against nova, fsync had been suggested to be
  performed by the framework invoking qemu-img. However, as the choice of
  fiemap -- implying this otherwise unneeded fsync of a temporary file  --
  is not made by the caller but by qemu-img, I agree with the nova bug
  reviewer's objection to put it into nova. The fsync should instead be
  triggered by qemu-img utilizing the FIEMAP_FLAG_SYNC, specifically
  intended for that purpose.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1368815

Title:
  qemu-img convert intermittently corrupts output images

Status in OpenStack Compute (Nova):
  In Progress
Status in QEMU:
  In Progress
Status in “qemu” package in Ubuntu:
  Fix Released
Status in “qemu” source package in Trusty:
  Triaged
Status in “qemu” source package in Utopic:
  Triaged
Status in “qemu” source package in Vivid:
  Fix Released

Bug description:
  ==
  Impact: occasional image corruption (any format on local filesystem)
  Test case: see the qemu-img command below
  Regression potential: this cherrypicks a patch from upstream to a 
not-insignificantly older qemu source tree.  While the cherrypick seems sane, 
it's possible that there are subtle interactions with the other delta.  I'd 
really like for a full qa-regression-test qemu testcase to be run against this 
package.
  ==

  -- Found in releases qemu-2.0.0, qemu-2.0.2, qemu-2.1.0. Tested on
  Ubuntu 14.04 using Ext4 filesystems.

  The command

    qemu-img convert -O raw inputimage.qcow2 outputimage.raw

  intermittently creates corrupted output images, when the input image
  is not yet fully synchronized to disk. While the issue has actually
  been discovered in operation of of OpenStack nova, it can be
  reproduced easily on command line using

    cat $SRC_PATH  $TMP_PATH  $QEMU_IMG_PATH convert -O raw $TMP_PATH
  $DST_PATH  cksum $DST_PATH

  on filesystems exposing this behavior. (The difficult part of this
  exercise is to prepare a filesystem to reliably trigger this race. On
  my test machine some filesystems are affected while other aren't, and
  unfortunately I haven't found the relevant difference between them,
  yet. Possible it's timing issues completely out of userspace control
  ...)

  The root cause, however, is the 

Re: [Qemu-devel] [PATCH] input: move input-send-event into experimental namespace

2014-11-25 Thread Markus Armbruster
Amos Kong ak...@redhat.com writes:

 On Tue, Nov 25, 2014 at 09:06:34AM +0100, Markus Armbruster wrote:
 Gerd Hoffmann kra...@redhat.com writes:
 
  Ongoing discussions on how we are going to specify the console,
  so tag the command as experiemntal so we can refine things in
  the 2.3 development cycle.
 
  Signed-off-by: Gerd Hoffmann kra...@redhat.com
  ---
   qmp-commands.hx | 12 ++--
 
 Don't you need to patch qapi-schema.json, too?

 Not necessary in function level.

s/need to/want to/?

For consistency, especially because the QAPI schema also serves as QMP
command documentation.

 Do we actually explain x- means experimental anywhere?

 What's the official way to make command experimental?

I think we have an understanding / convention that an x- prefix marks
names as unstable API.  But I can't find it spelled out anywhere.
Anyway, separate issue.

 Quote from qapi-schema.json:
 | # Notes: This command is experimental and may change
 | syntax in future releases.

Next to the return type ObjectTypeInfo rathe than the command
qom-list-types, blech.



Re: [Qemu-devel] [PATCH 1/3] s390: Add PCI bus support

2014-11-25 Thread Alexander Graf


On 25.11.14 11:11, Frank Blaschka wrote:
 On Tue, Nov 18, 2014 at 06:00:40PM +0100, Alexander Graf wrote:


 On 18.11.14 13:50, Frank Blaschka wrote:
 On Mon, Nov 10, 2014 at 04:14:16PM +0100, Alexander Graf wrote:


 On 10.11.14 15:20, Frank Blaschka wrote:
 From: Frank Blaschka frank.blasc...@de.ibm.com

 This patch implements a pci bus for s390x together with infrastructure
 to generate and handle hotplug events, to configure/unconfigure via
 sclp instruction, to do iommu translations and provide s390 support for
 MSI/MSI-X notification processing.

 Signed-off-by: Frank Blaschka frank.blasc...@de.ibm.com

 [...]

 diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
 new file mode 100644
 index 000..f2fa6ba
 --- /dev/null
 +++ b/hw/s390x/s390-pci-bus.c
 @@ -0,0 +1,485 @@
 +/*
 + * s390 PCI BUS
 + *
 + * Copyright 2014 IBM Corp.
 + * Author(s): Frank Blaschka frank.blasc...@de.ibm.com
 + *Hong Bo Li lih...@cn.ibm.com
 + *Yi Min Zhao zyi...@cn.ibm.com
 + *
 + * This work is licensed under the terms of the GNU GPL, version 2 or (at
 + * your option) any later version. See the COPYING file in the top-level
 + * directory.
 + */
 +
 +#include hw/pci/pci.h
 +#include hw/pci/pci_bus.h
 +#include hw/s390x/css.h
 +#include hw/s390x/sclp.h
 +#include hw/pci/msi.h
 +#include qemu/error-report.h
 +#include s390-pci-bus.h
 +
 +/* #define DEBUG_S390PCI_BUS */
 +#ifdef DEBUG_S390PCI_BUS
 +#define DPRINTF(fmt, ...) \
 +do { fprintf(stderr, S390pci-bus:  fmt, ## __VA_ARGS__); } while 
 (0)
 +#else
 +#define DPRINTF(fmt, ...) \
 +do { } while (0)
 +#endif
 +
 +static const unsigned long be_to_le = BITS_PER_LONG - 1;
 +static QTAILQ_HEAD(, SeiContainer) pending_sei =
 +QTAILQ_HEAD_INITIALIZER(pending_sei);
 +static QTAILQ_HEAD(, S390PCIBusDevice) device_list =
 +QTAILQ_HEAD_INITIALIZER(device_list);

 Please get rid of all statics ;). All state has to live in objects.


 be_to_le was misleading and unnecesary will remove this one but
 static QTAILQ_HEAD seems to be a common practice for list anchors.
 If you really want me to change this do you have any prefered way,
 or can you point me to some code doing this?

 For PCI devices, I don't think you need a list at all. Your PHB device
 should already have a proper qbus that knows about all its child devices.
 
 OK
 

 As for pending_sei, what is this about?

 
 This is a queue to store events (StoreEventInformation) used for hotplug
 support. In case a device is pluged/unpluged an event is stored to this queue
 and the guest is notified. Then the guest pick up the event information via
 chsc instruction.

Is this for overall CCW or only for PCI? Depending on the answer, you
can put the sei event list into the respective parent device.


Alex



Re: [Qemu-devel] [PATCH 0/3] iotests: Fix test 039

2014-11-25 Thread Markus Armbruster
Max Reitz mre...@redhat.com writes:

 Test 039 used to fail

I'm confused: used to suggests it doesn't anymore, but you sending a
patches strongly suggests something's broken.

   because qemu-io -c abort may generate core dumps
 even with ulimit -c 0 (and the output then contains (core dumped)).

How?

[...]



Re: [Qemu-devel] [PATCH 0/3] iotests: Fix test 039

2014-11-25 Thread Max Reitz

On 2014-11-25 at 13:21, Markus Armbruster wrote:

Max Reitz mre...@redhat.com writes:


Test 039 used to fail

I'm confused: used to suggests it doesn't anymore, but you sending a
patches strongly suggests something's broken.


Well, it used to fail before this series. :-P

You're right, this sounds bad. Currently, 039 does fail, at least on any 
system with a /proc/sys/kernel/core_pattern passing the dump to another 
program. After this series, it does no longer.



   because qemu-io -c abort may generate core dumps
even with ulimit -c 0 (and the output then contains (core dumped)).

How?


See the patches[1][2] by Mao Chuan Li. If /proc/sys/kernel/core_pattern 
passes the dump to another program, ulimit -c 0 does not matter.


[1] http://lists.nongnu.org/archive/html/qemu-devel/2014-11/msg02092.html
[2] http://lists.nongnu.org/archive/html/qemu-devel/2014-11/msg02093.html

The problem with those patches is that they require access to 
/proc/sys/kernel/core_pattern. I don't like having to run the iotests as 
root.


Max



[Qemu-devel] [RFC PATCH v1 2/2] x86: Update VT-d Posted-Interrupts related information

2014-11-25 Thread Feng Wu
VT-d Posted-Interrupts(PI) is an enhancement to CPU side Posted-Interrupt.
With VT-d Posted-Interrupts enabled, external interrupts from
direct-assigned devices can be delivered to guests without VMM
involvement when guest is running in non-root mode.

If VT-d PI is supported by KVM, we need to update the IRTE with
the new guest interrupt configuration.

Signed-off-by: Feng Wu feng...@intel.com
---
 hw/misc/vfio.c |   60 ++-
 1 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
index fd318a1..d453db4 100644
--- a/hw/misc/vfio.c
+++ b/hw/misc/vfio.c
@@ -281,6 +281,8 @@ static uint32_t vfio_pci_read_config(PCIDevice *pdev, 
uint32_t addr, int len);
 static void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr,
   uint32_t val, int len);
 static void vfio_mmap_set_enabled(VFIODevice *vdev, bool enabled);
+static void vfio_kvm_device_posting_irq(VFIODevice *vdev, uint32_t index,
+uint32_t start, uint32_t count);
 
 /*
  * Common VFIO interrupt disable
@@ -765,6 +767,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, 
unsigned int nr,
 {
 VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
 VFIOMSIVector *vector;
+uint32_t start, count;
 int ret;
 
 DPRINTF(%s(%04x:%02x:%02x.%x) vector %d used\n, __func__,
@@ -812,6 +815,8 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, 
unsigned int nr,
 if (ret) {
 error_report(vfio: failed to enable vectors, %d, ret);
 }
+start = 0;
+count = vdev-nr_vectors;
 } else {
 int argsz;
 struct vfio_irq_set *irq_set;
@@ -824,8 +829,8 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, 
unsigned int nr,
 irq_set-flags = VFIO_IRQ_SET_DATA_EVENTFD |
  VFIO_IRQ_SET_ACTION_TRIGGER;
 irq_set-index = VFIO_PCI_MSIX_IRQ_INDEX;
-irq_set-start = nr;
-irq_set-count = 1;
+irq_set-start = start = nr;
+irq_set-count = count = 1;
 pfd = (int32_t *)irq_set-data;
 
 if (vector-virq = 0) {
@@ -841,6 +846,11 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, 
unsigned int nr,
 }
 }
 
+if (msg) {
+vfio_kvm_device_posting_irq(vdev, VFIO_PCI_MSIX_IRQ_INDEX,
+start, count);
+}
+
 return 0;
 }
 
@@ -997,6 +1007,9 @@ retry:
 return;
 }
 
+vfio_kvm_device_posting_irq(vdev, VFIO_PCI_MSI_IRQ_INDEX,
+0, vdev-nr_vectors);
+
 DPRINTF(%s(%04x:%02x:%02x.%x) Enabled %d MSI vectors\n, __func__,
 vdev-host.domain, vdev-host.bus, vdev-host.slot,
 vdev-host.function, vdev-nr_vectors);
@@ -1077,6 +1090,9 @@ static void vfio_update_msi(VFIODevice *vdev)
 msg = msi_get_message(vdev-pdev, i);
 vfio_update_kvm_msi_virq(vector, msg);
 }
+
+vfio_kvm_device_posting_irq(vdev, VFIO_PCI_MSIX_IRQ_INDEX,
+0, vdev-nr_vectors);
 }
 
 /*
@@ -3622,6 +3638,46 @@ static void vfio_kvm_device_del_group(VFIOGroup *group)
 #endif
 }
 
+static void vfio_kvm_device_posting_irq(VFIODevice *vdev, uint32_t index,
+uint32_t start, uint32_t count)
+{
+#ifdef CONFIG_KVM
+int i, argsz;
+struct kvm_posted_intr *pi_info = NULL;
+
+struct kvm_device_attr attr = {
+.group = KVM_DEV_VFIO_DEVICE,
+.attr = KVM_DEV_VFIO_DEVICE_POSTING_IRQ,
+};
+
+if (!kvm_enabled() ||
+ioctl(vfio_kvm_device_fd, KVM_HAS_DEVICE_ATTR, attr) != 0) {
+return;
+}
+
+argsz = sizeof(*pi_info) + sizeof(int) * count;
+
+pi_info = g_malloc0(argsz);
+pi_info-argsz = argsz;
+pi_info-fd = vdev-fd;
+pi_info-index = index;
+pi_info-start = start;
+pi_info-count = count;
+
+for (i = 0; i  count; i++) {
+pi_info-virq[i] = vdev-msi_vectors[start+i].virq;;
+}
+
+attr.addr = (uint64_t)(unsigned long)pi_info;
+
+if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, attr)) {
+error_report(Failed to configure PI for KVM VFIO device: %m\n);
+}
+
+g_free(pi_info);
+#endif
+}
+
 static VFIOAddressSpace *vfio_get_address_space(AddressSpace *as)
 {
 VFIOAddressSpace *space;
-- 
1.7.1




[Qemu-devel] [RFC PATCH v1 1/2] linux-headers: Update KVM headers

2014-11-25 Thread Feng Wu
Sync-up KVM related Linux headers from KVM tree using
scripts/update-linux-header.sh

New VFIO attribute and data structure for VT-d Posted-Interrupts.

Signed-off-by: Feng Wu feng...@intel.com
---
 linux-headers/linux/kvm.h |   10 ++
 1 files changed, 10 insertions(+), 0 deletions(-)

diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index 1937afa..997733e 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -949,6 +949,7 @@ struct kvm_device_attr {
 #define  KVM_DEV_VFIO_DEVICE   2
 #define   KVM_DEV_VFIO_DEVICE_FORWARD_IRQ  1
 #define   KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ2
+#define   KVM_DEV_VFIO_DEVICE_POSTING_IRQ  3
 
 enum kvm_device_type {
KVM_DEV_TYPE_FSL_MPIC_20= 1,
@@ -973,6 +974,15 @@ struct kvm_arch_forwarded_irq {
__u32 gsi; /* gsi, ie. virtual IRQ number */
 };
 
+struct kvm_posted_intr {
+   __u32   argsz;
+   __u32   fd; /* file descriptor of the VFIO device */
+   __u32   index;  /* VFIO device IRQ index */
+   __u32   start;
+   __u32   count;
+   int virq[0];/* gsi, ie. virtual IRQ number */
+};
+
 /*
  * ioctls for VM fds
  */
-- 
1.7.1




[Qemu-devel] [RFC PATCH v1 0/2] VFIO: add VT-d Posted-Interrupts support

2014-11-25 Thread Feng Wu
VT-d Posted-Interrupts is an enhancement to CPU side Posted-Interrupt.
With VT-d Posted-Interrupts enabled, external interrupts from
direct-assigned devices can be delivered to guests without VMM
intervention when guest is running in non-root mode.

You can find the VT-d Posted-Interrtups Spec. in the following URL:
http://www.intel.com/content/www/us/en/intelligent-systems/intel-technology/vt-directed-io-spec.html

This patch adds VT-d Posted-Interrrupts supports for MSI/MSI-X of assigned
devices. When guests updates MSI/MSIx for assigned devices, QEMU will notify
KVM to update the associated IRTE according to VT-d PI Spec.

Feng Wu (2):
  linux-headers: Update KVM headers
  x86: Update VT-d Posted-Interrupts related information

 hw/misc/vfio.c|   60 +++-
 linux-headers/linux/kvm.h |   10 +++
 2 files changed, 68 insertions(+), 2 deletions(-)




Re: [Qemu-devel] [PATCH 1/3] s390: Add PCI bus support

2014-11-25 Thread Frank Blaschka
On Tue, Nov 25, 2014 at 01:14:01PM +0100, Alexander Graf wrote:
 
 
 On 25.11.14 11:11, Frank Blaschka wrote:
  On Tue, Nov 18, 2014 at 06:00:40PM +0100, Alexander Graf wrote:
 
 
  On 18.11.14 13:50, Frank Blaschka wrote:
  On Mon, Nov 10, 2014 at 04:14:16PM +0100, Alexander Graf wrote:
 
 
  On 10.11.14 15:20, Frank Blaschka wrote:
  From: Frank Blaschka frank.blasc...@de.ibm.com
 
  This patch implements a pci bus for s390x together with infrastructure
  to generate and handle hotplug events, to configure/unconfigure via
  sclp instruction, to do iommu translations and provide s390 support for
  MSI/MSI-X notification processing.
 
  Signed-off-by: Frank Blaschka frank.blasc...@de.ibm.com
 
  [...]
 
  diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
  new file mode 100644
  index 000..f2fa6ba
  --- /dev/null
  +++ b/hw/s390x/s390-pci-bus.c
  @@ -0,0 +1,485 @@
  +/*
  + * s390 PCI BUS
  + *
  + * Copyright 2014 IBM Corp.
  + * Author(s): Frank Blaschka frank.blasc...@de.ibm.com
  + *Hong Bo Li lih...@cn.ibm.com
  + *Yi Min Zhao zyi...@cn.ibm.com
  + *
  + * This work is licensed under the terms of the GNU GPL, version 2 or 
  (at
  + * your option) any later version. See the COPYING file in the 
  top-level
  + * directory.
  + */
  +
  +#include hw/pci/pci.h
  +#include hw/pci/pci_bus.h
  +#include hw/s390x/css.h
  +#include hw/s390x/sclp.h
  +#include hw/pci/msi.h
  +#include qemu/error-report.h
  +#include s390-pci-bus.h
  +
  +/* #define DEBUG_S390PCI_BUS */
  +#ifdef DEBUG_S390PCI_BUS
  +#define DPRINTF(fmt, ...) \
  +do { fprintf(stderr, S390pci-bus:  fmt, ## __VA_ARGS__); } while 
  (0)
  +#else
  +#define DPRINTF(fmt, ...) \
  +do { } while (0)
  +#endif
  +
  +static const unsigned long be_to_le = BITS_PER_LONG - 1;
  +static QTAILQ_HEAD(, SeiContainer) pending_sei =
  +QTAILQ_HEAD_INITIALIZER(pending_sei);
  +static QTAILQ_HEAD(, S390PCIBusDevice) device_list =
  +QTAILQ_HEAD_INITIALIZER(device_list);
 
  Please get rid of all statics ;). All state has to live in objects.
 
 
  be_to_le was misleading and unnecesary will remove this one but
  static QTAILQ_HEAD seems to be a common practice for list anchors.
  If you really want me to change this do you have any prefered way,
  or can you point me to some code doing this?
 
  For PCI devices, I don't think you need a list at all. Your PHB device
  should already have a proper qbus that knows about all its child devices.
  
  OK
  
 
  As for pending_sei, what is this about?
 
  
  This is a queue to store events (StoreEventInformation) used for hotplug
  support. In case a device is pluged/unpluged an event is stored to this 
  queue
  and the guest is notified. Then the guest pick up the event information via
  chsc instruction.
 
 Is this for overall CCW or only for PCI? Depending on the answer, you
 can put the sei event list into the respective parent device.


An NT2 event is pci specific. So I moved the queue for NT2 events to the PHB as 
well.

 
 Alex
 




Re: [Qemu-devel] [PATCH v2 0/6] Geometry and blocksize support for backing devices

2014-11-25 Thread Stefan Hajnoczi
On Wed, Nov 19, 2014 at 11:17:50AM +0100, Ekaterina Tumanova wrote:
 Hi folks,
 
 I'm sorry for the recent spam. I messed up during code submission last time.
 So please ignore any previous notes you received from me and answer only to
 this thread.
 
 This is the rework of the geometry+blocksize patch, which was
 recently discussed here:
 http://lists.gnu.org/archive/html/qemu-devel/2014-11/msg01148.html
 
 Markus suggested that we only detect blocksize and geometry for DASDs.
 
 According to this agreement new version contains DASD special casing.
 The driver methods are implemented only for host_device and inner hdev_xxx
 functions check if the backing storage is a DASD by means of
 BIODASDINFO2 ioctl.
 
 Original patchset can be found here:
 http://lists.gnu.org/archive/html/qemu-devel/2014-07/msg03791.html

This is description is mainly a changelog.  Links to previous email
threads are useful for additional info but please include a
self-contained description of the series and the rationale behind it.

Comments:

1. This series overrides the logical_block_size and
   physical_block_size options for raw images on DASD devices.  Users
   expect their command-line options to be honored, so the options
   should not be overriden if they have been given on the command-line.

2. Only virtio_blk is modified, this is inconsistent.  All emulated
   storage controllers using BlockConf have the same block size
   probing behavior.

3. Why does s390 need to customize hd_geometry_guess()?

4. Please use scripts/checkpatch.pl to check coding style.


pgpwVAPAj1gSd.pgp
Description: PGP signature


Re: [Qemu-devel] [PATCH v2] hw/arm/realview.c: Fix memory leak in realview_init()

2014-11-25 Thread Markus Armbruster
Nikita Belov zod...@ispras.ru writes:

 Variable 'ram_lo' is allocated unconditionally, but used only in some cases.
 When it is unused pointer will be lost at function exit, resulting in a
 memory leak. Allocate memory for 'ram_lo' only if it is needed.

 Valgrind output:
 ==16879== 240 bytes in 1 blocks are definitely lost in loss record 6,033 of 
 7,018
 ==16879==at 0x4C2AB80: malloc (in 
 /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
 ==16879==by 0x33D2CE: malloc_and_trace (vl.c:2804)
 ==16879==by 0x509E610: g_malloc (in 
 /lib/x86_64-linux-gnu/libglib-2.0.so.0.4000.0)
 ==16879==by 0x288836: realview_init (realview.c:55)
 ==16879==by 0x28988C: realview_pb_a8_init (realview.c:375)
 ==16879==by 0x341426: main (vl.c:4413)

 Signed-off-by: Nikita Belov zod...@ispras.ru

Reviewed-by: Markus Armbruster arm...@redhat.com



Re: [Qemu-devel] [PATCH 2/3] iotests: Filter for Killed in qemu-io output

2014-11-25 Thread Markus Armbruster
Max Reitz mre...@redhat.com writes:

 _filter_qemu_io already filters out the process ID when qemu-io is
 aborted; the same should be done if it is aborted.

when it is killed.



Re: [Qemu-devel] [PATCH 2/3] iotests: Filter for Killed in qemu-io output

2014-11-25 Thread Max Reitz

On 2014-11-25 at 14:05, Markus Armbruster wrote:

Max Reitz mre...@redhat.com writes:


_filter_qemu_io already filters out the process ID when qemu-io is
aborted; the same should be done if it is aborted.

when it is killed.


Oops, right.

Max



Re: [Qemu-devel] [PATCH v6 1/3] linux-aio: fix submit aio as a batch

2014-11-25 Thread Stefan Hajnoczi
On Tue, Nov 25, 2014 at 03:23:11PM +0800, Ming Lei wrote:
 @@ -296,12 +370,14 @@ void laio_detach_aio_context(void *s_, AioContext 
 *old_context)
  
  aio_set_event_notifier(old_context, s-e, NULL);
  qemu_bh_delete(s-completion_bh);
 +qemu_bh_delete(s-io_q.abort_bh);
  }
  
  void laio_attach_aio_context(void *s_, AioContext *new_context)
  {
  struct qemu_laio_state *s = s_;
  
 +s-io_q.abort_bh = aio_bh_new(new_context, ioq_abort_bh, s);
  s-completion_bh = aio_bh_new(new_context, qemu_laio_completion_bh, s);
  aio_set_event_notifier(new_context, s-e, qemu_laio_completion_cb);
  }

These functions are incomplete when -aborting == true.  I can't think
of a reason why we are guaranteed never to hit that state, and fixing it
is easy.  Just add the following to the end of
laio_attach_aio_context():

if (s-aborting) {
qemu_bh_schedule(s-io_q.abort_bh);
}

Stefan


pgp6ZGFRaYquX.pgp
Description: PGP signature


Re: [Qemu-devel] [PATCH v6 3/3] linux-aio: remove 'node' from 'struct qemu_laiocb'

2014-11-25 Thread Stefan Hajnoczi
On Tue, Nov 25, 2014 at 03:23:13PM +0800, Ming Lei wrote:
 No one uses the 'node' field any more, so remove it
 from 'struct qemu_laiocb', and this can save 16byte
 for the struct on 64bit arch.
 
 Reviewed-by: Paolo Bonzini pbonz...@redhat.com
 Signed-off-by: Ming Lei ming@canonical.com
 ---
  block/linux-aio.c |1 -
  1 file changed, 1 deletion(-)

Reviewed-by: Stefan Hajnoczi stefa...@redhat.com


pgpB9HxdZAHI2.pgp
Description: PGP signature


Re: [Qemu-devel] [PATCH v6 2/3] linux-aio: handling -EAGAIN for !s-io_q.plugged case

2014-11-25 Thread Stefan Hajnoczi
On Tue, Nov 25, 2014 at 03:23:12PM +0800, Ming Lei wrote:
 Previously -EAGAIN is simply ignored for !s-io_q.plugged case,
 and sometimes it is easy to cause -EIO to VM, such as NVME device.
 
 This patch handles -EAGAIN by io queue for !s-io_q.plugged case,
 and it will be retried in following aio completion cb.
 
 Reviewed-by: Paolo Bonzini pbonz...@redhat.com
 Suggested-by: Paolo Bonzini pbonz...@redhat.com
 Signed-off-by: Ming Lei ming@canonical.com
 ---
  block/linux-aio.c |   24 
  1 file changed, 16 insertions(+), 8 deletions(-)

Reviewed-by: Stefan Hajnoczi stefa...@redhat.com


pgpemlgBWJXwi.pgp
Description: PGP signature


Re: [Qemu-devel] [PATCH 1/3] qemu-io: Let -c abort raise any signal

2014-11-25 Thread Markus Armbruster
Max Reitz mre...@redhat.com writes:

 abort() has the sometimes undesirable side-effect of generating a core
 dump. If that is not needed, SIGKILL has the same effect of abruptly
 crash qemu; without a core dump.

 Therefore, this patch allows to use the qemu-io abort command to raise
 any signal.

 Signed-off-by: Max Reitz mre...@redhat.com
 ---
  qemu-io-cmds.c | 59 
 +++---
  1 file changed, 56 insertions(+), 3 deletions(-)

 diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
 index d94fb1e..5d39cf4 100644
 --- a/qemu-io-cmds.c
 +++ b/qemu-io-cmds.c
 @@ -2036,18 +2036,71 @@ static const cmdinfo_t wait_break_cmd = {
 .oneline= waits for the suspension of a request,
  };
  
 -static int abort_f(BlockDriverState *bs, int argc, char **argv)
 +
 +static void abort_help(void)
  {
 -abort();
 +printf(
 +\n
 + simulates a program crash\n
 +\n
 + Invokes abort(), or raise(signal) if a signal number is specified.\n
 + -S, -- number of the signal to raise()\n
 +\n);
  }
  
 +static int abort_f(BlockDriverState *bs, int argc, char **argv);
 +
  static const cmdinfo_t abort_cmd = {
 .name   = abort,
 .cfunc  = abort_f,
 +   .argmin = 0,
 +   .argmax = 2,
 .flags  = CMD_NOFILE_OK,
 -   .oneline= simulate a program crash using abort(3),
 +   .args   = [-S signal],
 +   .oneline= simulate a program crash,
 +   .help   = abort_help,
  };

This overloads the abort command with a kill command.

Do we really need a way to send arbitrary signals?  If yes, shouldn't we
call it kill rather than abort?

I suspect fooling around with signals isn't necessary, and a simple
exit(1) would do.

  
 +static int abort_f(BlockDriverState *bs, int argc, char **argv)
 +{
 +int c;
 +int sig = -1;
 +
 +while ((c = getopt(argc, argv, S:)) != EOF) {
 +switch (c) {
 +case 'S':
 +sig = cvtnum(optarg);
 +if (sig  0) {
 +printf(non-numeric signal number argument -- %s\n, 
 optarg);
 +return 0;
 +}
 +break;
 +
 +default:
 +return qemuio_command_usage(abort_cmd);
 +}
 +}
 +
 +if (optind != argc) {
 +return qemuio_command_usage(abort_cmd);
 +}
 +
 +if (sig  0) {
 +abort();
 +} else {
 +/* While abort() does flush all open streams, using raise() to kill 
 this
 + * process does not necessarily. At least stdout and stderr (although
 + * the latter should be non-buffered anyway) should be flushed, 
 though.
 + */
 +fflush(stdout);
 +fflush(stderr);

Without -S, we flush all streams.  With -S, we flush only stdout and
stderr.  The inconsistency is ugly.  Could be avoided with fcloseall(),
except it's a GNU extension.  Or drop the non-signal path entirely, and
raise(SIGABRT) instead of abort().

 +
 +raise(sig);
 +/* raise() may return */
 +return 0;
 +}
 +}
 +
  static void sleep_cb(void *opaque)
  {
  bool *expired = opaque;



Re: [Qemu-devel] [PATCH 0/3] iotests: Fix test 039

2014-11-25 Thread Markus Armbruster
Max Reitz mre...@redhat.com writes:

 On 2014-11-25 at 13:21, Markus Armbruster wrote:
 Max Reitz mre...@redhat.com writes:

 Test 039 used to fail
 I'm confused: used to suggests it doesn't anymore, but you sending a
 patches strongly suggests something's broken.

 Well, it used to fail before this series. :-P

 You're right, this sounds bad. Currently, 039 does fail, at least on
 any system with a /proc/sys/kernel/core_pattern passing the dump to
 another program. After this series, it does no longer.

because qemu-io -c abort may generate core dumps
 even with ulimit -c 0 (and the output then contains (core dumped)).
 How?

 See the patches[1][2] by Mao Chuan Li. If
 /proc/sys/kernel/core_pattern passes the dump to another program,
 ulimit -c 0 does not matter.

 [1] http://lists.nongnu.org/archive/html/qemu-devel/2014-11/msg02092.html
 [2] http://lists.nongnu.org/archive/html/qemu-devel/2014-11/msg02093.html

 The problem with those patches is that they require access to
 /proc/sys/kernel/core_pattern. I don't like having to run the iotests
 as root.

To me, this sounds like a case of doctor, it hurts when I do this.



Re: [Qemu-devel] [PATCH 0/3] iotests: Fix test 039

2014-11-25 Thread Max Reitz

On 2014-11-25 at 14:20, Markus Armbruster wrote:

Max Reitz mre...@redhat.com writes:


On 2014-11-25 at 13:21, Markus Armbruster wrote:

Max Reitz mre...@redhat.com writes:


Test 039 used to fail

I'm confused: used to suggests it doesn't anymore, but you sending a
patches strongly suggests something's broken.

Well, it used to fail before this series. :-P

You're right, this sounds bad. Currently, 039 does fail, at least on
any system with a /proc/sys/kernel/core_pattern passing the dump to
another program. After this series, it does no longer.


because qemu-io -c abort may generate core dumps
even with ulimit -c 0 (and the output then contains (core dumped)).

How?

See the patches[1][2] by Mao Chuan Li. If
/proc/sys/kernel/core_pattern passes the dump to another program,
ulimit -c 0 does not matter.

[1] http://lists.nongnu.org/archive/html/qemu-devel/2014-11/msg02092.html
[2] http://lists.nongnu.org/archive/html/qemu-devel/2014-11/msg02093.html

The problem with those patches is that they require access to
/proc/sys/kernel/core_pattern. I don't like having to run the iotests
as root.

To me, this sounds like a case of doctor, it hurts when I do this.


What do you mean? That I don't want the iotests to run as root? Or that 
I don't want to go the alternative of filtering out the (core dumped) 
message?


039 doesn't need a core dump. abort() generates a core dump. Why are we 
using abort()? Makes no sense. So don't use it.


I can see that people may want to use qemu-io -c abort to generate a 
core dump, though, which is why I'm not simply replacing the abort() 
call by raise(SIGKILL).


Max



[Qemu-devel] [PATCH RFC v2 00/12] qemu: towards virtio-1 host support

2014-11-25 Thread Cornelia Huck
Hi,

here's the next version of my virtio-1 qemu patchset. Using virtio-1
virtio-blk and virtio-net devices with a guest kernel built from
1416829787-14252-1-git-send-email-...@redhat.com still seems to
work for the virtio-ccw transport.

Changes from v1:
- rebased against current master
- don't advertise VERSION_1 for all devices, make devices switch it on
  individually

Cornelia Huck (9):
  virtio: cull virtio_bus_set_vdev_features
  virtio: support more feature bits
  s390x/virtio-ccw: fix check for WRITE_FEAT
  virtio: introduce legacy virtio devices
  virtio: allow virtio-1 queue layout
  dataplane: allow virtio-1 devices
  s390x/virtio-ccw: support virtio-1 set_vq format
  virtio-net/virtio-blk: enable virtio 1.0
  s390x/virtio-ccw: enable virtio 1.0

Thomas Huth (3):
  linux-headers/virtio_config: Update with VIRTIO_F_VERSION_1
  s390x/css: Add a callback for when subchannel gets disabled
  s390x/virtio-ccw: add virtio set-revision call

 hw/9pfs/virtio-9p-device.c  |7 +-
 hw/block/dataplane/virtio-blk.c |3 +-
 hw/block/virtio-blk.c   |   13 ++-
 hw/char/virtio-serial-bus.c |9 +-
 hw/net/virtio-net.c |   42 +---
 hw/s390x/css.c  |   12 +++
 hw/s390x/css.h  |1 +
 hw/s390x/s390-virtio-bus.c  |9 +-
 hw/s390x/virtio-ccw.c   |  186 +++
 hw/s390x/virtio-ccw.h   |7 +-
 hw/scsi/vhost-scsi.c|7 +-
 hw/scsi/virtio-scsi-dataplane.c |2 +-
 hw/scsi/virtio-scsi.c   |   10 +-
 hw/virtio/Makefile.objs |2 +-
 hw/virtio/dataplane/Makefile.objs   |2 +-
 hw/virtio/dataplane/vring.c |   95 ++
 hw/virtio/virtio-balloon.c  |8 +-
 hw/virtio/virtio-bus.c  |   23 +
 hw/virtio/virtio-mmio.c |9 +-
 hw/virtio/virtio-pci.c  |   13 +--
 hw/virtio/virtio-rng.c  |2 +-
 hw/virtio/virtio.c  |   51 +++---
 include/hw/virtio/dataplane/vring.h |   64 +++-
 include/hw/virtio/virtio-access.h   |4 +
 include/hw/virtio/virtio-bus.h  |   10 +-
 include/hw/virtio/virtio.h  |   34 +--
 linux-headers/linux/virtio_config.h |3 +
 27 files changed, 449 insertions(+), 179 deletions(-)

-- 
1.7.9.5




[Qemu-devel] [PATCH RFC v2 01/12] linux-headers/virtio_config: Update with VIRTIO_F_VERSION_1

2014-11-25 Thread Cornelia Huck
From: Thomas Huth th...@linux.vnet.ibm.com

Add the new VIRTIO_F_VERSION_1 definition to the virtio_config.h
linux header.

Signed-off-by: Thomas Huth th...@linux.vnet.ibm.com
Signed-off-by: Cornelia Huck cornelia.h...@de.ibm.com
---
 linux-headers/linux/virtio_config.h |3 +++
 1 file changed, 3 insertions(+)

diff --git a/linux-headers/linux/virtio_config.h 
b/linux-headers/linux/virtio_config.h
index 75dc20b..16aa289 100644
--- a/linux-headers/linux/virtio_config.h
+++ b/linux-headers/linux/virtio_config.h
@@ -54,4 +54,7 @@
 /* Can the device handle any descriptor layout? */
 #define VIRTIO_F_ANY_LAYOUT27
 
+/* v1.0 compliant. */
+#define VIRTIO_F_VERSION_1 32
+
 #endif /* _LINUX_VIRTIO_CONFIG_H */
-- 
1.7.9.5




[Qemu-devel] [PATCH RFC v2 09/12] s390x/virtio-ccw: add virtio set-revision call

2014-11-25 Thread Cornelia Huck
From: Thomas Huth th...@linux.vnet.ibm.com

Handle the virtio-ccw revision according to what the guest sets.
When revision 1 is selected, we have a virtio-1 standard device
with byteswapping for the virtio rings.

When a channel gets disabled, we have to revert to the legacy behavior
in case the next user of the device does not negotiate the revision 1
anymore (e.g. the boot firmware uses revision 1, but the operating
system only uses the legacy mode).

Note that revisions  0 are still disabled; but we still extend the
feature bit size to be able to handle the VERSION_1 bit.

Signed-off-by: Thomas Huth th...@linux.vnet.ibm.com
Signed-off-by: Cornelia Huck cornelia.h...@de.ibm.com
---
 hw/s390x/virtio-ccw.c |   52 +
 hw/s390x/virtio-ccw.h |7 ++-
 2 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c
index 62ec852..e79f3b8 100644
--- a/hw/s390x/virtio-ccw.c
+++ b/hw/s390x/virtio-ccw.c
@@ -20,9 +20,11 @@
 #include hw/virtio/virtio-net.h
 #include hw/sysbus.h
 #include qemu/bitops.h
+#include hw/virtio/virtio-access.h
 #include hw/virtio/virtio-bus.h
 #include hw/s390x/adapter.h
 #include hw/s390x/s390_flic.h
+#include linux/virtio_config.h
 
 #include ioinst.h
 #include css.h
@@ -260,6 +262,12 @@ typedef struct VirtioThinintInfo {
 uint8_t isc;
 } QEMU_PACKED VirtioThinintInfo;
 
+typedef struct VirtioRevInfo {
+uint16_t revision;
+uint16_t length;
+uint8_t data[0];
+} QEMU_PACKED VirtioRevInfo;
+
 /* Specify where the virtqueues for the subchannel are in guest memory. */
 static int virtio_ccw_set_vqs(SubchDev *sch, uint64_t addr, uint32_t align,
   uint16_t index, uint16_t num)
@@ -299,6 +307,7 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw)
 {
 int ret;
 VqInfoBlock info;
+VirtioRevInfo revinfo;
 uint8_t status;
 VirtioFeatDesc features;
 void *config;
@@ -373,6 +382,13 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw)
ccw.cda + sizeof(features.features));
 if (features.index  ARRAY_SIZE(dev-host_features)) {
 features.features = dev-host_features[features.index];
+/*
+ * Don't offer version 1 to the guest if it did not
+ * negotiate at least revision 1.
+ */
+if (features.index == 1  dev-revision = 0) {
+features.features = ~(1  (VIRTIO_F_VERSION_1 - 32));
+}
 } else {
 /* Return zeroes if the guest supports more feature bits. */
 features.features = 0;
@@ -400,6 +416,13 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw)
ccw.cda + sizeof(features.features));
 features.features = ldl_le_phys(address_space_memory, ccw.cda);
 if (features.index  ARRAY_SIZE(vdev-guest_features)) {
+/*
+ * The guest should not set version 1 if it didn't
+ * negotiate a revision = 1.
+ */
+if (features.index == 1  dev-revision = 0) {
+features.features = ~(1  (VIRTIO_F_VERSION_1 - 32));
+}
 virtio_set_features(vdev, features.index, features.features);
 } else {
 /*
@@ -600,6 +623,25 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw)
 }
 }
 break;
+case CCW_CMD_SET_VIRTIO_REV:
+len = sizeof(revinfo);
+if (ccw.count  len || (check_len  ccw.count  len)) {
+ret = -EINVAL;
+break;
+}
+if (!ccw.cda) {
+ret = -EFAULT;
+break;
+}
+cpu_physical_memory_read(ccw.cda, revinfo, len);
+if (dev-revision = 0 ||
+revinfo.revision  VIRTIO_CCW_REV_MAX) {
+ret = -ENOSYS;
+break;
+}
+ret = 0;
+dev-revision = revinfo.revision;
+break;
 default:
 ret = -ENOSYS;
 break;
@@ -607,6 +649,13 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw)
 return ret;
 }
 
+static void virtio_sch_disable_cb(SubchDev *sch)
+{
+VirtioCcwDevice *dev = sch-driver_data;
+
+dev-revision = -1;
+}
+
 static int virtio_ccw_device_init(VirtioCcwDevice *dev, VirtIODevice *vdev)
 {
 unsigned int cssid = 0;
@@ -733,6 +782,7 @@ static int virtio_ccw_device_init(VirtioCcwDevice *dev, 
VirtIODevice *vdev)
 css_sch_build_virtual_schib(sch, 0, VIRTIO_CCW_CHPID_TYPE);
 
 sch-ccw_cb = virtio_ccw_cb;
+sch-disable_cb = virtio_sch_disable_cb;
 
 /* Build senseid data. */
 memset(sch-id, 0, sizeof(SenseId));
@@ -740,6 +790,8 @@ static int virtio_ccw_device_init(VirtioCcwDevice *dev, 
VirtIODevice *vdev)
 sch-id.cu_type = VIRTIO_CCW_CU_TYPE;
 sch-id.cu_model = vdev-device_id;
 
+dev-revision = 

[Qemu-devel] [PATCH RFC v2 12/12] s390x/virtio-ccw: enable virtio 1.0

2014-11-25 Thread Cornelia Huck
virtio-ccw should now have everything in place to operate virtio 1.0
devices, so let's enable revision 1.

Signed-off-by: Cornelia Huck cornelia.h...@de.ibm.com
---
 hw/s390x/virtio-ccw.h |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/s390x/virtio-ccw.h b/hw/s390x/virtio-ccw.h
index 03d5955..08edd8d 100644
--- a/hw/s390x/virtio-ccw.h
+++ b/hw/s390x/virtio-ccw.h
@@ -73,7 +73,7 @@ typedef struct VirtIOCCWDeviceClass {
 #define VIRTIO_CCW_FEATURE_SIZE NR_VIRTIO_FEATURE_WORDS
 
 /* The maximum virtio revision we support. */
-#define VIRTIO_CCW_REV_MAX 0
+#define VIRTIO_CCW_REV_MAX 1
 
 /* Performance improves when virtqueue kick processing is decoupled from the
  * vcpu thread using ioeventfd for some devices. */
-- 
1.7.9.5




[Qemu-devel] [PATCH RFC v2 04/12] s390x/virtio-ccw: fix check for WRITE_FEAT

2014-11-25 Thread Cornelia Huck
We need to check guest feature size, not host feature size to find
out whether we should call virtio_set_features(). This check is
possible now that vdev-guest_features is an array.

Reviewed-by: Thomas Huth th...@linux.vnet.ibm.com
Signed-off-by: Cornelia Huck cornelia.h...@de.ibm.com
---
 hw/s390x/virtio-ccw.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c
index 2f52b82..62ec852 100644
--- a/hw/s390x/virtio-ccw.c
+++ b/hw/s390x/virtio-ccw.c
@@ -399,7 +399,7 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw)
 features.index = ldub_phys(address_space_memory,
ccw.cda + sizeof(features.features));
 features.features = ldl_le_phys(address_space_memory, ccw.cda);
-if (features.index  ARRAY_SIZE(dev-host_features)) {
+if (features.index  ARRAY_SIZE(vdev-guest_features)) {
 virtio_set_features(vdev, features.index, features.features);
 } else {
 /*
-- 
1.7.9.5




[Qemu-devel] [PATCH RFC v2 02/12] virtio: cull virtio_bus_set_vdev_features

2014-11-25 Thread Cornelia Huck
The only user of this function was virtio-ccw, and it should use
virtio_set_features() like everybody else: We need to make sure
that bad features are masked out properly, which this function did
not do.

Reviewed-by: Thomas Huth th...@linux.vnet.ibm.com
Signed-off-by: Cornelia Huck cornelia.h...@de.ibm.com
---
 hw/s390x/virtio-ccw.c  |3 +--
 hw/virtio/virtio-bus.c |   14 --
 include/hw/virtio/virtio-bus.h |3 ---
 3 files changed, 1 insertion(+), 19 deletions(-)

diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c
index ea236c9..84f17bc 100644
--- a/hw/s390x/virtio-ccw.c
+++ b/hw/s390x/virtio-ccw.c
@@ -400,8 +400,7 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw)
ccw.cda + sizeof(features.features));
 features.features = ldl_le_phys(address_space_memory, ccw.cda);
 if (features.index  ARRAY_SIZE(dev-host_features)) {
-virtio_bus_set_vdev_features(dev-bus, features.features);
-vdev-guest_features = features.features;
+virtio_set_features(vdev, features.features);
 } else {
 /*
  * If the guest supports more feature bits, assert that it
diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c
index eb77019..a8ffa07 100644
--- a/hw/virtio/virtio-bus.c
+++ b/hw/virtio/virtio-bus.c
@@ -109,20 +109,6 @@ uint32_t virtio_bus_get_vdev_features(VirtioBusState *bus,
 return k-get_features(vdev, requested_features);
 }
 
-/* Set the features of the plugged device. */
-void virtio_bus_set_vdev_features(VirtioBusState *bus,
-  uint32_t requested_features)
-{
-VirtIODevice *vdev = virtio_bus_get_device(bus);
-VirtioDeviceClass *k;
-
-assert(vdev != NULL);
-k = VIRTIO_DEVICE_GET_CLASS(vdev);
-if (k-set_features != NULL) {
-k-set_features(vdev, requested_features);
-}
-}
-
 /* Get bad features of the plugged device. */
 uint32_t virtio_bus_get_vdev_bad_features(VirtioBusState *bus)
 {
diff --git a/include/hw/virtio/virtio-bus.h b/include/hw/virtio/virtio-bus.h
index 0756545..0d2e7b4 100644
--- a/include/hw/virtio/virtio-bus.h
+++ b/include/hw/virtio/virtio-bus.h
@@ -84,9 +84,6 @@ size_t virtio_bus_get_vdev_config_len(VirtioBusState *bus);
 /* Get the features of the plugged device. */
 uint32_t virtio_bus_get_vdev_features(VirtioBusState *bus,
 uint32_t requested_features);
-/* Set the features of the plugged device. */
-void virtio_bus_set_vdev_features(VirtioBusState *bus,
-  uint32_t requested_features);
 /* Get bad features of the plugged device. */
 uint32_t virtio_bus_get_vdev_bad_features(VirtioBusState *bus);
 /* Get config of the plugged device. */
-- 
1.7.9.5




[Qemu-devel] [PATCH RFC v2 03/12] virtio: support more feature bits

2014-11-25 Thread Cornelia Huck
With virtio-1, we support more than 32 feature bits. Let's make
vdev-guest_features depend on the number of supported feature bits,
allowing us to grow the feature bits automatically.

We also need to enhance the internal functions dealing with getting
and setting features with an additional index field, so that all feature
bits may be accessed (in chunks of 32 bits).

vhost and migration have been ignored for now.

Reviewed-by: Thomas Huth th...@linux.vnet.ibm.com
Signed-off-by: Cornelia Huck cornelia.h...@de.ibm.com
---
 hw/9pfs/virtio-9p-device.c |7 ++-
 hw/block/virtio-blk.c  |9 +++--
 hw/char/virtio-serial-bus.c|9 +++--
 hw/net/virtio-net.c|   38 ++
 hw/s390x/s390-virtio-bus.c |9 +
 hw/s390x/virtio-ccw.c  |   17 ++---
 hw/scsi/vhost-scsi.c   |7 +--
 hw/scsi/virtio-scsi.c  |   10 +-
 hw/virtio/dataplane/vring.c|   10 +-
 hw/virtio/virtio-balloon.c |8 ++--
 hw/virtio/virtio-bus.c |9 +
 hw/virtio/virtio-mmio.c|9 +
 hw/virtio/virtio-pci.c |   13 +++--
 hw/virtio/virtio-rng.c |2 +-
 hw/virtio/virtio.c |   29 +
 include/hw/virtio/virtio-bus.h |7 ---
 include/hw/virtio/virtio.h |   19 ++-
 17 files changed, 135 insertions(+), 77 deletions(-)

diff --git a/hw/9pfs/virtio-9p-device.c b/hw/9pfs/virtio-9p-device.c
index 2572747..c29c8c8 100644
--- a/hw/9pfs/virtio-9p-device.c
+++ b/hw/9pfs/virtio-9p-device.c
@@ -21,8 +21,13 @@
 #include virtio-9p-coth.h
 #include hw/virtio/virtio-access.h
 
-static uint32_t virtio_9p_get_features(VirtIODevice *vdev, uint32_t features)
+static uint32_t virtio_9p_get_features(VirtIODevice *vdev, unsigned int index,
+   uint32_t features)
 {
+if (index  0) {
+return features;
+}
+
 features |= 1  VIRTIO_9P_MOUNT_TAG;
 return features;
 }
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index b19b102..6d86f60 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -564,10 +564,15 @@ static void virtio_blk_set_config(VirtIODevice *vdev, 
const uint8_t *config)
 aio_context_release(blk_get_aio_context(s-blk));
 }
 
-static uint32_t virtio_blk_get_features(VirtIODevice *vdev, uint32_t features)
+static uint32_t virtio_blk_get_features(VirtIODevice *vdev, unsigned int index,
+uint32_t features)
 {
 VirtIOBlock *s = VIRTIO_BLK(vdev);
 
+if (index  0) {
+return features;
+}
+
 features |= (1  VIRTIO_BLK_F_SEG_MAX);
 features |= (1  VIRTIO_BLK_F_GEOMETRY);
 features |= (1  VIRTIO_BLK_F_TOPOLOGY);
@@ -601,7 +606,7 @@ static void virtio_blk_set_status(VirtIODevice *vdev, 
uint8_t status)
 return;
 }
 
-features = vdev-guest_features;
+features = vdev-guest_features[0];
 
 /* A guest that supports VIRTIO_BLK_F_CONFIG_WCE must be able to send
  * cache flushes.  Thus, the auto writethrough behavior is never
diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c
index a7b1b68..55de504 100644
--- a/hw/char/virtio-serial-bus.c
+++ b/hw/char/virtio-serial-bus.c
@@ -75,7 +75,7 @@ static VirtIOSerialPort *find_port_by_name(char *name)
 static bool use_multiport(VirtIOSerial *vser)
 {
 VirtIODevice *vdev = VIRTIO_DEVICE(vser);
-return vdev-guest_features  (1  VIRTIO_CONSOLE_F_MULTIPORT);
+return vdev-guest_features[0]  (1  VIRTIO_CONSOLE_F_MULTIPORT);
 }
 
 static size_t write_to_port(VirtIOSerialPort *port,
@@ -467,10 +467,15 @@ static void handle_input(VirtIODevice *vdev, VirtQueue 
*vq)
 {
 }
 
-static uint32_t get_features(VirtIODevice *vdev, uint32_t features)
+static uint32_t get_features(VirtIODevice *vdev, unsigned int index,
+ uint32_t features)
 {
 VirtIOSerial *vser;
 
+if (index  0) {
+return features;
+}
+
 vser = VIRTIO_SERIAL(vdev);
 
 if (vser-bus.max_nr_ports  1) {
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 9b88775..1e214b5 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -86,7 +86,7 @@ static void virtio_net_set_config(VirtIODevice *vdev, const 
uint8_t *config)
 
 memcpy(netcfg, config, n-config_size);
 
-if (!(vdev-guest_features  VIRTIO_NET_F_CTRL_MAC_ADDR  1) 
+if (!(vdev-guest_features[0]  VIRTIO_NET_F_CTRL_MAC_ADDR  1) 
 memcmp(netcfg.mac, n-mac, ETH_ALEN)) {
 memcpy(n-mac, netcfg.mac, ETH_ALEN);
 qemu_format_nic_info_str(qemu_get_queue(n-nic), n-mac);
@@ -305,7 +305,7 @@ static RxFilterInfo 
*virtio_net_query_rxfilter(NetClientState *nc)
 info-multicast_table = str_list;
 info-vlan_table = get_vlan_table(n);
 
-if (!((1  VIRTIO_NET_F_CTRL_VLAN)  vdev-guest_features)) {
+if (!((1  VIRTIO_NET_F_CTRL_VLAN)  

[Qemu-devel] [PATCH RFC v2 08/12] s390x/css: Add a callback for when subchannel gets disabled

2014-11-25 Thread Cornelia Huck
From: Thomas Huth th...@linux.vnet.ibm.com

We need a possibility to run code when a subchannel gets disabled.
This patch adds the necessary infrastructure.

Signed-off-by: Thomas Huth th...@linux.vnet.ibm.com
Signed-off-by: Cornelia Huck cornelia.h...@de.ibm.com
---
 hw/s390x/css.c |   12 
 hw/s390x/css.h |1 +
 2 files changed, 13 insertions(+)

diff --git a/hw/s390x/css.c b/hw/s390x/css.c
index b67c039..735ec55 100644
--- a/hw/s390x/css.c
+++ b/hw/s390x/css.c
@@ -588,6 +588,7 @@ int css_do_msch(SubchDev *sch, SCHIB *orig_schib)
 {
 SCSW *s = sch-curr_status.scsw;
 PMCW *p = sch-curr_status.pmcw;
+uint16_t oldflags;
 int ret;
 SCHIB schib;
 
@@ -610,6 +611,7 @@ int css_do_msch(SubchDev *sch, SCHIB *orig_schib)
 copy_schib_from_guest(schib, orig_schib);
 /* Only update the program-modifiable fields. */
 p-intparm = schib.pmcw.intparm;
+oldflags = p-flags;
 p-flags = ~(PMCW_FLAGS_MASK_ISC | PMCW_FLAGS_MASK_ENA |
   PMCW_FLAGS_MASK_LM | PMCW_FLAGS_MASK_MME |
   PMCW_FLAGS_MASK_MP);
@@ -625,6 +627,12 @@ int css_do_msch(SubchDev *sch, SCHIB *orig_schib)
 (PMCW_CHARS_MASK_MBFC | PMCW_CHARS_MASK_CSENSE);
 sch-curr_status.mba = schib.mba;
 
+/* Has the channel been disabled? */
+if (sch-disable_cb  (oldflags  PMCW_FLAGS_MASK_ENA) != 0
+ (p-flags  PMCW_FLAGS_MASK_ENA) == 0) {
+sch-disable_cb(sch);
+}
+
 ret = 0;
 
 out:
@@ -1443,6 +1451,10 @@ void css_reset_sch(SubchDev *sch)
 {
 PMCW *p = sch-curr_status.pmcw;
 
+if ((p-flags  PMCW_FLAGS_MASK_ENA) != 0  sch-disable_cb) {
+sch-disable_cb(sch);
+}
+
 p-intparm = 0;
 p-flags = ~(PMCW_FLAGS_MASK_ISC | PMCW_FLAGS_MASK_ENA |
   PMCW_FLAGS_MASK_LM | PMCW_FLAGS_MASK_MME |
diff --git a/hw/s390x/css.h b/hw/s390x/css.h
index 33104ac..7fa807b 100644
--- a/hw/s390x/css.h
+++ b/hw/s390x/css.h
@@ -81,6 +81,7 @@ struct SubchDev {
 uint8_t ccw_no_data_cnt;
 /* transport-provided data: */
 int (*ccw_cb) (SubchDev *, CCW1);
+void (*disable_cb)(SubchDev *);
 SenseId id;
 void *driver_data;
 };
-- 
1.7.9.5




[Qemu-devel] [PATCH RFC v2 05/12] virtio: introduce legacy virtio devices

2014-11-25 Thread Cornelia Huck
Introduce a helper function to indicate  whether a virtio device is
operating in legacy or virtio standard mode.

It may be used to make decisions about the endianess of virtio accesses
and other virtio-1 specific changes, enabling us to support transitional
devices.

Reviewed-by: Thomas Huth th...@linux.vnet.ibm.com
Signed-off-by: Cornelia Huck cornelia.h...@de.ibm.com
---
 hw/virtio/virtio.c|6 +-
 include/hw/virtio/virtio-access.h |4 
 include/hw/virtio/virtio.h|   13 +++--
 3 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 2eb5d3c..4149f45 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -883,7 +883,11 @@ static bool virtio_device_endian_needed(void *opaque)
 VirtIODevice *vdev = opaque;
 
 assert(vdev-device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
-return vdev-device_endian != virtio_default_endian();
+if (virtio_device_is_legacy(vdev)) {
+return vdev-device_endian != virtio_default_endian();
+}
+/* Devices conforming to VIRTIO 1.0 or later are always LE. */
+return vdev-device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
 }
 
 static const VMStateDescription vmstate_virtio_device_endian = {
diff --git a/include/hw/virtio/virtio-access.h 
b/include/hw/virtio/virtio-access.h
index 46456fd..c123ee0 100644
--- a/include/hw/virtio/virtio-access.h
+++ b/include/hw/virtio/virtio-access.h
@@ -19,6 +19,10 @@
 
 static inline bool virtio_access_is_big_endian(VirtIODevice *vdev)
 {
+if (!virtio_device_is_legacy(vdev)) {
+/* Devices conforming to VIRTIO 1.0 or later are always LE. */
+return false;
+}
 #if defined(TARGET_IS_BIENDIAN)
 return virtio_is_big_endian(vdev);
 #elif defined(TARGET_WORDS_BIGENDIAN)
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index b408166..40e567c 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -275,9 +275,18 @@ void virtio_queue_set_host_notifier_fd_handler(VirtQueue 
*vq, bool assign,
 void virtio_queue_notify_vq(VirtQueue *vq);
 void virtio_irq(VirtQueue *vq);
 
+static inline bool virtio_device_is_legacy(VirtIODevice *vdev)
+{
+return !(vdev-guest_features[1]  (1  (VIRTIO_F_VERSION_1 - 32)));
+}
+
 static inline bool virtio_is_big_endian(VirtIODevice *vdev)
 {
-assert(vdev-device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
-return vdev-device_endian == VIRTIO_DEVICE_ENDIAN_BIG;
+if (virtio_device_is_legacy(vdev)) {
+assert(vdev-device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
+return vdev-device_endian == VIRTIO_DEVICE_ENDIAN_BIG;
+}
+/* Devices conforming to VIRTIO 1.0 or later are always LE. */
+return false;
 }
 #endif
-- 
1.7.9.5




[Qemu-devel] [PATCH RFC v2 06/12] virtio: allow virtio-1 queue layout

2014-11-25 Thread Cornelia Huck
For virtio-1 devices, we allow a more complex queue layout that doesn't
require descriptor table and rings on a physically-contigous memory area:
add virtio_queue_set_rings() to allow transports to set this up.

Signed-off-by: Cornelia Huck cornelia.h...@de.ibm.com
---
 hw/virtio/virtio.c |   16 
 include/hw/virtio/virtio.h |2 ++
 2 files changed, 18 insertions(+)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 4149f45..2c6bb91 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -96,6 +96,13 @@ static void virtqueue_init(VirtQueue *vq)
 {
 hwaddr pa = vq-pa;
 
+if (pa == -1ULL) {
+/*
+ * This is a virtio-1 style vq that has already been setup
+ * in virtio_queue_set.
+ */
+return;
+}
 vq-vring.desc = pa;
 vq-vring.avail = pa + vq-vring.num * sizeof(VRingDesc);
 vq-vring.used = vring_align(vq-vring.avail +
@@ -719,6 +726,15 @@ hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
 return vdev-vq[n].pa;
 }
 
+void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
+hwaddr avail, hwaddr used)
+{
+vdev-vq[n].pa = -1ULL;
+vdev-vq[n].vring.desc = desc;
+vdev-vq[n].vring.avail = avail;
+vdev-vq[n].vring.used = used;
+}
+
 void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
 {
 /* Don't allow guest to flip queue between existent and
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 40e567c..f840320 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -227,6 +227,8 @@ void virtio_queue_set_addr(VirtIODevice *vdev, int n, 
hwaddr addr);
 hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n);
 void virtio_queue_set_num(VirtIODevice *vdev, int n, int num);
 int virtio_queue_get_num(VirtIODevice *vdev, int n);
+void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
+hwaddr avail, hwaddr used);
 void virtio_queue_set_align(VirtIODevice *vdev, int n, int align);
 void virtio_queue_notify(VirtIODevice *vdev, int n);
 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n);
-- 
1.7.9.5




[Qemu-devel] [PATCH RFC v2 11/12] virtio-net/virtio-blk: enable virtio 1.0

2014-11-25 Thread Cornelia Huck
virtio-net (non-vhost) and virtio-blk have everything in place to support
virtio 1.0: let's enable the feature bit for them.

Note that VIRTIO_F_VERSION_1 is technically a transport feature; once
every device is ready for virtio 1.0, we can move this setting this
feature bit out of the individual devices.

Signed-off-by: Cornelia Huck cornelia.h...@de.ibm.com
---
 hw/block/virtio-blk.c |4 
 hw/net/virtio-net.c   |4 
 2 files changed, 8 insertions(+)

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 6d86f60..3781f98 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -569,6 +569,10 @@ static uint32_t virtio_blk_get_features(VirtIODevice 
*vdev, unsigned int index,
 {
 VirtIOBlock *s = VIRTIO_BLK(vdev);
 
+if (index == 1) {
+features |= (1  (VIRTIO_F_VERSION_1 - 32));
+}
+
 if (index  0) {
 return features;
 }
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 1e214b5..fcfc95f 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -447,6 +447,10 @@ static uint32_t virtio_net_get_features(VirtIODevice 
*vdev, unsigned int index,
 VirtIONet *n = VIRTIO_NET(vdev);
 NetClientState *nc = qemu_get_queue(n-nic);
 
+if (index == 1  !get_vhost_net(nc-peer)) {
+features |= (1  (VIRTIO_F_VERSION_1 - 32));
+}
+
 if (index  0) {
 return features;
 }
-- 
1.7.9.5




[Qemu-devel] [PATCH RFC v2 07/12] dataplane: allow virtio-1 devices

2014-11-25 Thread Cornelia Huck
Handle endianness conversion for virtio-1 virtqueues correctly.

Note that dataplane now needs to be built per-target.

Signed-off-by: Cornelia Huck cornelia.h...@de.ibm.com
---
 hw/block/dataplane/virtio-blk.c |3 +-
 hw/scsi/virtio-scsi-dataplane.c |2 +-
 hw/virtio/Makefile.objs |2 +-
 hw/virtio/dataplane/Makefile.objs   |2 +-
 hw/virtio/dataplane/vring.c |   85 +++
 include/hw/virtio/dataplane/vring.h |   64 --
 6 files changed, 113 insertions(+), 45 deletions(-)

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index 1222a37..c25878c 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -16,6 +16,7 @@
 #include qemu/iov.h
 #include qemu/thread.h
 #include qemu/error-report.h
+#include hw/virtio/virtio-access.h
 #include hw/virtio/dataplane/vring.h
 #include sysemu/block-backend.h
 #include hw/virtio/virtio-blk.h
@@ -75,7 +76,7 @@ static void complete_request_vring(VirtIOBlockReq *req, 
unsigned char status)
 VirtIOBlockDataPlane *s = req-dev-dataplane;
 stb_p(req-in-status, status);
 
-vring_push(req-dev-dataplane-vring, req-elem,
+vring_push(s-vdev, req-dev-dataplane-vring, req-elem,
req-qiov.size + sizeof(*req-in));
 
 /* Suppress notification to guest by BH and its scheduled
diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
index 03a1e8c..418d73b 100644
--- a/hw/scsi/virtio-scsi-dataplane.c
+++ b/hw/scsi/virtio-scsi-dataplane.c
@@ -94,7 +94,7 @@ void virtio_scsi_vring_push_notify(VirtIOSCSIReq *req)
 {
 VirtIODevice *vdev = VIRTIO_DEVICE(req-vring-parent);
 
-vring_push(req-vring-vring, req-elem,
+vring_push(vdev, req-vring-vring, req-elem,
req-qsgl.size + req-resp_iov.size);
 
 if (vring_should_notify(vdev, req-vring-vring)) {
diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs
index d21c397..19b224a 100644
--- a/hw/virtio/Makefile.objs
+++ b/hw/virtio/Makefile.objs
@@ -2,7 +2,7 @@ common-obj-y += virtio-rng.o
 common-obj-$(CONFIG_VIRTIO_PCI) += virtio-pci.o
 common-obj-y += virtio-bus.o
 common-obj-y += virtio-mmio.o
-common-obj-$(CONFIG_VIRTIO) += dataplane/
+obj-$(CONFIG_VIRTIO) += dataplane/
 
 obj-y += virtio.o virtio-balloon.o 
 obj-$(CONFIG_LINUX) += vhost.o vhost-backend.o vhost-user.o
diff --git a/hw/virtio/dataplane/Makefile.objs 
b/hw/virtio/dataplane/Makefile.objs
index 9a8cfc0..753a9ca 100644
--- a/hw/virtio/dataplane/Makefile.objs
+++ b/hw/virtio/dataplane/Makefile.objs
@@ -1 +1 @@
-common-obj-y += vring.o
+obj-y += vring.o
diff --git a/hw/virtio/dataplane/vring.c b/hw/virtio/dataplane/vring.c
index a051775..69809ff 100644
--- a/hw/virtio/dataplane/vring.c
+++ b/hw/virtio/dataplane/vring.c
@@ -18,6 +18,7 @@
 #include hw/hw.h
 #include exec/memory.h
 #include exec/address-spaces.h
+#include hw/virtio/virtio-access.h
 #include hw/virtio/dataplane/vring.h
 #include qemu/error-report.h
 
@@ -83,7 +84,7 @@ bool vring_setup(Vring *vring, VirtIODevice *vdev, int n)
 vring_init(vring-vr, virtio_queue_get_num(vdev, n), vring_ptr, 4096);
 
 vring-last_avail_idx = virtio_queue_get_last_avail_idx(vdev, n);
-vring-last_used_idx = vring-vr.used-idx;
+vring-last_used_idx = vring_get_used_idx(vdev, vring);
 vring-signalled_used = 0;
 vring-signalled_used_valid = false;
 
@@ -104,7 +105,7 @@ void vring_teardown(Vring *vring, VirtIODevice *vdev, int n)
 void vring_disable_notification(VirtIODevice *vdev, Vring *vring)
 {
 if (!(vdev-guest_features[0]  (1  VIRTIO_RING_F_EVENT_IDX))) {
-vring-vr.used-flags |= VRING_USED_F_NO_NOTIFY;
+vring_set_used_flags(vdev, vring, VRING_USED_F_NO_NOTIFY);
 }
 }
 
@@ -117,10 +118,10 @@ bool vring_enable_notification(VirtIODevice *vdev, Vring 
*vring)
 if (vdev-guest_features[0]  (1  VIRTIO_RING_F_EVENT_IDX)) {
 vring_avail_event(vring-vr) = vring-vr.avail-idx;
 } else {
-vring-vr.used-flags = ~VRING_USED_F_NO_NOTIFY;
+vring_clear_used_flags(vdev, vring, VRING_USED_F_NO_NOTIFY);
 }
 smp_mb(); /* ensure update is seen before reading avail_idx */
-return !vring_more_avail(vring);
+return !vring_more_avail(vdev, vring);
 }
 
 /* This is stolen from linux/drivers/vhost/vhost.c:vhost_notify() */
@@ -134,12 +135,13 @@ bool vring_should_notify(VirtIODevice *vdev, Vring *vring)
 smp_mb();
 
 if ((vdev-guest_features[0]  VIRTIO_F_NOTIFY_ON_EMPTY) 
-unlikely(vring-vr.avail-idx == vring-last_avail_idx)) {
+unlikely(!vring_more_avail(vdev, vring))) {
 return true;
 }
 
 if (!(vdev-guest_features[0]  VIRTIO_RING_F_EVENT_IDX)) {
-return !(vring-vr.avail-flags  VRING_AVAIL_F_NO_INTERRUPT);
+return !(vring_get_avail_flags(vdev, vring) 
+ VRING_AVAIL_F_NO_INTERRUPT);
 }
 old = vring-signalled_used;
 v = vring-signalled_used_valid;
@@ -154,15 

[Qemu-devel] [PATCH RFC v2 10/12] s390x/virtio-ccw: support virtio-1 set_vq format

2014-11-25 Thread Cornelia Huck
Support the new CCW_CMD_SET_VQ format for virtio-1 devices.

While we're at it, refactor the code a bit and enforce big endian
fields (which had always been required, even for legacy).

Reviewed-by: Thomas Huth th...@linux.vnet.ibm.com
Signed-off-by: Cornelia Huck cornelia.h...@de.ibm.com
---
 hw/s390x/virtio-ccw.c |  114 ++---
 1 file changed, 80 insertions(+), 34 deletions(-)

diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c
index e79f3b8..60d67a3 100644
--- a/hw/s390x/virtio-ccw.c
+++ b/hw/s390x/virtio-ccw.c
@@ -238,11 +238,20 @@ VirtualCssBus *virtual_css_bus_init(void)
 }
 
 /* Communication blocks used by several channel commands. */
-typedef struct VqInfoBlock {
+typedef struct VqInfoBlockLegacy {
 uint64_t queue;
 uint32_t align;
 uint16_t index;
 uint16_t num;
+} QEMU_PACKED VqInfoBlockLegacy;
+
+typedef struct VqInfoBlock {
+uint64_t desc;
+uint32_t res0;
+uint16_t index;
+uint16_t num;
+uint64_t avail;
+uint64_t used;
 } QEMU_PACKED VqInfoBlock;
 
 typedef struct VqConfigBlock {
@@ -269,17 +278,20 @@ typedef struct VirtioRevInfo {
 } QEMU_PACKED VirtioRevInfo;
 
 /* Specify where the virtqueues for the subchannel are in guest memory. */
-static int virtio_ccw_set_vqs(SubchDev *sch, uint64_t addr, uint32_t align,
-  uint16_t index, uint16_t num)
+static int virtio_ccw_set_vqs(SubchDev *sch, VqInfoBlock *info,
+  VqInfoBlockLegacy *linfo)
 {
 VirtIODevice *vdev = virtio_ccw_get_vdev(sch);
+uint16_t index = info ? info-index : linfo-index;
+uint16_t num = info ? info-num : linfo-num;
+uint64_t desc = info ? info-desc : linfo-queue;
 
 if (index  VIRTIO_PCI_QUEUE_MAX) {
 return -EINVAL;
 }
 
 /* Current code in virtio.c relies on 4K alignment. */
-if (addr  (align != 4096)) {
+if (linfo  desc  (linfo-align != 4096)) {
 return -EINVAL;
 }
 
@@ -287,8 +299,12 @@ static int virtio_ccw_set_vqs(SubchDev *sch, uint64_t 
addr, uint32_t align,
 return -EINVAL;
 }
 
-virtio_queue_set_addr(vdev, index, addr);
-if (!addr) {
+if (info) {
+virtio_queue_set_rings(vdev, index, desc, info-avail, info-used);
+} else {
+virtio_queue_set_addr(vdev, index, desc);
+}
+if (!desc) {
 virtio_queue_set_vector(vdev, index, 0);
 } else {
 /* Fail if we don't have a big enough queue. */
@@ -303,10 +319,66 @@ static int virtio_ccw_set_vqs(SubchDev *sch, uint64_t 
addr, uint32_t align,
 return 0;
 }
 
-static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw)
+static int virtio_ccw_handle_set_vq(SubchDev *sch, CCW1 ccw, bool check_len,
+bool is_legacy)
 {
 int ret;
 VqInfoBlock info;
+VqInfoBlockLegacy linfo;
+size_t info_len = is_legacy ? sizeof(linfo) : sizeof(info);
+
+if (check_len) {
+if (ccw.count != info_len) {
+return -EINVAL;
+}
+} else if (ccw.count  info_len) {
+/* Can't execute command. */
+return -EINVAL;
+}
+if (!ccw.cda) {
+return -EFAULT;
+}
+if (is_legacy) {
+linfo.queue = ldq_be_phys(address_space_memory, ccw.cda);
+linfo.align = ldl_be_phys(address_space_memory,
+  ccw.cda + sizeof(linfo.queue));
+linfo.index = lduw_be_phys(address_space_memory,
+   ccw.cda + sizeof(linfo.queue)
+   + sizeof(linfo.align));
+linfo.num = lduw_be_phys(address_space_memory,
+ ccw.cda + sizeof(linfo.queue)
+ + sizeof(linfo.align)
+ + sizeof(linfo.index));
+ret = virtio_ccw_set_vqs(sch, NULL, linfo);
+} else {
+info.desc = ldq_be_phys(address_space_memory, ccw.cda);
+info.index = lduw_be_phys(address_space_memory,
+  ccw.cda + sizeof(info.desc)
+  + sizeof(info.res0));
+info.num = lduw_be_phys(address_space_memory,
+ccw.cda + sizeof(info.desc)
+  + sizeof(info.res0)
+  + sizeof(info.index));
+info.avail = ldq_be_phys(address_space_memory,
+ ccw.cda + sizeof(info.desc)
+ + sizeof(info.res0)
+ + sizeof(info.index)
+ + sizeof(info.num));
+info.used = ldq_be_phys(address_space_memory,
+ccw.cda + sizeof(info.desc)
++ sizeof(info.res0)
++ sizeof(info.index)
++ sizeof(info.num)
++ sizeof(info.avail));
+ret = virtio_ccw_set_vqs(sch, 

Re: [Qemu-devel] [PATCH 1/3] qemu-io: Let -c abort raise any signal

2014-11-25 Thread Max Reitz

On 2014-11-25 at 14:19, Markus Armbruster wrote:

Max Reitz mre...@redhat.com writes:


abort() has the sometimes undesirable side-effect of generating a core
dump. If that is not needed, SIGKILL has the same effect of abruptly
crash qemu; without a core dump.

Therefore, this patch allows to use the qemu-io abort command to raise
any signal.

Signed-off-by: Max Reitz mre...@redhat.com
---
  qemu-io-cmds.c | 59 +++---
  1 file changed, 56 insertions(+), 3 deletions(-)

diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
index d94fb1e..5d39cf4 100644
--- a/qemu-io-cmds.c
+++ b/qemu-io-cmds.c
@@ -2036,18 +2036,71 @@ static const cmdinfo_t wait_break_cmd = {
 .oneline= waits for the suspension of a request,
  };
  
-static int abort_f(BlockDriverState *bs, int argc, char **argv)

+
+static void abort_help(void)
  {
-abort();
+printf(
+\n
+ simulates a program crash\n
+\n
+ Invokes abort(), or raise(signal) if a signal number is specified.\n
+ -S, -- number of the signal to raise()\n
+\n);
  }
  
+static int abort_f(BlockDriverState *bs, int argc, char **argv);

+
  static const cmdinfo_t abort_cmd = {
 .name   = abort,
 .cfunc  = abort_f,
+   .argmin = 0,
+   .argmax = 2,
 .flags  = CMD_NOFILE_OK,
-   .oneline= simulate a program crash using abort(3),
+   .args   = [-S signal],
+   .oneline= simulate a program crash,
+   .help   = abort_help,
  };

This overloads the abort command with a kill command.


abort() does a bit more than raise(SIGABRT), but all that it does more 
is basically Make sure that raise(SIGABRT) actually works. So abort() 
basically is already a kill me command (kill -SIGABRT). I think 
overloading it is fine, but I wouldn't have that much of a problem to 
introduce another command, but it was just simpler this way.



Do we really need a way to send arbitrary signals?


Why not? I'm implementing the functionality here for a single signal, 
it's not that hard to do it for arbitrary signals, so I did it.



If yes, shouldn't we
call it kill rather than abort?


I'd call it raise (for obious reasons), but will not rename abort. I 
can create a separate raise or kill command, though, obviously. But 
as abort is basically a raise 6 (or abort -S 6 with this version 
of the series), I think it's completely fine to overload abort.



I suspect fooling around with signals isn't necessary, and a simple
exit(1) would do.


No, because that would execute the atexit() functions. I don't know 
whether such are used in qemu, but if we want to simulate a crash, 
exit() is not the right function to do that.


  
+static int abort_f(BlockDriverState *bs, int argc, char **argv)

+{
+int c;
+int sig = -1;
+
+while ((c = getopt(argc, argv, S:)) != EOF) {
+switch (c) {
+case 'S':
+sig = cvtnum(optarg);
+if (sig  0) {
+printf(non-numeric signal number argument -- %s\n, 
optarg);
+return 0;
+}
+break;
+
+default:
+return qemuio_command_usage(abort_cmd);
+}
+}
+
+if (optind != argc) {
+return qemuio_command_usage(abort_cmd);
+}
+
+if (sig  0) {
+abort();
+} else {
+/* While abort() does flush all open streams, using raise() to kill 
this
+ * process does not necessarily. At least stdout and stderr (although
+ * the latter should be non-buffered anyway) should be flushed, though.
+ */
+fflush(stdout);
+fflush(stderr);

Without -S, we flush all streams.  With -S, we flush only stdout and
stderr.  The inconsistency is ugly.  Could be avoided with fcloseall(),
except it's a GNU extension.  Or drop the non-signal path entirely, and
raise(SIGABRT) instead of abort().


Except abort() does a bit more.

Because I think not flushing any stream except for stdout and stderr is 
closer to a real crash, I think making sig = 6 the default and thus 
dropping the non-signal path is the better option.


Max



Re: [Qemu-devel] [PATCH v2 00/12] qcow2: Add new overlap check functions

2014-11-25 Thread Stefan Hajnoczi
On Mon, Nov 24, 2014 at 04:56:48PM +0100, Max Reitz wrote:
 * CPU usage at runtime decreased by 150 to 275 percent on
   overlap-check-heavy tasks
 * No additional performance problems at loading time (in theory has the
   same runtime complexity as a single overlap check right now; in
   practice I could not find any problems)
 * Decent RAM usage (40 kB for a 1 TB image with 64 kB clusters; 40 MB
   for a 1 PB image etc. pp.)

RAM usage seems fine and the CPU reduction is very nice.

I will review the individual patches for QEMU 2.3.  If there is demand
it can be added to the QEMU 2.2 -stable branch at a later date.


pgpgO5evuY9uP.pgp
Description: PGP signature


Re: [Qemu-devel] [PATCH for-2.3 3/5] poll: Add epoll implementation for qemu_poll

2014-11-25 Thread Stefan Hajnoczi
On Tue, Nov 25, 2014 at 04:07:57PM +0800, Fam Zheng wrote:
 +QEMUPoll *qemu_poll_new(void)
 +{
 +int epollfd;
 +QEMUPoll *qpoll = g_new0(QEMUPoll, 1);
 +epollfd = epoll_create1(EPOLL_CLOEXEC);
 +if (epollfd  0) {
 +perror(epoll_create1:);
 +abort();
 +}
 +qpoll-epollfd = epollfd;
 +qpoll-max_events = 1;
 +g_free(qpoll-events);

What is the point of this g_free() call?


pgpAFmZMzHpl3.pgp
Description: PGP signature


Re: [Qemu-devel] [PATCH 0/3] iotests: Fix test 039

2014-11-25 Thread Markus Armbruster
Max Reitz mre...@redhat.com writes:

 On 2014-11-25 at 14:20, Markus Armbruster wrote:
 Max Reitz mre...@redhat.com writes:

 On 2014-11-25 at 13:21, Markus Armbruster wrote:
 Max Reitz mre...@redhat.com writes:

 Test 039 used to fail
 I'm confused: used to suggests it doesn't anymore, but you sending a
 patches strongly suggests something's broken.
 Well, it used to fail before this series. :-P

 You're right, this sounds bad. Currently, 039 does fail, at least on
 any system with a /proc/sys/kernel/core_pattern passing the dump to
 another program. After this series, it does no longer.

 because qemu-io -c abort may generate core dumps
 even with ulimit -c 0 (and the output then contains (core dumped)).
 How?
 See the patches[1][2] by Mao Chuan Li. If
 /proc/sys/kernel/core_pattern passes the dump to another program,
 ulimit -c 0 does not matter.

 [1] http://lists.nongnu.org/archive/html/qemu-devel/2014-11/msg02092.html
 [2] http://lists.nongnu.org/archive/html/qemu-devel/2014-11/msg02093.html

 The problem with those patches is that they require access to
 /proc/sys/kernel/core_pattern. I don't like having to run the iotests
 as root.
 To me, this sounds like a case of doctor, it hurts when I do this.

 What do you mean? That I don't want the iotests to run as root? Or
 that I don't want to go the alternative of filtering out the (core
 dumped) message?

I mean:

Doctor, it hurts when I write weird stuff to
/proc/sys/kernel/core_pattern.

Don't do that then.

If you want to be a nicer doc than me, go right ahead.

[...]



Re: [Qemu-devel] [PATCH 0/3] iotests: Fix test 039

2014-11-25 Thread Max Reitz

On 2014-11-25 at 14:48, Markus Armbruster wrote:

Max Reitz mre...@redhat.com writes:


On 2014-11-25 at 14:20, Markus Armbruster wrote:

Max Reitz mre...@redhat.com writes:


On 2014-11-25 at 13:21, Markus Armbruster wrote:

Max Reitz mre...@redhat.com writes:


Test 039 used to fail

I'm confused: used to suggests it doesn't anymore, but you sending a
patches strongly suggests something's broken.

Well, it used to fail before this series. :-P

You're right, this sounds bad. Currently, 039 does fail, at least on
any system with a /proc/sys/kernel/core_pattern passing the dump to
another program. After this series, it does no longer.


 because qemu-io -c abort may generate core dumps
even with ulimit -c 0 (and the output then contains (core dumped)).

How?

See the patches[1][2] by Mao Chuan Li. If
/proc/sys/kernel/core_pattern passes the dump to another program,
ulimit -c 0 does not matter.

[1] http://lists.nongnu.org/archive/html/qemu-devel/2014-11/msg02092.html
[2] http://lists.nongnu.org/archive/html/qemu-devel/2014-11/msg02093.html

The problem with those patches is that they require access to
/proc/sys/kernel/core_pattern. I don't like having to run the iotests
as root.

To me, this sounds like a case of doctor, it hurts when I do this.

What do you mean? That I don't want the iotests to run as root? Or
that I don't want to go the alternative of filtering out the (core
dumped) message?

I mean:

 Doctor, it hurts when I write weird stuff to
 /proc/sys/kernel/core_pattern.

 Don't do that then.

If you want to be a nicer doc than me, go right ahead.


I don't write weird stuff there. My default system configuration does 
(and mine is not the only one):


$ uname -r
3.17.3-200.fc20.x86_64
$ cat /proc/sys/kernel/core_pattern
|/usr/sbin/chroot /proc/%P/root /usr/libexec/abrt-hook-ccpp %s %c %p %u 
%g %t e


Max



Re: [Qemu-devel] [Xen-devel] [v2 1/4] Qemu-Xen-vTPM: Support for Xen stubdom vTPM command line options

2014-11-25 Thread Xu, Quan


 -Original Message-
 From: xen-devel-boun...@lists.xen.org
 [mailto:xen-devel-boun...@lists.xen.org] On Behalf Of Eric Blake
 Sent: Tuesday, November 25, 2014 1:31 AM
 To: Xu, Quan; qemu-devel@nongnu.org
 Cc: xen-de...@lists.xen.org; arm...@redhat.com; lcapitul...@redhat.com
 Subject: Re: [Xen-devel] [Qemu-devel] [v2 1/4] Qemu-Xen-vTPM: Support for
 Xen stubdom vTPM command line options
 
 On 11/24/2014 10:19 AM, Eric Blake wrote:
  On 11/23/2014 09:10 PM, Quan Xu wrote:
  Signed-off-by: Quan Xu quan...@intel.com
  ---
   configure| 14 ++
   hmp.c|  7 +++
   qapi-schema.json | 20 ++--  qemu-options.hx  |
 13
  +++--
   tpm.c|  7 ++-
   5 files changed, 56 insertions(+), 5 deletions(-)
 
 Also, this message was not threaded properly; it appeared as a top-level
 thread along with three other threads for its siblings, instead of all four
 patches being in-reply-to the 0/4 cover letter.
 
Thanks Eric.

Should I: 

V2 is version number, 
1. $git format-patch --subject-prefix=v2 -ns --cover-letter master 
  Then, edit -cover-letter.patch 

2. $git format-patch --subject-prefix=v2 -4 
  Then, commit these 4 patch and -cover-letter.patch

right?


Thanks 
Quan Xu

 --
 Eric Blake   eblake redhat com+1-919-301-3266
 Libvirt virtualization library http://libvirt.org



Re: [Qemu-devel] [v2 1/4] Qemu-Xen-vTPM: Support for Xen stubdom vTPM command line options

2014-11-25 Thread Xu, Quan


 -Original Message-
 From: Eric Blake [mailto:ebl...@redhat.com]
 Sent: Tuesday, November 25, 2014 1:19 AM
 To: Xu, Quan; qemu-devel@nongnu.org
 Cc: xen-de...@lists.xen.org; lcapitul...@redhat.com; arm...@redhat.com
 Subject: Re: [v2 1/4] Qemu-Xen-vTPM: Support for Xen stubdom vTPM
 command line options
 
 On 11/23/2014 09:10 PM, Quan Xu wrote:
  Signed-off-by: Quan Xu quan...@intel.com
  ---
   configure| 14 ++
   hmp.c|  7 +++
   qapi-schema.json | 20 ++--  qemu-options.hx  | 13
  +++--
   tpm.c|  7 ++-
   5 files changed, 56 insertions(+), 5 deletions(-)
 
 
  +++ b/qapi-schema.json
  @@ -2855,8 +2855,12 @@
   # @passthrough: TPM passthrough type
   #
   # Since: 1.5
  +#
  +# @xenstubdoms: TPM xenstubdoms type
  +#
  +# Since: 2.3
 
 Typically, this would be done as:
 
 # @passthrough: TPM passthrough type
 #
 # @xenstubdoms: TPM xenstubdoms type (since 2.3) # # Since: 1.5
 

Thanks  Eric. 
I will modify it as your suggestion in v3.

 as the enum itself was added in 1.5, not 2.3.
 
 --
 Eric Blake   eblake redhat com+1-919-301-3266
 Libvirt virtualization library http://libvirt.org



Re: [Qemu-devel] [PATCH for-2.3 0/5] aio: Support epoll by introducing qemu_poll abstraction

2014-11-25 Thread Stefan Hajnoczi
On Tue, Nov 25, 2014 at 04:07:54PM +0800, Fam Zheng wrote:
 ppoll(2) doesn't scale as well as epoll: The elapsed time of the syscall is
 linear to the number of fd's we poll, which hurts performance a bit when the
 number of devices are many, or when a virtio device registers many virtqueues
 (virtio-serial, for instance).
 
 To show some data from my test on current master:
 
  - As a base point (10~20 fd's), it takes 22000 ns for each qemu_poll_ns.
  - Add 10 virtio-serial, which adds some 6 hundreds of fd's in the main loop.
The time spent in qemu_poll_ns goes up to 75000 ns.
 
 This series introduces qemu_poll, which is implemented  with g_poll and epoll,
 decided at configure time with CONFIG_EPOLL.
 
 After this change, the times to do the same thing with qemu_poll (more
 precisely, with a sequence of qemu_poll_set_fds(), qemu_poll(),
 qemu_poll_get_events() followed by syncing back to gpollfds), are reduced to
 21000 ns and 25000 ns, respectively.
 
 We are still not O(1) because as a transition, the qemu_poll_set_fds before
 qemu_poll is not optimized out yet.

You didn't mention the change from nanosecond to millisecond timeouts.

QEMU did not use g_poll() for a long time because g_poll() only provides
milliseconds.  It seems this patch series undoes the work that has been
done to keep nanosecond timeouts in QEMU.

Do you think it is okay to forget about 1 ms timeout precision?

If we go ahead with this, we'll need to rethink other timeouts in QEMU.
For example, is there a point in setting timer slack to 1 ns if we
cannot even specify ns wait times?

Perhaps timerfd is needed before we can use epoll.  Hopefully the
overall performance effect will be positive with epoll + timerfd,
compared to ppoll().

Stefan


pgpAPxRvSykqV.pgp
Description: PGP signature


Re: [Qemu-devel] [Xen-devel] virtio leaks cpu mappings, was: qemu crash with virtio on Xen domUs (backtrace included)

2014-11-25 Thread Stefano Stabellini
On Tue, 25 Nov 2014, Jason Wang wrote:
 On 11/25/2014 02:44 AM, Stefano Stabellini wrote:
  On Mon, 24 Nov 2014, Stefano Stabellini wrote:
  On Mon, 24 Nov 2014, Stefano Stabellini wrote:
  CC'ing Paolo.
 
 
  Wen,
  thanks for the logs.
 
  I investigated a little bit and it seems to me that the bug occurs when
  QEMU tries to unmap only a portion of a memory region previously mapped.
  That doesn't work with xen-mapcache.
 
  See these logs for example:
 
  DEBUG address_space_map phys_addr=78ed8b44 vaddr=7fab50afbb68 len=0xa
  DEBUG address_space_unmap vaddr=7fab50afbb68 len=0x6
  Sorry the logs don't quite match, it was supposed to be:
 
  DEBUG address_space_map phys_addr=78ed8b44 vaddr=7fab50afbb64 len=0xa
  DEBUG address_space_unmap vaddr=7fab50afbb68 len=0x6
  It looks like the problem is caused by iov_discard_front, called by
  virtio_net_handle_ctrl. By changing iov_base after the sg has already
  been mapped (cpu_physical_memory_map), it causes a leak in the mapping
  because the corresponding cpu_physical_memory_unmap will only unmap a
  portion of the original sg.  On Xen the problem is worse because
  xen-mapcache aborts.
 
  diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
  index 2ac6ce5..b2b5c2d 100644
  --- a/hw/net/virtio-net.c
  +++ b/hw/net/virtio-net.c
  @@ -775,7 +775,7 @@ static void virtio_net_handle_ctrl(VirtIODevice *vdev, 
  VirtQueue *vq)
   struct iovec *iov;
   unsigned int iov_cnt;
   
  -while (virtqueue_pop(vq, elem)) {
  +while (virtqueue_pop_nomap(vq, elem)) {
   if (iov_size(elem.in_sg, elem.in_num)  sizeof(status) ||
   iov_size(elem.out_sg, elem.out_num)  sizeof(ctrl)) {
   error_report(virtio-net ctrl missing headers);
  @@ -784,8 +784,12 @@ static void virtio_net_handle_ctrl(VirtIODevice *vdev, 
  VirtQueue *vq)
   
   iov = elem.out_sg;
   iov_cnt = elem.out_num;
  -s = iov_to_buf(iov, iov_cnt, 0, ctrl, sizeof(ctrl));
   iov_discard_front(iov, iov_cnt, sizeof(ctrl));
  +
  +virtqueue_map_sg(elem.in_sg, elem.in_addr, elem.in_num, 1);
  +virtqueue_map_sg(elem.out_sg, elem.out_addr, elem.out_num, 0);
  +
  +s = iov_to_buf(iov, iov_cnt, 0, ctrl, sizeof(ctrl));
 
 Does this really work?

It seems to work here, as in it doesn't crash QEMU and I am able to boot
a guest with network. I didn't try any MAC related commands.


 The code in fact skips the location that contains
 virtio_net_ctrl_hdr. And virtio_net_handle_mac() still calls
 iov_discard_front().

 How about copy iov to a temp variable and use it in this function?

That would only work if I moved the cpu_physical_memory_unmap call
outside of virtqueue_fill, so that we can pass different iov to them.
We need to unmap the same iov that was previously mapped by
virtqueue_pop.



[Qemu-devel] [PATCH v2 for-2.2] input: move input-send-event into experimental namespace

2014-11-25 Thread Markus Armbruster
From: Gerd Hoffmann kra...@redhat.com

Ongoing discussions on how we are going to specify the console,
so tag the command as experiental so we can refine things in
the 2.3 development cycle.

Signed-off-by: Gerd Hoffmann kra...@redhat.com
[Spell out not a stable API, and x- the QAPI schema, too]
Signed-off-by: Markus Armbruster arm...@redhat.com

Signed-off-by: Markus Armbruster arm...@redhat.com
---
I would've ripped out the command entirely, but Gerd's approach works
for me, too.  Since time for -rc3 is running out, I'm respinning his
patch with my and Amos's review comments worked in.  Hope that's okay.

We should also pick Amos's [PATCH] qmp: fix typo in input-send-event
examples.

 qapi-schema.json |  6 --
 qmp-commands.hx  | 16 +---
 ui/input.c   |  4 ++--
 3 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/qapi-schema.json b/qapi-schema.json
index d0926d9..9ffdcf8 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -3245,7 +3245,7 @@
   'abs' : 'InputMoveEvent' } }
 
 ##
-# @input-send-event
+# @x-input-send-event
 #
 # Send input event(s) to guest.
 #
@@ -3257,8 +3257,10 @@
 #
 # Since: 2.2
 #
+# Note: this command is experimental, and not a stable API.
+#
 ##
-{ 'command': 'input-send-event',
+{ 'command': 'x-input-send-event',
   'data': { '*console':'int', 'events': [ 'InputEvent' ] } }
 
 ##
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 8812401..718dd92 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -3791,13 +3791,13 @@ Example:
 EQMP
 
 {
-.name   = input-send-event,
+.name   = x-input-send-event,
 .args_type  = console:i?,events:q,
-.mhandler.cmd_new = qmp_marshal_input_input_send_event,
+.mhandler.cmd_new = qmp_marshal_input_x_input_send_event,
 },
 
 SQMP
-@input-send-event
+@x-input-send-event
 -
 
 Send input event to guest.
@@ -3811,17 +3811,19 @@ The consoles are visible in the qom tree, under
 /backend/console[$index]. They have a device link and head property, so
 it is possible to map which console belongs to which device and display.
 
+Note: this command is experimental, and not a stable API.
+
 Example (1):
 
 Press left mouse button.
 
-- { execute: input-send-event,
+- { execute: x-input-send-event,
 arguments: { console: 0,
events: [ { type: btn,
 data : { down: true, button: Left } } } }
 - { return: {} }
 
-- { execute: input-send-event,
+- { execute: x-input-send-event,
 arguments: { console: 0,
events: [ { type: btn,
 data : { down: false, button: Left } } } }
@@ -3831,7 +3833,7 @@ Example (2):
 
 Press ctrl-alt-del.
 
-- { execute: input-send-event,
+- { execute: x-input-send-event,
  arguments: { console: 0, events: [
 { type: key, data : { down: true,
   key: {type: qcode, data: ctrl } } },
@@ -3845,7 +3847,7 @@ Example (3):
 
 Move mouse pointer to absolute coordinates (2, 400).
 
-- { execute: input-send-event ,
+- { execute: x-input-send-event ,
   arguments: { console: 0, events: [
{ type: abs, data : { axis: X, value : 2 } },
{ type: abs, data : { axis: Y, value : 400 } } ] } }
diff --git a/ui/input.c b/ui/input.c
index 37ff46f..7ba99e5 100644
--- a/ui/input.c
+++ b/ui/input.c
@@ -122,8 +122,8 @@ qemu_input_find_handler(uint32_t mask, QemuConsole *con)
 return NULL;
 }
 
-void qmp_input_send_event(bool has_console, int64_t console,
-  InputEventList *events, Error **errp)
+void qmp_x_input_send_event(bool has_console, int64_t console,
+InputEventList *events, Error **errp)
 {
 InputEventList *e;
 QemuConsole *con;
-- 
1.9.3




Re: [Qemu-devel] [PATCH] input: move input-send-event into experimental namespace

2014-11-25 Thread Markus Armbruster
Markus Armbruster arm...@redhat.com writes:

 Amos Kong ak...@redhat.com writes:

 On Tue, Nov 25, 2014 at 09:06:34AM +0100, Markus Armbruster wrote:
 Gerd Hoffmann kra...@redhat.com writes:
 
  Ongoing discussions on how we are going to specify the console,
  so tag the command as experiemntal so we can refine things in
  the 2.3 development cycle.
 
  Signed-off-by: Gerd Hoffmann kra...@redhat.com
  ---
   qmp-commands.hx | 12 ++--
 
 Don't you need to patch qapi-schema.json, too?

 Not necessary in function level.

 s/need to/want to/?

 For consistency, especially because the QAPI schema also serves as QMP
 command documentation.

 Do we actually explain x- means experimental anywhere?

 What's the official way to make command experimental?

 I think we have an understanding / convention that an x- prefix marks
 names as unstable API.  But I can't find it spelled out anywhere.
 Anyway, separate issue.

 Quote from qapi-schema.json:
 | # Notes: This command is experimental and may change
 | syntax in future releases.

 Next to the return type ObjectTypeInfo rathe than the command
 qom-list-types, blech.

I went with Note: this command is experimental, and not a stable API,
following precedence in qemu-options.hx.



Re: [Qemu-devel] [PATCH for-2.3 0/5] aio: Support epoll by introducing qemu_poll abstraction

2014-11-25 Thread Paolo Bonzini
-BEGIN PGP SIGNED MESSAGE-
Hash: SHA1



On 25/11/2014 14:52, Stefan Hajnoczi wrote:
 Do you think it is okay to forget about 1 ms timeout precision?
 
 If we go ahead with this, we'll need to rethink other timeouts in 
 QEMU. For example, is there a point in setting timer slack to 1 ns 
 if we cannot even specify ns wait times?
 
 Perhaps timerfd is needed before we can use epoll.  Hopefully the 
 overall performance effect will be positive with epoll + timerfd, 
 compared to ppoll().

You can also use POLLIN with the epoll file descriptor, i.e. do ppoll
followed (if there's no timeout) by epoll_wait with zero timeout.

Paolo
-BEGIN PGP SIGNATURE-
Version: GnuPG v2

iQEcBAEBAgAGBQJUdIqEAAoJEL/70l94x66DdtsH/RIXbk6faPWdb3MXaHmAoH1E
z7q8cGuLPkI7XT54BYbiBFFn4MqS0XxLHLJ69CksFEC5u4wNl9vqsLLJrN/+uZe5
PznE7madjam32ZVtUbzfRwrBtO0KFgyXEiZfR9stAVXW+/KIUAaWU5rQ2IW6GqHg
skt2GGmKfrCbyvmxVhSt2oMDRZ7O2Tquox6eLYizQX6JJ3/5vDqpzXTKE/Ix+wnt
R3FA3IZkQuZMQPAFsMKj0AajN178RGiqXaB3UIR2YmPN1DWyjkfN05WmPgMpvZa/
eX70AhUdOjLzncfLU3bX9EAsml0s2Hsj5gKRYT6B5d8YK2b0ba3dCqgZRPV3+bo=
=8Awx
-END PGP SIGNATURE-



Re: [Qemu-devel] [PATCH 0/3] iotests: Fix test 039

2014-11-25 Thread Markus Armbruster
Max Reitz mre...@redhat.com writes:

 On 2014-11-25 at 14:48, Markus Armbruster wrote:
 Max Reitz mre...@redhat.com writes:

 On 2014-11-25 at 14:20, Markus Armbruster wrote:
 Max Reitz mre...@redhat.com writes:

 On 2014-11-25 at 13:21, Markus Armbruster wrote:
 Max Reitz mre...@redhat.com writes:

 Test 039 used to fail
 I'm confused: used to suggests it doesn't anymore, but you sending a
 patches strongly suggests something's broken.
 Well, it used to fail before this series. :-P

 You're right, this sounds bad. Currently, 039 does fail, at least on
 any system with a /proc/sys/kernel/core_pattern passing the dump to
 another program. After this series, it does no longer.

  because qemu-io -c abort may generate core 
 dumps
 even with ulimit -c 0 (and the output then contains (core dumped)).
 How?
 See the patches[1][2] by Mao Chuan Li. If
 /proc/sys/kernel/core_pattern passes the dump to another program,
 ulimit -c 0 does not matter.

 [1] http://lists.nongnu.org/archive/html/qemu-devel/2014-11/msg02092.html
 [2] http://lists.nongnu.org/archive/html/qemu-devel/2014-11/msg02093.html

 The problem with those patches is that they require access to
 /proc/sys/kernel/core_pattern. I don't like having to run the iotests
 as root.
 To me, this sounds like a case of doctor, it hurts when I do this.
 What do you mean? That I don't want the iotests to run as root? Or
 that I don't want to go the alternative of filtering out the (core
 dumped) message?
 I mean:

  Doctor, it hurts when I write weird stuff to
  /proc/sys/kernel/core_pattern.

  Don't do that then.

 If you want to be a nicer doc than me, go right ahead.

 I don't write weird stuff there. My default system configuration does
 (and mine is not the only one):

 $ uname -r
 3.17.3-200.fc20.x86_64
 $ cat /proc/sys/kernel/core_pattern
 |/usr/sbin/chroot /proc/%P/root /usr/libexec/abrt-hook-ccpp %s %c %p
 %u %g %t e

abrt is one of the things I kill with prejudice on all my development
machines.



[Qemu-devel] [Regression] hmp: QEMU crash on device_del auto-completion

2014-11-25 Thread Marcel Apfelbaum
Hi,

The commits:
 - 6a1fa9f5 (monitor: add del completion for peripheral device) 
 - 66e56b13 (qdev: add qdev_build_hotpluggable_device_list helper)

cause a QEMU crash when trying to use HMP device_del auto-completion.
It can be easily reproduced by:
qemu-bin -enable-kvm  ~/images/fedora.qcow2 -monitor stdio -device 
virtio-net-pci,id=vnet
(qemu) device_del 
/home/mapfelba/git/upstream/qemu/hw/core/qdev.c:941:qdev_build_hotpluggable_device_list:
 Object 0x7f6ce04e4fe0 is not an instance of type device
Aborted (core dumped)

The root cause is qdev_build_hotpluggable_device_list going recursively over
all peripherals and their children assuming all are devices. It doesn't work
since PCI devices have at least on child which is a memory region (bus master).

Should we try to fix it for 2.2 or simply revert it?
Thanks,
  Marcel







[Qemu-devel] [PATCH 02/12] block/vvfat: qcow driver may not be found

2014-11-25 Thread Max Reitz
Although virtually impossible right now, bdrv_find_format(qcow) may
fail. The vvfat block driver should mind that case.

Cc: qemu-sta...@nongnu.org
Signed-off-by: Max Reitz mre...@redhat.com
---
 block/vvfat.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/block/vvfat.c b/block/vvfat.c
index cefe3a4..e34a789 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -2917,6 +2917,12 @@ static int enable_write_target(BDRVVVFATState *s, Error 
**errp)
 }
 
 bdrv_qcow = bdrv_find_format(qcow);
+if (!bdrv_qcow) {
+error_setg(errp, Failed to locate qcow driver);
+ret = -ENOENT;
+goto err;
+}
+
 opts = qemu_opts_create(bdrv_qcow-create_opts, NULL, 0, error_abort);
 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, s-sector_count * 512);
 qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, fat:);
-- 
1.9.3




[Qemu-devel] [PATCH 03/12] block/nfs: Add create_opts

2014-11-25 Thread Max Reitz
The nfs protocol driver is capable of creating images, but did not
specify any creation options. Fix it.

Cc: qemu-sta...@nongnu.org
Signed-off-by: Max Reitz mre...@redhat.com
---
 block/nfs.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/block/nfs.c b/block/nfs.c
index c76e368..ca9e24e 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -409,6 +409,19 @@ out:
 return ret;
 }
 
+static QemuOptsList nfs_create_opts = {
+.name = nfs-create-opts,
+.head = QTAILQ_HEAD_INITIALIZER(nfs_create_opts.head),
+.desc = {
+{
+.name = BLOCK_OPT_SIZE,
+.type = QEMU_OPT_SIZE,
+.help = Virtual disk size
+},
+{ /* end of list */ }
+}
+};
+
 static int nfs_file_create(const char *url, QemuOpts *opts, Error **errp)
 {
 int ret = 0;
@@ -470,6 +483,8 @@ static BlockDriver bdrv_nfs = {
 
 .instance_size  = sizeof(NFSClient),
 .bdrv_needs_filename= true,
+.create_opts= nfs_create_opts,
+
 .bdrv_has_zero_init = nfs_has_zero_init,
 .bdrv_get_allocated_file_size   = nfs_get_allocated_file_size,
 .bdrv_truncate  = nfs_file_truncate,
-- 
1.9.3




[Qemu-devel] [PATCH 05/12] qemu-img: Check create_opts before image creation

2014-11-25 Thread Max Reitz
If a driver supports image creation, it needs to set the .create_opts
field. We can use that to make sure .create_opts for both drivers
involved is not NULL for the target image in qemu-img convert, which is
important so that the create_opts pointer in img_convert() is not NULL
after the qemu_opts_append() calls and when going into
qemu_opts_create().

Cc: qemu-sta...@nongnu.org
Signed-off-by: Max Reitz mre...@redhat.com
---
 qemu-img.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/qemu-img.c b/qemu-img.c
index a42335c..8c4edf3 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -1531,6 +1531,20 @@ static int img_convert(int argc, char **argv)
 goto out;
 }
 
+if (!drv-create_opts) {
+error_report(Format driver '%s' does not support image creation,
+ drv-format_name);
+ret = -1;
+goto out;
+}
+
+if (!proto_drv-create_opts) {
+error_report(Protocol driver '%s' does not support image creation,
+ proto_drv-format_name);
+ret = -1;
+goto out;
+}
+
 create_opts = qemu_opts_append(create_opts, drv-create_opts);
 create_opts = qemu_opts_append(create_opts, proto_drv-create_opts);
 
-- 
1.9.3




[Qemu-devel] [PATCH 08/12] iotests: Add test for unsupported image creation

2014-11-25 Thread Max Reitz
Add a test for creating and amending images (amendment uses the creation
options) with formats not supporting creation over protocols not
supporting creation.

Cc: qemu-sta...@nongnu.org
Signed-off-by: Max Reitz mre...@redhat.com
---
 tests/qemu-iotests/113 | 76 ++
 tests/qemu-iotests/113.out | 15 +
 tests/qemu-iotests/group   |  1 +
 3 files changed, 92 insertions(+)
 create mode 100755 tests/qemu-iotests/113
 create mode 100644 tests/qemu-iotests/113.out

diff --git a/tests/qemu-iotests/113 b/tests/qemu-iotests/113
new file mode 100755
index 000..a2cd96b
--- /dev/null
+++ b/tests/qemu-iotests/113
@@ -0,0 +1,76 @@
+#!/bin/bash
+#
+# Test case for accessing creation options on image formats and
+# protocols not supporting image creation
+#
+# Copyright (C) 2014 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see http://www.gnu.org/licenses/.
+#
+
+# creator
+owner=mre...@redhat.com
+
+seq=$(basename $0)
+echo QA output created by $seq
+
+here=$PWD
+tmp=/tmp/$$
+status=1   # failure is the default!
+
+_cleanup()
+{
+   _cleanup_test_img
+}
+trap _cleanup; exit \$status 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+# We can only test one format here because we need its sample file
+_supported_fmt bochs
+_supported_proto nbd
+_supported_os Linux
+
+echo
+echo '=== Unsupported image creation in qemu-img create ==='
+echo
+
+$QEMU_IMG create -f $IMGFMT nbd://example.com 21 64M | _filter_imgfmt
+
+echo
+echo '=== Unsupported image creation in qemu-img convert ==='
+echo
+
+# We could use any input image format here, but this is a bochs test, so just
+# use the bochs image
+_use_sample_img empty.bochs.bz2
+$QEMU_IMG convert -f $IMGFMT -O $IMGFMT $TEST_IMG nbd://example.com 21 \
+| _filter_imgfmt
+
+echo
+echo '=== Unsupported format in qemu-img amend ==='
+echo
+
+# The protocol does not matter here
+_use_sample_img empty.bochs.bz2
+$QEMU_IMG amend -f $IMGFMT -o foo=bar $TEST_IMG 21 | _filter_imgfmt
+
+
+# success, all done
+echo
+echo '*** done'
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/113.out b/tests/qemu-iotests/113.out
new file mode 100644
index 000..00bdfd6
--- /dev/null
+++ b/tests/qemu-iotests/113.out
@@ -0,0 +1,15 @@
+QA output created by 113
+
+=== Unsupported image creation in qemu-img create ===
+
+qemu-img: nbd://example.com: Format driver 'IMGFMT' does not support image 
creation
+
+=== Unsupported image creation in qemu-img convert ===
+
+qemu-img: Format driver 'IMGFMT' does not support image creation
+
+=== Unsupported format in qemu-img amend ===
+
+qemu-img: Format driver 'IMGFMT' does not support any options to amend
+
+*** done
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 7dfe469..fd2c64a 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -112,3 +112,4 @@
 107 rw auto quick
 108 rw auto quick
 111 rw auto quick
+113 rw auto quick
-- 
1.9.3




[Qemu-devel] [PATCH 01/12] block: qcow2 driver may not be found

2014-11-25 Thread Max Reitz
Albeit absolutely impossible right now, bdrv_find_format(qcow2) may
fail. bdrv_append_temp_snapshot() should heed that case.

Cc: qemu-sta...@nongnu.org
Signed-off-by: Max Reitz mre...@redhat.com
---
 block.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/block.c b/block.c
index 866c8b4..b31fb67 100644
--- a/block.c
+++ b/block.c
@@ -1320,6 +1320,12 @@ int bdrv_append_temp_snapshot(BlockDriverState *bs, int 
flags, Error **errp)
 }
 
 bdrv_qcow2 = bdrv_find_format(qcow2);
+if (!bdrv_qcow2) {
+error_setg(errp, Failed to locate qcow2 driver);
+ret = -ENOENT;
+goto out;
+}
+
 opts = qemu_opts_create(bdrv_qcow2-create_opts, NULL, 0,
 error_abort);
 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size);
-- 
1.9.3




[Qemu-devel] [PATCH 04/12] block: Check create_opts before image creation

2014-11-25 Thread Max Reitz
If a driver supports image creation, it needs to set the .create_opts
field. We can use that to make sure .create_opts for both drivers
involved is not NULL in bdrv_img_create(), which is important so that
the create_opts pointer in that function is not NULL after the
qemu_opts_append() calls and when going into qemu_opts_create().

Cc: qemu-sta...@nongnu.org
Signed-off-by: Max Reitz mre...@redhat.com
---
 block.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/block.c b/block.c
index b31fb67..1e4a72f 100644
--- a/block.c
+++ b/block.c
@@ -5560,6 +5560,18 @@ void bdrv_img_create(const char *filename, const char 
*fmt,
 return;
 }
 
+if (!drv-create_opts) {
+error_setg(errp, Format driver '%s' does not support image creation,
+   drv-format_name);
+return;
+}
+
+if (!proto_drv-create_opts) {
+error_setg(errp, Protocol driver '%s' does not support image 
creation,
+   proto_drv-format_name);
+return;
+}
+
 create_opts = qemu_opts_append(create_opts, drv-create_opts);
 create_opts = qemu_opts_append(create_opts, proto_drv-create_opts);
 
-- 
1.9.3




[Qemu-devel] [PATCH 10/12] qcow2: Flushing the caches in qcow2_close may fail

2014-11-25 Thread Max Reitz
qcow2_cache_flush() may fail; if one of the caches failed to be flushed
successfully to disk in qcow2_close() the image should not be marked
clean, and we should emit a warning.

Cc: qemu-sta...@nongnu.org
Signed-off-by: Max Reitz mre...@redhat.com
---
 block/qcow2.c | 19 ---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index d120494..2bd2a53 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1428,10 +1428,23 @@ static void qcow2_close(BlockDriverState *bs)
 s-l1_table = NULL;
 
 if (!(bs-open_flags  BDRV_O_INCOMING)) {
-qcow2_cache_flush(bs, s-l2_table_cache);
-qcow2_cache_flush(bs, s-refcount_block_cache);
+int ret1, ret2;
 
-qcow2_mark_clean(bs);
+ret1 = qcow2_cache_flush(bs, s-l2_table_cache);
+ret2 = qcow2_cache_flush(bs, s-refcount_block_cache);
+
+if (ret1) {
+error_report(Failed to flush the L2 table cache: %s,
+ strerror(-ret1));
+}
+if (ret2) {
+error_report(Failed to flush the refcount block cache: %s,
+ strerror(-ret2));
+}
+
+if (!ret1  !ret2) {
+qcow2_mark_clean(bs);
+}
 }
 
 qcow2_cache_destroy(bs, s-l2_table_cache);
-- 
1.9.3




[Qemu-devel] [PATCH 06/12] qemu-img: Check create_opts before image amendment

2014-11-25 Thread Max Reitz
The image options which can be amended are described by the .create_opts
field for every driver. This field must therefore be non-NULL so that
anything can be amended in the first place. Check that this holds true
before going into qemu_opts_create() (because if .create_opts is NULL,
the create_opts pointer in img_amend() will be NULL after
qemu_opts_append()).

Cc: qemu-sta...@nongnu.org
Signed-off-by: Max Reitz mre...@redhat.com
---
 qemu-img.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/qemu-img.c b/qemu-img.c
index 8c4edf3..7876258 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -2986,6 +2986,13 @@ static int img_amend(int argc, char **argv)
 goto out;
 }
 
+if (!bs-drv-create_opts) {
+error_report(Format driver '%s' does not support any options to 
amend,
+ fmt);
+ret = -1;
+goto out;
+}
+
 create_opts = qemu_opts_append(create_opts, bs-drv-create_opts);
 opts = qemu_opts_create(create_opts, NULL, 0, error_abort);
 if (options  qemu_opts_do_parse(opts, options, NULL)) {
-- 
1.9.3




[Qemu-devel] [PATCH 00/12] block: Various Coverity-spotted fixes

2014-11-25 Thread Max Reitz
This series fixes various issues spotted by Coverity. None of these is
critical; most are just If you do something crazy, qemu-img crashes or
But what if there is no qcow2 driver?. Therefore, while these are bug
fixes, it is a bit late to try to push them into 2.2.0. I am therefore
tempted to vote to target 2.3 instead.

Also, none is security-relevant. The only crashes which are fixed here
are sure to have resulted from dereferencing a NULL pointer.


Max Reitz (12):
  block: qcow2 driver may not be found
  block/vvfat: qcow driver may not be found
  block/nfs: Add create_opts
  block: Check create_opts before image creation
  qemu-img: Check create_opts before image creation
  qemu-img: Check create_opts before image amendment
  iotests: Only kill NBD server if it runs
  iotests: Add test for unsupported image creation
  qcow2: Prevent numerical overflow
  qcow2: Flushing the caches in qcow2_close may fail
  qcow2: Respect bdrv_truncate() error
  block/raw-posix: Fix ret in raw_open_common()

 block.c  | 18 +++
 block/nfs.c  | 15 +
 block/qcow2-cluster.c|  2 +-
 block/qcow2.c| 22 ++---
 block/raw-posix.c|  1 +
 block/vvfat.c|  6 
 qemu-img.c   | 21 
 tests/qemu-iotests/113   | 76 
 tests/qemu-iotests/113.out   | 15 +
 tests/qemu-iotests/common.rc |  4 ++-
 tests/qemu-iotests/group |  1 +
 11 files changed, 174 insertions(+), 7 deletions(-)
 create mode 100755 tests/qemu-iotests/113
 create mode 100644 tests/qemu-iotests/113.out

-- 
1.9.3




[Qemu-devel] [PATCH 09/12] qcow2: Prevent numerical overflow

2014-11-25 Thread Max Reitz
In qcow2_alloc_cluster_offset(), *num is limited to
INT_MAX  BDRV_SECTOR_BITS by all callers. However, since remaining is
of type uint64_t, we might as well cast *num to that type before
performing the shift.

Cc: qemu-sta...@nongnu.org
Signed-off-by: Max Reitz mre...@redhat.com
---
 block/qcow2-cluster.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index df0b2c9..1fea514 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1263,7 +1263,7 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, 
uint64_t offset,
 
 again:
 start = offset;
-remaining = *num  BDRV_SECTOR_BITS;
+remaining = (uint64_t)*num  BDRV_SECTOR_BITS;
 cluster_offset = 0;
 *host_offset = 0;
 cur_bytes = 0;
-- 
1.9.3




[Qemu-devel] [PATCH 12/12] block/raw-posix: Fix ret in raw_open_common()

2014-11-25 Thread Max Reitz
The return value must be negative on error; there is one place in
raw_open_common() where errp is set, but ret remains 0. Fix it.

Cc: qemu-sta...@nongnu.org
Signed-off-by: Max Reitz mre...@redhat.com
---
 block/raw-posix.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/block/raw-posix.c b/block/raw-posix.c
index b1af77e..96491fc 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -446,6 +446,7 @@ static int raw_open_common(BlockDriverState *bs, QDict 
*options,
 }
 
 if (fstat(s-fd, st)  0) {
+ret = -errno;
 error_setg_errno(errp, errno, Could not stat file);
 goto fail;
 }
-- 
1.9.3




Re: [Qemu-devel] [Regression] hmp: QEMU crash on device_del auto-completion

2014-11-25 Thread Luiz Capitulino
On Tue, 25 Nov 2014 16:04:19 +0200
Marcel Apfelbaum marce...@redhat.com wrote:

 Hi,
 
 The commits:
  - 6a1fa9f5 (monitor: add del completion for peripheral device) 
  - 66e56b13 (qdev: add qdev_build_hotpluggable_device_list helper)
 
 cause a QEMU crash when trying to use HMP device_del auto-completion.
 It can be easily reproduced by:
 qemu-bin -enable-kvm  ~/images/fedora.qcow2 -monitor stdio -device 
 virtio-net-pci,id=vnet
 (qemu) device_del 
 /home/mapfelba/git/upstream/qemu/hw/core/qdev.c:941:qdev_build_hotpluggable_device_list:
  Object 0x7f6ce04e4fe0 is not an instance of type device
 Aborted (core dumped)
 
 The root cause is qdev_build_hotpluggable_device_list going recursively over
 all peripherals and their children assuming all are devices. It doesn't work
 since PCI devices have at least on child which is a memory region (bus 
 master).
 
 Should we try to fix it for 2.2 or simply revert it?

Do you think you can post a patch in the next few days? If you can then
let's try to fix it, otherwise we better revert those commits.



[Qemu-devel] [PATCH 07/12] iotests: Only kill NBD server if it runs

2014-11-25 Thread Max Reitz
There may be NBD tests which do not create a sample image and simply
test whether wrong usage of the protocol is rejected as expected. In
this case, there will be no NBD server and trying to kill it during
clean-up will fail.

Cc: qemu-sta...@nongnu.org
Signed-off-by: Max Reitz mre...@redhat.com
---
 tests/qemu-iotests/common.rc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
index 9c49deb..f2554ec 100644
--- a/tests/qemu-iotests/common.rc
+++ b/tests/qemu-iotests/common.rc
@@ -175,7 +175,9 @@ _cleanup_test_img()
 case $IMGPROTO in
 
 nbd)
-kill $QEMU_NBD_PID
+if [ -n $QEMU_NBD_PID ]; then
+kill $QEMU_NBD_PID
+fi
 rm -f $TEST_IMG_FILE
 ;;
 file)
-- 
1.9.3




[Qemu-devel] [PATCH 11/12] qcow2: Respect bdrv_truncate() error

2014-11-25 Thread Max Reitz
bdrv_truncate() may fail and qcow2_write_compressed() should return the
error code in that case.

Cc: qemu-sta...@nongnu.org
Signed-off-by: Max Reitz mre...@redhat.com
---
 block/qcow2.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 2bd2a53..691f363 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -2163,8 +2163,7 @@ static int qcow2_write_compressed(BlockDriverState *bs, 
int64_t sector_num,
 /* align end of file to a sector boundary to ease reading with
sector based I/Os */
 cluster_offset = bdrv_getlength(bs-file);
-bdrv_truncate(bs-file, cluster_offset);
-return 0;
+return bdrv_truncate(bs-file, cluster_offset);
 }
 
 if (nb_sectors != s-cluster_sectors) {
-- 
1.9.3




Re: [Qemu-devel] [PATCH v2 for-2.2] input: move input-send-event into experimental namespace

2014-11-25 Thread Luiz Capitulino
On Tue, 25 Nov 2014 14:54:17 +0100
Markus Armbruster arm...@redhat.com wrote:

 From: Gerd Hoffmann kra...@redhat.com
 
 Ongoing discussions on how we are going to specify the console,
 so tag the command as experiental so we can refine things in
 the 2.3 development cycle.
 
 Signed-off-by: Gerd Hoffmann kra...@redhat.com
 [Spell out not a stable API, and x- the QAPI schema, too]
 Signed-off-by: Markus Armbruster arm...@redhat.com
 
 Signed-off-by: Markus Armbruster arm...@redhat.com
 ---
 I would've ripped out the command entirely, but Gerd's approach works
 for me, too.  Since time for -rc3 is running out, I'm respinning his
 patch with my and Amos's review comments worked in.  Hope that's okay.
 
 We should also pick Amos's [PATCH] qmp: fix typo in input-send-event
 examples.

Is this going to be merged via Gerd's tree or QMP tree? In any case it
would be good to have Eric's review.

 
  qapi-schema.json |  6 --
  qmp-commands.hx  | 16 +---
  ui/input.c   |  4 ++--
  3 files changed, 15 insertions(+), 11 deletions(-)
 
 diff --git a/qapi-schema.json b/qapi-schema.json
 index d0926d9..9ffdcf8 100644
 --- a/qapi-schema.json
 +++ b/qapi-schema.json
 @@ -3245,7 +3245,7 @@
'abs' : 'InputMoveEvent' } }
  
  ##
 -# @input-send-event
 +# @x-input-send-event
  #
  # Send input event(s) to guest.
  #
 @@ -3257,8 +3257,10 @@
  #
  # Since: 2.2
  #
 +# Note: this command is experimental, and not a stable API.
 +#
  ##
 -{ 'command': 'input-send-event',
 +{ 'command': 'x-input-send-event',
'data': { '*console':'int', 'events': [ 'InputEvent' ] } }
  
  ##
 diff --git a/qmp-commands.hx b/qmp-commands.hx
 index 8812401..718dd92 100644
 --- a/qmp-commands.hx
 +++ b/qmp-commands.hx
 @@ -3791,13 +3791,13 @@ Example:
  EQMP
  
  {
 -.name   = input-send-event,
 +.name   = x-input-send-event,
  .args_type  = console:i?,events:q,
 -.mhandler.cmd_new = qmp_marshal_input_input_send_event,
 +.mhandler.cmd_new = qmp_marshal_input_x_input_send_event,
  },
  
  SQMP
 -@input-send-event
 +@x-input-send-event
  -
  
  Send input event to guest.
 @@ -3811,17 +3811,19 @@ The consoles are visible in the qom tree, under
  /backend/console[$index]. They have a device link and head property, so
  it is possible to map which console belongs to which device and display.
  
 +Note: this command is experimental, and not a stable API.
 +
  Example (1):
  
  Press left mouse button.
  
 -- { execute: input-send-event,
 +- { execute: x-input-send-event,
  arguments: { console: 0,
 events: [ { type: btn,
  data : { down: true, button: Left } } } }
  - { return: {} }
  
 -- { execute: input-send-event,
 +- { execute: x-input-send-event,
  arguments: { console: 0,
 events: [ { type: btn,
  data : { down: false, button: Left } } } }
 @@ -3831,7 +3833,7 @@ Example (2):
  
  Press ctrl-alt-del.
  
 -- { execute: input-send-event,
 +- { execute: x-input-send-event,
   arguments: { console: 0, events: [
  { type: key, data : { down: true,
key: {type: qcode, data: ctrl } } },
 @@ -3845,7 +3847,7 @@ Example (3):
  
  Move mouse pointer to absolute coordinates (2, 400).
  
 -- { execute: input-send-event ,
 +- { execute: x-input-send-event ,
arguments: { console: 0, events: [
 { type: abs, data : { axis: X, value : 2 } },
 { type: abs, data : { axis: Y, value : 400 } } ] 
 } }
 diff --git a/ui/input.c b/ui/input.c
 index 37ff46f..7ba99e5 100644
 --- a/ui/input.c
 +++ b/ui/input.c
 @@ -122,8 +122,8 @@ qemu_input_find_handler(uint32_t mask, QemuConsole *con)
  return NULL;
  }
  
 -void qmp_input_send_event(bool has_console, int64_t console,
 -  InputEventList *events, Error **errp)
 +void qmp_x_input_send_event(bool has_console, int64_t console,
 +InputEventList *events, Error **errp)
  {
  InputEventList *e;
  QemuConsole *con;




[Qemu-devel] [PATCH 6/6] softfloat: Clarify license status

2014-11-25 Thread Peter Maydell
The code in the softfloat source files is under a mixture of
licenses: the original code and many changes from QEMU contributors
are under the base SoftFloat-2a license; changes from Stefan Weil
and RedHat employees are GPLv2-or-later; changes from Fabrice Bellard
are under the BSD license. Clarify this in the comments at the
top of each affected source file, including a statement about
the assumed licensing for future contributions, so we don't need
to remember to ask patch submitters explicitly to pick a license.

Signed-off-by: Peter Maydell peter.mayd...@linaro.org
Acked-by: Andreas Färber afaer...@suse.de
Acked-by: Aurelien Jarno aurel...@aurel32.net
Acked-by: Avi Kivity avi.kiv...@gmail.com
Acked-by: Ben Taylor bentaylor.sol...@gmail.com
Acked-by: Blue Swirl blauwir...@gmail.com
Acked-by: Christophe Lyon christophe.l...@st.com
Acked-by: Fabrice Bellard fabr...@bellard.org
Acked-by: Guan Xuetao g...@mprc.pku.edu.cn
Acked-by: Juan Quintela quint...@redhat.com
Acked-by: Max Filippov jcmvb...@gmail.com
Acked-by: Paul Brook p...@codesourcery.com
Acked-by: Paolo Bonzini pbonz...@redhat.com
Acked-by: Peter Maydell peter.mayd...@linaro.org
Acked-by: Richard Henderson r...@twiddle.net
Acked-by: Richard Sandiford rdsandif...@googlemail.com
Acked-by: Stefan Weil s...@weilnetz.de
---
 fpu/softfloat-macros.h | 48 +-
 fpu/softfloat-specialize.h | 48 +-
 fpu/softfloat.c| 48 +-
 include/fpu/softfloat.h| 48 +-
 4 files changed, 188 insertions(+), 4 deletions(-)

diff --git a/fpu/softfloat-macros.h b/fpu/softfloat-macros.h
index ca1d81e..194360e 100644
--- a/fpu/softfloat-macros.h
+++ b/fpu/softfloat-macros.h
@@ -1,7 +1,18 @@
 /*
  * QEMU float support macros
  *
- * Derived from SoftFloat.
+ * The code in this source file is derived from release 2a of the SoftFloat
+ * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
+ * some later contributions) are provided under that license, as detailed 
below.
+ * It has subsequently been modified by contributors to the QEMU Project,
+ * so some portions are provided under:
+ *  the SoftFloat-2a license
+ *  the BSD license
+ *  GPL-v2-or-later
+ *
+ * Any future contributions to this file after December 1st 2014
+ * will be taken to be licensed under GPL-v2-or-later unless specifically
+ * indicated otherwise. See the COPYING file in the top-level directory.
  */
 
 /*
@@ -33,6 +44,41 @@ this code that are retained.
 ===
 */
 
+/* BSD licensing:
+ * Copyright (c) 2006, Fabrice Bellard
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its 
contributors
+ * may be used to endorse or promote products derived from this software 
without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* Portions of this work are licensed under the terms of the GNU GPL,
+ * version 2 or later. See the COPYING file in the top-level directory.
+ */
+
 /*
 | This macro tests for minimum version of the GNU C compiler.
 **/
diff --git a/fpu/softfloat-specialize.h b/fpu/softfloat-specialize.h
index 6de66ce..b65505f 100644
--- a/fpu/softfloat-specialize.h
+++ b/fpu/softfloat-specialize.h
@@ -1,7 +1,18 @@
 /*
  * QEMU float support
  *
- * Derived from SoftFloat.
+ * The code in this source file is derived from release 2a of the 

  1   2   3   >