Re: [Qemu-devel] [PATCH] spapr: pci: clean msi info when releasing it

2014-05-09 Thread liu ping fan
On Wed, May 7, 2014 at 3:20 PM, Alexey Kardashevskiy a...@ozlabs.ru wrote:
 On 05/07/2014 04:51 PM, Liu Ping Fan wrote:
 In current code, we use phb-msi_table[ndev].nvec to indicate whether
 this msi entries are used by a device or not. So when unplug a pci
 device, we should reset nvec to zero.

 Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
 ---
  hw/ppc/spapr_pci.c | 1 +
  1 file changed, 1 insertion(+)

 diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
 index cbef095..7b1dfe1 100644
 --- a/hw/ppc/spapr_pci.c
 +++ b/hw/ppc/spapr_pci.c
 @@ -316,6 +316,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, 
 sPAPREnvironment *spapr,
  rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
  return;
  }
 +phb-msi_table[ndev].nvec = 0;
  trace_spapr_pci_msi(Released MSIs, ndev, config_addr);
  rtas_st(rets, 0, RTAS_OUT_SUCCESS);
  rtas_st(rets, 1, 0);


 ibm,change-msi is called with 0 to disable MSIs. If later the guest decides
 to reenable MSI on the same device (rmmod + modprobe in the guest can do
 that I suppose), new block will be allocated because of this patch which is
 bad.

 And there is no PCI hotplug for SPAPR in upstream QEMU so this patch cannot
 possibly fix it :)

I saw your patch [PATCH 0/6] move interrupts from spapr to xics. And
it is great to consider the reclaim of irq.  So if I call something to
free the irq after phb-msi_table[ndev].nvec = 0, can it work ?

Thx,
Fan



 --
 Alexey




[Qemu-devel] [PATCH] spapr: pci: clean msi info when releasing it

2014-05-07 Thread Liu Ping Fan
In current code, we use phb-msi_table[ndev].nvec to indicate whether
this msi entries are used by a device or not. So when unplug a pci
device, we should reset nvec to zero.

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
 hw/ppc/spapr_pci.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index cbef095..7b1dfe1 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -316,6 +316,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, 
sPAPREnvironment *spapr,
 rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
 return;
 }
+phb-msi_table[ndev].nvec = 0;
 trace_spapr_pci_msi(Released MSIs, ndev, config_addr);
 rtas_st(rets, 0, RTAS_OUT_SUCCESS);
 rtas_st(rets, 1, 0);
-- 
1.8.1.4




[Qemu-devel] [PATCH 2/2] ppc: tcg: implement helper_nap

2014-01-05 Thread Liu Ping Fan
When nap, clear no persistent register as ISA spec says.

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
 target-ppc/excp_helper.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
index a9712bc..5dbb166 100644
--- a/target-ppc/excp_helper.c
+++ b/target-ppc/excp_helper.c
@@ -892,6 +892,20 @@ void helper_hrfid(CPUPPCState *env)
 
 void helper_nap(CPUPPCState *env)
 {
+int i;
+for (i = 0; i  32; i++) {
+env-gpr[i] = 0;
+}
+env-lr = 0;
+env-ctr = 0;
+for (i = 0; i  8; i++) {
+env-crf[i] = 0;
+}
+env-msr = 0;
+for (i = 0; i  32; i++) {
+env-fpr[i] = 0;
+}
+
 }
 #endif
 
-- 
1.8.1.4




[Qemu-devel] [PATCH 1/2] ppc: tcg: add nap insn support

2014-01-05 Thread Liu Ping Fan
Add the emulation of insn nap for hypervisor

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
 target-ppc/excp_helper.c | 4 
 target-ppc/helper.h  | 1 +
 target-ppc/translate.c   | 6 ++
 3 files changed, 11 insertions(+)

diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
index c959460..a9712bc 100644
--- a/target-ppc/excp_helper.c
+++ b/target-ppc/excp_helper.c
@@ -889,6 +889,10 @@ void helper_hrfid(CPUPPCState *env)
 do_rfi(env, env-spr[SPR_HSRR0], env-spr[SPR_HSRR1],
~((target_ulong)0x783F), 0);
 }
+
+void helper_nap(CPUPPCState *env)
+{
+}
 #endif
 
 /*/
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 6d282bb..87873db 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -17,6 +17,7 @@ DEF_HELPER_1(rfmci, void, env)
 #if defined(TARGET_PPC64)
 DEF_HELPER_1(rfid, void, env)
 DEF_HELPER_1(hrfid, void, env)
+DEF_HELPER_1(nap, void, env)
 #endif
 #endif
 
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 66c7771..ce63783 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -3798,6 +3798,11 @@ static void gen_hrfid(DisasContext *ctx)
 gen_sync_exception(ctx);
 #endif
 }
+
+static void gen_nap(DisasContext *ctx)
+{
+gen_helper_nap(cpu_env);
+}
 #endif
 
 /* sc */
@@ -8732,6 +8737,7 @@ GEN_HANDLER(rfi, 0x13, 0x12, 0x01, 0x03FF8001, PPC_FLOW),
 #if defined(TARGET_PPC64)
 GEN_HANDLER(rfid, 0x13, 0x12, 0x00, 0x03FF8001, PPC_64B),
 GEN_HANDLER(hrfid, 0x13, 0x12, 0x08, 0x03FF8001, PPC_64H),
+GEN_HANDLER(nap, 0x13, 0x12, 0x0d, 0x03FF8001, PPC_64H),
 #endif
 GEN_HANDLER(sc, 0x11, 0xFF, 0xFF, 0x03FFF01D, PPC_FLOW),
 GEN_HANDLER(tw, 0x1F, 0x04, 0x00, 0x0001, PPC_FLOW),
-- 
1.8.1.4




[Qemu-devel] [PATCH v9 0/2] bugs fix for hpet

2013-12-08 Thread Liu Ping Fan
I see the open of 2.0 development window, and rebase V8 with a small fix

v9:
  use PC_Q35_1_8_MACHINE_OPTIONS in pc_q35_machine_v1_8
v8:
  make piix/q35 compat diverge
  simplify the code, use hpet_irqs to pass intcap value


Liu Ping Fan (2):
  hpet: inverse polarity when pin above ISA_NUM_IRQS
  hpet: enable to entitle more irq pins for hpet

 hw/i386/pc.c | 19 ---
 hw/i386/pc_piix.c|  3 ++-
 hw/i386/pc_q35.c | 21 +
 hw/timer/hpet.c  | 23 +++
 include/hw/i386/pc.h | 24 +++-
 5 files changed, 77 insertions(+), 13 deletions(-)

-- 
1.8.1.4




[Qemu-devel] [PATCH 1/2] hpet: inverse polarity when pin above ISA_NUM_IRQS

2013-12-08 Thread Liu Ping Fan
According to hpet spec, hpet irq is high active. But according to
ICH spec, there is inversion before the input of ioapic. So the OS
will expect low active on this IRQ line. (On bare metal, if OS driver
claims high active on this line, spurious irq is generated)

We fold the emulation of this inversion inside the hpet logic.

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
 hw/timer/hpet.c | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
index 2eb75ea..0aee2c1 100644
--- a/hw/timer/hpet.c
+++ b/hw/timer/hpet.c
@@ -198,13 +198,23 @@ static void update_irq(struct HPETTimer *timer, int set)
 if (!set || !timer_enabled(timer) || !hpet_enabled(timer-state)) {
 s-isr = ~mask;
 if (!timer_fsb_route(timer)) {
-qemu_irq_lower(s-irqs[route]);
+/* fold the ICH PIRQ# pin's internal inversion logic into hpet */
+if (route = ISA_NUM_IRQS) {
+qemu_irq_raise(s-irqs[route]);
+} else {
+qemu_irq_lower(s-irqs[route]);
+}
 }
 } else if (timer_fsb_route(timer)) {
 stl_le_phys(timer-fsb  32, timer-fsb  0x);
 } else if (timer-config  HPET_TN_TYPE_LEVEL) {
 s-isr |= mask;
-qemu_irq_raise(s-irqs[route]);
+/* fold the ICH PIRQ# pin's internal inversion logic into hpet */
+if (route = ISA_NUM_IRQS) {
+qemu_irq_lower(s-irqs[route]);
+} else {
+qemu_irq_raise(s-irqs[route]);
+}
 } else {
 s-isr = ~mask;
 qemu_irq_pulse(s-irqs[route]);
-- 
1.8.1.4




[Qemu-devel] [PATCH 0/2] make slirp drivern by glib directly

2013-10-27 Thread Liu Ping Fan
This series make slirp drivern directly by glib, so we can clean up
the hooks for slrip in mainloop and stub 


Liu Ping Fan (2):
  slirp: introduce gsource event abstraction
  slirp: make slirp event dispatch based on slirp instance

 main-loop.c   |   6 ---
 net/slirp.c   |   3 ++
 slirp/Makefile.objs   |   2 +-
 slirp/TFX7d70.tmp |   0
 slirp/libslirp.h  |   7 ++-
 slirp/slirp.c | 133 --
 slirp/slirp.h |   1 +
 slirp/slirp_gsource.c |  94 +++
 slirp/slirp_gsource.h |  37 ++
 slirp/socket.c|   1 -
 slirp/socket.h|   2 +-
 stubs/Makefile.objs   |   1 -
 stubs/slirp.c |  11 -
 13 files changed, 181 insertions(+), 117 deletions(-)
 create mode 100644 slirp/TFX7d70.tmp
 create mode 100644 slirp/slirp_gsource.c
 create mode 100644 slirp/slirp_gsource.h
 delete mode 100644 stubs/slirp.c

-- 
1.8.1.4




[Qemu-devel] [PATCH 1/2] slirp: introduce gsource event abstraction

2013-10-27 Thread Liu Ping Fan
Introduce struct SlirpGSource. It will ease the usage of GSource
associated with a group of files, which are dynamically allocated
and release for slirp.

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
 slirp/Makefile.objs   |  2 +-
 slirp/slirp_gsource.c | 94 +++
 slirp/slirp_gsource.h | 37 
 3 files changed, 132 insertions(+), 1 deletion(-)
 create mode 100644 slirp/slirp_gsource.c
 create mode 100644 slirp/slirp_gsource.h

diff --git a/slirp/Makefile.objs b/slirp/Makefile.objs
index 2daa9dc..ee39eed 100644
--- a/slirp/Makefile.objs
+++ b/slirp/Makefile.objs
@@ -1,3 +1,3 @@
 common-obj-y = cksum.o if.o ip_icmp.o ip_input.o ip_output.o dnssearch.o
-common-obj-y += slirp.o mbuf.o misc.o sbuf.o socket.o tcp_input.o tcp_output.o
+common-obj-y += slirp.o slirp_gsource.o mbuf.o misc.o sbuf.o socket.o 
tcp_input.o tcp_output.o
 common-obj-y += tcp_subr.o tcp_timer.o udp.o bootp.o tftp.o arp_table.o
diff --git a/slirp/slirp_gsource.c b/slirp/slirp_gsource.c
new file mode 100644
index 000..b502697
--- /dev/null
+++ b/slirp/slirp_gsource.c
@@ -0,0 +1,94 @@
+/*
+ *  Copyright (C) 2013 IBM
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; under version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see http://www.gnu.org/licenses/.
+ */
+
+#include slirp_gsource.h
+#include qemu/bitops.h
+
+GPollFD *slirp_gsource_add_pollfd(SlirpGSource *src, int fd)
+{
+GPollFD *retfd;
+
+retfd = g_slice_alloc(sizeof(GPollFD));
+retfd-events = 0;
+retfd-fd = fd;
+src-pollfds_list = g_list_append(src-pollfds_list, retfd);
+if (fd = 0) {
+g_source_add_poll(src-source, retfd);
+}
+
+return retfd;
+}
+
+void slirp_gsource_remove_pollfd(SlirpGSource *src, GPollFD *pollfd)
+{
+g_source_remove_poll(src-source, pollfd);
+src-pollfds_list = g_list_remove(src-pollfds_list, pollfd);
+g_slice_free(GPollFD, pollfd);
+}
+
+static gboolean slirp_gsource_check(GSource *src)
+{
+SlirpGSource *nsrc = (SlirpGSource *)src;
+GList *cur;
+GPollFD *gfd;
+
+cur = nsrc-pollfds_list;
+while (cur) {
+gfd = cur-data;
+if (gfd-fd = 0  (gfd-revents  gfd-events)) {
+return true;
+}
+cur = g_list_next(cur);
+}
+
+return false;
+}
+
+static gboolean slirp_gsource_dispatch(GSource *src, GSourceFunc cb,
+gpointer data)
+{
+gboolean ret = false;
+
+if (cb) {
+ret = cb(data);
+}
+return ret;
+}
+
+SlirpGSource *slirp_gsource_new(GPrepare prepare, GSourceFunc dispatch_cb,
+void *opaque)
+{
+SlirpGSource *src;
+GSourceFuncs *gfuncs = g_new0(GSourceFuncs, 1);
+gfuncs-prepare = prepare;
+gfuncs-check = slirp_gsource_check,
+gfuncs-dispatch = slirp_gsource_dispatch,
+
+src = (SlirpGSource *)g_source_new(gfuncs, sizeof(SlirpGSource));
+src-gfuncs = gfuncs;
+src-pollfds_list = NULL;
+src-opaque = opaque;
+g_source_set_callback(src-source, dispatch_cb, src, NULL);
+
+return src;
+}
+
+void slirp_gsource_release(SlirpGSource *src)
+{
+assert(!src-pollfds_list);
+g_free(src-gfuncs);
+g_source_destroy(src-source);
+}
diff --git a/slirp/slirp_gsource.h b/slirp/slirp_gsource.h
new file mode 100644
index 000..98a9e3a
--- /dev/null
+++ b/slirp/slirp_gsource.h
@@ -0,0 +1,37 @@
+/*
+ *  Copyright (C) 2013 IBM
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; under version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see http://www.gnu.org/licenses/.
+ */
+
+#ifndef SLIRP_GSOURCE_H
+#define SLIRP_GSOURCE_H
+#include qemu-common.h
+
+typedef gboolean (*GPrepare)(GSource *source, gint *timeout_);
+
+/* multi fd drive GSource*/
+typedef struct SlirpGSource {
+GSource source;
+/* a group of GPollFD which dynamically join or leave the GSource */
+GList *pollfds_list;
+GSourceFuncs *gfuncs;
+void *opaque;
+} SlirpGSource;
+
+SlirpGSource *slirp_gsource_new(GPrepare prepare, GSourceFunc dispatch_cb,
+void *opaque);
+void

[Qemu-devel] [PATCH 2/2] slirp: make slirp event dispatch based on slirp instance

2013-10-27 Thread Liu Ping Fan
Each slirp instance has its own GFuncs, so we can driver slirp by glib main 
loop.

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
For easing the review, This patch does not obey coding guide. Will fix
it later
---
 main-loop.c |   6 ---
 net/slirp.c |   3 ++
 slirp/TFX7d70.tmp   |   0
 slirp/libslirp.h|   7 ++-
 slirp/slirp.c   | 133 
 slirp/slirp.h   |   1 +
 slirp/socket.c  |   1 -
 slirp/socket.h  |   2 +-
 stubs/Makefile.objs |   1 -
 stubs/slirp.c   |  11 -
 10 files changed, 49 insertions(+), 116 deletions(-)
 create mode 100644 slirp/TFX7d70.tmp
 delete mode 100644 stubs/slirp.c

diff --git a/main-loop.c b/main-loop.c
index c3c9c28..374d26f 100644
--- a/main-loop.c
+++ b/main-loop.c
@@ -465,9 +465,6 @@ int main_loop_wait(int nonblocking)
 /* poll any events */
 g_array_set_size(gpollfds, 0); /* reset for new iteration */
 /* XXX: separate device handlers from system ones */
-#ifdef CONFIG_SLIRP
-slirp_pollfds_fill(gpollfds, timeout);
-#endif
 qemu_iohandler_fill(gpollfds);
 
 if (timeout == UINT32_MAX) {
@@ -482,9 +479,6 @@ int main_loop_wait(int nonblocking)
 
 ret = os_host_main_loop_wait(timeout_ns);
 qemu_iohandler_poll(gpollfds, ret);
-#ifdef CONFIG_SLIRP
-slirp_pollfds_poll(gpollfds, (ret  0));
-#endif
 
 qemu_clock_run_all_timers();
 
diff --git a/net/slirp.c b/net/slirp.c
index 124e953..2a21e16 100644
--- a/net/slirp.c
+++ b/net/slirp.c
@@ -35,6 +35,7 @@
 #include monitor/monitor.h
 #include qemu/sockets.h
 #include slirp/libslirp.h
+#include slirp/slirp_gsource.h
 #include sysemu/char.h
 
 static int get_str_sep(char *buf, int buf_size, const char **pp, int sep)
@@ -76,6 +77,7 @@ typedef struct SlirpState {
 #ifndef _WIN32
 char smb_dir[128];
 #endif
+SlirpGSource *slirp_src;
 } SlirpState;
 
 static struct slirp_config_str *slirp_configs;
@@ -244,6 +246,7 @@ static int net_slirp_init(NetClientState *peer, const char 
*model,
 
 s-slirp = slirp_init(restricted, net, mask, host, vhostname,
   tftp_export, bootfile, dhcp, dns, dnssearch, s);
+s-slirp_src = slirp_gsource_new(slirp_prepare, slirp_handler, s-slirp);
 QTAILQ_INSERT_TAIL(slirp_stacks, s, entry);
 
 for (config = slirp_configs; config; config = config-next) {
diff --git a/slirp/TFX7d70.tmp b/slirp/TFX7d70.tmp
new file mode 100644
index 000..e69de29
diff --git a/slirp/libslirp.h b/slirp/libslirp.h
index 5bdcbd5..bb0c537 100644
--- a/slirp/libslirp.h
+++ b/slirp/libslirp.h
@@ -16,10 +16,6 @@ Slirp *slirp_init(int restricted, struct in_addr vnetwork,
   void *opaque);
 void slirp_cleanup(Slirp *slirp);
 
-void slirp_pollfds_fill(GArray *pollfds, uint32_t *timeout);
-
-void slirp_pollfds_poll(GArray *pollfds, int select_error);
-
 void slirp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len);
 
 /* you must provide the following functions: */
@@ -39,5 +35,8 @@ void slirp_socket_recv(Slirp *slirp, struct in_addr 
guest_addr,
int guest_port, const uint8_t *buf, int size);
 size_t slirp_socket_can_recv(Slirp *slirp, struct in_addr guest_addr,
  int guest_port);
+gboolean slirp_prepare(GSource *source, gint *time);
+gboolean slirp_handler(gpointer data);
+
 
 #endif
diff --git a/slirp/slirp.c b/slirp/slirp.c
index bad8dad..6d57994 100644
--- a/slirp/slirp.c
+++ b/slirp/slirp.c
@@ -25,6 +25,7 @@
 #include qemu/timer.h
 #include sysemu/char.h
 #include slirp.h
+#include slirp_gsource.h
 #include hw/hw.h
 
 /* host loopback address */
@@ -262,46 +263,34 @@ void slirp_cleanup(Slirp *slirp)
 #define CONN_CANFSEND(so) (((so)-so_state  
(SS_FCANTSENDMORE|SS_ISFCONNECTED)) == SS_ISFCONNECTED)
 #define CONN_CANFRCV(so) (((so)-so_state  (SS_FCANTRCVMORE|SS_ISFCONNECTED)) 
== SS_ISFCONNECTED)
 
-static void slirp_update_timeout(uint32_t *timeout)
+static void slirp_update_timeout(Slirp *slirp, gint *timeout)
 {
-Slirp *slirp;
-uint32_t t;
+gint t = *timeout;
 
 if (*timeout = TIMEOUT_FAST) {
 return;
 }
 
-t = MIN(1000, *timeout);
-
-/* If we have tcp timeout with slirp, then we will fill @timeout with
- * more precise value.
- */
-QTAILQ_FOREACH(slirp, slirp_instances, entry) {
-if (slirp-time_fasttimo) {
-*timeout = TIMEOUT_FAST;
-return;
-}
-if (slirp-do_slowtimo) {
-t = MIN(TIMEOUT_SLOW, t);
-}
+if (slirp-time_fasttimo) {
+*timeout = TIMEOUT_FAST;
+return;
+}
+if (slirp-do_slowtimo) {
+t = MIN(TIMEOUT_SLOW, t);
 }
 *timeout = t;
 }
 
-void slirp_pollfds_fill(GArray *pollfds, uint32_t *timeout)
+gboolean slirp_prepare(GSource *source, gint *time)
 {
-Slirp *slirp;
+SlirpGSource *slirp_src = (SlirpGSource *)source;
+Slirp *slirp = slirp_src-opaque;
 struct socket *so, *so_next;
+u_int curtime

Re: [Qemu-devel] [PATCH v8 0/2] bugs fix for hpet

2013-10-22 Thread liu ping fan
ping? Any further comment?

Thanks

On Fri, Oct 18, 2013 at 12:00 PM, Liu Ping Fan qemul...@gmail.com wrote:
 v8:
   make piix/q35 compat diverge
   simplify the code, use hpet_irqs to pass intcap value

 v7:
   use macro to define intcap in pc.h
   (as to 3/4 and 4/4, I am not sure about whether to merge them or not, so 
 keep them separate)

 v6:
   move the setting of intcap to board, and keep the init value as zero. 
 (thanks for the discussion from Paolo and Michael)
   introduce an extra hpet property compat to tell PC version

 v5:
   use stand compat property to fix hpet intcap on pc-q35, while on pc-piix, 
 hard code intcap as IRQ2

 v4:
   use stand compat property to fix hpet intcap

 v3:
   change hpet interrupt capablity on board's demand


 Liu Ping Fan (2):
   hpet: inverse polarity when pin above ISA_NUM_IRQS
   hpet: enable to entitle more irq pins for hpet

  hw/i386/pc.c | 19 ---
  hw/i386/pc_piix.c|  3 ++-
  hw/i386/pc_q35.c | 21 +
  hw/timer/hpet.c  | 23 +++
  include/hw/i386/pc.h | 24 +++-
  5 files changed, 77 insertions(+), 13 deletions(-)

 --
 1.8.1.4




Re: [Qemu-devel] [PATCH v7 4/4] PC: differentiate hpet's interrupt capability on piix and q35

2013-10-17 Thread liu ping fan
On Thu, Oct 17, 2013 at 1:44 PM, Michael S. Tsirkin m...@redhat.com wrote:
 On Thu, Oct 17, 2013 at 11:16:05AM +0800, Liu Ping Fan wrote:
 For pc-piix-*, hpet's intcap is always hard coded as IRQ2.
 For q35, if it is pc-q35-1.7 and earlier, we use IRQ2 for compat
 reason, otherwise IRQ2, IRQ8, and IRQ16~23 are allowed.

 Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
 ---
  hw/i386/pc.c | 20 +++-
  hw/i386/pc_piix.c|  7 ++-
  hw/i386/pc_q35.c |  6 +-
  include/hw/i386/pc.h | 11 ++-
  4 files changed, 36 insertions(+), 8 deletions(-)

 diff --git a/hw/i386/pc.c b/hw/i386/pc.c
 index 72cc850..3e98ff0 100644
 --- a/hw/i386/pc.c
 +++ b/hw/i386/pc.c
 @@ -1219,7 +1219,8 @@ static const MemoryRegionOps ioportF0_io_ops = {
  void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
ISADevice **rtc_state,
ISADevice **floppy,
 -  bool no_vmport)
 +  bool no_vmport,
 +  bool hpet_irqs)
  {
  int i;
  DriveInfo *fd[MAX_FD];
 @@ -1249,10 +1250,19 @@ void pc_basic_device_init(ISABus *isa_bus, qemu_irq 
 *gsi,
  /* In order to set property, here not using 
 sysbus_try_create_simple */
  hpet = qdev_try_create(NULL, hpet);
  if (hpet) {
 -/* tmp fix. For compat, hard code to IRQ2 until we have correct
 - * compat property and differentiate pc-iix with pc-q35
 - */
 -qdev_prop_set_uint32(hpet, HPET_INTCAP, 0x4);
 +/* For pc-piix-*, hpet's intcap is always IRQ2. */
 +if (!hpet_irqs) {

 So for piix the property is ignored even if set.

Yes.
 +qdev_prop_set_uint32(hpet, HPET_INTCAP, 0x4);
 +} else {
 +/* For pc-q35-1.7 and earlier, use IRQ2 for compat.

 Now you lost me. It's still 4 for 1.7?
 You only target 1.8 then?

Yes, since the compat for guest. And start this fix with 1.8

 + * Otherwise, use IRQ16~23, IRQ8 and IRQ2.
 + */
 +uint8_t compat = object_property_get_int(OBJECT(hpet),
 +HPET_INTCAP, NULL);
 +if (!compat) {
 +qdev_prop_set_uint32(hpet, HPET_INTCAP, 0xff0104);
 +}
 +}
  qdev_init_nofail(hpet);
  sysbus_mmio_map(SYS_BUS_DEVICE(hpet), 0, HPET_BASE);

 diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
 index c6042c7..a45ce11 100644
 --- a/hw/i386/pc_piix.c
 +++ b/hw/i386/pc_piix.c
 @@ -180,7 +180,8 @@ static void pc_init1(QEMUMachineInitArgs *args,
  pc_vga_init(isa_bus, pci_enabled ? pci_bus : NULL);

  /* init basic PC hardware */
 -pc_basic_device_init(isa_bus, gsi, rtc_state, floppy, xen_enabled());
 +pc_basic_device_init(isa_bus, gsi, rtc_state, floppy, xen_enabled(),
 +false);

  pc_nic_init(isa_bus, pci_bus);

 @@ -346,6 +347,10 @@ static QEMUMachine pc_i440fx_machine_v1_7 = {
  .alias = pc,
  .init = pc_init_pci,
  .is_default = 1,
 +.compat_props = (GlobalProperty[]) {
 +PC_COMPAT_1_7,

 So you add this property for PIIX but it's then ignored?
 That's kind of ugly.

Yes a little ugly, but PIIX and ich9 diverge for hpet's hardware.

 Not sure how to fix this. Paolo?

 +{ /* end of list */ }
 +},
  };

  #define PC_I440FX_1_6_MACHINE_OPTIONS PC_I440FX_MACHINE_OPTIONS
 diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
 index ca84e1c..124ecc1 100644
 --- a/hw/i386/pc_q35.c
 +++ b/hw/i386/pc_q35.c
 @@ -181,7 +181,7 @@ static void pc_q35_init(QEMUMachineInitArgs *args)
  pc_register_ferr_irq(gsi[13]);

  /* init basic PC hardware */
 -pc_basic_device_init(isa_bus, gsi, rtc_state, floppy, false);
 +pc_basic_device_init(isa_bus, gsi, rtc_state, floppy, false, true);

  /* connect pm stuff to lpc */
  ich9_lpc_pm_init(lpc);
 @@ -270,6 +270,10 @@ static QEMUMachine pc_q35_machine_v1_7 = {
  .name = pc-q35-1.7,
  .alias = q35,
  .init = pc_q35_init,
 +.compat_props = (GlobalProperty[]) {
 +PC_COMPAT_1_7,
 +{ /* end of list */ }
 +},
  };

  #define PC_Q35_1_6_MACHINE_OPTIONS PC_Q35_MACHINE_OPTIONS
 diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
 index 1361a27..bfeccf2 100644
 --- a/include/hw/i386/pc.h
 +++ b/include/hw/i386/pc.h
 @@ -136,7 +136,8 @@ DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus 
 *pci_bus);
  void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
ISADevice **rtc_state,
ISADevice **floppy,
 -  bool no_vmport);
 +  bool no_vmport,
 +  bool hpet_irqs);
  void pc_init_ne2k_isa(ISABus *bus, NICInfo *nd);
  void pc_cmos_init(ram_addr_t ram_size, ram_addr_t above_4g_mem_size,
const char *boot_device,
 @@ -227,7 +228,15 @@ void

[Qemu-devel] [PATCH v8 0/2] bugs fix for hpet

2013-10-17 Thread Liu Ping Fan
v8: 
  make piix/q35 compat diverge
  simplify the code, use hpet_irqs to pass intcap value

v7:
  use macro to define intcap in pc.h
  (as to 3/4 and 4/4, I am not sure about whether to merge them or not, so keep 
them separate)

v6:
  move the setting of intcap to board, and keep the init value as zero. (thanks 
for the discussion from Paolo and Michael)
  introduce an extra hpet property compat to tell PC version

v5:
  use stand compat property to fix hpet intcap on pc-q35, while on pc-piix, 
hard code intcap as IRQ2

v4:
  use stand compat property to fix hpet intcap

v3:
  change hpet interrupt capablity on board's demand


Liu Ping Fan (2):
  hpet: inverse polarity when pin above ISA_NUM_IRQS
  hpet: enable to entitle more irq pins for hpet

 hw/i386/pc.c | 19 ---
 hw/i386/pc_piix.c|  3 ++-
 hw/i386/pc_q35.c | 21 +
 hw/timer/hpet.c  | 23 +++
 include/hw/i386/pc.h | 24 +++-
 5 files changed, 77 insertions(+), 13 deletions(-)

-- 
1.8.1.4




[Qemu-devel] [PATCH v8 1/2] hpet: inverse polarity when pin above ISA_NUM_IRQS

2013-10-17 Thread Liu Ping Fan
According to hpet spec, hpet irq is high active. But according to
ICH spec, there is inversion before the input of ioapic. So the OS
will expect low active on this IRQ line. (On bare metal, if OS driver
claims high active on this line, spurious irq is generated)

We fold the emulation of this inversion inside the hpet logic.

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
 hw/timer/hpet.c | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
index fcd22ae..8429eb3 100644
--- a/hw/timer/hpet.c
+++ b/hw/timer/hpet.c
@@ -198,13 +198,23 @@ static void update_irq(struct HPETTimer *timer, int set)
 if (!set || !timer_enabled(timer) || !hpet_enabled(timer-state)) {
 s-isr = ~mask;
 if (!timer_fsb_route(timer)) {
-qemu_irq_lower(s-irqs[route]);
+/* fold the ICH PIRQ# pin's internal inversion logic into hpet */
+if (route = ISA_NUM_IRQS) {
+qemu_irq_raise(s-irqs[route]);
+} else {
+qemu_irq_lower(s-irqs[route]);
+}
 }
 } else if (timer_fsb_route(timer)) {
 stl_le_phys(timer-fsb  32, timer-fsb  0x);
 } else if (timer-config  HPET_TN_TYPE_LEVEL) {
 s-isr |= mask;
-qemu_irq_raise(s-irqs[route]);
+/* fold the ICH PIRQ# pin's internal inversion logic into hpet */
+if (route = ISA_NUM_IRQS) {
+qemu_irq_lower(s-irqs[route]);
+} else {
+qemu_irq_raise(s-irqs[route]);
+}
 } else {
 s-isr = ~mask;
 qemu_irq_pulse(s-irqs[route]);
-- 
1.8.1.4




[Qemu-devel] [PATCH v8 2/2] hpet: enable to entitle more irq pins for hpet

2013-10-17 Thread Liu Ping Fan
Owning to some different hardware design, piix and q35 need
different compat. So making them diverge.

On q35, IRQ2/8 can be reserved for hpet timer 0/1. And pin 16~23
can be assigned to hpet as guest chooses. So we introduce intcap
property to do that.

Consider the compat and piix/q35, we finally have the following
value for intcap: For piix, hpet's intcap is hard coded as IRQ2.
For pc-q35-1.7 and earlier, we use IRQ2 for compat reason. Otherwise
IRQ2, IRQ8, and IRQ16~23 are allowed.

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
 hw/i386/pc.c | 19 ---
 hw/i386/pc_piix.c|  3 ++-
 hw/i386/pc_q35.c | 21 +
 hw/timer/hpet.c  |  9 +++--
 include/hw/i386/pc.h | 24 +++-
 5 files changed, 65 insertions(+), 11 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 0c313fe..bb92465 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1219,7 +1219,8 @@ static const MemoryRegionOps ioportF0_io_ops = {
 void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
   ISADevice **rtc_state,
   ISADevice **floppy,
-  bool no_vmport)
+  bool no_vmport,
+  uint32 hpet_irqs)
 {
 int i;
 DriveInfo *fd[MAX_FD];
@@ -1246,9 +1247,21 @@ void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
  * when the HPET wants to take over. Thus we have to disable the latter.
  */
 if (!no_hpet  (!kvm_irqchip_in_kernel() || kvm_has_pit_state2())) {
-hpet = sysbus_try_create_simple(hpet, HPET_BASE, NULL);
-
+/* In order to set property, here not using sysbus_try_create_simple */
+hpet = qdev_try_create(NULL, hpet);
 if (hpet) {
+/* For pc-piix-*, hpet's intcap is always IRQ2. For pc-q35-1.7
+ * and earlier, use IRQ2 for compat. Otherwise, use IRQ16~23,
+ * IRQ8 and IRQ2.
+ */
+uint8_t compat = object_property_get_int(OBJECT(hpet),
+HPET_INTCAP, NULL);
+if (!compat) {
+qdev_prop_set_uint32(hpet, HPET_INTCAP, hpet_irqs);
+}
+qdev_init_nofail(hpet);
+sysbus_mmio_map(SYS_BUS_DEVICE(hpet), 0, HPET_BASE);
+
 for (i = 0; i  GSI_NUM_PINS; i++) {
 sysbus_connect_irq(SYS_BUS_DEVICE(hpet), i, gsi[i]);
 }
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index c6042c7..506f026 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -180,7 +180,8 @@ static void pc_init1(QEMUMachineInitArgs *args,
 pc_vga_init(isa_bus, pci_enabled ? pci_bus : NULL);
 
 /* init basic PC hardware */
-pc_basic_device_init(isa_bus, gsi, rtc_state, floppy, xen_enabled());
+pc_basic_device_init(isa_bus, gsi, rtc_state, floppy, xen_enabled(),
+0x4);
 
 pc_nic_init(isa_bus, pci_bus);
 
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index ca84e1c..d12d3f0 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -181,7 +181,7 @@ static void pc_q35_init(QEMUMachineInitArgs *args)
 pc_register_ferr_irq(gsi[13]);
 
 /* init basic PC hardware */
-pc_basic_device_init(isa_bus, gsi, rtc_state, floppy, false);
+pc_basic_device_init(isa_bus, gsi, rtc_state, floppy, false, 0xff0104);
 
 /* connect pm stuff to lpc */
 ich9_lpc_pm_init(lpc);
@@ -263,6 +263,15 @@ static void pc_q35_init_1_4(QEMUMachineInitArgs *args)
 .desc = Standard PC (Q35 + ICH9, 2009), \
 .hot_add_cpu = pc_hot_add_cpu
 
+#define PC_Q35_1_8_MACHINE_OPTIONS PC_Q35_MACHINE_OPTIONS
+
+static QEMUMachine pc_q35_machine_v1_8 = {
+PC_Q35_1_7_MACHINE_OPTIONS,
+.name = pc-q35-1.8,
+.alias = q35,
+.init = pc_q35_init,
+};
+
 #define PC_Q35_1_7_MACHINE_OPTIONS PC_Q35_MACHINE_OPTIONS
 
 static QEMUMachine pc_q35_machine_v1_7 = {
@@ -270,6 +279,10 @@ static QEMUMachine pc_q35_machine_v1_7 = {
 .name = pc-q35-1.7,
 .alias = q35,
 .init = pc_q35_init,
+.compat_props = (GlobalProperty[]) {
+PC_Q35_COMPAT_1_7,
+{ /* end of list */ }
+},
 };
 
 #define PC_Q35_1_6_MACHINE_OPTIONS PC_Q35_MACHINE_OPTIONS
@@ -279,7 +292,7 @@ static QEMUMachine pc_q35_machine_v1_6 = {
 .name = pc-q35-1.6,
 .init = pc_q35_init_1_6,
 .compat_props = (GlobalProperty[]) {
-PC_COMPAT_1_6,
+PC_Q35_COMPAT_1_6,
 { /* end of list */ }
 },
 };
@@ -289,7 +302,7 @@ static QEMUMachine pc_q35_machine_v1_5 = {
 .name = pc-q35-1.5,
 .init = pc_q35_init_1_5,
 .compat_props = (GlobalProperty[]) {
-PC_COMPAT_1_5,
+PC_Q35_COMPAT_1_5,
 { /* end of list */ }
 },
 };
@@ -303,7 +316,7 @@ static QEMUMachine pc_q35_machine_v1_4 = {
 .name = pc-q35-1.4,
 .init = pc_q35_init_1_4,
 .compat_props = (GlobalProperty[]) {
-PC_COMPAT_1_4,
+PC_Q35_COMPAT_1_4,
 { /* end of list */ }
 },
 };
diff --git a/hw/timer/hpet.c b

[Qemu-devel] [PATCH v7 0/4] bugs fix for hpet

2013-10-16 Thread Liu Ping Fan
v7:
  use macro to define intcap in pc.h
  (as to 3/4 and 4/4, I am not sure about whether to merge them or not, so keep 
them separate)

v6:
  move the setting of intcap to board, and keep the init value as zero. (thanks 
for the discussion from Paolo and Michael)
  introduce an extra hpet property compat to tell PC version

v5:
  use stand compat property to fix hpet intcap on pc-q35, while on pc-piix, 
hard code intcap as IRQ2

v4:
  use stand compat property to fix hpet intcap

v3:
  change hpet interrupt capablity on board's demand



Liu Ping Fan (4):
  hpet: inverse polarity when pin above ISA_NUM_IRQS
  hpet: enable to entitle more irq pins for hpet
  PC: use qdev_xx to create hpet instead of sysbus_create_xx
  PC: differentiate hpet's interrupt capability on piix and q35

 hw/i386/pc.c | 23 ---
 hw/i386/pc_piix.c|  7 ++-
 hw/i386/pc_q35.c |  6 +-
 hw/timer/hpet.c  | 24 
 include/hw/i386/pc.h | 13 -
 5 files changed, 63 insertions(+), 10 deletions(-)

-- 
1.8.1.4




[Qemu-devel] [PATCH v7 3/4] PC: use qdev_xx to create hpet instead of sysbus_create_xx

2013-10-16 Thread Liu Ping Fan
sysbus_create_xx func does not allow us to set a device's extra
properties.  While hpet need to set its compat property before
initialization, so we abandon the wrapper function, and spread
its logic inline

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
 hw/i386/pc.c| 11 +--
 hw/timer/hpet.c |  4 +---
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 0c313fe..72cc850 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1246,9 +1246,16 @@ void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
  * when the HPET wants to take over. Thus we have to disable the latter.
  */
 if (!no_hpet  (!kvm_irqchip_in_kernel() || kvm_has_pit_state2())) {
-hpet = sysbus_try_create_simple(hpet, HPET_BASE, NULL);
-
+/* In order to set property, here not using sysbus_try_create_simple */
+hpet = qdev_try_create(NULL, hpet);
 if (hpet) {
+/* tmp fix. For compat, hard code to IRQ2 until we have correct
+ * compat property and differentiate pc-iix with pc-q35
+ */
+qdev_prop_set_uint32(hpet, HPET_INTCAP, 0x4);
+qdev_init_nofail(hpet);
+sysbus_mmio_map(SYS_BUS_DEVICE(hpet), 0, HPET_BASE);
+
 for (i = 0; i  GSI_NUM_PINS; i++) {
 sysbus_connect_irq(SYS_BUS_DEVICE(hpet), i, gsi[i]);
 }
diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
index 48a13bf..d783849 100644
--- a/hw/timer/hpet.c
+++ b/hw/timer/hpet.c
@@ -42,8 +42,6 @@
 
 #define HPET_MSI_SUPPORT0
 
-/* Will fix: intcap is set by board, and should be 0 if nobody sets. */
-#define HPET_TN_INT_CAP_DEFAULT 0x4ULL
 
 #define TYPE_HPET hpet
 #define HPET(obj) OBJECT_CHECK(HPETState, (obj), TYPE_HPET)
@@ -760,7 +758,7 @@ static void hpet_realize(DeviceState *dev, Error **errp)
 static Property hpet_device_properties[] = {
 DEFINE_PROP_UINT8(timers, HPETState, num_timers, HPET_MIN_TIMERS),
 DEFINE_PROP_BIT(msi, HPETState, flags, HPET_MSI_SUPPORT, false),
-DEFINE_PROP_UINT32(HPET_INTCAP, HPETState, intcap, 
HPET_TN_INT_CAP_DEFAULT),
+DEFINE_PROP_UINT32(HPET_INTCAP, HPETState, intcap, 0),
 DEFINE_PROP_END_OF_LIST(),
 };
 
-- 
1.8.1.4




[Qemu-devel] [PATCH v7 1/4] hpet: inverse polarity when pin above ISA_NUM_IRQS

2013-10-16 Thread Liu Ping Fan
According to hpet spec, hpet irq is high active. But according to
ICH spec, there is inversion before the input of ioapic. So the OS
will expect low active on this IRQ line. (On bare metal, if OS driver
claims high active on this line, spurious irq is generated)

We fold the emulation of this inversion inside the hpet logic.

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
 hw/timer/hpet.c | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
index fcd22ae..8429eb3 100644
--- a/hw/timer/hpet.c
+++ b/hw/timer/hpet.c
@@ -198,13 +198,23 @@ static void update_irq(struct HPETTimer *timer, int set)
 if (!set || !timer_enabled(timer) || !hpet_enabled(timer-state)) {
 s-isr = ~mask;
 if (!timer_fsb_route(timer)) {
-qemu_irq_lower(s-irqs[route]);
+/* fold the ICH PIRQ# pin's internal inversion logic into hpet */
+if (route = ISA_NUM_IRQS) {
+qemu_irq_raise(s-irqs[route]);
+} else {
+qemu_irq_lower(s-irqs[route]);
+}
 }
 } else if (timer_fsb_route(timer)) {
 stl_le_phys(timer-fsb  32, timer-fsb  0x);
 } else if (timer-config  HPET_TN_TYPE_LEVEL) {
 s-isr |= mask;
-qemu_irq_raise(s-irqs[route]);
+/* fold the ICH PIRQ# pin's internal inversion logic into hpet */
+if (route = ISA_NUM_IRQS) {
+qemu_irq_lower(s-irqs[route]);
+} else {
+qemu_irq_raise(s-irqs[route]);
+}
 } else {
 s-isr = ~mask;
 qemu_irq_pulse(s-irqs[route]);
-- 
1.8.1.4




[Qemu-devel] [PATCH v7 2/4] hpet: enable to entitle more irq pins for hpet

2013-10-16 Thread Liu Ping Fan
On q35, IRQ2/8 can be reserved for hpet timer 0/1. And pin 16~23
of ioapic can be dynamically assigned to hpet as guest chooses.
So we introduce intcap property to do that. (currently, its value
is IRQ2. Later, it should be set by board.)

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
 hw/timer/hpet.c  | 12 ++--
 include/hw/i386/pc.h |  2 ++
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
index 8429eb3..48a13bf 100644
--- a/hw/timer/hpet.c
+++ b/hw/timer/hpet.c
@@ -42,6 +42,9 @@
 
 #define HPET_MSI_SUPPORT0
 
+/* Will fix: intcap is set by board, and should be 0 if nobody sets. */
+#define HPET_TN_INT_CAP_DEFAULT 0x4ULL
+
 #define TYPE_HPET hpet
 #define HPET(obj) OBJECT_CHECK(HPETState, (obj), TYPE_HPET)
 
@@ -73,6 +76,7 @@ typedef struct HPETState {
 uint8_t rtc_irq_level;
 qemu_irq pit_enabled;
 uint8_t num_timers;
+uint32_t intcap;
 HPETTimer timer[HPET_MAX_TIMERS];
 
 /* Memory-mapped, software visible registers */
@@ -663,8 +667,8 @@ static void hpet_reset(DeviceState *d)
 if (s-flags  (1  HPET_MSI_SUPPORT)) {
 timer-config |= HPET_TN_FSB_CAP;
 }
-/* advertise availability of ioapic inti2 */
-timer-config |=  0x0004ULL  32;
+/* advertise availability of ioapic int */
+timer-config |=  (uint64_t)s-intcap  32;
 timer-period = 0ULL;
 timer-wrap_flag = 0;
 }
@@ -713,6 +717,9 @@ static void hpet_realize(DeviceState *dev, Error **errp)
 int i;
 HPETTimer *timer;
 
+if (!s-intcap) {
+error_printf(Hpet's intcap not initialized.\n);
+}
 if (hpet_cfg.count == UINT8_MAX) {
 /* first instance */
 hpet_cfg.count = 0;
@@ -753,6 +760,7 @@ static void hpet_realize(DeviceState *dev, Error **errp)
 static Property hpet_device_properties[] = {
 DEFINE_PROP_UINT8(timers, HPETState, num_timers, HPET_MIN_TIMERS),
 DEFINE_PROP_BIT(msi, HPETState, flags, HPET_MSI_SUPPORT, false),
+DEFINE_PROP_UINT32(HPET_INTCAP, HPETState, intcap, 
HPET_TN_INT_CAP_DEFAULT),
 DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 9b2ddc4..1361a27 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -10,6 +10,8 @@
 
 #include qemu/range.h
 
+#define HPET_INTCAP hpet-intcap
+
 /* PC-style peripherals (also used by other machines).  */
 
 typedef struct PcPciInfo {
-- 
1.8.1.4




[Qemu-devel] [PATCH v7 4/4] PC: differentiate hpet's interrupt capability on piix and q35

2013-10-16 Thread Liu Ping Fan
For pc-piix-*, hpet's intcap is always hard coded as IRQ2.
For q35, if it is pc-q35-1.7 and earlier, we use IRQ2 for compat
reason, otherwise IRQ2, IRQ8, and IRQ16~23 are allowed.

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
 hw/i386/pc.c | 20 +++-
 hw/i386/pc_piix.c|  7 ++-
 hw/i386/pc_q35.c |  6 +-
 include/hw/i386/pc.h | 11 ++-
 4 files changed, 36 insertions(+), 8 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 72cc850..3e98ff0 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1219,7 +1219,8 @@ static const MemoryRegionOps ioportF0_io_ops = {
 void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
   ISADevice **rtc_state,
   ISADevice **floppy,
-  bool no_vmport)
+  bool no_vmport,
+  bool hpet_irqs)
 {
 int i;
 DriveInfo *fd[MAX_FD];
@@ -1249,10 +1250,19 @@ void pc_basic_device_init(ISABus *isa_bus, qemu_irq 
*gsi,
 /* In order to set property, here not using sysbus_try_create_simple */
 hpet = qdev_try_create(NULL, hpet);
 if (hpet) {
-/* tmp fix. For compat, hard code to IRQ2 until we have correct
- * compat property and differentiate pc-iix with pc-q35
- */
-qdev_prop_set_uint32(hpet, HPET_INTCAP, 0x4);
+/* For pc-piix-*, hpet's intcap is always IRQ2. */
+if (!hpet_irqs) {
+qdev_prop_set_uint32(hpet, HPET_INTCAP, 0x4);
+} else {
+/* For pc-q35-1.7 and earlier, use IRQ2 for compat.
+ * Otherwise, use IRQ16~23, IRQ8 and IRQ2.
+ */
+uint8_t compat = object_property_get_int(OBJECT(hpet),
+HPET_INTCAP, NULL);
+if (!compat) {
+qdev_prop_set_uint32(hpet, HPET_INTCAP, 0xff0104);
+}
+}
 qdev_init_nofail(hpet);
 sysbus_mmio_map(SYS_BUS_DEVICE(hpet), 0, HPET_BASE);
 
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index c6042c7..a45ce11 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -180,7 +180,8 @@ static void pc_init1(QEMUMachineInitArgs *args,
 pc_vga_init(isa_bus, pci_enabled ? pci_bus : NULL);
 
 /* init basic PC hardware */
-pc_basic_device_init(isa_bus, gsi, rtc_state, floppy, xen_enabled());
+pc_basic_device_init(isa_bus, gsi, rtc_state, floppy, xen_enabled(),
+false);
 
 pc_nic_init(isa_bus, pci_bus);
 
@@ -346,6 +347,10 @@ static QEMUMachine pc_i440fx_machine_v1_7 = {
 .alias = pc,
 .init = pc_init_pci,
 .is_default = 1,
+.compat_props = (GlobalProperty[]) {
+PC_COMPAT_1_7,
+{ /* end of list */ }
+},
 };
 
 #define PC_I440FX_1_6_MACHINE_OPTIONS PC_I440FX_MACHINE_OPTIONS
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index ca84e1c..124ecc1 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -181,7 +181,7 @@ static void pc_q35_init(QEMUMachineInitArgs *args)
 pc_register_ferr_irq(gsi[13]);
 
 /* init basic PC hardware */
-pc_basic_device_init(isa_bus, gsi, rtc_state, floppy, false);
+pc_basic_device_init(isa_bus, gsi, rtc_state, floppy, false, true);
 
 /* connect pm stuff to lpc */
 ich9_lpc_pm_init(lpc);
@@ -270,6 +270,10 @@ static QEMUMachine pc_q35_machine_v1_7 = {
 .name = pc-q35-1.7,
 .alias = q35,
 .init = pc_q35_init,
+.compat_props = (GlobalProperty[]) {
+PC_COMPAT_1_7,
+{ /* end of list */ }
+},
 };
 
 #define PC_Q35_1_6_MACHINE_OPTIONS PC_Q35_MACHINE_OPTIONS
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 1361a27..bfeccf2 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -136,7 +136,8 @@ DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus);
 void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
   ISADevice **rtc_state,
   ISADevice **floppy,
-  bool no_vmport);
+  bool no_vmport,
+  bool hpet_irqs);
 void pc_init_ne2k_isa(ISABus *bus, NICInfo *nd);
 void pc_cmos_init(ram_addr_t ram_size, ram_addr_t above_4g_mem_size,
   const char *boot_device,
@@ -227,7 +228,15 @@ void pvpanic_init(ISABus *bus);
 
 int e820_add_entry(uint64_t, uint64_t, uint32_t);
 
+#define PC_COMPAT_1_7 \
+{\
+.driver   = hpet,\
+.property = HPET_INTCAP,\
+.value= stringify(4),\
+}
+
 #define PC_COMPAT_1_6 \
+PC_COMPAT_1_7, \
 {\
 .driver   = e1000,\
 .property = mitigation,\
-- 
1.8.1.4




Re: [Qemu-devel] [PATCH v6 2/5] hpet: enable to entitle more irq pins for hpet

2013-10-14 Thread liu ping fan
On Mon, Oct 14, 2013 at 10:18 PM, Michael S. Tsirkin m...@redhat.com wrote:
 On Fri, Oct 11, 2013 at 10:59:40AM +0800, liu ping fan wrote:
 On Thu, Oct 10, 2013 at 7:41 PM, Michael S. Tsirkin m...@redhat.com wrote:
  On Thu, Oct 10, 2013 at 11:46:42AM +0200, Paolo Bonzini wrote:
  Il 10/10/2013 11:41, Michael S. Tsirkin ha scritto:
Are you sure?  This is not done for any other compat property.
   
Paolo
   It's done if we use the property from C.
   See PCI_HOST_PROP_PCI_HOLE64_SIZE.
  
   You want compiler to catch errors, that's
   much better than a runtime failure.
 
  I agree, but I think there should be no need to use the property from C.
 
  Paolo
 
  Well this patchset does use it from C.
  If it's done it needs a macro.

 hpet.h is the ideal place to put the macro, so pc.c can see it. But
 what about PC_COMPAT_1_7 in pc.h? I think it is not right to include
 hpet.h.

 Why not?

Since pc.h is included by so many hpet unrelated drivers, if pc.h
include hpet.h, then we will export the internal of hpet struct.

Regards,
Ping Fan



Re: [Qemu-devel] [PATCH v6 2/5] hpet: enable to entitle more irq pins for hpet

2013-10-11 Thread liu ping fan
On Fri, Oct 11, 2013 at 4:38 PM, Paolo Bonzini pbonz...@redhat.com wrote:
 Il 11/10/2013 04:59, liu ping fan ha scritto:
 On Thu, Oct 10, 2013 at 7:41 PM, Michael S. Tsirkin m...@redhat.com wrote:
 On Thu, Oct 10, 2013 at 11:46:42AM +0200, Paolo Bonzini wrote:
 Il 10/10/2013 11:41, Michael S. Tsirkin ha scritto:
 Are you sure?  This is not done for any other compat property.

 Paolo
 It's done if we use the property from C.
 See PCI_HOST_PROP_PCI_HOLE64_SIZE.

 You want compiler to catch errors, that's
 much better than a runtime failure.

 I agree, but I think there should be no need to use the property from C.

 Paolo

 Well this patchset does use it from C.
 If it's done it needs a macro.

 hpet.h is the ideal place to put the macro, so pc.c can see it. But
 what about PC_COMPAT_1_7 in pc.h? I think it is not right to include
 hpet.h. So can I do not use marco in pc.h?

 I think you shouldn't need the macro (no need to use the property from
 C, only from compat properties).

We need to tell the compat and then decide to set intcap in
pc_basic_device_init()

uint8_t compat = object_property_get_int(OBJECT(hpet),
intcap, NULL);
if (!compat) {
qdev_prop_set_uint32(hpet, intcap, 0xff0104);
}

Regards,
Ping Fan



[Qemu-devel] [PATCH v6 0/5] bugs fix for hpet

2013-10-10 Thread Liu Ping Fan
v6:
  move the setting of intcap to board, and keep the init value as zero. (thanks 
for the discussion from Paolo and Michael)
  introduce an extra hpet property compat to tell PC version

v5:
  use stand compat property to fix hpet intcap on pc-q35, while on pc-piix, 
hard code intcap as IRQ2

v4:
  use stand compat property to fix hpet intcap

v3:
  change hpet interrupt capablity on board's demand


Liu Ping Fan (5):
  hpet: inverse polarity when pin above ISA_NUM_IRQS
  hpet: enable to entitle more irq pins for hpet
  PC: use qdev_xx to create hpet instead of sysbus_create_xx
  PC: add hpet compat to trace compatability version
  PC: differentiate hpet's interrupt capability on piix and q35

 hw/i386/pc.c | 24 +---
 hw/i386/pc_piix.c|  7 ++-
 hw/i386/pc_q35.c |  2 +-
 hw/timer/hpet.c  | 24 
 include/hw/i386/pc.h | 11 ++-
 5 files changed, 58 insertions(+), 10 deletions(-)

-- 
1.8.1.4




[Qemu-devel] [PATCH v6 1/5] hpet: inverse polarity when pin above ISA_NUM_IRQS

2013-10-10 Thread Liu Ping Fan
According to hpet spec, hpet irq is high active. But according to
ICH spec, there is inversion before the input of ioapic. So the OS
will expect low active on this IRQ line. (On bare metal, if OS driver
claims high active on this line, spurious irq is generated)

We fold the emulation of this inversion inside the hpet logic.

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
 hw/timer/hpet.c | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
index fcd22ae..8429eb3 100644
--- a/hw/timer/hpet.c
+++ b/hw/timer/hpet.c
@@ -198,13 +198,23 @@ static void update_irq(struct HPETTimer *timer, int set)
 if (!set || !timer_enabled(timer) || !hpet_enabled(timer-state)) {
 s-isr = ~mask;
 if (!timer_fsb_route(timer)) {
-qemu_irq_lower(s-irqs[route]);
+/* fold the ICH PIRQ# pin's internal inversion logic into hpet */
+if (route = ISA_NUM_IRQS) {
+qemu_irq_raise(s-irqs[route]);
+} else {
+qemu_irq_lower(s-irqs[route]);
+}
 }
 } else if (timer_fsb_route(timer)) {
 stl_le_phys(timer-fsb  32, timer-fsb  0x);
 } else if (timer-config  HPET_TN_TYPE_LEVEL) {
 s-isr |= mask;
-qemu_irq_raise(s-irqs[route]);
+/* fold the ICH PIRQ# pin's internal inversion logic into hpet */
+if (route = ISA_NUM_IRQS) {
+qemu_irq_lower(s-irqs[route]);
+} else {
+qemu_irq_raise(s-irqs[route]);
+}
 } else {
 s-isr = ~mask;
 qemu_irq_pulse(s-irqs[route]);
-- 
1.8.1.4




[Qemu-devel] [PATCH v6 2/5] hpet: enable to entitle more irq pins for hpet

2013-10-10 Thread Liu Ping Fan
On q35, IRQ2/8 can be reserved for hpet timer 0/1. And pin 16~23
of ioapic can be dynamically assigned to hpet as guest chooses.
So we introduce intcap property to do that. (currently, its value
is IRQ2. Later, it should be set by board.)

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
 hw/timer/hpet.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
index 8429eb3..5b11be4 100644
--- a/hw/timer/hpet.c
+++ b/hw/timer/hpet.c
@@ -25,6 +25,7 @@
  */
 
 #include hw/hw.h
+#include hw/boards.h
 #include hw/i386/pc.h
 #include ui/console.h
 #include qemu/timer.h
@@ -42,6 +43,9 @@
 
 #define HPET_MSI_SUPPORT0
 
+/* Will fix: intcap is set by board, and should be 0 if nobody sets. */
+#define HPET_TN_INT_CAP_DEFAULT 0x4ULL
+
 #define TYPE_HPET hpet
 #define HPET(obj) OBJECT_CHECK(HPETState, (obj), TYPE_HPET)
 
@@ -73,6 +77,7 @@ typedef struct HPETState {
 uint8_t rtc_irq_level;
 qemu_irq pit_enabled;
 uint8_t num_timers;
+uint32_t intcap;
 HPETTimer timer[HPET_MAX_TIMERS];
 
 /* Memory-mapped, software visible registers */
@@ -663,8 +668,8 @@ static void hpet_reset(DeviceState *d)
 if (s-flags  (1  HPET_MSI_SUPPORT)) {
 timer-config |= HPET_TN_FSB_CAP;
 }
-/* advertise availability of ioapic inti2 */
-timer-config |=  0x0004ULL  32;
+/* advertise availability of ioapic int */
+timer-config |=  (uint64_t)s-intcap  32;
 timer-period = 0ULL;
 timer-wrap_flag = 0;
 }
@@ -753,6 +758,7 @@ static void hpet_realize(DeviceState *dev, Error **errp)
 static Property hpet_device_properties[] = {
 DEFINE_PROP_UINT8(timers, HPETState, num_timers, HPET_MIN_TIMERS),
 DEFINE_PROP_BIT(msi, HPETState, flags, HPET_MSI_SUPPORT, false),
+DEFINE_PROP_UINT32(intcap, HPETState, intcap, HPET_TN_INT_CAP_DEFAULT),
 DEFINE_PROP_END_OF_LIST(),
 };
 
-- 
1.8.1.4




[Qemu-devel] [PATCH v6 5/5] PC: differentiate hpet's interrupt capability on piix and q35

2013-10-10 Thread Liu Ping Fan
For pc-piix-*, hpet's intcap is always hard coded as IRQ2.
For q35, if it is pc-q35-1.7 and earlier, we use IRQ2 for compat
reason, otherwise IRQ2, IRQ8, and IRQ16~23 are allowed.

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
 hw/i386/pc.c | 21 -
 hw/i386/pc_piix.c|  3 ++-
 hw/i386/pc_q35.c |  2 +-
 include/hw/i386/pc.h |  3 ++-
 4 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index f2b7b6c..062019d 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1219,7 +1219,8 @@ static const MemoryRegionOps ioportF0_io_ops = {
 void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
   ISADevice **rtc_state,
   ISADevice **floppy,
-  bool no_vmport)
+  bool no_vmport,
+  bool hpet_irqs)
 {
 int i;
 DriveInfo *fd[MAX_FD];
@@ -1249,10 +1250,20 @@ void pc_basic_device_init(ISABus *isa_bus, qemu_irq 
*gsi,
 /* In order to set property, here not using sysbus_try_create_simple */
 hpet = qdev_try_create(NULL, hpet);
 if (hpet) {
-/* tmp fix. For compat, hard code to IRQ2 until we have correct
- * compat property and differentiate pc-iix with pc-q35
- */
-qdev_prop_set_uint32(hpet, intcap, 0x4);
+/* For pc-piix-*, hpet's intcap is always IRQ2. */
+if (!hpet_irqs) {
+qdev_prop_set_uint32(hpet, intcap, 0x4);
+} else {
+/* For pc-q35-1.7 and earlier, use IRQ2 for compat. */
+uint8_t compat = object_property_get_int(OBJECT(hpet),
+compat, NULL);
+if (compat) {
+qdev_prop_set_uint32(hpet, intcap, 0x4);
+} else {
+/* using IRQ16~23, IRQ8 and IRQ2 */
+qdev_prop_set_uint32(hpet, intcap, 0xff0104);
+}
+}
 qdev_init_nofail(hpet);
 sysbus_mmio_map(SYS_BUS_DEVICE(hpet), 0, HPET_BASE);
 
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 90f1ea4..a45ce11 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -180,7 +180,8 @@ static void pc_init1(QEMUMachineInitArgs *args,
 pc_vga_init(isa_bus, pci_enabled ? pci_bus : NULL);
 
 /* init basic PC hardware */
-pc_basic_device_init(isa_bus, gsi, rtc_state, floppy, xen_enabled());
+pc_basic_device_init(isa_bus, gsi, rtc_state, floppy, xen_enabled(),
+false);
 
 pc_nic_init(isa_bus, pci_bus);
 
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index ca84e1c..9e41f4a 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -181,7 +181,7 @@ static void pc_q35_init(QEMUMachineInitArgs *args)
 pc_register_ferr_irq(gsi[13]);
 
 /* init basic PC hardware */
-pc_basic_device_init(isa_bus, gsi, rtc_state, floppy, false);
+pc_basic_device_init(isa_bus, gsi, rtc_state, floppy, false, true);
 
 /* connect pm stuff to lpc */
 ich9_lpc_pm_init(lpc);
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 80aa7bd..a49d9cd 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -134,7 +134,8 @@ DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus);
 void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
   ISADevice **rtc_state,
   ISADevice **floppy,
-  bool no_vmport);
+  bool no_vmport,
+  bool hpet_irqs);
 void pc_init_ne2k_isa(ISABus *bus, NICInfo *nd);
 void pc_cmos_init(ram_addr_t ram_size, ram_addr_t above_4g_mem_size,
   const char *boot_device,
-- 
1.8.1.4




[Qemu-devel] [PATCH v6 3/5] PC: use qdev_xx to create hpet instead of sysbus_create_xx

2013-10-10 Thread Liu Ping Fan
sysbus_create_xx func does not allow us to set a device's extra
properties.  While hpet need to set its compat property before
initialization, so we abandon the wrapper function, and spread
its logic inline

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
 hw/i386/pc.c| 11 +--
 hw/timer/hpet.c |  4 +---
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 0c313fe..f2b7b6c 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1246,9 +1246,16 @@ void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
  * when the HPET wants to take over. Thus we have to disable the latter.
  */
 if (!no_hpet  (!kvm_irqchip_in_kernel() || kvm_has_pit_state2())) {
-hpet = sysbus_try_create_simple(hpet, HPET_BASE, NULL);
-
+/* In order to set property, here not using sysbus_try_create_simple */
+hpet = qdev_try_create(NULL, hpet);
 if (hpet) {
+/* tmp fix. For compat, hard code to IRQ2 until we have correct
+ * compat property and differentiate pc-iix with pc-q35
+ */
+qdev_prop_set_uint32(hpet, intcap, 0x4);
+qdev_init_nofail(hpet);
+sysbus_mmio_map(SYS_BUS_DEVICE(hpet), 0, HPET_BASE);
+
 for (i = 0; i  GSI_NUM_PINS; i++) {
 sysbus_connect_irq(SYS_BUS_DEVICE(hpet), i, gsi[i]);
 }
diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
index 5b11be4..69ce587 100644
--- a/hw/timer/hpet.c
+++ b/hw/timer/hpet.c
@@ -43,8 +43,6 @@
 
 #define HPET_MSI_SUPPORT0
 
-/* Will fix: intcap is set by board, and should be 0 if nobody sets. */
-#define HPET_TN_INT_CAP_DEFAULT 0x4ULL
 
 #define TYPE_HPET hpet
 #define HPET(obj) OBJECT_CHECK(HPETState, (obj), TYPE_HPET)
@@ -758,7 +756,7 @@ static void hpet_realize(DeviceState *dev, Error **errp)
 static Property hpet_device_properties[] = {
 DEFINE_PROP_UINT8(timers, HPETState, num_timers, HPET_MIN_TIMERS),
 DEFINE_PROP_BIT(msi, HPETState, flags, HPET_MSI_SUPPORT, false),
-DEFINE_PROP_UINT32(intcap, HPETState, intcap, HPET_TN_INT_CAP_DEFAULT),
+DEFINE_PROP_UINT32(intcap, HPETState, intcap, 0),
 DEFINE_PROP_END_OF_LIST(),
 };
 
-- 
1.8.1.4




[Qemu-devel] [PATCH v6 4/5] PC: add hpet compat to trace compatability version

2013-10-10 Thread Liu Ping Fan
For guest bug compat, we need to limit hpet's intcap on IRQ2
for pc-q35-1.7 and earlier. We use hpet's compat property to
indicate the PC version.

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com

diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index c6042c7..90f1ea4 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -346,6 +346,10 @@ static QEMUMachine pc_i440fx_machine_v1_7 = {
 .alias = pc,
 .init = pc_init_pci,
 .is_default = 1,
+.compat_props = (GlobalProperty[]) {
+PC_COMPAT_1_7,
+{ /* end of list */ }
+},
 };
 
 #define PC_I440FX_1_6_MACHINE_OPTIONS PC_I440FX_MACHINE_OPTIONS
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index ca84e1c..569f946 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -270,6 +270,10 @@ static QEMUMachine pc_q35_machine_v1_7 = {
 .name = pc-q35-1.7,
 .alias = q35,
 .init = pc_q35_init,
+.compat_props = (GlobalProperty[]) {
+PC_COMPAT_1_7,
+{ /* end of list */ }
+},
 };
 
 #define PC_Q35_1_6_MACHINE_OPTIONS PC_Q35_MACHINE_OPTIONS
diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
index 69ce587..3cbe71e 100644
--- a/hw/timer/hpet.c
+++ b/hw/timer/hpet.c
@@ -76,6 +76,7 @@ typedef struct HPETState {
 qemu_irq pit_enabled;
 uint8_t num_timers;
 uint32_t intcap;
+uint8_t compat;
 HPETTimer timer[HPET_MAX_TIMERS];
 
 /* Memory-mapped, software visible registers */
@@ -757,6 +758,7 @@ static Property hpet_device_properties[] = {
 DEFINE_PROP_UINT8(timers, HPETState, num_timers, HPET_MIN_TIMERS),
 DEFINE_PROP_BIT(msi, HPETState, flags, HPET_MSI_SUPPORT, false),
 DEFINE_PROP_UINT32(intcap, HPETState, intcap, 0),
+DEFINE_PROP_UINT8(compat, HPETState, compat, 0),
 DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 9b2ddc4..80aa7bd 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -225,7 +225,15 @@ void pvpanic_init(ISABus *bus);
 
 int e820_add_entry(uint64_t, uint64_t, uint32_t);
 
+#define PC_COMPAT_1_7 \
+{\
+.driver   = hpet,\
+.property = compat,\
+.value= stringify(1),\
+}
+
 #define PC_COMPAT_1_6 \
+PC_COMPAT_1_7, \
 {\
 .driver   = e1000,\
 .property = mitigation,\



Re: [Qemu-devel] [PATCH v6 2/5] hpet: enable to entitle more irq pins for hpet

2013-10-10 Thread liu ping fan
On Thu, Oct 10, 2013 at 5:11 PM, Paolo Bonzini pbonz...@redhat.com wrote:
 Il 10/10/2013 09:56, Liu Ping Fan ha scritto:
 On q35, IRQ2/8 can be reserved for hpet timer 0/1. And pin 16~23
 of ioapic can be dynamically assigned to hpet as guest chooses.
 So we introduce intcap property to do that. (currently, its value
 is IRQ2. Later, it should be set by board.)

 Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
 ---
  hw/timer/hpet.c | 10 --
  1 file changed, 8 insertions(+), 2 deletions(-)

 diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
 index 8429eb3..5b11be4 100644
 --- a/hw/timer/hpet.c
 +++ b/hw/timer/hpet.c
 @@ -25,6 +25,7 @@
   */

  #include hw/hw.h
 +#include hw/boards.h
  #include hw/i386/pc.h
  #include ui/console.h
  #include qemu/timer.h
 @@ -42,6 +43,9 @@

  #define HPET_MSI_SUPPORT0

 +/* Will fix: intcap is set by board, and should be 0 if nobody sets. */
 +#define HPET_TN_INT_CAP_DEFAULT 0x4ULL
 +
  #define TYPE_HPET hpet
  #define HPET(obj) OBJECT_CHECK(HPETState, (obj), TYPE_HPET)

 @@ -73,6 +77,7 @@ typedef struct HPETState {
  uint8_t rtc_irq_level;
  qemu_irq pit_enabled;
  uint8_t num_timers;
 +uint32_t intcap;
  HPETTimer timer[HPET_MAX_TIMERS];

  /* Memory-mapped, software visible registers */
 @@ -663,8 +668,8 @@ static void hpet_reset(DeviceState *d)
  if (s-flags  (1  HPET_MSI_SUPPORT)) {
  timer-config |= HPET_TN_FSB_CAP;
  }
 -/* advertise availability of ioapic inti2 */
 -timer-config |=  0x0004ULL  32;
 +/* advertise availability of ioapic int */
 +timer-config |=  (uint64_t)s-intcap  32;
  timer-period = 0ULL;
  timer-wrap_flag = 0;
  }
 @@ -753,6 +758,7 @@ static void hpet_realize(DeviceState *dev, Error **errp)
  static Property hpet_device_properties[] = {
  DEFINE_PROP_UINT8(timers, HPETState, num_timers, HPET_MIN_TIMERS),
  DEFINE_PROP_BIT(msi, HPETState, flags, HPET_MSI_SUPPORT, false),
 +DEFINE_PROP_UINT32(intcap, HPETState, intcap, 
 HPET_TN_INT_CAP_DEFAULT),
  DEFINE_PROP_END_OF_LIST(),
  };



 According to Michael's request, a zero intcap should be detected in
 hpet_realize and give an error.

Will fix.

Thanks and regards,
Ping Fan



Re: [Qemu-devel] [PATCH v6 2/5] hpet: enable to entitle more irq pins for hpet

2013-10-10 Thread liu ping fan
On Thu, Oct 10, 2013 at 7:41 PM, Michael S. Tsirkin m...@redhat.com wrote:
 On Thu, Oct 10, 2013 at 11:46:42AM +0200, Paolo Bonzini wrote:
 Il 10/10/2013 11:41, Michael S. Tsirkin ha scritto:
   Are you sure?  This is not done for any other compat property.
  
   Paolo
  It's done if we use the property from C.
  See PCI_HOST_PROP_PCI_HOLE64_SIZE.
 
  You want compiler to catch errors, that's
  much better than a runtime failure.

 I agree, but I think there should be no need to use the property from C.

 Paolo

 Well this patchset does use it from C.
 If it's done it needs a macro.

hpet.h is the ideal place to put the macro, so pc.c can see it. But
what about PC_COMPAT_1_7 in pc.h? I think it is not right to include
hpet.h. So can I do not use marco in pc.h?

Thanks and regards,
Ping Fan



Re: [Qemu-devel] [PATCH v5 2/5] hpet: entitle more irq pins for hpet

2013-10-08 Thread liu ping fan
On Mon, Sep 30, 2013 at 11:58 PM, Michael S. Tsirkin m...@redhat.com wrote:
 On Mon, Sep 30, 2013 at 05:48:03PM +0200, Paolo Bonzini wrote:
 Il 30/09/2013 11:30, Michael S. Tsirkin ha scritto:
  I was really only talking about q35 here.
  I thought it's ugly that users can control intcap
  directly. Can object_set_property be used after
  qdev_try_create?

 Yes, after that and before qdev_init.  This is how Ping Fan is doing
 PIIX right now.

  PIIX has another issue:
  the default value in hpet is really Q35 specific,
  that's also kind of ugly, isn't it?

 Yeah, perhaps it is.  So are you suggesting the default to be 4 (GSI2 only)?

 Paolo

 I suggest it fails unless caller set the property.

Sorry, out of office for a long time, and did not keep up with this
thread in time.
When letting the caller set the intcap, we should consider the
compatibility of q35. For pc-q35-1.7 or later, the caller should set
the property, otherwise not. But how can the caller tell that it runs
on q35-1.7?
The essential problem is that set the property will always overwrite
the property which is set up by compatible mechanism. So it is hard to
implement without breaking the current mechanism. Do you think so?

Thanks and regards,
Ping fan



Re: [Qemu-devel] [PATCH v5 2/5] hpet: entitle more irq pins for hpet

2013-09-30 Thread liu ping fan
On Sun, Sep 29, 2013 at 12:15 PM, Michael S. Tsirkin m...@redhat.com wrote:
 On Sun, Sep 29, 2013 at 11:49:41AM +0800, liu ping fan wrote:
 On Sun, Sep 29, 2013 at 3:56 AM, Michael S. Tsirkin m...@redhat.com wrote:
  On Thu, Sep 12, 2013 at 11:25:15AM +0800, Liu Ping Fan wrote:
  On PC, IRQ2/8 can be reserved for hpet timer 0/1. And pin 16~23
  of ioapic can be dynamically assigned to hpet as guest chooses.
  (Will enable them after introducing pc 1.6 compat)
 
  Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
  ---
   hw/timer/hpet.c | 13 +++--
   1 file changed, 11 insertions(+), 2 deletions(-)
 
  diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
  index 8429eb3..46903b9 100644
  --- a/hw/timer/hpet.c
  +++ b/hw/timer/hpet.c
  @@ -25,6 +25,7 @@
*/
 
   #include hw/hw.h
  +#include hw/boards.h
   #include hw/i386/pc.h
   #include ui/console.h
   #include qemu/timer.h
  @@ -42,6 +43,12 @@
 
   #define HPET_MSI_SUPPORT0
 
  +/* For bug compat, using only IRQ2. Soon it will be fixed as
  + * 0xff0104ULL, i.e using IRQ16~23, IRQ8 and IRQ2
 
  So users are expected to stick a bitmask of legal
  pins here?
  I think that's a bit too much rope to give to users.
  Don't you think?
 
 Sorry, not understand your meaning exactly.  But the scene will be:
 guest kernel polls the ability bitmask, and pick up one pin which is
 not occupied or can be shared with the level-trigger and low-active.
 So is it rope?

 I merely say that it's better to make this a bool or bit property.
 UINT32 is too much flexibility imho.

The interrupt capability is export to guest by register
Tn_INT_ROUTE_CAP[63:32]. So it is useless to make them as a bit
property. Do you think so?

Regards
Pingfan

config register in hpet con
 Thanks and regards,
 Pingfan
  after
  + * introducing pc-1.6 compat.
  + */
  +#define HPET_TN_INT_CAP_DEFAULT 0x4ULL
  +
   #define TYPE_HPET hpet
   #define HPET(obj) OBJECT_CHECK(HPETState, (obj), TYPE_HPET)
 
  @@ -73,6 +80,7 @@ typedef struct HPETState {
   uint8_t rtc_irq_level;
   qemu_irq pit_enabled;
   uint8_t num_timers;
  +uint32_t intcap;
   HPETTimer timer[HPET_MAX_TIMERS];
 
   /* Memory-mapped, software visible registers */
  @@ -663,8 +671,8 @@ static void hpet_reset(DeviceState *d)
   if (s-flags  (1  HPET_MSI_SUPPORT)) {
   timer-config |= HPET_TN_FSB_CAP;
   }
  -/* advertise availability of ioapic inti2 */
  -timer-config |=  0x0004ULL  32;
  +/* advertise availability of ioapic int */
  +timer-config |=  (uint64_t)s-intcap  32;
   timer-period = 0ULL;
   timer-wrap_flag = 0;
   }
  @@ -753,6 +761,7 @@ static void hpet_realize(DeviceState *dev, Error 
  **errp)
   static Property hpet_device_properties[] = {
   DEFINE_PROP_UINT8(timers, HPETState, num_timers, HPET_MIN_TIMERS),
   DEFINE_PROP_BIT(msi, HPETState, flags, HPET_MSI_SUPPORT, false),
  +DEFINE_PROP_UINT32(intcap, HPETState, intcap, 
  HPET_TN_INT_CAP_DEFAULT),
   DEFINE_PROP_END_OF_LIST(),
   };
 
  --
  1.8.1.4
 



Re: [Qemu-devel] [PATCH v5 1/5] hpet: inverse polarity when pin above ISA_NUM_IRQS

2013-09-30 Thread liu ping fan
On Sun, Sep 29, 2013 at 12:20 PM, Michael S. Tsirkin m...@redhat.com wrote:
 On Sun, Sep 29, 2013 at 11:25:24AM +0800, liu ping fan wrote:
 On Sun, Sep 29, 2013 at 3:52 AM, Michael S. Tsirkin m...@redhat.com wrote:
  On Thu, Sep 12, 2013 at 11:25:14AM +0800, Liu Ping Fan wrote:
  According to hpet spec, hpet irq is high active. But according to
  ICH spec, there is inversion before the input of ioapic. So the OS
  will expect low active on this IRQ line.
 
 
 (And this is observed on
  bare metal).
 
  How does one test this on bare metal?
 
 If changing the func hpet_timer_set_irq() in linux's hpet driver,
 from acpi_register_gsi(NULL, irq, ACPI_LEVEL_SENSITIVE,
 ACPI_ACTIVE_LOW);  to ACPI_ACTIVE_HIGH,  then run hpet_example.c on
 bare metal, the modified kernel will complain about spurious irq and
 disable the irq line.

 ok that's useful info for the changelog.

Will document them.

 
  We fold the emulation of this inversion inside the hpet logic.
 
  Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
 
 
  Doesn't this affect cross-version migration?
  E.g. imagine that you migrate between systems
  with/without this fix.
 
 No. the changing only affect route = ISA_NUM_IRQS,  For linux
 guest, it use IRQ2/8 for hpet0/hpet1 which is reserved by kernel. It
 work without this fix (I think windows is the same). But the
 hpet_example.c(in linux) can not work without this fix. So no such
 run-time instance before this bug fix.

 Regards,
 Pingfan

 aha so the argument is it's already too broken to even mostly work,
 we don't need to worry about migrating it to/from old qemu.

Yes :)

Thanks,
Pingfan



Re: [Qemu-devel] [PATCH v5 1/5] hpet: inverse polarity when pin above ISA_NUM_IRQS

2013-09-28 Thread liu ping fan
On Sun, Sep 29, 2013 at 3:52 AM, Michael S. Tsirkin m...@redhat.com wrote:
 On Thu, Sep 12, 2013 at 11:25:14AM +0800, Liu Ping Fan wrote:
 According to hpet spec, hpet irq is high active. But according to
 ICH spec, there is inversion before the input of ioapic. So the OS
 will expect low active on this IRQ line.


(And this is observed on
 bare metal).

 How does one test this on bare metal?

If changing the func hpet_timer_set_irq() in linux's hpet driver,
from acpi_register_gsi(NULL, irq, ACPI_LEVEL_SENSITIVE,
ACPI_ACTIVE_LOW);  to ACPI_ACTIVE_HIGH,  then run hpet_example.c on
bare metal, the modified kernel will complain about spurious irq and
disable the irq line.

 We fold the emulation of this inversion inside the hpet logic.

 Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com


 Doesn't this affect cross-version migration?
 E.g. imagine that you migrate between systems
 with/without this fix.

No. the changing only affect route = ISA_NUM_IRQS,  For linux
guest, it use IRQ2/8 for hpet0/hpet1 which is reserved by kernel. It
work without this fix (I think windows is the same). But the
hpet_example.c(in linux) can not work without this fix. So no such
run-time instance before this bug fix.

Regards,
Pingfan

 ---
  hw/timer/hpet.c | 14 --
  1 file changed, 12 insertions(+), 2 deletions(-)

 diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
 index fcd22ae..8429eb3 100644
 --- a/hw/timer/hpet.c
 +++ b/hw/timer/hpet.c
 @@ -198,13 +198,23 @@ static void update_irq(struct HPETTimer *timer, int 
 set)
  if (!set || !timer_enabled(timer) || !hpet_enabled(timer-state)) {
  s-isr = ~mask;
  if (!timer_fsb_route(timer)) {
 -qemu_irq_lower(s-irqs[route]);
 +/* fold the ICH PIRQ# pin's internal inversion logic into hpet 
 */
 +if (route = ISA_NUM_IRQS) {
 +qemu_irq_raise(s-irqs[route]);
 +} else {
 +qemu_irq_lower(s-irqs[route]);
 +}
  }
  } else if (timer_fsb_route(timer)) {
  stl_le_phys(timer-fsb  32, timer-fsb  0x);
  } else if (timer-config  HPET_TN_TYPE_LEVEL) {
  s-isr |= mask;
 -qemu_irq_raise(s-irqs[route]);
 +/* fold the ICH PIRQ# pin's internal inversion logic into hpet */
 +if (route = ISA_NUM_IRQS) {
 +qemu_irq_lower(s-irqs[route]);
 +} else {
 +qemu_irq_raise(s-irqs[route]);
 +}
  } else {
  s-isr = ~mask;
  qemu_irq_pulse(s-irqs[route]);
 --
 1.8.1.4




Re: [Qemu-devel] [PATCH v5 2/5] hpet: entitle more irq pins for hpet

2013-09-28 Thread liu ping fan
On Sun, Sep 29, 2013 at 3:56 AM, Michael S. Tsirkin m...@redhat.com wrote:
 On Thu, Sep 12, 2013 at 11:25:15AM +0800, Liu Ping Fan wrote:
 On PC, IRQ2/8 can be reserved for hpet timer 0/1. And pin 16~23
 of ioapic can be dynamically assigned to hpet as guest chooses.
 (Will enable them after introducing pc 1.6 compat)

 Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
 ---
  hw/timer/hpet.c | 13 +++--
  1 file changed, 11 insertions(+), 2 deletions(-)

 diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
 index 8429eb3..46903b9 100644
 --- a/hw/timer/hpet.c
 +++ b/hw/timer/hpet.c
 @@ -25,6 +25,7 @@
   */

  #include hw/hw.h
 +#include hw/boards.h
  #include hw/i386/pc.h
  #include ui/console.h
  #include qemu/timer.h
 @@ -42,6 +43,12 @@

  #define HPET_MSI_SUPPORT0

 +/* For bug compat, using only IRQ2. Soon it will be fixed as
 + * 0xff0104ULL, i.e using IRQ16~23, IRQ8 and IRQ2

 So users are expected to stick a bitmask of legal
 pins here?
 I think that's a bit too much rope to give to users.
 Don't you think?

Sorry, not understand your meaning exactly.  But the scene will be:
guest kernel polls the ability bitmask, and pick up one pin which is
not occupied or can be shared with the level-trigger and low-active.
So is it rope?

Thanks and regards,
Pingfan
 after
 + * introducing pc-1.6 compat.
 + */
 +#define HPET_TN_INT_CAP_DEFAULT 0x4ULL
 +
  #define TYPE_HPET hpet
  #define HPET(obj) OBJECT_CHECK(HPETState, (obj), TYPE_HPET)

 @@ -73,6 +80,7 @@ typedef struct HPETState {
  uint8_t rtc_irq_level;
  qemu_irq pit_enabled;
  uint8_t num_timers;
 +uint32_t intcap;
  HPETTimer timer[HPET_MAX_TIMERS];

  /* Memory-mapped, software visible registers */
 @@ -663,8 +671,8 @@ static void hpet_reset(DeviceState *d)
  if (s-flags  (1  HPET_MSI_SUPPORT)) {
  timer-config |= HPET_TN_FSB_CAP;
  }
 -/* advertise availability of ioapic inti2 */
 -timer-config |=  0x0004ULL  32;
 +/* advertise availability of ioapic int */
 +timer-config |=  (uint64_t)s-intcap  32;
  timer-period = 0ULL;
  timer-wrap_flag = 0;
  }
 @@ -753,6 +761,7 @@ static void hpet_realize(DeviceState *dev, Error **errp)
  static Property hpet_device_properties[] = {
  DEFINE_PROP_UINT8(timers, HPETState, num_timers, HPET_MIN_TIMERS),
  DEFINE_PROP_BIT(msi, HPETState, flags, HPET_MSI_SUPPORT, false),
 +DEFINE_PROP_UINT32(intcap, HPETState, intcap, 
 HPET_TN_INT_CAP_DEFAULT),
  DEFINE_PROP_END_OF_LIST(),
  };

 --
 1.8.1.4




[Qemu-devel] [PATCH v5 0/4] timers thread-safe stuff

2013-09-25 Thread Liu Ping Fan
v5:
  fine rename some variable in patch24. 
  fix commit log for patch12

v4:
  fix commit log for protect timers_state's clock with seqlock  (Thanks for 
Alex)

v3:
  1. rename seqlock_read_check as seqlock_read_retry
  2. Document timerlist were protected by BQL, and discard private lock around 
qemu_event_wait(tl-ev).

v2:
  1. fix comment in commit and code
  2. fix race issue for qemu_clock_enable(foo,disable)


Liu Ping Fan (2):
  timer: protect timers_state's clock with seqlock
  timer: make qemu_clock_enable sync between disable and timer's cb

Paolo Bonzini (2):
  seqlock: introduce read-write seqlock
  qemu-thread: add QemuEvent

 cpus.c  |  41 +---
 include/qemu/seqlock.h  |  72 +++
 include/qemu/thread-posix.h |   8 +++
 include/qemu/thread-win32.h |   4 ++
 include/qemu/thread.h   |   7 +++
 include/qemu/timer.h|   8 +++
 qemu-timer.c|  21 +++-
 util/qemu-thread-posix.c| 116 
 util/qemu-thread-win32.c|  26 ++
 9 files changed, 294 insertions(+), 9 deletions(-)
 create mode 100644 include/qemu/seqlock.h

-- 
1.8.1.4




[Qemu-devel] [PATCH v5 4/4] timer: make qemu_clock_enable sync between disable and timer's cb

2013-09-25 Thread Liu Ping Fan
After disabling the QemuClock, we should make sure that no QemuTimers
are still in flight. To implement that with light overhead, we resort
to QemuEvent. The caller of disabling will wait on QemuEvent of each
timerlist.

Note, qemu_clock_enable(foo,false) can _not_ be called from timer's cb.
And the callers of qemu_clock_enable() should be sync by themselves,
not protected by this patch.

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
 include/qemu/timer.h |  6 ++
 qemu-timer.c | 21 -
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/include/qemu/timer.h b/include/qemu/timer.h
index bb1de23..c7fa83a 100644
--- a/include/qemu/timer.h
+++ b/include/qemu/timer.h
@@ -185,6 +185,12 @@ void qemu_clock_notify(QEMUClockType type);
  * @enabled: true to enable, false to disable
  *
  * Enable or disable a clock
+ * Disabling the clock will wait for related timerlists to stop
+ * executing qemu_run_timers.  Thus, this functions should not
+ * be used from the callback of a timer that is based on @clock.
+ * Doing so would cause a deadlock.
+ *
+ * Caller should hold BQL.
  */
 void qemu_clock_enable(QEMUClockType type, bool enabled);
 
diff --git a/qemu-timer.c b/qemu-timer.c
index 95ff47f..3a828aa 100644
--- a/qemu-timer.c
+++ b/qemu-timer.c
@@ -45,6 +45,7 @@
 /* timers */
 
 typedef struct QEMUClock {
+ /* We rely on BQL to protect the timerlists */
 QLIST_HEAD(, QEMUTimerList) timerlists;
 
 NotifierList reset_notifiers;
@@ -70,6 +71,8 @@ struct QEMUTimerList {
 QLIST_ENTRY(QEMUTimerList) list;
 QEMUTimerListNotifyCB *notify_cb;
 void *notify_opaque;
+/* light weight method to mark the end of timerlist's running */
+QemuEvent timers_done_ev;
 };
 
 /**
@@ -98,6 +101,7 @@ QEMUTimerList *timerlist_new(QEMUClockType type,
 QEMUClock *clock = qemu_clock_ptr(type);
 
 timer_list = g_malloc0(sizeof(QEMUTimerList));
+qemu_event_init(timer_list-timers_done_ev, false);
 timer_list-clock = clock;
 timer_list-notify_cb = cb;
 timer_list-notify_opaque = opaque;
@@ -140,13 +144,25 @@ void qemu_clock_notify(QEMUClockType type)
 }
 }
 
+/* Disabling the clock will wait for related timerlists to stop
+ * executing qemu_run_timers.  Thus, this functions should not
+ * be used from the callback of a timer that is based on @clock.
+ * Doing so would cause a deadlock.
+ *
+ * Caller should hold BQL.
+ */
 void qemu_clock_enable(QEMUClockType type, bool enabled)
 {
 QEMUClock *clock = qemu_clock_ptr(type);
+QEMUTimerList *tl;
 bool old = clock-enabled;
 clock-enabled = enabled;
 if (enabled  !old) {
 qemu_clock_notify(type);
+} else if (!enabled  old) {
+QLIST_FOREACH(tl, clock-timerlists, list) {
+qemu_event_wait(tl-timers_done_ev);
+}
 }
 }
 
@@ -373,8 +389,10 @@ bool timerlist_run_timers(QEMUTimerList *timer_list)
 QEMUTimer *ts;
 int64_t current_time;
 bool progress = false;
-   
+
+qemu_event_reset(timer_list-timers_done_ev);
 if (!timer_list-clock-enabled) {
+qemu_event_set(timer_list-timers_done_ev);
 return progress;
 }
 
@@ -392,6 +410,7 @@ bool timerlist_run_timers(QEMUTimerList *timer_list)
 ts-cb(ts-opaque);
 progress = true;
 }
+qemu_event_set(timer_list-timers_done_ev);
 return progress;
 }
 
-- 
1.8.1.4




[Qemu-devel] [PATCH v5 1/4] seqlock: introduce read-write seqlock

2013-09-25 Thread Liu Ping Fan
From: Paolo Bonzini pbonz...@redhat.com

Seqlock implementation for QEMU. Usage idiom

reader:
  do{
start = seqlock_read_begin()

  }while(seqlock_read_try(start))

writer:
  seqlock_write_lock()
  ...
  seqlock_write_unlock()

initialization:
  seqlock_init(QemuSeqLock *sl, QemuMutex *mutex)
  where mutex could be NULL if the caller will provide extra lock
  protection for seqlock_write_lock.

Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 include/qemu/seqlock.h | 72 ++
 1 file changed, 72 insertions(+)
 create mode 100644 include/qemu/seqlock.h

diff --git a/include/qemu/seqlock.h b/include/qemu/seqlock.h
new file mode 100644
index 000..3ff118a
--- /dev/null
+++ b/include/qemu/seqlock.h
@@ -0,0 +1,72 @@
+/*
+ * Seqlock implementation for QEMU
+ *
+ * Copyright Red Hat, Inc. 2013
+ *
+ * Author:
+ *  Paolo Bonzini pbonz...@redhat.com
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+#ifndef QEMU_SEQLOCK_H
+#define QEMU_SEQLOCK_H 1
+
+#include qemu/atomic.h
+#include qemu/thread.h
+
+typedef struct QemuSeqLock QemuSeqLock;
+
+struct QemuSeqLock {
+QemuMutex *mutex;
+unsigned sequence;
+};
+
+static inline void seqlock_init(QemuSeqLock *sl, QemuMutex *mutex)
+{
+sl-mutex = mutex;
+sl-sequence = 0;
+}
+
+/* Lock out other writers and update the count.  */
+static inline void seqlock_write_lock(QemuSeqLock *sl)
+{
+if (sl-mutex) {
+qemu_mutex_lock(sl-mutex);
+}
+++sl-sequence;
+
+/* Write sequence before updating other fields.  */
+smp_wmb();
+}
+
+static inline void seqlock_write_unlock(QemuSeqLock *sl)
+{
+/* Write other fields before finalizing sequence.  */
+smp_wmb();
+
+++sl-sequence;
+if (sl-mutex) {
+qemu_mutex_unlock(sl-mutex);
+}
+}
+
+static inline unsigned seqlock_read_begin(QemuSeqLock *sl)
+{
+/* Always fail if a write is in progress.  */
+unsigned ret = sl-sequence  ~1;
+
+/* Read sequence before reading other fields.  */
+smp_rmb();
+return ret;
+}
+
+static int seqlock_read_retry(const QemuSeqLock *sl, unsigned start)
+{
+/* Read other fields before reading final sequence.  */
+smp_rmb();
+return unlikely(sl-sequence != start);
+}
+
+#endif
-- 
1.8.1.4




[Qemu-devel] [PATCH v5 3/4] qemu-thread: add QemuEvent

2013-09-25 Thread Liu Ping Fan
From: Paolo Bonzini pbonz...@redhat.com

This emulates Win32 manual-reset events using futexes or conditional
variables.  Typical ways to use them are with multi-producer,
single-consumer data structures, to test for a complex condition whose
elements come from different threads:

for (;;) {
qemu_event_reset(ev);
... test complex condition ...
if (condition is true) {
break;
}
qemu_event_wait(ev);
}

Or more efficiently (but with some duplication):

... evaluate condition ...
while (!condition) {
qemu_event_reset(ev);
... evaluate condition ...
if (!condition) {
qemu_event_wait(ev);
... evaluate condition ...
}
}

QemuEvent provides a very fast userspace path in the common case when
no other thread is waiting, or the event is not changing state.  It
is used to report RCU quiescent states to the thread calling
synchronize_rcu (the latter being the single consumer), and to report
call_rcu invocations to the thread that receives them.

Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 include/qemu/thread-posix.h |   8 +++
 include/qemu/thread-win32.h |   4 ++
 include/qemu/thread.h   |   7 +++
 util/qemu-thread-posix.c| 116 
 util/qemu-thread-win32.c|  26 ++
 5 files changed, 161 insertions(+)

diff --git a/include/qemu/thread-posix.h b/include/qemu/thread-posix.h
index 361566a..eb5c7a1 100644
--- a/include/qemu/thread-posix.h
+++ b/include/qemu/thread-posix.h
@@ -21,6 +21,14 @@ struct QemuSemaphore {
 #endif
 };
 
+struct QemuEvent {
+#ifndef __linux__
+pthread_mutex_t lock;
+pthread_cond_t cond;
+#endif
+unsigned value;
+};
+
 struct QemuThread {
 pthread_t thread;
 };
diff --git a/include/qemu/thread-win32.h b/include/qemu/thread-win32.h
index 13adb95..3d58081 100644
--- a/include/qemu/thread-win32.h
+++ b/include/qemu/thread-win32.h
@@ -17,6 +17,10 @@ struct QemuSemaphore {
 HANDLE sema;
 };
 
+struct QemuEvent {
+HANDLE event;
+};
+
 typedef struct QemuThreadData QemuThreadData;
 struct QemuThread {
 QemuThreadData *data;
diff --git a/include/qemu/thread.h b/include/qemu/thread.h
index c02404b..3e32c65 100644
--- a/include/qemu/thread.h
+++ b/include/qemu/thread.h
@@ -7,6 +7,7 @@
 typedef struct QemuMutex QemuMutex;
 typedef struct QemuCond QemuCond;
 typedef struct QemuSemaphore QemuSemaphore;
+typedef struct QemuEvent QemuEvent;
 typedef struct QemuThread QemuThread;
 
 #ifdef _WIN32
@@ -45,6 +46,12 @@ void qemu_sem_wait(QemuSemaphore *sem);
 int qemu_sem_timedwait(QemuSemaphore *sem, int ms);
 void qemu_sem_destroy(QemuSemaphore *sem);
 
+void qemu_event_init(QemuEvent *ev, bool init);
+void qemu_event_set(QemuEvent *ev);
+void qemu_event_reset(QemuEvent *ev);
+void qemu_event_wait(QemuEvent *ev);
+void qemu_event_destroy(QemuEvent *ev);
+
 void qemu_thread_create(QemuThread *thread,
 void *(*start_routine)(void *),
 void *arg, int mode);
diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c
index 4de133e..37dd298 100644
--- a/util/qemu-thread-posix.c
+++ b/util/qemu-thread-posix.c
@@ -20,7 +20,12 @@
 #include limits.h
 #include unistd.h
 #include sys/time.h
+#ifdef __linux__
+#include sys/syscall.h
+#include linux/futex.h
+#endif
 #include qemu/thread.h
+#include qemu/atomic.h
 
 static void error_exit(int err, const char *msg)
 {
@@ -272,6 +277,117 @@ void qemu_sem_wait(QemuSemaphore *sem)
 #endif
 }
 
+#ifdef __linux__
+#define futex(...)  syscall(__NR_futex, __VA_ARGS__)
+
+static inline void futex_wake(QemuEvent *ev, int n)
+{
+futex(ev, FUTEX_WAKE, n, NULL, NULL, 0);
+}
+
+static inline void futex_wait(QemuEvent *ev, unsigned val)
+{
+futex(ev, FUTEX_WAIT, (int) val, NULL, NULL, 0);
+}
+#else
+static inline void futex_wake(QemuEvent *ev, int n)
+{
+if (n == 1) {
+pthread_cond_signal(ev-cond);
+} else {
+pthread_cond_broadcast(ev-cond);
+}
+}
+
+static inline void futex_wait(QemuEvent *ev, unsigned val)
+{
+pthread_mutex_lock(ev-lock);
+if (ev-value == val) {
+pthread_cond_wait(ev-cond, ev-lock);
+}
+pthread_mutex_unlock(ev-lock);
+}
+#endif
+
+/* Valid transitions:
+ * - free-set, when setting the event
+ * - busy-set, when setting the event, followed by futex_wake
+ * - set-free, when resetting the event
+ * - free-busy, when waiting
+ *
+ * set-busy does not happen (it can be observed from the outside but
+ * it really is set-free-busy).
+ *
+ * busy-free provably cannot happen; to enforce it, the set-free transition
+ * is done with an OR, which becomes a no-op if the event has concurrently
+ * transitioned to free or busy.
+ */
+
+#define EV_SET 0
+#define EV_FREE1
+#define EV_BUSY   -1
+
+void qemu_event_init(QemuEvent *ev, bool init)
+{
+#ifndef __linux__
+pthread_mutex_init(ev-lock, NULL);
+

Re: [Qemu-devel] [PATCH v5 0/5] bugs fix for hpet

2013-09-25 Thread liu ping fan
Hi, is hpet orphan? Or who can help me to merge this patch-set if my
patch is fine.

Thanks.

On Thu, Sep 12, 2013 at 11:25 AM, Liu Ping Fan qemul...@gmail.com wrote:
 v5:
   use stand compat property to fix hpet intcap on pc-q35, while on pc-piix, 
 hard code intcap as IRQ2

 v4:
   use stand compat property to fix hpet intcap

 v3:
   change hpet interrupt capablity on board's demand

 Liu Ping Fan (5):
   hpet: inverse polarity when pin above ISA_NUM_IRQS
   hpet: entitle more irq pins for hpet
   PC: use qdev_xx to create hpet instead of sysbus_create_xx
   PC: differentiate hpet's interrupt capability on piix and q35
   PC-1.6: add compatibility for hpet intcap on pc-q35-1.6

  hw/i386/pc.c | 17 ++---
  hw/i386/pc_piix.c|  3 ++-
  hw/i386/pc_q35.c |  2 +-
  hw/timer/hpet.c  | 24 
  include/hw/i386/pc.h |  7 ++-
  5 files changed, 43 insertions(+), 10 deletions(-)

 --
 1.8.1.4




[Qemu-devel] [PATCH v5 2/4] timer: protect timers_state's clock with seqlock

2013-09-25 Thread Liu Ping Fan
QEMU_CLOCK_VIRTUAL may be read outside BQL. This will make its
foundation, i.e. cpu_clock_offset exposed to race condition.
Using private lock to protect it.

After this patch, reading QEMU_CLOCK_VIRTUAL is thread safe
unless use_icount is true, in which case the existing callers
still rely on the BQL

Lock rule: private lock innermost, ie BQL-this lock

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
 cpus.c   | 41 +
 include/qemu/timer.h |  2 ++
 2 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/cpus.c b/cpus.c
index e566297..5baa76d 100644
--- a/cpus.c
+++ b/cpus.c
@@ -37,6 +37,7 @@
 #include sysemu/qtest.h
 #include qemu/main-loop.h
 #include qemu/bitmap.h
+#include qemu/seqlock.h
 
 #ifndef _WIN32
 #include qemu/compatfd.h
@@ -112,6 +113,11 @@ static int64_t qemu_icount;
 typedef struct TimersState {
 int64_t cpu_ticks_prev;
 int64_t cpu_ticks_offset;
+/* cpu_clock_offset will be read out of BQL, so protect it with private
+ * lock. As for cpu_ticks_*, no requirement to read it outside BQL yet.
+ * Lock rule: innermost
+ */
+QemuSeqLock cpu_clock_offset_seqlock;
 int64_t cpu_clock_offset;
 int32_t cpu_ticks_enabled;
 int64_t dummy;
@@ -137,6 +143,7 @@ int64_t cpu_get_icount(void)
 }
 
 /* return the host CPU cycle counter and handle stop/restart */
+/* Caller must hold the BQL */
 int64_t cpu_get_ticks(void)
 {
 if (use_icount) {
@@ -161,33 +168,50 @@ int64_t cpu_get_ticks(void)
 int64_t cpu_get_clock(void)
 {
 int64_t ti;
-if (!timers_state.cpu_ticks_enabled) {
-return timers_state.cpu_clock_offset;
-} else {
-ti = get_clock();
-return ti + timers_state.cpu_clock_offset;
-}
+unsigned start;
+
+do {
+start = seqlock_read_begin(timers_state.cpu_clock_offset_seqlock);
+if (!timers_state.cpu_ticks_enabled) {
+ti = timers_state.cpu_clock_offset;
+} else {
+ti = get_clock();
+ti += timers_state.cpu_clock_offset;
+}
+} while (seqlock_read_retry(timers_state.cpu_clock_offset_seqlock, 
start));
+
+return ti;
 }
 
-/* enable cpu_get_ticks() */
+/* enable cpu_get_ticks()
+ * Caller must hold BQL which server as mutex for cpu_clock_offset_seqlock.
+ */
 void cpu_enable_ticks(void)
 {
+/* Here, the really thing protected by seqlock is cpu_clock_offset. */
+seqlock_write_lock(timers_state.cpu_clock_offset_seqlock);
 if (!timers_state.cpu_ticks_enabled) {
 timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
 timers_state.cpu_clock_offset -= get_clock();
 timers_state.cpu_ticks_enabled = 1;
 }
+seqlock_write_unlock(timers_state.cpu_clock_offset_seqlock);
 }
 
 /* disable cpu_get_ticks() : the clock is stopped. You must not call
-   cpu_get_ticks() after that.  */
+ * cpu_get_ticks() after that.
+ * Caller must hold BQL which server as mutex for cpu_clock_offset_seqlock.
+ */
 void cpu_disable_ticks(void)
 {
+/* Here, the really thing protected by seqlock is cpu_clock_offset. */
+seqlock_write_lock(timers_state.cpu_clock_offset_seqlock);
 if (timers_state.cpu_ticks_enabled) {
 timers_state.cpu_ticks_offset = cpu_get_ticks();
 timers_state.cpu_clock_offset = cpu_get_clock();
 timers_state.cpu_ticks_enabled = 0;
 }
+seqlock_write_unlock(timers_state.cpu_clock_offset_seqlock);
 }
 
 /* Correlation between real and virtual time is always going to be
@@ -371,6 +395,7 @@ static const VMStateDescription vmstate_timers = {
 
 void configure_icount(const char *option)
 {
+seqlock_init(timers_state.cpu_clock_offset_seqlock, NULL);
 vmstate_register(NULL, 0, vmstate_timers, timers_state);
 if (!option) {
 return;
diff --git a/include/qemu/timer.h b/include/qemu/timer.h
index e4934dd..bb1de23 100644
--- a/include/qemu/timer.h
+++ b/include/qemu/timer.h
@@ -636,7 +636,9 @@ static inline int64_t qemu_soonest_timeout(int64_t 
timeout1, int64_t timeout2)
 void init_clocks(void);
 
 int64_t cpu_get_ticks(void);
+/* Caller must hold BQL */
 void cpu_enable_ticks(void);
+/* Caller must hold BQL */
 void cpu_disable_ticks(void);
 
 static inline int64_t get_ticks_per_sec(void)
-- 
1.8.1.4




Re: [Qemu-devel] [PATCH v5 1/4] seqlock: introduce read-write seqlock

2013-09-25 Thread liu ping fan
On Wed, Sep 25, 2013 at 2:20 PM, Liu Ping Fan qemul...@gmail.com wrote:
 From: Paolo Bonzini pbonz...@redhat.com

 Seqlock implementation for QEMU. Usage idiom

 reader:
   do{
 start = seqlock_read_begin()

   }while(seqlock_read_try(start))

 writer:
   seqlock_write_lock()
   ...
   seqlock_write_unlock()

 initialization:
   seqlock_init(QemuSeqLock *sl, QemuMutex *mutex)
   where mutex could be NULL if the caller will provide extra lock
   protection for seqlock_write_lock.

 Signed-off-by: Paolo Bonzini pbonz...@redhat.com

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
 ---
  include/qemu/seqlock.h | 72 
 ++
  1 file changed, 72 insertions(+)
  create mode 100644 include/qemu/seqlock.h

 diff --git a/include/qemu/seqlock.h b/include/qemu/seqlock.h
 new file mode 100644
 index 000..3ff118a
 --- /dev/null
 +++ b/include/qemu/seqlock.h
 @@ -0,0 +1,72 @@
 +/*
 + * Seqlock implementation for QEMU
 + *
 + * Copyright Red Hat, Inc. 2013
 + *
 + * Author:
 + *  Paolo Bonzini pbonz...@redhat.com
 + *
 + * This work is licensed under the terms of the GNU GPL, version 2 or later.
 + * See the COPYING file in the top-level directory.
 + *
 + */
 +#ifndef QEMU_SEQLOCK_H
 +#define QEMU_SEQLOCK_H 1
 +
 +#include qemu/atomic.h
 +#include qemu/thread.h
 +
 +typedef struct QemuSeqLock QemuSeqLock;
 +
 +struct QemuSeqLock {
 +QemuMutex *mutex;
 +unsigned sequence;
 +};
 +
 +static inline void seqlock_init(QemuSeqLock *sl, QemuMutex *mutex)
 +{
 +sl-mutex = mutex;
 +sl-sequence = 0;
 +}
 +
 +/* Lock out other writers and update the count.  */
 +static inline void seqlock_write_lock(QemuSeqLock *sl)
 +{
 +if (sl-mutex) {
 +qemu_mutex_lock(sl-mutex);
 +}
 +++sl-sequence;
 +
 +/* Write sequence before updating other fields.  */
 +smp_wmb();
 +}
 +
 +static inline void seqlock_write_unlock(QemuSeqLock *sl)
 +{
 +/* Write other fields before finalizing sequence.  */
 +smp_wmb();
 +
 +++sl-sequence;
 +if (sl-mutex) {
 +qemu_mutex_unlock(sl-mutex);
 +}
 +}
 +
 +static inline unsigned seqlock_read_begin(QemuSeqLock *sl)
 +{
 +/* Always fail if a write is in progress.  */
 +unsigned ret = sl-sequence  ~1;
 +
 +/* Read sequence before reading other fields.  */
 +smp_rmb();
 +return ret;
 +}
 +
 +static int seqlock_read_retry(const QemuSeqLock *sl, unsigned start)
 +{
 +/* Read other fields before reading final sequence.  */
 +smp_rmb();
 +return unlikely(sl-sequence != start);
 +}
 +
 +#endif
 --
 1.8.1.4




Re: [Qemu-devel] [PATCH v5 3/4] qemu-thread: add QemuEvent

2013-09-25 Thread liu ping fan
On Wed, Sep 25, 2013 at 2:20 PM, Liu Ping Fan qemul...@gmail.com wrote:
 From: Paolo Bonzini pbonz...@redhat.com

 This emulates Win32 manual-reset events using futexes or conditional
 variables.  Typical ways to use them are with multi-producer,
 single-consumer data structures, to test for a complex condition whose
 elements come from different threads:

 for (;;) {
 qemu_event_reset(ev);
 ... test complex condition ...
 if (condition is true) {
 break;
 }
 qemu_event_wait(ev);
 }

 Or more efficiently (but with some duplication):

 ... evaluate condition ...
 while (!condition) {
 qemu_event_reset(ev);
 ... evaluate condition ...
 if (!condition) {
 qemu_event_wait(ev);
 ... evaluate condition ...
 }
 }

 QemuEvent provides a very fast userspace path in the common case when
 no other thread is waiting, or the event is not changing state.  It
 is used to report RCU quiescent states to the thread calling
 synchronize_rcu (the latter being the single consumer), and to report
 call_rcu invocations to the thread that receives them.

 Signed-off-by: Paolo Bonzini pbonz...@redhat.com

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
 ---
  include/qemu/thread-posix.h |   8 +++
  include/qemu/thread-win32.h |   4 ++
  include/qemu/thread.h   |   7 +++
  util/qemu-thread-posix.c| 116 
 
  util/qemu-thread-win32.c|  26 ++
  5 files changed, 161 insertions(+)

 diff --git a/include/qemu/thread-posix.h b/include/qemu/thread-posix.h
 index 361566a..eb5c7a1 100644
 --- a/include/qemu/thread-posix.h
 +++ b/include/qemu/thread-posix.h
 @@ -21,6 +21,14 @@ struct QemuSemaphore {
  #endif
  };

 +struct QemuEvent {
 +#ifndef __linux__
 +pthread_mutex_t lock;
 +pthread_cond_t cond;
 +#endif
 +unsigned value;
 +};
 +
  struct QemuThread {
  pthread_t thread;
  };
 diff --git a/include/qemu/thread-win32.h b/include/qemu/thread-win32.h
 index 13adb95..3d58081 100644
 --- a/include/qemu/thread-win32.h
 +++ b/include/qemu/thread-win32.h
 @@ -17,6 +17,10 @@ struct QemuSemaphore {
  HANDLE sema;
  };

 +struct QemuEvent {
 +HANDLE event;
 +};
 +
  typedef struct QemuThreadData QemuThreadData;
  struct QemuThread {
  QemuThreadData *data;
 diff --git a/include/qemu/thread.h b/include/qemu/thread.h
 index c02404b..3e32c65 100644
 --- a/include/qemu/thread.h
 +++ b/include/qemu/thread.h
 @@ -7,6 +7,7 @@
  typedef struct QemuMutex QemuMutex;
  typedef struct QemuCond QemuCond;
  typedef struct QemuSemaphore QemuSemaphore;
 +typedef struct QemuEvent QemuEvent;
  typedef struct QemuThread QemuThread;

  #ifdef _WIN32
 @@ -45,6 +46,12 @@ void qemu_sem_wait(QemuSemaphore *sem);
  int qemu_sem_timedwait(QemuSemaphore *sem, int ms);
  void qemu_sem_destroy(QemuSemaphore *sem);

 +void qemu_event_init(QemuEvent *ev, bool init);
 +void qemu_event_set(QemuEvent *ev);
 +void qemu_event_reset(QemuEvent *ev);
 +void qemu_event_wait(QemuEvent *ev);
 +void qemu_event_destroy(QemuEvent *ev);
 +
  void qemu_thread_create(QemuThread *thread,
  void *(*start_routine)(void *),
  void *arg, int mode);
 diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c
 index 4de133e..37dd298 100644
 --- a/util/qemu-thread-posix.c
 +++ b/util/qemu-thread-posix.c
 @@ -20,7 +20,12 @@
  #include limits.h
  #include unistd.h
  #include sys/time.h
 +#ifdef __linux__
 +#include sys/syscall.h
 +#include linux/futex.h
 +#endif
  #include qemu/thread.h
 +#include qemu/atomic.h

  static void error_exit(int err, const char *msg)
  {
 @@ -272,6 +277,117 @@ void qemu_sem_wait(QemuSemaphore *sem)
  #endif
  }

 +#ifdef __linux__
 +#define futex(...)  syscall(__NR_futex, __VA_ARGS__)
 +
 +static inline void futex_wake(QemuEvent *ev, int n)
 +{
 +futex(ev, FUTEX_WAKE, n, NULL, NULL, 0);
 +}
 +
 +static inline void futex_wait(QemuEvent *ev, unsigned val)
 +{
 +futex(ev, FUTEX_WAIT, (int) val, NULL, NULL, 0);
 +}
 +#else
 +static inline void futex_wake(QemuEvent *ev, int n)
 +{
 +if (n == 1) {
 +pthread_cond_signal(ev-cond);
 +} else {
 +pthread_cond_broadcast(ev-cond);
 +}
 +}
 +
 +static inline void futex_wait(QemuEvent *ev, unsigned val)
 +{
 +pthread_mutex_lock(ev-lock);
 +if (ev-value == val) {
 +pthread_cond_wait(ev-cond, ev-lock);
 +}
 +pthread_mutex_unlock(ev-lock);
 +}
 +#endif
 +
 +/* Valid transitions:
 + * - free-set, when setting the event
 + * - busy-set, when setting the event, followed by futex_wake
 + * - set-free, when resetting the event
 + * - free-busy, when waiting
 + *
 + * set-busy does not happen (it can be observed from the outside but
 + * it really is set-free-busy).
 + *
 + * busy-free provably cannot happen; to enforce it, the set-free transition
 + * is done with an OR, which becomes a no-op

Re: [Qemu-devel] [PATCH v4 2/4] timer: protect timers_state's clock with seqlock

2013-09-24 Thread liu ping fan
On Mon, Sep 23, 2013 at 2:21 PM, Jan Kiszka jan.kis...@siemens.com wrote:
 On 2013-09-22 10:11, Liu Ping Fan wrote:
 QEMU_CLOCK_VIRTUAL may be read outside BQL. This will make its
 foundation, i.e. timers_state exposed to race condition.
 Using private lock to protect it.

 After this patch, reading QEMU_CLOCK_VIRTUAL is thread safe
 unless use_icount is true, in which case the existing callers
 still rely on the BQL

 Lock rule: private lock innermost, ie BQL-this lock

 Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
 ---
  cpus.c | 36 ++--
  1 file changed, 30 insertions(+), 6 deletions(-)

 diff --git a/cpus.c b/cpus.c
 index e566297..870a832 100644
 --- a/cpus.c
 +++ b/cpus.c
 @@ -37,6 +37,7 @@
  #include sysemu/qtest.h
  #include qemu/main-loop.h
  #include qemu/bitmap.h
 +#include qemu/seqlock.h

  #ifndef _WIN32
  #include qemu/compatfd.h
 @@ -112,6 +113,13 @@ static int64_t qemu_icount;
  typedef struct TimersState {
  int64_t cpu_ticks_prev;
  int64_t cpu_ticks_offset;
 +/* cpu_clock_offset will be read out of BQL, so protect it with private
 + * lock. As for cpu_ticks_*, no requirement to read it outside BQL yet.
 + * Lock rule: innermost
 + */
 +QemuSeqLock clock_seqlock;
 +/* mutex for seqlock */
 +QemuMutex mutex;

 If these locks only protect cpu_clock_offset, name them accordingly
 (cpu_clock_offset_seqlock, cpu_clock_offset_mutex). But I think they

The mutex is internal for seqlock, not exported outside. So I think
the name is fine. But, I think we can drop it since the
cpu_enable_ticks/cpu_disable_ticks are always inside BQL.  So I will
rename clock_seqlock as cpu_clock_offset.
 also protect cpu_ticks_enabled, no? Then you should adjust the comment.

No. cpu_get_tsc()-cpu_get_ticks() is one reader of cpu_ticks_enabled,
which is protected against writers by BQL, not by this lock.

Thanks and regards,
Pingfan
  int64_t cpu_clock_offset;
  int32_t cpu_ticks_enabled;
  int64_t dummy;
 @@ -137,6 +145,7 @@ int64_t cpu_get_icount(void)
  }

  /* return the host CPU cycle counter and handle stop/restart */
 +/* cpu_ticks is safely if holding BQL */

 Caller must hold the BQL.

  int64_t cpu_get_ticks(void)
  {
  if (use_icount) {
 @@ -161,33 +170,46 @@ int64_t cpu_get_ticks(void)
  int64_t cpu_get_clock(void)
  {
  int64_t ti;
 -if (!timers_state.cpu_ticks_enabled) {
 -return timers_state.cpu_clock_offset;
 -} else {
 -ti = get_clock();
 -return ti + timers_state.cpu_clock_offset;
 -}
 +unsigned start;
 +
 +do {
 +start = seqlock_read_begin(timers_state.clock_seqlock);
 +if (!timers_state.cpu_ticks_enabled) {
 +ti = timers_state.cpu_clock_offset;
 +} else {
 +ti = get_clock();
 +ti += timers_state.cpu_clock_offset;
 +}
 +} while (seqlock_read_retry(timers_state.clock_seqlock, start));
 +
 +return ti;
  }

  /* enable cpu_get_ticks() */
  void cpu_enable_ticks(void)
  {
 +/* Here, the really thing protected by seqlock is cpu_clock_offset. */
 +seqlock_write_lock(timers_state.clock_seqlock);
  if (!timers_state.cpu_ticks_enabled) {
  timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
  timers_state.cpu_clock_offset -= get_clock();
  timers_state.cpu_ticks_enabled = 1;
  }
 +seqlock_write_unlock(timers_state.clock_seqlock);
  }

  /* disable cpu_get_ticks() : the clock is stopped. You must not call
 cpu_get_ticks() after that.  */
  void cpu_disable_ticks(void)
  {
 +/* Here, the really thing protected by seqlock is cpu_clock_offset. */
 +seqlock_write_lock(timers_state.clock_seqlock);
  if (timers_state.cpu_ticks_enabled) {
  timers_state.cpu_ticks_offset = cpu_get_ticks();
  timers_state.cpu_clock_offset = cpu_get_clock();
  timers_state.cpu_ticks_enabled = 0;
  }
 +seqlock_write_unlock(timers_state.clock_seqlock);
  }

  /* Correlation between real and virtual time is always going to be
 @@ -371,6 +393,8 @@ static const VMStateDescription vmstate_timers = {

  void configure_icount(const char *option)
  {
 +qemu_mutex_init(timers_state.mutex);
 +seqlock_init(timers_state.clock_seqlock, timers_state.mutex);
  vmstate_register(NULL, 0, vmstate_timers, timers_state);
  if (!option) {
  return;


 Jan

 --
 Siemens AG, Corporate Technology, CT RTC ITP SES-DE
 Corporate Competence Center Embedded Linux



Re: [Qemu-devel] [PATCH v4 1/4] seqlock: introduce read-write seqlock

2013-09-23 Thread liu ping fan
On Mon, Sep 23, 2013 at 2:21 PM, Jan Kiszka jan.kis...@siemens.com wrote:
 On 2013-09-22 10:11, Liu Ping Fan wrote:
 This lets the read-side access run outside the BQL.

 In fact, not only BQL. Didn't the original commit provide a changlog
 about the content of this patch? Otherwise, briefly describe use cases
 and maybe the typical invocation pattern.

Original commit provide no changelog (right? Paolo, if I do miss the
latest one in your tree).
What about the commit log like:

Seqlock implementation for QEMU. Usage idiom
reader:
seqlock_read_begin()
do{
}while(seqlock_read_try())

writer:
seqlock_write_lock()
...
seqlock_write_unlock()

initialization:
seqlock_init(QemuSeqLock *sl, QemuMutex *mutex)
where mutex could be NULL if the caller has provided extra lock
protection for seqlock_write_lock.


 Signed-off-by: Paolo Bonzini pbonz...@redhat.com

 From says you, signed-off only Paolo - this is inconsistent.

Oh, a mistake, the author is Paolo!

Thanks and regards,
Pingfan
 Jan

 ---
  include/qemu/seqlock.h | 72 
 ++
  1 file changed, 72 insertions(+)
  create mode 100644 include/qemu/seqlock.h

 diff --git a/include/qemu/seqlock.h b/include/qemu/seqlock.h
 new file mode 100644
 index 000..3ff118a
 --- /dev/null
 +++ b/include/qemu/seqlock.h
 @@ -0,0 +1,72 @@
 +/*
 + * Seqlock implementation for QEMU
 + *
 + * Copyright Red Hat, Inc. 2013
 + *
 + * Author:
 + *  Paolo Bonzini pbonz...@redhat.com
 + *
 + * This work is licensed under the terms of the GNU GPL, version 2 or later.
 + * See the COPYING file in the top-level directory.
 + *
 + */
 +#ifndef QEMU_SEQLOCK_H
 +#define QEMU_SEQLOCK_H 1
 +
 +#include qemu/atomic.h
 +#include qemu/thread.h
 +
 +typedef struct QemuSeqLock QemuSeqLock;
 +
 +struct QemuSeqLock {
 +QemuMutex *mutex;
 +unsigned sequence;
 +};
 +
 +static inline void seqlock_init(QemuSeqLock *sl, QemuMutex *mutex)
 +{
 +sl-mutex = mutex;
 +sl-sequence = 0;
 +}
 +
 +/* Lock out other writers and update the count.  */
 +static inline void seqlock_write_lock(QemuSeqLock *sl)
 +{
 +if (sl-mutex) {
 +qemu_mutex_lock(sl-mutex);
 +}
 +++sl-sequence;
 +
 +/* Write sequence before updating other fields.  */
 +smp_wmb();
 +}
 +
 +static inline void seqlock_write_unlock(QemuSeqLock *sl)
 +{
 +/* Write other fields before finalizing sequence.  */
 +smp_wmb();
 +
 +++sl-sequence;
 +if (sl-mutex) {
 +qemu_mutex_unlock(sl-mutex);
 +}
 +}
 +
 +static inline unsigned seqlock_read_begin(QemuSeqLock *sl)
 +{
 +/* Always fail if a write is in progress.  */
 +unsigned ret = sl-sequence  ~1;
 +
 +/* Read sequence before reading other fields.  */
 +smp_rmb();
 +return ret;
 +}
 +
 +static int seqlock_read_retry(const QemuSeqLock *sl, unsigned start)
 +{
 +/* Read other fields before reading final sequence.  */
 +smp_rmb();
 +return unlikely(sl-sequence != start);
 +}
 +
 +#endif


 --
 Siemens AG, Corporate Technology, CT RTC ITP SES-DE
 Corporate Competence Center Embedded Linux



Re: [Qemu-devel] [PATCH v4 4/4] timer: make qemu_clock_enable sync between disable and timer's cb

2013-09-23 Thread liu ping fan
On Mon, Sep 23, 2013 at 2:26 PM, Jan Kiszka jan.kis...@siemens.com wrote:
 On 2013-09-22 10:11, Liu Ping Fan wrote:
 After disabling the QemuClock, we should make sure that no QemuTimers
 are still in flight. To implement that with light overhead, we resort
 to QemuEvent. The caller of disabling will wait on QemuEvent of each
 timerlist.

 Note, qemu_clock_enable(foo,false) can _not_ be called from timer's cb.
 And the callers of qemu_clock_enable() should be sync by themselves,
 not protected by this patch.

 Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
 ---
  include/qemu/timer.h |  4 
  qemu-timer.c | 20 +++-
  2 files changed, 23 insertions(+), 1 deletion(-)

 diff --git a/include/qemu/timer.h b/include/qemu/timer.h
 index e4934dd..b26909a 100644
 --- a/include/qemu/timer.h
 +++ b/include/qemu/timer.h
 @@ -185,6 +185,10 @@ void qemu_clock_notify(QEMUClockType type);
   * @enabled: true to enable, false to disable
   *
   * Enable or disable a clock
 + * Disabling the clock will wait for related timerlists to stop
 + * executing qemu_run_timers.  Thus, this functions should not
 + * be used from the callback of a timer that is based on @clock.
 + * Doing so would cause a deadlock.
   */
  void qemu_clock_enable(QEMUClockType type, bool enabled);

 diff --git a/qemu-timer.c b/qemu-timer.c
 index 95ff47f..c500a76 100644
 --- a/qemu-timer.c
 +++ b/qemu-timer.c
 @@ -45,6 +45,7 @@
  /* timers */

  typedef struct QEMUClock {
 + /* We rely on BQL to protect the timerlists */
  QLIST_HEAD(, QEMUTimerList) timerlists;

  NotifierList reset_notifiers;
 @@ -70,6 +71,8 @@ struct QEMUTimerList {
  QLIST_ENTRY(QEMUTimerList) list;
  QEMUTimerListNotifyCB *notify_cb;
  void *notify_opaque;
 +/* light weight method to mark the end of timerlist's running */
 +QemuEvent ev;

 What about timers_done_ev?

fine, thx.
  };

  /**
 @@ -98,6 +101,7 @@ QEMUTimerList *timerlist_new(QEMUClockType type,
  QEMUClock *clock = qemu_clock_ptr(type);

  timer_list = g_malloc0(sizeof(QEMUTimerList));
 +qemu_event_init(timer_list-ev, false);
  timer_list-clock = clock;
  timer_list-notify_cb = cb;
  timer_list-notify_opaque = opaque;
 @@ -140,13 +144,24 @@ void qemu_clock_notify(QEMUClockType type)
  }
  }

 +/* Disabling the clock will wait for related timerlists to stop
 + * executing qemu_run_timers.  Thus, this functions should not
 + * be used from the callback of a timer that is based on @clock.
 + * Doing so would cause a deadlock.
 + */
  void qemu_clock_enable(QEMUClockType type, bool enabled)
  {
  QEMUClock *clock = qemu_clock_ptr(type);
 +QEMUTimerList *tl;
  bool old = clock-enabled;
  clock-enabled = enabled;
  if (enabled  !old) {
  qemu_clock_notify(type);
 +} else if (!enabled  old) {
 +/* We rely on BQL to protect the timerlists */

 So the caller of qemu_clock_enable has to hold the BQL? Then please add
 that to the function description above instead.

Ok, thx.

Regards,
Pingfan
 +QLIST_FOREACH(tl, clock-timerlists, list) {
 +qemu_event_wait(tl-ev);
 +}
  }
  }

 @@ -373,8 +388,10 @@ bool timerlist_run_timers(QEMUTimerList *timer_list)
  QEMUTimer *ts;
  int64_t current_time;
  bool progress = false;
 -
 +
 +qemu_event_reset(timer_list-ev);
  if (!timer_list-clock-enabled) {
 +qemu_event_set(timer_list-ev);
  return progress;
  }

 @@ -392,6 +409,7 @@ bool timerlist_run_timers(QEMUTimerList *timer_list)
  ts-cb(ts-opaque);
  progress = true;
  }
 +qemu_event_set(timer_list-ev);
  return progress;
  }




 --
 Siemens AG, Corporate Technology, CT RTC ITP SES-DE
 Corporate Competence Center Embedded Linux



Re: [Qemu-devel] [PATCH v3 0/4] timers thread-safe stuff

2013-09-22 Thread liu ping fan
On Wed, Sep 18, 2013 at 9:54 PM, Stefan Hajnoczi stefa...@gmail.com wrote:
 On Tue, Aug 27, 2013 at 11:20:59AM +0800, Liu Ping Fan wrote:
 Saw the Alex's patches has been merged, rebase mine onto his.

 v3:
   1. rename seqlock_read_check as seqlock_read_retry
   2. Document timerlist were protected by BQL, and discard private lock 
 around qemu_event_wait(tl-ev).

 v2:
   1. fix comment in commit and code
   2. fix race issue for qemu_clock_enable(foo,disable)


 Liu Ping Fan (2):
   timer: protect timers_state's clock with seqlock
   timer: make qemu_clock_enable sync between disable and timer's cb

 Paolo Bonzini (2):
   seqlock: introduce read-write seqlock
   qemu-thread: add QemuEvent

  cpus.c  |  36 +++---
  include/qemu/seqlock.h  |  72 +++
  include/qemu/thread-posix.h |   8 +++
  include/qemu/thread-win32.h |   4 ++
  include/qemu/thread.h   |   7 +++
  include/qemu/timer.h|   4 ++
  qemu-timer.c|  20 +++-
  util/qemu-thread-posix.c| 116 
 
  util/qemu-thread-win32.c|  26 ++
  9 files changed, 286 insertions(+), 7 deletions(-)
  create mode 100644 include/qemu/seqlock.h

 Ping Fan: Can you send a final version that addresses Alex's request for
 documentation?

 Otherwise we're ready to go.

Sorry, miss this letter, will update it immediately.

Thx,
Pingfan



[Qemu-devel] [PATCH v4 0/4] timers thread-safe stuff

2013-09-22 Thread Liu Ping Fan
v4:
  fix commit log for protect timers_state's clock with seqlock  (Thanks for 
Alex)

v3:
  1. rename seqlock_read_check as seqlock_read_retry
  2. Document timerlist were protected by BQL, and discard private lock around 
qemu_event_wait(tl-ev).

v2:
  1. fix comment in commit and code
  2. fix race issue for qemu_clock_enable(foo,disable)



Liu Ping Fan (2):
  timer: protect timers_state's clock with seqlock
  timer: make qemu_clock_enable sync between disable and timer's cb

Paolo Bonzini (2):
  seqlock: introduce read-write seqlock
  qemu-thread: add QemuEvent

 cpus.c  |  36 +++---
 include/qemu/seqlock.h  |  72 +++
 include/qemu/thread-posix.h |   8 +++
 include/qemu/thread-win32.h |   4 ++
 include/qemu/thread.h   |   7 +++
 include/qemu/timer.h|   4 ++
 qemu-timer.c|  20 +++-
 util/qemu-thread-posix.c| 116 
 util/qemu-thread-win32.c|  26 ++
 9 files changed, 286 insertions(+), 7 deletions(-)
 create mode 100644 include/qemu/seqlock.h

-- 
1.8.1.4




[Qemu-devel] [PATCH v4 1/4] seqlock: introduce read-write seqlock

2013-09-22 Thread Liu Ping Fan
This lets the read-side access run outside the BQL.

Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 include/qemu/seqlock.h | 72 ++
 1 file changed, 72 insertions(+)
 create mode 100644 include/qemu/seqlock.h

diff --git a/include/qemu/seqlock.h b/include/qemu/seqlock.h
new file mode 100644
index 000..3ff118a
--- /dev/null
+++ b/include/qemu/seqlock.h
@@ -0,0 +1,72 @@
+/*
+ * Seqlock implementation for QEMU
+ *
+ * Copyright Red Hat, Inc. 2013
+ *
+ * Author:
+ *  Paolo Bonzini pbonz...@redhat.com
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+#ifndef QEMU_SEQLOCK_H
+#define QEMU_SEQLOCK_H 1
+
+#include qemu/atomic.h
+#include qemu/thread.h
+
+typedef struct QemuSeqLock QemuSeqLock;
+
+struct QemuSeqLock {
+QemuMutex *mutex;
+unsigned sequence;
+};
+
+static inline void seqlock_init(QemuSeqLock *sl, QemuMutex *mutex)
+{
+sl-mutex = mutex;
+sl-sequence = 0;
+}
+
+/* Lock out other writers and update the count.  */
+static inline void seqlock_write_lock(QemuSeqLock *sl)
+{
+if (sl-mutex) {
+qemu_mutex_lock(sl-mutex);
+}
+++sl-sequence;
+
+/* Write sequence before updating other fields.  */
+smp_wmb();
+}
+
+static inline void seqlock_write_unlock(QemuSeqLock *sl)
+{
+/* Write other fields before finalizing sequence.  */
+smp_wmb();
+
+++sl-sequence;
+if (sl-mutex) {
+qemu_mutex_unlock(sl-mutex);
+}
+}
+
+static inline unsigned seqlock_read_begin(QemuSeqLock *sl)
+{
+/* Always fail if a write is in progress.  */
+unsigned ret = sl-sequence  ~1;
+
+/* Read sequence before reading other fields.  */
+smp_rmb();
+return ret;
+}
+
+static int seqlock_read_retry(const QemuSeqLock *sl, unsigned start)
+{
+/* Read other fields before reading final sequence.  */
+smp_rmb();
+return unlikely(sl-sequence != start);
+}
+
+#endif
-- 
1.8.1.4




[Qemu-devel] [PATCH v4 2/4] timer: protect timers_state's clock with seqlock

2013-09-22 Thread Liu Ping Fan
QEMU_CLOCK_VIRTUAL may be read outside BQL. This will make its
foundation, i.e. timers_state exposed to race condition.
Using private lock to protect it.

After this patch, reading QEMU_CLOCK_VIRTUAL is thread safe
unless use_icount is true, in which case the existing callers
still rely on the BQL

Lock rule: private lock innermost, ie BQL-this lock

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
 cpus.c | 36 ++--
 1 file changed, 30 insertions(+), 6 deletions(-)

diff --git a/cpus.c b/cpus.c
index e566297..870a832 100644
--- a/cpus.c
+++ b/cpus.c
@@ -37,6 +37,7 @@
 #include sysemu/qtest.h
 #include qemu/main-loop.h
 #include qemu/bitmap.h
+#include qemu/seqlock.h
 
 #ifndef _WIN32
 #include qemu/compatfd.h
@@ -112,6 +113,13 @@ static int64_t qemu_icount;
 typedef struct TimersState {
 int64_t cpu_ticks_prev;
 int64_t cpu_ticks_offset;
+/* cpu_clock_offset will be read out of BQL, so protect it with private
+ * lock. As for cpu_ticks_*, no requirement to read it outside BQL yet.
+ * Lock rule: innermost
+ */
+QemuSeqLock clock_seqlock;
+/* mutex for seqlock */
+QemuMutex mutex;
 int64_t cpu_clock_offset;
 int32_t cpu_ticks_enabled;
 int64_t dummy;
@@ -137,6 +145,7 @@ int64_t cpu_get_icount(void)
 }
 
 /* return the host CPU cycle counter and handle stop/restart */
+/* cpu_ticks is safely if holding BQL */
 int64_t cpu_get_ticks(void)
 {
 if (use_icount) {
@@ -161,33 +170,46 @@ int64_t cpu_get_ticks(void)
 int64_t cpu_get_clock(void)
 {
 int64_t ti;
-if (!timers_state.cpu_ticks_enabled) {
-return timers_state.cpu_clock_offset;
-} else {
-ti = get_clock();
-return ti + timers_state.cpu_clock_offset;
-}
+unsigned start;
+
+do {
+start = seqlock_read_begin(timers_state.clock_seqlock);
+if (!timers_state.cpu_ticks_enabled) {
+ti = timers_state.cpu_clock_offset;
+} else {
+ti = get_clock();
+ti += timers_state.cpu_clock_offset;
+}
+} while (seqlock_read_retry(timers_state.clock_seqlock, start));
+
+return ti;
 }
 
 /* enable cpu_get_ticks() */
 void cpu_enable_ticks(void)
 {
+/* Here, the really thing protected by seqlock is cpu_clock_offset. */
+seqlock_write_lock(timers_state.clock_seqlock);
 if (!timers_state.cpu_ticks_enabled) {
 timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
 timers_state.cpu_clock_offset -= get_clock();
 timers_state.cpu_ticks_enabled = 1;
 }
+seqlock_write_unlock(timers_state.clock_seqlock);
 }
 
 /* disable cpu_get_ticks() : the clock is stopped. You must not call
cpu_get_ticks() after that.  */
 void cpu_disable_ticks(void)
 {
+/* Here, the really thing protected by seqlock is cpu_clock_offset. */
+seqlock_write_lock(timers_state.clock_seqlock);
 if (timers_state.cpu_ticks_enabled) {
 timers_state.cpu_ticks_offset = cpu_get_ticks();
 timers_state.cpu_clock_offset = cpu_get_clock();
 timers_state.cpu_ticks_enabled = 0;
 }
+seqlock_write_unlock(timers_state.clock_seqlock);
 }
 
 /* Correlation between real and virtual time is always going to be
@@ -371,6 +393,8 @@ static const VMStateDescription vmstate_timers = {
 
 void configure_icount(const char *option)
 {
+qemu_mutex_init(timers_state.mutex);
+seqlock_init(timers_state.clock_seqlock, timers_state.mutex);
 vmstate_register(NULL, 0, vmstate_timers, timers_state);
 if (!option) {
 return;
-- 
1.8.1.4




[Qemu-devel] [PATCH v4 1/4] seqlock: introduce read-write seqlock

2013-09-22 Thread Liu Ping Fan
This lets the read-side access run outside the BQL.

Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 include/qemu/seqlock.h | 72 ++
 1 file changed, 72 insertions(+)
 create mode 100644 include/qemu/seqlock.h

diff --git a/include/qemu/seqlock.h b/include/qemu/seqlock.h
new file mode 100644
index 000..3ff118a
--- /dev/null
+++ b/include/qemu/seqlock.h
@@ -0,0 +1,72 @@
+/*
+ * Seqlock implementation for QEMU
+ *
+ * Copyright Red Hat, Inc. 2013
+ *
+ * Author:
+ *  Paolo Bonzini pbonz...@redhat.com
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+#ifndef QEMU_SEQLOCK_H
+#define QEMU_SEQLOCK_H 1
+
+#include qemu/atomic.h
+#include qemu/thread.h
+
+typedef struct QemuSeqLock QemuSeqLock;
+
+struct QemuSeqLock {
+QemuMutex *mutex;
+unsigned sequence;
+};
+
+static inline void seqlock_init(QemuSeqLock *sl, QemuMutex *mutex)
+{
+sl-mutex = mutex;
+sl-sequence = 0;
+}
+
+/* Lock out other writers and update the count.  */
+static inline void seqlock_write_lock(QemuSeqLock *sl)
+{
+if (sl-mutex) {
+qemu_mutex_lock(sl-mutex);
+}
+++sl-sequence;
+
+/* Write sequence before updating other fields.  */
+smp_wmb();
+}
+
+static inline void seqlock_write_unlock(QemuSeqLock *sl)
+{
+/* Write other fields before finalizing sequence.  */
+smp_wmb();
+
+++sl-sequence;
+if (sl-mutex) {
+qemu_mutex_unlock(sl-mutex);
+}
+}
+
+static inline unsigned seqlock_read_begin(QemuSeqLock *sl)
+{
+/* Always fail if a write is in progress.  */
+unsigned ret = sl-sequence  ~1;
+
+/* Read sequence before reading other fields.  */
+smp_rmb();
+return ret;
+}
+
+static int seqlock_read_retry(const QemuSeqLock *sl, unsigned start)
+{
+/* Read other fields before reading final sequence.  */
+smp_rmb();
+return unlikely(sl-sequence != start);
+}
+
+#endif
-- 
1.8.1.4




[Qemu-devel] [PATCH v4 4/4] timer: make qemu_clock_enable sync between disable and timer's cb

2013-09-22 Thread Liu Ping Fan
After disabling the QemuClock, we should make sure that no QemuTimers
are still in flight. To implement that with light overhead, we resort
to QemuEvent. The caller of disabling will wait on QemuEvent of each
timerlist.

Note, qemu_clock_enable(foo,false) can _not_ be called from timer's cb.
And the callers of qemu_clock_enable() should be sync by themselves,
not protected by this patch.

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
 include/qemu/timer.h |  4 
 qemu-timer.c | 20 +++-
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/include/qemu/timer.h b/include/qemu/timer.h
index e4934dd..b26909a 100644
--- a/include/qemu/timer.h
+++ b/include/qemu/timer.h
@@ -185,6 +185,10 @@ void qemu_clock_notify(QEMUClockType type);
  * @enabled: true to enable, false to disable
  *
  * Enable or disable a clock
+ * Disabling the clock will wait for related timerlists to stop
+ * executing qemu_run_timers.  Thus, this functions should not
+ * be used from the callback of a timer that is based on @clock.
+ * Doing so would cause a deadlock.
  */
 void qemu_clock_enable(QEMUClockType type, bool enabled);
 
diff --git a/qemu-timer.c b/qemu-timer.c
index 95ff47f..c500a76 100644
--- a/qemu-timer.c
+++ b/qemu-timer.c
@@ -45,6 +45,7 @@
 /* timers */
 
 typedef struct QEMUClock {
+ /* We rely on BQL to protect the timerlists */
 QLIST_HEAD(, QEMUTimerList) timerlists;
 
 NotifierList reset_notifiers;
@@ -70,6 +71,8 @@ struct QEMUTimerList {
 QLIST_ENTRY(QEMUTimerList) list;
 QEMUTimerListNotifyCB *notify_cb;
 void *notify_opaque;
+/* light weight method to mark the end of timerlist's running */
+QemuEvent ev;
 };
 
 /**
@@ -98,6 +101,7 @@ QEMUTimerList *timerlist_new(QEMUClockType type,
 QEMUClock *clock = qemu_clock_ptr(type);
 
 timer_list = g_malloc0(sizeof(QEMUTimerList));
+qemu_event_init(timer_list-ev, false);
 timer_list-clock = clock;
 timer_list-notify_cb = cb;
 timer_list-notify_opaque = opaque;
@@ -140,13 +144,24 @@ void qemu_clock_notify(QEMUClockType type)
 }
 }
 
+/* Disabling the clock will wait for related timerlists to stop
+ * executing qemu_run_timers.  Thus, this functions should not
+ * be used from the callback of a timer that is based on @clock.
+ * Doing so would cause a deadlock.
+ */
 void qemu_clock_enable(QEMUClockType type, bool enabled)
 {
 QEMUClock *clock = qemu_clock_ptr(type);
+QEMUTimerList *tl;
 bool old = clock-enabled;
 clock-enabled = enabled;
 if (enabled  !old) {
 qemu_clock_notify(type);
+} else if (!enabled  old) {
+/* We rely on BQL to protect the timerlists */
+QLIST_FOREACH(tl, clock-timerlists, list) {
+qemu_event_wait(tl-ev);
+}
 }
 }
 
@@ -373,8 +388,10 @@ bool timerlist_run_timers(QEMUTimerList *timer_list)
 QEMUTimer *ts;
 int64_t current_time;
 bool progress = false;
-   
+
+qemu_event_reset(timer_list-ev);
 if (!timer_list-clock-enabled) {
+qemu_event_set(timer_list-ev);
 return progress;
 }
 
@@ -392,6 +409,7 @@ bool timerlist_run_timers(QEMUTimerList *timer_list)
 ts-cb(ts-opaque);
 progress = true;
 }
+qemu_event_set(timer_list-ev);
 return progress;
 }
 
-- 
1.8.1.4




[Qemu-devel] [PATCH v4 3/4] qemu-thread: add QemuEvent

2013-09-22 Thread Liu Ping Fan
This emulates Win32 manual-reset events using futexes or conditional
variables.  Typical ways to use them are with multi-producer,
single-consumer data structures, to test for a complex condition whose
elements come from different threads:

for (;;) {
qemu_event_reset(ev);
... test complex condition ...
if (condition is true) {
break;
}
qemu_event_wait(ev);
}

Or more efficiently (but with some duplication):

... evaluate condition ...
while (!condition) {
qemu_event_reset(ev);
... evaluate condition ...
if (!condition) {
qemu_event_wait(ev);
... evaluate condition ...
}
}

QemuEvent provides a very fast userspace path in the common case when
no other thread is waiting, or the event is not changing state.  It
is used to report RCU quiescent states to the thread calling
synchronize_rcu (the latter being the single consumer), and to report
call_rcu invocations to the thread that receives them.

Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 include/qemu/thread-posix.h |   8 +++
 include/qemu/thread-win32.h |   4 ++
 include/qemu/thread.h   |   7 +++
 util/qemu-thread-posix.c| 116 
 util/qemu-thread-win32.c|  26 ++
 5 files changed, 161 insertions(+)

diff --git a/include/qemu/thread-posix.h b/include/qemu/thread-posix.h
index 361566a..eb5c7a1 100644
--- a/include/qemu/thread-posix.h
+++ b/include/qemu/thread-posix.h
@@ -21,6 +21,14 @@ struct QemuSemaphore {
 #endif
 };
 
+struct QemuEvent {
+#ifndef __linux__
+pthread_mutex_t lock;
+pthread_cond_t cond;
+#endif
+unsigned value;
+};
+
 struct QemuThread {
 pthread_t thread;
 };
diff --git a/include/qemu/thread-win32.h b/include/qemu/thread-win32.h
index 13adb95..3d58081 100644
--- a/include/qemu/thread-win32.h
+++ b/include/qemu/thread-win32.h
@@ -17,6 +17,10 @@ struct QemuSemaphore {
 HANDLE sema;
 };
 
+struct QemuEvent {
+HANDLE event;
+};
+
 typedef struct QemuThreadData QemuThreadData;
 struct QemuThread {
 QemuThreadData *data;
diff --git a/include/qemu/thread.h b/include/qemu/thread.h
index c02404b..3e32c65 100644
--- a/include/qemu/thread.h
+++ b/include/qemu/thread.h
@@ -7,6 +7,7 @@
 typedef struct QemuMutex QemuMutex;
 typedef struct QemuCond QemuCond;
 typedef struct QemuSemaphore QemuSemaphore;
+typedef struct QemuEvent QemuEvent;
 typedef struct QemuThread QemuThread;
 
 #ifdef _WIN32
@@ -45,6 +46,12 @@ void qemu_sem_wait(QemuSemaphore *sem);
 int qemu_sem_timedwait(QemuSemaphore *sem, int ms);
 void qemu_sem_destroy(QemuSemaphore *sem);
 
+void qemu_event_init(QemuEvent *ev, bool init);
+void qemu_event_set(QemuEvent *ev);
+void qemu_event_reset(QemuEvent *ev);
+void qemu_event_wait(QemuEvent *ev);
+void qemu_event_destroy(QemuEvent *ev);
+
 void qemu_thread_create(QemuThread *thread,
 void *(*start_routine)(void *),
 void *arg, int mode);
diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c
index 4de133e..37dd298 100644
--- a/util/qemu-thread-posix.c
+++ b/util/qemu-thread-posix.c
@@ -20,7 +20,12 @@
 #include limits.h
 #include unistd.h
 #include sys/time.h
+#ifdef __linux__
+#include sys/syscall.h
+#include linux/futex.h
+#endif
 #include qemu/thread.h
+#include qemu/atomic.h
 
 static void error_exit(int err, const char *msg)
 {
@@ -272,6 +277,117 @@ void qemu_sem_wait(QemuSemaphore *sem)
 #endif
 }
 
+#ifdef __linux__
+#define futex(...)  syscall(__NR_futex, __VA_ARGS__)
+
+static inline void futex_wake(QemuEvent *ev, int n)
+{
+futex(ev, FUTEX_WAKE, n, NULL, NULL, 0);
+}
+
+static inline void futex_wait(QemuEvent *ev, unsigned val)
+{
+futex(ev, FUTEX_WAIT, (int) val, NULL, NULL, 0);
+}
+#else
+static inline void futex_wake(QemuEvent *ev, int n)
+{
+if (n == 1) {
+pthread_cond_signal(ev-cond);
+} else {
+pthread_cond_broadcast(ev-cond);
+}
+}
+
+static inline void futex_wait(QemuEvent *ev, unsigned val)
+{
+pthread_mutex_lock(ev-lock);
+if (ev-value == val) {
+pthread_cond_wait(ev-cond, ev-lock);
+}
+pthread_mutex_unlock(ev-lock);
+}
+#endif
+
+/* Valid transitions:
+ * - free-set, when setting the event
+ * - busy-set, when setting the event, followed by futex_wake
+ * - set-free, when resetting the event
+ * - free-busy, when waiting
+ *
+ * set-busy does not happen (it can be observed from the outside but
+ * it really is set-free-busy).
+ *
+ * busy-free provably cannot happen; to enforce it, the set-free transition
+ * is done with an OR, which becomes a no-op if the event has concurrently
+ * transitioned to free or busy.
+ */
+
+#define EV_SET 0
+#define EV_FREE1
+#define EV_BUSY   -1
+
+void qemu_event_init(QemuEvent *ev, bool init)
+{
+#ifndef __linux__
+pthread_mutex_init(ev-lock, NULL);
+pthread_cond_init(ev-cond, NULL);
+#endif
+
+ev-value = 

[Qemu-devel] [PATCH] sPAPR: implement route_intx_to_irq to get gsi of pci device.

2013-09-22 Thread Liu Ping Fan
This is useful when pci assignment happens on sPAPR.

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
This patch will apply on patches which enable xics in kernel.
---
 hw/intc/xics.c|  5 +
 hw/ppc/spapr_pci.c| 14 ++
 include/hw/ppc/xics.h |  1 +
 3 files changed, 20 insertions(+)

diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index bb018d1..02cdab8 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -442,6 +442,11 @@ void xics_set_irq_type(XICSState *icp, int irq, bool lsi)
 icp-ics-islsi[irq - icp-ics-offset] = lsi;
 }
 
+int xics_get_irq_offset(XICSState *icp)
+{
+return icp-ics-offset;
+}
+
 /*
  * Guest interfaces
  */
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 9b6ee32..6d3657a 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -432,6 +432,19 @@ static void pci_spapr_set_irq(void *opaque, int irq_num, 
int level)
 qemu_set_irq(spapr_phb_lsi_qirq(phb, irq_num), level);
 }
 
+static PCIINTxRoute spapr_phb_route_intx_to_irq(void *opaque, int pirq_pin)
+{
+int gsi;
+PCIINTxRoute route;
+sPAPRPHBState *phb = opaque;
+
+gsi = phb-lsi_table[pirq_pin].irq;
+gsi += xics_get_irq_offset(spapr-icp);
+route.mode = PCI_INTX_ENABLED;
+route.irq = gsi;
+return route;
+}
+
 /*
  * MSI/MSIX memory region implementation.
  * The handler handles both MSI and MSIX.
@@ -595,6 +608,7 @@ static int spapr_phb_init(SysBusDevice *s)
pci_spapr_set_irq, pci_spapr_map_irq, sphb,
sphb-memspace, sphb-iospace,
PCI_DEVFN(0, 0), PCI_NUM_PINS, TYPE_PCI_BUS);
+pci_bus_set_route_irq_fn(bus, spapr_phb_route_intx_to_irq);
 phb-bus = bus;
 
 sphb-dma_window_start = 0;
diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
index 66364c5..6ed1f4d 100644
--- a/include/hw/ppc/xics.h
+++ b/include/hw/ppc/xics.h
@@ -97,6 +97,7 @@ struct ICSIRQState {
 
 qemu_irq xics_get_qirq(XICSState *icp, int irq);
 void xics_set_irq_type(XICSState *icp, int irq, bool lsi);
+int xics_get_irq_offset(XICSState *icp);
 
 void xics_cpu_setup(XICSState *icp, PowerPCCPU *cpu);
 
-- 
1.8.1.4




Re: [Qemu-devel] [PATCH] sPAPR: implement route_intx_to_irq to get gsi of pci device.

2013-09-22 Thread liu ping fan
On Mon, Sep 23, 2013 at 4:02 AM, Alexander Graf ag...@suse.de wrote:

 Am 22.09.2013 um 13:47 schrieb Liu Ping Fan qemul...@gmail.com:

 This is useful when pci assignment happens on sPAPR.

 This patch doesn't sound useful on its own to me, thus probably belongs in a 
 greater patch set.

Yes, I think, it will be applied after Alexey Kardashevskiy's
patch-set, [PATCH v4 00/12] xics: reworks and in-kernel support
CCing Alexey Kardashevskiy

 And without even a clear commit message that explains why exactly this is 
 going to be useful eventually I don't see any reason to apply this patch.

Pci assignment adopts irqfd in kernel, which need gsi as input param, right?

Thanks,
Pingfan



Re: [Qemu-devel] [PATCH] sPAPR: implement route_intx_to_irq to get gsi of pci device.

2013-09-22 Thread liu ping fan
On Mon, Sep 23, 2013 at 9:59 AM, Alexey Kardashevskiy a...@ozlabs.ru wrote:
 On 09/22/2013 09:47 PM, Liu Ping Fan wrote:
 This is useful when pci assignment happens on sPAPR.


 I have almost the same patch in my queue already, it will enable irqfd for
 both INTX and MSI, I am just waiting till in-kernel XICS patchset gets in
 upstream and then I'll post it.

Ok,

 Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
 ---
 This patch will apply on patches which enable xics in kernel.
 ---
  hw/intc/xics.c|  5 +
  hw/ppc/spapr_pci.c| 14 ++
  include/hw/ppc/xics.h |  1 +
  3 files changed, 20 insertions(+)

 diff --git a/hw/intc/xics.c b/hw/intc/xics.c
 index bb018d1..02cdab8 100644
 --- a/hw/intc/xics.c
 +++ b/hw/intc/xics.c
 @@ -442,6 +442,11 @@ void xics_set_irq_type(XICSState *icp, int irq, bool 
 lsi)
  icp-ics-islsi[irq - icp-ics-offset] = lsi;
  }

 +int xics_get_irq_offset(XICSState *icp)
 +{
 +return icp-ics-offset;
 +}
 +
  /*
   * Guest interfaces
   */
 diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
 index 9b6ee32..6d3657a 100644
 --- a/hw/ppc/spapr_pci.c
 +++ b/hw/ppc/spapr_pci.c
 @@ -432,6 +432,19 @@ static void pci_spapr_set_irq(void *opaque, int 
 irq_num, int level)
  qemu_set_irq(spapr_phb_lsi_qirq(phb, irq_num), level);
  }

 +static PCIINTxRoute spapr_phb_route_intx_to_irq(void *opaque, int pirq_pin)
 +{
 +int gsi;
 +PCIINTxRoute route;
 +sPAPRPHBState *phb = opaque;
 +
 +gsi = phb-lsi_table[pirq_pin].irq;
 +gsi += xics_get_irq_offset(spapr-icp);


 Why do you need this? lsi_table[].irq is received from spapr_allocate_lsi()
 which already adds this offset.

Oh, you are right, next_irq begin at XICS_IRQ_BASE

Thx,
Pingfan


 +route.mode = PCI_INTX_ENABLED;
 +route.irq = gsi;
 +return route;
 +}
 +
  /*
   * MSI/MSIX memory region implementation.
   * The handler handles both MSI and MSIX.
 @@ -595,6 +608,7 @@ static int spapr_phb_init(SysBusDevice *s)
 pci_spapr_set_irq, pci_spapr_map_irq, sphb,
 sphb-memspace, sphb-iospace,
 PCI_DEVFN(0, 0), PCI_NUM_PINS, TYPE_PCI_BUS);
 +pci_bus_set_route_irq_fn(bus, spapr_phb_route_intx_to_irq);
  phb-bus = bus;

  sphb-dma_window_start = 0;
 diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
 index 66364c5..6ed1f4d 100644
 --- a/include/hw/ppc/xics.h
 +++ b/include/hw/ppc/xics.h
 @@ -97,6 +97,7 @@ struct ICSIRQState {

  qemu_irq xics_get_qirq(XICSState *icp, int irq);
  void xics_set_irq_type(XICSState *icp, int irq, bool lsi);
 +int xics_get_irq_offset(XICSState *icp);

  void xics_cpu_setup(XICSState *icp, PowerPCCPU *cpu);




 --
 Alexey



Re: [Qemu-devel] [PATCH v5 0/5] bugs fix for hpet

2013-09-12 Thread liu ping fan
On Thu, Sep 12, 2013 at 2:29 PM, Paolo Bonzini pbonz...@redhat.com wrote:
 Il 12/09/2013 05:25, Liu Ping Fan ha scritto:
 v5:
   use stand compat property to fix hpet intcap on pc-q35, while on pc-piix, 
 hard code intcap as IRQ2

 v4:
   use stand compat property to fix hpet intcap

 v3:
   change hpet interrupt capablity on board's demand

 Liu Ping Fan (5):
   hpet: inverse polarity when pin above ISA_NUM_IRQS
   hpet: entitle more irq pins for hpet
   PC: use qdev_xx to create hpet instead of sysbus_create_xx
   PC: differentiate hpet's interrupt capability on piix and q35
   PC-1.6: add compatibility for hpet intcap on pc-q35-1.6

  hw/i386/pc.c | 17 ++---
  hw/i386/pc_piix.c|  3 ++-
  hw/i386/pc_q35.c |  2 +-
  hw/timer/hpet.c  | 24 
  include/hw/i386/pc.h |  7 ++-
  5 files changed, 43 insertions(+), 10 deletions(-)


 Reviewed-by: Paolo Bonzini pbonz...@redhat.com

 Nice series, patches are split well.  Thank you very much!

Thanks for your guide :)  and learn much through it.

Regards,
Pingfan
 Paolo



Re: [Qemu-devel] [PATCH v3 0/3] qemu-timer: make qemu_timer_mod_ns() and qemu_timer_del() thread-safe

2013-09-12 Thread liu ping fan
On Thu, Sep 12, 2013 at 4:17 PM, Stefan Hajnoczi stefa...@gmail.com wrote:
 On Thu, Sep 12, 2013 at 11:10:01AM +0800, liu ping fan wrote:
 Do you think this series is ready to be merged?
 I have some code to run hpet on a dedicated thread, and in theory it
 will rely on this.

 Yes, it is ready.  To ease the merge I have rebased it and sent a new
 revision.

Great, thanks!

Regards,
Pingfan



Re: [Qemu-devel] [PATCH v3 0/3] qemu-timer: make qemu_timer_mod_ns() and qemu_timer_del() thread-safe

2013-09-11 Thread liu ping fan
Hi Stefan,

Do you think this series is ready to be merged?
I have some code to run hpet on a dedicated thread, and in theory it
will rely on this.

Thanks and regards,
Pingfan

On Thu, Aug 29, 2013 at 10:42 PM, Stefan Hajnoczi stefa...@redhat.com wrote:
 v3:
  * Squashed Paolo's fixes and added his patch to avoid locking in 
 timer_pending()

 v2:
  * Rebased onto qemu.git/master following the merge of Alex's AioContext 
 timers

 The purpose of these patches is to eventually allow device models to set and
 cancel timers without holding the global mutex.  When the device model runs in
 a vcpu thread and the iothread processes timers, the
 QEMUTimerList-active_timers must be protected from concurrent access.

 Patch 1 is a clean-up.

 Patch 2 is the entire change needed to protect -active_timers.

 Patch 3 makes timer_pending() run without a lock.

 Paolo Bonzini (1):
   qemu-timer: do not take the lock in timer_pending

 Stefan Hajnoczi (2):
   qemu-timer: drop outdated signal safety comments
   qemu-timer: make qemu_timer_mod_ns() and qemu_timer_del() thread-safe

  include/qemu/timer.h | 17 ++
  qemu-timer.c | 92 
 
  2 files changed, 81 insertions(+), 28 deletions(-)

 --
 1.8.3.1





[Qemu-devel] [PATCH v5 0/5] bugs fix for hpet

2013-09-11 Thread Liu Ping Fan
v5:
  use stand compat property to fix hpet intcap on pc-q35, while on pc-piix, 
hard code intcap as IRQ2

v4:
  use stand compat property to fix hpet intcap

v3:
  change hpet interrupt capablity on board's demand

Liu Ping Fan (5):
  hpet: inverse polarity when pin above ISA_NUM_IRQS
  hpet: entitle more irq pins for hpet
  PC: use qdev_xx to create hpet instead of sysbus_create_xx
  PC: differentiate hpet's interrupt capability on piix and q35
  PC-1.6: add compatibility for hpet intcap on pc-q35-1.6

 hw/i386/pc.c | 17 ++---
 hw/i386/pc_piix.c|  3 ++-
 hw/i386/pc_q35.c |  2 +-
 hw/timer/hpet.c  | 24 
 include/hw/i386/pc.h |  7 ++-
 5 files changed, 43 insertions(+), 10 deletions(-)

-- 
1.8.1.4




[Qemu-devel] [PATCH v5 2/5] hpet: entitle more irq pins for hpet

2013-09-11 Thread Liu Ping Fan
On PC, IRQ2/8 can be reserved for hpet timer 0/1. And pin 16~23
of ioapic can be dynamically assigned to hpet as guest chooses.
(Will enable them after introducing pc 1.6 compat)

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
 hw/timer/hpet.c | 13 +++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
index 8429eb3..46903b9 100644
--- a/hw/timer/hpet.c
+++ b/hw/timer/hpet.c
@@ -25,6 +25,7 @@
  */
 
 #include hw/hw.h
+#include hw/boards.h
 #include hw/i386/pc.h
 #include ui/console.h
 #include qemu/timer.h
@@ -42,6 +43,12 @@
 
 #define HPET_MSI_SUPPORT0
 
+/* For bug compat, using only IRQ2. Soon it will be fixed as
+ * 0xff0104ULL, i.e using IRQ16~23, IRQ8 and IRQ2 after
+ * introducing pc-1.6 compat.
+ */
+#define HPET_TN_INT_CAP_DEFAULT 0x4ULL
+
 #define TYPE_HPET hpet
 #define HPET(obj) OBJECT_CHECK(HPETState, (obj), TYPE_HPET)
 
@@ -73,6 +80,7 @@ typedef struct HPETState {
 uint8_t rtc_irq_level;
 qemu_irq pit_enabled;
 uint8_t num_timers;
+uint32_t intcap;
 HPETTimer timer[HPET_MAX_TIMERS];
 
 /* Memory-mapped, software visible registers */
@@ -663,8 +671,8 @@ static void hpet_reset(DeviceState *d)
 if (s-flags  (1  HPET_MSI_SUPPORT)) {
 timer-config |= HPET_TN_FSB_CAP;
 }
-/* advertise availability of ioapic inti2 */
-timer-config |=  0x0004ULL  32;
+/* advertise availability of ioapic int */
+timer-config |=  (uint64_t)s-intcap  32;
 timer-period = 0ULL;
 timer-wrap_flag = 0;
 }
@@ -753,6 +761,7 @@ static void hpet_realize(DeviceState *dev, Error **errp)
 static Property hpet_device_properties[] = {
 DEFINE_PROP_UINT8(timers, HPETState, num_timers, HPET_MIN_TIMERS),
 DEFINE_PROP_BIT(msi, HPETState, flags, HPET_MSI_SUPPORT, false),
+DEFINE_PROP_UINT32(intcap, HPETState, intcap, HPET_TN_INT_CAP_DEFAULT),
 DEFINE_PROP_END_OF_LIST(),
 };
 
-- 
1.8.1.4




[Qemu-devel] [PATCH v5 1/5] hpet: inverse polarity when pin above ISA_NUM_IRQS

2013-09-11 Thread Liu Ping Fan
According to hpet spec, hpet irq is high active. But according to
ICH spec, there is inversion before the input of ioapic. So the OS
will expect low active on this IRQ line.(And this is observed on
bare metal).

We fold the emulation of this inversion inside the hpet logic.

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
 hw/timer/hpet.c | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
index fcd22ae..8429eb3 100644
--- a/hw/timer/hpet.c
+++ b/hw/timer/hpet.c
@@ -198,13 +198,23 @@ static void update_irq(struct HPETTimer *timer, int set)
 if (!set || !timer_enabled(timer) || !hpet_enabled(timer-state)) {
 s-isr = ~mask;
 if (!timer_fsb_route(timer)) {
-qemu_irq_lower(s-irqs[route]);
+/* fold the ICH PIRQ# pin's internal inversion logic into hpet */
+if (route = ISA_NUM_IRQS) {
+qemu_irq_raise(s-irqs[route]);
+} else {
+qemu_irq_lower(s-irqs[route]);
+}
 }
 } else if (timer_fsb_route(timer)) {
 stl_le_phys(timer-fsb  32, timer-fsb  0x);
 } else if (timer-config  HPET_TN_TYPE_LEVEL) {
 s-isr |= mask;
-qemu_irq_raise(s-irqs[route]);
+/* fold the ICH PIRQ# pin's internal inversion logic into hpet */
+if (route = ISA_NUM_IRQS) {
+qemu_irq_lower(s-irqs[route]);
+} else {
+qemu_irq_raise(s-irqs[route]);
+}
 } else {
 s-isr = ~mask;
 qemu_irq_pulse(s-irqs[route]);
-- 
1.8.1.4




[Qemu-devel] [PATCH v5 3/5] PC: use qdev_xx to create hpet instead of sysbus_create_xx

2013-09-11 Thread Liu Ping Fan
sysbus_create_xx func does not allow us to set a device's extra
properties.  While hpet need to set its compat property before
initialization, so we abandon the wrapper function, and spread
its logic inline

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
 hw/i386/pc.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 0c313fe..f2b7b6c 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1246,9 +1246,16 @@ void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
  * when the HPET wants to take over. Thus we have to disable the latter.
  */
 if (!no_hpet  (!kvm_irqchip_in_kernel() || kvm_has_pit_state2())) {
-hpet = sysbus_try_create_simple(hpet, HPET_BASE, NULL);
-
+/* In order to set property, here not using sysbus_try_create_simple */
+hpet = qdev_try_create(NULL, hpet);
 if (hpet) {
+/* tmp fix. For compat, hard code to IRQ2 until we have correct
+ * compat property and differentiate pc-piix with pc-q35
+ */
+qdev_prop_set_uint32(hpet, intcap, 0x4);
+qdev_init_nofail(hpet);
+sysbus_mmio_map(SYS_BUS_DEVICE(hpet), 0, HPET_BASE);
+
 for (i = 0; i  GSI_NUM_PINS; i++) {
 sysbus_connect_irq(SYS_BUS_DEVICE(hpet), i, gsi[i]);
 }
-- 
1.8.1.4




[Qemu-devel] [PATCH v5 4/5] PC: differentiate hpet's interrupt capability on piix and q35

2013-09-11 Thread Liu Ping Fan
For pc-piix-*, hpet's intcap is always hard coded as IRQ2. While
for pc-q35-*, we resort to compat property to fix it (a later patch).

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
 hw/i386/pc.c | 12 
 hw/i386/pc_piix.c|  3 ++-
 hw/i386/pc_q35.c |  2 +-
 include/hw/i386/pc.h |  3 ++-
 4 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index f2b7b6c..b8e7cee 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1219,7 +1219,8 @@ static const MemoryRegionOps ioportF0_io_ops = {
 void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
   ISADevice **rtc_state,
   ISADevice **floppy,
-  bool no_vmport)
+  bool no_vmport,
+  bool hpet_irqs)
 {
 int i;
 DriveInfo *fd[MAX_FD];
@@ -1249,10 +1250,13 @@ void pc_basic_device_init(ISABus *isa_bus, qemu_irq 
*gsi,
 /* In order to set property, here not using sysbus_try_create_simple */
 hpet = qdev_try_create(NULL, hpet);
 if (hpet) {
-/* tmp fix. For compat, hard code to IRQ2 until we have correct
- * compat property and differentiate pc-iix with pc-q35
- */
 qdev_prop_set_uint32(hpet, intcap, 0x4);
+/* For pc-piix-*, hpet's intcap is always IRQ2. While for pc-q35-*,
+ * we resort to compat property to fix it.
+ */
+if (!hpet_irqs) {
+qdev_prop_set_uint32(hpet, intcap, 0x4);
+}
 qdev_init_nofail(hpet);
 sysbus_mmio_map(SYS_BUS_DEVICE(hpet), 0, HPET_BASE);
 
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 907792b..8dcd0d6 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -180,7 +180,8 @@ static void pc_init1(QEMUMachineInitArgs *args,
 pc_vga_init(isa_bus, pci_enabled ? pci_bus : NULL);
 
 /* init basic PC hardware */
-pc_basic_device_init(isa_bus, gsi, rtc_state, floppy, xen_enabled());
+pc_basic_device_init(isa_bus, gsi, rtc_state, floppy, xen_enabled(),
+false);
 
 pc_nic_init(isa_bus, pci_bus);
 
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index ca84e1c..9e41f4a 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -181,7 +181,7 @@ static void pc_q35_init(QEMUMachineInitArgs *args)
 pc_register_ferr_irq(gsi[13]);
 
 /* init basic PC hardware */
-pc_basic_device_init(isa_bus, gsi, rtc_state, floppy, false);
+pc_basic_device_init(isa_bus, gsi, rtc_state, floppy, false, true);
 
 /* connect pm stuff to lpc */
 ich9_lpc_pm_init(lpc);
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 9b2ddc4..9dd077f 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -134,7 +134,8 @@ DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus);
 void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
   ISADevice **rtc_state,
   ISADevice **floppy,
-  bool no_vmport);
+  bool no_vmport,
+  bool hpet_irqs);
 void pc_init_ne2k_isa(ISABus *bus, NICInfo *nd);
 void pc_cmos_init(ram_addr_t ram_size, ram_addr_t above_4g_mem_size,
   const char *boot_device,
-- 
1.8.1.4




[Qemu-devel] [PATCH v5 5/5] PC-1.6: add compatibility for hpet intcap on pc-q35-1.6

2013-09-11 Thread Liu Ping Fan
For guest bug compat, we limit hpet's interrupt compatibility on
ioapic's IRQ2 for pc-q35-1.6. As to pc-35-1.7 and newer, IRQ2, IRQ8,
and IRQ16~23 are allowed.

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
 hw/timer/hpet.c  | 7 ++-
 include/hw/i386/pc.h | 4 
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
index 46903b9..7af495a 100644
--- a/hw/timer/hpet.c
+++ b/hw/timer/hpet.c
@@ -43,11 +43,8 @@
 
 #define HPET_MSI_SUPPORT0
 
-/* For bug compat, using only IRQ2. Soon it will be fixed as
- * 0xff0104ULL, i.e using IRQ16~23, IRQ8 and IRQ2 after
- * introducing pc-1.6 compat.
- */
-#define HPET_TN_INT_CAP_DEFAULT 0x4ULL
+/* using IRQ16~23, IRQ8 and IRQ2 */
+#define HPET_TN_INT_CAP_DEFAULT 0xff0104ULL
 
 #define TYPE_HPET hpet
 #define HPET(obj) OBJECT_CHECK(HPETState, (obj), TYPE_HPET)
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 9dd077f..79c63b3 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -231,6 +231,10 @@ int e820_add_entry(uint64_t, uint64_t, uint32_t);
 .driver   = e1000,\
 .property = mitigation,\
 .value= off,\
+},{\
+.driver   = hpet,\
+.property = intcap,\
+.value= stringify(4),\
 }
 
 #define PC_COMPAT_1_5 \
-- 
1.8.1.4




[Qemu-devel] [PATCH 2/5] irq: implement route method of ioapic

2013-09-11 Thread Liu Ping Fan
Implement the routing of PC's interrupt gpio to intc, and
retrieve the gsi.

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
 hw/core/qdev.c |  8 
 hw/i386/kvm/i8259.c|  8 +++-
 hw/i386/kvm/ioapic.c   | 21 -
 hw/i386/pc_q35.c   |  4 ++--
 include/hw/qdev-core.h |  2 ++
 include/sysemu/kvm.h   |  1 +
 6 files changed, 40 insertions(+), 4 deletions(-)

diff --git a/hw/core/qdev.c b/hw/core/qdev.c
index 758de9f..63605d1 100644
--- a/hw/core/qdev.c
+++ b/hw/core/qdev.c
@@ -312,6 +312,14 @@ void qdev_init_gpio_in(DeviceState *dev, qemu_irq_handler 
handler, int n)
 dev-num_gpio_in += n;
 }
 
+void qdev_init_gpio_in_2(DeviceState *dev, qemu_irq_handler handler,
+qemu_irq_route route, int n)
+{
+dev-gpio_in = qemu_extend_irqs_2(dev-gpio_in, dev-num_gpio_in, handler,
+route, dev, n);
+dev-num_gpio_in += n;
+}
+
 void qdev_init_gpio_out(DeviceState *dev, qemu_irq *pins, int n)
 {
 assert(dev-num_gpio_out == 0);
diff --git a/hw/i386/kvm/i8259.c b/hw/i386/kvm/i8259.c
index 53e3ca8..1193b54 100644
--- a/hw/i386/kvm/i8259.c
+++ b/hw/i386/kvm/i8259.c
@@ -106,6 +106,11 @@ static void kvm_pic_reset(DeviceState *dev)
 kvm_pic_put(s);
 }
 
+static int kvm_pic_route_gsi(void *opaque, int irq)
+{
+return irq;
+}
+
 static void kvm_pic_set_irq(void *opaque, int irq, int level)
 {
 int delivered;
@@ -130,7 +135,8 @@ qemu_irq *kvm_i8259_init(ISABus *bus)
 i8259_init_chip(TYPE_KVM_I8259, bus, true);
 i8259_init_chip(TYPE_KVM_I8259, bus, false);
 
-return qemu_allocate_irqs(kvm_pic_set_irq, NULL, ISA_NUM_IRQS);
+return qemu_extend_irqs_2(NULL, 0, kvm_pic_set_irq, kvm_pic_route_gsi,
+NULL, ISA_NUM_IRQS);
 }
 
 static void kvm_i8259_class_init(ObjectClass *klass, void *data)
diff --git a/hw/i386/kvm/ioapic.c b/hw/i386/kvm/ioapic.c
index f11a540..1e6ff0b 100644
--- a/hw/i386/kvm/ioapic.c
+++ b/hw/i386/kvm/ioapic.c
@@ -44,6 +44,19 @@ void kvm_pc_setup_irq_routing(bool pci_enabled)
 }
 }
 
+int kvm_pc_route_gsi(void *opaque, int n)
+{
+GSIState *s = opaque;
+int gsi;
+
+if (n  ISA_NUM_IRQS) {
+gsi = qemu_irq_route_gsi(s-i8259_irq[n]);
+} else {
+gsi = qemu_irq_route_gsi(s-ioapic_irq[n]);
+}
+return gsi;
+}
+
 void kvm_pc_gsi_handler(void *opaque, int n, int level)
 {
 GSIState *s = opaque;
@@ -127,11 +140,17 @@ static void kvm_ioapic_set_irq(void *opaque, int irq, int 
level)
 apic_report_irq_delivered(delivered);
 }
 
+static int kvm_ioapic_route_irq(void *opaque, int irq)
+{
+return irq;
+}
+
 static void kvm_ioapic_init(IOAPICCommonState *s, int instance_no)
 {
 memory_region_init_reservation(s-io_memory, NULL, kvm-ioapic, 0x1000);
 
-qdev_init_gpio_in(DEVICE(s), kvm_ioapic_set_irq, IOAPIC_NUM_PINS);
+qdev_init_gpio_in_2(DEVICE(s), kvm_ioapic_set_irq, kvm_ioapic_route_irq,
+IOAPIC_NUM_PINS);
 }
 
 static Property kvm_ioapic_properties[] = {
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 198c785..df1deb3 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -120,8 +120,8 @@ static void pc_q35_init(QEMUMachineInitArgs *args)
 gsi_state = g_malloc0(sizeof(*gsi_state));
 if (kvm_irqchip_in_kernel()) {
 kvm_pc_setup_irq_routing(pci_enabled);
-gsi = qemu_allocate_irqs(kvm_pc_gsi_handler, gsi_state,
- GSI_NUM_PINS);
+gsi = qemu_extend_irqs_2(NULL, 0, kvm_pc_gsi_handler,
+kvm_pc_route_gsi, gsi_state, GSI_NUM_PINS);
 } else {
 gsi = qemu_allocate_irqs(gsi_handler, gsi_state, GSI_NUM_PINS);
 }
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index 46972f4..4b8eb35 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -252,6 +252,8 @@ BusState *qdev_get_child_bus(DeviceState *dev, const char 
*name);
 /* Register device properties.  */
 /* GPIO inputs also double as IRQ sinks.  */
 void qdev_init_gpio_in(DeviceState *dev, qemu_irq_handler handler, int n);
+void qdev_init_gpio_in_2(DeviceState *dev, qemu_irq_handler handler,
+qemu_irq_route route, int n);
 void qdev_init_gpio_out(DeviceState *dev, qemu_irq *pins, int n);
 
 BusState *qdev_get_parent_bus(DeviceState *dev);
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 8e76685..47cc012 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -312,6 +312,7 @@ void kvm_irqchip_release_virq(KVMState *s, int virq);
 int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n,
EventNotifier *rn, int virq);
 int kvm_irqchip_remove_irqfd_notifier(KVMState *s, EventNotifier *n, int virq);
+int kvm_pc_route_gsi(void *opaque, int n);
 void kvm_pc_gsi_handler(void *opaque, int n, int level);
 void kvm_pc_setup_irq_routing(bool pci_enabled);
 void kvm_init_irq_routing(KVMState *s);
-- 
1.8.1.4




[Qemu-devel] [RFC 0/5] run hpet on a dedicated thread

2013-09-11 Thread Liu Ping Fan
Nowadays, we run hpet on iothread. But there are so many undetermined jobs
on iothread. It will heavily affect the accuracy of hpet's timing.
When running on a dedicated thread, the accuracy of timing is only determined
by scheduler. which is more fixed.

This series depend on the thread-safe patches for timers. 
And it is just for discussion. And code is premature.


Liu Ping Fan (5):
  irq: introduce route method in IRQState to get gsi
  irq: implement route method of ioapic
  irqfd: equip irqfd with polarity
  hpet: deliver irq by irqfd when in dedicated thread mode
  hpet: run on dedicate thread

 hw/core/irq.c | 39 ++
 hw/core/qdev.c|  8 +
 hw/i386/kvm/i8259.c   |  8 -
 hw/i386/kvm/ioapic.c  | 21 +++-
 hw/i386/pc_q35.c  |  4 +--
 hw/misc/vfio.c|  4 +--
 hw/timer/hpet.c   | 85 ---
 hw/virtio/virtio-pci.c|  2 +-
 include/hw/irq.h  |  5 +++
 include/hw/qdev-core.h|  2 ++
 include/sysemu/kvm.h  |  4 ++-
 kvm-all.c | 14 
 linux-headers/linux/kvm.h |  3 ++
 13 files changed, 180 insertions(+), 19 deletions(-)

-- 
1.8.1.4




[Qemu-devel] [PATCH 1/5] irq: introduce route method in IRQState to get gsi

2013-09-11 Thread Liu Ping Fan
Since the interrupt pin's connection does not change, so we can decide
the gsi for the pin.  Introducing route method in IRQState to do that,
and the implement of route requires the guarantee that it can survive
without BQL.

When delivering irq, this will help us to step around the irq system in
qemu, and forward irq directly to kernel where re-entrance is already
allowed.

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
 hw/core/irq.c| 39 +++
 include/hw/irq.h |  5 +
 2 files changed, 44 insertions(+)

diff --git a/hw/core/irq.c b/hw/core/irq.c
index 2078542..3f88f57 100644
--- a/hw/core/irq.c
+++ b/hw/core/irq.c
@@ -26,6 +26,10 @@
 
 struct IRQState {
 qemu_irq_handler handler;
+/* the implement requires to guarantee the re-entrance without the
+* protection of BQL
+*/
+qemu_irq_route route;
 void *opaque;
 int n;
 };
@@ -38,6 +42,15 @@ void qemu_set_irq(qemu_irq irq, int level)
 irq-handler(irq-opaque, irq-n, level);
 }
 
+/* This func is designed to survive without BQL. */
+int qemu_irq_route_gsi(qemu_irq irq)
+{
+if (irq-route) {
+return irq-route(irq-opaque, irq-n);
+}
+return -1;
+}
+
 qemu_irq *qemu_extend_irqs(qemu_irq *old, int n_old, qemu_irq_handler handler,
void *opaque, int n)
 {
@@ -63,6 +76,32 @@ qemu_irq *qemu_extend_irqs(qemu_irq *old, int n_old, 
qemu_irq_handler handler,
 return s;
 }
 
+qemu_irq *qemu_extend_irqs_2(qemu_irq *old, int n_old, qemu_irq_handler 
handler,
+   qemu_irq_route route, void *opaque, int n)
+{
+qemu_irq *s;
+struct IRQState *p;
+int i;
+
+if (!old) {
+n_old = 0;
+}
+s = old ? g_renew(qemu_irq, old, n + n_old) : g_new(qemu_irq, n);
+p = old ? g_renew(struct IRQState, s[0], n + n_old) :
+g_new(struct IRQState, n);
+for (i = 0; i  n + n_old; i++) {
+if (i = n_old) {
+p-handler = handler;
+p-route = route;
+p-opaque = opaque;
+p-n = i;
+}
+s[i] = p;
+p++;
+}
+return s;
+}
+
 qemu_irq *qemu_allocate_irqs(qemu_irq_handler handler, void *opaque, int n)
 {
 return qemu_extend_irqs(NULL, 0, handler, opaque, n);
diff --git a/include/hw/irq.h b/include/hw/irq.h
index 610e6b7..c10ef25 100644
--- a/include/hw/irq.h
+++ b/include/hw/irq.h
@@ -6,6 +6,7 @@
 typedef struct IRQState *qemu_irq;
 
 typedef void (*qemu_irq_handler)(void *opaque, int n, int level);
+typedef int (*qemu_irq_route)(void *opaque, int n);
 
 void qemu_set_irq(qemu_irq irq, int level);
 
@@ -30,11 +31,15 @@ static inline void qemu_irq_pulse(qemu_irq irq)
  */
 qemu_irq *qemu_allocate_irqs(qemu_irq_handler handler, void *opaque, int n);
 
+int qemu_irq_route_gsi(qemu_irq irq);
+
 /* Extends an Array of IRQs. Old IRQs have their handlers and opaque data
  * preserved. New IRQs are assigned the argument handler and opaque data.
  */
 qemu_irq *qemu_extend_irqs(qemu_irq *old, int n_old, qemu_irq_handler handler,
 void *opaque, int n);
+qemu_irq *qemu_extend_irqs_2(qemu_irq *old, int n_old, qemu_irq_handler 
handler,
+   qemu_irq_route route, void *opaque, int n);
 
 void qemu_free_irqs(qemu_irq *s);
 
-- 
1.8.1.4




[Qemu-devel] [PATCH 3/5] irqfd: equip irqfd with polarity

2013-09-11 Thread Liu Ping Fan
Equip irqfd with polarity, so it can emulate the low-active interrupt.
We take one extra bit in flags to pass this info to kernel and keep
the default value zero as high-active.

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
The kernel will extend this interface correspondingly

---
 hw/misc/vfio.c|  4 ++--
 hw/virtio/virtio-pci.c|  2 +-
 include/sysemu/kvm.h  |  3 ++-
 kvm-all.c | 14 --
 linux-headers/linux/kvm.h |  3 +++
 5 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
index a1c08fb..f1fb761 100644
--- a/hw/misc/vfio.c
+++ b/hw/misc/vfio.c
@@ -646,7 +646,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, 
unsigned int nr,
 vector-virq = msg ? kvm_irqchip_add_msi_route(kvm_state, *msg) : -1;
 if (vector-virq  0 ||
 kvm_irqchip_add_irqfd_notifier(kvm_state, vector-interrupt,
-   NULL, vector-virq)  0) {
+   NULL, vector-virq, 0)  0) {
 if (vector-virq = 0) {
 kvm_irqchip_release_virq(kvm_state, vector-virq);
 vector-virq = -1;
@@ -814,7 +814,7 @@ retry:
 vector-virq = kvm_irqchip_add_msi_route(kvm_state, msg);
 if (vector-virq  0 ||
 kvm_irqchip_add_irqfd_notifier(kvm_state, vector-interrupt,
-   NULL, vector-virq)  0) {
+   NULL, vector-virq, 0)  0) {
 qemu_set_fd_handler(event_notifier_get_fd(vector-interrupt),
 vfio_msi_interrupt, NULL, vector);
 }
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index f2c489b..a8dbb4f 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -508,7 +508,7 @@ static int kvm_virtio_pci_irqfd_use(VirtIOPCIProxy *proxy,
 VirtQueue *vq = virtio_get_queue(proxy-vdev, queue_no);
 EventNotifier *n = virtio_queue_get_guest_notifier(vq);
 int ret;
-ret = kvm_irqchip_add_irqfd_notifier(kvm_state, n, NULL, irqfd-virq);
+ret = kvm_irqchip_add_irqfd_notifier(kvm_state, n, NULL, irqfd-virq, 0);
 return ret;
 }
 
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 47cc012..a990db7 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -309,8 +309,9 @@ int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg);
 int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg);
 void kvm_irqchip_release_virq(KVMState *s, int virq);
 
+/* polarity: 0 high-active */
 int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n,
-   EventNotifier *rn, int virq);
+   EventNotifier *rn, int virq, int polarity);
 int kvm_irqchip_remove_irqfd_notifier(KVMState *s, EventNotifier *n, int virq);
 int kvm_pc_route_gsi(void *opaque, int n);
 void kvm_pc_gsi_handler(void *opaque, int n, int level);
diff --git a/kvm-all.c b/kvm-all.c
index 875e32e..d0e457b 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1231,12 +1231,13 @@ int kvm_irqchip_update_msi_route(KVMState *s, int virq, 
MSIMessage msg)
 }
 
 static int kvm_irqchip_assign_irqfd(KVMState *s, int fd, int rfd, int virq,
-bool assign)
+bool assign, int polarity)
 {
 struct kvm_irqfd irqfd = {
 .fd = fd,
 .gsi = virq,
-.flags = assign ? 0 : KVM_IRQFD_FLAG_DEASSIGN,
+.flags = (assign ? 0 : KVM_IRQFD_FLAG_DEASSIGN)
+| (polarity ? 0 : KVM_IRQFD_FLAG_POLARITY),
 };
 
 if (rfd != -1) {
@@ -1271,7 +1272,8 @@ int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg)
 return -ENOSYS;
 }
 
-static int kvm_irqchip_assign_irqfd(KVMState *s, int fd, int virq, bool assign)
+static int kvm_irqchip_assign_irqfd(KVMState *s, int fd, int virq, bool assign,
+int polarity)
 {
 abort();
 }
@@ -1283,16 +1285,16 @@ int kvm_irqchip_update_msi_route(KVMState *s, int virq, 
MSIMessage msg)
 #endif /* !KVM_CAP_IRQ_ROUTING */
 
 int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n,
-   EventNotifier *rn, int virq)
+   EventNotifier *rn, int virq, int polarity)
 {
 return kvm_irqchip_assign_irqfd(s, event_notifier_get_fd(n),
-   rn ? event_notifier_get_fd(rn) : -1, virq, true);
+   rn ? event_notifier_get_fd(rn) : -1, virq, true, polarity);
 }
 
 int kvm_irqchip_remove_irqfd_notifier(KVMState *s, EventNotifier *n, int virq)
 {
 return kvm_irqchip_assign_irqfd(s, event_notifier_get_fd(n), -1, virq,
-   false);
+   false, 0);
 }
 
 static int kvm_irqchip_create(KVMState *s)
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index c614070..64afc8a 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -740,6 +740,9 @@ struct kvm_xen_hvm_config {
  */
 #define

[Qemu-devel] [PATCH 5/5] hpet: run on dedicate thread

2013-09-11 Thread Liu Ping Fan
migration is not supported yet.

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
 hw/timer/hpet.c | 31 ++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
index ae54b87..8e32e36 100644
--- a/hw/timer/hpet.c
+++ b/hw/timer/hpet.c
@@ -91,6 +91,10 @@ typedef struct HPETState {
 uint32_t intcap;
 HPETTimer timer[HPET_MAX_TIMERS];
 bool dedicate_mode;
+AioContext *ctx;
+bool created;
+bool stop;
+QemuThread t;
 
 /* Memory-mapped, software visible registers */
 uint64_t capability;/* capabilities */
@@ -702,6 +706,16 @@ static const MemoryRegionOps hpet_ram_ops = {
 .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
+static void *hpet_work_thread(void *opaque)
+{
+HPETState *hpet = opaque;
+
+while (!hpet-stop) {
+aio_poll(hpet-ctx, true);
+}
+return NULL;
+}
+
 static void hpet_reset(DeviceState *d)
 {
 HPETState *s = HPET(d);
@@ -732,6 +746,11 @@ static void hpet_reset(DeviceState *d)
 
 /* to document that the RTC lowers its output on reset as well */
 s-rtc_irq_level = 0;
+if (!s-created) {
+s-created = true;
+qemu_thread_create(s-t, hpet_work_thread, s, QEMU_THREAD_JOINABLE);
+}
+
 }
 
 static void hpet_handle_legacy_irq(void *opaque, int n, int level)
@@ -788,10 +807,20 @@ static void hpet_realize(DeviceState *dev, Error **errp)
 } else if (s-num_timers  HPET_MAX_TIMERS) {
 s-num_timers = HPET_MAX_TIMERS;
 }
+s-dedicate_mode = kvm_irqfds_enabled() ? true : false;
+if (s-dedicate_mode) {
+s-ctx = aio_context_new();
+}
 for (i = 0; i  HPET_MAX_TIMERS; i++) {
 timer = s-timer[i];
-timer-qemu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, hpet_timer, 
timer);
 timer-tn = i;
+if (s-dedicate_mode) {
+timer-qemu_timer = aio_timer_new(s-ctx, QEMU_CLOCK_VIRTUAL,
+SCALE_NS, hpet_timer, timer);
+} else {
+timer-qemu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+hpet_timer, timer);
+}
 timer-state = s;
 }
 
-- 
1.8.1.4




[Qemu-devel] [PATCH 4/5] hpet: deliver irq by irqfd when in dedicated thread mode

2013-09-11 Thread Liu Ping Fan
Running hpet in iothread, there could be variable payload, which
will finally affect the accurate of timing. So we want to run
hpet on dedicated thread.

For hpet, almost of the things can run out of BQL, except interrupt.
We step around interrupt by using irqfd.

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
 hw/timer/hpet.c | 54 ++
 1 file changed, 50 insertions(+), 4 deletions(-)

diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
index ff43850..ae54b87 100644
--- a/hw/timer/hpet.c
+++ b/hw/timer/hpet.c
@@ -33,6 +33,7 @@
 #include hw/sysbus.h
 #include hw/timer/mc146818rtc.h
 #include hw/timer/i8254.h
+#include sysemu/kvm.h
 
 //#define HPET_DEBUG
 #ifdef HPET_DEBUG
@@ -68,6 +69,12 @@ typedef struct HPETTimer {  /* timers */
  */
 } HPETTimer;
 
+typedef struct IrqfdInfo {
+EventNotifier *n;
+EventNotifier *rn;
+int gsi;
+} IrqfdInfo;
+
 typedef struct HPETState {
 /* private */
 SysBusDevice parent_obj;
@@ -76,12 +83,14 @@ typedef struct HPETState {
 MemoryRegion iomem;
 uint64_t hpet_offset;
 qemu_irq irqs[HPET_NUM_IRQ_ROUTES];
+IrqfdInfo irqfds[HPET_NUM_IRQ_ROUTES];
 uint32_t flags;
 uint8_t rtc_irq_level;
 qemu_irq pit_enabled;
 uint8_t num_timers;
 uint32_t intcap;
 HPETTimer timer[HPET_MAX_TIMERS];
+bool dedicate_mode;
 
 /* Memory-mapped, software visible registers */
 uint64_t capability;/* capabilities */
@@ -206,14 +215,19 @@ static void update_irq(struct HPETTimer *timer, int set)
 if (!set || !timer_enabled(timer) || !hpet_enabled(timer-state)) {
 s-isr = ~mask;
 if (!timer_fsb_route(timer)) {
-/* fold the ICH PIRQ# pin's internal inversion logic into hpet */
-if (route = ISA_NUM_IRQS) {
-qemu_irq_raise(s-irqs[route]);
+if (s-dedicate_mode  s-irqfds[timer-tn].n) {
+event_notifier_set(s-irqfds[timer-tn].n);
 } else {
-qemu_irq_lower(s-irqs[route]);
+/* fold the ICH PIRQ# pin's internal inversion logic into hpet 
*/
+if (route = ISA_NUM_IRQS) {
+qemu_irq_raise(s-irqs[route]);
+} else {
+qemu_irq_lower(s-irqs[route]);
+}
 }
 }
 } else if (timer_fsb_route(timer)) {
+/* For the case of fsb, we resort to the lock in mem dispatcher */
 stl_le_phys(timer-fsb  32, timer-fsb  0x);
 } else if (timer-config  HPET_TN_TYPE_LEVEL) {
 s-isr |= mask;
@@ -479,6 +493,32 @@ static uint64_t hpet_ram_read(void *opaque, hwaddr addr,
 return 0;
 }
 
+static void timer_assign_irqfd(HPETState *s, HPETTimer *timer, bool assign)
+{
+int irqnum = (timer-config  HPET_TN_INT_ROUTE_MASK)
+ HPET_TN_INT_ROUTE_SHIFT;
+int level = timer-fsb  HPET_TN_TYPE_LEVEL;
+IrqfdInfo *info = s-irqfds[timer-tn];
+
+if (assign) {
+if (!info-n) {
+info-n = g_new0(EventNotifier, 1);
+info-rn = g_new0(EventNotifier, 1);
+event_notifier_init(info-n, 0);
+event_notifier_init(info-rn, 0);
+}
+info-gsi = qemu_irq_route_gsi(s-irqs[irqnum]);
+kvm_irqchip_add_irqfd_notifier(kvm_state, info-n,
+level ? info-rn : NULL, info-gsi, 1);
+} else {
+kvm_irqchip_remove_irqfd_notifier(kvm_state, info-n, info-gsi);
+g_free(info-n);
+g_free(info-rn);
+info-n = NULL;
+info-rn = NULL;
+}
+}
+
 static void hpet_ram_write(void *opaque, hwaddr addr,
uint64_t value, unsigned size)
 {
@@ -514,8 +554,14 @@ static void hpet_ram_write(void *opaque, hwaddr addr,
 timer-period = (uint32_t)timer-period;
 }
 if (activating_bit(old_val, new_val, HPET_TN_ENABLE)) {
+if (s-dedicate_mode) {
+timer_assign_irqfd(s, timer, true);
+}
 hpet_set_timer(timer);
 } else if (deactivating_bit(old_val, new_val, HPET_TN_ENABLE)) {
+if (s-dedicate_mode) {
+timer_assign_irqfd(s, timer, false);
+}
 hpet_del_timer(timer);
 }
 break;
-- 
1.8.1.4




[Qemu-devel] [PATCH] KVM: IRQFD: equip irqfd and resamplefd with polarity

2013-09-11 Thread Liu Ping Fan
Nowadays, irqfd can emulate trigger mode, but it can not emulate
trigger polarity. While in some cases, ioapic ioredtbl[x] expects
_low_ active. So equipping irqfd with the ability. Correspondingly,
resamplefd will have the same polarity as irqfd.

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
This helps to step around making the interrupt component re-entrance
in qemu
---
 include/linux/kvm_host.h |  1 +
 include/uapi/linux/kvm.h |  2 ++
 virt/kvm/eventfd.c   | 32 ++--
 3 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a63d83e..0b8c3b1 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -678,6 +678,7 @@ bool kvm_is_mmio_pfn(pfn_t pfn);
 struct kvm_irq_ack_notifier {
struct hlist_node link;
unsigned gsi;
+   int polarity; /* 0 high active */
void (*irq_acked)(struct kvm_irq_ack_notifier *kian);
 };
 
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index acccd08..bba3a1b 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -740,6 +740,8 @@ struct kvm_xen_hvm_config {
  * emlation.  See Documentation/virtual/kvm/api.txt.
  */
 #define KVM_IRQFD_FLAG_RESAMPLE (1  1)
+/* 0: high-active */
+#define KVM_IRQFD_FLAG_POLARITY (12)
 
 struct kvm_irqfd {
__u32 fd;
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 1550637..865c656 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -77,6 +77,7 @@ struct _irqfd {
struct kvm_kernel_irq_routing_entry __rcu *irq_entry;
/* Used for level IRQ fast-path */
int gsi;
+   int polarity; /* 0 high active */
struct work_struct inject;
/* The resampler used by this irqfd (resampler-only) */
struct _irqfd_resampler *resampler;
@@ -100,13 +101,13 @@ irqfd_inject(struct work_struct *work)
struct kvm *kvm = irqfd-kvm;
 
if (!irqfd-resampler) {
-   kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd-gsi, 1,
-   false);
-   kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd-gsi, 0,
-   false);
+   kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd-gsi,
+   !irqfd-polarity, false);
+   kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd-gsi,
+   !!irqfd-polarity, false);
} else
kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
-   irqfd-gsi, 1, false);
+   irqfd-gsi, !irqfd-polarity, false);
 }
 
 /*
@@ -123,7 +124,8 @@ irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
resampler = container_of(kian, struct _irqfd_resampler, notifier);
 
kvm_set_irq(resampler-kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
-   resampler-notifier.gsi, 0, false);
+   resampler-notifier.gsi,
+   !!resampler-notifier.polarity, false);
 
rcu_read_lock();
 
@@ -148,7 +150,7 @@ irqfd_resampler_shutdown(struct _irqfd *irqfd)
list_del(resampler-link);
kvm_unregister_irq_ack_notifier(kvm, resampler-notifier);
kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
-   resampler-notifier.gsi, 0, false);
+   resampler-notifier.gsi, !!irqfd-polarity, false);
kfree(resampler);
}
 
@@ -302,6 +304,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 
irqfd-kvm = kvm;
irqfd-gsi = args-gsi;
+   irqfd-polarity = !!(args-flags  KVM_IRQFD_FLAG_POLARITY);
INIT_LIST_HEAD(irqfd-list);
INIT_WORK(irqfd-inject, irqfd_inject);
INIT_WORK(irqfd-shutdown, irqfd_shutdown);
@@ -337,8 +340,15 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
list_for_each_entry(resampler,
kvm-irqfds.resampler_list, link) {
if (resampler-notifier.gsi == irqfd-gsi) {
-   irqfd-resampler = resampler;
-   break;
+   if (likely(resampler-notifier.polarity ==
+   irqfd-polarity)) {
+   irqfd-resampler = resampler;
+   break;
+   } else {
+   ret = -EBUSY;
+   
mutex_unlock(kvm-irqfds.resampler_lock);
+   goto fail;
+   }
}
}
 
@@ -353,6 +363,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
resampler-kvm = kvm;
INIT_LIST_HEAD(resampler-list);
resampler

Re: [Qemu-devel] [PATCH v4 0/3] bugs fix for hpet

2013-09-04 Thread liu ping fan
On Wed, Sep 4, 2013 at 3:03 PM, Paolo Bonzini pbonz...@redhat.com wrote:
 Il 04/09/2013 07:29, liu ping fan ha scritto:
 On Tue, Sep 3, 2013 at 7:17 PM, Paolo Bonzini pbonz...@redhat.com wrote:
 Il 02/09/2013 09:06, Liu Ping Fan ha scritto:
 note: I rebase it onto Stefan's net-next tree, since pc-1.7 has already 
 been defined there.

 v4:
   use standard compat property to set hpet's interrupt compatibility

 v3:
   change hpet interrupt capablity on board's demand


 Liu Ping Fan (3):
   hpet: inverse polarity when pin above ISA_NUM_IRQS
   hpet: entitle more irq pins for hpet
   pc-1.6: add compatibility for hpet intcap on pc-*-1.6

  hw/timer/hpet.c  | 27 +++
  include/hw/i386/pc.h |  5 +
  2 files changed, 28 insertions(+), 4 deletions(-)


 Looks good.  But I have one question; should this be changed for PIIX
 too, or should the 1.7 PIIX machine keep the old behavior?  (I have no
 idea).

 Your suspicion is right.  When going through PIIX4 spec, I found that
 the chipset was without ioapic integrated. So there is divergence for
 the compatibility of pc-piix-* and pc-q35-*.  Can I code the hpet's
 compatiblity in pc-piix-1.7 to resolve this?

 You could do something like what people suggested earlier in the review.
  Change sysbus_try_create_simple to
 qdev_try_create/sysbus_mmio_map/qdev_init in pc_basic_device_init (this
 is one patch, going before v4 3/3).  Then add a new argument to
 pc_basic_device_init, and set the intcap property if the new argument is
 nonzero (another patch, going before v4 3/3; where the new argument is
 always 0).  Finally, piix can pass 4 for the new argument, while q35 can
 keep passing 0 (squashed in v4 3/3).

Will follow this, thanks.

 In other words, PIIX hardcodes the value 4, while Q35 uses compat
 properties.

 I still have a doubt.  Can the PIIX HPET use interrupt 8?  If so, this

No. on ich9, hpet can work with LegacyReplacement Mode to block
rtc(IRQ8). But on PIIX, spec said nothing about this, so I think we
had better keep it unchanged.

Thanks and regards,
Pingfan
 won't work and we have to go back to the drawing board.  But if it can
 use interrupt 2 only, it will be okay.

 Paolo



Re: [Qemu-devel] [PATCH v4 0/3] bugs fix for hpet

2013-09-03 Thread liu ping fan
On Tue, Sep 3, 2013 at 7:17 PM, Paolo Bonzini pbonz...@redhat.com wrote:
 Il 02/09/2013 09:06, Liu Ping Fan ha scritto:
 note: I rebase it onto Stefan's net-next tree, since pc-1.7 has already been 
 defined there.

 v4:
   use standard compat property to set hpet's interrupt compatibility

 v3:
   change hpet interrupt capablity on board's demand


 Liu Ping Fan (3):
   hpet: inverse polarity when pin above ISA_NUM_IRQS
   hpet: entitle more irq pins for hpet
   pc-1.6: add compatibility for hpet intcap on pc-*-1.6

  hw/timer/hpet.c  | 27 +++
  include/hw/i386/pc.h |  5 +
  2 files changed, 28 insertions(+), 4 deletions(-)


 Looks good.  But I have one question; should this be changed for PIIX
 too, or should the 1.7 PIIX machine keep the old behavior?  (I have no
 idea).

Your suspicion is right.  When going through PIIX4 spec, I found that
the chipset was without ioapic integrated. So there is divergence for
the compatibility of pc-piix-* and pc-q35-*.  Can I code the hpet's
compatiblity in pc-piix-1.7 to resolve this?

Regards,
Pingfan
 Paolo



Re: [Qemu-devel] [PATCH v3 2/3] qdev: interface for SysBusDevice to change property on requirement

2013-09-02 Thread liu ping fan
On Fri, Aug 30, 2013 at 4:17 PM, Paolo Bonzini pbonz...@redhat.com wrote:
 Il 30/08/2013 09:53, Liu Ping Fan ha scritto:
 qdev's property can not be set after realized, but there is a
 requirement of adjusting device's behavior on different mother
 boards.  So introducing a callback in sysbus_try_create_simple()
 to adjust device's property on board's demand.

 (This patch is needed by the later one which changes hpet's intcap
 property)

 I don't think it is useful to add a new mechanism since there is an
 existing mechanism to set properties for compatibility (which I pointed
 you to earlier).  It is also incorrect because this will have an effect

Oh, just aware of this tricky method to set qdev's property. Thanks,
will try this way!

Regards,
Pingfan
 on all PC boards including pc-q35-1.7 and newer.

 You need to create a 1.7 machine like commit 45053fd (pc: Create
 pc-*-1.6 machine-types, 2013-05-27).  On top of this:

 * the 1.6 machines need to have a compatibility property in hw/i386/pc.h.

 * the pc-i440fx-1.7 machine needs to have a compatibility property for
 the same thing in hw/i386/pc_piix.c

 Paolo

 Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
 ---
  hw/core/sysbus.c| 5 -
  hw/i386/pc.c| 2 +-
  include/hw/sysbus.h | 8 +---
  3 files changed, 10 insertions(+), 5 deletions(-)

 diff --git a/hw/core/sysbus.c b/hw/core/sysbus.c
 index 9004d8c..e894bbb 100644
 --- a/hw/core/sysbus.c
 +++ b/hw/core/sysbus.c
 @@ -172,7 +172,7 @@ DeviceState *sysbus_create_varargs(const char *name,
  return dev;
  }

 -DeviceState *sysbus_try_create_varargs(const char *name,
 +DeviceState *sysbus_try_create_varargs(const char *name, CompatSet set,
 hwaddr addr, ...)
  {
  DeviceState *dev;
 @@ -185,6 +185,9 @@ DeviceState *sysbus_try_create_varargs(const char *name,
  if (!dev) {
  return NULL;
  }
 +if (set) {
 +set(dev);
 +}
  s = SYS_BUS_DEVICE(dev);
  qdev_init_nofail(dev);
  if (addr != (hwaddr)-1) {
 diff --git a/hw/i386/pc.c b/hw/i386/pc.c
 index e8bc8ce..09c10ac 100644
 --- a/hw/i386/pc.c
 +++ b/hw/i386/pc.c
 @@ -1247,7 +1247,7 @@ void pc_basic_device_init(ISABus *isa_bus, qemu_irq 
 *gsi,
   * when the HPET wants to take over. Thus we have to disable the latter.
   */
  if (!no_hpet  (!kvm_irqchip_in_kernel() || kvm_has_pit_state2())) {
 -hpet = sysbus_try_create_simple(hpet, HPET_BASE, NULL);
 +hpet = sysbus_try_create_simple(hpet, NULL, HPET_BASE, NULL);

  if (hpet) {
  for (i = 0; i  GSI_NUM_PINS; i++) {
 diff --git a/include/hw/sysbus.h b/include/hw/sysbus.h
 index bb50a87..47337f2 100644
 --- a/include/hw/sysbus.h
 +++ b/include/hw/sysbus.h
 @@ -58,6 +58,8 @@ struct SysBusDevice {
  pio_addr_t pio[QDEV_MAX_PIO];
  };

 +typedef void (*CompatSet)(DeviceState *dev);
 +
  void sysbus_init_mmio(SysBusDevice *dev, MemoryRegion *memory);
  MemoryRegion *sysbus_mmio_get_region(SysBusDevice *dev, int n);
  void sysbus_init_irq(SysBusDevice *dev, qemu_irq *p);
 @@ -77,7 +79,7 @@ MemoryRegion *sysbus_address_space(SysBusDevice *dev);
  /* Legacy helper function for creating devices.  */
  DeviceState *sysbus_create_varargs(const char *name,
   hwaddr addr, ...);
 -DeviceState *sysbus_try_create_varargs(const char *name,
 +DeviceState *sysbus_try_create_varargs(const char *name, CompatSet set,
 hwaddr addr, ...);
  static inline DeviceState *sysbus_create_simple(const char *name,
hwaddr addr,
 @@ -86,11 +88,11 @@ static inline DeviceState *sysbus_create_simple(const 
 char *name,
  return sysbus_create_varargs(name, addr, irq, NULL);
  }

 -static inline DeviceState *sysbus_try_create_simple(const char *name,
 +static inline DeviceState *sysbus_try_create_simple(const char *name, 
 CompatSet set,
  hwaddr addr,
  qemu_irq irq)
  {
 -return sysbus_try_create_varargs(name, addr, irq, NULL);
 +return sysbus_try_create_varargs(name, set, addr, irq, NULL);
  }

  #endif /* !HW_SYSBUS_H */





Re: [Qemu-devel] [PATCH v3 2/3] qdev: interface for SysBusDevice to change property on requirement

2013-09-02 Thread liu ping fan
On Fri, Aug 30, 2013 at 8:32 PM, Andreas Färber afaer...@suse.de wrote:
 Am 30.08.2013 10:17, schrieb Paolo Bonzini:
 Il 30/08/2013 09:53, Liu Ping Fan ha scritto:
 qdev's property can not be set after realized, but there is a
 requirement of adjusting device's behavior on different mother
 boards.  So introducing a callback in sysbus_try_create_simple()
 to adjust device's property on board's demand.

 (This patch is needed by the later one which changes hpet's intcap
 property)

 I don't think it is useful to add a new mechanism since there is an
 existing mechanism to set properties for compatibility (which I pointed
 you to earlier).  It is also incorrect because this will have an effect
 on all PC boards including pc-q35-1.7 and newer.

 You need to create a 1.7 machine like commit 45053fd (pc: Create
 pc-*-1.6 machine-types, 2013-05-27).

 Stefan already has than in his net tree and just needs to (rebase and)
 flush his queue!

Thanks, will do like it.

Regards,
Pingfan



[Qemu-devel] [PATCH v4 0/3] bugs fix for hpet

2013-09-02 Thread Liu Ping Fan
note: I rebase it onto Stefan's net-next tree, since pc-1.7 has already been 
defined there.

v4:
  use standard compat property to set hpet's interrupt compatibility

v3:
  change hpet interrupt capablity on board's demand


Liu Ping Fan (3):
  hpet: inverse polarity when pin above ISA_NUM_IRQS
  hpet: entitle more irq pins for hpet
  pc-1.6: add compatibility for hpet intcap on pc-*-1.6

 hw/timer/hpet.c  | 27 +++
 include/hw/i386/pc.h |  5 +
 2 files changed, 28 insertions(+), 4 deletions(-)

-- 
1.8.1.4




[Qemu-devel] [PATCH v4 1/3] hpet: inverse polarity when pin above ISA_NUM_IRQS

2013-09-02 Thread Liu Ping Fan
According to hpet spec, hpet irq is high active. But according to
ICH spec, there is inversion before the input of ioapic. So the OS
will expect low active on this IRQ line.(And this is observed on
bare metal).

We fold the emulation of this inversion inside the hpet logic.

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
 hw/timer/hpet.c | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
index 648b383..1139448 100644
--- a/hw/timer/hpet.c
+++ b/hw/timer/hpet.c
@@ -198,13 +198,23 @@ static void update_irq(struct HPETTimer *timer, int set)
 if (!set || !timer_enabled(timer) || !hpet_enabled(timer-state)) {
 s-isr = ~mask;
 if (!timer_fsb_route(timer)) {
-qemu_irq_lower(s-irqs[route]);
+/* fold the ICH PIRQ# pin's internal inversion logic into hpet */
+if (route = ISA_NUM_IRQS) {
+qemu_irq_raise(s-irqs[route]);
+} else {
+qemu_irq_lower(s-irqs[route]);
+}
 }
 } else if (timer_fsb_route(timer)) {
 stl_le_phys(timer-fsb  32, timer-fsb  0x);
 } else if (timer-config  HPET_TN_TYPE_LEVEL) {
 s-isr |= mask;
-qemu_irq_raise(s-irqs[route]);
+/* fold the ICH PIRQ# pin's internal inversion logic into hpet */
+if (route = ISA_NUM_IRQS) {
+qemu_irq_lower(s-irqs[route]);
+} else {
+qemu_irq_raise(s-irqs[route]);
+}
 } else {
 s-isr = ~mask;
 qemu_irq_pulse(s-irqs[route]);
-- 
1.8.1.4




[Qemu-devel] [PATCH v4 2/3] hpet: entitle more irq pins for hpet

2013-09-02 Thread Liu Ping Fan
On PC, IRQ2/8 can be reserved for hpet timer 0/1. And pin 16~23
of ioapic can be dynamically assigned to hpet as guest chooses.
(Will enable them after introducing pc 1.6 compat)

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
 hw/timer/hpet.c | 13 +++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
index 1139448..888be66 100644
--- a/hw/timer/hpet.c
+++ b/hw/timer/hpet.c
@@ -25,6 +25,7 @@
  */
 
 #include hw/hw.h
+#include hw/boards.h
 #include hw/i386/pc.h
 #include ui/console.h
 #include qemu/timer.h
@@ -42,6 +43,12 @@
 
 #define HPET_MSI_SUPPORT0
 
+/* For bug compat, using only IRQ2. Soon it will be fixed as
+ * 0xff0104ULL, i.e using IRQ16~23, IRQ8 and IRQ2 after
+ * introducing pc-1.6 compat.
+ */
+#define HPET_TN_INT_CAP_DEFAULT 0x4ULL
+
 #define TYPE_HPET hpet
 #define HPET(obj) OBJECT_CHECK(HPETState, (obj), TYPE_HPET)
 
@@ -73,6 +80,7 @@ typedef struct HPETState {
 uint8_t rtc_irq_level;
 qemu_irq pit_enabled;
 uint8_t num_timers;
+uint32_t intcap;
 HPETTimer timer[HPET_MAX_TIMERS];
 
 /* Memory-mapped, software visible registers */
@@ -663,8 +671,8 @@ static void hpet_reset(DeviceState *d)
 if (s-flags  (1  HPET_MSI_SUPPORT)) {
 timer-config |= HPET_TN_FSB_CAP;
 }
-/* advertise availability of ioapic inti2 */
-timer-config |=  0x0004ULL  32;
+/* advertise availability of ioapic int */
+timer-config |=  (uint64_t)s-intcap  32;
 timer-period = 0ULL;
 timer-wrap_flag = 0;
 }
@@ -753,6 +761,7 @@ static void hpet_realize(DeviceState *dev, Error **errp)
 static Property hpet_device_properties[] = {
 DEFINE_PROP_UINT8(timers, HPETState, num_timers, HPET_MIN_TIMERS),
 DEFINE_PROP_BIT(msi, HPETState, flags, HPET_MSI_SUPPORT, false),
+DEFINE_PROP_UINT32(intcap, HPETState, intcap, HPET_TN_INT_CAP_DEFAULT),
 DEFINE_PROP_END_OF_LIST(),
 };
 
-- 
1.8.1.4




[Qemu-devel] [PATCH v4 3/3] pc-1.6: add compatibility for hpet intcap on pc-*-1.6

2013-09-02 Thread Liu Ping Fan
For guest bug compat, we limit hpet's interrupt compatibility on
ioapic's IRQ2 for pc-*-1.6. As to pc-*-1.7 and newer, IRQ2, IRQ8,
and IRQ16~23 are allowed.

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
 hw/timer/hpet.c  | 6 +-
 include/hw/i386/pc.h | 4 
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
index 888be66..b6e8c12 100644
--- a/hw/timer/hpet.c
+++ b/hw/timer/hpet.c
@@ -43,11 +43,7 @@
 
 #define HPET_MSI_SUPPORT0
 
-/* For bug compat, using only IRQ2. Soon it will be fixed as
- * 0xff0104ULL, i.e using IRQ16~23, IRQ8 and IRQ2 after
- * introducing pc-1.6 compat.
- */
-#define HPET_TN_INT_CAP_DEFAULT 0x4ULL
+#define HPET_TN_INT_CAP_DEFAULT 0xff0104ULL
 
 #define TYPE_HPET hpet
 #define HPET(obj) OBJECT_CHECK(HPETState, (obj), TYPE_HPET)
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 894c124..ef481bc 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -219,6 +219,10 @@ int e820_add_entry(uint64_t, uint64_t, uint32_t);
 .driver   = e1000,\
 .property = mitigation,\
 .value= off,\
+},{\
+.driver   = hpet,\
+.property = intcap,\
+.value= stringify(4),\
 }
 
 #define PC_COMPAT_1_5 \
-- 
1.8.1.4




[Qemu-devel] [PATCH v3 0/3] bugs fix for hpet

2013-08-30 Thread Liu Ping Fan
v3:
  change hpet interrupt capablity on board's demand

Liu Ping Fan (3):
  hpet: inverse polarity when pin above ISA_NUM_IRQS
  qdev: interface for SysBusDevice to change property on requirement
  hpet: entitle more irq pins for hpet

 hw/core/sysbus.c|  5 -
 hw/i386/pc.c|  8 +++-
 hw/timer/hpet.c | 26 ++
 include/hw/sysbus.h |  8 +---
 4 files changed, 38 insertions(+), 9 deletions(-)

-- 
1.8.1.4




[Qemu-devel] [PATCH v3 2/3] qdev: interface for SysBusDevice to change property on requirement

2013-08-30 Thread Liu Ping Fan
qdev's property can not be set after realized, but there is a
requirement of adjusting device's behavior on different mother
boards.  So introducing a callback in sysbus_try_create_simple()
to adjust device's property on board's demand.

(This patch is needed by the later one which changes hpet's intcap
property)

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
 hw/core/sysbus.c| 5 -
 hw/i386/pc.c| 2 +-
 include/hw/sysbus.h | 8 +---
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/hw/core/sysbus.c b/hw/core/sysbus.c
index 9004d8c..e894bbb 100644
--- a/hw/core/sysbus.c
+++ b/hw/core/sysbus.c
@@ -172,7 +172,7 @@ DeviceState *sysbus_create_varargs(const char *name,
 return dev;
 }
 
-DeviceState *sysbus_try_create_varargs(const char *name,
+DeviceState *sysbus_try_create_varargs(const char *name, CompatSet set,
hwaddr addr, ...)
 {
 DeviceState *dev;
@@ -185,6 +185,9 @@ DeviceState *sysbus_try_create_varargs(const char *name,
 if (!dev) {
 return NULL;
 }
+if (set) {
+set(dev);
+}
 s = SYS_BUS_DEVICE(dev);
 qdev_init_nofail(dev);
 if (addr != (hwaddr)-1) {
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index e8bc8ce..09c10ac 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1247,7 +1247,7 @@ void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
  * when the HPET wants to take over. Thus we have to disable the latter.
  */
 if (!no_hpet  (!kvm_irqchip_in_kernel() || kvm_has_pit_state2())) {
-hpet = sysbus_try_create_simple(hpet, HPET_BASE, NULL);
+hpet = sysbus_try_create_simple(hpet, NULL, HPET_BASE, NULL);
 
 if (hpet) {
 for (i = 0; i  GSI_NUM_PINS; i++) {
diff --git a/include/hw/sysbus.h b/include/hw/sysbus.h
index bb50a87..47337f2 100644
--- a/include/hw/sysbus.h
+++ b/include/hw/sysbus.h
@@ -58,6 +58,8 @@ struct SysBusDevice {
 pio_addr_t pio[QDEV_MAX_PIO];
 };
 
+typedef void (*CompatSet)(DeviceState *dev);
+
 void sysbus_init_mmio(SysBusDevice *dev, MemoryRegion *memory);
 MemoryRegion *sysbus_mmio_get_region(SysBusDevice *dev, int n);
 void sysbus_init_irq(SysBusDevice *dev, qemu_irq *p);
@@ -77,7 +79,7 @@ MemoryRegion *sysbus_address_space(SysBusDevice *dev);
 /* Legacy helper function for creating devices.  */
 DeviceState *sysbus_create_varargs(const char *name,
  hwaddr addr, ...);
-DeviceState *sysbus_try_create_varargs(const char *name,
+DeviceState *sysbus_try_create_varargs(const char *name, CompatSet set,
hwaddr addr, ...);
 static inline DeviceState *sysbus_create_simple(const char *name,
   hwaddr addr,
@@ -86,11 +88,11 @@ static inline DeviceState *sysbus_create_simple(const char 
*name,
 return sysbus_create_varargs(name, addr, irq, NULL);
 }
 
-static inline DeviceState *sysbus_try_create_simple(const char *name,
+static inline DeviceState *sysbus_try_create_simple(const char *name, 
CompatSet set,
 hwaddr addr,
 qemu_irq irq)
 {
-return sysbus_try_create_varargs(name, addr, irq, NULL);
+return sysbus_try_create_varargs(name, set, addr, irq, NULL);
 }
 
 #endif /* !HW_SYSBUS_H */
-- 
1.8.1.4




[Qemu-devel] [PATCH v3 3/3] hpet: entitle more irq pins for hpet

2013-08-30 Thread Liu Ping Fan
On q35 machine, IRQ2/8 can be reserved for hpet timer 0/1. And pin 16~23
of ioapic can be dynamically assigned to hpet as guest chooses.

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
 hw/i386/pc.c|  8 +++-
 hw/timer/hpet.c | 12 ++--
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 09c10ac..bb23d99 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1217,6 +1217,12 @@ static const MemoryRegionOps ioportF0_io_ops = {
 },
 };
 
+static void hpet_intcap_set(DeviceState *dev)
+{
+/* For guest bug compatibility, only IRQ2 is reserved for hpet on q35 */
+qdev_prop_set_uint32(dev, intcap, 0x4);
+}
+
 void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
   ISADevice **rtc_state,
   ISADevice **floppy,
@@ -1247,7 +1253,7 @@ void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
  * when the HPET wants to take over. Thus we have to disable the latter.
  */
 if (!no_hpet  (!kvm_irqchip_in_kernel() || kvm_has_pit_state2())) {
-hpet = sysbus_try_create_simple(hpet, NULL, HPET_BASE, NULL);
+hpet = sysbus_try_create_simple(hpet, hpet_intcap_set, HPET_BASE, 
NULL);
 
 if (hpet) {
 for (i = 0; i  GSI_NUM_PINS; i++) {
diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
index 1139448..2e19ff5 100644
--- a/hw/timer/hpet.c
+++ b/hw/timer/hpet.c
@@ -25,6 +25,7 @@
  */
 
 #include hw/hw.h
+#include hw/boards.h
 #include hw/i386/pc.h
 #include ui/console.h
 #include qemu/timer.h
@@ -42,6 +43,11 @@
 
 #define HPET_MSI_SUPPORT0
 
+/* only IRQ2 allowed for pc-1.6 and former */
+#define HPET_TN_INT_CAP_PC (0x4ULL  32)
+/* Hpet can use non-legacy IRQ16~23, and an IRQ2 ,IRQ8 */
+#define HPET_TN_INT_CAP_DEFAULT 0xff0104ULL
+
 #define TYPE_HPET hpet
 #define HPET(obj) OBJECT_CHECK(HPETState, (obj), TYPE_HPET)
 
@@ -73,6 +79,7 @@ typedef struct HPETState {
 uint8_t rtc_irq_level;
 qemu_irq pit_enabled;
 uint8_t num_timers;
+uint32_t intcap;
 HPETTimer timer[HPET_MAX_TIMERS];
 
 /* Memory-mapped, software visible registers */
@@ -663,8 +670,8 @@ static void hpet_reset(DeviceState *d)
 if (s-flags  (1  HPET_MSI_SUPPORT)) {
 timer-config |= HPET_TN_FSB_CAP;
 }
-/* advertise availability of ioapic inti2 */
-timer-config |=  0x0004ULL  32;
+/* advertise availability of ioapic int */
+timer-config |=  (uint64_t)s-intcap  32;
 timer-period = 0ULL;
 timer-wrap_flag = 0;
 }
@@ -753,6 +760,7 @@ static void hpet_realize(DeviceState *dev, Error **errp)
 static Property hpet_device_properties[] = {
 DEFINE_PROP_UINT8(timers, HPETState, num_timers, HPET_MIN_TIMERS),
 DEFINE_PROP_BIT(msi, HPETState, flags, HPET_MSI_SUPPORT, false),
+DEFINE_PROP_UINT32(intcap, HPETState, intcap, HPET_TN_INT_CAP_DEFAULT),
 DEFINE_PROP_END_OF_LIST(),
 };
 
-- 
1.8.1.4




[Qemu-devel] [PATCH v3 1/3] hpet: inverse polarity when pin above ISA_NUM_IRQS

2013-08-30 Thread Liu Ping Fan
According to hpet spec, hpet irq is high active. But according to
ICH spec, there is inversion before the input of ioapic. So the OS
will expect low active on this IRQ line.(And this is observed on
bare metal).

We fold the emulation of this inversion inside the hpet logic.

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
 hw/timer/hpet.c | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
index 648b383..1139448 100644
--- a/hw/timer/hpet.c
+++ b/hw/timer/hpet.c
@@ -198,13 +198,23 @@ static void update_irq(struct HPETTimer *timer, int set)
 if (!set || !timer_enabled(timer) || !hpet_enabled(timer-state)) {
 s-isr = ~mask;
 if (!timer_fsb_route(timer)) {
-qemu_irq_lower(s-irqs[route]);
+/* fold the ICH PIRQ# pin's internal inversion logic into hpet */
+if (route = ISA_NUM_IRQS) {
+qemu_irq_raise(s-irqs[route]);
+} else {
+qemu_irq_lower(s-irqs[route]);
+}
 }
 } else if (timer_fsb_route(timer)) {
 stl_le_phys(timer-fsb  32, timer-fsb  0x);
 } else if (timer-config  HPET_TN_TYPE_LEVEL) {
 s-isr |= mask;
-qemu_irq_raise(s-irqs[route]);
+/* fold the ICH PIRQ# pin's internal inversion logic into hpet */
+if (route = ISA_NUM_IRQS) {
+qemu_irq_lower(s-irqs[route]);
+} else {
+qemu_irq_raise(s-irqs[route]);
+}
 } else {
 s-isr = ~mask;
 qemu_irq_pulse(s-irqs[route]);
-- 
1.8.1.4




[Qemu-devel] [PATCH v2 1/3] hpet: inverse polarity when pin above ISA_NUM_IRQS

2013-08-27 Thread Liu Ping Fan
According to hpet spec, hpet irq is high active. But according to
ICH spec, there is inversion before the input of ioapic. So the OS
will expect low active on this IRQ line.(And this is observed on
bare metal).

We fold the emulation of this inversion inside the hpet logic.

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
 hw/timer/hpet.c | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
index 648b383..1139448 100644
--- a/hw/timer/hpet.c
+++ b/hw/timer/hpet.c
@@ -198,13 +198,23 @@ static void update_irq(struct HPETTimer *timer, int set)
 if (!set || !timer_enabled(timer) || !hpet_enabled(timer-state)) {
 s-isr = ~mask;
 if (!timer_fsb_route(timer)) {
-qemu_irq_lower(s-irqs[route]);
+/* fold the ICH PIRQ# pin's internal inversion logic into hpet */
+if (route = ISA_NUM_IRQS) {
+qemu_irq_raise(s-irqs[route]);
+} else {
+qemu_irq_lower(s-irqs[route]);
+}
 }
 } else if (timer_fsb_route(timer)) {
 stl_le_phys(timer-fsb  32, timer-fsb  0x);
 } else if (timer-config  HPET_TN_TYPE_LEVEL) {
 s-isr |= mask;
-qemu_irq_raise(s-irqs[route]);
+/* fold the ICH PIRQ# pin's internal inversion logic into hpet */
+if (route = ISA_NUM_IRQS) {
+qemu_irq_lower(s-irqs[route]);
+} else {
+qemu_irq_raise(s-irqs[route]);
+}
 } else {
 s-isr = ~mask;
 qemu_irq_pulse(s-irqs[route]);
-- 
1.8.1.4




[Qemu-devel] [PATCH v2 2/3] vl: add func to check the machine type

2013-08-27 Thread Liu Ping Fan
On different machines, the device can has different properties.
Export machine type's check interface to devices, so the device
can decide its behavior at runtime.

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
 include/hw/boards.h | 1 +
 vl.c| 8 
 2 files changed, 9 insertions(+)

diff --git a/include/hw/boards.h b/include/hw/boards.h
index fb7c6f1..020edfb 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -49,6 +49,7 @@ typedef struct QEMUMachine {
 } QEMUMachine;
 
 int qemu_register_machine(QEMUMachine *m);
+bool qemu_check_machine(const char *idstr);
 QEMUMachine *find_default_machine(void);
 
 extern QEMUMachine *current_machine;
diff --git a/vl.c b/vl.c
index f422a1c..1f37489 100644
--- a/vl.c
+++ b/vl.c
@@ -1667,6 +1667,14 @@ int qemu_register_machine(QEMUMachine *m)
 return 0;
 }
 
+bool qemu_check_machine(const char *idstr)
+{
+size_t n = strlen(idstr);
+
+assert(current_machine);
+return !strncmp(current_machine-name, idstr, n);
+}
+
 static QEMUMachine *find_machine(const char *name)
 {
 QEMUMachine *m;
-- 
1.8.1.4




[Qemu-devel] [PATCH v2 3/3] hpet: entitle more irq pins for hpet

2013-08-27 Thread Liu Ping Fan
On q35 machine, IRQ2/8 can be reserved for hpet timer 0/1. And pin 16~23
of ioapic can be dynamically assigned to hpet as guest chooses.

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
 hw/timer/hpet.c | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
index 1139448..92cd4fa 100644
--- a/hw/timer/hpet.c
+++ b/hw/timer/hpet.c
@@ -25,6 +25,7 @@
  */
 
 #include hw/hw.h
+#include hw/boards.h
 #include hw/i386/pc.h
 #include ui/console.h
 #include qemu/timer.h
@@ -42,6 +43,11 @@
 
 #define HPET_MSI_SUPPORT0
 
+/* only IRQ2 allowed for pc-1.6 and former */
+#define HPET_TN_INT_CAP_PC (0x4ULL  32)
+/* Hpet can use non-legacy IRQ16~23, and an IRQ2 ,IRQ8 */
+#define HPET_TN_INT_CAP_Q35 (0xff0104ULL  32)
+
 #define TYPE_HPET hpet
 #define HPET(obj) OBJECT_CHECK(HPETState, (obj), TYPE_HPET)
 
@@ -663,8 +669,12 @@ static void hpet_reset(DeviceState *d)
 if (s-flags  (1  HPET_MSI_SUPPORT)) {
 timer-config |= HPET_TN_FSB_CAP;
 }
-/* advertise availability of ioapic inti2 */
-timer-config |=  0x0004ULL  32;
+/* advertise availability of ioapic int */
+if (qemu_check_machine(pc-q35)) {
+timer-config |=  HPET_TN_INT_CAP_Q35;
+} else {
+timer-config |=  HPET_TN_INT_CAP_PC;
+}
 timer-period = 0ULL;
 timer-wrap_flag = 0;
 }
-- 
1.8.1.4




Re: [Qemu-devel] [PATCH v2 3/3] hpet: entitle more irq pins for hpet

2013-08-27 Thread liu ping fan
On Tue, Aug 27, 2013 at 4:45 PM, Paolo Bonzini pbonz...@redhat.com wrote:
 Il 27/08/2013 10:10, Liu Ping Fan ha scritto:
 On q35 machine, IRQ2/8 can be reserved for hpet timer 0/1. And pin 16~23
 of ioapic can be dynamically assigned to hpet as guest chooses.

 First of all, the backwards-compatible q35 machines should also use the
 old value.

Sorry but could you tell me why even on q35, we can only use IRQ2 for hpet?

 Second, the HPET should _not_ know the machine types.  You need to add a
 qdev property as I suggested in my reply to v1.  The default value
 should be 0xFF0104.  Then you can add the compatibility value in
 hw/i386/pc_piix.c and hw/i386/pc_q35.c.

So export the hpet's property at board-level?

Regards,
Pingfan
 ---
  hw/timer/hpet.c | 14 --
  1 file changed, 12 insertions(+), 2 deletions(-)

 diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
 index 1139448..92cd4fa 100644
 --- a/hw/timer/hpet.c
 +++ b/hw/timer/hpet.c
 @@ -25,6 +25,7 @@
   */

  #include hw/hw.h
 +#include hw/boards.h
  #include hw/i386/pc.h
  #include ui/console.h
  #include qemu/timer.h
 @@ -42,6 +43,11 @@

  #define HPET_MSI_SUPPORT0

 +/* only IRQ2 allowed for pc-1.6 and former */
 +#define HPET_TN_INT_CAP_PC (0x4ULL  32)
 +/* Hpet can use non-legacy IRQ16~23, and an IRQ2 ,IRQ8 */
 +#define HPET_TN_INT_CAP_Q35 (0xff0104ULL  32)
 +
  #define TYPE_HPET hpet
  #define HPET(obj) OBJECT_CHECK(HPETState, (obj), TYPE_HPET)

 @@ -663,8 +669,12 @@ static void hpet_reset(DeviceState *d)
  if (s-flags  (1  HPET_MSI_SUPPORT)) {
  timer-config |= HPET_TN_FSB_CAP;
  }
 -/* advertise availability of ioapic inti2 */
 -timer-config |=  0x0004ULL  32;
 +/* advertise availability of ioapic int */
 +if (qemu_check_machine(pc-q35)) {
 +timer-config |=  HPET_TN_INT_CAP_Q35;
 +} else {
 +timer-config |=  HPET_TN_INT_CAP_PC;
 +}
  timer-period = 0ULL;
  timer-wrap_flag = 0;
  }





Re: [Qemu-devel] [PATCH v3 4/4] timer: make qemu_clock_enable sync between disable and timer's cb

2013-08-27 Thread liu ping fan
On Tue, Aug 27, 2013 at 11:32 PM, Alex Bligh a...@alex.org.uk wrote:

 On 27 Aug 2013, at 04:21, Liu Ping Fan wrote:

 After disabling the QemuClock, we should make sure that no QemuTimers
 are still in flight. To implement that with light overhead, we resort
 to QemuEvent. The caller of disabling will wait on QemuEvent of each
 timerlist.

 Note, qemu_clock_enable(foo,false) can _not_ be called from timer's cb.
 And the callers of qemu_clock_enable() should be sync by themselves,
 not protected by this patch.

 Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
 ---
 include/qemu/timer.h |  4 
 qemu-timer.c | 20 +++-
 2 files changed, 23 insertions(+), 1 deletion(-)

 diff --git a/include/qemu/timer.h b/include/qemu/timer.h
 index e4934dd..b26909a 100644
 --- a/include/qemu/timer.h
 +++ b/include/qemu/timer.h
 @@ -185,6 +185,10 @@ void qemu_clock_notify(QEMUClockType type);
  * @enabled: true to enable, false to disable
  *
  * Enable or disable a clock
 + * Disabling the clock will wait for related timerlists to stop
 + * executing qemu_run_timers.  Thus, this functions should not
 + * be used from the callback of a timer that is based on @clock.
 + * Doing so would cause a deadlock.

 Presumably they should also be called with the BQL held?

Yes, will document this.

Thx,
Pingfan
  */
 void qemu_clock_enable(QEMUClockType type, bool enabled);

 diff --git a/qemu-timer.c b/qemu-timer.c
 index 95ff47f..c500a76 100644
 --- a/qemu-timer.c
 +++ b/qemu-timer.c
 @@ -45,6 +45,7 @@
 /* timers */

 typedef struct QEMUClock {
 + /* We rely on BQL to protect the timerlists */
 QLIST_HEAD(, QEMUTimerList) timerlists;

 NotifierList reset_notifiers;
 @@ -70,6 +71,8 @@ struct QEMUTimerList {
 QLIST_ENTRY(QEMUTimerList) list;
 QEMUTimerListNotifyCB *notify_cb;
 void *notify_opaque;
 +/* light weight method to mark the end of timerlist's running */
 +QemuEvent ev;
 };

 /**
 @@ -98,6 +101,7 @@ QEMUTimerList *timerlist_new(QEMUClockType type,
 QEMUClock *clock = qemu_clock_ptr(type);

 timer_list = g_malloc0(sizeof(QEMUTimerList));
 +qemu_event_init(timer_list-ev, false);
 timer_list-clock = clock;
 timer_list-notify_cb = cb;
 timer_list-notify_opaque = opaque;
 @@ -140,13 +144,24 @@ void qemu_clock_notify(QEMUClockType type)
 }
 }

 +/* Disabling the clock will wait for related timerlists to stop
 + * executing qemu_run_timers.  Thus, this functions should not
 + * be used from the callback of a timer that is based on @clock.
 + * Doing so would cause a deadlock.
 + */
 void qemu_clock_enable(QEMUClockType type, bool enabled)
 {
 QEMUClock *clock = qemu_clock_ptr(type);
 +QEMUTimerList *tl;
 bool old = clock-enabled;
 clock-enabled = enabled;
 if (enabled  !old) {
 qemu_clock_notify(type);
 +} else if (!enabled  old) {
 +/* We rely on BQL to protect the timerlists */
 +QLIST_FOREACH(tl, clock-timerlists, list) {
 +qemu_event_wait(tl-ev);
 +}
 }
 }

 @@ -373,8 +388,10 @@ bool timerlist_run_timers(QEMUTimerList *timer_list)
 QEMUTimer *ts;
 int64_t current_time;
 bool progress = false;
 -
 +
 +qemu_event_reset(timer_list-ev);
 if (!timer_list-clock-enabled) {
 +qemu_event_set(timer_list-ev);
 return progress;
 }

 @@ -392,6 +409,7 @@ bool timerlist_run_timers(QEMUTimerList *timer_list)
 ts-cb(ts-opaque);
 progress = true;
 }
 +qemu_event_set(timer_list-ev);
 return progress;
 }

 --
 1.8.1.4




 --
 Alex Bligh







Re: [Qemu-devel] [PATCH v3 2/4] timer: protect timers_state's clock with seqlock

2013-08-27 Thread liu ping fan
On Tue, Aug 27, 2013 at 11:18 PM, Alex Bligh a...@alex.org.uk wrote:

 On 27 Aug 2013, at 04:21, Liu Ping Fan wrote:

 Note in tcg mode, vm_clock still read inside BQL, so icount is

 Should refer to QEMU_CLOCK_VIRTUAL if after my patches

Will change the log.
 left without private lock's protection. As for cpu_ticks_* in
 timers_state, it is still protected by BQL.

 I *think* what you are saying here is that after this patch,
 reading QEMU_CLOCK_VIRTUAL is threadsafe unless use_icount
 is true, in which case it is not thread safe as existing
 callers rely on the BQL.

Yes.
 The commit could be a bit more specific here, not least as
 if I read it right, that will need fixing before
 QEMU_CLOCK_VIRTUAL is used at all in other other threads.

Yes, this patch should be ready, before we use timers on other threads.

Thx,
Pingfan
 --
 Alex Bligh







Re: [Qemu-devel] [PATCH 1/2] hpet: entitle more irq pins for hpet

2013-08-26 Thread liu ping fan
On Mon, Aug 26, 2013 at 3:59 PM, Paolo Bonzini pbonz...@redhat.com wrote:
 Il 26/08/2013 04:53, liu ping fan ha scritto:
 On Sun, Aug 25, 2013 at 2:45 PM, Paolo Bonzini pbonz...@redhat.com wrote:
 Il 25/08/2013 04:16, Liu Ping Fan ha scritto:
 On PC, IRQ2/8 can be reserved for hpet timer 0/1. And pin 16~23 of
 ioapic can be dynamically assigned to hpet as guest chooses.

 Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
 ---
  hw/timer/hpet.c | 6 --
  1 file changed, 4 insertions(+), 2 deletions(-)

 diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
 index 648b383..cd95d39 100644
 --- a/hw/timer/hpet.c
 +++ b/hw/timer/hpet.c
 @@ -41,6 +41,8 @@
  #endif

  #define HPET_MSI_SUPPORT0
 +/* Hpet can use non-legacy IRQ16~23, and an IRQ2 ,IRQ8 */
 +#define HPET_TN_INT_CAP (0xff0104ULL  32)

  #define TYPE_HPET hpet
  #define HPET(obj) OBJECT_CHECK(HPETState, (obj), TYPE_HPET)
 @@ -653,8 +655,8 @@ static void hpet_reset(DeviceState *d)
  if (s-flags  (1  HPET_MSI_SUPPORT)) {
  timer-config |= HPET_TN_FSB_CAP;
  }
 -/* advertise availability of ioapic inti2 */
 -timer-config |=  0x0004ULL  32;
 +/* advertise availability of ioapic int */
 +timer-config |=  HPET_TN_INT_CAP;
  timer-period = 0ULL;
  timer-wrap_flag = 0;
  }


 These high 32-bits of timer-config need to be a property of the HPET
 devices, so that the old value (4) is used when running with old machine

 Sorry, but I had included old value (4) in macro HPET_TN_INT_CAP.

 No, *only* GSI 2 must be available on old machine types (pc-1.6 and older).

Oh, got your meaning, will fix.

Thx,
Pingfan



[Qemu-devel] [PATCH v3 0/4] timers thread-safe stuff

2013-08-26 Thread Liu Ping Fan
Saw the Alex's patches has been merged, rebase mine onto his.

v3:
  1. rename seqlock_read_check as seqlock_read_retry
  2. Document timerlist were protected by BQL, and discard private lock around 
qemu_event_wait(tl-ev).

v2:
  1. fix comment in commit and code
  2. fix race issue for qemu_clock_enable(foo,disable)


Liu Ping Fan (2):
  timer: protect timers_state's clock with seqlock
  timer: make qemu_clock_enable sync between disable and timer's cb

Paolo Bonzini (2):
  seqlock: introduce read-write seqlock
  qemu-thread: add QemuEvent

 cpus.c  |  36 +++---
 include/qemu/seqlock.h  |  72 +++
 include/qemu/thread-posix.h |   8 +++
 include/qemu/thread-win32.h |   4 ++
 include/qemu/thread.h   |   7 +++
 include/qemu/timer.h|   4 ++
 qemu-timer.c|  20 +++-
 util/qemu-thread-posix.c| 116 
 util/qemu-thread-win32.c|  26 ++
 9 files changed, 286 insertions(+), 7 deletions(-)
 create mode 100644 include/qemu/seqlock.h

-- 
1.8.1.4




[Qemu-devel] [PATCH v3 1/4] seqlock: introduce read-write seqlock

2013-08-26 Thread Liu Ping Fan
From: Paolo Bonzini pbonz...@redhat.com

This lets the read-side access run outside the BQL.

Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 include/qemu/seqlock.h | 72 ++
 1 file changed, 72 insertions(+)
 create mode 100644 include/qemu/seqlock.h

diff --git a/include/qemu/seqlock.h b/include/qemu/seqlock.h
new file mode 100644
index 000..3ff118a
--- /dev/null
+++ b/include/qemu/seqlock.h
@@ -0,0 +1,72 @@
+/*
+ * Seqlock implementation for QEMU
+ *
+ * Copyright Red Hat, Inc. 2013
+ *
+ * Author:
+ *  Paolo Bonzini pbonz...@redhat.com
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+#ifndef QEMU_SEQLOCK_H
+#define QEMU_SEQLOCK_H 1
+
+#include qemu/atomic.h
+#include qemu/thread.h
+
+typedef struct QemuSeqLock QemuSeqLock;
+
+struct QemuSeqLock {
+QemuMutex *mutex;
+unsigned sequence;
+};
+
+static inline void seqlock_init(QemuSeqLock *sl, QemuMutex *mutex)
+{
+sl-mutex = mutex;
+sl-sequence = 0;
+}
+
+/* Lock out other writers and update the count.  */
+static inline void seqlock_write_lock(QemuSeqLock *sl)
+{
+if (sl-mutex) {
+qemu_mutex_lock(sl-mutex);
+}
+++sl-sequence;
+
+/* Write sequence before updating other fields.  */
+smp_wmb();
+}
+
+static inline void seqlock_write_unlock(QemuSeqLock *sl)
+{
+/* Write other fields before finalizing sequence.  */
+smp_wmb();
+
+++sl-sequence;
+if (sl-mutex) {
+qemu_mutex_unlock(sl-mutex);
+}
+}
+
+static inline unsigned seqlock_read_begin(QemuSeqLock *sl)
+{
+/* Always fail if a write is in progress.  */
+unsigned ret = sl-sequence  ~1;
+
+/* Read sequence before reading other fields.  */
+smp_rmb();
+return ret;
+}
+
+static int seqlock_read_retry(const QemuSeqLock *sl, unsigned start)
+{
+/* Read other fields before reading final sequence.  */
+smp_rmb();
+return unlikely(sl-sequence != start);
+}
+
+#endif
-- 
1.8.1.4




[Qemu-devel] [PATCH v3 2/4] timer: protect timers_state's clock with seqlock

2013-08-26 Thread Liu Ping Fan
The vm_clock may be read outside BQL. This will make timers_state
--the foundation of vm_clock exposed to race condition.
Using private lock to protect it.

Note in tcg mode, vm_clock still read inside BQL, so icount is
left without private lock's protection. As for cpu_ticks_* in
timers_state, it is still protected by BQL.

Lock rule: private lock innermost, ie BQL-this lock

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
 cpus.c | 36 ++--
 1 file changed, 30 insertions(+), 6 deletions(-)

diff --git a/cpus.c b/cpus.c
index b9e5685..bcead3b 100644
--- a/cpus.c
+++ b/cpus.c
@@ -37,6 +37,7 @@
 #include sysemu/qtest.h
 #include qemu/main-loop.h
 #include qemu/bitmap.h
+#include qemu/seqlock.h
 
 #ifndef _WIN32
 #include qemu/compatfd.h
@@ -112,6 +113,13 @@ static int64_t qemu_icount;
 typedef struct TimersState {
 int64_t cpu_ticks_prev;
 int64_t cpu_ticks_offset;
+/* cpu_clock_offset will be read out of BQL, so protect it with private
+ * lock. As for cpu_ticks_*, no requirement to read it outside BQL yet.
+ * Lock rule: innermost
+ */
+QemuSeqLock clock_seqlock;
+/* mutex for seqlock */
+QemuMutex mutex;
 int64_t cpu_clock_offset;
 int32_t cpu_ticks_enabled;
 int64_t dummy;
@@ -137,6 +145,7 @@ int64_t cpu_get_icount(void)
 }
 
 /* return the host CPU cycle counter and handle stop/restart */
+/* cpu_ticks is safely if holding BQL */
 int64_t cpu_get_ticks(void)
 {
 if (use_icount) {
@@ -161,33 +170,46 @@ int64_t cpu_get_ticks(void)
 int64_t cpu_get_clock(void)
 {
 int64_t ti;
-if (!timers_state.cpu_ticks_enabled) {
-return timers_state.cpu_clock_offset;
-} else {
-ti = get_clock();
-return ti + timers_state.cpu_clock_offset;
-}
+unsigned start;
+
+do {
+start = seqlock_read_begin(timers_state.clock_seqlock);
+if (!timers_state.cpu_ticks_enabled) {
+ti = timers_state.cpu_clock_offset;
+} else {
+ti = get_clock();
+ti += timers_state.cpu_clock_offset;
+}
+} while (seqlock_read_retry(timers_state.clock_seqlock, start));
+
+return ti;
 }
 
 /* enable cpu_get_ticks() */
 void cpu_enable_ticks(void)
 {
+/* Here, the really thing protected by seqlock is cpu_clock_offset. */
+seqlock_write_lock(timers_state.clock_seqlock);
 if (!timers_state.cpu_ticks_enabled) {
 timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
 timers_state.cpu_clock_offset -= get_clock();
 timers_state.cpu_ticks_enabled = 1;
 }
+seqlock_write_unlock(timers_state.clock_seqlock);
 }
 
 /* disable cpu_get_ticks() : the clock is stopped. You must not call
cpu_get_ticks() after that.  */
 void cpu_disable_ticks(void)
 {
+/* Here, the really thing protected by seqlock is cpu_clock_offset. */
+seqlock_write_lock(timers_state.clock_seqlock);
 if (timers_state.cpu_ticks_enabled) {
 timers_state.cpu_ticks_offset = cpu_get_ticks();
 timers_state.cpu_clock_offset = cpu_get_clock();
 timers_state.cpu_ticks_enabled = 0;
 }
+seqlock_write_unlock(timers_state.clock_seqlock);
 }
 
 /* Correlation between real and virtual time is always going to be
@@ -371,6 +393,8 @@ static const VMStateDescription vmstate_timers = {
 
 void configure_icount(const char *option)
 {
+qemu_mutex_init(timers_state.mutex);
+seqlock_init(timers_state.clock_seqlock, timers_state.mutex);
 vmstate_register(NULL, 0, vmstate_timers, timers_state);
 if (!option) {
 return;
-- 
1.8.1.4




[Qemu-devel] [PATCH v3 3/4] qemu-thread: add QemuEvent

2013-08-26 Thread Liu Ping Fan
From: Paolo Bonzini pbonz...@redhat.com

This emulates Win32 manual-reset events using futexes or conditional
variables.  Typical ways to use them are with multi-producer,
single-consumer data structures, to test for a complex condition whose
elements come from different threads:

for (;;) {
qemu_event_reset(ev);
... test complex condition ...
if (condition is true) {
break;
}
qemu_event_wait(ev);
}

Or more efficiently (but with some duplication):

... evaluate condition ...
while (!condition) {
qemu_event_reset(ev);
... evaluate condition ...
if (!condition) {
qemu_event_wait(ev);
... evaluate condition ...
}
}

QemuEvent provides a very fast userspace path in the common case when
no other thread is waiting, or the event is not changing state.  It
is used to report RCU quiescent states to the thread calling
synchronize_rcu (the latter being the single consumer), and to report
call_rcu invocations to the thread that receives them.

Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 include/qemu/thread-posix.h |   8 +++
 include/qemu/thread-win32.h |   4 ++
 include/qemu/thread.h   |   7 +++
 util/qemu-thread-posix.c| 116 
 util/qemu-thread-win32.c|  26 ++
 5 files changed, 161 insertions(+)

diff --git a/include/qemu/thread-posix.h b/include/qemu/thread-posix.h
index 361566a..eb5c7a1 100644
--- a/include/qemu/thread-posix.h
+++ b/include/qemu/thread-posix.h
@@ -21,6 +21,14 @@ struct QemuSemaphore {
 #endif
 };
 
+struct QemuEvent {
+#ifndef __linux__
+pthread_mutex_t lock;
+pthread_cond_t cond;
+#endif
+unsigned value;
+};
+
 struct QemuThread {
 pthread_t thread;
 };
diff --git a/include/qemu/thread-win32.h b/include/qemu/thread-win32.h
index 13adb95..3d58081 100644
--- a/include/qemu/thread-win32.h
+++ b/include/qemu/thread-win32.h
@@ -17,6 +17,10 @@ struct QemuSemaphore {
 HANDLE sema;
 };
 
+struct QemuEvent {
+HANDLE event;
+};
+
 typedef struct QemuThreadData QemuThreadData;
 struct QemuThread {
 QemuThreadData *data;
diff --git a/include/qemu/thread.h b/include/qemu/thread.h
index c02404b..3e32c65 100644
--- a/include/qemu/thread.h
+++ b/include/qemu/thread.h
@@ -7,6 +7,7 @@
 typedef struct QemuMutex QemuMutex;
 typedef struct QemuCond QemuCond;
 typedef struct QemuSemaphore QemuSemaphore;
+typedef struct QemuEvent QemuEvent;
 typedef struct QemuThread QemuThread;
 
 #ifdef _WIN32
@@ -45,6 +46,12 @@ void qemu_sem_wait(QemuSemaphore *sem);
 int qemu_sem_timedwait(QemuSemaphore *sem, int ms);
 void qemu_sem_destroy(QemuSemaphore *sem);
 
+void qemu_event_init(QemuEvent *ev, bool init);
+void qemu_event_set(QemuEvent *ev);
+void qemu_event_reset(QemuEvent *ev);
+void qemu_event_wait(QemuEvent *ev);
+void qemu_event_destroy(QemuEvent *ev);
+
 void qemu_thread_create(QemuThread *thread,
 void *(*start_routine)(void *),
 void *arg, int mode);
diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c
index 4de133e..37dd298 100644
--- a/util/qemu-thread-posix.c
+++ b/util/qemu-thread-posix.c
@@ -20,7 +20,12 @@
 #include limits.h
 #include unistd.h
 #include sys/time.h
+#ifdef __linux__
+#include sys/syscall.h
+#include linux/futex.h
+#endif
 #include qemu/thread.h
+#include qemu/atomic.h
 
 static void error_exit(int err, const char *msg)
 {
@@ -272,6 +277,117 @@ void qemu_sem_wait(QemuSemaphore *sem)
 #endif
 }
 
+#ifdef __linux__
+#define futex(...)  syscall(__NR_futex, __VA_ARGS__)
+
+static inline void futex_wake(QemuEvent *ev, int n)
+{
+futex(ev, FUTEX_WAKE, n, NULL, NULL, 0);
+}
+
+static inline void futex_wait(QemuEvent *ev, unsigned val)
+{
+futex(ev, FUTEX_WAIT, (int) val, NULL, NULL, 0);
+}
+#else
+static inline void futex_wake(QemuEvent *ev, int n)
+{
+if (n == 1) {
+pthread_cond_signal(ev-cond);
+} else {
+pthread_cond_broadcast(ev-cond);
+}
+}
+
+static inline void futex_wait(QemuEvent *ev, unsigned val)
+{
+pthread_mutex_lock(ev-lock);
+if (ev-value == val) {
+pthread_cond_wait(ev-cond, ev-lock);
+}
+pthread_mutex_unlock(ev-lock);
+}
+#endif
+
+/* Valid transitions:
+ * - free-set, when setting the event
+ * - busy-set, when setting the event, followed by futex_wake
+ * - set-free, when resetting the event
+ * - free-busy, when waiting
+ *
+ * set-busy does not happen (it can be observed from the outside but
+ * it really is set-free-busy).
+ *
+ * busy-free provably cannot happen; to enforce it, the set-free transition
+ * is done with an OR, which becomes a no-op if the event has concurrently
+ * transitioned to free or busy.
+ */
+
+#define EV_SET 0
+#define EV_FREE1
+#define EV_BUSY   -1
+
+void qemu_event_init(QemuEvent *ev, bool init)
+{
+#ifndef __linux__
+pthread_mutex_init(ev-lock, NULL);
+

[Qemu-devel] [PATCH v3 4/4] timer: make qemu_clock_enable sync between disable and timer's cb

2013-08-26 Thread Liu Ping Fan
After disabling the QemuClock, we should make sure that no QemuTimers
are still in flight. To implement that with light overhead, we resort
to QemuEvent. The caller of disabling will wait on QemuEvent of each
timerlist.

Note, qemu_clock_enable(foo,false) can _not_ be called from timer's cb.
And the callers of qemu_clock_enable() should be sync by themselves,
not protected by this patch.

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
 include/qemu/timer.h |  4 
 qemu-timer.c | 20 +++-
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/include/qemu/timer.h b/include/qemu/timer.h
index e4934dd..b26909a 100644
--- a/include/qemu/timer.h
+++ b/include/qemu/timer.h
@@ -185,6 +185,10 @@ void qemu_clock_notify(QEMUClockType type);
  * @enabled: true to enable, false to disable
  *
  * Enable or disable a clock
+ * Disabling the clock will wait for related timerlists to stop
+ * executing qemu_run_timers.  Thus, this functions should not
+ * be used from the callback of a timer that is based on @clock.
+ * Doing so would cause a deadlock.
  */
 void qemu_clock_enable(QEMUClockType type, bool enabled);
 
diff --git a/qemu-timer.c b/qemu-timer.c
index 95ff47f..c500a76 100644
--- a/qemu-timer.c
+++ b/qemu-timer.c
@@ -45,6 +45,7 @@
 /* timers */
 
 typedef struct QEMUClock {
+ /* We rely on BQL to protect the timerlists */
 QLIST_HEAD(, QEMUTimerList) timerlists;
 
 NotifierList reset_notifiers;
@@ -70,6 +71,8 @@ struct QEMUTimerList {
 QLIST_ENTRY(QEMUTimerList) list;
 QEMUTimerListNotifyCB *notify_cb;
 void *notify_opaque;
+/* light weight method to mark the end of timerlist's running */
+QemuEvent ev;
 };
 
 /**
@@ -98,6 +101,7 @@ QEMUTimerList *timerlist_new(QEMUClockType type,
 QEMUClock *clock = qemu_clock_ptr(type);
 
 timer_list = g_malloc0(sizeof(QEMUTimerList));
+qemu_event_init(timer_list-ev, false);
 timer_list-clock = clock;
 timer_list-notify_cb = cb;
 timer_list-notify_opaque = opaque;
@@ -140,13 +144,24 @@ void qemu_clock_notify(QEMUClockType type)
 }
 }
 
+/* Disabling the clock will wait for related timerlists to stop
+ * executing qemu_run_timers.  Thus, this functions should not
+ * be used from the callback of a timer that is based on @clock.
+ * Doing so would cause a deadlock.
+ */
 void qemu_clock_enable(QEMUClockType type, bool enabled)
 {
 QEMUClock *clock = qemu_clock_ptr(type);
+QEMUTimerList *tl;
 bool old = clock-enabled;
 clock-enabled = enabled;
 if (enabled  !old) {
 qemu_clock_notify(type);
+} else if (!enabled  old) {
+/* We rely on BQL to protect the timerlists */
+QLIST_FOREACH(tl, clock-timerlists, list) {
+qemu_event_wait(tl-ev);
+}
 }
 }
 
@@ -373,8 +388,10 @@ bool timerlist_run_timers(QEMUTimerList *timer_list)
 QEMUTimer *ts;
 int64_t current_time;
 bool progress = false;
-   
+
+qemu_event_reset(timer_list-ev);
 if (!timer_list-clock-enabled) {
+qemu_event_set(timer_list-ev);
 return progress;
 }
 
@@ -392,6 +409,7 @@ bool timerlist_run_timers(QEMUTimerList *timer_list)
 ts-cb(ts-opaque);
 progress = true;
 }
+qemu_event_set(timer_list-ev);
 return progress;
 }
 
-- 
1.8.1.4




Re: [Qemu-devel] [PATCH 2/2] hpet: inverse polarity when pin above ISA_NUM_IRQS

2013-08-25 Thread liu ping fan
On Sun, Aug 25, 2013 at 2:44 PM, Paolo Bonzini pbonz...@redhat.com wrote:
 Il 25/08/2013 04:16, Liu Ping Fan ha scritto:
 According to hpet spec, hpet irq is high active. But according to
 ICH spec, there is inversion before the input of ioapic. So the OS
 will expect low active on this IRQ line.(And this is observed on
 bare metal).

 We fold the emulation of this inversion inside the hpet logic.

 Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
 ---
 kernel has a bug with ioapic, refer to
https://lkml.org/lkml/2013/8/23/98
 With all these patch, linux-2.6/Documentation/timers/hpet_example.c can work
 on qemu

 Can you explain in qemu q35 machine ioapic's ioredtbl[x] can never be
 set as low-active, even if the hpet driver registered it?

From kernel side, in drivers/char/hpet.c
hpet_timer_set_irq()
gsi = acpi_register_gsi(NULL, irq, ACPI_LEVEL_SENSITIVE,
ACPI_ACTIVE_LOW);

So we expect the ioredtbl[x] will be set as low-active polarity, but
on q35, I found this did not happen. Scene: a former call to
io_apic_setup_irq_pin_once(), then the later one(hpet, which decided
to share this pin with other device) will not actually program the
pin.  Here, notice the name  xx_once(), but it ignored the second
request without a check.

Regards,
Pingfan

 Paolo



Re: [Qemu-devel] [PATCH 1/2] hpet: entitle more irq pins for hpet

2013-08-25 Thread liu ping fan
On Sun, Aug 25, 2013 at 2:45 PM, Paolo Bonzini pbonz...@redhat.com wrote:
 Il 25/08/2013 04:16, Liu Ping Fan ha scritto:
 On PC, IRQ2/8 can be reserved for hpet timer 0/1. And pin 16~23 of
 ioapic can be dynamically assigned to hpet as guest chooses.

 Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
 ---
  hw/timer/hpet.c | 6 --
  1 file changed, 4 insertions(+), 2 deletions(-)

 diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
 index 648b383..cd95d39 100644
 --- a/hw/timer/hpet.c
 +++ b/hw/timer/hpet.c
 @@ -41,6 +41,8 @@
  #endif

  #define HPET_MSI_SUPPORT0
 +/* Hpet can use non-legacy IRQ16~23, and an IRQ2 ,IRQ8 */
 +#define HPET_TN_INT_CAP (0xff0104ULL  32)

  #define TYPE_HPET hpet
  #define HPET(obj) OBJECT_CHECK(HPETState, (obj), TYPE_HPET)
 @@ -653,8 +655,8 @@ static void hpet_reset(DeviceState *d)
  if (s-flags  (1  HPET_MSI_SUPPORT)) {
  timer-config |= HPET_TN_FSB_CAP;
  }
 -/* advertise availability of ioapic inti2 */
 -timer-config |=  0x0004ULL  32;
 +/* advertise availability of ioapic int */
 +timer-config |=  HPET_TN_INT_CAP;
  timer-period = 0ULL;
  timer-wrap_flag = 0;
  }


 These high 32-bits of timer-config need to be a property of the HPET
 devices, so that the old value (4) is used when running with old machine

Sorry, but I had included old value (4) in macro HPET_TN_INT_CAP.
 types.  Also, this patch must be the second, not the first, otherwise
 you have a commit with btoken polarity IRQs.

Will fix

Thx,
Pingfan



Re: [Qemu-devel] [PATCH v3 3/3] slirp: set mainloop timeout with more precise value

2013-08-24 Thread liu ping fan
On Sat, Aug 24, 2013 at 12:49 AM, Jan Kiszka jan.kis...@siemens.com wrote:
 On 2013-08-21 10:07, liu ping fan wrote:
 On Wed, Aug 21, 2013 at 3:36 PM, Alex Bligh a...@alex.org.uk wrote:


 --On 21 August 2013 10:15:52 +0800 Liu Ping Fan qemul...@gmail.com wrote:

 -void slirp_update_timeout(uint32_t *timeout)
 +static void slirp_update_timeout(uint32_t *timeout)
  {
 -if (!QTAILQ_EMPTY(slirp_instances)) {
 -*timeout = MIN(1000, *timeout);


 If you are putting things in macros, you might as well change that

 TIMEOUT_FAST/SLOW have definite meaning, and used more than one place
 in the code. For 1000ms, I do not know this magic value's meaning, but
 whatever, it just occurs once. So there is no trouble to read the
 code.

 You could name it ONE_SEC or so. Can be done as trivial patch on top.

 IIRC, slirp requires regular polling for the aging of certain requests
 like DNS.

Thanks for the explanation. Will fix and document it.

Regards,
Pingfan



Re: [Qemu-devel] [PATCH v3 3/3] slirp: set mainloop timeout with more precise value

2013-08-24 Thread liu ping fan
On Sat, Aug 24, 2013 at 12:54 AM, Jan Kiszka jan.kis...@siemens.com wrote:
 On 2013-08-21 04:15, Liu Ping Fan wrote:
 If slirp needs to emulate tcp timeout, then the timeout value
 for mainloop should be more precise, which is determined by
 slirp's fasttimo or slowtimo. Achieve this by swap the logic
 sequence of slirp_pollfds_fill and slirp_update_timeout.

 Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
 ---
  main-loop.c  |  3 +--
  slirp/libslirp.h |  3 +--
  slirp/slirp.c| 28 
  stubs/slirp.c|  6 +-
  4 files changed, 27 insertions(+), 13 deletions(-)

 diff --git a/main-loop.c b/main-loop.c
 index a44fff6..e258567 100644
 --- a/main-loop.c
 +++ b/main-loop.c
 @@ -458,8 +458,7 @@ int main_loop_wait(int nonblocking)
  g_array_set_size(gpollfds, 0); /* reset for new iteration */
  /* XXX: separate device handlers from system ones */
  #ifdef CONFIG_SLIRP
 -slirp_update_timeout(timeout);
 -slirp_pollfds_fill(gpollfds);
 +slirp_pollfds_fill(gpollfds, timeout);
  #endif
  qemu_iohandler_fill(gpollfds);
  ret = os_host_main_loop_wait(timeout);
 diff --git a/slirp/libslirp.h b/slirp/libslirp.h
 index ceabff8..5bdcbd5 100644
 --- a/slirp/libslirp.h
 +++ b/slirp/libslirp.h
 @@ -16,8 +16,7 @@ Slirp *slirp_init(int restricted, struct in_addr vnetwork,
void *opaque);
  void slirp_cleanup(Slirp *slirp);

 -void slirp_update_timeout(uint32_t *timeout);
 -void slirp_pollfds_fill(GArray *pollfds);
 +void slirp_pollfds_fill(GArray *pollfds, uint32_t *timeout);

  void slirp_pollfds_poll(GArray *pollfds, int select_error);

 diff --git a/slirp/slirp.c b/slirp/slirp.c
 index 1e8983e..f312a7d 100644
 --- a/slirp/slirp.c
 +++ b/slirp/slirp.c
 @@ -260,14 +260,33 @@ void slirp_cleanup(Slirp *slirp)
  #define CONN_CANFSEND(so) (((so)-so_state  
 (SS_FCANTSENDMORE|SS_ISFCONNECTED)) == SS_ISFCONNECTED)
  #define CONN_CANFRCV(so) (((so)-so_state  
 (SS_FCANTRCVMORE|SS_ISFCONNECTED)) == SS_ISFCONNECTED)

 -void slirp_update_timeout(uint32_t *timeout)
 +static void slirp_update_timeout(uint32_t *timeout)
  {
 -if (!QTAILQ_EMPTY(slirp_instances)) {
 -*timeout = MIN(1000, *timeout);
 +Slirp *slirp;
 +uint32_t t;
 +
 +*timeout = MIN(1000, *timeout);
 +if (*timeout = TIMEOUT_FAST) {

 Nitpicking, sorry, but TIMEOUT_FAST is always smaller than 1000. So this
 check should come first, and then the MIN assignment (to t).

Will fix.

Regards,
Pingfan



[Qemu-devel] [PATCH v4 1/3] slirp: make timeout local

2013-08-24 Thread Liu Ping Fan
Each slirp has its own time to caculate timeout.

Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
---
 slirp/slirp.c | 22 ++
 slirp/slirp.h |  3 +++
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/slirp/slirp.c b/slirp/slirp.c
index 80b28ea..b71c617 100644
--- a/slirp/slirp.c
+++ b/slirp/slirp.c
@@ -40,8 +40,6 @@ static const uint8_t special_ethaddr[ETH_ALEN] = {
 static const uint8_t zero_ethaddr[ETH_ALEN] = { 0, 0, 0, 0, 0, 0 };
 
 u_int curtime;
-static u_int time_fasttimo, last_slowtimo;
-static int do_slowtimo;
 
 static QTAILQ_HEAD(slirp_instances, Slirp) slirp_instances =
 QTAILQ_HEAD_INITIALIZER(slirp_instances);
@@ -278,14 +276,13 @@ void slirp_pollfds_fill(GArray *pollfds)
 /*
  * First, TCP sockets
  */
-do_slowtimo = 0;
 
 QTAILQ_FOREACH(slirp, slirp_instances, entry) {
 /*
  * *_slowtimo needs calling if there are IP fragments
  * in the fragment queue, or there are TCP connections active
  */
-do_slowtimo |= ((slirp-tcb.so_next != slirp-tcb) ||
+slirp-do_slowtimo = ((slirp-tcb.so_next != slirp-tcb) ||
 (slirp-ipq.ip_link != slirp-ipq.ip_link.next));
 
 for (so = slirp-tcb.so_next; so != slirp-tcb;
@@ -299,8 +296,9 @@ void slirp_pollfds_fill(GArray *pollfds)
 /*
  * See if we need a tcp_fasttimo
  */
-if (time_fasttimo == 0  so-so_tcpcb-t_flags  TF_DELACK) {
-time_fasttimo = curtime; /* Flag when we want a fasttimo */
+if (slirp-time_fasttimo == 0 
+so-so_tcpcb-t_flags  TF_DELACK) {
+slirp-time_fasttimo = curtime; /* Flag when want a fasttimo */
 }
 
 /*
@@ -381,7 +379,7 @@ void slirp_pollfds_fill(GArray *pollfds)
 udp_detach(so);
 continue;
 } else {
-do_slowtimo = 1; /* Let socket expire */
+slirp-do_slowtimo = true; /* Let socket expire */
 }
 }
 
@@ -422,7 +420,7 @@ void slirp_pollfds_fill(GArray *pollfds)
 icmp_detach(so);
 continue;
 } else {
-do_slowtimo = 1; /* Let socket expire */
+slirp-do_slowtimo = true; /* Let socket expire */
 }
 }
 
@@ -454,14 +452,14 @@ void slirp_pollfds_poll(GArray *pollfds, int select_error)
 /*
  * See if anything has timed out
  */
-if (time_fasttimo  ((curtime - time_fasttimo) = 2)) {
+if (slirp-time_fasttimo  ((curtime - slirp-time_fasttimo) = 2)) {
 tcp_fasttimo(slirp);
-time_fasttimo = 0;
+slirp-time_fasttimo = 0;
 }
-if (do_slowtimo  ((curtime - last_slowtimo) = 499)) {
+if (slirp-do_slowtimo  ((curtime - slirp-last_slowtimo) = 499)) {
 ip_slowtimo(slirp);
 tcp_slowtimo(slirp);
-last_slowtimo = curtime;
+slirp-last_slowtimo = curtime;
 }
 
 /*
diff --git a/slirp/slirp.h b/slirp/slirp.h
index fe0e65d..e4a1bd4 100644
--- a/slirp/slirp.h
+++ b/slirp/slirp.h
@@ -203,6 +203,9 @@ bool arp_table_search(Slirp *slirp, uint32_t ip_addr,
 
 struct Slirp {
 QTAILQ_ENTRY(Slirp) entry;
+u_int time_fasttimo;
+u_int last_slowtimo;
+bool do_slowtimo;
 
 /* virtual network configuration */
 struct in_addr vnetwork_addr;
-- 
1.8.1.4




  1   2   3   4   5   6   7   8   >