[Qemu-devel] [PATCH] fix qemu compile error with --enable-debug

2012-07-03 Thread Wanpeng Li
From: Wanpeng Li 

 CC  i386-softmmu/target-i386/translate.o
 /home/kernel/qemu/target-i386/translate.c: In function ‘gen_sse’:
 /home/kernel/qemu/target-i386/translate.c:3571:27: error: assignment from 
incompatible pointer type [-Werror]
 /home/kernel/qemu/target-i386/translate.c:3573:17: error: incompatible type 
for argument 2 of ‘sse_fn_pl’
 /home/kernel/qemu/target-i386/translate.c:3573:17: note: expected ‘TCGv_i64’ 
but argument is of type ‘TCGv_i32’
 /home/kernel/qemu/target-i386/translate.c:3633:28: error: assignment from 
incompatible pointer type [-Werror]
 /home/kernel/qemu/target-i386/translate.c:3636:17: error: incompatible type 
for argument 1 of ‘sse_fn_l_p’
 /home/kernel/qemu/target-i386/translate.c:3636:17: note: expected ‘TCGv_i64’ 
but argument is of type ‘TCGv_i32’
 cc1: all warnings being treated as errors

 make[1]: *** [target-i386/translate.o] Error 1
 make: *** [subdir-i386-softmmu] Error 2

 Signed-off-by: Wanpeng Li 
---
 target-i386/translate.c |   10 --
 1 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/target-i386/translate.c b/target-i386/translate.c
index a902f4a..ab1d0ff 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -3098,9 +3098,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong 
pc_start, int rex_r)
 int b1, op1_offset, op2_offset, is_xmm, val, ot;
 int modrm, mod, rm, reg, reg_addr, offset_addr;
 SSEFunc_i_p sse_fn_i_p;
-SSEFunc_l_p sse_fn_l_p;
 SSEFunc_0_pi sse_fn_pi;
-SSEFunc_0_pl sse_fn_pl;
 SSEFunc_0_pp sse_fn_pp;
 SSEFunc_0_ppi sse_fn_ppi;
 SSEFunc_0_ppt sse_fn_ppt;
@@ -3568,9 +3566,9 @@ static void gen_sse(DisasContext *s, int b, target_ulong 
pc_start, int rex_r)
 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
 sse_fn_pi(cpu_ptr0, cpu_tmp2_i32);
 } else {
-sse_fn_pl = sse_op_table3a[(s->dflag == 2) * 2 +
+sse_fn_pi = sse_op_table3a[(s->dflag == 2) * 2 +
((b >> 8) - 2)];
-sse_fn_pl(cpu_ptr0, cpu_T[0]);
+sse_fn_pi(cpu_ptr0, cpu_T[0]);
 }
 break;
 case 0x02c: /* cvttps2pi */
@@ -3630,10 +3628,10 @@ static void gen_sse(DisasContext *s, int b, 
target_ulong pc_start, int rex_r)
 sse_fn_i_p(cpu_tmp2_i32, cpu_ptr0);
 tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
 } else {
-sse_fn_l_p = sse_op_table3b[(s->dflag == 2) * 2 +
+sse_fn_i_p = sse_op_table3b[(s->dflag == 2) * 2 +
 ((b >> 8) - 2) +
 (b & 1) * 4];
-sse_fn_l_p(cpu_T[0], cpu_ptr0);
+sse_fn_i_p(cpu_T[0], cpu_ptr0);
 }
 gen_op_mov_reg_T0(ot, reg);
 break;
-- 
1.7.5.4




Re: [Qemu-devel] [PATCH] fix qemu compile error with --enable-debug

2012-07-03 Thread Wanpeng Li
On Wed, Jul 04, 2012 at 10:51:25AM +0800, Dunrong Huang wrote:
>There have been  two discussions about this error:
>http://lists.nongnu.org/archive/html/qemu-devel/2012-06/msg04858.html
>and
>http://lists.nongnu.org/archive/html/qemu-devel/2012-06/msg04728.html
>
Oh, thank you!

>
>2012/7/4 Wanpeng Li :
>> From: Wanpeng Li 
>>
>>  CC  i386-softmmu/target-i386/translate.o
>>  /home/kernel/qemu/target-i386/translate.c: In function ‘gen_sse’:
>>  /home/kernel/qemu/target-i386/translate.c:3571:27: error: assignment from 
>> incompatible pointer type [-Werror]
>>  /home/kernel/qemu/target-i386/translate.c:3573:17: error: incompatible type 
>> for argument 2 of ‘sse_fn_pl’
>>  /home/kernel/qemu/target-i386/translate.c:3573:17: note: expected 
>> ‘TCGv_i64’ but argument is of type ‘TCGv_i32’
>>  /home/kernel/qemu/target-i386/translate.c:3633:28: error: assignment from 
>> incompatible pointer type [-Werror]
>>  /home/kernel/qemu/target-i386/translate.c:3636:17: error: incompatible type 
>> for argument 1 of ‘sse_fn_l_p’
>>  /home/kernel/qemu/target-i386/translate.c:3636:17: note: expected 
>> ‘TCGv_i64’ but argument is of type ‘TCGv_i32’
>>  cc1: all warnings being treated as errors
>>
>>  make[1]: *** [target-i386/translate.o] Error 1
>>  make: *** [subdir-i386-softmmu] Error 2
>>
>>  Signed-off-by: Wanpeng Li 
>> ---
>>  target-i386/translate.c |   10 --
>>  1 files changed, 4 insertions(+), 6 deletions(-)
>>
>> diff --git a/target-i386/translate.c b/target-i386/translate.c
>> index a902f4a..ab1d0ff 100644
>> --- a/target-i386/translate.c
>> +++ b/target-i386/translate.c
>> @@ -3098,9 +3098,7 @@ static void gen_sse(DisasContext *s, int b, 
>> target_ulong pc_start, int rex_r)
>>  int b1, op1_offset, op2_offset, is_xmm, val, ot;
>>  int modrm, mod, rm, reg, reg_addr, offset_addr;
>>  SSEFunc_i_p sse_fn_i_p;
>> -SSEFunc_l_p sse_fn_l_p;
>>  SSEFunc_0_pi sse_fn_pi;
>> -SSEFunc_0_pl sse_fn_pl;
>>  SSEFunc_0_pp sse_fn_pp;
>>  SSEFunc_0_ppi sse_fn_ppi;
>>  SSEFunc_0_ppt sse_fn_ppt;
>> @@ -3568,9 +3566,9 @@ static void gen_sse(DisasContext *s, int b, 
>> target_ulong pc_start, int rex_r)
>>  tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
>>  sse_fn_pi(cpu_ptr0, cpu_tmp2_i32);
>>  } else {
>> -sse_fn_pl = sse_op_table3a[(s->dflag == 2) * 2 +
>> +sse_fn_pi = sse_op_table3a[(s->dflag == 2) * 2 +
>> ((b >> 8) - 2)];
>> -sse_fn_pl(cpu_ptr0, cpu_T[0]);
>> +sse_fn_pi(cpu_ptr0, cpu_T[0]);
>>  }
>>  break;
>>  case 0x02c: /* cvttps2pi */
>> @@ -3630,10 +3628,10 @@ static void gen_sse(DisasContext *s, int b, 
>> target_ulong pc_start, int rex_r)
>>  sse_fn_i_p(cpu_tmp2_i32, cpu_ptr0);
>>  tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
>>  } else {
>> -sse_fn_l_p = sse_op_table3b[(s->dflag == 2) * 2 +
>> +sse_fn_i_p = sse_op_table3b[(s->dflag == 2) * 2 +
>>  ((b >> 8) - 2) +
>>  (b & 1) * 4];
>> -sse_fn_l_p(cpu_T[0], cpu_ptr0);
>> +sse_fn_i_p(cpu_T[0], cpu_ptr0);
>>  }
>>  gen_op_mov_reg_T0(ot, reg);
>>  break;
>> --
>> 1.7.5.4
>>
>>
>
>
>
>-- 
>Best Regards,
>
>Dunrong Huang



[Qemu-devel] [PATCH] cleanup pc_vga_init function

2012-07-04 Thread Wanpeng Li
From: Wanpeng Li 

pc_vga_init no longer needs to return a DeviceState, so change its return
type to void and drop the now-unused local variable 'dev'.

Signed-off-by: Wanpeng Li 

---
 hw/pc.c |   18 +++---
 hw/pc.h |2 +-
 2 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/hw/pc.c b/hw/pc.c
index c7e9ab3..f387448 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -1037,39 +1037,35 @@ qemu_irq *pc_allocate_cpu_irq(void)
 return qemu_allocate_irqs(pic_irq_request, NULL, 1);
 }
 
-DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus)
+void pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus)
 {
-DeviceState *dev = NULL;
-
 if (cirrus_vga_enabled) {
 if (pci_bus) {
-dev = pci_cirrus_vga_init(pci_bus);
+pci_cirrus_vga_init(pci_bus);
 } else {
-dev = &isa_create_simple(isa_bus, "isa-cirrus-vga")->qdev;
+isa_create_simple(isa_bus, "isa-cirrus-vga");
 }
 } else if (vmsvga_enabled) {
 if (pci_bus) {
-dev = pci_vmsvga_init(pci_bus);
+pci_vmsvga_init(pci_bus);
 } else {
 fprintf(stderr, "%s: vmware_vga: no PCI bus\n", __FUNCTION__);
 }
 #ifdef CONFIG_SPICE
 } else if (qxl_enabled) {
 if (pci_bus) {
-dev = &pci_create_simple(pci_bus, -1, "qxl-vga")->qdev;
+pci_create_simple(pci_bus, -1, "qxl-vga");
 } else {
 fprintf(stderr, "%s: qxl: no PCI bus\n", __FUNCTION__);
 }
 #endif
 } else if (std_vga_enabled) {
 if (pci_bus) {
-dev = pci_vga_init(pci_bus);
+pci_vga_init(pci_bus);
 } else {
-dev = isa_vga_init(isa_bus);
+isa_vga_init(isa_bus);
 }
 }
-
-return dev;
 }
 
 static void cpu_request_exit(void *opaque, int irq, int level)
diff --git a/hw/pc.h b/hw/pc.h
index 31ccb6f..616864e 100644
--- a/hw/pc.h
+++ b/hw/pc.h
@@ -115,7 +115,7 @@ void *pc_memory_init(MemoryRegion *system_memory,
 MemoryRegion *rom_memory,
 MemoryRegion **ram_memory);
 qemu_irq *pc_allocate_cpu_irq(void);
-DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus);
+void pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus);
 void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
   ISADevice **rtc_state,
   ISADevice **floppy,
-- 
1.7.5.4




[Qemu-devel] [BUG] guest os oops when run in upstream qemu x86_64

2012-07-05 Thread Wanpeng Li
Hi all,

the upstream qemu v1.1.0-rc4   x86_64

/opt/qemu/bin/qemu-system-x86-64 -s -kernel vmlinuz-3.0.0-22-generic
-initrd initrd.img-3.0.0-22-generic -hda ./linux-0.2.img -append
root=/dev/sda -boot c -monitor stdio --enable-kvm

Results for each option combination:
without --enable-kvm, without -m:    guest OS oopses (out of memory) during boot
without --enable-kvm, with -m 1024:  guest OS blocks (hangs) during boot
with --enable-kvm, without -m:       guest OS blocks (hangs) during boot
with --enable-kvm, with -m 1024:     guest OS runs normally

Regards,
Wanpeng Li 




Re: [Qemu-devel] [question] Is there a plan to introduce a unified co-scheduling mechanism to CFS ?

2014-10-10 Thread Wanpeng Li


于 10/10/14, 7:37 PM, Zhang Haoyu 写道:

Hi,

Is it worthwhile to introduce a unified co-scheduling mechanism to CFS?
Because multiple cooperating threads or tasks frequently synchronize 
with each other,
not executing them concurrently only increases the latency of 
synchronization.
For example, a thread spinning on a spinlock while waiting for another 
thread to release it
might reduce its waiting time by being executed concurrently with the 
thread that holds the spinlock.
In a virtualization scenario, co-scheduling multiple vcpus (which belong 
to the same vm) is even more desirable

when several cooperating threads/tasks are running in the guest.

Is there a plan for this work?


Please refer to gang scheduler.

Regards,
Wanpeng Li



Thanks,
Zhang Haoyu
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html





[Qemu-devel] main-system-bus

2012-03-13 Thread Wanpeng Li
Hi all:

I am confused about the meaning of main-system-bus: does it emulate a
physical bus such as the FSB (front side bus), or is it just a virtual bus
that serves as the root of the device tree?

Regards,

Wanpeng Li




[Qemu-devel] [PATCH 2/6] convert MemoryRegion to QOM

2012-03-25 Thread Wanpeng Li

From: Anthony Liguori 


Signed-off-by: Anthony Liguori 
Signed-off-by: Wanpeng Li 

---
 memory.c |   94 ++
 memory.h |8 +
 2 files changed, 78 insertions(+), 24 deletions(-)

diff --git a/memory.c b/memory.c
index 22b0352..ec1a4ae 100644
--- a/memory.c
+++ b/memory.c
@@ -797,35 +797,26 @@ static bool memory_region_wrong_endianness(MemoryRegion 
*mr)
 #endif
 }
 
-void memory_region_init(MemoryRegion *mr,
-const char *name,
-uint64_t size)
+void memory_region_set_name(MemoryRegion *mr, const char *name)
+{
+mr->name = g_strdup(name);
+}
+
+void memory_region_set_size(MemoryRegion *mr, uint64_t size)
 {
-mr->ops = NULL;
-mr->parent = NULL;
 mr->size = int128_make64(size);
 if (size == UINT64_MAX) {
 mr->size = int128_2_64();
 }
-mr->addr = 0;
-mr->subpage = false;
-mr->enabled = true;
-mr->terminates = false;
-mr->ram = false;
-mr->readable = true;
-mr->readonly = false;
-mr->rom_device = false;
-mr->destructor = memory_region_destructor_none;
-mr->priority = 0;
-mr->may_overlap = false;
-mr->alias = NULL;
-QTAILQ_INIT(&mr->subregions);
-memset(&mr->subregions_link, 0, sizeof mr->subregions_link);
-QTAILQ_INIT(&mr->coalesced);
-mr->name = g_strdup(name);
-mr->dirty_log_mask = 0;
-mr->ioeventfd_nb = 0;
-mr->ioeventfds = NULL;
+}
+
+void memory_region_init(MemoryRegion *mr,
+const char *name,
+uint64_t size)
+{
+object_initialize(mr, TYPE_MEMORY_REGION);
+memory_region_set_name(mr, name);
+memory_region_set_size(mr, size);
 }
 
 static bool memory_region_access_valid(MemoryRegion *mr,
@@ -1640,3 +1631,58 @@ void mtree_info(fprintf_function mon_printf, void *f)
 mtree_print_mr(mon_printf, f, address_space_io.root, 0, 0, &ml_head);
 }
 }
+
+static void memory_region_initfn(Object *obj)
+{
+MemoryRegion *mr = MEMORY_REGION(obj);
+mr->ops = NULL;
+mr->parent = NULL;
+mr->size = int128_2_64();
+mr->addr = 0;
+mr->subpage = false;
+mr->enabled = true;
+mr->terminates = false;
+mr->ram = false;
+mr->readable = true;
+mr->readonly = false;
+mr->rom_device = false;
+mr->destructor = memory_region_destructor_none;
+mr->priority = 0;
+mr->may_overlap = false;
+mr->alias = NULL;
+mr->name = NULL;
+QTAILQ_INIT(&mr->subregions);
+memset(&mr->subregions_link, 0, sizeof mr->subregions_link);
+QTAILQ_INIT(&mr->coalesced);
+mr->dirty_log_mask = 0;
+mr->ioeventfd_nb = 0;
+mr->ioeventfds = NULL;
+}
+
+static void memory_region_finalize(Object *obj)
+{
+MemoryRegion *mr = MEMORY_REGION(obj);
+
+assert(QTAILQ_EMPTY(&mr->subregions));
+mr->destructor(mr);
+memory_region_clear_coalescing(mr);
+if (mr->name) {
+g_free((char *)mr->name);
+}
+g_free(mr->ioeventfds);
+}
+
+static TypeInfo memory_region_type = {
+.name = TYPE_MEMORY_REGION,
+.parent = TYPE_OBJECT,
+.instance_size = sizeof(MemoryRegion),
+.instance_init = memory_region_initfn,
+.instance_finalize = memory_region_finalize,
+};
+
+static void register_devices(void)
+{
+type_register_static(&memory_region_type);
+}
+
+type_init(register_devices);
diff --git a/memory.h b/memory.h
index 53ff62b..d4d5600 100644
--- a/memory.h
+++ b/memory.h
@@ -25,6 +25,7 @@
 #include "iorange.h"
 #include "ioport.h"
 #include "int128.h"
+#include "qemu/object.h"
 
 typedef struct MemoryRegionOps MemoryRegionOps;
 typedef struct MemoryRegion MemoryRegion;
@@ -116,6 +117,9 @@ struct MemoryRegionOps {
 typedef struct CoalescedMemoryRange CoalescedMemoryRange;
 typedef struct MemoryRegionIoeventfd MemoryRegionIoeventfd;
 
+#define TYPE_MEMORY_REGION "memory-region"
+#define MEMORY_REGION(obj) OBJECT_CHECK(MemoryRegion, (obj), 
TYPE_MEMORY_REGION)
+
 struct MemoryRegion {
 /* All fields are private - violators will be prosecuted */
 const MemoryRegionOps *ops;
@@ -719,6 +723,10 @@ void memory_global_dirty_log_stop(void);
 
 void mtree_info(fprintf_function mon_printf, void *f);
 
+void memory_region_set_name(MemoryRegion *mr, const char *name);
+
+void memory_region_set_size(MemoryRegion *mr, uint64_t size);
+
 #endif
 
 #endif
-- 
1.7.5.4




[Qemu-devel] [PATCH 4/6] prepare to create HPET, RTC and i8254 through composition

2012-03-25 Thread Wanpeng Li

From: Anthony Liguori 

The HPET usually sits on the LPC bus (which replaces ISA in modern systems).  
It's sometimes a dedicated chip but can certainly co-exist in a Super IO chip.  
I think in terms of where it would live in this hypothetical device model,
putting it in the PIIX is rational.


Signed-off-by: Anthony Liguori 
Signed-off-by: Wanpeng Li 

---
 hw/hpet.c   |   39 ++-
 hw/hpet_emul.h  |   41 +
 hw/i8254_internal.h |2 +-
 hw/mc146818rtc.c|   26 --
 hw/mc146818rtc.h|   29 +
 5 files changed, 73 insertions(+), 64 deletions(-)

diff --git a/hw/hpet.c b/hw/hpet.c
index fd3ddca..fc0ff6c 100644
--- a/hw/hpet.c
+++ b/hw/hpet.c
@@ -42,41 +42,6 @@
 
 #define HPET_MSI_SUPPORT0
 
-struct HPETState;
-typedef struct HPETTimer {  /* timers */
-uint8_t tn; /*timer number*/
-QEMUTimer *qemu_timer;
-struct HPETState *state;
-/* Memory-mapped, software visible timer registers */
-uint64_t config;/* configuration/cap */
-uint64_t cmp;   /* comparator */
-uint64_t fsb;   /* FSB route */
-/* Hidden register state */
-uint64_t period;/* Last value written to comparator */
-uint8_t wrap_flag;  /* timer pop will indicate wrap for one-shot 32-bit
- * mode. Next pop will be actual timer expiration.
- */
-} HPETTimer;
-
-typedef struct HPETState {
-SysBusDevice busdev;
-MemoryRegion iomem;
-uint64_t hpet_offset;
-qemu_irq irqs[HPET_NUM_IRQ_ROUTES];
-uint32_t flags;
-uint8_t rtc_irq_level;
-qemu_irq pit_enabled;
-uint8_t num_timers;
-HPETTimer timer[HPET_MAX_TIMERS];
-
-/* Memory-mapped, software visible registers */
-uint64_t capability;/* capabilities */
-uint64_t config;/* configuration */
-uint64_t isr;   /* interrupt status reg */
-uint64_t hpet_counter;  /* main counter */
-uint8_t  hpet_id;   /* instance id */
-} HPETState;
-
 static uint32_t hpet_in_legacy_mode(HPETState *s)
 {
 return s->config & HPET_CFG_LEGACY;
@@ -278,7 +243,7 @@ static const VMStateDescription vmstate_hpet_timer = {
 };
 
 static const VMStateDescription vmstate_hpet = {
-.name = "hpet",
+.name = TYPE_HPET,
 .version_id = 2,
 .minimum_version_id = 1,
 .minimum_version_id_old = 1,
@@ -746,7 +711,7 @@ static void hpet_device_class_init(ObjectClass *klass, void 
*data)
 }
 
 static TypeInfo hpet_device_info = {
-.name  = "hpet",
+.name  = TYPE_HPET,
 .parent= TYPE_SYS_BUS_DEVICE,
 .instance_size = sizeof(HPETState),
 .class_init= hpet_device_class_init,
diff --git a/hw/hpet_emul.h b/hw/hpet_emul.h
index 757f79f..5808a19 100644
--- a/hw/hpet_emul.h
+++ b/hw/hpet_emul.h
@@ -13,6 +13,9 @@
 #ifndef QEMU_HPET_EMUL_H
 #define QEMU_HPET_EMUL_H
 
+#include "hw.h"
+#include "sysbus.h"
+
 #define HPET_BASE   0xfed0
 #define HPET_CLK_PERIOD 1000ULL /* 1000 femtoseconds == 10ns*/
 
@@ -71,4 +74,42 @@ struct hpet_fw_config
 } QEMU_PACKED;
 
 extern struct hpet_fw_config hpet_cfg;
+
+#define TYPE_HPET "hpet"
+
+struct HPETState;
+typedef struct HPETTimer {  /* timers */
+uint8_t tn; /*timer number*/
+QEMUTimer *qemu_timer;
+struct HPETState *state;
+/* Memory-mapped, software visible timer registers */
+uint64_t config;/* configuration/cap */
+uint64_t cmp;   /* comparator */
+uint64_t fsb;   /* FSB route */
+/* Hidden register state */
+uint64_t period;/* Last value written to comparator */
+uint8_t wrap_flag;  /* timer pop will indicate wrap for one-shot 32-bit
+ * mode. Next pop will be actual timer expiration.
+ */
+} HPETTimer;
+
+typedef struct HPETState {
+SysBusDevice busdev;
+MemoryRegion iomem;
+uint64_t hpet_offset;
+qemu_irq irqs[HPET_NUM_IRQ_ROUTES];
+uint32_t flags;
+uint8_t rtc_irq_level;
+   qemu_irq pit_enabled;
+uint8_t num_timers;
+HPETTimer timer[HPET_MAX_TIMERS];
+
+/* Memory-mapped, software visible registers */
+uint64_t capability;/* capabilities */
+uint64_t config;/* configuration */
+uint64_t isr;   /* interrupt status reg */
+uint64_t hpet_counter;  /* main counter */
+uint8_t  hpet_id;   /* instance id */
+} HPETState;
+
 #endif
diff --git a/hw/i8254_internal.h b/hw/i8254_internal.h
index 686f0c2..542f7c1 100644
--- a/hw/i8254_internal.h
+++ b/hw/i8254_internal.h
@@ -26,7 +26,6 @@
 #define QEMU_I8254_INTERNAL_H
 
 #include "hw.h"
-#include "pc.h"
 #include "isa.h"
 
 typedef struct PITChan

[Qemu-devel] [PATCH 3/6] convert pci-host to QOM

2012-03-25 Thread Wanpeng Li

From: Anthony Liguori 


Signed-off-by: Anthony Liguori 
Signed-off-by: Wanpeng Li 

---
 hw/pci_host.c |   26 ++
 hw/pci_host.h |5 +
 2 files changed, 31 insertions(+), 0 deletions(-)

diff --git a/hw/pci_host.c b/hw/pci_host.c
index 44c6c20..44d7e55 100644
--- a/hw/pci_host.c
+++ b/hw/pci_host.c
@@ -162,4 +162,30 @@ const MemoryRegionOps pci_host_data_be_ops = {
 .endianness = DEVICE_BIG_ENDIAN,
 };
 
+void pci_host_set_mmio(PCIHostState *s, MemoryRegion *value)
+{
+   object_property_set_link(OBJECT(s), OBJECT(value), "mmio", NULL);
+}
+
+static void pci_host_initfn(Object *obj)
+{
+PCIHostState *s = PCI_HOST(obj);
+
+   object_property_add_link(obj, "mmio", TYPE_MEMORY_REGION,
+(Object **)&s->address_space, 
NULL);
+}
+
+static TypeInfo pci_host_type = {
+.name = TYPE_PCI_HOST,
+.parent = TYPE_SYS_BUS_DEVICE,
+   .instance_size = sizeof(PCIHostState),
+   .instance_init = pci_host_initfn,
+};
+
+static void register_devices(void)
+{
+   type_register_static(&pci_host_type);
+}
+
+type_init(register_devices);
 
diff --git a/hw/pci_host.h b/hw/pci_host.h
index 359e38f..084e15c 100644
--- a/hw/pci_host.h
+++ b/hw/pci_host.h
@@ -30,6 +30,9 @@
 
 #include "sysbus.h"
 
+#define TYPE_PCI_HOST "pci-host"
+#define PCI_HOST(obj) OBJECT_CHECK(PCIHostState, (obj), TYPE_PCI_HOST)
+
 struct PCIHostState {
 SysBusDevice busdev;
 MemoryRegion conf_mem;
@@ -49,6 +52,8 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, 
uint32_t addr,
 void pci_data_write(PCIBus *s, uint32_t addr, uint32_t val, int len);
 uint32_t pci_data_read(PCIBus *s, uint32_t addr, int len);
 
+void pci_host_set_mmio(PCIHostState *s, MemoryRegion *value);
+
 extern const MemoryRegionOps pci_host_conf_le_ops;
 extern const MemoryRegionOps pci_host_conf_be_ops;
 extern const MemoryRegionOps pci_host_data_le_ops;
-- 
1.7.5.4




[Qemu-devel] [PATCH 0/6] refactor PC machine, i440fx and piix3 to take advantage of QOM

2012-03-25 Thread Wanpeng Li

From: Anthony Liguori 


This series aggressively refactors the PC machine initialization to be more
modelled and less ad-hoc.  The highlights of this series are:

 1) Things like -m and -bios-name are now device model properties

 2) The i440fx and piix3 are now modelled in a thorough fashion

 3) Most of the chipset features of the piix3 are modelled through composition

 4) i440fx_init is trivialized to creating devices and setting properties

 5) convert MemoryRegion to QOM

 6) convert PCI host bridge to QOM

The point (4) is the most important one.  As we refactor in this fashion,
we should quickly get to the point where machine->init disappears completely in
favor of just creating a handful of devices.

The two stage initialization of QOM is important here.  instance_init() is when
composed devices are created which means that after you've created a device, all
of its children are visible in the device model.  This lets you set properties
of the parent and its children.

realize() (which is still called DeviceState::init today) will be called right
before the guest starts up for the first time.

Signed-off-by: Anthony Liguori 
Signed-off-by: Wanpeng Li 

---
 Makefile.target |3 +-
 hw/hpet.c   |   39 +---
 hw/hpet_emul.h  |   41 +++
 hw/i440fx.c |  431 ++
 hw/i440fx.h |   78 +
 hw/i8254_internal.h |2 +-
 hw/mc146818rtc.c|   26 --
 hw/mc146818rtc.h|   29 ++
 hw/pc.c |  838 +--
 hw/pc.h |   46 +---
 hw/pc_piix.c|  762 --
 hw/pci_host.c   |   26 ++
 hw/pci_host.h   |5 +
 hw/piix3.c  |  274 +
 hw/piix3.h  |   79 +
 hw/piix_pci.c   |  600 
 memory.c|   94 +--
 memory.h|8 +
 18 files changed, 1795 insertions(+), 1586 deletions(-)
 create mode 100644 hw/i440fx.c
 create mode 100644 hw/i440fx.h
 delete mode 100644 hw/pc_piix.c
 create mode 100644 hw/piix3.c
 create mode 100644 hw/piix3.h
 delete mode 100644 hw/piix_pci.c
--




[Qemu-devel] [PATCH 6/6] make some functions static

2012-03-25 Thread Wanpeng Li

From: Anthony Liguori 

Signed-off-by: Anthony Liguori 
Signed-off-by: Wanpeng Li 

---
 hw/pc.c |   22 +++---
 hw/pc.h |   26 --
 2 files changed, 11 insertions(+), 37 deletions(-)

diff --git a/hw/pc.c b/hw/pc.c
index d5a557e..5f93643 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -98,7 +98,7 @@ static const int ide_irq[MAX_IDE_BUS] = { 14, 15 };
 static struct e820_table e820_table;
 struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX};
 
-void gsi_handler(void *opaque, int n, int level)
+static void gsi_handler(void *opaque, int n, int level)
 {
 GSIState *s = opaque;
 
@@ -116,7 +116,7 @@ static void ioport80_write(void *opaque, uint32_t addr, 
uint32_t data)
 /* MSDOS compatibility mode FPU exception support */
 static qemu_irq ferr_irq;
 
-void pc_register_ferr_irq(qemu_irq irq)
+static void pc_register_ferr_irq(qemu_irq irq)
 {
 ferr_irq = irq;
 }
@@ -339,7 +339,7 @@ static void pc_cmos_init_late(void *opaque)
 qemu_unregister_reset(pc_cmos_init_late, opaque);
 }
 
-void pc_cmos_init(ram_addr_t ram_size, ram_addr_t above_4g_mem_size,
+static void pc_cmos_init(ram_addr_t ram_size, ram_addr_t above_4g_mem_size,
   const char *boot_device,
   ISADevice *floppy, BusState *idebus0, BusState *idebus1,
   ISADevice *s)
@@ -869,7 +869,7 @@ static const int ne2000_irq[NE2000_NB_MAX] = { 9, 10, 11, 
3, 4, 5 };
 static const int parallel_io[MAX_PARALLEL_PORTS] = { 0x378, 0x278, 0x3bc };
 static const int parallel_irq[MAX_PARALLEL_PORTS] = { 7, 7, 7 };
 
-void pc_init_ne2k_isa(ISABus *bus, NICInfo *nd)
+static void pc_init_ne2k_isa(ISABus *bus, NICInfo *nd)
 {
 static int nb_ne2k = 0;
 
@@ -926,7 +926,7 @@ static DeviceState *apic_init(void *env, uint8_t apic_id)
 return dev;
 }
 
-void pc_acpi_smi_interrupt(void *opaque, int irq, int level)
+static void pc_acpi_smi_interrupt(void *opaque, int irq, int level)
 {
 CPUX86State *s = opaque;
 
@@ -960,7 +960,7 @@ static CPUX86State *pc_new_cpu(const char *cpu_model)
 return env;
 }
 
-void pc_cpus_init(const char *cpu_model)
+static void pc_cpus_init(const char *cpu_model)
 {
 int i;
 
@@ -978,7 +978,7 @@ void pc_cpus_init(const char *cpu_model)
 }
 }
 
-void pc_memory_init(MemoryRegion *system_memory,
+static void pc_memory_init(MemoryRegion *system_memory,
 const char *kernel_filename,
 const char *kernel_cmdline,
 const char *initrd_filename,
@@ -1002,12 +1002,12 @@ void pc_memory_init(MemoryRegion *system_memory,
 }
 }
 
-qemu_irq *pc_allocate_cpu_irq(void)
+static qemu_irq *pc_allocate_cpu_irq(void)
 {
 return qemu_allocate_irqs(pic_irq_request, NULL, 1);
 }
 
-DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus)
+static DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus)
 {
 DeviceState *dev = NULL;
 
@@ -1051,7 +1051,7 @@ static void cpu_request_exit(void *opaque, int irq, int 
level)
 }
 }
 
-void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
+static void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
   ISADevice **floppy,
   bool no_vmport)
 {
@@ -1102,7 +1102,7 @@ void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
 *floppy = fdctrl_init_isa(isa_bus, fd);
 }
 
-void pc_pci_device_init(PCIBus *pci_bus)
+static void pc_pci_device_init(PCIBus *pci_bus)
 {
 int max_bus;
 int bus;
diff --git a/hw/pc.h b/hw/pc.h
index 7348da2..89f78dd 100644
--- a/hw/pc.h
+++ b/hw/pc.h
@@ -80,8 +80,6 @@ typedef struct GSIState {
 qemu_irq ioapic_irq[IOAPIC_NUM_PINS];
 } GSIState;
 
-void gsi_handler(void *opaque, int n, int level);
-
 /* vmport.c */
 static inline void vmport_init(ISABus *bus)
 {
@@ -103,30 +101,6 @@ void i8042_setup_a20_line(ISADevice *dev, qemu_irq 
*a20_out);
 /* pc.c */
 extern int fd_bootchk;
 
-void pc_register_ferr_irq(qemu_irq irq);
-void pc_acpi_smi_interrupt(void *opaque, int irq, int level);
-
-void pc_cpus_init(const char *cpu_model);
-void pc_memory_init(MemoryRegion *system_memory,
-const char *kernel_filename,
-const char *kernel_cmdline,
-const char *initrd_filename,
-ram_addr_t below_4g_mem_size,
-ram_addr_t above_4g_mem_size,
-MemoryRegion *rom_memory,
-MemoryRegion **ram_memory);
-qemu_irq *pc_allocate_cpu_irq(void);
-DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus);
-void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
-  ISADevice **floppy,
-  bool no_vmport);
-void pc_init_ne2k_isa(ISABus *bus, NICInfo *nd);
-void pc_cmos_init(ram_addr_t ram_size, ram_addr_t above_4g_mem_size,
-  const char *boot_device,
-  ISADevice *floppy, BusState *ide0, BusState *ide1,
-  ISADevice *s);
-void pc_pci_device_init(PCIBus

[Qemu-devel] [PATCH 5/6] merge pc_piix.c to pc.c

2012-03-25 Thread Wanpeng Li

From: Anthony Liguori 

Signed-off-by: Anthony Liguori 
Signed-off-by: Wanpeng Li 

---
 Makefile.target |1 -
 hw/pc.c |  816 +--
 hw/pc.h |   20 +-
 hw/pc_piix.c|  762 ---
 4 files changed, 739 insertions(+), 860 deletions(-)
 delete mode 100644 hw/pc_piix.c

diff --git a/Makefile.target b/Makefile.target
index 24fb0c0..5c4605f 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -251,7 +251,6 @@ obj-i386-y += sga.o ioapic_common.o ioapic.o i440fx.o 
piix3.o
 obj-i386-y += vmport.o
 obj-i386-y += pci-hotplug.o smbios.o wdt_ib700.o
 obj-i386-y += debugcon.o multiboot.o
-obj-i386-y += pc_piix.o
 obj-i386-y += pc_sysfw.o
 obj-i386-$(CONFIG_KVM) += kvm/clock.o kvm/apic.o kvm/i8259.o kvm/ioapic.o 
kvm/i8254.o
 obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o
diff --git a/hw/pc.c b/hw/pc.c
index 83a1b5b..d5a557e 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -36,8 +36,6 @@
 #include "elf.h"
 #include "multiboot.h"
 #include "mc146818rtc.h"
-#include "i8254.h"
-#include "pcspk.h"
 #include "msi.h"
 #include "sysbus.h"
 #include "sysemu.h"
@@ -46,6 +44,11 @@
 #include "ui/qemu-spice.h"
 #include "memory.h"
 #include "exec-memory.h"
+#include "kvm/clock.h"
+#include "xen.h"
+#include "arch_init.h"
+#include "smbus.h"
+#include "boards.h"
 
 /* output Bochs bios info messages */
 //#define DEBUG_BIOS
@@ -60,6 +63,8 @@
 #define DPRINTF(fmt, ...)
 #endif
 
+#define PC_MAX_BIOS_SIZE (4 * 1024 * 1024)
+
 /* Leave a chunk of memory at the top of RAM for the BIOS ACPI tables.  */
 #define ACPI_DATA_SIZE   0x1
 #define BIOS_CFG_IOPORT 0x510
@@ -73,6 +78,8 @@
 
 #define E820_NR_ENTRIES16
 
+#define MAX_IDE_BUS 2
+
 struct e820_entry {
 uint64_t address;
 uint64_t length;
@@ -84,6 +91,10 @@ struct e820_table {
 struct e820_entry entry[E820_NR_ENTRIES];
 } QEMU_PACKED __attribute((__aligned__(4)));
 
+static const int ide_iobase[MAX_IDE_BUS] = { 0x1f0, 0x170 };
+static const int ide_iobase2[MAX_IDE_BUS] = { 0x3f6, 0x376 };
+static const int ide_irq[MAX_IDE_BUS] = { 14, 15 };
+
 static struct e820_table e820_table;
 struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX};
 
@@ -889,7 +900,7 @@ static DeviceState *apic_init(void *env, uint8_t apic_id)
 DeviceState *dev;
 static int apic_mapped;
 
-if (kvm_irqchip_in_kernel()) {
+if (kvm_enabled() && kvm_irqchip_in_kernel()) {
 dev = qdev_create(NULL, "kvm-apic");
 } else {
 dev = qdev_create(NULL, "apic");
@@ -908,7 +919,7 @@ static DeviceState *apic_init(void *env, uint8_t apic_id)
 }
 
 /* KVM does not support MSI yet. */
-if (!kvm_irqchip_in_kernel()) {
+if (!kvm_enabled() || !kvm_irqchip_in_kernel()) {
 msi_supported = true;
 }
 
@@ -972,50 +983,13 @@ void pc_memory_init(MemoryRegion *system_memory,
 const char *kernel_cmdline,
 const char *initrd_filename,
 ram_addr_t below_4g_mem_size,
-ram_addr_t above_4g_mem_size,
-MemoryRegion *rom_memory,
-MemoryRegion **ram_memory)
+ram_addr_t above_4g_mem_size)
 {
 int linux_boot, i;
-MemoryRegion *ram, *option_rom_mr;
-MemoryRegion *ram_below_4g, *ram_above_4g;
 void *fw_cfg;
 
 linux_boot = (kernel_filename != NULL);
 
-/* Allocate RAM.  We allocate it as a single memory region and use
- * aliases to address portions of it, mostly for backwards compatibility
- * with older qemus that used qemu_ram_alloc().
- */
-ram = g_malloc(sizeof(*ram));
-memory_region_init_ram(ram, "pc.ram",
-   below_4g_mem_size + above_4g_mem_size);
-vmstate_register_ram_global(ram);
-*ram_memory = ram;
-ram_below_4g = g_malloc(sizeof(*ram_below_4g));
-memory_region_init_alias(ram_below_4g, "ram-below-4g", ram,
- 0, below_4g_mem_size);
-memory_region_add_subregion(system_memory, 0, ram_below_4g);
-if (above_4g_mem_size > 0) {
-ram_above_4g = g_malloc(sizeof(*ram_above_4g));
-memory_region_init_alias(ram_above_4g, "ram-above-4g", ram,
- below_4g_mem_size, above_4g_mem_size);
-memory_region_add_subregion(system_memory, 0x1ULL,
-ram_above_4g);
-}
-
-
-/* Initialize PC system firmware */
-pc_system_firmware_init(rom_memory);
-
-option_rom_mr = g_malloc(sizeof(*option_rom_mr));
-memory_region_init_ram(option_rom_mr, "pc.rom", PC_ROM_SIZE);
-vmstate_register_ram_global(option_rom_mr);
-memory_region_add_subregion_over

[Qemu-devel] [PATCH 1/6] eliminate piix_pci.c and module i440fx and piix3

2012-03-25 Thread Wanpeng Li

From: Anthony Liguori 


The big picture of this patch is as follows:

1) pc_init creates an I440FX, any bus devices (ISA serial port, PCI
vga and nics, etc.), sets properties appropriately, and realizes the
devices.
2) I440FX is-a PCIHost, has-a I440FX-PMC, has-a PIIX3
3) PIIX3 has-a RTC, has-a I8042, has-a DMAController, etc.

i440fx-pcihost => i440fx
i440fx => i440fx-pmc

i440fx-pmc is the Programmable Memory Controller that is integrated in the
I440FX chipset; this patch moves RAM initialization into i440fx-pmc.

It might seem like a small change, but it better reflects the fact that the PMC
is contained within the i440fx which we will now reflect in composition in the
next few changesets.


Signed-off-by: Anthony Liguori 
Signed-off-by: Wanpeng Li 

---
 Makefile.target |2 +-
 hw/i440fx.c |  431 +++
 hw/i440fx.h |   78 +++
 hw/piix3.c  |  274 +
 hw/piix3.h  |   79 
 hw/piix_pci.c   |  600 ---
 6 files changed, 863 insertions(+), 601 deletions(-)
 create mode 100644 hw/i440fx.c
 create mode 100644 hw/i440fx.h
 create mode 100644 hw/piix3.c
 create mode 100644 hw/piix3.h
 delete mode 100644 hw/piix_pci.c

diff --git a/Makefile.target b/Makefile.target
index 63cf769..24fb0c0 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -247,7 +247,7 @@ obj-y += device-hotplug.o
 # Hardware support
 obj-i386-y += mc146818rtc.o pc.o
 obj-i386-y += apic_common.o apic.o kvmvapic.o
-obj-i386-y += sga.o ioapic_common.o ioapic.o piix_pci.o
+obj-i386-y += sga.o ioapic_common.o ioapic.o i440fx.o piix3.o
 obj-i386-y += vmport.o
 obj-i386-y += pci-hotplug.o smbios.o wdt_ib700.o
 obj-i386-y += debugcon.o multiboot.o
diff --git a/hw/i440fx.c b/hw/i440fx.c
new file mode 100644
index 0000000..3658740
--- /dev/null
+++ b/hw/i440fx.c
@@ -0,0 +1,431 @@
+/*
+ * QEMU i440FX PCI Host Bridge Emulation
+ *
+ * Copyright (c) 2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "i440fx.h"
+#include "range.h"
+#include "xen.h"
+#include "loader.h"
+#include "pc.h"
+
+#define BIOS_FILENAME "bios.bin"
+
+/*
+ * I440FX chipset data sheet.
+ * http://download.intel.com/design/chipsets/datashts/29054901.pdf
+ *
+ * The I440FX is a package that contains an integrated PCI Host controller,
+ * memory controller, and is usually packaged with a PCI-ISA bus and super I/O
+ * chipset.
+ *
+ * The "i440FX" device is the PCI host controller.  On function 0.0, there is a
+ * memory controller called the Programmable Memory Controller (PMC).  On
+ * function 1.0, there is the PCI-ISA bus/super I/O chip called the PIIX3.
+ */
+
+#define I440FX_PMC_PCI_HOLE 0xE0000000ULL
+#define I440FX_PMC_PCI_HOLE_END 0x100000000ULL
+
+#define I440FX_PAM  0x59
+#define I440FX_PAM_SIZE 7
+#define I440FX_SMRAM 0x72
+
+static void piix3_set_irq(void *opaque, int pirq, int level)
+{
+PIIX3State *piix3 = opaque;
+piix3_set_irq_level(piix3, pirq, level);
+}
+
+/* return the global irq number corresponding to a given device irq
+   pin. We could also use the bus number to have a more precise
+   mapping. */
+static int pci_slot_get_pirq(PCIDevice *pci_dev, int pci_intx)
+{
+int slot_addend;
+slot_addend = (pci_dev->devfn >> 3) - 1;
+return (pci_intx + slot_addend) & 3;
+}
+
+static void update_pam(I440FXPMCState *d, uint32_t start, uint32_t end, int r,
+   PAMMemoryRegion *mem)
+{
+if (mem->initialized) {
+memory_region_del_subregion(d->system_memory, &mem->mem);
+memory_region_destroy(&mem->mem);
+}
+
+//printf("ISA mapping %08x-0x%08x: %d\n", start, end, r);
+switch(r) {
+case 3:
+/* RAM */
+memory_region_init_alias(&mem->mem, &qu

[Qemu-devel] [PATCH 3/6] convert pci-host to QOM

2012-03-26 Thread Wanpeng Li

From: Anthony Liguori 


Signed-off-by: Anthony Liguori 
Signed-off-by: Wanpeng Li 

---
 hw/pci_host.c |   26 ++
 hw/pci_host.h |5 +
 2 files changed, 31 insertions(+), 0 deletions(-)

diff --git a/hw/pci_host.c b/hw/pci_host.c
index 44c6c20..44d7e55 100644
--- a/hw/pci_host.c
+++ b/hw/pci_host.c
@@ -162,4 +162,30 @@ const MemoryRegionOps pci_host_data_be_ops = {
 .endianness = DEVICE_BIG_ENDIAN,
 };
 
+void pci_host_set_mmio(PCIHostState *s, MemoryRegion *value)
+{
+object_property_set_link(OBJECT(s), OBJECT(value), "mmio", NULL);
+}
+
+static void pci_host_initfn(Object *obj)
+{
+PCIHostState *s = PCI_HOST(obj);
+
+object_property_add_link(obj, "mmio", TYPE_MEMORY_REGION,
+(Object **)&s->address_space, 
NULL);
+}
+
+static TypeInfo pci_host_type = {
+.name = TYPE_PCI_HOST,
+.parent = TYPE_SYS_BUS_DEVICE,
+.instance_size = sizeof(PCIHostState),
+.instance_init = pci_host_initfn,
+};
+
+static void register_devices(void)
+{
+type_register_static(&pci_host_type);
+}
+
+type_init(register_devices);
 
diff --git a/hw/pci_host.h b/hw/pci_host.h
index 359e38f..084e15c 100644
--- a/hw/pci_host.h
+++ b/hw/pci_host.h
@@ -30,6 +30,9 @@
 
 #include "sysbus.h"
 
+#define TYPE_PCI_HOST "pci-host"
+#define PCI_HOST(obj) OBJECT_CHECK(PCIHostState, (obj), TYPE_PCI_HOST)
+
 struct PCIHostState {
 SysBusDevice busdev;
 MemoryRegion conf_mem;
@@ -49,6 +52,8 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, 
uint32_t addr,
 void pci_data_write(PCIBus *s, uint32_t addr, uint32_t val, int len);
 uint32_t pci_data_read(PCIBus *s, uint32_t addr, int len);
 
+void pci_host_set_mmio(PCIHostState *s, MemoryRegion *value);
+
 extern const MemoryRegionOps pci_host_conf_le_ops;
 extern const MemoryRegionOps pci_host_conf_be_ops;
 extern const MemoryRegionOps pci_host_data_le_ops;
-- 
1.7.5.4




Re: [Qemu-devel] [PATCH 3/6] convert pci-host to QOM

2012-03-26 Thread Wanpeng Li
On Mon, Mar 26, 2012 at 10:06:45AM +0800, Wanpeng Li wrote:
>
>From: Anthony Liguori 
>
>
>Signed-off-by: Anthony Liguori 
>Signed-off-by: Wanpeng Li 
>
>---
> hw/pci_host.c |   26 ++
> hw/pci_host.h |5 +
> 2 files changed, 31 insertions(+), 0 deletions(-)
>
>diff --git a/hw/pci_host.c b/hw/pci_host.c
>index 44c6c20..44d7e55 100644
>--- a/hw/pci_host.c
>+++ b/hw/pci_host.c
>@@ -162,4 +162,30 @@ const MemoryRegionOps pci_host_data_be_ops = {
> .endianness = DEVICE_BIG_ENDIAN,
> };
>
>+void pci_host_set_mmio(PCIHostState *s, MemoryRegion *value)
>+{
>+  object_property_set_link(OBJECT(s), OBJECT(value), "mmio", NULL);
>+}
>+
>+static void pci_host_initfn(Object *obj)
>+{
>+PCIHostState *s = PCI_HOST(obj);
>+
>+  object_property_add_link(obj, "mmio", TYPE_MEMORY_REGION,
>+   (Object **)&s->address_space, 
>NULL);
>+}
>+
>+static TypeInfo pci_host_type = {
>+.name = TYPE_PCI_HOST,
>+.parent = TYPE_SYS_BUS_DEVICE,
>+  .instance_size = sizeof(PCIHostState),
>+  .instance_init = pci_host_initfn,
>+};
>+
>+static void register_devices(void)
>+{
>+  type_register_static(&pci_host_type);
>+}
>+
>+type_init(register_devices);
>
>diff --git a/hw/pci_host.h b/hw/pci_host.h
>index 359e38f..084e15c 100644
>--- a/hw/pci_host.h
>+++ b/hw/pci_host.h
>@@ -30,6 +30,9 @@
>
> #include "sysbus.h"
>
>+#define TYPE_PCI_HOST "pci-host"
>+#define PCI_HOST(obj) OBJECT_CHECK(PCIHostState, (obj), TYPE_PCI_HOST)
>+
> struct PCIHostState {
> SysBusDevice busdev;
> MemoryRegion conf_mem;
>@@ -49,6 +52,8 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, 
>uint32_t addr,
> void pci_data_write(PCIBus *s, uint32_t addr, uint32_t val, int len);
> uint32_t pci_data_read(PCIBus *s, uint32_t addr, int len);
>
>+void pci_host_set_mmio(PCIHostState *s, MemoryRegion *value);
>+
> extern const MemoryRegionOps pci_host_conf_le_ops;
> extern const MemoryRegionOps pci_host_conf_be_ops;
> extern const MemoryRegionOps pci_host_data_le_ops;
>-- 
>1.7.5.4
>


>From 72bc193e6e25cb393437317843a701b82a9b9233 Mon Sep 17 00:00:00 2001
From: Wanpeng Li 
Date: Thu, 22 Mar 2012 17:57:30 +0800
Subject: [PATCH 3/6] convert pci-host to QOM


From: Anthony Liguori 


Signed-off-by: Anthony Liguori 
Signed-off-by: Wanpeng Li 

---
 hw/pci_host.c |   26 ++
 hw/pci_host.h |5 +
 2 files changed, 31 insertions(+), 0 deletions(-)

diff --git a/hw/pci_host.c b/hw/pci_host.c
index 44c6c20..44d7e55 100644
--- a/hw/pci_host.c
+++ b/hw/pci_host.c
@@ -162,4 +162,30 @@ const MemoryRegionOps pci_host_data_be_ops = {
 .endianness = DEVICE_BIG_ENDIAN,
 };
 
+void pci_host_set_mmio(PCIHostState *s, MemoryRegion *value)
+{
+object_property_set_link(OBJECT(s), OBJECT(value), "mmio", NULL);
+}
+
+static void pci_host_initfn(Object *obj)
+{
+PCIHostState *s = PCI_HOST(obj);
+
+object_property_add_link(obj, "mmio", TYPE_MEMORY_REGION,
+	 (Object **)&s->address_space, NULL);
+}
+
+static TypeInfo pci_host_type = {
+.name = TYPE_PCI_HOST,
+.parent = TYPE_SYS_BUS_DEVICE,
+.instance_size = sizeof(PCIHostState),
+.instance_init = pci_host_initfn,
+};
+
+static void register_devices(void)
+{
+type_register_static(&pci_host_type);
+}
+
+type_init(register_devices);
 
diff --git a/hw/pci_host.h b/hw/pci_host.h
index 359e38f..084e15c 100644
--- a/hw/pci_host.h
+++ b/hw/pci_host.h
@@ -30,6 +30,9 @@
 
 #include "sysbus.h"
 
+#define TYPE_PCI_HOST "pci-host"
+#define PCI_HOST(obj) OBJECT_CHECK(PCIHostState, (obj), TYPE_PCI_HOST)
+
 struct PCIHostState {
 SysBusDevice busdev;
 MemoryRegion conf_mem;
@@ -49,6 +52,8 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr,
 void pci_data_write(PCIBus *s, uint32_t addr, uint32_t val, int len);
 uint32_t pci_data_read(PCIBus *s, uint32_t addr, int len);
 
+void pci_host_set_mmio(PCIHostState *s, MemoryRegion *value);
+
 extern const MemoryRegionOps pci_host_conf_le_ops;
 extern const MemoryRegionOps pci_host_conf_be_ops;
 extern const MemoryRegionOps pci_host_data_le_ops;
-- 
1.7.5.4



Re: [Qemu-devel] [PATCH 0/6] refactor PC machine, i440fx and piix3 to take advantage of QOM

2012-03-26 Thread Wanpeng Li
On Mon, Mar 26, 2012 at 02:47:19PM +0200, Andreas Färber wrote:
>Am 26.03.2012 04:06, schrieb Wanpeng Li:
>> From: Anthony Liguori 
>
>Resending an old cover letter is not a good idea. This looks like a v2,
>so please mark it as such in the subjects; it's missing a Change Log
>against Anthony's version. I take it, some patches were dropped?
>
No, I just helped him rebase his patches.

>> This series aggressively refactors the PC machine initialization to be more
>> modelled and less ad-hoc.  The highlights of this series are:
>> 
>>  1) Things like -m and -bios-name are now device model properties
>> 
>>  2) The i440fx and piix3 are now modelled in a thorough fashion
>> 
>>  3) Most of the chipset features of the piix3 are modelled through 
>> composition
>> 
>>  4) i440fx_init is trivialized to creating devices and setting properties
>> 
>>  5) convert MemoryRegion to QOM
>> 
>>  6) convert PCI host bridge to QOM
>> 
>> The point (4) is the most important one.  As we refactor in this fashion,
>> we should quickly get to the point where machine->init disappears completely 
>> in
>> favor of just creating a handful of devices.
>
>I agree that machine->init needs to be refactored, however I don't think
>it'll disappear, just be moved into initfn/realize functions.
>
>Andreas
>
>> 
>> The two stage initialization of QOM is important here.  instance_init() is 
>> when
>> composed devices are created which means that after you've created a device, 
>> all
>> of its children are visible in the device model.  This lets you set 
>> properties
>> of the parent and its children.
>> 
>> realize() (which is still called DeviceState::init today) will be called 
>> right
>> before the guest starts up for the first time.
>> 
>> Signed-off-by: Anthony Liguori 
>> Signed-off-by: Wanpeng Li 
>> 
>> ---
>>  Makefile.target |3 +-
>>  hw/hpet.c   |   39 +---
>>  hw/hpet_emul.h  |   41 +++
>>  hw/i440fx.c |  431 ++
>>  hw/i440fx.h |   78 +
>>  hw/i8254_internal.h |2 +-
>>  hw/mc146818rtc.c|   26 --
>>  hw/mc146818rtc.h|   29 ++
>>  hw/pc.c |  838 
>> +--
>>  hw/pc.h |   46 +---
>>  hw/pc_piix.c|  762 --
>>  hw/pci_host.c   |   26 ++
>>  hw/pci_host.h   |5 +
>>  hw/piix3.c  |  274 +
>>  hw/piix3.h  |   79 +
>>  hw/piix_pci.c   |  600 
>>  memory.c|   94 +--
>>  memory.h|8 +
>>  18 files changed, 1795 insertions(+), 1586 deletions(-)
>>  create mode 100644 hw/i440fx.c
>>  create mode 100644 hw/i440fx.h
>>  delete mode 100644 hw/pc_piix.c
>>  create mode 100644 hw/piix3.c
>>  create mode 100644 hw/piix3.h
>>  delete mode 100644 hw/piix_pci.c
>> --
>
>-- 
>SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
>GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg
>

-- 
LTC China, IBM, Shanghai




[Qemu-devel] RFC: options parsing in vl.c should be module

2012-03-29 Thread Wanpeng Li
Hi all:

The option-parsing code in the main function of vl.c is too long.
It should be split out into a separate function to clarify the logic, ease
source code management, and improve code readability.

Regards,
Wanpeng Li

-- 
LTC China, IBM, Shanghai




[Qemu-devel] script

2012-03-30 Thread Wanpeng Li
Hi all:

Is there any material that explains how to use the scripts under scripts/?

Regards,
Wanpeng Li

-- 
LTC China, IBM, Shanghai




Re: [Qemu-devel] script

2012-03-30 Thread Wanpeng Li
On Fri, Mar 30, 2012 at 11:28:53AM +0200, Andreas Färber wrote:

>Am 30.03.2012 09:11, schrieb Wanpeng Li:
>> Are there any materials introduce how to use the scripts under scripts/
>> ?
>
>Is this a general question about improving our scripts' documentation,
>or are you asking about one or more specific scripts? I'm aware of
>instructions for checkpatch.pl, get_maintainer.pl and most of the
>QMP/QOM stuff in some form. Some other scripts are used as part of the
>build system.
>
Hi,

I would be very grateful if you could send me your QMP/QOM material. I think
becoming familiar with scripts/ would improve my work efficiency, so any
material about these scripts would be welcome.

Regards,
Wanpeng Li




[Qemu-devel] [PATCH] RFC: options parse in vl.c should be moduled

2012-03-30 Thread Wanpeng Li
The option-parsing code in the main function of vl.c is too long. It should
be split out into a single function to clarify the logic, ease source code
management, and improve code readability. So I moved the option parsing into
a function named options_parse, and exposed some variables as globals so that
command-line invocations are not affected.

Signed-off-by: Wanpeng Li 
---
 vl.c |  159 ++---
 1 files changed, 83 insertions(+), 76 deletions(-)

diff --git a/vl.c b/vl.c
index 0fccf50..fa4d0a9 100644
--- a/vl.c
+++ b/vl.c
@@ -2251,84 +2251,40 @@ int qemu_init_main_loop(void)
 return main_loop_init();
 }
 
-int main(int argc, char **argv, char **envp)
-{
-int i;
-int snapshot, linux_boot;
-const char *icount_option = NULL;
-const char *initrd_filename;
-const char *kernel_filename, *kernel_cmdline;
-char boot_devices[33] = "cad"; /* default to HD->floppy->CD-ROM */
-DisplayState *ds;
-DisplayChangeListener *dcl;
-int cyls, heads, secs, translation;
-QemuOpts *hda_opts = NULL, *opts, *machine_opts;
-QemuOptsList *olist;
-int optind;
-const char *optarg;
-const char *loadvm = NULL;
-QEMUMachine *machine;
-const char *cpu_model;
-const char *vga_model = NULL;
-const char *pid_file = NULL;
-const char *incoming = NULL;
+int snapshot, linux_boot;
+const char *icount_option;
+const char *initrd_filename;
+const char *kernel_filename, *kernel_cmdline;
+char boot_devices[33] = "cad"; /* default to HD->floppy->CD-ROM */
+DisplayState *ds;
+DisplayChangeListener *dcl;
+int cyls, heads, secs, translation;
+QemuOpts *hda_opts , *opts, *machine_opts;
+QemuOptsList *olist;
+int optind;
+const char *loadvm;
+QEMUMachine *machine;
+const char *cpu_model;
+const char *vga_model;
+const char *pid_file;
+const char *incoming;
 #ifdef CONFIG_VNC
-int show_vnc_port = 0;
+int show_vnc_port;
 #endif
-int defconfig = 1;
-const char *log_mask = NULL;
-const char *log_file = NULL;
-GMemVTable mem_trace = {
-.malloc = malloc_and_trace,
-.realloc = realloc_and_trace,
-.free = free_and_trace,
-};
-const char *trace_events = NULL;
-const char *trace_file = NULL;
-
-atexit(qemu_run_exit_notifiers);
-error_set_progname(argv[0]);
-
-g_mem_set_vtable(&mem_trace);
-if (!g_thread_supported()) {
-#if !GLIB_CHECK_VERSION(2, 31, 0)
-g_thread_init(NULL);
-#else
-fprintf(stderr, "glib threading failed to initialize.\n");
-exit(1);
-#endif
-}
-
-module_call_init(MODULE_INIT_QOM);
-
-runstate_init();
-
-init_clocks();
-rtc_clock = host_clock;
-
-qemu_cache_utils_init(envp);
-
-QLIST_INIT (&vm_change_state_head);
-os_setup_early_signal_handling();
-
-module_call_init(MODULE_INIT_MACHINE);
-machine = find_default_machine();
-cpu_model = NULL;
-ram_size = 0;
-snapshot = 0;
-cyls = heads = secs = 0;
-translation = BIOS_ATA_TRANSLATION_AUTO;
-
-for (i = 0; i < MAX_NODES; i++) {
-node_mem[i] = 0;
-node_cpumask[i] = 0;
-}
-
-nb_numa_nodes = 0;
-nb_nics = 0;
-
-autostart= 1;
+int defconfig = 1;
+const char *log_mask;
+const char *log_file;
+GMemVTable mem_trace = {
+.malloc = malloc_and_trace,
+.realloc = realloc_and_trace,
+.free = free_and_trace,
+};
+const char *trace_events;
+const char *trace_file;
 
+static void options_parse(int argc, char **argv)
+{
+const char *optarg;
 /* first pass of option parsing */
 optind = 1;
 while (optind < argc) {
@@ -2867,7 +2823,7 @@ int main(int argc, char **argv, char **envp)
 if (watchdog) {
 fprintf(stderr,
 "qemu: only one watchdog option may be given\n");
-return 1;
+exit(1);
 }
 watchdog = optarg;
 break;
@@ -3186,6 +3142,57 @@ int main(int argc, char **argv, char **envp)
 }
 }
 }
+}
+
+int main(int argc, char **argv, char **envp)
+{
+int i;
+
+atexit(qemu_run_exit_notifiers);
+error_set_progname(argv[0]);
+
+g_mem_set_vtable(&mem_trace);
+if (!g_thread_supported()) {
+#if !GLIB_CHECK_VERSION(2, 31, 0)
+g_thread_init(NULL);
+#else
+fprintf(stderr, "glib threading failed to initialize.\n");
+exit(1);
+#endif
+}
+
+module_call_init(MODULE_INIT_QOM);
+
+runstate_init();
+
+init_clocks();
+rtc_clock = host_clock;
+
+qemu_cache_utils_init(envp);
+
+QLIST_INIT(&vm_change_state_head);
+os_setup_early_signal_handling();
+
+module_call_init(MODULE_INIT_MACHINE);
+machine = find_default_machine();
+cpu_model = NULL;
+ram_size = 0;
+snapshot = 0;
+cyls = heads = secs = 0;
+translation = BIOS_ATA

Re: [Qemu-devel] [PATCH] RFC: options parse in vl.c should be moduled

2012-03-30 Thread Wanpeng Li
On Fri, Mar 30, 2012 at 01:53:14PM +0100, Daniel P. Berrange wrote:
>On Fri, Mar 30, 2012 at 08:36:43PM +0800, Wanpeng Li wrote:
>> Consider of the options parse process in main function of vl.c is too
>> long.It should be module into single function to clear ideas, strengthen
>> the source code management, and increase code readability.So I module the 
>> process of options parse as function options_parse, and expose some variables
>> in order to not influence command-line invocations.
>> 
>> Signed-off-by: Wanpeng Li 
>> ---
>>  vl.c |  159 
>> ++---
>>  1 files changed, 83 insertions(+), 76 deletions(-)
>> 
>> diff --git a/vl.c b/vl.c
>> index 0fccf50..fa4d0a9 100644
>> --- a/vl.c
>> +++ b/vl.c
>> @@ -2251,84 +2251,40 @@ int qemu_init_main_loop(void)
>>  return main_loop_init();
>>  }
>>  
>> -int main(int argc, char **argv, char **envp)
>> -{
>> -int i;
>> -int snapshot, linux_boot;
>> -const char *icount_option = NULL;
>> -const char *initrd_filename;
>> -const char *kernel_filename, *kernel_cmdline;
>> -char boot_devices[33] = "cad"; /* default to HD->floppy->CD-ROM */
>> -DisplayState *ds;
>> -DisplayChangeListener *dcl;
>> -int cyls, heads, secs, translation;
>> -QemuOpts *hda_opts = NULL, *opts, *machine_opts;
>> -QemuOptsList *olist;
>> -int optind;
>> -const char *optarg;
>> -const char *loadvm = NULL;
>> -QEMUMachine *machine;
>> -const char *cpu_model;
>> -const char *vga_model = NULL;
>> -const char *pid_file = NULL;
>> -const char *incoming = NULL;
>> +int snapshot, linux_boot;
>> +const char *icount_option;
>> +const char *initrd_filename;
>> +const char *kernel_filename, *kernel_cmdline;
>> +char boot_devices[33] = "cad"; /* default to HD->floppy->CD-ROM */
>> +DisplayState *ds;
>> +DisplayChangeListener *dcl;
>> +int cyls, heads, secs, translation;
>> +QemuOpts *hda_opts , *opts, *machine_opts;
>> +QemuOptsList *olist;
>> +int optind;
>> +const char *loadvm;
>> +QEMUMachine *machine;
>> +const char *cpu_model;
>> +const char *vga_model;
>> +const char *pid_file;
>> +const char *incoming;
>>  #ifdef CONFIG_VNC
>> -int show_vnc_port = 0;
>> +int show_vnc_port;
>
>[snip]
>
>> +int defconfig = 1;
>> +const char *log_mask;
>> +const char *log_file;
>> +GMemVTable mem_trace = {
>> +.malloc = malloc_and_trace,
>> +.realloc = realloc_and_trace,
>> +.free = free_and_trace,
>> +};
>> +const char *trace_events;
>> +const char *trace_file;
>>  
>> +static void options_parse(int argc, char **argv)
>> +{
>
>While code modularization is a worthy goal, I don't think this patch is
>really an improvement. QEMU already has far too many adhoc global variables,
>without adding another 30 or more. The resulting code isn't even simplified
>or more readable IMHO, it is merely different.
>
>Daniel

There are about 856 lines of code handling option parsing in the main function.
This is ugly and hurts readability, so I moved that code into a single
function called "options_parse". Because there are so many command-line
parameters, options_parse would otherwise need a large number of arguments;
to avoid that, I made some variables global.

Regards,
Wanpeng Li

-- 
LTC China, IBM, Shanghai




[Qemu-devel] [PATCH] remove useless comments in dma

2012-04-06 Thread Wanpeng Li
This commented-out include is useless; remove it to make the code cleaner.

Signed-off-by: Wanpeng Li 
---
 dma.h |1 -
 1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/dma.h b/dma.h
index 20e86d2..5bd1fc8 100644
--- a/dma.h
+++ b/dma.h
@@ -11,7 +11,6 @@
 #define DMA_H
 
 #include 
-//#include "cpu.h"
 #include "hw/hw.h"
 #include "block.h"
 
-- 
1.7.5.4




Re: [Qemu-devel] [PATCH] remove useless comments in dma

2012-04-08 Thread Wanpeng Li
On Sat, Apr 07, 2012 at 02:23:43PM +0000, Blue Swirl wrote:
>On Fri, Apr 6, 2012 at 07:52, Wanpeng Li  wrote:
>> This comment is useless, just removes it and makes the codes clear.
>>
>> Signed-off-by: Wanpeng Li 
>
>Thanks, applied. Patches like these could be directed to trivial
>patches queue, please read
>http://wiki.qemu.org/Contribute/TrivialPatches

Thanks for the reminder.

>
>> ---
>>  dma.h |    1 -
>>  1 files changed, 0 insertions(+), 1 deletions(-)
>>
>> diff --git a/dma.h b/dma.h
>> index 20e86d2..5bd1fc8 100644
>> --- a/dma.h
>> +++ b/dma.h
>> @@ -11,7 +11,6 @@
>>  #define DMA_H
>>
>>  #include 
>> -//#include "cpu.h"
>>  #include "hw/hw.h"
>>  #include "block.h"
>>
>> --
>> 1.7.5.4
>>
>>
>

-- 
LTC China, IBM, Shanghai




[Qemu-devel] relationship between vmport vmmouse i8042 port92

2012-04-08 Thread Wanpeng Li
Hi all:

I know the i8042 is a chip in the southbridge which controls the keyboard and
mouse. In QEMU the keyboard is emulated by i8042 and the mouse is emulated by
vmmouse. But what are port92 and vmport, and what is the relationship among
these four components?

Regards,
Wanpeng Li

-- 
LTC China, IBM, Shanghai




[Qemu-devel] [PATCH] avoid repeating contain header file

2012-04-09 Thread Wanpeng Li
Signed-off-by: Wanpeng Li 
---
 hw/ps2.h |   29 +
 1 files changed, 29 insertions(+), 0 deletions(-)

diff --git a/hw/ps2.h b/hw/ps2.h
index 32a4231..d19c226 100644
--- a/hw/ps2.h
+++ b/hw/ps2.h
@@ -1,3 +1,30 @@
+/*
+ * QEMU PS/2 keyboard/mouse emulation
+ *
+ * Copyright (C) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_PS2_H
+#define HW_PS2_H
+
 /* ps2.c */
 void *ps2_kbd_init(void (*update_irq)(void *, int), void *update_arg);
 void *ps2_mouse_init(void (*update_irq)(void *, int), void *update_arg);
@@ -7,3 +34,5 @@ uint32_t ps2_read_data(void *);
 void ps2_queue(void *, int b);
 void ps2_keyboard_set_translation(void *opaque, int mode);
 void ps2_mouse_fake_event(void *opaque);
+
+#endif /* !HW_PS2_H */
-- 
1.7.5.4




[Qemu-devel] [PATCH] avoid repeating contain header file

2012-04-10 Thread Wanpeng Li
Signed-off-by: Wanpeng Li 
---
 hw/ps2.h |   29 +
 1 files changed, 29 insertions(+), 0 deletions(-)

diff --git a/hw/ps2.h b/hw/ps2.h
index 32a4231..d19c226 100644
--- a/hw/ps2.h
+++ b/hw/ps2.h
@@ -1,3 +1,30 @@
+/*
+ * QEMU PS/2 keyboard/mouse emulation
+ *
+ * Copyright (C) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_PS2_H
+#define HW_PS2_H
+
 /* ps2.c */
 void *ps2_kbd_init(void (*update_irq)(void *, int), void *update_arg);
 void *ps2_mouse_init(void (*update_irq)(void *, int), void *update_arg);
@@ -7,3 +34,5 @@ uint32_t ps2_read_data(void *);
 void ps2_queue(void *, int b);
 void ps2_keyboard_set_translation(void *opaque, int mode);
 void ps2_mouse_fake_event(void *opaque);
+
+#endif /* !HW_PS2_H */
-- 
1.7.5.4




[Qemu-devel] [PATCH] PCI Using macro definition instead of a simple digit

2012-03-03 Thread Wanpeng Li
PCI_CLASS_DISPLAY_VGA is already defined in hw/pci_ids.h, so use the
macro definition instead of a magic number.

Signed-off-by: Wanpeng Li 
---
 hw/pci.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/hw/pci.c b/hw/pci.c
index fe71666..274d86d 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -1784,7 +1784,7 @@ static int pci_add_option_rom(PCIDevice *pdev, bool 
is_default_rom)
  * for 0.11 compatibility.
  */
 int class = pci_get_word(pdev->config + PCI_CLASS_DEVICE);
-if (class == 0x0300) {
+if (class == PCI_CLASS_DISPLAY_VGA) {
 rom_add_vga(pdev->romfile);
 } else {
 rom_add_option(pdev->romfile, -1);
-- 
1.7.5.4




[Qemu-devel] [PATCH] PCI Using macro definition instead of a simple digit

2012-03-03 Thread Wanpeng Li
PCI_CLASS_DISPLAY_VGA is already defined in hw/pci_ids.h, so use the
macro definition instead of a magic number.

Signed-off-by: Wanpeng Li 
---
 hw/pci.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/hw/pci.c b/hw/pci.c
index fe71666..274d86d 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -1784,7 +1784,7 @@ static int pci_add_option_rom(PCIDevice *pdev, bool 
is_default_rom)
  * for 0.11 compatibility.
  */
 int class = pci_get_word(pdev->config + PCI_CLASS_DEVICE);
-if (class == 0x0300) {
+if (class == PCI_CLASS_DISPLAY_VGA) {
 rom_add_vga(pdev->romfile);
 } else {
 rom_add_option(pdev->romfile, -1);
-- 
1.7.5.4




[Qemu-devel] questions about pci

2012-03-04 Thread WanPeng Li
Hi all:

I have been reading the PCI code in QEMU (i440fx, pci.c and so on). As I
understand it, a guest OS on an x86 mainboard uses I/O instructions to
access PCI configuration space. Without passthrough, QEMU has to
emulate these operations. I found a function called kvm_handle_io which
emulates I/O port reads and writes. I traced this function, but could not
find any relationship between it and the PCI configuration-space read/write
functions such as i440fx_write_config and piix3_write_config. So how is PCI
configuration space access emulated when passthrough is not used?


thanks 
Wanpeng Li

LTC China, IBM




[Qemu-devel] [PATCH] fix bug of isa_bus irq

2012-03-10 Thread Wanpeng Li
The ISA bus only uses IRQs 0~15, so there is no need to give it the qemu_irq
array covering 0~24; the i8259 qemu_irq array is sufficient.

Signed-off-by: Wanpeng Li 
---
 hw/pc_piix.c |3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/hw/pc_piix.c b/hw/pc_piix.c
index 63dba4e..52f7cf8 100644
--- a/hw/pc_piix.c
+++ b/hw/pc_piix.c
@@ -210,7 +210,6 @@ static void pc_init1(MemoryRegion *system_memory,
 isa_bus = isa_bus_new(NULL, system_io);
 no_hpet = 1;
 }
-isa_bus_irqs(isa_bus, gsi);
 
 if (kvm_irqchip_in_kernel()) {
 i8259 = kvm_i8259_init(isa_bus);
@@ -221,6 +220,8 @@ static void pc_init1(MemoryRegion *system_memory,
 i8259 = i8259_init(isa_bus, cpu_irq[0]);
 }
 
+isa_bus_irqs(isa_bus, i8259);
+
 for (i = 0; i < ISA_NUM_IRQS; i++) {
 gsi_state->i8259_irq[i] = i8259[i];
 }
-- 
1.7.5.4




[Qemu-devel] [PATCH] fix bug of isa_bus irq

2012-03-10 Thread Wanpeng Li
The ISA bus only uses IRQs 0~15, so there is no need to give it the qemu_irq
array covering 0~23; the i8259 qemu_irq array is sufficient.

Signed-off-by: Wanpeng Li 
---
 hw/pc_piix.c |3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/hw/pc_piix.c b/hw/pc_piix.c
index 63dba4e..52f7cf8 100644
--- a/hw/pc_piix.c
+++ b/hw/pc_piix.c
@@ -210,7 +210,6 @@ static void pc_init1(MemoryRegion *system_memory,
 isa_bus = isa_bus_new(NULL, system_io);
 no_hpet = 1;
 }
-isa_bus_irqs(isa_bus, gsi);
 
 if (kvm_irqchip_in_kernel()) {
 i8259 = kvm_i8259_init(isa_bus);
@@ -221,6 +220,8 @@ static void pc_init1(MemoryRegion *system_memory,
 i8259 = i8259_init(isa_bus, cpu_irq[0]);
 }
 
+isa_bus_irqs(isa_bus, i8259);
+
 for (i = 0; i < ISA_NUM_IRQS; i++) {
 gsi_state->i8259_irq[i] = i8259[i];
 }
-- 
1.7.5.4




Re: [Qemu-devel] [PATCH] fix bug of isa_bus irq

2012-03-11 Thread Wanpeng Li
On Sun, Mar 11, 2012 at 08:46:38AM +0100, Jan Kiszka wrote:
>On 2012-03-11 08:04, Wanpeng Li wrote:
>> ISA bus only use IRQ 0~15, so don't need to give an array qemu_irq 0~23, just
>> array qemu_irq i8259 is ok.
>> 
>> Signed-off-by: Wanpeng Li 
>> ---
>>  hw/pc_piix.c |3 ++-
>>  1 files changed, 2 insertions(+), 1 deletions(-)
>> 
>> diff --git a/hw/pc_piix.c b/hw/pc_piix.c
>> index 63dba4e..52f7cf8 100644
>> --- a/hw/pc_piix.c
>> +++ b/hw/pc_piix.c
>> @@ -210,7 +210,6 @@ static void pc_init1(MemoryRegion *system_memory,
>>  isa_bus = isa_bus_new(NULL, system_io);
>>  no_hpet = 1;
>>  }
>> -isa_bus_irqs(isa_bus, gsi);
>>  
>>  if (kvm_irqchip_in_kernel()) {
>>  i8259 = kvm_i8259_init(isa_bus);
>> @@ -221,6 +220,8 @@ static void pc_init1(MemoryRegion *system_memory,
>>  i8259 = i8259_init(isa_bus, cpu_irq[0]);
>>  }
>>  
>> +isa_bus_irqs(isa_bus, i8259);
>> +
>>  for (i = 0; i < ISA_NUM_IRQS; i++) {
>>  gsi_state->i8259_irq[i] = i8259[i];
>>  }
>
>This is bogus. isa_bus_irqs sets the output IRQs of the ISA bus. And
>those are not only delivered to the PIC on the PIIX2, but also the
>IOAPIC. Thus we have to pass in the GSI input lines which dispatch to
>both. Of those lines, only the first 16 will be used by the ISA bus
>(there is even an assert to ensure this).
>
>Did you see any concrete bug in the context of this logic?
>
>Jan
>

Yes, but actually the PIC is being used at present, so is passing qemu_irq
0~23 to isa_bus safe or not?

Wanpeng Li




Re: [Qemu-devel] QEMU NUMA and memory allocation problem

2013-05-19 Thread Wanpeng Li
On Mon, May 20, 2013 at 10:03:53AM +0800, Wanlong Gao wrote:
>Adding CC AutoNUMA folks:
>
>Paolo said that:
>
>> Pinning memory to host NUMA nodes is not implemented.  Something like
>> AutoNUMA would be able to balance the memory the right way.
>> 
>> Paolo
>
>And Eduardo said that:
>> I had plans to implement a mechanism to allow external tools to
>> implement manual pinning, but it is not one of my top priorities. It's
>> the kind of mechanism that may be obsolete since birth, if we have
>> AutoNUMA working and doing the right thing.
>> 
>> -- Eduardo 
>

Hi Wanlong,

>But I didn't see any change when I enabled the AutoNUMA on my host.
>Can AutoNUMA folks teach me why?
>Or any plans to handle this problem in AutoNUMA? 
>

AutoNUMA is not merged currently. The foundation (automatic NUMA
balancing), implemented by Mel, on which the policy of either schednuma or
autonuma can be rebased, has already been merged.

Regards,
Wanpeng Li 

>
>Thanks,
>Wanlong Gao
>
>
>
>> Hi,
>> 
>> We just met a problem of QEMU memory allocation.
>> Here is the description:
>> 
>> On my host, I have two nodes,
>> # numactl -H
>> available: 2 nodes (0-1)
>> node 0 cpus: 0 2
>> node 0 size: 4010 MB
>> node 0 free: 3021 MB
>> node 1 cpus: 1 3
>> node 1 size: 4030 MB
>> node 1 free: 2881 MB
>> node distances:
>> node   0   1 
>>   0:  10  20 
>>   1:  20  10 
>> 
>> 
>> 
>> I created a guest using the following XML:
>> 
>> ...
>>   1048576
>>   1048576
>>   2
>>   
>> 
>> 
>>   
>>   
>> 
>>   
>>   
>> 
>> 
>>   
>>   
>> 
>>   
>> ...
>> 
>> As you can see, I assigned 1G memory to this guest, pined vcpu0 to the host 
>> CPU 2,
>> it's in host node0, pined vcpu1 to the host CPU 3 that is in host node1.
>> The guest also has two nodes, each node contains 512M memory.
>> 
>> Now, I started the guest, then printed the host numa state :
>> # numactl -H
>> available: 2 nodes (0-1)
>> node 0 cpus: 0 2
>> node 0 size: 4010 MB
>> node 0 free: 2647 MB  <=== freecell of node0
>> node 1 cpus: 1 3
>> node 1 size: 4030 MB
>> node 1 free: 2746 MB
>> node distances:
>> node   0   1 
>>   0:  10  20 
>>   1:  20  10 
>> 
>> Then I tried to allocate memory from guest node0 using the following code:
>>> #include 
>>> #include 
>>>
>>> #define MEM (1024*1024*300)
>>>
>>> int main(void)
>>> {
>>> char *p = numa_alloc_onnode(MEM, 0);
>>> memset(p, 0, MEM);
>>> sleep(1000);
>>> numa_free(p, MEM);
>>> return 0;
>>> }
>> 
>> And printed the host numa state, it shows that this 300M memory is allocated 
>> from host node0,
>> 
>> # numactl -H
>> available: 2 nodes (0-1)
>> node 0 cpus: 0 2
>> node 0 size: 4010 MB
>> node 0 free: 2345 MB <= reduced ~300M
>> node 1 cpus: 1 3
>> node 1 size: 4030 MB
>> node 1 free: 2767 MB
>> node distances:
>> node   0   1 
>>   0:  10  20 
>>   1:  20  10 
>> 
>> 
>> Then, I tried the same method to allocate 300M memory from guest node1, and 
>> printed the host
>> numa state:
>> 
>> # numactl -H
>> available: 2 nodes (0-1)
>> node 0 cpus: 0 2
>> node 0 size: 4010 MB
>> node 0 free: 2059 MB <=== reduced ~300M
>> node 1 cpus: 1 3
>> node 1 size: 4030 MB
>> node 1 free: 2767 MB <=== no change
>> node distances:
>> node   0   1 
>>   0:  10  20 
>>   1:  20  10 
>> 
>> 
>> To see that this 300M memory is allocated from host node0 again, but not 
>> host node1 as
>> I expected.
>> 
>> We think that QEMU can't handled this numa memory allocation well, and it 
>> will cause the
>> cross node memory access performance regression.
>> 
>> Any thoughts? Or, am I missing something?
>> 
>> 
>> Thanks,
>> Wanlong Gao
>> 
>> 
>
>--
>To unsubscribe, send a message with 'unsubscribe linux-mm' in
>the body to majord...@kvack.org.  For more info on Linux MM,
>see: http://www.linux-mm.org/ .
>Don't email: mailto:"d...@kvack.org";> em...@kvack.org 




[Qemu-devel] [PATCH] target-i386: Intel xsaves

2014-12-02 Thread Wanpeng Li
Add xsaves-related definitions, and also add the corresponding parts to
kvm_get/put and vmstate.

Signed-off-by: Wanpeng Li 
---
 target-i386/cpu.h |  2 ++
 target-i386/kvm.c | 15 +++
 target-i386/machine.c |  3 ++-
 3 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 015f5b5..cff7433 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -389,6 +389,7 @@
 #define MSR_VM_HSAVE_PA 0xc0010117
 
 #define MSR_IA32_BNDCFGS0x0d90
+#define MSR_IA32_XSS0x0da0
 
 #define XSTATE_FP   (1ULL << 0)
 #define XSTATE_SSE  (1ULL << 1)
@@ -1019,6 +1020,7 @@ typedef struct CPUX86State {
 uint64_t xstate_bv;
 
 uint64_t xcr0;
+uint64_t xss;
 
 TPRAccess tpr_access_type;
 } CPUX86State;
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index ccf36e8..c6fc417 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -80,6 +80,7 @@ static bool has_msr_hv_hypercall;
 static bool has_msr_hv_vapic;
 static bool has_msr_hv_tsc;
 static bool has_msr_mtrr;
+static bool has_msr_xss;
 
 static bool has_msr_architectural_pmu;
 static uint32_t num_architectural_pmu_counters;
@@ -826,6 +827,10 @@ static int kvm_get_supported_msrs(KVMState *s)
 has_msr_bndcfgs = true;
 continue;
 }
+if (kvm_msr_list->indices[i] == MSR_IA32_XSS) {
+has_msr_xss = true;
+continue;
+}
 }
 }
 
@@ -1224,6 +1229,9 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
 if (has_msr_bndcfgs) {
 kvm_msr_entry_set(&msrs[n++], MSR_IA32_BNDCFGS, env->msr_bndcfgs);
 }
+if (has_msr_xss) {
+kvm_msr_entry_set(&msrs[n++], MSR_IA32_XSS, env->xss);
+}
 #ifdef TARGET_X86_64
 if (lm_capable_kernel) {
 kvm_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar);
@@ -1570,6 +1578,10 @@ static int kvm_get_msrs(X86CPU *cpu)
 if (has_msr_bndcfgs) {
 msrs[n++].index = MSR_IA32_BNDCFGS;
 }
+if (has_msr_xss) {
+msrs[n++].index = MSR_IA32_XSS;
+}
+
 
 if (!env->tsc_valid) {
 msrs[n++].index = MSR_IA32_TSC;
@@ -1717,6 +1729,9 @@ static int kvm_get_msrs(X86CPU *cpu)
 case MSR_IA32_BNDCFGS:
 env->msr_bndcfgs = msrs[i].data;
 break;
+case MSR_IA32_XSS:
+env->xss = msrs[i].data;
+break;
 default:
 if (msrs[i].index >= MSR_MC0_CTL &&
 msrs[i].index < MSR_MC0_CTL + (env->mcg_cap & 0xff) * 4) {
diff --git a/target-i386/machine.c b/target-i386/machine.c
index 1c13b14..43af33f 100644
--- a/target-i386/machine.c
+++ b/target-i386/machine.c
@@ -689,7 +689,7 @@ static const VMStateDescription vmstate_avx512 = {
 
 VMStateDescription vmstate_x86_cpu = {
 .name = "cpu",
-.version_id = 12,
+.version_id = 13,
 .minimum_version_id = 3,
 .pre_save = cpu_pre_save,
 .post_load = cpu_post_load,
@@ -786,6 +786,7 @@ VMStateDescription vmstate_x86_cpu = {
 VMSTATE_UINT64_V(env.xcr0, X86CPU, 12),
 VMSTATE_UINT64_V(env.xstate_bv, X86CPU, 12),
 VMSTATE_YMMH_REGS_VARS(env.ymmh_regs, X86CPU, CPU_NB_REGS, 12),
+VMSTATE_UINT64_V(env.xss, X86CPU, 13),
 VMSTATE_END_OF_LIST()
 /* The above list is not sorted /wrt version numbers, watch out! */
 },
-- 
1.9.1




[Qemu-devel] [PATCH v2] target-i386: Intel xsaves

2014-12-02 Thread Wanpeng Li
Add xsaves-related definitions, and also add the corresponding parts
to kvm_get/put and vmstate.

Signed-off-by: Wanpeng Li 
---
v1 -> v2:
 * use a subsection instead of bumping the version number.

 target-i386/cpu.h |  2 ++
 target-i386/kvm.c | 15 +++
 target-i386/machine.c | 21 +
 3 files changed, 38 insertions(+)

diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 015f5b5..cff7433 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -389,6 +389,7 @@
 #define MSR_VM_HSAVE_PA 0xc0010117
 
 #define MSR_IA32_BNDCFGS0x0d90
+#define MSR_IA32_XSS0x0da0
 
 #define XSTATE_FP   (1ULL << 0)
 #define XSTATE_SSE  (1ULL << 1)
@@ -1019,6 +1020,7 @@ typedef struct CPUX86State {
 uint64_t xstate_bv;
 
 uint64_t xcr0;
+uint64_t xss;
 
 TPRAccess tpr_access_type;
 } CPUX86State;
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index ccf36e8..c6fc417 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -80,6 +80,7 @@ static bool has_msr_hv_hypercall;
 static bool has_msr_hv_vapic;
 static bool has_msr_hv_tsc;
 static bool has_msr_mtrr;
+static bool has_msr_xss;
 
 static bool has_msr_architectural_pmu;
 static uint32_t num_architectural_pmu_counters;
@@ -826,6 +827,10 @@ static int kvm_get_supported_msrs(KVMState *s)
 has_msr_bndcfgs = true;
 continue;
 }
+if (kvm_msr_list->indices[i] == MSR_IA32_XSS) {
+has_msr_xss = true;
+continue;
+}
 }
 }
 
@@ -1224,6 +1229,9 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
 if (has_msr_bndcfgs) {
 kvm_msr_entry_set(&msrs[n++], MSR_IA32_BNDCFGS, env->msr_bndcfgs);
 }
+if (has_msr_xss) {
+kvm_msr_entry_set(&msrs[n++], MSR_IA32_XSS, env->xss);
+}
 #ifdef TARGET_X86_64
 if (lm_capable_kernel) {
 kvm_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar);
@@ -1570,6 +1578,10 @@ static int kvm_get_msrs(X86CPU *cpu)
 if (has_msr_bndcfgs) {
 msrs[n++].index = MSR_IA32_BNDCFGS;
 }
+if (has_msr_xss) {
+msrs[n++].index = MSR_IA32_XSS;
+}
+
 
 if (!env->tsc_valid) {
 msrs[n++].index = MSR_IA32_TSC;
@@ -1717,6 +1729,9 @@ static int kvm_get_msrs(X86CPU *cpu)
 case MSR_IA32_BNDCFGS:
 env->msr_bndcfgs = msrs[i].data;
 break;
+case MSR_IA32_XSS:
+env->xss = msrs[i].data;
+break;
 default:
 if (msrs[i].index >= MSR_MC0_CTL &&
 msrs[i].index < MSR_MC0_CTL + (env->mcg_cap & 0xff) * 4) {
diff --git a/target-i386/machine.c b/target-i386/machine.c
index 1c13b14..722d62e 100644
--- a/target-i386/machine.c
+++ b/target-i386/machine.c
@@ -687,6 +687,24 @@ static const VMStateDescription vmstate_avx512 = {
 }
 };
 
+static bool xss_needed(void *opaque)
+{
+X86CPU *cpu = opaque;
+CPUX86State *env = &cpu->env;
+
+return env->xss != 0;
+}
+
+static const VMStateDescription vmstate_xss = {
+.name = "cpu/xss",
+.version_id = 1,
+.minimum_version_id = 1,
+.fields = (VMStateField[]) {
+VMSTATE_UINT64(env.xss, X86CPU),
+VMSTATE_END_OF_LIST()
+}
+};
+
 VMStateDescription vmstate_x86_cpu = {
 .name = "cpu",
 .version_id = 12,
@@ -832,6 +850,9 @@ VMStateDescription vmstate_x86_cpu = {
 }, {
 .vmsd = &vmstate_avx512,
 .needed = avx512_needed,
+ }, {
+.vmsd = &vmstate_xss,
+.needed = xss_needed,
 } , {
 /* empty */
 }
-- 
1.9.1




[Qemu-devel] [PATCH RESEND v2] target-i386: Intel xsaves

2014-12-02 Thread Wanpeng Li
Add xsaves-related definitions, and also add the corresponding parts
to kvm_get/put and vmstate.

Signed-off-by: Wanpeng Li 
---
v1 -> v2:
 * use a subsection instead of bumping the version number.

 target-i386/cpu.h |  2 ++
 target-i386/kvm.c | 15 +++
 target-i386/machine.c | 21 +
 3 files changed, 38 insertions(+)

diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 015f5b5..cff7433 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -389,6 +389,7 @@
 #define MSR_VM_HSAVE_PA 0xc0010117
 
 #define MSR_IA32_BNDCFGS0x0d90
+#define MSR_IA32_XSS0x0da0
 
 #define XSTATE_FP   (1ULL << 0)
 #define XSTATE_SSE  (1ULL << 1)
@@ -1019,6 +1020,7 @@ typedef struct CPUX86State {
 uint64_t xstate_bv;
 
 uint64_t xcr0;
+uint64_t xss;
 
 TPRAccess tpr_access_type;
 } CPUX86State;
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index ccf36e8..c6fc417 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -80,6 +80,7 @@ static bool has_msr_hv_hypercall;
 static bool has_msr_hv_vapic;
 static bool has_msr_hv_tsc;
 static bool has_msr_mtrr;
+static bool has_msr_xss;
 
 static bool has_msr_architectural_pmu;
 static uint32_t num_architectural_pmu_counters;
@@ -826,6 +827,10 @@ static int kvm_get_supported_msrs(KVMState *s)
 has_msr_bndcfgs = true;
 continue;
 }
+if (kvm_msr_list->indices[i] == MSR_IA32_XSS) {
+has_msr_xss = true;
+continue;
+}
 }
 }
 
@@ -1224,6 +1229,9 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
 if (has_msr_bndcfgs) {
 kvm_msr_entry_set(&msrs[n++], MSR_IA32_BNDCFGS, env->msr_bndcfgs);
 }
+if (has_msr_xss) {
+kvm_msr_entry_set(&msrs[n++], MSR_IA32_XSS, env->xss);
+}
 #ifdef TARGET_X86_64
 if (lm_capable_kernel) {
 kvm_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar);
@@ -1570,6 +1578,10 @@ static int kvm_get_msrs(X86CPU *cpu)
 if (has_msr_bndcfgs) {
 msrs[n++].index = MSR_IA32_BNDCFGS;
 }
+if (has_msr_xss) {
+msrs[n++].index = MSR_IA32_XSS;
+}
+
 
 if (!env->tsc_valid) {
 msrs[n++].index = MSR_IA32_TSC;
@@ -1717,6 +1729,9 @@ static int kvm_get_msrs(X86CPU *cpu)
 case MSR_IA32_BNDCFGS:
 env->msr_bndcfgs = msrs[i].data;
 break;
+case MSR_IA32_XSS:
+env->xss = msrs[i].data;
+break;
 default:
 if (msrs[i].index >= MSR_MC0_CTL &&
 msrs[i].index < MSR_MC0_CTL + (env->mcg_cap & 0xff) * 4) {
diff --git a/target-i386/machine.c b/target-i386/machine.c
index 1c13b14..722d62e 100644
--- a/target-i386/machine.c
+++ b/target-i386/machine.c
@@ -687,6 +687,24 @@ static const VMStateDescription vmstate_avx512 = {
 }
 };
 
+static bool xss_needed(void *opaque)
+{
+X86CPU *cpu = opaque;
+CPUX86State *env = &cpu->env;
+
+return env->xss != 0;
+}
+
+static const VMStateDescription vmstate_xss = {
+.name = "cpu/xss",
+.version_id = 1,
+.minimum_version_id = 1,
+.fields = (VMStateField[]) {
+VMSTATE_UINT64(env.xss, X86CPU),
+VMSTATE_END_OF_LIST()
+}
+};
+
 VMStateDescription vmstate_x86_cpu = {
 .name = "cpu",
 .version_id = 12,
@@ -832,6 +850,9 @@ VMStateDescription vmstate_x86_cpu = {
 }, {
 .vmsd = &vmstate_avx512,
 .needed = avx512_needed,
+ }, {
+.vmsd = &vmstate_xss,
+.needed = xss_needed,
 } , {
 /* empty */
 }
-- 
1.9.1




Re: [Qemu-devel] [PATCH v1 0/7] KVM: Hyper-V SynIC timers

2015-11-25 Thread Wanpeng Li
2015-11-25 23:20 GMT+08:00 Andrey Smetanin :
> Per Hyper-V specification (and as required by Hyper-V-aware guests),
> SynIC provides 4 per-vCPU timers.  Each timer is programmed via a pair
> of MSRs, and signals expiration by delivering a special format message
> to the configured SynIC message slot and triggering the corresponding
> synthetic interrupt.

Could you post a link for this specification?

Regards,
Wanpeng Li



Re: [Qemu-devel] [PATCH v1 0/7] KVM: Hyper-V SynIC timers

2015-11-26 Thread Wanpeng Li
2015-11-26 16:34 GMT+08:00 Andrey Smetanin :
>
>
> On 11/26/2015 08:28 AM, Wanpeng Li wrote:
>>
>> 2015-11-25 23:20 GMT+08:00 Andrey Smetanin :
>>>
>>> Per Hyper-V specification (and as required by Hyper-V-aware guests),
>>> SynIC provides 4 per-vCPU timers.  Each timer is programmed via a pair
>>> of MSRs, and signals expiration by delivering a special format message
>>> to the configured SynIC message slot and triggering the corresponding
>>> synthetic interrupt.
>>
>>
>> Could you post a link for this specification?
>
>
> Official link:
>
> http://download.microsoft.com/download/A/B/4/AB43A34E-BDD0-4FA6-BDEF-79EEF16E880B/Hypervisor%20Top%20Level%20Functional%20Specification%20v4.0.docx
>
> and there is a pdf variant(my own docx -> pdf conversion):
>
> https://www.dropbox.com/s/ehxictr5wgnedq7/Hypervisor%20Top%20Level%20Functional%20Specification%20v4.0.pdf?dl=0

Cool, thanks.

Regards,
Wanpeng Li



Re: [Qemu-devel] [PATCH v1 0/7] KVM: Hyper-V SynIC timers

2015-12-01 Thread Wanpeng Li
2015-11-26 16:34 GMT+08:00 Andrey Smetanin :
>
>
> On 11/26/2015 08:28 AM, Wanpeng Li wrote:
>>
>> 2015-11-25 23:20 GMT+08:00 Andrey Smetanin :
>>>
>>> Per Hyper-V specification (and as required by Hyper-V-aware guests),
>>> SynIC provides 4 per-vCPU timers.  Each timer is programmed via a pair
>>> of MSRs, and signals expiration by delivering a special format message
>>> to the configured SynIC message slot and triggering the corresponding
>>> synthetic interrupt.
>>
>>
>> Could you post a link for this specification?
>
>
> Official link:
>
> http://download.microsoft.com/download/A/B/4/AB43A34E-BDD0-4FA6-BDEF-79EEF16E880B/Hypervisor%20Top%20Level%20Functional%20Specification%20v4.0.docx
>
> and there is a pdf variant(my own docx -> pdf conversion):
>
> https://www.dropbox.com/s/ehxictr5wgnedq7/Hypervisor%20Top%20Level%20Functional%20Specification%20v4.0.pdf?dl=0

Btw, is there performance data for such feature?

Regards,
Wanpeng Li



[Qemu-devel] [PATCH v5 0/3] refactor PC machine, i440fx and piix3 to take advantage of QOM

2012-11-07 Thread Wanpeng Li
This series aggressively refactors the PC machine initialization to be more
modelled and less ad-hoc.  The highlights of this series are:

1) Things like -m and -bios-name are now device model properties

2) The i440fx and piix3 are now modelled in a thorough fashion

3) i440fx_init is trivialized to creating devices and setting properties

4) convert PCI host bridge to QOM

The point (3) is the most important one.  As we refactor in this fashion,
we should quickly get to the point where machine->init disappears completely in
favor of just creating a handful of devices.

The two stage initialization of QOM is important here.  instance_init() is when
composed devices are created which means that after you've created a device, all
of its children are visible in the device model.  This lets you set properties
of the parent and its children.

realize() (which is still called DeviceState::init today) will be called right
before the guest starts up for the first time.

Changes in v5:
* fix bisect issues
* take advantage of Andreas's pci_host patchset
* drop convert MemoryRegion to QOM 
* drop prepare to create HPET, RTC and i8254 through composition

Changes in v4:

* rebase patchset

Changes in v3:

* fix coding style issues
* fix rebase error
* add changes log

Changes in v2:

* Rebase patch series of i440fx in Anthony's qom-rebase.12 branch to upstream
* convert MemoryRegion to QOM
* convert pci_host to QOM

Anthony Liguori (3):
  eliminate piix_pci.c and module i440fx and piix3
  merge pc_piix.c to pc.c
  convert pci-host to QOM

 hw/i386/Makefile.objs |3 +-
 hw/i440fx.c   |  434 
 hw/i440fx.h   |   76 +
 hw/pc.c   |  753 ++---
 hw/pc.h   |   41 +---
 hw/pc_piix.c  |  716 --
 hw/pci_host.c |9 +
 hw/piix3.c|  258 +
 hw/piix3.h|   73 +
 hw/piix_pci.c |  622 
 10 files changed, 1558 insertions(+), 1427 deletions(-)
 create mode 100644 hw/i440fx.c
 create mode 100644 hw/i440fx.h
 delete mode 100644 hw/pc_piix.c
 create mode 100644 hw/piix3.c
 create mode 100644 hw/piix3.h
 delete mode 100644 hw/piix_pci.c

-- 
1.7.7.6




[Qemu-devel] [PATCH v5 3/3] convert pci-host to QOM

2012-11-07 Thread Wanpeng Li
Take advantage of Andreas's pci-host patchset and add an instance_init
function to complete the conversion of pci-host to QOM.

Signed-off-by: Anthony Liguori 
Signed-off-by: Wanpeng Li 
---
 hw/pci_host.c |9 +
 1 files changed, 9 insertions(+), 0 deletions(-)

diff --git a/hw/pci_host.c b/hw/pci_host.c
index 68e328c..ce6b28f 100644
--- a/hw/pci_host.c
+++ b/hw/pci_host.c
@@ -165,11 +165,20 @@ const MemoryRegionOps pci_host_data_be_ops = {
 .endianness = DEVICE_BIG_ENDIAN,
 };
 
+static void pci_host_initfn(Object *obj)
+{
+PCIHostState *s = PCI_HOST_BRIDGE(obj);
+
+object_property_add_link(obj, "mmio", "memory-region",
+(Object **)&s->address_space, NULL);
+}
+
 static const TypeInfo pci_host_type_info = {
 .name = TYPE_PCI_HOST_BRIDGE,
 .parent = TYPE_SYS_BUS_DEVICE,
 .abstract = true,
 .instance_size = sizeof(PCIHostState),
+.instance_init = pci_host_initfn,
 };
 
 static void pci_host_register_types(void)
-- 
1.7.7.6




[Qemu-devel] [PATCH 01/10] convert RTC as piix3 proper QOM child

2012-11-07 Thread Wanpeng Li
Convert the RTC into a proper QOM child of the PIIX3.

RTC creation for the PIIX3 is done by calling object_init() with
qdev_init() being called for each child device in the PIIX3 ::init
function.

Signed-off-by: Anthony Liguori 
Signed-off-by: Wanpeng Li 
---
 hw/mc146818rtc.c |   29 +
 hw/mc146818rtc.h |   30 ++
 hw/pc.c  |   13 +++--
 hw/piix3.c   |   14 ++
 hw/piix3.h   |5 +
 5 files changed, 57 insertions(+), 34 deletions(-)

diff --git a/hw/mc146818rtc.c b/hw/mc146818rtc.c
index 98839f2..f385f4c 100644
--- a/hw/mc146818rtc.c
+++ b/hw/mc146818rtc.c
@@ -56,33 +56,6 @@
 #define RTC_CLOCK_RATE32768
 #define UIP_HOLD_LENGTH   (8 * NSEC_PER_SEC / 32768)
 
-typedef struct RTCState {
-ISADevice dev;
-MemoryRegion io;
-uint8_t cmos_data[128];
-uint8_t cmos_index;
-int32_t base_year;
-uint64_t base_rtc;
-uint64_t last_update;
-int64_t offset;
-qemu_irq irq;
-qemu_irq sqw_irq;
-int it_shift;
-/* periodic timer */
-QEMUTimer *periodic_timer;
-int64_t next_periodic_time;
-/* update-ended timer */
-QEMUTimer *update_timer;
-uint64_t next_alarm_time;
-uint16_t irq_reinject_on_ack_count;
-uint32_t irq_coalesced;
-uint32_t period;
-QEMUTimer *coalesced_timer;
-Notifier clock_reset_notifier;
-LostTickPolicy lost_tick_policy;
-Notifier suspend_notifier;
-} RTCState;
-
 static void rtc_set_time(RTCState *s);
 static void rtc_update_time(RTCState *s);
 static void rtc_set_cmos(RTCState *s, const struct tm *tm);
@@ -894,7 +867,7 @@ static void rtc_class_initfn(ObjectClass *klass, void *data)
 }
 
 static TypeInfo mc146818rtc_info = {
-.name  = "mc146818rtc",
+.name  = TYPE_RTC,
 .parent= TYPE_ISA_DEVICE,
 .instance_size = sizeof(RTCState),
 .class_init= rtc_class_initfn,
diff --git a/hw/mc146818rtc.h b/hw/mc146818rtc.h
index f286b6a..a8f1428 100644
--- a/hw/mc146818rtc.h
+++ b/hw/mc146818rtc.h
@@ -3,6 +3,36 @@
 
 #include "isa.h"
 #include "mc146818rtc_regs.h"
+#include "notify.h"
+
+#define TYPE_RTC "mc146818rtc"
+
+typedef struct RTCState {
+ISADevice dev;
+MemoryRegion io;
+uint8_t cmos_data[128];
+uint8_t cmos_index;
+int32_t base_year;
+uint64_t base_rtc;
+uint64_t last_update;
+int64_t offset;
+qemu_irq irq;
+qemu_irq sqw_irq;
+int it_shift;
+/* periodic timer */
+QEMUTimer *periodic_timer;
+int64_t next_periodic_time;
+/* update-ended timer */
+QEMUTimer *update_timer;
+uint64_t next_alarm_time;
+uint16_t irq_reinject_on_ack_count;
+uint32_t irq_coalesced;
+uint32_t period;
+QEMUTimer *coalesced_timer;
+Notifier clock_reset_notifier;
+LostTickPolicy lost_tick_policy;
+Notifier suspend_notifier;
+} RTCState;
 
 ISADevice *rtc_init(ISABus *bus, int base_year, qemu_irq intercept_irq);
 void rtc_set_memory(ISADevice *dev, int addr, int val);
diff --git a/hw/pc.c b/hw/pc.c
index c40e112..7fed363 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -90,6 +90,8 @@ struct e820_table {
 struct e820_entry entry[E820_NR_ENTRIES];
 } QEMU_PACKED __attribute((__aligned__(4)));
 
+qemu_irq rtc_irq;
+
 static const int ide_iobase[MAX_IDE_BUS] = { 0x1f0, 0x170 };
 static const int ide_iobase2[MAX_IDE_BUS] = { 0x3f6, 0x376 };
 static const int ide_irq[MAX_IDE_BUS] = { 14, 15 };
@@ -941,7 +943,6 @@ static void cpu_request_exit(void *opaque, int irq, int 
level)
 }
 
 static void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
-  ISADevice **rtc_state,
   ISADevice **floppy,
   bool no_vmport)
 {
@@ -950,7 +951,6 @@ static void pc_basic_device_init(ISABus *isa_bus, qemu_irq 
*gsi,
 DeviceState *hpet = NULL;
 int pit_isa_irq = 0;
 qemu_irq pit_alt_irq = NULL;
-qemu_irq rtc_irq = NULL;
 qemu_irq *a20_line;
 ISADevice *i8042, *port92, *vmmouse, *pit = NULL;
 qemu_irq *cpu_exit_irq;
@@ -977,9 +977,6 @@ static void pc_basic_device_init(ISABus *isa_bus, qemu_irq 
*gsi,
 rtc_irq = qdev_get_gpio_in(hpet, HPET_LEGACY_RTC_INT);
 }
 }
-*rtc_state = rtc_init(isa_bus, 2000, rtc_irq);
-
-qemu_register_boot_set(pc_boot_set, *rtc_state);
 
 if (!xen_enabled()) {
 if (kvm_irqchip_in_kernel()) {
@@ -1237,7 +1234,7 @@ static void pc_init1(MemoryRegion *system_memory,
 }
 
 /* init basic PC hardware */
-pc_basic_device_init(isa_bus, gsi, &rtc_state, &floppy, xen_enabled());
+pc_basic_device_init(isa_bus, gsi, &floppy, xen_enabled());
 
 for (i = 0; i < nb_nics; i++) {
 NICInfo *nd = &nd_table[i];
@@ -1269,6 +1266,10 @@ static void pc_init1(MemoryRegion *system_memory,
 }
 }
 
+/* FIXME */
+rtc_state = ISA_DEVICE(object_resolve_path("rtc", NULL));

[Qemu-devel] [PATCH 04/10] convert PCSPK as piix3 proper QOM child

2012-11-07 Thread Wanpeng Li
Convert the PCSPK into a proper QOM child of the PIIX3.

The PCSPK is now embedded in the PIIX3 state: it is initialized with
object_initialize() in piix3_initfn() and realized with qdev_init_nofail(),
like the other child devices, in piix3_realize() (the PIIX3 ::init function).

Signed-off-by: Wanpeng Li 
---
 hw/pcspk.c |   19 +--
 hw/pcspk.h |   19 +++
 hw/piix3.c |8 
 hw/piix3.h |2 ++
 4 files changed, 30 insertions(+), 18 deletions(-)

diff --git a/hw/pcspk.c b/hw/pcspk.c
index ad6491b..fc8bc99 100644
--- a/hw/pcspk.c
+++ b/hw/pcspk.c
@@ -25,31 +25,14 @@
 #include "hw.h"
 #include "pc.h"
 #include "isa.h"
-#include "audio/audio.h"
 #include "qemu-timer.h"
 #include "i8254.h"
 #include "pcspk.h"
 
-#define PCSPK_BUF_LEN 1792
 #define PCSPK_SAMPLE_RATE 32000
 #define PCSPK_MAX_FREQ (PCSPK_SAMPLE_RATE >> 1)
 #define PCSPK_MIN_COUNT ((PIT_FREQ + PCSPK_MAX_FREQ - 1) / PCSPK_MAX_FREQ)
 
-typedef struct {
-ISADevice dev;
-MemoryRegion ioport;
-uint32_t iobase;
-uint8_t sample_buf[PCSPK_BUF_LEN];
-QEMUSoundCard card;
-SWVoiceOut *voice;
-void *pit;
-unsigned int pit_count;
-unsigned int samples;
-unsigned int play_pos;
-int data_on;
-int dummy_refresh_clock;
-} PCSpkState;
-
 static const char *s_spk = "pcspk";
 static PCSpkState *pcspk_state;
 
@@ -188,7 +171,7 @@ static void pcspk_class_initfn(ObjectClass *klass, void 
*data)
 }
 
 static TypeInfo pcspk_info = {
-.name   = "isa-pcspk",
+.name   = TYPE_PCSPK,
 .parent = TYPE_ISA_DEVICE,
 .instance_size  = sizeof(PCSpkState),
 .class_init = pcspk_class_initfn,
diff --git a/hw/pcspk.h b/hw/pcspk.h
index 7f42bac..07b3a8f 100644
--- a/hw/pcspk.h
+++ b/hw/pcspk.h
@@ -27,6 +27,25 @@
 
 #include "hw.h"
 #include "isa.h"
+#include "audio/audio.h"
+
+#define PCSPK_BUF_LEN 1792
+#define TYPE_PCSPK "isa-pcspk"
+
+typedef struct {
+ISADevice dev;
+MemoryRegion ioport;
+uint32_t iobase;
+uint8_t sample_buf[PCSPK_BUF_LEN];
+QEMUSoundCard card;
+SWVoiceOut *voice;
+void *pit;
+unsigned int pit_count;
+unsigned int samples;
+unsigned int play_pos;
+int data_on;
+int dummy_refresh_clock;
+} PCSpkState;
 
 static inline ISADevice *pcspk_init(ISABus *bus, ISADevice *pit)
 {
diff --git a/hw/piix3.c b/hw/piix3.c
index 41739bd..35a0de9 100644
--- a/hw/piix3.c
+++ b/hw/piix3.c
@@ -250,6 +250,11 @@ static int piix3_realize(PCIDevice *dev)
 qdev_get_gpio_in(DEVICE(&s->pit), 0));
 }
 }
+/* Realize the PCSPK */
+qdev_set_parent_bus(DEVICE(&s->pcspk), BUS(s->bus));
+qdev_prop_set_uint32(DEVICE(&s->pcspk), "iobase", 0x61);
+qdev_prop_set_ptr(DEVICE(&s->pcspk), "pit", ISA_DEVICE(&s->pit));
+qdev_init_nofail(DEVICE(&s->pcspk));
 
 return 0;
 }
@@ -280,6 +285,9 @@ static void piix3_initfn(Object *obj)
 qdev_prop_set_int32(DEVICE(&s->pit), "iobase", 0x40);
}
 }
+
+object_initialize(&s->pcspk, TYPE_PCSPK);
+object_property_add_child(obj, "pcspk", OBJECT(&s->pcspk), NULL);
 }
 
 static void piix3_class_init(ObjectClass *klass, void *data)
diff --git a/hw/piix3.h b/hw/piix3.h
index 58486b9..32f7a95 100644
--- a/hw/piix3.h
+++ b/hw/piix3.h
@@ -34,6 +34,7 @@
 #include "hpet_emul.h"
 #include "i8254.h"
 #include "i8254_internal.h"
+#include "pcspk.h"
 
 #define PIIX_NUM_PIC_IRQS   16  /* i8259 * 2 */
 #define PIIX_NUM_PIRQS  4ULL/* PIRQ[A-D] */
@@ -77,6 +78,7 @@ typedef struct PIIX3State {
 KVMPITState kvm_pit;
 } pit;
 #endif
+PCSpkState pcspk;
 
 qemu_irq *pic;
 
-- 
1.7.7.6




[Qemu-devel] [PATCH 06/10] convert i8042 as piix3 proper QOM child

2012-11-07 Thread Wanpeng Li
Convert the i8042 into a proper QOM child of the PIIX3.

The i8042 is now embedded in the PIIX3 state: it is initialized with
object_initialize() in piix3_initfn() and realized with qdev_init_nofail(),
like the other child devices, in piix3_realize() (the PIIX3 ::init function).

Signed-off-by: Wanpeng Li 

---
 hw/pc.c|9 ++---
 hw/pckbd.c |   24 +---
 hw/piix3.c |   13 +++--
 hw/piix3.h |   24 
 hw/ps2.h   |3 +++
 5 files changed, 41 insertions(+), 32 deletions(-)

diff --git a/hw/pc.c b/hw/pc.c
index 94fdea9..a14bf5a 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -931,8 +931,7 @@ static void pc_basic_device_init(ISABus *isa_bus, qemu_irq 
*gsi,
 {
 int i;
 DriveInfo *fd[MAX_FD];
-qemu_irq *a20_line;
-ISADevice *i8042, *vmmouse;
+ISADevice *vmmouse;
 qemu_irq *cpu_exit_irq;
 
 register_ioport_write(0x80, 1, 1, ioport80_write, NULL);
@@ -951,9 +950,6 @@ static void pc_basic_device_init(ISABus *isa_bus, qemu_irq 
*gsi,
 }
 }
 
-a20_line = qemu_allocate_irqs(handle_a20_line_change, first_cpu, 1);
-i8042 = isa_create_simple(isa_bus, "i8042");
-i8042_setup_a20_line(i8042, &a20_line[0]);
 if (!no_vmport) {
 vmport_init(isa_bus);
 vmmouse = isa_try_create(isa_bus, "vmmouse");
@@ -961,7 +957,6 @@ static void pc_basic_device_init(ISABus *isa_bus, qemu_irq 
*gsi,
 vmmouse = NULL;
 }
 if (vmmouse) {
-qdev_prop_set_ptr(&vmmouse->qdev, "ps2_mouse", i8042);
 qdev_init_nofail(&vmmouse->qdev);
 }
 
@@ -1153,8 +1148,8 @@ static void pc_init1(MemoryRegion *system_memory,
 i440fx_state = NULL;
 isa_bus = isa_bus_new(NULL, system_io);
 no_hpet = 1;
+isa_bus_irqs(isa_bus, gsi);
 }
-isa_bus_irqs(isa_bus, gsi);
 
 if (kvm_irqchip_in_kernel()) {
 i8259 = kvm_i8259_init(isa_bus);
diff --git a/hw/pckbd.c b/hw/pckbd.c
index 5bb3e0a..d76bcbb 100644
--- a/hw/pckbd.c
+++ b/hw/pckbd.c
@@ -126,22 +126,6 @@
 #define KBD_PENDING_KBD 1
 #define KBD_PENDING_AUX 2
 
-typedef struct KBDState {
-uint8_t write_cmd; /* if non zero, write data to port 60 is expected */
-uint8_t status;
-uint8_t mode;
-uint8_t outport;
-/* Bitmask of devices with data available.  */
-uint8_t pending;
-void *kbd;
-void *mouse;
-
-qemu_irq irq_kbd;
-qemu_irq irq_mouse;
-qemu_irq *a20_out;
-hwaddr mask;
-} KBDState;
-
 /* update irq and KBD_STAT_[MOUSE_]OBF */
 /* XXX: not generating the irqs if KBD_MODE_DISABLE_KBD is set may be
incorrect, but it avoids having to simulate exact delays */
@@ -431,12 +415,6 @@ void i8042_mm_init(qemu_irq kbd_irq, qemu_irq mouse_irq,
 qemu_register_reset(kbd_reset, s);
 }
 
-typedef struct ISAKBDState {
-ISADevice dev;
-KBDState kbd;
-MemoryRegion io[2];
-} ISAKBDState;
-
 void i8042_isa_mouse_fake_event(void *opaque)
 {
 ISADevice *dev = opaque;
@@ -513,7 +491,7 @@ static void i8042_class_initfn(ObjectClass *klass, void 
*data)
 }
 
 static TypeInfo i8042_info = {
-.name  = "i8042",
+.name  = TYPE_I8042,
 .parent= TYPE_ISA_DEVICE,
 .instance_size = sizeof(ISAKBDState),
 .class_init= i8042_class_initfn,
diff --git a/hw/piix3.c b/hw/piix3.c
index 675212e..c6bf3cb 100644
--- a/hw/piix3.c
+++ b/hw/piix3.c
@@ -202,6 +202,8 @@ static int piix3_realize(PCIDevice *dev)
 s->bus = isa_bus_new(DEVICE(dev), pci_address_space_io(dev));
 isa_bus_irqs(s->bus, s->pic);
 
+a20_line = qemu_allocate_irqs(handle_a20_line_change, first_cpu, 2);
+
 /* Realize the RTC */
 qdev_set_parent_bus(DEVICE(&s->rtc), BUS(s->bus));
 qdev_init_nofail(DEVICE(&s->rtc));
@@ -260,8 +262,12 @@ static int piix3_realize(PCIDevice *dev)
 /* Realize the PORT92 */
 qdev_set_parent_bus(DEVICE(&s->port92), BUS(s->bus));
 qdev_init_nofail(DEVICE(&s->port92));
-a20_line = qemu_allocate_irqs(handle_a20_line_change, first_cpu, 1);
-s->port92.a20_out = &a20_line[0];
+s->port92.a20_out = &a20_line[1];
+
+/* Realize the I8042 */
+qdev_set_parent_bus(DEVICE(&s->i8042), BUS(s->bus));
+i8042_setup_a20_line(ISA_DEVICE(&s->i8042), &a20_line[0]);
+qdev_init_nofail(DEVICE(&s->i8042));
 
 return 0;
 }
@@ -298,6 +304,9 @@ static void piix3_initfn(Object *obj)
 
 object_initialize(&s->port92, TYPE_PORT92);
 object_property_add_child(obj, "port92", OBJECT(&s->port92), NULL);
+
+object_initialize(&s->i8042, TYPE_I8042);
+object_property_add_child(obj, "i8042", OBJECT(&s->i8042), NULL);
 }
 
 static void piix3_class_init(ObjectClass *klass, void *data)
diff --git a/hw/piix3.h b/hw/piix3.h
index 4e5ee20..94a0daf 100644
--- a/hw/piix3.h
+++ b/hw/piix3.h
@@ -35,6 +35,7 @@
 #include "i8254.h"
 #include "i8254_internal.h"
 #include "pcspk.h"

[Qemu-devel] [PATCH 07/10] convert VMPORT as piix3 proper QOM child

2012-11-07 Thread Wanpeng Li
convert VMPORT as piix3 proper QOM child.

The VMPORT child of the PIIX3 is created by calling object_initialize()
in piix3_initfn(); qdev_init_nofail() is then called on the child device
in piix3_realize().

Signed-off-by: Wanpeng Li 

---
 hw/pc.c |1 -
 hw/pc.h |5 -
 hw/piix3.c  |   11 +++
 hw/piix3.h  |   11 +++
 hw/vmport.c |   10 +-
 5 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/hw/pc.c b/hw/pc.c
index a14bf5a..99cd314 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -951,7 +951,6 @@ static void pc_basic_device_init(ISABus *isa_bus, qemu_irq 
*gsi,
 }
 
 if (!no_vmport) {
-vmport_init(isa_bus);
 vmmouse = isa_try_create(isa_bus, "vmmouse");
 } else {
 vmmouse = NULL;
diff --git a/hw/pc.h b/hw/pc.h
index 17d48a0..5b7bc26 100644
--- a/hw/pc.h
+++ b/hw/pc.h
@@ -54,11 +54,6 @@ typedef struct GSIState {
 qemu_irq ioapic_irq[IOAPIC_NUM_PINS];
 } GSIState;
 
-/* vmport.c */
-static inline void vmport_init(ISABus *bus)
-{
-isa_create_simple(bus, "vmport");
-}
 void vmport_register(unsigned char command, IOPortReadFunc *func, void 
*opaque);
 void vmmouse_get_data(uint32_t *data);
 void vmmouse_set_data(const uint32_t *data);
diff --git a/hw/piix3.c b/hw/piix3.c
index c6bf3cb..27c8f50 100644
--- a/hw/piix3.c
+++ b/hw/piix3.c
@@ -269,6 +269,12 @@ static int piix3_realize(PCIDevice *dev)
 i8042_setup_a20_line(ISA_DEVICE(&s->i8042), &a20_line[0]);
 qdev_init_nofail(DEVICE(&s->i8042));
 
+/* Realize the VMPORT */
+if (!xen_enabled()) {
+qdev_set_parent_bus(DEVICE(&s->vmport), BUS(s->bus));
+qdev_init_nofail(DEVICE(&s->vmport));
+}
+
 return 0;
 }
 
@@ -307,6 +313,11 @@ static void piix3_initfn(Object *obj)
 
 object_initialize(&s->i8042, TYPE_I8042);
 object_property_add_child(obj, "i8042", OBJECT(&s->i8042), NULL);
+
+if (!xen_enabled()) {
+object_initialize(&s->vmport, TYPE_VMPORT);
+object_property_add_child(obj, "vmport", OBJECT(&s->vmport), NULL);
+}
 }
 
 static void piix3_class_init(ObjectClass *klass, void *data)
diff --git a/hw/piix3.h b/hw/piix3.h
index 94a0daf..477e39e 100644
--- a/hw/piix3.h
+++ b/hw/piix3.h
@@ -54,6 +54,16 @@ typedef struct KVMPITState {
 
 #define TYPE_PORT92 "port92"
 
+#define TYPE_VMPORT "vmport"
+#define VMPORT_ENTRIES 0x2c
+
+typedef struct _VMPortState {
+ISADevice dev;
+MemoryRegion io;
+IOPortReadFunc *func[VMPORT_ENTRIES];
+void *opaque[VMPORT_ENTRIES];
+} VMPortState;
+
 /* port 92 stuff: could be split off */
 typedef struct Port92State {
 ISADevice dev;
@@ -110,6 +120,7 @@ typedef struct PIIX3State {
 PITCommonState pit;
 KVMPITState kvm_pit;
 } pit;
+VMPortState vmport;
 #endif
 PCSpkState pcspk;
 Port92State port92;
diff --git a/hw/vmport.c b/hw/vmport.c
index 3ab3a14..45daef6 100644
--- a/hw/vmport.c
+++ b/hw/vmport.c
@@ -35,14 +35,6 @@
 #define VMPORT_ENTRIES 0x2c
 #define VMPORT_MAGIC   0x564D5868
 
-typedef struct _VMPortState
-{
-ISADevice dev;
-MemoryRegion io;
-IOPortReadFunc *func[VMPORT_ENTRIES];
-void *opaque[VMPORT_ENTRIES];
-} VMPortState;
-
 static VMPortState *port_state;
 
 void vmport_register(unsigned char command, IOPortReadFunc *func, void *opaque)
@@ -156,7 +148,7 @@ static void vmport_class_initfn(ObjectClass *klass, void 
*data)
 }
 
 static TypeInfo vmport_info = {
-.name  = "vmport",
+.name  = TYPE_VMPORT,
 .parent= TYPE_ISA_DEVICE,
 .instance_size = sizeof(VMPortState),
 .class_init= vmport_class_initfn,
-- 
1.7.7.6




[Qemu-devel] [PATCH 08/10] convert VMMOUSE as piix3 proper QOM child

2012-11-07 Thread Wanpeng Li
convert VMMOUSE as piix3 proper QOM child.

The VMMOUSE child of the PIIX3 is created by calling object_initialize()
in piix3_initfn(); qdev_init_nofail() is then called on the child device
in piix3_realize().

Signed-off-by: Wanpeng Li 

---
 hw/pc.c  |   15 ++-
 hw/piix3.c   |   10 ++
 hw/piix3.h   |   16 
 hw/vmmouse.c |   14 --
 4 files changed, 28 insertions(+), 27 deletions(-)

diff --git a/hw/pc.c b/hw/pc.c
index 99cd314..9798c24 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -926,12 +926,10 @@ static void cpu_request_exit(void *opaque, int irq, int 
level)
 }
 
 static void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
-  ISADevice **floppy,
-  bool no_vmport)
+  ISADevice **floppy)
 {
 int i;
 DriveInfo *fd[MAX_FD];
-ISADevice *vmmouse;
 qemu_irq *cpu_exit_irq;
 
 register_ioport_write(0x80, 1, 1, ioport80_write, NULL);
@@ -950,15 +948,6 @@ static void pc_basic_device_init(ISABus *isa_bus, qemu_irq 
*gsi,
 }
 }
 
-if (!no_vmport) {
-vmmouse = isa_try_create(isa_bus, "vmmouse");
-} else {
-vmmouse = NULL;
-}
-if (vmmouse) {
-qdev_init_nofail(&vmmouse->qdev);
-}
-
 cpu_exit_irq = qemu_allocate_irqs(cpu_request_exit, NULL, 1);
 DMA_init(0, cpu_exit_irq);
 
@@ -1174,7 +1163,7 @@ static void pc_init1(MemoryRegion *system_memory,
 }
 
 /* init basic PC hardware */
-pc_basic_device_init(isa_bus, gsi, &floppy, xen_enabled());
+pc_basic_device_init(isa_bus, gsi, &floppy);
 
 for (i = 0; i < nb_nics; i++) {
 NICInfo *nd = &nd_table[i];
diff --git a/hw/piix3.c b/hw/piix3.c
index 27c8f50..2922fd4 100644
--- a/hw/piix3.c
+++ b/hw/piix3.c
@@ -271,8 +271,15 @@ static int piix3_realize(PCIDevice *dev)
 
 /* Realize the VMPORT */
 if (!xen_enabled()) {
+/* Realize the VMPORT */
 qdev_set_parent_bus(DEVICE(&s->vmport), BUS(s->bus));
 qdev_init_nofail(DEVICE(&s->vmport));
+
+/* Realize the VMMOUSE */
+qdev_set_parent_bus(DEVICE(&s->vmmouse), BUS(s->bus));
+qdev_prop_set_ptr(DEVICE(&s->vmmouse),
+ "ps2_mouse", ISA_DEVICE(&s->i8042));
+qdev_init_nofail(DEVICE(&s->vmmouse));
 }
 
 return 0;
@@ -318,6 +325,9 @@ static void piix3_initfn(Object *obj)
 object_initialize(&s->vmport, TYPE_VMPORT);
 object_property_add_child(obj, "vmport", OBJECT(&s->vmport), NULL);
 }
+
+object_initialize(&s->vmmouse, TYPE_VMMOUSE);
+object_property_add_child(obj, "vmmouse", OBJECT(&s->vmmouse), NULL);
 }
 
 static void piix3_class_init(ObjectClass *klass, void *data)
diff --git a/hw/piix3.h b/hw/piix3.h
index 477e39e..29ae820 100644
--- a/hw/piix3.h
+++ b/hw/piix3.h
@@ -36,6 +36,7 @@
 #include "i8254_internal.h"
 #include "pcspk.h"
 #include "ps2.h"
+#include "console.h"
 
 #define PIIX_NUM_PIC_IRQS   16  /* i8259 * 2 */
 #define PIIX_NUM_PIRQS  4ULL/* PIRQ[A-D] */
@@ -57,6 +58,20 @@ typedef struct KVMPITState {
 #define TYPE_VMPORT "vmport"
 #define VMPORT_ENTRIES 0x2c
 
+#define TYPE_VMMOUSE "vmmouse"
+#define VMMOUSE_QUEUE_SIZE 1024
+
+typedef struct _VMMouseState {
+ISADevice dev;
+uint32_t queue[VMMOUSE_QUEUE_SIZE];
+int32_t queue_size;
+uint16_t nb_queue;
+uint16_t status;
+uint8_t absolute;
+QEMUPutMouseEntry *entry;
+void *ps2_mouse;
+} VMMouseState;
+
 typedef struct _VMPortState {
 ISADevice dev;
 MemoryRegion io;
@@ -121,6 +136,7 @@ typedef struct PIIX3State {
 KVMPITState kvm_pit;
 } pit;
 VMPortState vmport;
+VMMouseState vmmouse;
 #endif
 PCSpkState pcspk;
 Port92State port92;
diff --git a/hw/vmmouse.c b/hw/vmmouse.c
index 6338efa..022e493 100644
--- a/hw/vmmouse.c
+++ b/hw/vmmouse.c
@@ -41,8 +41,6 @@
 #define VMMOUSE_REQUEST_RELATIVE   0x4c455252
 #define VMMOUSE_REQUEST_ABSOLUTE   0x53424152
 
-#define VMMOUSE_QUEUE_SIZE 1024
-
 #define VMMOUSE_VERSION0x3442554a
 
 #ifdef DEBUG_VMMOUSE
@@ -51,18 +49,6 @@
 #define DPRINTF(fmt, ...) do { } while (0)
 #endif
 
-typedef struct _VMMouseState
-{
-ISADevice dev;
-uint32_t queue[VMMOUSE_QUEUE_SIZE];
-int32_t queue_size;
-uint16_t nb_queue;
-uint16_t status;
-uint8_t absolute;
-QEMUPutMouseEntry *entry;
-void *ps2_mouse;
-} VMMouseState;
-
 static uint32_t vmmouse_get_status(VMMouseState *s)
 {
 DPRINTF("vmmouse_get_status()\n");
-- 
1.7.7.6




[Qemu-devel] [PATCH 10/10] convert IOAPIC as piix3 proper QOM child

2012-11-07 Thread Wanpeng Li
convert IOAPIC as piix3 proper QOM child.

IOAPIC creation for the PIIX3 is done by calling object_init() with
qdev_init() being called for each child device in the PIIX3 ::init
function.

Signed-off-by: Wanpeng Li 

---
 hw/ioapic.c  |2 +-
 hw/ioapic.h  |2 --
 hw/ioapic_internal.h |2 ++
 hw/kvm/ioapic.c  |9 +
 hw/pc.c  |   30 ++
 hw/pc.h  |2 ++
 hw/piix3.c   |   38 ++
 hw/piix3.h   |   13 +
 8 files changed, 59 insertions(+), 39 deletions(-)

diff --git a/hw/ioapic.c b/hw/ioapic.c
index 7273095..927a099 100644
--- a/hw/ioapic.c
+++ b/hw/ioapic.c
@@ -245,7 +245,7 @@ static void ioapic_class_init(ObjectClass *klass, void 
*data)
 }
 
 static TypeInfo ioapic_info = {
-.name  = "ioapic",
+.name  = TYPE_IOAPIC,
 .parent= TYPE_IOAPIC_COMMON,
 .instance_size = sizeof(IOAPICCommonState),
 .class_init= ioapic_class_init,
diff --git a/hw/ioapic.h b/hw/ioapic.h
index 86e63da..649dd0c 100644
--- a/hw/ioapic.h
+++ b/hw/ioapic.h
@@ -20,8 +20,6 @@
 #ifndef HW_IOAPIC_H
 #define HW_IOAPIC_H
 
-#define IOAPIC_NUM_PINS 24
-
 void ioapic_eoi_broadcast(int vector);
 
 #endif /* !HW_IOAPIC_H */
diff --git a/hw/ioapic_internal.h b/hw/ioapic_internal.h
index e04c9f3..d3cb4c9 100644
--- a/hw/ioapic_internal.h
+++ b/hw/ioapic_internal.h
@@ -88,6 +88,8 @@ typedef struct IOAPICCommonClass {
 void (*post_load)(IOAPICCommonState *s);
 } IOAPICCommonClass;
 
+#define IOAPIC_NUM_PINS 24
+
 struct IOAPICCommonState {
 SysBusDevice busdev;
 MemoryRegion io_memory;
diff --git a/hw/kvm/ioapic.c b/hw/kvm/ioapic.c
index 6c3b8fe..e88843e 100644
--- a/hw/kvm/ioapic.c
+++ b/hw/kvm/ioapic.c
@@ -15,13 +15,6 @@
 #include "hw/apic_internal.h"
 #include "kvm.h"
 
-typedef struct KVMIOAPICState KVMIOAPICState;
-
-struct KVMIOAPICState {
-IOAPICCommonState ioapic;
-uint32_t kvm_gsi_base;
-};
-
 static void kvm_ioapic_get(IOAPICCommonState *s)
 {
 struct kvm_irqchip chip;
@@ -111,7 +104,7 @@ static void kvm_ioapic_class_init(ObjectClass *klass, void 
*data)
 }
 
 static TypeInfo kvm_ioapic_info = {
-.name  = "kvm-ioapic",
+.name  = TYPE_KVM_IOAPIC,
 .parent = TYPE_IOAPIC_COMMON,
 .instance_size = sizeof(KVMIOAPICState),
 .class_init = kvm_ioapic_class_init,
diff --git a/hw/pc.c b/hw/pc.c
index 74cec55..31031fa 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -79,6 +79,8 @@
 
 BusState *idebus[MAX_IDE_BUS];
 
+GSIState *gsi_state;
+
 struct e820_entry {
 uint64_t address;
 uint64_t length;
@@ -1011,30 +1013,6 @@ static void kvm_piix3_gsi_handler(void *opaque, int n, 
int level)
 }
 }
 
-static void ioapic_init(GSIState *gsi_state)
-{
-DeviceState *dev;
-SysBusDevice *d;
-unsigned int i;
-
-if (kvm_irqchip_in_kernel()) {
-dev = qdev_create(NULL, "kvm-ioapic");
-} else {
-dev = qdev_create(NULL, "ioapic");
-}
-
-/* FIXME: this should be under the piix3.  */
-object_property_add_child(object_resolve_path("i440fx", NULL),
-  "ioapic", OBJECT(dev), NULL);
-qdev_init_nofail(dev);
-d = sysbus_from_qdev(dev);
-sysbus_mmio_map(d, 0, 0xfec0);
-
-for (i = 0; i < IOAPIC_NUM_PINS; i++) {
-gsi_state->ioapic_irq[i] = qdev_get_gpio_in(dev, i);
-}
-}
-
 static PCIBus *i440fx_init(I440FXPMCState **pi440fx_state, int *piix3_devfn,
ISABus **isa_bus, qemu_irq *pic,
MemoryRegion *address_space_mem,
@@ -1091,7 +1069,6 @@ static void pc_init1(MemoryRegion *system_memory,
 qemu_irq *gsi;
 qemu_irq *i8259;
 qemu_irq *smi_irq;
-GSIState *gsi_state;
 ISADevice *rtc_state;
 ISADevice *floppy;
 
@@ -1153,9 +1130,6 @@ static void pc_init1(MemoryRegion *system_memory,
 for (i = 0; i < ISA_NUM_IRQS; i++) {
 gsi_state->i8259_irq[i] = i8259[i];
 }
-if (pci_enabled) {
-ioapic_init(gsi_state);
-}
 
 pc_register_ferr_irq(gsi[13]);
 
diff --git a/hw/pc.h b/hw/pc.h
index 620349f..7f6ff93 100644
--- a/hw/pc.h
+++ b/hw/pc.h
@@ -54,6 +54,8 @@ typedef struct GSIState {
 qemu_irq ioapic_irq[IOAPIC_NUM_PINS];
 } GSIState;
 
+extern GSIState *gsi_state;
+
 void vmport_register(unsigned char command, IOPortReadFunc *func, void 
*opaque);
 void vmmouse_get_data(uint32_t *data);
 void vmmouse_set_data(const uint32_t *data);
diff --git a/hw/piix3.c b/hw/piix3.c
index 7ca0f83..99b1ecc 100644
--- a/hw/piix3.c
+++ b/hw/piix3.c
@@ -203,6 +203,7 @@ static int piix3_realize(PCIDevice *dev)
 qemu_irq *a20_line;
 DriveInfo *hd[MAX_IDE_BUS * MAX_IDE_DEVS];
 int i;
+SysBusDevice *d;
 
 /* Initialize ISA Bus */
 s->bus = isa_bus_new(DEVICE(dev), pci_address_space_io(dev));
@@ -313,6 +314,31 @@ static int piix3_re

[Qemu-devel] [PATCH v5 2/3] merge pc_piix.c to pc.c

2012-11-07 Thread Wanpeng Li
A long time ago, there was a grand plan to merge q35 chipset support. The start
of that series was a refactoring of pc.c which split a bunch of the "common"
functionality into a separate file that could be shared by the two.

But q35 never got merged and the refactoring, in retrospect, just made things
worse. Making things proper objects and using composition is the right way
to share common devices.

By pulling these files back together, we can start to fix some of this mess.

Signed-off-by: Anthony Liguori 
Signed-off-by: Wanpeng Li 
---
 hw/i386/Makefile.objs |1 -
 hw/pc.c   |  714 ++-
 hw/pc.h   |   23 +--
 hw/pc_piix.c  |  740 -
 4 files changed, 706 insertions(+), 772 deletions(-)
 delete mode 100644 hw/pc_piix.c

diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs
index 49b32d0..868020c 100644
--- a/hw/i386/Makefile.objs
+++ b/hw/i386/Makefile.objs
@@ -4,7 +4,6 @@ obj-y += sga.o ioapic_common.o ioapic.o i440fx.o piix3.o
 obj-y += vmport.o
 obj-y += pci-hotplug.o smbios.o wdt_ib700.o
 obj-y += debugcon.o multiboot.o
-obj-y += pc_piix.o
 obj-y += pc_sysfw.o
 obj-$(CONFIG_XEN) += xen_platform.o xen_apic.o
 obj-$(CONFIG_XEN_PCI_PASSTHROUGH) += xen-host-pci-device.o
diff --git a/hw/pc.c b/hw/pc.c
index 60919e4..55bb797 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -28,6 +28,7 @@
 #include "fdc.h"
 #include "ide.h"
 #include "pci.h"
+#include "usb.h"
 #include "monitor.h"
 #include "fw_cfg.h"
 #include "hpet_emul.h"
@@ -49,8 +50,11 @@
 #include "ui/qemu-spice.h"
 #include "memory.h"
 #include "exec-memory.h"
+#include "kvm/clock.h"
 #include "arch_init.h"
 #include "bitmap.h"
+#include "smbus.h"
+#include "boards.h"
 
 /* debug PC/ISA interrupts */
 //#define DEBUG_IRQ
@@ -73,6 +77,8 @@
 
 #define E820_NR_ENTRIES16
 
+#define MAX_IDE_BUS 2
+
 struct e820_entry {
 uint64_t address;
 uint64_t length;
@@ -84,10 +90,14 @@ struct e820_table {
 struct e820_entry entry[E820_NR_ENTRIES];
 } QEMU_PACKED __attribute((__aligned__(4)));
 
+static const int ide_iobase[MAX_IDE_BUS] = { 0x1f0, 0x170 };
+static const int ide_iobase2[MAX_IDE_BUS] = { 0x3f6, 0x376 };
+static const int ide_irq[MAX_IDE_BUS] = { 14, 15 };
+
 static struct e820_table e820_table;
 struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX};
 
-void gsi_handler(void *opaque, int n, int level)
+static void gsi_handler(void *opaque, int n, int level)
 {
 GSIState *s = opaque;
 
@@ -105,7 +115,7 @@ static void ioport80_write(void *opaque, uint32_t addr, 
uint32_t data)
 /* MSDOS compatibility mode FPU exception support */
 static qemu_irq ferr_irq;
 
-void pc_register_ferr_irq(qemu_irq irq)
+static void pc_register_ferr_irq(qemu_irq irq)
 {
 ferr_irq = irq;
 }
@@ -320,7 +330,7 @@ static void pc_cmos_init_late(void *opaque)
 qemu_unregister_reset(pc_cmos_init_late, opaque);
 }
 
-void pc_cmos_init(ram_addr_t ram_size, ram_addr_t above_4g_mem_size,
+static void pc_cmos_init(ram_addr_t ram_size, ram_addr_t above_4g_mem_size,
   const char *boot_device,
   ISADevice *floppy, BusState *idebus0, BusState *idebus1,
   ISADevice *s)
@@ -827,7 +837,7 @@ static const int ne2000_irq[NE2000_NB_MAX] = { 9, 10, 11, 
3, 4, 5 };
 static const int parallel_io[MAX_PARALLEL_PORTS] = { 0x378, 0x278, 0x3bc };
 static const int parallel_irq[MAX_PARALLEL_PORTS] = { 7, 7, 7 };
 
-void pc_init_ne2k_isa(ISABus *bus, NICInfo *nd)
+static void pc_init_ne2k_isa(ISABus *bus, NICInfo *nd)
 {
 static int nb_ne2k = 0;
 
@@ -877,7 +887,7 @@ void pc_cpus_init(const char *cpu_model)
 }
 }
 
-void *pc_memory_init(MemoryRegion *system_memory,
+static void *pc_memory_init(MemoryRegion *system_memory,
 const char *kernel_filename,
 const char *kernel_cmdline,
 const char *initrd_filename,
@@ -902,12 +912,12 @@ void *pc_memory_init(MemoryRegion *system_memory,
 return fw_cfg;
 }
 
-qemu_irq *pc_allocate_cpu_irq(void)
+static qemu_irq *pc_allocate_cpu_irq(void)
 {
 return qemu_allocate_irqs(pic_irq_request, NULL, 1);
 }
 
-DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus)
+static DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus)
 {
 DeviceState *dev = NULL;
 
@@ -930,7 +940,7 @@ static void cpu_request_exit(void *opaque, int irq, int 
level)
 }
 }
 
-void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
+static void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
   ISADevice **rtc_state,
   ISADevice **floppy,
   bool no_vmport)
@@ -1021,7 +1031,7 @@ void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
 *floppy = fdctrl_init_isa(isa_b

[Qemu-devel] [PATCH 02/10] convert HPET as piix3 proper QOM child

2012-11-07 Thread Wanpeng Li
convert HPET as piix3 proper QOM child.

HPET creation for the PIIX3 is done by calling object_init() with
qdev_init() being called for each child device in the PIIX3 ::init
function.

Signed-off-by: Anthony Liguori 
Signed-off-by: Wanpeng Li 

---
 hw/hpet.c  |   35 ---
 hw/hpet_emul.h |   40 
 hw/pc.c|   21 -
 hw/piix3.c |   28 ++--
 hw/piix3.h |4 ++--
 5 files changed, 68 insertions(+), 60 deletions(-)

diff --git a/hw/hpet.c b/hw/hpet.c
index 50ac067..b128505 100644
--- a/hw/hpet.c
+++ b/hw/hpet.c
@@ -42,41 +42,6 @@
 
 #define HPET_MSI_SUPPORT0
 
-struct HPETState;
-typedef struct HPETTimer {  /* timers */
-uint8_t tn; /*timer number*/
-QEMUTimer *qemu_timer;
-struct HPETState *state;
-/* Memory-mapped, software visible timer registers */
-uint64_t config;/* configuration/cap */
-uint64_t cmp;   /* comparator */
-uint64_t fsb;   /* FSB route */
-/* Hidden register state */
-uint64_t period;/* Last value written to comparator */
-uint8_t wrap_flag;  /* timer pop will indicate wrap for one-shot 32-bit
- * mode. Next pop will be actual timer expiration.
- */
-} HPETTimer;
-
-typedef struct HPETState {
-SysBusDevice busdev;
-MemoryRegion iomem;
-uint64_t hpet_offset;
-qemu_irq irqs[HPET_NUM_IRQ_ROUTES];
-uint32_t flags;
-uint8_t rtc_irq_level;
-qemu_irq pit_enabled;
-uint8_t num_timers;
-HPETTimer timer[HPET_MAX_TIMERS];
-
-/* Memory-mapped, software visible registers */
-uint64_t capability;/* capabilities */
-uint64_t config;/* configuration */
-uint64_t isr;   /* interrupt status reg */
-uint64_t hpet_counter;  /* main counter */
-uint8_t  hpet_id;   /* instance id */
-} HPETState;
-
 static uint32_t hpet_in_legacy_mode(HPETState *s)
 {
 return s->config & HPET_CFG_LEGACY;
diff --git a/hw/hpet_emul.h b/hw/hpet_emul.h
index 757f79f..46dee92 100644
--- a/hw/hpet_emul.h
+++ b/hw/hpet_emul.h
@@ -13,6 +13,8 @@
 #ifndef QEMU_HPET_EMUL_H
 #define QEMU_HPET_EMUL_H
 
+#include "sysbus.h"
+
 #define HPET_BASE   0xfed0
 #define HPET_CLK_PERIOD 1000ULL /* 1000 femtoseconds == 10ns*/
 
@@ -71,4 +73,42 @@ struct hpet_fw_config
 } QEMU_PACKED;
 
 extern struct hpet_fw_config hpet_cfg;
+
+#define TYPE_HPET "hpet"
+
+struct HPETState;
+typedef struct HPETTimer {  /* timers */
+uint8_t tn; /*timer number*/
+QEMUTimer *qemu_timer;
+struct HPETState *state;
+/* Memory-mapped, software visible timer registers */
+uint64_t config;/* configuration/cap */
+uint64_t cmp;   /* comparator */
+uint64_t fsb;   /* FSB route */
+/* Hidden register state */
+uint64_t period;/* Last value written to comparator */
+uint8_t wrap_flag;  /* timer pop will indicate wrap for one-shot 32-bit
+ * mode. Next pop will be actual timer expiration.
+ */
+} HPETTimer;
+
+typedef struct HPETState {
+SysBusDevice busdev;
+MemoryRegion iomem;
+uint64_t hpet_offset;
+qemu_irq irqs[HPET_NUM_IRQ_ROUTES];
+uint32_t flags;
+uint8_t rtc_irq_level;
+qemu_irq pit_enabled;
+uint8_t num_timers;
+HPETTimer timer[HPET_MAX_TIMERS];
+
+/* Memory-mapped, software visible registers */
+uint64_t capability;/* capabilities */
+uint64_t config;/* configuration */
+uint64_t isr;   /* interrupt status reg */
+uint64_t hpet_counter;  /* main counter */
+uint8_t  hpet_id;   /* instance id */
+} HPETState;
+
 #endif
diff --git a/hw/pc.c b/hw/pc.c
index 7fed363..7105f4e 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -90,8 +90,6 @@ struct e820_table {
 struct e820_entry entry[E820_NR_ENTRIES];
 } QEMU_PACKED __attribute((__aligned__(4)));
 
-qemu_irq rtc_irq;
-
 static const int ide_iobase[MAX_IDE_BUS] = { 0x1f0, 0x170 };
 static const int ide_iobase2[MAX_IDE_BUS] = { 0x3f6, 0x376 };
 static const int ide_irq[MAX_IDE_BUS] = { 14, 15 };
@@ -959,25 +957,6 @@ static void pc_basic_device_init(ISABus *isa_bus, qemu_irq 
*gsi,
 
 register_ioport_write(0xf0, 1, 1, ioportF0_write, NULL);
 
-/*
- * Check if an HPET shall be created.
- *
- * Without KVM_CAP_PIT_STATE2, we cannot switch off the in-kernel PIT
- * when the HPET wants to take over. Thus we have to disable the latter.
- */
-if (!no_hpet && (!kvm_irqchip_in_kernel() || kvm_has_pit_state2())) {
-hpet = sysbus_try_create_simple("hpet", HPET_BASE, NULL);
-
-if (hpet) {
-for (i = 0; i < GSI_NUM_PINS; i++) {
-sys

[Qemu-devel] [PATCH 05/10] convert PORT92 as piix3 proper QOM child

2012-11-07 Thread Wanpeng Li
convert PORT92 as piix3 proper QOM child.

The PORT92 child of the PIIX3 is created by calling object_initialize()
in piix3_initfn(); qdev_init_nofail() is then called on the child device
in piix3_realize().

Signed-off-by: Wanpeng Li 

---
 hw/pc.c|   25 -
 hw/pc.h|3 +++
 hw/piix3.c |   10 ++
 hw/piix3.h |   11 +++
 4 files changed, 28 insertions(+), 21 deletions(-)

diff --git a/hw/pc.c b/hw/pc.c
index 80b437f..94fdea9 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -421,14 +421,6 @@ static void pc_cmos_init(ram_addr_t ram_size, ram_addr_t 
above_4g_mem_size,
 qemu_register_reset(pc_cmos_init_late, &arg);
 }
 
-/* port 92 stuff: could be split off */
-typedef struct Port92State {
-ISADevice dev;
-MemoryRegion io;
-uint8_t outport;
-qemu_irq *a20_out;
-} Port92State;
-
 static void port92_write(void *opaque, hwaddr addr, uint64_t val,
  unsigned size)
 {
@@ -453,13 +445,6 @@ static uint64_t port92_read(void *opaque, hwaddr addr,
 return ret;
 }
 
-static void port92_init(ISADevice *dev, qemu_irq *a20_out)
-{
-Port92State *s = DO_UPCAST(Port92State, dev, dev);
-
-s->a20_out = a20_out;
-}
-
 static const VMStateDescription vmstate_port92_isa = {
 .name = "port92",
 .version_id = 1,
@@ -510,7 +495,7 @@ static void port92_class_initfn(ObjectClass *klass, void 
*data)
 }
 
 static TypeInfo port92_info = {
-.name  = "port92",
+.name  = TYPE_PORT92,
 .parent= TYPE_ISA_DEVICE,
 .instance_size = sizeof(Port92State),
 .class_init= port92_class_initfn,
@@ -523,7 +508,7 @@ static void port92_register_types(void)
 
 type_init(port92_register_types)
 
-static void handle_a20_line_change(void *opaque, int irq, int level)
+void handle_a20_line_change(void *opaque, int irq, int level)
 {
 CPUX86State *cpu = opaque;
 
@@ -947,7 +932,7 @@ static void pc_basic_device_init(ISABus *isa_bus, qemu_irq 
*gsi,
 int i;
 DriveInfo *fd[MAX_FD];
 qemu_irq *a20_line;
-ISADevice *i8042, *port92, *vmmouse;
+ISADevice *i8042, *vmmouse;
 qemu_irq *cpu_exit_irq;
 
 register_ioport_write(0x80, 1, 1, ioport80_write, NULL);
@@ -966,7 +951,7 @@ static void pc_basic_device_init(ISABus *isa_bus, qemu_irq 
*gsi,
 }
 }
 
-a20_line = qemu_allocate_irqs(handle_a20_line_change, first_cpu, 2);
+a20_line = qemu_allocate_irqs(handle_a20_line_change, first_cpu, 1);
 i8042 = isa_create_simple(isa_bus, "i8042");
 i8042_setup_a20_line(i8042, &a20_line[0]);
 if (!no_vmport) {
@@ -979,8 +964,6 @@ static void pc_basic_device_init(ISABus *isa_bus, qemu_irq 
*gsi,
 qdev_prop_set_ptr(&vmmouse->qdev, "ps2_mouse", i8042);
 qdev_init_nofail(&vmmouse->qdev);
 }
-port92 = isa_create_simple(isa_bus, "port92");
-port92_init(port92, &a20_line[1]);
 
 cpu_exit_irq = qemu_allocate_irqs(cpu_request_exit, NULL, 1);
 DMA_init(0, cpu_exit_irq);
diff --git a/hw/pc.h b/hw/pc.h
index d4b149e..17d48a0 100644
--- a/hw/pc.h
+++ b/hw/pc.h
@@ -9,6 +9,7 @@
 #include "net.h"
 #include "memory.h"
 #include "ioapic.h"
+#include "piix3.h"
 #include "i440fx.h"
 
 /* PC-style peripherals (also used by other machines).  */
@@ -144,4 +145,6 @@ void pc_system_firmware_init(MemoryRegion *rom_memory);
 
 int e820_add_entry(uint64_t, uint64_t, uint32_t);
 
+void handle_a20_line_change(void *opaque, int irq, int level);
+
 #endif
diff --git a/hw/piix3.c b/hw/piix3.c
index 35a0de9..675212e 100644
--- a/hw/piix3.c
+++ b/hw/piix3.c
@@ -196,6 +196,7 @@ static int piix3_realize(PCIDevice *dev)
 qemu_irq rtc_irq;
 int pit_isa_irq = 0;
 qemu_irq pit_alt_irq = NULL;
+qemu_irq *a20_line;
 
 /* Initialize ISA Bus */
 s->bus = isa_bus_new(DEVICE(dev), pci_address_space_io(dev));
@@ -256,6 +257,12 @@ static int piix3_realize(PCIDevice *dev)
 qdev_prop_set_ptr(DEVICE(&s->pcspk), "pit", ISA_DEVICE(&s->pit));
 qdev_init_nofail(DEVICE(&s->pcspk));
 
+/* Realize the PORT92 */
+qdev_set_parent_bus(DEVICE(&s->port92), BUS(s->bus));
+qdev_init_nofail(DEVICE(&s->port92));
+a20_line = qemu_allocate_irqs(handle_a20_line_change, first_cpu, 1);
+s->port92.a20_out = &a20_line[0];
+
 return 0;
 }
 
@@ -288,6 +295,9 @@ static void piix3_initfn(Object *obj)
 
 object_initialize(&s->pcspk, TYPE_PCSPK);
 object_property_add_child(obj, "pcspk", OBJECT(&s->pcspk), NULL);
+
+object_initialize(&s->port92, TYPE_PORT92);
+object_property_add_child(obj, "port92", OBJECT(&s->port92), NULL);
 }
 
 static void piix3_class_init(ObjectClass *klass, void *data)
diff --git a/hw/piix3.h b/hw/piix3.h
index 32f7a95..4e5ee20 100644
--- a/hw/piix3.h
+++ b/hw/piix3.h
@@ -51,6 +51,16 @@ typedef struct KVMPITState 

[Qemu-devel] [PATCH 00/10] piix3: create all child devices as proper QOM children

2012-11-07 Thread Wanpeng Li
All of the devices described in the PIIX3 as being implemented within the
PIIX3 are created as child devices of the PIIX3 object in QEMU.
PIIX3 has-a RTC, has-a I8042, has-a PCSPK, etc.

All child device creation for the PIIX3 is done by calling object_init()
with qdev_init() being called for each child device in the PIIX3 ::init
function.

Anthony Liguori (3):
convert RTC as piix3 proper QOM child
convert HPET as piix3 proper QOM child
convert PIT as piix3 proper QOM child

Wanpeng Li (7):
convert PCSPK as piix3 proper QOM child
convert PORT92 as piix3 proper QOM child
convert I8042 as piix3 proper QOM child
convert VMPORT as piix3 proper QOM child
convert VMMOUSE as piix3 proper QOM child
convert IDE as piix3 proper QOM child
convert IOAPIC as piix3 proper QOM child

 hw/hpet.c|   35 -
 hw/hpet_emul.h   |   40 ++
 hw/i440fx.c  |6 ++
 hw/i8254.c   |2 +-
 hw/i8254_internal.h  |3 +-
 hw/ide.h |6 +-
 hw/ide/internal.h|9 ++
 hw/ide/isa.c |   14 +---
 hw/ide/piix.c|   24 +--
 hw/ioapic.c  |2 +-
 hw/ioapic.h  |2 -
 hw/ioapic_internal.h |2 +
 hw/kvm/i8254.c   |8 +--
 hw/kvm/ioapic.c  |9 +--
 hw/mc146818rtc.c |   29 +---
 hw/mc146818rtc.h |   30 +++
 hw/pc.c  |  144 ---
 hw/pc.h  |   14 ++-
 hw/pckbd.c   |   24 +--
 hw/pcspk.c   |   19 +-
 hw/pcspk.h   |   19 +
 hw/piix3.c   |  205 ++
 hw/piix3.h   |  104 +
 hw/ps2.h |3 +
 hw/vmmouse.c |   14 
 hw/vmport.c  |   10 +--
 sysemu.h |2 -
 27 files changed, 459 insertions(+), 320 deletions(-)

-- 
1.7.7.6




[Qemu-devel] [PATCH 03/10] convert PIT as piix3 proper QOM child

2012-11-07 Thread Wanpeng Li
convert PIT as piix3 proper QOM child.

PIT creation for the PIIX3 is done by calling object_init() with
qdev_init() being called for each child device in the PIIX3 ::init
function.

Signed-off-by: Anthony Liguori 
Signed-off-by: Wanpeng Li 

---
 hw/i8254.c  |2 +-
 hw/i8254_internal.h |3 ++-
 hw/kvm/i8254.c  |8 +---
 hw/pc.c |   18 +-
 hw/piix3.c  |   34 ++
 hw/piix3.h  |   15 +++
 6 files changed, 54 insertions(+), 26 deletions(-)

diff --git a/hw/i8254.c b/hw/i8254.c
index bea5f92..8d3616d 100644
--- a/hw/i8254.c
+++ b/hw/i8254.c
@@ -348,7 +348,7 @@ static void pit_class_initfn(ObjectClass *klass, void *data)
 }
 
 static TypeInfo pit_info = {
-.name  = "isa-pit",
+.name  = TYPE_PIT,
 .parent= TYPE_PIT_COMMON,
 .instance_size = sizeof(PITCommonState),
 .class_init= pit_class_initfn,
diff --git a/hw/i8254_internal.h b/hw/i8254_internal.h
index 686f0c2..1aecad3 100644
--- a/hw/i8254_internal.h
+++ b/hw/i8254_internal.h
@@ -26,7 +26,6 @@
 #define QEMU_I8254_INTERNAL_H
 
 #include "hw.h"
-#include "pc.h"
 #include "isa.h"
 
 typedef struct PITChannelState {
@@ -57,6 +56,8 @@ typedef struct PITCommonState {
 PITChannelState channels[3];
 } PITCommonState;
 
+#define TYPE_KVM_PIT "kvm-pit"
+#define TYPE_PIT "isa-pit"
 #define TYPE_PIT_COMMON "pit-common"
 #define PIT_COMMON(obj) \
  OBJECT_CHECK(PITCommonState, (obj), TYPE_PIT_COMMON)
diff --git a/hw/kvm/i8254.c b/hw/kvm/i8254.c
index 53d13e3..9f8fb7c 100644
--- a/hw/kvm/i8254.c
+++ b/hw/kvm/i8254.c
@@ -27,18 +27,12 @@
 #include "hw/i8254.h"
 #include "hw/i8254_internal.h"
 #include "kvm.h"
+#include "hw/piix3.h"
 
 #define KVM_PIT_REINJECT_BIT 0
 
 #define CALIBRATION_ROUNDS   3
 
-typedef struct KVMPITState {
-PITCommonState pit;
-LostTickPolicy lost_tick_policy;
-bool vm_stopped;
-int64_t kernel_clock_offset;
-} KVMPITState;
-
 static int64_t abs64(int64_t v)
 {
 return v < 0 ? -v : v;
diff --git a/hw/pc.c b/hw/pc.c
index 7105f4e..80b437f 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -946,30 +946,14 @@ static void pc_basic_device_init(ISABus *isa_bus, 
qemu_irq *gsi,
 {
 int i;
 DriveInfo *fd[MAX_FD];
-DeviceState *hpet = NULL;
-int pit_isa_irq = 0;
-qemu_irq pit_alt_irq = NULL;
 qemu_irq *a20_line;
-ISADevice *i8042, *port92, *vmmouse, *pit = NULL;
+ISADevice *i8042, *port92, *vmmouse;
 qemu_irq *cpu_exit_irq;
 
 register_ioport_write(0x80, 1, 1, ioport80_write, NULL);
 
 register_ioport_write(0xf0, 1, 1, ioportF0_write, NULL);
 
-if (!xen_enabled()) {
-if (kvm_irqchip_in_kernel()) {
-pit = kvm_pit_init(isa_bus, 0x40);
-} else {
-pit = pit_init(isa_bus, 0x40, pit_isa_irq, pit_alt_irq);
-}
-if (hpet) {
-/* connect PIT to output control line of the HPET */
-qdev_connect_gpio_out(hpet, 0, qdev_get_gpio_in(&pit->qdev, 0));
-}
-pcspk_init(isa_bus, pit);
-}
-
 for(i = 0; i < MAX_SERIAL_PORTS; i++) {
 if (serial_hds[i]) {
 serial_isa_init(isa_bus, i, serial_hds[i]);
diff --git a/hw/piix3.c b/hw/piix3.c
index 5fe41cd..41739bd 100644
--- a/hw/piix3.c
+++ b/hw/piix3.c
@@ -194,6 +194,8 @@ static int piix3_realize(PCIDevice *dev)
 {
 PIIX3State *s = PIIX3(dev);
 qemu_irq rtc_irq;
+int pit_isa_irq = 0;
+qemu_irq pit_alt_irq = NULL;
 
 /* Initialize ISA Bus */
 s->bus = isa_bus_new(DEVICE(dev), pci_address_space_io(dev));
@@ -222,6 +224,8 @@ static int piix3_realize(PCIDevice *dev)
 sysbus_connect_irq(SYS_BUS_DEVICE(&s->hpet), i, s->pic[i]);
 }
 
+pit_isa_irq = -1;
+pit_alt_irq = qdev_get_gpio_in(DEVICE(&s->hpet), HPET_LEGACY_PIT_INT);
 rtc_irq = qdev_get_gpio_in(DEVICE(&s->hpet), HPET_LEGACY_RTC_INT);
 } else {
 isa_init_irq(ISA_DEVICE(&s->rtc), &rtc_irq, RTC_ISA_IRQ);
@@ -230,6 +234,23 @@ static int piix3_realize(PCIDevice *dev)
 /* Setup the RTC IRQ */
 s->rtc.irq = rtc_irq;
 
+/* Realize the PIT */
+if (!xen_enabled()) {
+if (kvm_irqchip_in_kernel()) {
+qdev_set_parent_bus(DEVICE(&s->pit.kvm_pit), BUS(s->bus));
+qdev_init_nofail(DEVICE(&s->pit.kvm_pit));
+qdev_connect_gpio_out(DEVICE(&s->hpet), 0,
+qdev_get_gpio_in(DEVICE(&s->pit.kvm_pit), 0));
+} else {
+qdev_set_parent_bus(DEVICE(&s->pit), BUS(s->bus));
+qdev_init_nofail(DEVICE(&s->pit));
+qdev_connect_gpio_out(DEVICE(&s->pit), 0, pit_isa_irq >= 0 ?
+ isa_get_irq(ISA_DEVICE(&s->pit), pit_isa_irq) : pit_alt_irq);
+ 

[Qemu-devel] [PATCH 09/10] convert IDE as piix3 proper QOM child

2012-11-07 Thread Wanpeng Li
Convert IDE into a proper QOM child of piix3.

IDE creation for the PIIX3 is done by calling object_init() with
qdev_init() being called for each child device in the PIIX3 ::init
function.

Signed-off-by: Wanpeng Li 

---
 hw/i440fx.c   |6 ++
 hw/ide.h  |6 --
 hw/ide/internal.h |9 +
 hw/ide/isa.c  |   14 +-
 hw/ide/piix.c |   24 ++--
 hw/pc.c   |   28 +---
 hw/pc.h   |4 
 hw/piix3.c|   47 +++
 hw/piix3.h|7 +++
 sysemu.h  |2 --
 10 files changed, 85 insertions(+), 62 deletions(-)

diff --git a/hw/i440fx.c b/hw/i440fx.c
index 5196201..e994722 100644
--- a/hw/i440fx.c
+++ b/hw/i440fx.c
@@ -193,6 +193,12 @@ static int i440fx_realize(SysBusDevice *dev)
 h->bus = pci_bus_new(DEVICE(s), NULL, &s->pci_address_space,
  s->address_space_io, 0);
 
+if (pci_is_enabled) {
+qdev_set_parent_bus(DEVICE(&s->piix3.ide.pci), BUS(h->bus));
+} else {
+qdev_set_parent_bus(DEVICE(&s->piix3.ide.isa), BUS(h->bus));
+}
+
 memory_region_init_io(&h->conf_mem, &pci_host_conf_le_ops, s,
   "pci-conf-idx", 4);
 sysbus_add_io(dev, 0xcf8, &h->conf_mem);
diff --git a/hw/ide.h b/hw/ide.h
index add742c..7060124 100644
--- a/hw/ide.h
+++ b/hw/ide.h
@@ -7,6 +7,10 @@
 
 #define MAX_IDE_DEVS   2
 
+#define TYPE_PIIX3_IDE_XEN "piix3-ide-xen"
+#define TYPE_PIIX3_IDE "piix3-ide"
+#define TYPE_ISA_IDE "isa-ide"
+
 /* ide-isa.c */
 ISADevice *isa_ide_init(ISABus *bus, int iobase, int iobase2, int isairq,
 DriveInfo *hd0, DriveInfo *hd1);
@@ -14,8 +18,6 @@ ISADevice *isa_ide_init(ISABus *bus, int iobase, int iobase2, 
int isairq,
 /* ide-pci.c */
 void pci_cmd646_ide_init(PCIBus *bus, DriveInfo **hd_table,
  int secondary_ide_enabled);
-PCIDevice *pci_piix3_xen_ide_init(PCIBus *bus, DriveInfo **hd_table, int 
devfn);
-PCIDevice *pci_piix3_ide_init(PCIBus *bus, DriveInfo **hd_table, int devfn);
 PCIDevice *pci_piix4_ide_init(PCIBus *bus, DriveInfo **hd_table, int devfn);
 void vt82c686b_ide_init(PCIBus *bus, DriveInfo **hd_table, int devfn);
 
diff --git a/hw/ide/internal.h b/hw/ide/internal.h
index bf7d313..3114f8c 100644
--- a/hw/ide/internal.h
+++ b/hw/ide/internal.h
@@ -483,6 +483,15 @@ struct IDEDevice {
 uint64_t wwn;
 };
 
+typedef struct ISAIDEState {
+ISADevice dev;
+IDEBusbus;
+uint32_t  iobase;
+uint32_t  iobase2;
+uint32_t  isairq;
+qemu_irq  irq;
+} ISAIDEState;
+
 #define BM_STATUS_DMAING 0x01
 #define BM_STATUS_ERROR  0x02
 #define BM_STATUS_INT0x04
diff --git a/hw/ide/isa.c b/hw/ide/isa.c
index 8ab2718..ca206ca 100644
--- a/hw/ide/isa.c
+++ b/hw/ide/isa.c
@@ -30,18 +30,6 @@
 
 #include 
 
-/***/
-/* ISA IDE definitions */
-
-typedef struct ISAIDEState {
-ISADevice dev;
-IDEBusbus;
-uint32_t  iobase;
-uint32_t  iobase2;
-uint32_t  isairq;
-qemu_irq  irq;
-} ISAIDEState;
-
 static void isa_ide_reset(DeviceState *d)
 {
 ISAIDEState *s = container_of(d, ISAIDEState, dev.qdev);
@@ -112,7 +100,7 @@ static void isa_ide_class_initfn(ObjectClass *klass, void 
*data)
 }
 
 static TypeInfo isa_ide_info = {
-.name  = "isa-ide",
+.name  = TYPE_ISA_IDE,
 .parent= TYPE_ISA_DEVICE,
 .instance_size = sizeof(ISAIDEState),
 .class_init= isa_ide_class_initfn,
diff --git a/hw/ide/piix.c b/hw/ide/piix.c
index 9431bad..3e53302 100644
--- a/hw/ide/piix.c
+++ b/hw/ide/piix.c
@@ -192,15 +192,6 @@ static int pci_piix3_xen_ide_unplug(DeviceState *dev)
 return 0;
 }
 
-PCIDevice *pci_piix3_xen_ide_init(PCIBus *bus, DriveInfo **hd_table, int devfn)
-{
-PCIDevice *dev;
-
-dev = pci_create_simple(bus, devfn, "piix3-ide-xen");
-pci_ide_create_devs(dev, hd_table);
-return dev;
-}
-
 static void pci_piix_ide_exitfn(PCIDevice *dev)
 {
 PCIIDEState *d = DO_UPCAST(PCIIDEState, dev, dev);
@@ -216,17 +207,6 @@ static void pci_piix_ide_exitfn(PCIDevice *dev)
 }
 
 /* hd_table must contain 4 block drivers */
-/* NOTE: for the PIIX3, the IRQs and IOports are hardcoded */
-PCIDevice *pci_piix3_ide_init(PCIBus *bus, DriveInfo **hd_table, int devfn)
-{
-PCIDevice *dev;
-
-dev = pci_create_simple(bus, devfn, "piix3-ide");
-pci_ide_create_devs(dev, hd_table);
-return dev;
-}
-
-/* hd_table must contain 4 block drivers */
 /* NOTE: for the PIIX4, the IRQs and IOports are hardcoded */
 PCIDevice *pci_piix4_ide_init(PCIBus *bus, DriveInfo **hd_table, int devfn)
 {
@@ -252,7 +232,7 @@ static void piix3_ide_class_init(ObjectClass *klass, void 
*data)
 }
 
 static TypeInfo piix3_ide_info = {
-.name  = "pii

Re: [Qemu-devel] [PATCH v5 0/3] refactor PC machine, i440fx and piix3 to take advantage of QOM

2012-11-14 Thread Wanpeng Li
On Thu, Nov 08, 2012 at 01:36:09PM +0800, Wanpeng Li wrote:
>This series aggressively refactors the PC machine initialization to be more
>modelled and less ad-hoc.  The highlights of this series are:
>
>1) Things like -m and -bios-name are now device model properties
>
>2) The i440fx and piix3 are now modelled in a thorough fashion
>
>3) i440fx_init is trivialized to creating devices and setting properties
>
>4) convert PCI host bridge to QOM
>
>The point (3) is the most important one.  As we refactor in this fashion,
>we should quickly get to the point where machine->init disappears completely in
>favor of just creating a handful of devices.
>
>The two stage initialization of QOM is important here.  instance_init() is when
>composed devices are created which means that after you've created a device, 
>all
>of its children are visible in the device model.  This lets you set properties
>of the parent and its children.
>
>realize() (which is still called DeviceState::init today) will be called right
>before the guest starts up for the first time.
>

Hi Anthony,

What's the state of these two patchsets? Are they ready to be merged?

Regards,
Wanpeng Li 

>Changes in v5:
>* fix bisect issues
>* take advantage of Andreas's pci_host patchset
>* drop convert MemoryRegion to QOM 
>* drop prepare to create HPET, RTC and i8254 through composition
>
>Changes in v4:
>
>*rebase patchset
>
>Changes in v3:
>
>* fix coding style issues
>* fix rebase error
>* add changes log
>
>Changes in v2:
>
>* Rebase patch series of i440fx in Anthony's qom-rebase.12 branch to upstream
>* convert MemoryRegion to QOM
>* convert pci_host to QOM
>
>Anthony Liguori (3):
>  eliminate piix_pci.c and module i440fx and piix3
>  merge pc_piix.c to pc.c
>  convert pci-host to QOM
>
> hw/i386/Makefile.objs |3 +-
> hw/i440fx.c   |  434 
> hw/i440fx.h   |   76 +
> hw/pc.c   |  753 ++---
> hw/pc.h   |   41 +---
> hw/pc_piix.c  |  716 --
> hw/pci_host.c |9 +
> hw/piix3.c|  258 +
> hw/piix3.h|   73 +
> hw/piix_pci.c |  622 
> 10 files changed, 1558 insertions(+), 1427 deletions(-)
> create mode 100644 hw/i440fx.c
> create mode 100644 hw/i440fx.h
> delete mode 100644 hw/pc_piix.c
> create mode 100644 hw/piix3.c
> create mode 100644 hw/piix3.h
> delete mode 100644 hw/piix_pci.c
>
>-- 
>1.7.7.6




Re: [Qemu-devel] [PATCH 7/12] kvm/x86: added hyper-v crash data and ctl msr's get/set'ers

2015-08-18 Thread Wanpeng Li

On 7/3/15 8:01 PM, Denis V. Lunev wrote:

From: Andrey Smetanin 

Added getters and setters for the Hyper-V crash data and control MSRs
(HV_X64_MSR_CRASH*). Userspace should check that these MSRs are
available by checking the KVM_CAP_HYPERV_MSR_CRASH capability.


I didn't see the KVM_CAP_HYPERV_MSR_CRASH in this patchset. :(

Regards,
Wanpeng Li



Re: [Qemu-devel] [PATCH v5] i386: Introduce ARAT CPU feature

2015-06-23 Thread Wanpeng Li



On 6/22/15 1:38 AM, Jan Kiszka wrote:

On 2015-06-18 22:21, Eduardo Habkost wrote:

On Sun, Jun 07, 2015 at 11:15:08AM +0200, Jan Kiszka wrote:

From: Jan Kiszka 

ARAT signals that the APIC timer does not stop in power saving states.
As our APICs are emulated, it's fine to expose this feature to guests,
at least when asking for KVM host features or with CPU types that
include the flag. The exact model number that introduced the feature is
not known, but reports can be found that it's at least available since
Sandy Bridge.

Signed-off-by: Jan Kiszka 

The code looks good now, but: what are the real consequences of
enabling/disabling the flag? What exactly guests use it for?

Isn't this going to make guests have additional expectations about the
APIC timer that may be broken when live-migrating or pausing the VM?

ARAT only refers to stopping of the timer in certain power states (which
we do not even emulate IIRC). In that case, the OS is at risk of
sleeping forever and thus needs to look for a different wakeup source.


HPET will always be the default broadcast event device I think.

Regards,
Wanpeng Li


Live-migration or VM pausing are external effects on all timers of the
guest, not only the APIC. However, none of them cause a wakeup miss -
provided the host decides to resume the guest eventually.

Jan






[Qemu-devel] [PATCH v4 3/5] convert pci-host to QOM

2012-07-18 Thread Wanpeng Li
[CCing ML]

From: Anthony Liguori 

Signed-off-by: Anthony Liguori 
Signed-off-by: Wanpeng Li 
---
 hw/pci_host.c |   26 ++
 hw/pci_host.h |5 +
 2 files changed, 31 insertions(+), 0 deletions(-)

diff --git a/hw/pci_host.c b/hw/pci_host.c
index 8041778..095bfe3 100644
--- a/hw/pci_host.c
+++ b/hw/pci_host.c
@@ -165,4 +165,30 @@ const MemoryRegionOps pci_host_data_be_ops = {
 .endianness = DEVICE_BIG_ENDIAN,
 };
 
+void pci_host_set_mmio(PCIHostState *s, MemoryRegion *value)
+{
+object_property_set_link(OBJECT(s), OBJECT(value), "mmio", NULL);
+}
+
+static void pci_host_initfn(Object *obj)
+{
+PCIHostState *s = PCI_HOST(obj);
+
+object_property_add_link(obj, "mmio", TYPE_MEMORY_REGION,
+(Object **)&s->address_space, NULL);
+}
+
+static TypeInfo pci_host_type_info = {
+.name = TYPE_PCI_HOST,
+.parent = TYPE_SYS_BUS_DEVICE,
+.instance_size = sizeof(PCIHostState),
+.instance_init = pci_host_initfn,
+};
+
+static void register_devices(void)
+{
+type_register_static(&pci_host_type_info);
+}
+
+type_init(register_devices)
 
diff --git a/hw/pci_host.h b/hw/pci_host.h
index 359e38f..084e15c 100644
--- a/hw/pci_host.h
+++ b/hw/pci_host.h
@@ -30,6 +30,9 @@
 
 #include "sysbus.h"
 
+#define TYPE_PCI_HOST "pci-host"
+#define PCI_HOST(obj) OBJECT_CHECK(PCIHostState, (obj), TYPE_PCI_HOST)
+
 struct PCIHostState {
 SysBusDevice busdev;
 MemoryRegion conf_mem;
@@ -49,6 +52,8 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, 
uint32_t addr,
 void pci_data_write(PCIBus *s, uint32_t addr, uint32_t val, int len);
 uint32_t pci_data_read(PCIBus *s, uint32_t addr, int len);
 
+void pci_host_set_mmio(PCIHostState *s, MemoryRegion *value);
+
 extern const MemoryRegionOps pci_host_conf_le_ops;
 extern const MemoryRegionOps pci_host_conf_be_ops;
 extern const MemoryRegionOps pci_host_data_le_ops;
-- 
1.7.5.4




[Qemu-devel] [PATCH v4 2/5] convert MemoryRegion to QOM

2012-07-18 Thread Wanpeng Li
[CCing ML]

From: Anthony Liguori 

Signed-off-by: Anthony Liguori 
Signed-off-by: Wanpeng Li 
---
 memory.c |   94 ++
 memory.h |8 +
 2 files changed, 78 insertions(+), 24 deletions(-)

diff --git a/memory.c b/memory.c
index aab4a31..3674535 100644
--- a/memory.c
+++ b/memory.c
@@ -797,35 +797,26 @@ static bool memory_region_wrong_endianness(MemoryRegion 
*mr)
 #endif
 }
 
-void memory_region_init(MemoryRegion *mr,
-const char *name,
-uint64_t size)
+void memory_region_set_name(MemoryRegion *mr, const char *name)
+{
+mr->name = g_strdup(name);
+}
+
+void memory_region_set_size(MemoryRegion *mr, uint64_t size)
 {
-mr->ops = NULL;
-mr->parent = NULL;
 mr->size = int128_make64(size);
 if (size == UINT64_MAX) {
 mr->size = int128_2_64();
 }
-mr->addr = 0;
-mr->subpage = false;
-mr->enabled = true;
-mr->terminates = false;
-mr->ram = false;
-mr->readable = true;
-mr->readonly = false;
-mr->rom_device = false;
-mr->destructor = memory_region_destructor_none;
-mr->priority = 0;
-mr->may_overlap = false;
-mr->alias = NULL;
-QTAILQ_INIT(&mr->subregions);
-memset(&mr->subregions_link, 0, sizeof mr->subregions_link);
-QTAILQ_INIT(&mr->coalesced);
-mr->name = g_strdup(name);
-mr->dirty_log_mask = 0;
-mr->ioeventfd_nb = 0;
-mr->ioeventfds = NULL;
+}
+
+void memory_region_init(MemoryRegion *mr,
+const char *name,
+uint64_t size)
+{
+object_initialize(mr, TYPE_MEMORY_REGION);
+memory_region_set_name(mr, name);
+memory_region_set_size(mr, size);
 }
 
 static bool memory_region_access_valid(MemoryRegion *mr,
@@ -1645,3 +1636,58 @@ void mtree_info(fprintf_function mon_printf, void *f)
 g_free(ml);
 }
 }
+
+static void memory_region_initfn(Object *obj)
+{
+MemoryRegion *mr = MEMORY_REGION(obj);
+mr->ops = NULL;
+mr->parent = NULL;
+mr->size = int128_2_64();
+mr->addr = 0;
+mr->subpage = false;
+mr->enabled = true;
+mr->terminates = false;
+mr->ram = false;
+mr->readable = true;
+mr->readonly = false;
+mr->rom_device = false;
+mr->destructor = memory_region_destructor_none;
+mr->priority = 0;
+mr->may_overlap = false;
+mr->alias = NULL;
+mr->name = NULL;
+QTAILQ_INIT(&mr->subregions);
+memset(&mr->subregions_link, 0, sizeof mr->subregions_link);
+QTAILQ_INIT(&mr->coalesced);
+mr->dirty_log_mask = 0;
+mr->ioeventfd_nb = 0;
+mr->ioeventfds = NULL;
+}
+
+static void memory_region_finalize(Object *obj)
+{
+MemoryRegion *mr = MEMORY_REGION(obj);
+
+assert(QTAILQ_EMPTY(&mr->subregions));
+mr->destructor(mr);
+memory_region_clear_coalescing(mr);
+if (mr->name) {
+g_free((char *)mr->name);
+}
+g_free(mr->ioeventfds);
+}
+
+static TypeInfo memory_region_type = {
+.name = TYPE_MEMORY_REGION,
+.parent = TYPE_OBJECT,
+.instance_size = sizeof(MemoryRegion),
+.instance_init = memory_region_initfn,
+.instance_finalize = memory_region_finalize,
+};
+
+static void register_devices(void)
+{
+type_register_static(&memory_region_type);
+}
+
+type_init(register_devices)
diff --git a/memory.h b/memory.h
index 740c48e..90a53f7 100644
--- a/memory.h
+++ b/memory.h
@@ -25,6 +25,7 @@
 #include "iorange.h"
 #include "ioport.h"
 #include "int128.h"
+#include "qemu/object.h"
 
 typedef struct MemoryRegionOps MemoryRegionOps;
 typedef struct MemoryRegion MemoryRegion;
@@ -116,6 +117,9 @@ struct MemoryRegionOps {
 typedef struct CoalescedMemoryRange CoalescedMemoryRange;
 typedef struct MemoryRegionIoeventfd MemoryRegionIoeventfd;
 
+#define TYPE_MEMORY_REGION "memory-region"
+#define MEMORY_REGION(obj) OBJECT_CHECK(MemoryRegion, (obj), 
TYPE_MEMORY_REGION)
+
 struct MemoryRegion {
 /* All fields are private - violators will be prosecuted */
 const MemoryRegionOps *ops;
@@ -748,6 +752,10 @@ void memory_global_dirty_log_stop(void);
 
 void mtree_info(fprintf_function mon_printf, void *f);
 
+void memory_region_set_name(MemoryRegion *mr, const char *name);
+
+void memory_region_set_size(MemoryRegion *mr, uint64_t size);
+
 #endif
 
 #endif
-- 
1.7.5.4




[Qemu-devel] [PATCH v4 4/5] prepare to create HPET, RTC and i8254 through composition

2012-07-18 Thread Wanpeng Li
[CCing ML]
 
 From: Anthony Liguori 

The HPET usually sits on the LPC bus (which replaces ISA in modern systems).
It's sometimes a dedicated chip but can certainly co-exist in a Super IO chip.
I think in terms of where it would live in this hypothetical device model,
putting it in the PIIX is rational.

Signed-off-by: Anthony Liguori 
Signed-off-by: Wanpeng Li 
---
 hw/hpet.c   |   39 ++-
 hw/hpet_emul.h  |   41 +
 hw/i8254.c  |2 +-
 hw/i8254_internal.h |2 +-
 hw/mc146818rtc.c|   26 --
 hw/mc146818rtc.h|   30 ++
 6 files changed, 75 insertions(+), 65 deletions(-)

diff --git a/hw/hpet.c b/hw/hpet.c
index fd3ddca..fc0ff6c 100644
--- a/hw/hpet.c
+++ b/hw/hpet.c
@@ -42,41 +42,6 @@
 
 #define HPET_MSI_SUPPORT0
 
-struct HPETState;
-typedef struct HPETTimer {  /* timers */
-uint8_t tn; /*timer number*/
-QEMUTimer *qemu_timer;
-struct HPETState *state;
-/* Memory-mapped, software visible timer registers */
-uint64_t config;/* configuration/cap */
-uint64_t cmp;   /* comparator */
-uint64_t fsb;   /* FSB route */
-/* Hidden register state */
-uint64_t period;/* Last value written to comparator */
-uint8_t wrap_flag;  /* timer pop will indicate wrap for one-shot 32-bit
- * mode. Next pop will be actual timer expiration.
- */
-} HPETTimer;
-
-typedef struct HPETState {
-SysBusDevice busdev;
-MemoryRegion iomem;
-uint64_t hpet_offset;
-qemu_irq irqs[HPET_NUM_IRQ_ROUTES];
-uint32_t flags;
-uint8_t rtc_irq_level;
-qemu_irq pit_enabled;
-uint8_t num_timers;
-HPETTimer timer[HPET_MAX_TIMERS];
-
-/* Memory-mapped, software visible registers */
-uint64_t capability;/* capabilities */
-uint64_t config;/* configuration */
-uint64_t isr;   /* interrupt status reg */
-uint64_t hpet_counter;  /* main counter */
-uint8_t  hpet_id;   /* instance id */
-} HPETState;
-
 static uint32_t hpet_in_legacy_mode(HPETState *s)
 {
 return s->config & HPET_CFG_LEGACY;
@@ -278,7 +243,7 @@ static const VMStateDescription vmstate_hpet_timer = {
 };
 
 static const VMStateDescription vmstate_hpet = {
-.name = "hpet",
+.name = TYPE_HPET,
 .version_id = 2,
 .minimum_version_id = 1,
 .minimum_version_id_old = 1,
@@ -746,7 +711,7 @@ static void hpet_device_class_init(ObjectClass *klass, void 
*data)
 }
 
 static TypeInfo hpet_device_info = {
-.name  = "hpet",
+.name  = TYPE_HPET,
 .parent= TYPE_SYS_BUS_DEVICE,
 .instance_size = sizeof(HPETState),
 .class_init= hpet_device_class_init,
diff --git a/hw/hpet_emul.h b/hw/hpet_emul.h
index 757f79f..836c5c8 100644
--- a/hw/hpet_emul.h
+++ b/hw/hpet_emul.h
@@ -13,6 +13,9 @@
 #ifndef QEMU_HPET_EMUL_H
 #define QEMU_HPET_EMUL_H
 
+#include "hw.h"
+#include "sysbus.h"
+
 #define HPET_BASE   0xfed0
 #define HPET_CLK_PERIOD 1000ULL /* 1000 femtoseconds == 10ns*/
 
@@ -71,4 +74,42 @@ struct hpet_fw_config
 } QEMU_PACKED;
 
 extern struct hpet_fw_config hpet_cfg;
+
+#define TYPE_HPET "hpet"
+
+struct HPETState;
+typedef struct HPETTimer {  /* timers */
+uint8_t tn; /*timer number*/
+QEMUTimer *qemu_timer;
+struct HPETState *state;
+/* Memory-mapped, software visible timer registers */
+uint64_t config;/* configuration/cap */
+uint64_t cmp;   /* comparator */
+uint64_t fsb;   /* FSB route */
+/* Hidden register state */
+uint64_t period;/* Last value written to comparator */
+uint8_t wrap_flag;  /* timer pop will indicate wrap for one-shot 32-bit
+ * mode. Next pop will be actual timer expiration.
+ */
+} HPETTimer;
+
+typedef struct HPETState {
+SysBusDevice busdev;
+MemoryRegion iomem;
+uint64_t hpet_offset;
+qemu_irq irqs[HPET_NUM_IRQ_ROUTES];
+uint32_t flags;
+uint8_t rtc_irq_level;
+qemu_irq pit_enabled;
+uint8_t num_timers;
+HPETTimer timer[HPET_MAX_TIMERS];
+
+/* Memory-mapped, software visible registers */
+uint64_t capability;/* capabilities */
+uint64_t config;/* configuration */
+uint64_t isr;   /* interrupt status reg */
+uint64_t hpet_counter;  /* main counter */
+uint8_t  hpet_id;   /* instance id */
+} HPETState;
+
 #endif
diff --git a/hw/i8254.c b/hw/i8254.c
index 77bd5e8..9d855ec 100644
--- a/hw/i8254.c
+++ b/hw/i8254.c
@@ -346,7 +346,7 @@ static void pit_class_initfn(ObjectClass *klass, void *data)
 }
 
 static TypeInfo pit_info = {
-.name  = "isa-pit"

[Qemu-devel] [PATCH v4 1/5] eliminate piix_pci.c and module i440fx and piix3

2012-07-18 Thread Wanpeng Li
[CCing ML]

From: Anthony Liguori 

The big picture about the patch is shown as follows:

1) pc_init creates an I440FX, any bus devices (ISA serial port, PCI
vga and nics, etc.), sets properties appropriately, and realizes the
devices.
2) I440FX is-a PCIHost, has-a I440FX-PMC, has-a PIIX3
3) PIIX3 has-a RTC, has-a I8042, has-a DMAController, etc.

i440fx-pcihost => i440fx
i440fx => i440fx-pmc

i440fx_pmc is the Programmable Memory Controller, which is integrated in the
I440FX chipset; RAM initialization is moved into i440fx-pmc.

It might seem like a small change, but it better reflects the fact
that the PMC is contained within the i440fx which we will now reflect in
composition in the next few changesets.

Signed-off-by: Anthony Liguori 
Signed-off-by: Wanpeng Li 
---
 hw/i386/Makefile.objs |2 +-
 hw/i440fx.c   |  434 +++
 hw/i440fx.h   |   77 +++
 hw/piix3.c|  292 
 hw/piix3.h|   79 +++
 hw/piix_pci.c |  599 -
 6 files changed, 883 insertions(+), 600 deletions(-)
 create mode 100644 hw/i440fx.c
 create mode 100644 hw/i440fx.h
 create mode 100644 hw/piix3.c
 create mode 100644 hw/piix3.h
 delete mode 100644 hw/piix_pci.c

diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs
index 8c764bb..49b32d0 100644
--- a/hw/i386/Makefile.objs
+++ b/hw/i386/Makefile.objs
@@ -1,6 +1,6 @@
 obj-y += mc146818rtc.o pc.o
 obj-y += apic_common.o apic.o kvmvapic.o
-obj-y += sga.o ioapic_common.o ioapic.o piix_pci.o
+obj-y += sga.o ioapic_common.o ioapic.o i440fx.o piix3.o
 obj-y += vmport.o
 obj-y += pci-hotplug.o smbios.o wdt_ib700.o
 obj-y += debugcon.o multiboot.o
diff --git a/hw/i440fx.c b/hw/i440fx.c
new file mode 100644
index 000..8c4408f
--- /dev/null
+++ b/hw/i440fx.c
@@ -0,0 +1,434 @@
+/*
+ * QEMU i440FX PCI Host Bridge Emulation
+ *
+ * Copyright (c) 2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "i440fx.h"
+#include "range.h"
+#include "xen.h"
+#include "loader.h"
+#include "pc.h"
+
+#define BIOS_FILENAME "bios.bin"
+
+/*
+ * I440FX chipset data sheet.
+ * http://download.intel.com/design/chipsets/datashts/29054901.pdf
+ *
+ * The I440FX is a package that contains an integrated PCI Host controller,
+ * memory controller, and is usually packaged with a PCI-ISA bus and super I/O
+ * chipset.
+ *
+ * The "i440FX" device is the PCI host controller.  On function 0.0, there is a
+ * memory controller called the Programmable Memory Controller (PMC).  On
+ * function 1.0, there is the PCI-ISA bus/super I/O chip called the PIIX3.
+ */
+
+#define I440FX_PMC_PCI_HOLE 0xE000ULL
+#define I440FX_PMC_PCI_HOLE_END 0x1ULL
+
+#define I440FX_PAM  0x59
+#define I440FX_PAM_SIZE 7
+#define I440FX_SMRAM0x72
+
+static void piix3_set_irq(void *opaque, int pirq, int level)
+{
+PIIX3State *piix3 = opaque;
+piix3_set_irq_level(piix3, pirq, level);
+}
+
+/*
+ * return the global irq number corresponding to a given device irq
+ * pin. We could also use the bus number to have a more precise
+ * mapping.
+ */
+static int pci_slot_get_pirq(PCIDevice *pci_dev, int pci_intx)
+{
+int slot_addend;
+slot_addend = (pci_dev->devfn >> 3) - 1;
+return (pci_intx + slot_addend) & 3;
+}
+
+static void update_pam(I440FXPMCState *d, uint32_t start, uint32_t end, int r,
+   PAMMemoryRegion *mem)
+{
+if (mem->initialized) {
+memory_region_del_subregion(d->system_memory, &mem->mem);
+memory_region_destroy(&mem->mem);
+}
+
+switch (r) {
+case 3:
+/* RAM */
+memory_region_init_alias(&mem->mem, "pam-ram", d->ram_memory,
+ start, end - s

[Qemu-devel] [PATCH v4 0/5] refactor PC machine, i440fx and piix3 to take advantage of QOM

2012-07-18 Thread Wanpeng Li
[CCing ML]

This series aggressively refactors the PC machine initialization to be more
modelled and less ad-hoc.  The highlights of this series are:

1) Things like -m and -bios-name are now device model properties

2) The i440fx and piix3 are now modelled in a thorough fashion

3) Most of the chipset features of the piix3 are modelled through composition

4) i440fx_init is trivialized to creating devices and setting properties

5) convert MemoryRegion to QOM

6) convert PCI host bridge to QOM

The point (4) is the most important one.  As we refactor in this fashion,
we should quickly get to the point where machine->init disappears completely in
favor of just creating a handful of devices.

The two stage initialization of QOM is important here.  instance_init() is when
composed devices are created which means that after you've created a device, all
of its children are visible in the device model.  This lets you set properties
of the parent and its children.

realize() (which is still called DeviceState::init today) will be called right
before the guest starts up for the first time.

Signed-off-by: Anthony Liguori 
Signed-off-by: Wanpeng Li 

Change in v4:

*rebase patchset 

Changes in v3:

* fix coding style issues
* fix rebase error 
* add changes log 

Changes in v2:

* Rebase patch series of i440fx in Anthony's qom-rebase.12 branch to upstream
* convert MemoryRegion to QOM
* convert pci_host to QOM


Anthony Liguori (5):
  eliminate piix_pci.c and module i440fx and piix3
  convert MemoryRegion to QOM
  convert pci-host to QOM
  prepare to create HPET, RTC and i8254 through composition
  merge pc_piix.c to pc.c

 hw/hpet.c |   39 +---
 hw/hpet_emul.h|   41 +++
 hw/i386/Makefile.objs |3 +-
 hw/i440fx.c   |  434 +
 hw/i440fx.h   |   77 +
 hw/i8254.c|2 +-
 hw/i8254_internal.h   |2 +-
 hw/mc146818rtc.c  |   26 --
 hw/mc146818rtc.h  |   30 ++
 hw/pc.c   |  741 +++--
 hw/pc.h   |   46 +---
 hw/pc_piix.c  |  661 ---
 hw/pci_host.c |   26 ++
 hw/pci_host.h |5 +
 hw/piix3.c|  292 +++
 hw/piix3.h|   79 ++
 hw/piix_pci.c |  599 ---
 memory.c  |   94 +--
 memory.h  |8 +
 19 files changed, 1722 insertions(+), 1483 deletions(-)
 create mode 100644 hw/i440fx.c
 create mode 100644 hw/i440fx.h
 delete mode 100644 hw/pc_piix.c
 create mode 100644 hw/piix3.c
 create mode 100644 hw/piix3.h
 delete mode 100644 hw/piix_pci.c

-- 
1.7.5.4




[Qemu-devel] [PATCH v4 5/5] merge pc_piix.c to pc.c

2012-07-18 Thread Wanpeng Li
[CCing ML]

From: Anthony Liguori 

Signed-off-by: Anthony Liguori 
Signed-off-by: Wanpeng Li 
---
 hw/i386/Makefile.objs |1 -
 hw/pc.c   |  753 +++--
 hw/pc.h   |   46 +---
 hw/pc_piix.c  |  661 ---
 4 files changed, 667 insertions(+), 794 deletions(-)
 delete mode 100644 hw/pc_piix.c

diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs
index 49b32d0..868020c 100644
--- a/hw/i386/Makefile.objs
+++ b/hw/i386/Makefile.objs
@@ -4,7 +4,6 @@ obj-y += sga.o ioapic_common.o ioapic.o i440fx.o piix3.o
 obj-y += vmport.o
 obj-y += pci-hotplug.o smbios.o wdt_ib700.o
 obj-y += debugcon.o multiboot.o
-obj-y += pc_piix.o
 obj-y += pc_sysfw.o
 obj-$(CONFIG_XEN) += xen_platform.o xen_apic.o
 obj-$(CONFIG_XEN_PCI_PASSTHROUGH) += xen-host-pci-device.o
diff --git a/hw/pc.c b/hw/pc.c
index c7e9ab3..7c04339 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -27,6 +27,7 @@
 #include "fdc.h"
 #include "ide.h"
 #include "pci.h"
+#include "usb.h"
 #include "vmware_vga.h"
 #include "monitor.h"
 #include "fw_cfg.h"
@@ -47,7 +48,10 @@
 #include "ui/qemu-spice.h"
 #include "memory.h"
 #include "exec-memory.h"
+#include "kvm/clock.h"
 #include "arch_init.h"
+#include "smbus.h"
+#include "boards.h"
 
 /* output Bochs bios info messages */
 //#define DEBUG_BIOS
@@ -75,6 +79,8 @@
 
 #define E820_NR_ENTRIES16
 
+#define MAX_IDE_BUS 2
+
 struct e820_entry {
 uint64_t address;
 uint64_t length;
@@ -86,10 +92,14 @@ struct e820_table {
 struct e820_entry entry[E820_NR_ENTRIES];
 } QEMU_PACKED __attribute((__aligned__(4)));
 
+static const int ide_iobase[MAX_IDE_BUS] = { 0x1f0, 0x170 };
+static const int ide_iobase2[MAX_IDE_BUS] = { 0x3f6, 0x376 };
+static const int ide_irq[MAX_IDE_BUS] = { 14, 15 };
+
 static struct e820_table e820_table;
 struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX};
 
-void gsi_handler(void *opaque, int n, int level)
+static void gsi_handler(void *opaque, int n, int level)
 {
 GSIState *s = opaque;
 
@@ -107,7 +117,7 @@ static void ioport80_write(void *opaque, uint32_t addr, 
uint32_t data)
 /* MSDOS compatibility mode FPU exception support */
 static qemu_irq ferr_irq;
 
-void pc_register_ferr_irq(qemu_irq irq)
+static void pc_register_ferr_irq(qemu_irq irq)
 {
 ferr_irq = irq;
 }
@@ -330,7 +340,7 @@ static void pc_cmos_init_late(void *opaque)
 qemu_unregister_reset(pc_cmos_init_late, opaque);
 }
 
-void pc_cmos_init(ram_addr_t ram_size, ram_addr_t above_4g_mem_size,
+static void pc_cmos_init(ram_addr_t ram_size, ram_addr_t above_4g_mem_size,
   const char *boot_device,
   ISADevice *floppy, BusState *idebus0, BusState *idebus1,
   ISADevice *s)
@@ -860,7 +870,7 @@ static const int ne2000_irq[NE2000_NB_MAX] = { 9, 10, 11, 
3, 4, 5 };
 static const int parallel_io[MAX_PARALLEL_PORTS] = { 0x378, 0x278, 0x3bc };
 static const int parallel_irq[MAX_PARALLEL_PORTS] = { 7, 7, 7 };
 
-void pc_init_ne2k_isa(ISABus *bus, NICInfo *nd)
+static void pc_init_ne2k_isa(ISABus *bus, NICInfo *nd)
 {
 static int nb_ne2k = 0;
 
@@ -915,7 +925,7 @@ static DeviceState *apic_init(void *env, uint8_t apic_id)
 return dev;
 }
 
-void pc_acpi_smi_interrupt(void *opaque, int irq, int level)
+static void pc_acpi_smi_interrupt(void *opaque, int irq, int level)
 {
 CPUX86State *s = opaque;
 
@@ -952,7 +962,7 @@ static X86CPU *pc_new_cpu(const char *cpu_model)
 return cpu;
 }
 
-void pc_cpus_init(const char *cpu_model)
+static void pc_cpus_init(const char *cpu_model)
 {
 int i;
 
@@ -970,55 +980,18 @@ void pc_cpus_init(const char *cpu_model)
 }
 }
 
-void *pc_memory_init(MemoryRegion *system_memory,
+static void *pc_memory_init(MemoryRegion *system_memory,
 const char *kernel_filename,
 const char *kernel_cmdline,
 const char *initrd_filename,
 ram_addr_t below_4g_mem_size,
-ram_addr_t above_4g_mem_size,
-MemoryRegion *rom_memory,
-MemoryRegion **ram_memory)
+ram_addr_t above_4g_mem_size)
 {
 int linux_boot, i;
-MemoryRegion *ram, *option_rom_mr;
-MemoryRegion *ram_below_4g, *ram_above_4g;
 void *fw_cfg;
 
 linux_boot = (kernel_filename != NULL);
 
-/* Allocate RAM.  We allocate it as a single memory region and use
- * aliases to address portions of it, mostly for backwards compatibility
- * with older qemus that used qemu_ram_alloc().
- */
-ram = g_malloc(sizeof(*ram));
-memory_region_init_ram(ram, "pc.ram",
-   below_4g_mem_size + above_4g_mem_size);
-vmstate_register_ram_global(ram);
-*ram_memory = ram;
-ram_below_4g 

Re: [Qemu-devel] [PATCH v4 4/5] prepare to create HPET, RTC and i8254 through composition

2012-07-19 Thread Wanpeng Li
On Thu, Jul 19, 2012 at 03:23:32PM -0500, Anthony Liguori wrote:
>Wanpeng Li  writes:
>
>> [CCing ML]
>>  
>>  From: Anthony Liguori 
>
>Each of these devices should be a separate patch.
>
>Please don't just send patches from branches of mine.  Spend some time
>to understand the code and break things up appropriately.

Yes, I will. But some people still object to exporting structures from *.c to
*.h. It also leads to the compile error which I sent you. :-)

Regards,
Wanpeng Li
>
>Regards,
>
>Anthony Liguori
>
>> The HPET usually sits on the LPC bus (which replaces ISA in modern systems).
>> It's sometimes a dedicated chip but can certainly co-exist in a Super IO chip.
>> I think in terms of where it would live in this hypothetical device model,
>> putting it in the PIIX is rational.
>>
>> Signed-off-by: Anthony Liguori 
>> Signed-off-by: Wanpeng Li 
>> ---
>>  hw/hpet.c   |   39 ++-
>>  hw/hpet_emul.h  |   41 +
>>  hw/i8254.c  |2 +-
>>  hw/i8254_internal.h |2 +-
>>  hw/mc146818rtc.c|   26 --
>>  hw/mc146818rtc.h|   30 ++
>>  6 files changed, 75 insertions(+), 65 deletions(-)
>>
>> diff --git a/hw/hpet.c b/hw/hpet.c
>> index fd3ddca..fc0ff6c 100644
>> --- a/hw/hpet.c
>> +++ b/hw/hpet.c
>> @@ -42,41 +42,6 @@
>>  
>>  #define HPET_MSI_SUPPORT0
>>  
>> -struct HPETState;
>> -typedef struct HPETTimer {  /* timers */
>> -uint8_t tn; /*timer number*/
>> -QEMUTimer *qemu_timer;
>> -struct HPETState *state;
>> -/* Memory-mapped, software visible timer registers */
>> -uint64_t config;/* configuration/cap */
>> -uint64_t cmp;   /* comparator */
>> -uint64_t fsb;   /* FSB route */
>> -/* Hidden register state */
>> -uint64_t period;/* Last value written to comparator */
>> -uint8_t wrap_flag;  /* timer pop will indicate wrap for one-shot 
>> 32-bit
>> - * mode. Next pop will be actual timer 
>> expiration.
>> - */
>> -} HPETTimer;
>> -
>> -typedef struct HPETState {
>> -SysBusDevice busdev;
>> -MemoryRegion iomem;
>> -uint64_t hpet_offset;
>> -qemu_irq irqs[HPET_NUM_IRQ_ROUTES];
>> -uint32_t flags;
>> -uint8_t rtc_irq_level;
>> -qemu_irq pit_enabled;
>> -uint8_t num_timers;
>> -HPETTimer timer[HPET_MAX_TIMERS];
>> -
>> -/* Memory-mapped, software visible registers */
>> -uint64_t capability;/* capabilities */
>> -uint64_t config;/* configuration */
>> -uint64_t isr;   /* interrupt status reg */
>> -uint64_t hpet_counter;  /* main counter */
>> -uint8_t  hpet_id;   /* instance id */
>> -} HPETState;
>> -
>>  static uint32_t hpet_in_legacy_mode(HPETState *s)
>>  {
>>  return s->config & HPET_CFG_LEGACY;
>> @@ -278,7 +243,7 @@ static const VMStateDescription vmstate_hpet_timer = {
>>  };
>>  
>>  static const VMStateDescription vmstate_hpet = {
>> -.name = "hpet",
>> +.name = TYPE_HPET,
>>  .version_id = 2,
>>  .minimum_version_id = 1,
>>  .minimum_version_id_old = 1,
>> @@ -746,7 +711,7 @@ static void hpet_device_class_init(ObjectClass *klass, 
>> void *data)
>>  }
>>  
>>  static TypeInfo hpet_device_info = {
>> -.name  = "hpet",
>> +.name  = TYPE_HPET,
>>  .parent= TYPE_SYS_BUS_DEVICE,
>>  .instance_size = sizeof(HPETState),
>>  .class_init= hpet_device_class_init,
>> diff --git a/hw/hpet_emul.h b/hw/hpet_emul.h
>> index 757f79f..836c5c8 100644
>> --- a/hw/hpet_emul.h
>> +++ b/hw/hpet_emul.h
>> @@ -13,6 +13,9 @@
>>  #ifndef QEMU_HPET_EMUL_H
>>  #define QEMU_HPET_EMUL_H
>>  
>> +#include "hw.h"
>> +#include "sysbus.h"
>> +
>>  #define HPET_BASE   0xfed0
>>  #define HPET_CLK_PERIOD 1000ULL /* 1000 femtoseconds == 
>> 10ns*/
>>  
>> @@ -71,4 +74,42 @@ struct hpet_fw_config
>>  } QEMU_PACKED;
>>  
>>  extern struct hpet_fw_config hpet_cfg;
>> +
>> +#define TYPE_HPET "hpet"
>> +
>> +struct HPETState;
>> +typedef struct HPETTimer {  /* timers */
>> +uint8_t

[Qemu-devel] [PATCH v5 0/3] refactor PC machine, i440fx and piix3 to take advantage of QOM

2012-07-23 Thread Wanpeng Li
This series aggressively refactors the PC machine initialization to be more
modelled and less ad-hoc.  The highlights of this series are:

1) Things like -m and -bios-name are now device model properties

2) The i440fx and piix3 are now modelled in a thorough fashion

3) i440fx_init is trivialized to creating devices and setting properties

4) convert PCI host bridge to QOM

The point (3) is the most important one.  As we refactor in this fashion,
we should quickly get to the point where machine->init disappears completely in
favor of just creating a handful of devices.

The two stage initialization of QOM is important here.  instance_init() is when
composed devices are created which means that after you've created a device, all
of its children are visible in the device model.  This lets you set properties
of the parent and its children.

realize() (which is still called DeviceState::init today) will be called right
before the guest starts up for the first time.

Signed-off-by: Anthony Liguori 
Signed-off-by: Wanpeng Li 

Change in v5:
* drop patch "convert MemoryRegion to QOM" and 
"prepare to create HPET, RTC and i8254 through composition"
* add Andreas' recent attempt against pci_host

Change in v4:

*rebase patchset

Changes in v3:

* fix coding style issues
* fix rebase error
* add changes log

Changes in v2:

* Rebase patch series of i440fx in Anthony's qom-rebase.12 branch to upstream
* convert MemoryRegion to QOM
* convert pci_host to QOM


Anthony Liguori (3):
  eliminate piix_pci.c and module i440fx and piix3
  merge pc_piix.c to pc.c
  convert pci-host to QOM

 hw/i386/Makefile.objs |3 +-
 hw/i440fx.c   |  434 ++
 hw/i440fx.h   |   77 ++
 hw/pc.c   |  695 +
 hw/pc.h   |   46 +---
 hw/pc_piix.c  |  661 --
 hw/pci_host.c |   14 +
 hw/pci_host.h |2 +
 hw/piix3.c|  234 +
 hw/piix3.h|   69 +
 hw/piix_pci.c |  599 --
 11 files changed, 1481 insertions(+), 1353 deletions(-)
 create mode 100644 hw/i440fx.c
 create mode 100644 hw/i440fx.h
 delete mode 100644 hw/pc_piix.c
 create mode 100644 hw/piix3.c
 create mode 100644 hw/piix3.h
 delete mode 100644 hw/piix_pci.c

-- 
1.7.7.6




[Qemu-devel] [PATCH v5 3/3] convert pci-host to QOM

2012-07-23 Thread Wanpeng Li
From: Anthony Liguori 

makes pci_host a proper QOM type.

Changelog:
* against Andreas pci_host branch
* make host bridge TypeInfos const
* use PCI_HOST_BRIDGE() where appropriate 

Signed-off-by: Anthony Liguori 
Signed-off-by: Wanpeng Li 

---
 hw/i440fx.c   |6 +++---
 hw/pc.c   |2 +-
 hw/pci_host.c |   14 ++
 hw/pci_host.h |2 ++
 hw/piix3.c|4 ++--
 5 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/hw/i440fx.c b/hw/i440fx.c
index 720a25a..fdf040b 100644
--- a/hw/i440fx.c
+++ b/hw/i440fx.c
@@ -191,7 +191,7 @@ static const VMStateDescription vmstate_i440fx_pmc = {
 static int i440fx_realize(SysBusDevice *dev)
 {
 I440FXState *s = I440FX(dev);
-PCIHostState *h = PCI_HOST(s);
+PCIHostState *h = PCI_HOST_BRIDGE(s);
 int bios_size, isa_bios_size;
 char *filename;
 int ret;
@@ -401,7 +401,7 @@ static void i440fx_pmc_class_init(ObjectClass *klass, void 
*data)
 dc->vmsd = &vmstate_i440fx_pmc;
 }
 
-static TypeInfo i440fx_pmc_info = {
+static const TypeInfo i440fx_pmc_info = {
 .name  = TYPE_I440FX_PMC,
 .parent= TYPE_PCI_DEVICE,
 .instance_size = sizeof(I440FXPMCState),
@@ -418,7 +418,7 @@ static void i440fx_class_init(ObjectClass *klass, void 
*data)
 dc->no_user = 1;
 }
 
-static TypeInfo i440fx_info = {
+static const TypeInfo i440fx_info = {
 .name  = TYPE_I440FX,
 .parent= TYPE_PCI_HOST_BRIDGE,
 .instance_size = sizeof(I440FXState),
diff --git a/hw/pc.c b/hw/pc.c
index d9a0443..f095109 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -1217,7 +1217,7 @@ static PCIBus *i440fx_init(I440FXPMCState 
**pi440fx_state, int *piix3_devfn,
 PCIHostState *h;
 
 s = I440FX(object_new(TYPE_I440FX));
-h = PCI_HOST(s);
+h = PCI_HOST_BRIDGE(s);
 
 /* FIXME make a properties */
 h->address_space = address_space_mem;
diff --git a/hw/pci_host.c b/hw/pci_host.c
index 3950e94..4e10042 100644
--- a/hw/pci_host.c
+++ b/hw/pci_host.c
@@ -165,11 +165,25 @@ const MemoryRegionOps pci_host_data_be_ops = {
 .endianness = DEVICE_BIG_ENDIAN,
 };
 
+void pci_host_set_mmio(PCIHostState *s, MemoryRegion *value)
+{
+object_property_set_link(OBJECT(s), OBJECT(value), "mmio", NULL);
+}
+
+static void pci_host_initfn(Object *obj)
+{
+PCIHostState *s = PCI_HOST_BRIDGE(obj);
+
+object_property_add_link(obj, "mmio", "memory-region",
+(Object **)&s->address_space, NULL);
+}
+
 static const TypeInfo pci_host_type_info = {
 .name = TYPE_PCI_HOST_BRIDGE,
 .parent = TYPE_SYS_BUS_DEVICE,
 .abstract = true,
 .instance_size = sizeof(PCIHostState),
+.instance_init = pci_host_initfn,
 };
 
 static void pci_host_register_types(void)
diff --git a/hw/pci_host.h b/hw/pci_host.h
index 4b9c300..9f28728 100644
--- a/hw/pci_host.h
+++ b/hw/pci_host.h
@@ -54,6 +54,8 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, 
uint32_t addr,
 void pci_data_write(PCIBus *s, uint32_t addr, uint32_t val, int len);
 uint32_t pci_data_read(PCIBus *s, uint32_t addr, int len);
 
+void pci_host_set_mmio(PCIHostState *s, MemoryRegion *value);
+
 extern const MemoryRegionOps pci_host_conf_le_ops;
 extern const MemoryRegionOps pci_host_conf_be_ops;
 extern const MemoryRegionOps pci_host_data_le_ops;
diff --git a/hw/piix3.c b/hw/piix3.c
index eca6ec8..3b69b15 100644
--- a/hw/piix3.c
+++ b/hw/piix3.c
@@ -204,7 +204,7 @@ static void piix3_class_init(ObjectClass *klass, void *data)
 k->class_id = PCI_CLASS_BRIDGE_ISA;
 }
 
-static TypeInfo piix3_info = {
+static const TypeInfo piix3_info = {
 .name  = TYPE_PIIX3,
 .parent= TYPE_PCI_DEVICE,
 .instance_size = sizeof(PIIX3State),
@@ -219,7 +219,7 @@ static void piix3_xen_class_init(ObjectClass *klass, void 
*data)
 k->config_write = piix3_write_config_xen;
 };
 
-static TypeInfo piix3_xen_info = {
+static const TypeInfo piix3_xen_info = {
 .name  = "PIIX3-xen",
 .parent= TYPE_PIIX3,
 .instance_size = sizeof(PIIX3State),
-- 
1.7.5.4




[Qemu-devel] [PATCH v5 2/3] merge pc_piix.c to pc.c

2012-07-23 Thread Wanpeng Li
From: Anthony Liguori 

Signed-off-by: Anthony Liguori 
Signed-off-by: Wanpeng Li 

---
 hw/i386/Makefile.objs |1 -
 hw/pc.c   |  695 +
 hw/pc.h   |   46 +---
 hw/pc_piix.c  |  661 --
 4 files changed, 650 insertions(+), 753 deletions(-)
 delete mode 100644 hw/pc_piix.c

diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs
index 49b32d0..868020c 100644
--- a/hw/i386/Makefile.objs
+++ b/hw/i386/Makefile.objs
@@ -4,7 +4,6 @@ obj-y += sga.o ioapic_common.o ioapic.o i440fx.o piix3.o
 obj-y += vmport.o
 obj-y += pci-hotplug.o smbios.o wdt_ib700.o
 obj-y += debugcon.o multiboot.o
-obj-y += pc_piix.o
 obj-y += pc_sysfw.o
 obj-$(CONFIG_XEN) += xen_platform.o xen_apic.o
 obj-$(CONFIG_XEN_PCI_PASSTHROUGH) += xen-host-pci-device.o
diff --git a/hw/pc.c b/hw/pc.c
index 598267a..d9a0443 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -27,6 +27,7 @@
 #include "fdc.h"
 #include "ide.h"
 #include "pci.h"
+#include "usb.h"
 #include "vmware_vga.h"
 #include "monitor.h"
 #include "fw_cfg.h"
@@ -48,7 +49,10 @@
 #include "ui/qemu-spice.h"
 #include "memory.h"
 #include "exec-memory.h"
+#include "kvm/clock.h"
 #include "arch_init.h"
+#include "smbus.h"
+#include "boards.h"
 
 /* output Bochs bios info messages */
 //#define DEBUG_BIOS
@@ -76,6 +80,8 @@
 
 #define E820_NR_ENTRIES16
 
+#define MAX_IDE_BUS 2
+
 struct e820_entry {
 uint64_t address;
 uint64_t length;
@@ -87,10 +93,14 @@ struct e820_table {
 struct e820_entry entry[E820_NR_ENTRIES];
 } QEMU_PACKED __attribute((__aligned__(4)));
 
+static const int ide_iobase[MAX_IDE_BUS] = { 0x1f0, 0x170 };
+static const int ide_iobase2[MAX_IDE_BUS] = { 0x3f6, 0x376 };
+static const int ide_irq[MAX_IDE_BUS] = { 14, 15 };
+
 static struct e820_table e820_table;
 struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX};
 
-void gsi_handler(void *opaque, int n, int level)
+static void gsi_handler(void *opaque, int n, int level)
 {
 GSIState *s = opaque;
 
@@ -108,7 +118,7 @@ static void ioport80_write(void *opaque, uint32_t addr, 
uint32_t data)
 /* MSDOS compatibility mode FPU exception support */
 static qemu_irq ferr_irq;
 
-void pc_register_ferr_irq(qemu_irq irq)
+static void pc_register_ferr_irq(qemu_irq irq)
 {
 ferr_irq = irq;
 }
@@ -323,7 +333,7 @@ static void pc_cmos_init_late(void *opaque)
 qemu_unregister_reset(pc_cmos_init_late, opaque);
 }
 
-void pc_cmos_init(ram_addr_t ram_size, ram_addr_t above_4g_mem_size,
+static void pc_cmos_init(ram_addr_t ram_size, ram_addr_t above_4g_mem_size,
   const char *boot_device,
   ISADevice *floppy, BusState *idebus0, BusState *idebus1,
   ISADevice *s)
@@ -846,7 +856,7 @@ static const int ne2000_irq[NE2000_NB_MAX] = { 9, 10, 11, 
3, 4, 5 };
 static const int parallel_io[MAX_PARALLEL_PORTS] = { 0x378, 0x278, 0x3bc };
 static const int parallel_irq[MAX_PARALLEL_PORTS] = { 7, 7, 7 };
 
-void pc_init_ne2k_isa(ISABus *bus, NICInfo *nd)
+static void pc_init_ne2k_isa(ISABus *bus, NICInfo *nd)
 {
 static int nb_ne2k = 0;
 
@@ -901,7 +911,7 @@ static DeviceState *apic_init(void *env, uint8_t apic_id)
 return dev;
 }
 
-void pc_acpi_smi_interrupt(void *opaque, int irq, int level)
+static void pc_acpi_smi_interrupt(void *opaque, int irq, int level)
 {
 CPUX86State *s = opaque;
 
@@ -938,7 +948,7 @@ static X86CPU *pc_new_cpu(const char *cpu_model)
 return cpu;
 }
 
-void pc_cpus_init(const char *cpu_model)
+static void pc_cpus_init(const char *cpu_model)
 {
 int i;
 
@@ -956,55 +966,18 @@ void pc_cpus_init(const char *cpu_model)
 }
 }
 
-void *pc_memory_init(MemoryRegion *system_memory,
+static void *pc_memory_init(MemoryRegion *system_memory,
 const char *kernel_filename,
 const char *kernel_cmdline,
 const char *initrd_filename,
 ram_addr_t below_4g_mem_size,
-ram_addr_t above_4g_mem_size,
-MemoryRegion *rom_memory,
-MemoryRegion **ram_memory)
+ram_addr_t above_4g_mem_size)
 {
 int linux_boot, i;
-MemoryRegion *ram, *option_rom_mr;
-MemoryRegion *ram_below_4g, *ram_above_4g;
 void *fw_cfg;
 
 linux_boot = (kernel_filename != NULL);
 
-/* Allocate RAM.  We allocate it as a single memory region and use
- * aliases to address portions of it, mostly for backwards compatibility
- * with older qemus that used qemu_ram_alloc().
- */
-ram = g_malloc(sizeof(*ram));
-memory_region_init_ram(ram, "pc.ram",
-   below_4g_mem_size + above_4g_mem_size);
-vmstate_register_ram_global(ram);
-*ram_memory = ram;
-ram_below_4g 

[Qemu-devel] [PATCH v5 1/3] eliminate piix_pci.c and module i440fx and piix3

2012-07-23 Thread Wanpeng Li
From: Anthony Liguori 

The big picture about the patch is shown as follows:

1) pc_init creates an I440FX, any bus devices (ISA serial port, PCI
vga and nics, etc.), sets properties appropriately, and realizes the
devices.
2) I440FX is-a PCIHost, has-a I440FX-PMC, has-a PIIX3
3) PIIX3 has-a RTC, has-a I8042, has-a DMAController, etc.

i440fx-pcihost => i440fx
i440fx => i440fx-pmc

i440fx_pmc is the Programmable Memory Controller, which is integrated in the
I440FX chipset; this patch moves RAM initialization into i440fx-pmc.

It might seem like a small change, but it better reflects the fact
that the PMC is contained within the i440fx which we will now reflect in
composition in the next few changesets.

Signed-off-by: Anthony Liguori 
Signed-off-by: Wanpeng Li 

---
 hw/i386/Makefile.objs |2 +-
 hw/i440fx.c   |  434 +++
 hw/i440fx.h   |   77 +++
 hw/piix3.c|  234 +++
 hw/piix3.h|   69 ++
 hw/piix_pci.c |  599 -
 6 files changed, 815 insertions(+), 600 deletions(-)
 create mode 100644 hw/i440fx.c
 create mode 100644 hw/i440fx.h
 create mode 100644 hw/piix3.c
 create mode 100644 hw/piix3.h
 delete mode 100644 hw/piix_pci.c

diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs
index 8c764bb..49b32d0 100644
--- a/hw/i386/Makefile.objs
+++ b/hw/i386/Makefile.objs
@@ -1,6 +1,6 @@
 obj-y += mc146818rtc.o pc.o
 obj-y += apic_common.o apic.o kvmvapic.o
-obj-y += sga.o ioapic_common.o ioapic.o piix_pci.o
+obj-y += sga.o ioapic_common.o ioapic.o i440fx.o piix3.o
 obj-y += vmport.o
 obj-y += pci-hotplug.o smbios.o wdt_ib700.o
 obj-y += debugcon.o multiboot.o
diff --git a/hw/i440fx.c b/hw/i440fx.c
new file mode 100644
index 000..720a25a
--- /dev/null
+++ b/hw/i440fx.c
@@ -0,0 +1,434 @@
+/*
+ * QEMU i440FX PCI Host Bridge Emulation
+ *
+ * Copyright (c) 2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "i440fx.h"
+#include "range.h"
+#include "xen.h"
+#include "loader.h"
+#include "pc.h"
+
+#define BIOS_FILENAME "bios.bin"
+
+/*
+ * I440FX chipset data sheet.
+ * http://download.intel.com/design/chipsets/datashts/29054901.pdf
+ *
+ * The I440FX is a package that contains an integrated PCI Host controller,
+ * memory controller, and is usually packaged with a PCI-ISA bus and super I/O
+ * chipset.
+ *
+ * The "i440FX" device is the PCI host controller.  On function 0.0, there is a
+ * memory controller called the Programmable Memory Controller (PMC).  On
+ * function 1.0, there is the PCI-ISA bus/super I/O chip called the PIIX3.
+ */
+
+#define I440FX_PMC_PCI_HOLE 0xE000ULL
+#define I440FX_PMC_PCI_HOLE_END 0x1ULL
+
+#define I440FX_PAM  0x59
+#define I440FX_PAM_SIZE 7
+#define I440FX_SMRAM0x72
+
+static void piix3_set_irq(void *opaque, int pirq, int level)
+{
+PIIX3State *piix3 = opaque;
+piix3_set_irq_level(piix3, pirq, level);
+}
+
+/*
+ * return the global irq number corresponding to a given device irq
+ * pin. We could also use the bus number to have a more precise
+ * mapping.
+ */
+static int pci_slot_get_pirq(PCIDevice *pci_dev, int pci_intx)
+{
+int slot_addend;
+slot_addend = (pci_dev->devfn >> 3) - 1;
+return (pci_intx + slot_addend) & 3;
+}
+
+static void update_pam(I440FXPMCState *d, uint32_t start, uint32_t end, int r,
+   PAMMemoryRegion *mem)
+{
+if (mem->initialized) {
+memory_region_del_subregion(d->system_memory, &mem->mem);
+memory_region_destroy(&mem->mem);
+}
+
+switch (r) {
+case 3:
+/* RAM */
+memory_region_init_alias(&mem->mem, "pam-ram", d->ram_memory,
+ start, end - s

Re: [Qemu-devel] [PATCH v5 3/3] convert pci-host to QOM

2012-07-23 Thread Wanpeng Li
On Mon, Jul 23, 2012 at 02:57:20PM +0200, Andreas Färber wrote:
>Am 23.07.2012 14:35, schrieb Wanpeng Li:
>> From: Anthony Liguori 
>> 
>> makes pci_host a proper QOM type.
>> 
>> Changelog:
>> * against Andreas pci_host branch
>> * make host bridge TypeInfos const
>> * use PCI_HOST_BRIDGE() where appropriate 
>> 
>> Signed-off-by: Anthony Liguori 
>> Signed-off-by: Wanpeng Li 
>> 
>> ---
>>  hw/i440fx.c   |6 +++---
>>  hw/pc.c   |2 +-
>>  hw/pci_host.c |   14 ++
>>  hw/pci_host.h |2 ++
>>  hw/piix3.c|4 ++--
>>  5 files changed, 22 insertions(+), 6 deletions(-)
>> 
>> diff --git a/hw/i440fx.c b/hw/i440fx.c
>> index 720a25a..fdf040b 100644
>> --- a/hw/i440fx.c
>> +++ b/hw/i440fx.c
>> @@ -191,7 +191,7 @@ static const VMStateDescription vmstate_i440fx_pmc = {
>>  static int i440fx_realize(SysBusDevice *dev)
>>  {
>>  I440FXState *s = I440FX(dev);
>> -PCIHostState *h = PCI_HOST(s);
>> +PCIHostState *h = PCI_HOST_BRIDGE(s);
>>  int bios_size, isa_bios_size;
>>  char *filename;
>>  int ret;
>
>Either there's a miscommunication or a technical error: My branch surely
>is using PCI_HOST_BRIDGE(), so these PCI_HOST -> PCI_HOST_BRIDGE changes
>look bogus. Did you make sure each patch compiles?
The third patch is against your pci-host branch, as you wanted. The three
patches compile successfully when applied together, but not each patch on
its own, because PATCH 1/3 should take advantage of PCI_HOST_BRIDGE(), and
PATCH 2/3 also should take advantage of PATCH 1/3.

>
>> @@ -401,7 +401,7 @@ static void i440fx_pmc_class_init(ObjectClass *klass, 
>> void *data)
>>  dc->vmsd = &vmstate_i440fx_pmc;
>>  }
>>  
>> -static TypeInfo i440fx_pmc_info = {
>> +static const TypeInfo i440fx_pmc_info = {
>>  .name  = TYPE_I440FX_PMC,
>>  .parent= TYPE_PCI_DEVICE,
>>  .instance_size = sizeof(I440FXPMCState),
>> @@ -418,7 +418,7 @@ static void i440fx_class_init(ObjectClass *klass, void 
>> *data)
>>  dc->no_user = 1;
>>  }
>>  
>> -static TypeInfo i440fx_info = {
>> +static const TypeInfo i440fx_info = {
>>  .name  = TYPE_I440FX,
>>  .parent= TYPE_PCI_HOST_BRIDGE,
>>  .instance_size = sizeof(I440FXState),
>
>Patch 1/3 does not have const, patch 2/3 adds new TypeInfos without const.

Patch 2/3 doesn't add any new TypeInfos; where do you see the new
TypeInfos you mentioned?

>

>So my guess is you've not rebased this on my pci-host branch [1] but

No, I rebased the third patch against your pci_host branch. Since your patch

pci: Derive PCI host bridges from TYPE_PCI_HOST_BRIDGE
Some typedef'ed their state to PCIHostState. Use a proper struct,
and use PCIHostState and PCI_HOST_BRIDGE() where appropriate.

Signed-off-by: Andreas Färber 
 
wants to use PCI_HOST_BRIDGE() where appropriate, I added them to the Patch
1/3 code.

Regards,
Wanpeng Li
>onto something else? If they're not against master it's advisable to
>mark patches [PATCH treename xx/nn] btw, for clarity.
>
>For the new/changed TypeInfos please add const from the start.
>
>Regards,
>Andreas
>
>[1] http://repo.or.cz/w/qemu/afaerber.git/shortlog/refs/heads/pci-host
>git://repo.or.cz/qemu/afaerber.git pci-host
>
>> diff --git a/hw/pc.c b/hw/pc.c
>> index d9a0443..f095109 100644
>> --- a/hw/pc.c
>> +++ b/hw/pc.c
>> @@ -1217,7 +1217,7 @@ static PCIBus *i440fx_init(I440FXPMCState 
>> **pi440fx_state, int *piix3_devfn,
>>  PCIHostState *h;
>>  
>>  s = I440FX(object_new(TYPE_I440FX));
>> -h = PCI_HOST(s);
>> +h = PCI_HOST_BRIDGE(s);
>>  
>>  /* FIXME make a properties */
>>  h->address_space = address_space_mem;
>> diff --git a/hw/pci_host.c b/hw/pci_host.c
>> index 3950e94..4e10042 100644
>> --- a/hw/pci_host.c
>> +++ b/hw/pci_host.c
>> @@ -165,11 +165,25 @@ const MemoryRegionOps pci_host_data_be_ops = {
>>  .endianness = DEVICE_BIG_ENDIAN,
>>  };
>>  
>> +void pci_host_set_mmio(PCIHostState *s, MemoryRegion *value)
>> +{
>> +object_property_set_link(OBJECT(s), OBJECT(value), "mmio", NULL);
>> +}
>> +
>> +static void pci_host_initfn(Object *obj)
>> +{
>> +PCIHostState *s = PCI_HOST_BRIDGE(obj);
>> +
>> +object_property_add_link(obj, "mmio", "memory-region",
>> +(Object **)&s->address_space, NULL);
>> +}
>> +
>>  static const TypeInfo

[Qemu-devel] [PATCH] i386: Enable IA32_MISC_ENABLE MWAIT bit when exposing mwait/monitor

2019-05-13 Thread Wanpeng Li
From: Wanpeng Li 

The CPUID.01H:ECX[bit 3] ought to mirror the value of the MSR 
IA32_MISC_ENABLE MWAIT bit and as userspace has control of them 
both, it is userspace's job to configure both bits to match on 
the initial setup.

Cc: Eduardo Habkost 
Cc: Paolo Bonzini 
Cc: Radim Krčmář 
Signed-off-by: Wanpeng Li 
---
 target/i386/cpu.c | 3 +++
 target/i386/cpu.h | 1 +
 2 files changed, 4 insertions(+)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 722c551..40b6108 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -4729,6 +4729,9 @@ static void x86_cpu_reset(CPUState *s)
 
 env->pat = 0x0007040600070406ULL;
 env->msr_ia32_misc_enable = MSR_IA32_MISC_ENABLE_DEFAULT;
+if (enable_cpu_pm) {
+env->msr_ia32_misc_enable |= MSR_IA32_MISC_ENABLE_MWAIT;
+}
 
 memset(env->dr, 0, sizeof(env->dr));
 env->dr[6] = DR6_FIXED_1;
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 0128910..b94c329 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -387,6 +387,7 @@ typedef enum X86Seg {
 #define MSR_IA32_MISC_ENABLE0x1a0
 /* Indicates good rep/movs microcode on some processors: */
 #define MSR_IA32_MISC_ENABLE_DEFAULT1
+#define MSR_IA32_MISC_ENABLE_MWAIT  (1ULL << 18)
 
 #define MSR_MTRRphysBase(reg)   (0x200 + 2 * (reg))
 #define MSR_MTRRphysMask(reg)   (0x200 + 2 * (reg) + 1)
-- 
2.7.4




Re: [Qemu-devel] [PATCH v3 kernel 0/7] Extend virtio-balloon for fast (de)inflating & fast live migration

2016-08-31 Thread Wanpeng Li
2016-08-08 14:35 GMT+08:00 Liang Li :
> This patch set contains two parts of changes to the virtio-balloon.
>
> One is the change for speeding up the inflating & deflating process,
> the main idea of this optimization is to use bitmap to send the page
> information to host instead of the PFNs, to reduce the overhead of
> virtio data transmission, address translation and madvise(). This can
> help to improve the performance by about 85%.
>
> Another change is for speeding up live migration. By skipping process
> guest's free pages in the first round of data copy, to reduce needless
> data processing, this can help to save quite a lot of CPU cycles and
> network bandwidth. We put guest's free page information in bitmap and
> send it to host with the virt queue of virtio-balloon. For an idle 8GB
> guest, this can help to shorten the total live migration time from 2Sec
> to about 500ms in the 10Gbps network environment.

I just read the slides about this feature from the recent KVM Forum. Cloud
providers care more about live migration downtime than about total time,
because downtime is what customers notice. However, this feature increases
downtime in exchange for reducing total time; maybe it
will be more acceptable if there is no downside for downtime.

Regards,
Wanpeng Li



Re: [Qemu-devel] [PATCH v3 kernel 0/7] Extend virtio-balloon for fast (de)inflating & fast live migration

2016-08-31 Thread Wanpeng Li
2016-09-01 13:46 GMT+08:00 Li, Liang Z :
>> Subject: Re: [PATCH v3 kernel 0/7] Extend virtio-balloon for fast 
>> (de)inflating
>> & fast live migration
>>
>> 2016-08-08 14:35 GMT+08:00 Liang Li :
>> > This patch set contains two parts of changes to the virtio-balloon.
>> >
>> > One is the change for speeding up the inflating & deflating process,
>> > the main idea of this optimization is to use bitmap to send the page
>> > information to host instead of the PFNs, to reduce the overhead of
>> > virtio data transmission, address translation and madvise(). This can
>> > help to improve the performance by about 85%.
>> >
>> > Another change is for speeding up live migration. By skipping process
>> > guest's free pages in the first round of data copy, to reduce needless
>> > data processing, this can help to save quite a lot of CPU cycles and
>> > network bandwidth. We put guest's free page information in bitmap and
>> > send it to host with the virt queue of virtio-balloon. For an idle 8GB
>> > guest, this can help to shorten the total live migration time from
>> > 2Sec to about 500ms in the 10Gbps network environment.
>>
>> I just read the slides of this feature for recent kvm forum, the cloud
>> providers more care about live migration downtime to avoid customers'
>> perception than total time, however, this feature will increase downtime
>> when acquire the benefit of reducing total time, maybe it will be more
>> acceptable if there is no downside for downtime.
>>
>> Regards,
>> Wanpeng Li
>
> In theory, there is no factor that will increase the downtime. There is no 
> additional operation
> and no more data copy during the stop and copy stage. But in the test, the 
> downtime increases
> and this can be reproduced. I think the busy network line maybe the reason 
> for this. With this
>  optimization, a huge amount of data is written to the socket in a shorter 
> time, so some of the write
> operation may need to wait. Without this optimization, zero page checking 
> takes more time,
> the network is not so busy.
>
> If the guest is not an idle one, I think the gap of the downtime will not so 
> obvious.  Anyway, the

http://www.linux-kvm.org/images/c/c3/03x06B-Liang_Li-Real_Time_and_Fast_Live_Migration_Update_for_NFV.pdf
The slides show almost the same percentage for the idle and the
non-idle guests: both show a ~50% increase in downtime.

Regards,
Wanpeng Li



[Qemu-devel] [PATCH] pc: apic: fix touch LAPIC when irqchip is split

2016-09-13 Thread Wanpeng Li
From: Wanpeng Li 

Add -kernel_irqchip=split
./x86-run x86/eventinj.flat

qemu-system-x86_64 -enable-kvm -machine kernel_irqchip=split -cpu host
-device pc-testdev -device isa-debug-exit,iobase=0xf4,iosize=0x4 -vnc
none -serial stdio -device pci-testdev -kernel x86/eventinj.flat
enabling apic
paging enabled
cr0 = 80010011
cr3 = 7fff000
cr4 = 20
Sending vec 33 and 62 and mask one with TPR
irq1 running
irq1 running
After 33/62 TPR test
FAIL: TPR
irq0 running
irq0 running

Both irq1 and irq0 are executing twice.

kvm_entry: vcpu 0
kvm_exit: reason MSR_WRITE rip 0x401f33 info 0 0
kvm_apic: apic_write APIC_EOI = 0x0
kvm_eoi: apicid 0 vector 62
kvm_msr: msr_write 80b = 0x0
kvm_entry: vcpu 0
kvm_exit: reason PENDING_INTERRUPT rip 0x401f35 info 0 0
kvm_userspace_exit: reason KVM_EXIT_IRQ_WINDOW_OPEN (7)
kvm_inj_virq: irq 62
kvm_entry: vcpu 0
kvm_exit: reason IO_INSTRUCTION rip 0x4016ec info 3fd0008 0

From the trace we can see there is an interrupt window exit
after the first interrupt EOI (irq 62), and the same irq (62)
is injected a second time after the interrupt window.

QEMU does KVM_INTERRUPT(62) ioctl after KVM exits with 
KVM_EXIT_IRQ_WINDOW_OPEN, which QEMU requested while the
guest was printing.  The printing calls

serial_update_irq() -> qemu_irq_lower() -> qemu_set_irq() ->
gsi_handler() -> qemu_set_irq() -> pic_irq_request() ->
apic_deliver_pic_intr() -> kvm_handle_interrupt()

kvm_handle_interrupt() does

interrupt_request |= CPU_INTERRUPT_HARD

which later calls cpu_get_pic_interrupt() in kvm_arch_pre_run(), 
but that function uses stale information from APIC and injects 
62 again. If we synchronized the APIC, then the test would #GP, 
because there would be no injectable interrupt in LAPIC or PIC, 
so pic_read_irq() would return 15, thinking it was spurious.

This patch fixes it by not touching the LAPIC if the LAPIC is in the kernel.

Suggested-by: Paolo Bonzini 
Suggested-by: Radim Krčmář 
Cc: Paolo Bonzini 
Cc: Radim Krčmář 
Cc: Michael S. Tsirkin 
Cc: Eduardo Habkost 
Signed-off-by: Wanpeng Li 
---
 hw/i386/pc.c | 20 +++-
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index e31f70f..4f3d508 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -161,14 +161,16 @@ int cpu_get_pic_interrupt(CPUX86State *env)
 X86CPU *cpu = x86_env_get_cpu(env);
 int intno;
 
-intno = apic_get_interrupt(cpu->apic_state);
-if (intno >= 0) {
-return intno;
-}
-/* read the irq from the PIC */
-if (!apic_accept_pic_intr(cpu->apic_state)) {
-return -1;
-}
+if (!kvm_irqchip_in_kernel()) {
+ intno = apic_get_interrupt(cpu->apic_state);
+ if (intno >= 0) {
+ return intno;
+ }
+ /* read the irq from the PIC */
+ if (!apic_accept_pic_intr(cpu->apic_state)) {
+ return -1;
+ }
+}
 
 intno = pic_read_irq(isa_pic);
 return intno;
@@ -180,7 +182,7 @@ static void pic_irq_request(void *opaque, int irq, int 
level)
 X86CPU *cpu = X86_CPU(cs);
 
 DPRINTF("pic_irqs: %s irq %d\n", level? "raise" : "lower", irq);
-if (cpu->apic_state) {
+if (cpu->apic_state && !kvm_irqchip_in_kernel()) {
 CPU_FOREACH(cs) {
 cpu = X86_CPU(cs);
 if (apic_accept_pic_intr(cpu->apic_state)) {
-- 
1.9.1




Re: [Qemu-devel] [PATCH] pc: apic: fix touch LAPIC when irqchip is split

2016-09-13 Thread Wanpeng Li
2016-09-14 11:40 GMT+08:00 Fam Zheng :
> On Tue, 09/13 20:21, no-re...@ec2-52-6-146-230.compute-1.amazonaws.com wrote:
>> Hi,
>>
>> Your series seems to have some coding style problems. See output below for
>> more information:
>>
>> Type: series
>> Message-id: 1473822299-6302-1-git-send-email-wanpeng...@hotmail.com
>> Subject: [Qemu-devel] [PATCH] pc: apic: fix touch LAPIC when irqchip is split
>>
>> === TEST SCRIPT BEGIN ===
>> #!/bin/bash
>>
>> BASE=base
>> n=1
>> total=$(git log --oneline $BASE.. | wc -l)
>> failed=0
>>
>> # Useful git options
>> git config --local diff.renamelimit 0
>> git config --local diff.renames True
>>
>> commits="$(git log --format=%H --reverse $BASE..)"
>> for c in $commits; do
>> echo "Checking PATCH $n/$total: $(git show --no-patch --format=%s $c)..."
>> if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; 
>> then
>> failed=1
>> echo
>> fi
>> n=$((n+1))
>> done
>>
>> exit $failed
>> === TEST SCRIPT END ===
>>
>> Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
>> Switched to a new branch 'test'
>> 9bcacc6 pc: apic: fix touch LAPIC when irqchip is split
>>
>> === OUTPUT BEGIN ===
>> Checking PATCH 1/1: pc: apic: fix touch LAPIC when irqchip is split...
>> ERROR: suspect code indent for conditional statements (4, 9)
>> #90: FILE: hw/i386/pc.c:164:
>> +if (!kvm_irqchip_in_kernel()) {
>> + intno = apic_get_interrupt(cpu->apic_state);
>    ^
> Should be 4 spaces here.
>
>>
>> ERROR: suspect code indent for conditional statements (9, 13)
>> #92: FILE: hw/i386/pc.c:166:
>> + if (intno >= 0) {
>> + return intno;
>
> Then this and later will be indented back by 1 column.

You are right, thanks Fam.

Regards,
Wanpeng Li



[Qemu-devel] [PATCH v2] pc: apic: fix touch LAPIC when irqchip is split

2016-09-13 Thread Wanpeng Li
From: Wanpeng Li 

Add -kernel_irqchip=split
./x86-run x86/eventinj.flat

qemu-system-x86_64 -enable-kvm -machine kernel_irqchip=split -cpu host
-device pc-testdev -device isa-debug-exit,iobase=0xf4,iosize=0x4 -vnc
none -serial stdio -device pci-testdev -kernel x86/eventinj.flat
enabling apic
paging enabled
cr0 = 80010011
cr3 = 7fff000
cr4 = 20
Sending vec 33 and 62 and mask one with TPR
irq1 running
irq1 running
After 33/62 TPR test
FAIL: TPR
irq0 running
irq0 running

Both irq1 and irq0 are executing twice.

kvm_entry: vcpu 0
kvm_exit: reason MSR_WRITE rip 0x401f33 info 0 0
kvm_apic: apic_write APIC_EOI = 0x0
kvm_eoi: apicid 0 vector 62
kvm_msr: msr_write 80b = 0x0
kvm_entry: vcpu 0
kvm_exit: reason PENDING_INTERRUPT rip 0x401f35 info 0 0
kvm_userspace_exit: reason KVM_EXIT_IRQ_WINDOW_OPEN (7)
kvm_inj_virq: irq 62
kvm_entry: vcpu 0
kvm_exit: reason IO_INSTRUCTION rip 0x4016ec info 3fd0008 0

>From the trace we can see there is an interrupt window exit 
after the first interrupt EOI(irq 62), and the same irq(62) 
is injected duplicately after the interrupt window.

QEMU does KVM_INTERRUPT(62) ioctl after KVM exits with 
KVM_EXIT_IRQ_WINDOW_OPEN, which QEMU requested while the
guest was printing.  The printing calls

serial_update_irq() -> qemu_irq_lower() -> qemu_set_irq() ->
gsi_handler() -> qemu_set_irq() -> pic_irq_request() ->
apic_deliver_pic_intr() -> kvm_handle_interrupt()

kvm_handle_interrupt() does

interrupt_request |= CPU_INTERRUPT_HARD

which later causes kvm_arch_pre_run() to call cpu_get_pic_interrupt(),
but that function uses stale information from the APIC and injects
62 again. If we synchronized the APIC, then the test would #GP,
because there would be no injectable interrupt in the LAPIC or PIC,
so pic_read_irq() would return 15, thinking it was spurious.

This patch fixes it by not touching the LAPIC if the LAPIC is in the kernel.

Suggested-by: Paolo Bonzini 
Suggested-by: Radim Krčmář 
Cc: Paolo Bonzini 
Cc: Radim Krčmář 
Cc: Michael S. Tsirkin 
Cc: Eduardo Habkost 
Signed-off-by: Wanpeng Li 
---
v1 -> v2:
 * cleanup coding style issue

 hw/i386/pc.c | 20 +++-
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index e31f70f..4f3d508 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -161,14 +161,16 @@ int cpu_get_pic_interrupt(CPUX86State *env)
 X86CPU *cpu = x86_env_get_cpu(env);
 int intno;
 
-intno = apic_get_interrupt(cpu->apic_state);
-if (intno >= 0) {
-return intno;
-}
-/* read the irq from the PIC */
-if (!apic_accept_pic_intr(cpu->apic_state)) {
-return -1;
-}
+if (!kvm_irqchip_in_kernel()) {
+intno = apic_get_interrupt(cpu->apic_state);
+if (intno >= 0) {
+return intno;
+}
+/* read the irq from the PIC */
+if (!apic_accept_pic_intr(cpu->apic_state)) {
+return -1;
+}
+}
 
 intno = pic_read_irq(isa_pic);
 return intno;
@@ -180,7 +182,7 @@ static void pic_irq_request(void *opaque, int irq, int 
level)
 X86CPU *cpu = X86_CPU(cs);
 
 DPRINTF("pic_irqs: %s irq %d\n", level? "raise" : "lower", irq);
-if (cpu->apic_state) {
+if (cpu->apic_state && !kvm_irqchip_in_kernel()) {
 CPU_FOREACH(cs) {
 cpu = X86_CPU(cs);
 if (apic_accept_pic_intr(cpu->apic_state)) {
-- 
1.9.1




[Qemu-devel] [PATCH] pc: apic: introduce APIC macro

2016-09-14 Thread Wanpeng Li
From: Wanpeng Li 

Introduce a new APIC macro to replace the APIC_COMMON macro in
hw/intc/apic.c, in order to catch accesses to the LAPIC in QEMU
even if the LAPIC is emulated in KVM.

Suggested-by: Paolo Bonzini 
Cc: Paolo Bonzini 
Cc: Radim Krčmář 
Cc: Michael S. Tsirkin 
Cc: Eduardo Habkost 
Signed-off-by: Wanpeng Li 
---
 hw/intc/apic.c  | 20 ++--
 include/hw/i386/apic_internal.h |  4 
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index 45887d9..577f095 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -163,7 +163,7 @@ static void apic_local_deliver(APICCommonState *s, int 
vector)
 
 void apic_deliver_pic_intr(DeviceState *dev, int level)
 {
-APICCommonState *s = APIC_COMMON(dev);
+APICCommonState *s = APIC(dev);
 
 if (level) {
 apic_local_deliver(s, APIC_LVT_LINT0);
@@ -373,7 +373,7 @@ static void apic_update_irq(APICCommonState *s)
 
 void apic_poll_irq(DeviceState *dev)
 {
-APICCommonState *s = APIC_COMMON(dev);
+APICCommonState *s = APIC(dev);
 
 apic_sync_vapic(s, SYNC_FROM_VAPIC);
 apic_update_irq(s);
@@ -479,7 +479,7 @@ static void apic_startup(APICCommonState *s, int vector_num)
 
 void apic_sipi(DeviceState *dev)
 {
-APICCommonState *s = APIC_COMMON(dev);
+APICCommonState *s = APIC(dev);
 
 cpu_reset_interrupt(CPU(s->cpu), CPU_INTERRUPT_SIPI);
 
@@ -493,7 +493,7 @@ static void apic_deliver(DeviceState *dev, uint8_t dest, 
uint8_t dest_mode,
  uint8_t delivery_mode, uint8_t vector_num,
  uint8_t trigger_mode)
 {
-APICCommonState *s = APIC_COMMON(dev);
+APICCommonState *s = APIC(dev);
 uint32_t deliver_bitmask[MAX_APIC_WORDS];
 int dest_shorthand = (s->icr[0] >> 18) & 3;
 APICCommonState *apic_iter;
@@ -550,7 +550,7 @@ static bool apic_check_pic(APICCommonState *s)
 
 int apic_get_interrupt(DeviceState *dev)
 {
-APICCommonState *s = APIC_COMMON(dev);
+APICCommonState *s = APIC(dev);
 int intno;
 
 /* if the APIC is installed or enabled, we let the 8259 handle the
@@ -584,7 +584,7 @@ int apic_get_interrupt(DeviceState *dev)
 
 int apic_accept_pic_intr(DeviceState *dev)
 {
-APICCommonState *s = APIC_COMMON(dev);
+APICCommonState *s = APIC(dev);
 uint32_t lvt0;
 
 if (!s)
@@ -663,7 +663,7 @@ static uint32_t apic_mem_readl(void *opaque, hwaddr addr)
 if (!dev) {
 return 0;
 }
-s = APIC_COMMON(dev);
+s = APIC(dev);
 
 index = (addr >> 4) & 0xff;
 switch(index) {
@@ -766,7 +766,7 @@ static void apic_mem_writel(void *opaque, hwaddr addr, 
uint32_t val)
 if (!dev) {
 return;
 }
-s = APIC_COMMON(dev);
+s = APIC(dev);
 
 trace_apic_mem_writel(addr, val);
 
@@ -870,7 +870,7 @@ static const MemoryRegionOps apic_io_ops = {
 
 static void apic_realize(DeviceState *dev, Error **errp)
 {
-APICCommonState *s = APIC_COMMON(dev);
+APICCommonState *s = APIC(dev);
 
 if (s->id >= MAX_APICS) {
 error_setg(errp, "%s initialization failed. APIC ID %d is invalid",
@@ -889,7 +889,7 @@ static void apic_realize(DeviceState *dev, Error **errp)
 
 static void apic_unrealize(DeviceState *dev, Error **errp)
 {
-APICCommonState *s = APIC_COMMON(dev);
+APICCommonState *s = APIC(dev);
 
 timer_del(s->timer);
 timer_free(s->timer);
diff --git a/include/hw/i386/apic_internal.h b/include/hw/i386/apic_internal.h
index 06c4e9f..5e36016 100644
--- a/include/hw/i386/apic_internal.h
+++ b/include/hw/i386/apic_internal.h
@@ -131,6 +131,10 @@ typedef struct APICCommonState APICCommonState;
 #define APIC_COMMON_GET_CLASS(obj) \
  OBJECT_GET_CLASS(APICCommonClass, (obj), TYPE_APIC_COMMON)
 
+#define TYPE_APIC "apic"
+#define APIC(obj) \
+OBJECT_CHECK(APICCommonState, (obj), TYPE_APIC)
+
 typedef struct APICCommonClass
 {
 DeviceClass parent_class;
-- 
1.9.1




Re: [Qemu-devel] [PATCH] pc: apic: introduce APIC macro

2016-09-14 Thread Wanpeng Li
2016-09-15 14:27 GMT+08:00 Paolo Bonzini :
[...]
>> +#define TYPE_APIC "apic"
>> +#define APIC(obj) \
>> +OBJECT_CHECK(APICCommonState, (obj), TYPE_APIC)
>
> This should be in apic.c.  Also please replace the other occurrence of
> "apic" in apic.c by TYPE_APIC.

Will do, thanks for your review.

Regards,
Wanpeng Li



[Qemu-devel] [PATCH v2] pc: apic: introduce APIC macro

2016-09-15 Thread Wanpeng Li
From: Wanpeng Li 

Introduce a new APIC macro to replace the APIC_COMMON macro in
hw/intc/apic.c, in order to catch accesses to the LAPIC in QEMU
even if the LAPIC is emulated in KVM.

Suggested-by: Paolo Bonzini 
Reviewed-by: Michael S. Tsirkin 
Cc: Paolo Bonzini 
Cc: Radim Krčmář 
Cc: Michael S. Tsirkin 
Cc: Eduardo Habkost 
Signed-off-by: Wanpeng Li 
---
v1 -> v2:
 * move the new APIC macro to apic.c
 * replace the occurrence of "apic" in apic.c by TYPE_APIC

 hw/intc/apic.c | 26 +++---
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index 45887d9..7bd1d27 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -39,6 +39,10 @@
 
 static APICCommonState *local_apics[MAX_APICS + 1];
 
+#define TYPE_APIC "apic"
+#define APIC(obj) \
+OBJECT_CHECK(APICCommonState, (obj), TYPE_APIC)
+
 static void apic_set_irq(APICCommonState *s, int vector_num, int trigger_mode);
 static void apic_update_irq(APICCommonState *s);
 static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask,
@@ -163,7 +167,7 @@ static void apic_local_deliver(APICCommonState *s, int 
vector)
 
 void apic_deliver_pic_intr(DeviceState *dev, int level)
 {
-APICCommonState *s = APIC_COMMON(dev);
+APICCommonState *s = APIC(dev);
 
 if (level) {
 apic_local_deliver(s, APIC_LVT_LINT0);
@@ -373,7 +377,7 @@ static void apic_update_irq(APICCommonState *s)
 
 void apic_poll_irq(DeviceState *dev)
 {
-APICCommonState *s = APIC_COMMON(dev);
+APICCommonState *s = APIC(dev);
 
 apic_sync_vapic(s, SYNC_FROM_VAPIC);
 apic_update_irq(s);
@@ -479,7 +483,7 @@ static void apic_startup(APICCommonState *s, int vector_num)
 
 void apic_sipi(DeviceState *dev)
 {
-APICCommonState *s = APIC_COMMON(dev);
+APICCommonState *s = APIC(dev);
 
 cpu_reset_interrupt(CPU(s->cpu), CPU_INTERRUPT_SIPI);
 
@@ -493,7 +497,7 @@ static void apic_deliver(DeviceState *dev, uint8_t dest, 
uint8_t dest_mode,
  uint8_t delivery_mode, uint8_t vector_num,
  uint8_t trigger_mode)
 {
-APICCommonState *s = APIC_COMMON(dev);
+APICCommonState *s = APIC(dev);
 uint32_t deliver_bitmask[MAX_APIC_WORDS];
 int dest_shorthand = (s->icr[0] >> 18) & 3;
 APICCommonState *apic_iter;
@@ -550,7 +554,7 @@ static bool apic_check_pic(APICCommonState *s)
 
 int apic_get_interrupt(DeviceState *dev)
 {
-APICCommonState *s = APIC_COMMON(dev);
+APICCommonState *s = APIC(dev);
 int intno;
 
 /* if the APIC is installed or enabled, we let the 8259 handle the
@@ -584,7 +588,7 @@ int apic_get_interrupt(DeviceState *dev)
 
 int apic_accept_pic_intr(DeviceState *dev)
 {
-APICCommonState *s = APIC_COMMON(dev);
+APICCommonState *s = APIC(dev);
 uint32_t lvt0;
 
 if (!s)
@@ -663,7 +667,7 @@ static uint32_t apic_mem_readl(void *opaque, hwaddr addr)
 if (!dev) {
 return 0;
 }
-s = APIC_COMMON(dev);
+s = APIC(dev);
 
 index = (addr >> 4) & 0xff;
 switch(index) {
@@ -766,7 +770,7 @@ static void apic_mem_writel(void *opaque, hwaddr addr, 
uint32_t val)
 if (!dev) {
 return;
 }
-s = APIC_COMMON(dev);
+s = APIC(dev);
 
 trace_apic_mem_writel(addr, val);
 
@@ -870,7 +874,7 @@ static const MemoryRegionOps apic_io_ops = {
 
 static void apic_realize(DeviceState *dev, Error **errp)
 {
-APICCommonState *s = APIC_COMMON(dev);
+APICCommonState *s = APIC(dev);
 
 if (s->id >= MAX_APICS) {
 error_setg(errp, "%s initialization failed. APIC ID %d is invalid",
@@ -889,7 +893,7 @@ static void apic_realize(DeviceState *dev, Error **errp)
 
 static void apic_unrealize(DeviceState *dev, Error **errp)
 {
-APICCommonState *s = APIC_COMMON(dev);
+APICCommonState *s = APIC(dev);
 
 timer_del(s->timer);
 timer_free(s->timer);
@@ -912,7 +916,7 @@ static void apic_class_init(ObjectClass *klass, void *data)
 }
 
 static const TypeInfo apic_info = {
-.name  = "apic",
+.name  = TYPE_APIC,
 .instance_size = sizeof(APICCommonState),
 .parent= TYPE_APIC_COMMON,
 .class_init= apic_class_init,
-- 
1.9.1




[Qemu-devel] [PATCH] KVM: Add async pf flag to KVM_GET/SET_VCPU_EVENTS interface

2017-06-20 Thread Wanpeng Li
From: Wanpeng Li 

This patch adds async pf flag to KVM_GET/SET_VCPU_EVENTS interface.

Signed-off-by: Wanpeng Li 
---
 linux-headers/asm-x86/kvm.h | 2 ++
 target/i386/cpu.h   | 1 +
 target/i386/kvm.c   | 6 +-
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h
index c2824d0..435f03f 100644
--- a/linux-headers/asm-x86/kvm.h
+++ b/linux-headers/asm-x86/kvm.h
@@ -287,6 +287,7 @@ struct kvm_reinject_control {
 #define KVM_VCPUEVENT_VALID_SIPI_VECTOR0x0002
 #define KVM_VCPUEVENT_VALID_SHADOW 0x0004
 #define KVM_VCPUEVENT_VALID_SMM0x0008
+#define KVM_VCPUEVENT_VALID_ASYNC_PF 0x0010
 
 /* Interrupt shadow states */
 #define KVM_X86_SHADOW_INT_MOV_SS  0x01
@@ -300,6 +301,7 @@ struct kvm_vcpu_events {
__u8 has_error_code;
__u8 pad;
__u32 error_code;
+   bool async_page_fault;
} exception;
struct {
__u8 injected;
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index cfe825f..f409958 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1105,6 +1105,7 @@ typedef struct CPUX86State {
 
 /* exception/interrupt handling */
 int error_code;
+bool async_page_fault;
 int exception_is_int;
 target_ulong exception_next_eip;
 target_ulong dr[8]; /* debug registers; note dr4 and dr5 are unused */
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index 49b6115..793d1e1 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -2493,6 +2493,7 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level)
 events.exception.has_error_code = env->has_error_code;
 events.exception.error_code = env->error_code;
 events.exception.pad = 0;
+events.exception.async_page_fault = env->async_page_fault;
 
 events.interrupt.injected = (env->interrupt_injected >= 0);
 events.interrupt.nr = env->interrupt_injected;
@@ -2531,7 +2532,8 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level)
 
 if (level >= KVM_PUT_RESET_STATE) {
 events.flags |=
-KVM_VCPUEVENT_VALID_NMI_PENDING | KVM_VCPUEVENT_VALID_SIPI_VECTOR;
+KVM_VCPUEVENT_VALID_NMI_PENDING | KVM_VCPUEVENT_VALID_SIPI_VECTOR |
+KVM_VCPUEVENT_VALID_ASYNC_PF;
 }
 
 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_VCPU_EVENTS, &events);
@@ -2556,6 +2558,8 @@ static int kvm_get_vcpu_events(X86CPU *cpu)
events.exception.injected ? events.exception.nr : -1;
 env->has_error_code = events.exception.has_error_code;
 env->error_code = events.exception.error_code;
+if (events.flags & KVM_VCPUEVENT_VALID_ASYNC_PF)
+env->async_page_fault = events.exception.async_page_fault;
 
 env->interrupt_injected =
 events.interrupt.injected ? events.interrupt.nr : -1;
-- 
2.7.4




[Qemu-devel] [PATCH v2] KVM: Add async pf flag to KVM_GET/SET_VCPU_EVENTS interface

2017-06-21 Thread Wanpeng Li
From: Wanpeng Li 

This patch adds async page fault flag to KVM_GET/SET_VCPU_EVENTS interface.

Cc: Paolo Bonzini 
Cc: Radim Krčmář 
Signed-off-by: Wanpeng Li 
---
v1 -> v2:
 * reuse the pad for async_page_fault
 * cleanup coding style

 linux-headers/asm-x86/kvm.h | 3 ++-
 target/i386/cpu.h   | 1 +
 target/i386/kvm.c   | 8 ++--
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h
index c2824d0..27b368b 100644
--- a/linux-headers/asm-x86/kvm.h
+++ b/linux-headers/asm-x86/kvm.h
@@ -287,6 +287,7 @@ struct kvm_reinject_control {
 #define KVM_VCPUEVENT_VALID_SIPI_VECTOR0x0002
 #define KVM_VCPUEVENT_VALID_SHADOW 0x0004
 #define KVM_VCPUEVENT_VALID_SMM0x0008
+#define KVM_VCPUEVENT_VALID_ASYNC_PF 0x0010
 
 /* Interrupt shadow states */
 #define KVM_X86_SHADOW_INT_MOV_SS  0x01
@@ -298,7 +299,7 @@ struct kvm_vcpu_events {
__u8 injected;
__u8 nr;
__u8 has_error_code;
-   __u8 pad;
+   __u8 async_page_fault;
__u32 error_code;
} exception;
struct {
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index cfe825f..90cb692 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1105,6 +1105,7 @@ typedef struct CPUX86State {
 
 /* exception/interrupt handling */
 int error_code;
+uint8_t async_page_fault;
 int exception_is_int;
 target_ulong exception_next_eip;
 target_ulong dr[8]; /* debug registers; note dr4 and dr5 are unused */
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index 49b6115..b2405ec 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -2491,8 +2491,8 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level)
 events.exception.injected = (env->exception_injected >= 0);
 events.exception.nr = env->exception_injected;
 events.exception.has_error_code = env->has_error_code;
+events.exception.async_page_fault = env->async_page_fault;
 events.exception.error_code = env->error_code;
-events.exception.pad = 0;
 
 events.interrupt.injected = (env->interrupt_injected >= 0);
 events.interrupt.nr = env->interrupt_injected;
@@ -2531,7 +2531,8 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level)
 
 if (level >= KVM_PUT_RESET_STATE) {
 events.flags |=
-KVM_VCPUEVENT_VALID_NMI_PENDING | KVM_VCPUEVENT_VALID_SIPI_VECTOR;
+KVM_VCPUEVENT_VALID_NMI_PENDING | KVM_VCPUEVENT_VALID_SIPI_VECTOR |
+KVM_VCPUEVENT_VALID_ASYNC_PF;
 }
 
 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_VCPU_EVENTS, &events);
@@ -2555,6 +2556,9 @@ static int kvm_get_vcpu_events(X86CPU *cpu)
 env->exception_injected =
events.exception.injected ? events.exception.nr : -1;
 env->has_error_code = events.exception.has_error_code;
+if (events.flags & KVM_VCPUEVENT_VALID_ASYNC_PF) {
+env->async_page_fault = events.exception.async_page_fault;
+}
 env->error_code = events.exception.error_code;
 
 env->interrupt_injected =
-- 
2.7.4




Re: [Qemu-devel] [PATCH] KVM: Add async pf flag to KVM_GET/SET_VCPU_EVENTS interface

2017-06-21 Thread Wanpeng Li
2017-06-22 0:28 GMT+08:00 Radim Krčmář :
> 2017-06-20 20:14-0700, Wanpeng Li:
>> From: Wanpeng Li 
>>
>> This patch adds async pf flag to KVM_GET/SET_VCPU_EVENTS interface.
>>
>> Signed-off-by: Wanpeng Li 
>> ---
>> diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h
>> @@ -300,6 +301,7 @@ struct kvm_vcpu_events {
>>   __u8 has_error_code;
>>   __u8 pad;
>>   __u32 error_code;
>> + bool async_page_fault;
>
> Touching userspace interfaces is always a major fun ...
>
> You must not change the layout of an existing structure.  You can try to
> reuse the pad and hope that some userspace didn't check it for 0.
> (I think it's a decent compromise between safety and sanity.)

Thanks for pointing that out. I have just fixed it in v2.

Regards,
Wanpeng Li

>
>>   } exception;
>>   struct {
>>   __u8 injected;
>> diff --git a/target/i386/kvm.c b/target/i386/kvm.c
>> @@ -2493,6 +2493,7 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level)
>>  events.exception.has_error_code = env->has_error_code;
>>  events.exception.error_code = env->error_code;
>>  events.exception.pad = 0;
>> +events.exception.async_page_fault = env->async_page_fault;
>>
>>  events.interrupt.injected = (env->interrupt_injected >= 0);
>
> Old QEMUs would break below this point, because interrupt.injected used
> to be where exception.async_page_fault is.
>
>>  events.interrupt.nr = env->interrupt_injected;



Re: [Qemu-devel] [QEMU PATCH v2] kvmclock: advance clock by time window between vm_stop and pre_save

2017-02-07 Thread Wanpeng Li
2016-11-08 3:41 GMT+08:00 Marcelo Tosatti :
> On Mon, Nov 07, 2016 at 03:46:11PM +, Dr. David Alan Gilbert wrote:
>> * Marcelo Tosatti (mtosa...@redhat.com) wrote:
>> > This patch, relative to pre-copy migration codepath,
>> > measures the time between vm_stop() and pre_save(),
>> > which includes copying the remaining RAM to destination,
>> > and advances the clock by that amount.
>> >
>> > In a VM with 5 seconds downtime, this reduces the guest
>> > clock difference on destination from 5s to 0.2s.
>> >
>> > Tested with Linux and Windows 2012 R2 guests with -cpu XXX,+hv-time.
>>
>> One thing that bothers me is that it's only this clock that's
>> getting corrected; doesn't it cause things to get upset when
>> one clock moves and the others dont?
>
> If you are correlating the clocks, then yes.
>
> Older Linux guests get upset (marking the TSC clocksource unstable
> because the watchdog checks TSC vs kvmclock), but there is a workaround for it
> in newer guests
> (kvmclock interface to notify watchdog to not complain).

Could you point out which interface? I didn't find it.

Regards,
Wanpeng Li

>
> Note marking TSC clocksource unstable on older guests is harmless
> because kvmclock is the standard clocksource.
>
> For Windows guests, i don't know that Windows correlates between different
> clocks.
>
> That is, there is relative control as to which software reads kvmclock
> or Windows TIMER MSR, so i don't see the need to advance every clock
> exposed.
>
>> Shouldn't the pause delay be recorded somewhere architecturally
>> independent and then be a thing that kvm-clock happens to use and
>> other clocks might as well?
>
> In theory, yes. In practice, i don't see the need for this...
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



Re: [Qemu-devel] About QEMU BQL and dirty log switch in Migration

2017-05-11 Thread Wanpeng Li
2017-05-11 20:24 GMT+08:00 Paolo Bonzini :
>
>
> On 11/05/2017 14:07, Zhoujian (jay) wrote:
>> -* Scan sptes if dirty logging has been stopped, dropping those
>> -* which can be collapsed into a single large-page spte.  Later
>> -* page faults will create the large-page sptes.
>> +* Reset each vcpu's mmu, then page faults will create the large-page
>> +* sptes later.
>>  */
>> if ((change != KVM_MR_DELETE) &&
>> (old->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
>> -   !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
>> -   kvm_mmu_zap_collapsible_sptes(kvm, new);

This is an unlikely branch (taken only if guest live migration fails and
the guest continues to run on the source machine) rather than a hot path.
Do you have any performance numbers for your real workloads?

Regards,
Wanpeng Li

>> +   !(new->flags & KVM_MEM_LOG_DIRTY_PAGES)) {
>> +   kvm_for_each_vcpu(i, vcpu, kvm)
>> +   kvm_mmu_reset_context(vcpu);
>
> This should be "kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);" but
> I am not sure it is enough.  I think that if you do not zap the SPTEs,
> the page faults will use 4K SPTEs, not large ones (though I'd have to
> check better; CCing Xiao and Wanpeng).
>
> Paolo



Re: [Qemu-devel] About QEMU BQL and dirty log switch in Migration

2017-05-11 Thread Wanpeng Li
2017-05-11 21:43 GMT+08:00 Wanpeng Li :
> 2017-05-11 20:24 GMT+08:00 Paolo Bonzini :
>>
>>
>> On 11/05/2017 14:07, Zhoujian (jay) wrote:
>>> -* Scan sptes if dirty logging has been stopped, dropping those
>>> -* which can be collapsed into a single large-page spte.  Later
>>> -* page faults will create the large-page sptes.
>>> +* Reset each vcpu's mmu, then page faults will create the 
>>> large-page
>>> +* sptes later.
>>>  */
>>> if ((change != KVM_MR_DELETE) &&
>>> (old->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
>>> -   !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
>>> -   kvm_mmu_zap_collapsible_sptes(kvm, new);
>
> This is an unlikely branch(unless guest live migration fails and
> continue to run on the source machine) instead of hot path, do you
> have any performance number for your real workloads?

I found the original discussion via Google:
https://lists.nongnu.org/archive/html/qemu-devel/2017-04/msg04143.html
You will not take this branch if the guest live migration completes
successfully.

Regards,
Wanpeng Li



Re: [Qemu-devel] About QEMU BQL and dirty log switch in Migration

2017-05-11 Thread Wanpeng Li
2017-05-11 22:18 GMT+08:00 Zhoujian (jay) :
> Hi Wanpeng,
>
>> 2017-05-11 21:43 GMT+08:00 Wanpeng Li :
>> > 2017-05-11 20:24 GMT+08:00 Paolo Bonzini :
>> >>
>> >>
>> >> On 11/05/2017 14:07, Zhoujian (jay) wrote:
>> >>> -* Scan sptes if dirty logging has been stopped, dropping
>> those
>> >>> -* which can be collapsed into a single large-page spte.
>> Later
>> >>> -* page faults will create the large-page sptes.
>> >>> +* Reset each vcpu's mmu, then page faults will create the
>> large-page
>> >>> +* sptes later.
>> >>>  */
>> >>> if ((change != KVM_MR_DELETE) &&
>> >>> (old->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
>> >>> -   !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
>> >>> -   kvm_mmu_zap_collapsible_sptes(kvm, new);
>> >
>> > This is an unlikely branch(unless guest live migration fails and
>> > continue to run on the source machine) instead of hot path, do you
>> > have any performance number for your real workloads?
>>
>> I find the original discussion by google.
>> https://lists.nongnu.org/archive/html/qemu-devel/2017-04/msg04143.html
>> You will not go to this branch if the guest live migration successfully.
>
>  In our tests, this branch is taken when living migration is successful.
>  AFAIK, the kmod does not know whether living migration successful or not
>  when dealing with KVM_SET_USER_MEMORY_REGION ioctl. Do I miss something?

Originally there was a bug where the memslot dirty log flag was not
cleared after live migration failed, and a patch was submitted to fix it:
https://lists.nongnu.org/archive/html/qemu-devel/2015-04/msg00794.html
However, I can't remember whether the dirty log flag was cleared when
live migration completed successfully at that time; maybe not.
Paolo replied to the patch that he had a better method. After that I was
too busy and didn't follow the QEMU patch for this fix any more; I have
just found that this commit has since been merged:
http://git.qemu.org/?p=qemu.git;a=commit;h=6f6a5ef3e429f92f987678ea8c396aab4dc6aa19
This commit clears the memslot dirty log flag after live migration no
matter whether it is successful or not.

Regards,
Wanpeng Li



Re: [Qemu-devel] About QEMU BQL and dirty log switch in Migration

2017-05-16 Thread Wanpeng Li
Hi Zhoujian,
2017-05-17 10:20 GMT+08:00 Zhoujian (jay) :
> Hi Wanpeng,
>
>> > On 11/05/2017 14:07, Zhoujian (jay) wrote:
>> >> -* Scan sptes if dirty logging has been stopped, dropping those
>> >> -* which can be collapsed into a single large-page spte.  Later
>> >> -* page faults will create the large-page sptes.
>> >> +* Reset each vcpu's mmu, then page faults will create the
>> large-page
>> >> +* sptes later.
>> >>  */
>> >> if ((change != KVM_MR_DELETE) &&
>> >> (old->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
>> >> -   !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
>> >> -   kvm_mmu_zap_collapsible_sptes(kvm, new);
>>
>> This is an unlikely branch(unless guest live migration fails and continue
>> to run on the source machine) instead of hot path, do you have any
>> performance number for your real workloads?
>>
>
> Sorry to bother you again.
>
> Recently, I have tested the performance before migration and after migration 
> failure
> using spec cpu2006 https://www.spec.org/cpu2006/, which is a standard 
> performance
> evaluation tool.
>
> These are the results:
> **
> Before migration the score is 153, and the TLB miss statistics of the 
> qemu process is:
> linux-sjrfac:/mnt/zhoujian # perf stat -e 
> dTLB-load-misses,dTLB-loads,dTLB-store-misses, \
> dTLB-stores,iTLB-load-misses,iTLB-loads -p 26463 sleep 10
>
> Performance counter stats for process id '26463':
>
>698,938  dTLB-load-misses  #0.13% of all dTLB 
> cache hits   (50.46%)
>543,303,875  dTLB-loads
> (50.43%)
>199,597  dTLB-store-misses 
> (16.51%)
> 60,128,561  dTLB-stores   
> (16.67%)
> 69,986  iTLB-load-misses  #6.17% of all iTLB 
> cache hits   (16.67%)
>  1,134,097  iTLB-loads
> (33.33%)
>
>   10.000684064 seconds time elapsed
>
> After migration failure the score is 149, and the TLB miss statistics of 
> the qemu process is:
> linux-sjrfac:/mnt/zhoujian # perf stat -e 
> dTLB-load-misses,dTLB-loads,dTLB-store-misses, \
> dTLB-stores,iTLB-load-misses,iTLB-loads -p 26463 sleep 10
>
> Performance counter stats for process id '26463':
>
>765,400  dTLB-load-misses  #0.14% of all dTLB 
> cache hits   (50.50%)
>540,972,144  dTLB-loads
> (50.47%)
>207,670  dTLB-store-misses 
> (16.50%)
> 58,363,787  dTLB-stores   
> (16.67%)
>109,772  iTLB-load-misses  #    9.52% of all iTLB 
> cache hits   (16.67%)
>  1,152,784  iTLB-loads
> (33.32%)
>
>   10.000703078 seconds time elapsed
> **

Could you comment out the original "lazy collapse small sptes into
large sptes" code in the function kvm_arch_commit_memory_region() and
post the results here?

Regards,
Wanpeng Li

>
> These are the steps:
> ==
>  (1) the version of kmod is 4.4.11(with slightly modified) and the version of 
> qemu is 2.6.0
> (with slightly modified), the kmod is applied with the following patch 
> according to
> Paolo's advice:
>
> diff --git a/source/x86/x86.c b/source/x86/x86.c
> index 054a7d3..75a4bb3 100644
> --- a/source/x86/x86.c
> +++ b/source/x86/x86.c
> @@ -8550,8 +8550,10 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
>  */
> if ((change != KVM_MR_DELETE) &&
> (old->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
> -   !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
> -   kvm_mmu_zap_collapsible_sptes(kvm, new);
> +   !(new->flags & KVM_MEM_LOG_DIRTY_PAGES)) {
> +   printk(KERN_ERR "zj make KVM_REQ_MMU_RELOAD request\n");
> +   kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
> +   }
>
> /*
>  * Set up write protection and/or dirty logging for the new slot.
>
> (2) I started up a memory preoccupied 10G VM(suse11sp3), which means its "RES 
> column&quo

Re: [Qemu-devel] About QEMU BQL and dirty log switch in Migration

2017-05-17 Thread Wanpeng Li
2017-05-17 15:43 GMT+08:00 Paolo Bonzini :
>> Recently, I have tested the performance before migration and after migration 
>> failure
>> using spec cpu2006 https://www.spec.org/cpu2006/, which is a standard 
>> performance
>> evaluation tool.
>>
>> These are the steps:
>> ==
>>  (1) the version of kmod is 4.4.11(with slightly modified) and the version of
>>  qemu is 2.6.0
>> (with slightly modified), the kmod is applied with the following patch
>>
>> diff --git a/source/x86/x86.c b/source/x86/x86.c
>> index 054a7d3..75a4bb3 100644
>> --- a/source/x86/x86.c
>> +++ b/source/x86/x86.c
>> @@ -8550,8 +8550,10 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
>>  */
>> if ((change != KVM_MR_DELETE) &&
>> (old->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
>> -   !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
>> -   kvm_mmu_zap_collapsible_sptes(kvm, new);
>> +   !(new->flags & KVM_MEM_LOG_DIRTY_PAGES)) {
>> +   printk(KERN_ERR "zj make KVM_REQ_MMU_RELOAD request\n");
>> +   kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
>> +   }
>>
>> /*
>>  * Set up write protection and/or dirty logging for the new slot.
>
> Try these modifications to the setup:
>
> 1) set up 1G hugetlbfs hugepages and use those for the guest's memory
>
> 2) test both without and with the above patch.
>

In addition, we can compare /sys/kernel/debug/kvm/largepages w/ and
w/o the patch. IIRC, /sys/kernel/debug/kvm/largepages will drop during
live migration; it will stay at a small value if live migration fails
w/o the "lazy collapse small sptes into large sptes" code, whereas it
will increase gradually w/ the "lazy collapse small sptes into
large sptes" code.

Regards,
Wanpeng Li



[Qemu-devel] [PATCH v3] KVM: Add async pf flag to KVM_GET/SET_VCPU_EVENTS interface

2017-06-28 Thread Wanpeng Li
From: Wanpeng Li 

This patch adds async page fault flag to KVM_GET/SET_VCPU_EVENTS interface.

Cc: Paolo Bonzini 
Cc: Radim Krčmář 
Signed-off-by: Wanpeng Li 
---
v2 -> v3:
 * nested_apf for vcpu events
v1 -> v2:
 * reuse the pad for async_page_fault
 * cleanup coding style

 linux-headers/asm-x86/kvm.h | 3 ++-
 target/i386/cpu.h   | 1 +
 target/i386/kvm.c   | 8 ++--
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h
index c2824d0..27b368b 100644
--- a/linux-headers/asm-x86/kvm.h
+++ b/linux-headers/asm-x86/kvm.h
@@ -287,6 +287,7 @@ struct kvm_reinject_control {
 #define KVM_VCPUEVENT_VALID_SIPI_VECTOR0x0002
 #define KVM_VCPUEVENT_VALID_SHADOW 0x0004
 #define KVM_VCPUEVENT_VALID_SMM0x0008
+#define KVM_VCPUEVENT_VALID_ASYNC_PF 0x0010
 
 /* Interrupt shadow states */
 #define KVM_X86_SHADOW_INT_MOV_SS  0x01
@@ -298,7 +299,7 @@ struct kvm_vcpu_events {
__u8 injected;
__u8 nr;
__u8 has_error_code;
-   __u8 pad;
+   __u8 nested_apf;
__u32 error_code;
} exception;
struct {
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index cfe825f..90cb692 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1105,6 +1105,7 @@ typedef struct CPUX86State {
 
 /* exception/interrupt handling */
 int error_code;
+uint8_t async_page_fault;
 int exception_is_int;
 target_ulong exception_next_eip;
 target_ulong dr[8]; /* debug registers; note dr4 and dr5 are unused */
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index 49b6115..b2405ec 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -2491,8 +2491,8 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level)
 events.exception.injected = (env->exception_injected >= 0);
 events.exception.nr = env->exception_injected;
 events.exception.has_error_code = env->has_error_code;
+events.exception.nested_apf = env->async_page_fault;
 events.exception.error_code = env->error_code;
-events.exception.pad = 0;
 
 events.interrupt.injected = (env->interrupt_injected >= 0);
 events.interrupt.nr = env->interrupt_injected;
@@ -2531,7 +2531,8 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level)
 
 if (level >= KVM_PUT_RESET_STATE) {
 events.flags |=
-KVM_VCPUEVENT_VALID_NMI_PENDING | KVM_VCPUEVENT_VALID_SIPI_VECTOR;
+KVM_VCPUEVENT_VALID_NMI_PENDING | KVM_VCPUEVENT_VALID_SIPI_VECTOR |
+KVM_VCPUEVENT_VALID_ASYNC_PF;
 }
 
 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_VCPU_EVENTS, &events);
@@ -2555,6 +2556,9 @@ static int kvm_get_vcpu_events(X86CPU *cpu)
 env->exception_injected =
events.exception.injected ? events.exception.nr : -1;
 env->has_error_code = events.exception.has_error_code;
+if (events.flags & KVM_VCPUEVENT_VALID_ASYNC_PF) {
+env->async_page_fault = events.exception.nested_apf;
+}
 env->error_code = events.exception.error_code;
 
 env->interrupt_injected =
-- 
2.7.4




Re: [Qemu-devel] [PATCH v3] target-i386/cpu: Add new EPYC CPU model

2017-08-16 Thread Wanpeng Li
Cc Chandu,

On 8/16/17 1:00 AM, Brijesh Singh wrote:
> Add a new base CPU model called 'EPYC' to model processors from AMD EPYC
> family (which includes EPYC 76xx,75xx,74xx, 73xx and 72xx).
>
> The following features bits have been added/removed compare to Opteron_G5
>
> Added: monitor, movbe, rdrand, mmxext, ffxsr, rdtscp, cr8legacy, osvw,
> fsgsbase, bmi1, avx2, smep, bmi2, rdseed, adx, smap, clfshopt, sha
> xsaveopt, xsavec, xgetbv1, arat

Hi Brijesh,

Actually, I wonder whether support for these new instructions
has already been merged in kvm/qemu?

Regards,
Wanpeng Li

>
> Removed: xop, fma4, tbm
>
> Cc: Paolo Bonzini 
> Cc: Richard Henderson 
> Cc: Eduardo Habkost 
> Cc: Tom Lendacky 
> Signed-off-by: Brijesh Singh 
> ---
>
> Changes since v2:
>   * limit the xlevel to 0x800a
>
> Changes since v1:
>   * fix typo EYPC -> EPYC to reflect the correct branding name
>
>   target/i386/cpu.c | 44 
>   1 file changed, 44 insertions(+)
>
> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> index ddc45ab..6617e01 100644
> --- a/target/i386/cpu.c
> +++ b/target/i386/cpu.c
> @@ -1522,6 +1522,50 @@ static X86CPUDefinition builtin_x86_defs[] = {
>   .xlevel = 0x801A,
>   .model_id = "AMD Opteron 63xx class CPU",
>   },
> +{
> +.name = "EPYC",
> +.level = 0xd,
> +.vendor = CPUID_VENDOR_AMD,
> +.family = 23,
> +.model = 1,
> +.stepping = 2,
> +.features[FEAT_1_EDX] =
> +CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH |
> +CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE |
> +CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE |
> +CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | CPUID_DE |
> +CPUID_VME | CPUID_FP87,
> +.features[FEAT_1_ECX] =
> +CPUID_EXT_RDRAND | CPUID_EXT_F16C | CPUID_EXT_AVX |
> +CPUID_EXT_XSAVE | CPUID_EXT_AES |  CPUID_EXT_POPCNT |
> +CPUID_EXT_MOVBE | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 |
> +CPUID_EXT_CX16 | CPUID_EXT_FMA | CPUID_EXT_SSSE3 |
> +CPUID_EXT_MONITOR | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3,
> +.features[FEAT_8000_0001_EDX] =
> +CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_PDPE1GB |
> +CPUID_EXT2_FFXSR | CPUID_EXT2_MMXEXT | CPUID_EXT2_NX |
> +CPUID_EXT2_SYSCALL,
> +.features[FEAT_8000_0001_ECX] =
> +CPUID_EXT3_OSVW | CPUID_EXT3_3DNOWPREFETCH |
> +CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM |
> +CPUID_EXT3_CR8LEG | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM,
> +.features[FEAT_7_0_EBX] =
> +CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 
> |
> +CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_RDSEED |
> +CPUID_7_0_EBX_ADX | CPUID_7_0_EBX_SMAP | 
> CPUID_7_0_EBX_CLFLUSHOPT |
> +CPUID_7_0_EBX_SHA_NI,
> +/* Missing: XSAVES (not supported by some Linux versions,
> + * including v4.1 to v4.12).
> + * KVM doesn't yet expose any XSAVES state save component.
> + */
> +.features[FEAT_XSAVE] =
> +CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC |
> +CPUID_XSAVE_XGETBV1,
> +.features[FEAT_6_EAX] =
> +CPUID_6_EAX_ARAT,
> +.xlevel = 0x800A,
> +.model_id = "AMD EPYC Processor",
> +},
>   };
>   
>   typedef struct PropValue {



Re: [Qemu-devel] [PATCH] target-i386 : fix a race condition result to lost INIT

2017-07-30 Thread Wanpeng Li
2017-07-31 4:01 GMT+08:00 Peng Hao :
> when SMP VM start, AP may lost INIT because of receiving INIT between
> kvm_vcpu_ioctl_x86_get/set_vcpu_events.
>
>vcpu 0 vcpu 1
>kvm_vcpu_ioctl_x86_get_vcpu_events
>events->smi.latched_init=0
>  send INIT to vcpu1
>set vcpu1's pending_events
>kvm_vcpu_ioctl_x86_set_vcpu_events
> events->smi.latched_init == 0
>   clear INIT in pending_events
> considering migration, just at level >= KVM_PUT_RESET_STATE,
> add KVM_VCPUEVENT_VALID_SMM in events.flags.I think it is better to
> modify in qemu.

Doing it in qemu just avoids triggering the bug instead of fixing it. Why
do we touch the pending INIT if INIT is not latched in SMM?

Regards,
Wanpeng Li

>
> Signed-off-by: Peng Hao 
> ---
>  target/i386/kvm.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/target/i386/kvm.c b/target/i386/kvm.c
> index a6613e1..be146cb 100644
> --- a/target/i386/kvm.c
> +++ b/target/i386/kvm.c
> @@ -2438,7 +2438,7 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level)
>  /* Stop SMI delivery on old machine types to avoid a reboot
>   * on an inward migration of an old VM.
>   */
> -if (!cpu->kvm_no_smi_migration) {
> +if (!cpu->kvm_no_smi_migration && (level >= KVM_PUT_RESET_STATE)) {
>  events.flags |= KVM_VCPUEVENT_VALID_SMM;
>  }
>  }
> --
> 1.8.3.1
>
>



Re: [Qemu-devel] About QEMU BQL and dirty log switch in Migration

2018-12-10 Thread Wanpeng Li
On Fri, 19 May 2017 at 16:10, Jay Zhou  wrote:
>
> Hi Paolo and Wanpeng,
>
> On 2017/5/17 16:38, Wanpeng Li wrote:
> > 2017-05-17 15:43 GMT+08:00 Paolo Bonzini :
> >>> Recently, I have tested the performance before migration and after 
> >>> migration failure
> >>> using spec cpu2006 https://www.spec.org/cpu2006/, which is a standard 
> >>> performance
> >>> evaluation tool.
> >>>
> >>> These are the steps:
> >>> ==
> >>>   (1) the version of kmod is 4.4.11(with slightly modified) and the 
> >>> version of
> >>>   qemu is 2.6.0
> >>>  (with slightly modified), the kmod is applied with the following 
> >>> patch
> >>>
> >>> diff --git a/source/x86/x86.c b/source/x86/x86.c
> >>> index 054a7d3..75a4bb3 100644
> >>> --- a/source/x86/x86.c
> >>> +++ b/source/x86/x86.c
> >>> @@ -8550,8 +8550,10 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
> >>>   */
> >>>  if ((change != KVM_MR_DELETE) &&
> >>>  (old->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
> >>> -   !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
> >>> -   kvm_mmu_zap_collapsible_sptes(kvm, new);
> >>> +   !(new->flags & KVM_MEM_LOG_DIRTY_PAGES)) {
> >>> +   printk(KERN_ERR "zj make KVM_REQ_MMU_RELOAD request\n");
> >>> +   kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
> >>> +   }
> >>>
> >>>  /*
> >>>   * Set up write protection and/or dirty logging for the new slot.
> >>
> >> Try these modifications to the setup:
> >>
> >> 1) set up 1G hugetlbfs hugepages and use those for the guest's memory
> >>
> >> 2) test both without and with the above patch.
> >>
>
> In order to avoid random memory allocation issues, I reran the test cases:
> (1) setup: start a 4U10G VM with memory preoccupied, each vcpu is pinned to a
> pcpu respectively, these resources(memory and pcpu) allocated to VM are all
> from NUMA node 0
> (2) sequence: firstly, I run the 429.mcf of spec cpu2006 before migration, and
> get a result. And then, migration failure is constructed. At last, I run the
> test case again, and get an another result.
> (3) results:
> Host hugepages           THP on(2M)  THP on(2M)  THP on(2M)  THP on(2M)
> Patch                    patch1      patch2      patch3      -
> Before migration         No          No          No          Yes
> After migration failed   Yes         Yes         Yes         No
> Largepages               67->1862    62->1890    95->1865    1926
> score of 429.mcf         189         188         188         189
>
> Host hugepages           1G hugepages  1G hugepages  1G hugepages  1G hugepages
> Patch                    patch1        patch2        patch3        -
> Before migration         No            No            No            Yes
> After migration failed   Yes           Yes           Yes           No
> Largepages               21            21            26            39
> score of 429.mcf         188           188           186           188
>
> Notes:
> patch1  means with "lazy collapse small sptes into large sptes" codes
> patch2  means comment out "lazy collapse small sptes into large sptes" codes
> patch3  means using kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD)
>  instead of kvm_mmu_zap_collapsible_sptes(kvm, new)
>
> "Largepages" means the value of /sys/kernel/debug/kvm/largepages
>
> > In addition, we can compare /sys/kernel/debug/kvm/largepages w/ and
> > w/o the patch. IIRC, /sys/kernel/debug/kvm/largepages will drop during
> > live migration, it will keep a small value if live migration fails and
> > w/o "lazy collapse small sptes into large sptes" codes, however, it
> > will increase gradually if w/ the "lazy collapse small sptes into
> > large sptes" codes.
> >
>
> No, without the "lazy collapse small sptes into large sptes" codes,
> /sys/kernel/debug/kvm/largepages does drop during live migration,
> but it still will increase gradually if live migration fails, see the result
> above. I printed out the back trace when it increases after migration failure,
>
> [139574.369098]  [] dump_stack+0x19/0x1b
> [139574.369111]  [] mmu_set_spte+0x2f6/0x310 [kvm]
> [139574.369122]  [] __direct_map.isra.109+0x1de/0x250 [kvm]
> [139574.369133]  [] tdp_page_fault+0x

Re: [Qemu-devel] [PATCH v3 1/2] kvm: support -dedicated cpu-pm=on|off

2018-07-04 Thread Wanpeng Li
On Wed, 20 Jun 2018 at 10:41, Michael S. Tsirkin  wrote:
>
> On Wed, Jun 20, 2018 at 08:46:10AM +0800, Wanpeng Li wrote:
> > On Wed, 20 Jun 2018 at 08:07, Michael S. Tsirkin  wrote:
> > >
> > > On Tue, Jun 19, 2018 at 05:07:46PM -0500, Eric Blake wrote:
> > > > On 06/19/2018 10:17 AM, Paolo Bonzini wrote:
> > > > > On 16/06/2018 00:29, Michael S. Tsirkin wrote:
> > > > > > +static QemuOptsList qemu_dedicated_opts = {
> > > > > > +.name = "dedicated",
> > > > > > +.head = QTAILQ_HEAD_INITIALIZER(qemu_dedicated_opts.head),
> > > > > > +.desc = {
> > > > > > +{
> > > > > > +.name = "mem-lock",
> > > > > > +.type = QEMU_OPT_BOOL,
> > > > > > +},
> > > > > > +{
> > > > > > +.name = "cpu-pm",
> > > > > > +.type = QEMU_OPT_BOOL,
> > > > > > +},
> > > > > > +{ /* end of list */ }
> > > > > > +},
> > > > > > +};
> > > > > > +
> > > > >
> > > > > Let the bikeshedding begin!
> > > > >
> > > > > 1) Should we deprecate -realtime?
> > > > >
> > > > > 2) Maybe -hostresource?
> > > >
> > > > What further things might we add in the future?
> > > >
> > > > -dedicated sounds wrong (it is an adjective, while most of our options 
> > > > are
> > > > nouns - thing -machine, -drive, -object, ...)
> > > >
> > > > -hostresource at least sounds like a noun, but is long to type.  But at
> > > > least '-hostresource cpu-pm=on' reads reasonably well.
> > >
> > > Yes but host resource what? I feel it says nothing at all about what
> > > one can expect to find in this flag.
> > >
> > > > About the only other noun I could think of would be '-feature 
> > > > cpu-pm=on'.
> > >
> > > If we have nothing at all to say about what is grouping these things,
> > > we don't need a new flag. We can make it a machine property.
> > >
> > > It's user's hint that some host resource is dedicated to a VM.
> >
> > The commit 633711e82 (kvm: rename KVM_HINTS_DEDICATED to
> > KVM_HINTS_REALTIME) should be reverted according to several threads
> > discussion I think.
> >
> > Regards,
> > Wanpeng Li
>
> IMHO that is unrelated - these KVM hints are hints to *guest*.

Actually, I really don't like the KVM_HINTS_REALTIME renaming. There are
dedicated instances in public cloud environments, chosen for security or
performance reasons. Financial customers may prefer dedicated pCPUs for
security, while gaming customers may prefer dedicated pCPUs for
performance. So "realtime" is not a suitable name.

Regards,
Wanpeng Li



[Qemu-devel] [PATCH] target-i386: coalesced PIO support for RTC

2018-07-10 Thread Wanpeng Li
Windows I/O, such as the real-time clock. The address register (port
0x70 in the RTC case) can use coalesced I/O, cutting the number of
userspace exits by half when reading or writing the RTC.

The guest accesses the RTC like this: it writes the register index to port
0x70, then writes or reads data through port 0x71. The write to port 0x70
only selects the index and does nothing else, so we can use coalesced I/O
for it to reduce the time spent in VM exits.

In our environment, with 12 Windows guests running on a Skylake server:

Before patch:

IO Port Access  Samples  Samples%  Time%    Avg time

0x70:POUT       20675    46.04%    92.72%   67.15us ( +-   7.93% )

After patch:

IO Port Access  Samples  Samples%  Time%    Avg time

0x70:POUT       17509    45.42%    42.08%   6.37us ( +-  20.37% )

Thanks to Peng Hao's initial patch.

Cc: Paolo Bonzini 
Cc: Radim Krčmář 
Cc: Eduardo Habkost 
Signed-off-by: Wanpeng Li 
---
 accel/kvm/kvm-all.c   | 56 +++
 hw/timer/mc146818rtc.c|  8 +++
 include/exec/memattrs.h   |  1 +
 include/exec/memory.h |  5 +
 include/sysemu/kvm.h  |  8 +++
 linux-headers/linux/kvm.h |  5 +++--
 memory.c  |  5 +
 7 files changed, 82 insertions(+), 6 deletions(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index eb7db92..7a12341 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -128,6 +128,7 @@ bool kvm_direct_msi_allowed;
 bool kvm_ioeventfd_any_length_allowed;
 bool kvm_msi_use_devid;
 static bool kvm_immediate_exit;
+bool kvm_coalesced_pio_allowed;
 
 static const KVMCapabilityInfo kvm_required_capabilites[] = {
 KVM_CAP_INFO(USER_MEMORY),
@@ -536,7 +537,7 @@ static void kvm_coalesce_mmio_region(MemoryListener 
*listener,
 
 zone.addr = start;
 zone.size = size;
-zone.pad = 0;
+zone.pio = 0;
 
 (void)kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
 }
@@ -553,7 +554,7 @@ static void kvm_uncoalesce_mmio_region(MemoryListener 
*listener,
 
 zone.addr = start;
 zone.size = size;
-zone.pad = 0;
+zone.pio = 0;
 
 (void)kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
 }
@@ -877,6 +878,45 @@ static void kvm_io_ioeventfd_del(MemoryListener *listener,
 }
 }
 
+static void kvm_coalesce_io_add(MemoryListener *listener,
+MemoryRegionSection *section,
+hwaddr start, hwaddr size)
+{
+KVMState *s = kvm_state;
+
+if (kvm_coalesced_pio_allowed) {
+struct kvm_coalesced_mmio_zone zone;
+
+zone.addr = start;
+zone.size = size;
+zone.pio = 1;
+
+(void)kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
+}
+}
+
+static void kvm_coalesce_io_del(MemoryListener *listener,
+MemoryRegionSection *section,
+hwaddr start, hwaddr size)
+{
+KVMState *s = kvm_state;
+
+if (kvm_coalesced_pio_allowed) {
+struct kvm_coalesced_mmio_zone zone;
+
+zone.addr = start;
+zone.size = size;
+zone.pio = 1;
+
+(void)kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
+}
+}
+
+static MemoryListener kvm_coalesced_io_listener = {
+.coalesced_mmio_add = kvm_coalesce_io_add,
+.coalesced_mmio_del = kvm_coalesce_io_del,
+.priority = 10,
+};
 void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
   AddressSpace *as, int as_id)
 {
@@ -1615,6 +1655,8 @@ static int kvm_init(MachineState *ms)
 }
 
 s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO);
+kvm_coalesced_pio_allowed = s->coalesced_mmio &&
+kvm_check_extension(s, KVM_CAP_COALESCED_PIO);
 
 #ifdef KVM_CAP_VCPU_EVENTS
 s->vcpu_events = kvm_check_extension(s, KVM_CAP_VCPU_EVENTS);
@@ -1694,6 +1736,8 @@ static int kvm_init(MachineState *ms)
  &address_space_memory, 0);
 memory_listener_register(&kvm_io_listener,
  &address_space_io);
+memory_listener_register(&kvm_coalesced_io_listener,
+ &address_space_io);
 
 s->many_ioeventfds = kvm_check_many_ioeventfds();
 
@@ -1775,8 +1819,12 @@ void kvm_flush_coalesced_mmio_buffer(void)
 struct kvm_coalesced_mmio *ent;
 
 ent = &ring->coalesced_mmio[ring->first];
-
-cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len);
+if (ent->pio) {
+address_space_rw(&address_space_io, ent->phys_addr,
+ MEMTXATTRS_NONE, ent->data, ent->len, true);
+} else {
+cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len);
+}
 smp_wmb();
 ring->first = (ring->fir

[Qemu-devel] [PATCH v2] target-i386: coalesced PIO support for RTC

2018-07-11 Thread Wanpeng Li
From: Peng Hao 

Windows I/O, such as the real-time clock. The address register (port
0x70 in the RTC case) can use coalesced I/O, cutting the number of
userspace exits by half when reading or writing the RTC.

The guest accesses the RTC like this: it writes the register index to port
0x70, then writes or reads data through port 0x71. The write to port 0x70
only selects the index and does nothing else, so we can use coalesced I/O
for it to reduce the time spent in VM exits.

In our environment, with 12 Windows guests running on a Skylake server:

Before patch:

IO Port Access  Samples  Samples%  Time%    Avg time

0x70:POUT       20675    46.04%    92.72%   67.15us ( +-   7.93% )

After patch:

IO Port Access  Samples  Samples%  Time%    Avg time

0x70:POUT       17509    45.42%    42.08%   6.37us ( +-  20.37% )

Cc: Paolo Bonzini 
Cc: Radim Krčmář 
Cc: Eduardo Habkost 
Cc: Peng Hao 
Signed-off-by: Peng Hao 
Signed-off-by: Wanpeng Li 
---
v1 -> v2:
 * add the original author

 accel/kvm/kvm-all.c   | 56 +++
 hw/timer/mc146818rtc.c|  8 +++
 include/exec/memattrs.h   |  1 +
 include/exec/memory.h |  5 +
 include/sysemu/kvm.h  |  8 +++
 linux-headers/linux/kvm.h |  5 +++--
 memory.c  |  5 +
 7 files changed, 82 insertions(+), 6 deletions(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index eb7db92..7a12341 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -128,6 +128,7 @@ bool kvm_direct_msi_allowed;
 bool kvm_ioeventfd_any_length_allowed;
 bool kvm_msi_use_devid;
 static bool kvm_immediate_exit;
+bool kvm_coalesced_pio_allowed;
 
 static const KVMCapabilityInfo kvm_required_capabilites[] = {
 KVM_CAP_INFO(USER_MEMORY),
@@ -536,7 +537,7 @@ static void kvm_coalesce_mmio_region(MemoryListener 
*listener,
 
 zone.addr = start;
 zone.size = size;
-zone.pad = 0;
+zone.pio = 0;
 
 (void)kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
 }
@@ -553,7 +554,7 @@ static void kvm_uncoalesce_mmio_region(MemoryListener 
*listener,
 
 zone.addr = start;
 zone.size = size;
-zone.pad = 0;
+zone.pio = 0;
 
 (void)kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
 }
@@ -877,6 +878,45 @@ static void kvm_io_ioeventfd_del(MemoryListener *listener,
 }
 }
 
+static void kvm_coalesce_io_add(MemoryListener *listener,
+MemoryRegionSection *section,
+hwaddr start, hwaddr size)
+{
+KVMState *s = kvm_state;
+
+if (kvm_coalesced_pio_allowed) {
+struct kvm_coalesced_mmio_zone zone;
+
+zone.addr = start;
+zone.size = size;
+zone.pio = 1;
+
+(void)kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
+}
+}
+
+static void kvm_coalesce_io_del(MemoryListener *listener,
+MemoryRegionSection *section,
+hwaddr start, hwaddr size)
+{
+KVMState *s = kvm_state;
+
+if (kvm_coalesced_pio_allowed) {
+struct kvm_coalesced_mmio_zone zone;
+
+zone.addr = start;
+zone.size = size;
+zone.pio = 1;
+
+(void)kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
+}
+}
+
+static MemoryListener kvm_coalesced_io_listener = {
+.coalesced_mmio_add = kvm_coalesce_io_add,
+.coalesced_mmio_del = kvm_coalesce_io_del,
+.priority = 10,
+};
 void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
   AddressSpace *as, int as_id)
 {
@@ -1615,6 +1655,8 @@ static int kvm_init(MachineState *ms)
 }
 
 s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO);
+kvm_coalesced_pio_allowed = s->coalesced_mmio &&
+kvm_check_extension(s, KVM_CAP_COALESCED_PIO);
 
 #ifdef KVM_CAP_VCPU_EVENTS
 s->vcpu_events = kvm_check_extension(s, KVM_CAP_VCPU_EVENTS);
@@ -1694,6 +1736,8 @@ static int kvm_init(MachineState *ms)
  &address_space_memory, 0);
 memory_listener_register(&kvm_io_listener,
  &address_space_io);
+memory_listener_register(&kvm_coalesced_io_listener,
+ &address_space_io);
 
 s->many_ioeventfds = kvm_check_many_ioeventfds();
 
@@ -1775,8 +1819,12 @@ void kvm_flush_coalesced_mmio_buffer(void)
 struct kvm_coalesced_mmio *ent;
 
 ent = &ring->coalesced_mmio[ring->first];
-
-cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len);
+if (ent->pio) {
+address_space_rw(&address_space_io, ent->phys_addr,
+ MEMTXATTRS_NONE, ent->data, ent->len, true);
+} else {
+cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len);
+}
 sm

Re: [Qemu-devel] [PATCH] target-i386: adds PV_SEND_IPI CPUID feature bit

2018-07-23 Thread Wanpeng Li
Ping,
On Tue, 3 Jul 2018 at 05:43, Eduardo Habkost  wrote:
>
> On Mon, Jul 02, 2018 at 06:22:51PM +0800, Wanpeng Li wrote:
> > From: Wanpeng Li 
> >
> > Adds PV_SEND_IPI CPUID feature bit.
> >
>
> Thanks!
>
> Paolo, Radim: can I assume the bit is already reserved and queue
> this patch, or should I wait until the KVM patch is merged into
> kvm.git?



Re: [Qemu-devel] [PATCH] target-i386: adds PV_SEND_IPI CPUID feature bit

2018-08-07 Thread Wanpeng Li
Hi Eduardo,
On Tue, 3 Jul 2018 at 05:43, Eduardo Habkost  wrote:
>
> On Mon, Jul 02, 2018 at 06:22:51PM +0800, Wanpeng Li wrote:
> > From: Wanpeng Li 
> >
> > Adds PV_SEND_IPI CPUID feature bit.
> >
>
> Thanks!
>
> Paolo, Radim: can I assume the bit is already reserved and queue
> this patch, or should I wait until the KVM patch is merged into
> kvm.git?

I guess you can apply the qemu patch now since the kvm part is merged.
https://git.kernel.org/pub/scm/virt/kvm/kvm.git/commit/?h=queue&id=4180bf1b655a791a0a6ef93a2c762722c782

Regards,
Wanpeng Li



[Qemu-devel] [PATCH] target-i386: adds PV_SEND_IPI CPUID feature bit

2018-07-02 Thread Wanpeng Li
From: Wanpeng Li 

Adds PV_SEND_IPI CPUID feature bit.

Cc: Paolo Bonzini 
Cc: Eduardo Habkost 
Cc: Radim Krčmář 
Cc: Vitaly Kuznetsov 
Signed-off-by: Wanpeng Li 
---
 target/i386/cpu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 1e6a7d0..24e425a 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -870,7 +870,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
 .feat_names = {
 "kvmclock", "kvm-nopiodelay", "kvm-mmu", "kvmclock",
 "kvm-asyncpf", "kvm-steal-time", "kvm-pv-eoi", "kvm-pv-unhalt",
-NULL, "kvm-pv-tlb-flush", NULL, NULL,
+NULL, "kvm-pv-tlb-flush", NULL, "kvm-pv-ipi",
 NULL, NULL, NULL, NULL,
 NULL, NULL, NULL, NULL,
 NULL, NULL, NULL, NULL,
-- 
2.7.4




Re: [Qemu-devel] [PATCH v3 1/2] kvm: support -dedicated cpu-pm=on|off

2018-06-19 Thread Wanpeng Li
On Wed, 20 Jun 2018 at 08:07, Michael S. Tsirkin  wrote:
>
> On Tue, Jun 19, 2018 at 05:07:46PM -0500, Eric Blake wrote:
> > On 06/19/2018 10:17 AM, Paolo Bonzini wrote:
> > > On 16/06/2018 00:29, Michael S. Tsirkin wrote:
> > > > +static QemuOptsList qemu_dedicated_opts = {
> > > > +.name = "dedicated",
> > > > +.head = QTAILQ_HEAD_INITIALIZER(qemu_dedicated_opts.head),
> > > > +.desc = {
> > > > +{
> > > > +.name = "mem-lock",
> > > > +.type = QEMU_OPT_BOOL,
> > > > +},
> > > > +{
> > > > +.name = "cpu-pm",
> > > > +.type = QEMU_OPT_BOOL,
> > > > +},
> > > > +{ /* end of list */ }
> > > > +},
> > > > +};
> > > > +
> > >
> > > Let the bikeshedding begin!
> > >
> > > 1) Should we deprecate -realtime?
> > >
> > > 2) Maybe -hostresource?
> >
> > What further things might we add in the future?
> >
> > -dedicated sounds wrong (it is an adjective, while most of our options are
> > nouns - thing -machine, -drive, -object, ...)
> >
> > -hostresource at least sounds like a noun, but is long to type.  But at
> > least '-hostresource cpu-pm=on' reads reasonably well.
>
> Yes but host resource what? I feel it says nothing at all about what
> one can expect to find in this flag.
>
> > About the only other noun I could think of would be '-feature cpu-pm=on'.
>
> If we have nothing at all to say about what is grouping these things,
> we don't need a new flag. We can make it a machine property.
>
> It's user's hint that some host resource is dedicated to a VM.

I think commit 633711e82 (kvm: rename KVM_HINTS_DEDICATED to
KVM_HINTS_REALTIME) should be reverted, according to the discussion in
several threads.

Regards,
Wanpeng Li



[Qemu-devel] [PATCH] hmp: fix qemu crash due to ioapic state dump w/ split irqchip

2016-09-22 Thread Wanpeng Li
From: Wanpeng Li 

QEMU crashes when "info ioapic" is run through HMP while the irqchip
is split. The following message is printed:

KVM_GET_IRQCHIP failed: Unknown error -6

This patch fixes it by dumping the ioapic state from the QEMU-emulated
ioapic if the irqchip is split.

Cc: Paolo Bonzini 
Cc: Richard Henderson 
Cc: Eduardo Habkost 
Signed-off-by: Wanpeng Li 
---
 target-i386/monitor.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/target-i386/monitor.c b/target-i386/monitor.c
index fccfe40..bf1e983 100644
--- a/target-i386/monitor.c
+++ b/target-i386/monitor.c
@@ -504,7 +504,8 @@ void hmp_info_local_apic(Monitor *mon, const QDict *qdict)
 
 void hmp_info_io_apic(Monitor *mon, const QDict *qdict)
 {
-if (kvm_irqchip_in_kernel()) {
+if (kvm_irqchip_in_kernel() &&
+!kvm_irqchip_is_split()) {
 kvm_ioapic_dump_state(mon, qdict);
 } else {
 ioapic_dump_state(mon, qdict);
-- 
1.9.1




[Qemu-devel] [PATCH] target-i386: fix losing XCR0 processor state component bits

2016-09-27 Thread Wanpeng Li
From: Wanpeng Li 

Commit 96193c22a "target-i386: Move xsave component mask to features array"
leverages the features array to handle XCR0 processor state component bits;
however, it introduces a regression:

warning: host doesn't support requested feature: CPUID.0DH:EAX [bit 0]
warning: host doesn't support requested feature: CPUID.0DH:EAX [bit 1]
warning: host doesn't support requested feature: CPUID.0DH:EAX [bit 2]

My desktop doesn't have many advanced features, so only the X87, SSE and AVX
warnings are printed when I boot a guest.

The migratable-flags logic in the x86_cpu_filter_features() path filters
out the feature flags which are unsupported and unmigratable. However, the
bits of the XCR0 processor state component feature words don't have
feat_names, while features such as SSE/AVX have their feat_names in
CPUID.01H:EDX and CPUID.01H:ECX instead, so the XCR0 bits are treated as
unsupported.

This patch fixes it by not filtering out the XCR0 processor state component
bits even though they don't have feat_names, just as before commit 96193c22ab3.

Cc: Paolo Bonzini 
Cc: Richard Henderson 
Cc: Eduardo Habkost 
Cc: Michael S. Tsirkin 
Signed-off-by: Wanpeng Li 
---
 target-i386/cpu.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index ad09246..9d24eff 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -2156,6 +2156,10 @@ static uint32_t 
x86_cpu_get_supported_feature_word(FeatureWord w,
 r = kvm_arch_get_supported_cpuid(kvm_state, wi->cpuid_eax,
 wi->cpuid_ecx,
 wi->cpuid_reg);
+if ((w == FEAT_XSAVE_COMP_LO) ||
+(w == FEAT_XSAVE_COMP_HI)) {
+return r;
+}
 } else if (tcg_enabled()) {
 r = wi->tcg_features;
 } else {
-- 
1.9.1




Re: [Qemu-devel] [PATCH] target-i386: fix losing XCR0 processor state component bits

2016-09-28 Thread Wanpeng Li
2016-09-28 15:54 GMT+08:00 Paolo Bonzini :
[...]
> I think the right place to add the test is x86_cpu_get_migratable_flags.

I just sent out v2 to handle this, thanks for pointing this out.

Regards,
Wanpeng Li



  1   2   >