[Qemu-devel] [PATCH v2] target-mips: Use TCG registers for the FPU.

2012-09-19 Thread Richard Henderson
With normal FP, this doesn't have much affect on the generated code,
because most of the FP operations are not CONST/PURE, and so we spill
registers in about the same frequency as the explicit load/stores.

But with Loongson multimedia instructions, which are all integral and
whose helpers are in fact CONST+PURE, this greatly improves the code.

Signed-off-by: Richard Henderson r...@twiddle.net
---

As requested, only generating 64-bit fp registers now.  The generated
code looks quite good on i386.  It could be a tad better for x86_64,
but it's still better than it was.


r~


 target-mips/translate.c | 96 +++--
 1 file changed, 54 insertions(+), 42 deletions(-)

diff --git a/target-mips/translate.c b/target-mips/translate.c
index 52eeb2b..4ae15aa 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -478,6 +478,7 @@ static TCGv cpu_HI[MIPS_DSP_ACC], cpu_LO[MIPS_DSP_ACC], 
cpu_ACX[MIPS_DSP_ACC];
 static TCGv cpu_dspctrl, btarget, bcond;
 static TCGv_i32 hflags;
 static TCGv_i32 fpu_fcr0, fpu_fcr31;
+static TCGv_i64 fpu_f64[32];
 
 static uint32_t gen_opc_hflags[OPC_BUF_SIZE];
 
@@ -545,26 +546,31 @@ enum {
 BS_EXCP = 3, /* We reached an exception condition */
 };
 
-static const char *regnames[] =
-{ r0, at, v0, v1, a0, a1, a2, a3,
-  t0, t1, t2, t3, t4, t5, t6, t7,
-  s0, s1, s2, s3, s4, s5, s6, s7,
-  t8, t9, k0, k1, gp, sp, s8, ra, };
+static const char * const regnames[] = {
+r0, at, v0, v1, a0, a1, a2, a3,
+t0, t1, t2, t3, t4, t5, t6, t7,
+s0, s1, s2, s3, s4, s5, s6, s7,
+t8, t9, k0, k1, gp, sp, s8, ra,
+};
 
-static const char *regnames_HI[] =
-{ HI0, HI1, HI2, HI3, };
+static const char * const regnames_HI[] = {
+HI0, HI1, HI2, HI3,
+};
 
-static const char *regnames_LO[] =
-{ LO0, LO1, LO2, LO3, };
+static const char * const regnames_LO[] = {
+LO0, LO1, LO2, LO3,
+};
 
-static const char *regnames_ACX[] =
-{ ACX0, ACX1, ACX2, ACX3, };
+static const char * const regnames_ACX[] = {
+ACX0, ACX1, ACX2, ACX3,
+};
 
-static const char *fregnames[] =
-{ f0,  f1,  f2,  f3,  f4,  f5,  f6,  f7,
-  f8,  f9,  f10, f11, f12, f13, f14, f15,
-  f16, f17, f18, f19, f20, f21, f22, f23,
-  f24, f25, f26, f27, f28, f29, f30, f31, };
+static const char * const fregnames[] = {
+f0,  f1,  f2,  f3,  f4,  f5,  f6,  f7,
+f8,  f9,  f10, f11, f12, f13, f14, f15,
+f16, f17, f18, f19, f20, f21, f22, f23,
+f24, f25, f26, f27, f28, f29, f30, f31,
+};
 
 #ifdef MIPS_DEBUG_DISAS
 #define MIPS_DEBUG(fmt, ...) \
@@ -658,54 +664,54 @@ static inline void gen_store_srsgpr (int from, int to)
 }
 
 /* Floating point register moves. */
-static inline void gen_load_fpr32 (TCGv_i32 t, int reg)
+static void gen_load_fpr32(TCGv_i32 t, int reg)
 {
-tcg_gen_ld_i32(t, cpu_env, offsetof(CPUMIPSState, 
active_fpu.fpr[reg].w[FP_ENDIAN_IDX]));
+tcg_gen_trunc_i64_i32(t, fpu_f64[reg]);
 }
 
-static inline void gen_store_fpr32 (TCGv_i32 t, int reg)
+static void gen_store_fpr32(TCGv_i32 t, int reg)
 {
-tcg_gen_st_i32(t, cpu_env, offsetof(CPUMIPSState, 
active_fpu.fpr[reg].w[FP_ENDIAN_IDX]));
+TCGv_i64 t64 = tcg_temp_new_i64();
+tcg_gen_extu_i32_i64(t64, t);
+tcg_gen_deposit_i64(fpu_f64[reg], fpu_f64[reg], t64, 0, 32);
+tcg_temp_free_i64(t64);
 }
 
-static inline void gen_load_fpr32h (TCGv_i32 t, int reg)
+static void gen_load_fpr32h(TCGv_i32 t, int reg)
 {
-tcg_gen_ld_i32(t, cpu_env, offsetof(CPUMIPSState, 
active_fpu.fpr[reg].w[!FP_ENDIAN_IDX]));
+TCGv_i64 t64 = tcg_temp_new_i64();
+tcg_gen_shri_i64(t64, fpu_f64[reg], 32);
+tcg_gen_trunc_i64_i32(t, t64);
+tcg_temp_free_i64(t64);
 }
 
-static inline void gen_store_fpr32h (TCGv_i32 t, int reg)
+static void gen_store_fpr32h(TCGv_i32 t, int reg)
 {
-tcg_gen_st_i32(t, cpu_env, offsetof(CPUMIPSState, 
active_fpu.fpr[reg].w[!FP_ENDIAN_IDX]));
+TCGv_i64 t64 = tcg_temp_new_i64();
+tcg_gen_extu_i32_i64(t64, t);
+tcg_gen_deposit_i64(fpu_f64[reg], fpu_f64[reg], t64, 32, 32);
+tcg_temp_free_i64(t64);
 }
 
-static inline void gen_load_fpr64 (DisasContext *ctx, TCGv_i64 t, int reg)
+static void gen_load_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
 {
 if (ctx-hflags  MIPS_HFLAG_F64) {
-tcg_gen_ld_i64(t, cpu_env, offsetof(CPUMIPSState, 
active_fpu.fpr[reg].d));
+tcg_gen_mov_i64(t, fpu_f64[reg]);
 } else {
-TCGv_i32 t0 = tcg_temp_new_i32();
-TCGv_i32 t1 = tcg_temp_new_i32();
-gen_load_fpr32(t0, reg  ~1);
-gen_load_fpr32(t1, reg | 1);
-tcg_gen_concat_i32_i64(t, t0, t1);
-tcg_temp_free_i32(t0);
-tcg_temp_free_i32(t1);
+tcg_gen_concat32_i64(t, fpu_f64[reg  ~1], fpu_f64[reg | 1]);
 }
 }
 
-static inline void gen_store_fpr64 (DisasContext *ctx, TCGv_i64 t, int reg)
+static void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
 {
 if (ctx-hflags  MIPS_HFLAG_F64) {
-

Re: [Qemu-devel] [PATCH V3 1/5] libqblock build system

2012-09-19 Thread Wenchao Xia

于 2012-9-18 18:05, Paolo Bonzini 写道:

Il 18/09/2012 11:01, Wenchao Xia ha scritto:

   Libqblock was placed in new directory ./libqblock, libtool will build
dynamic library there, source files of block layer remains in ./block.
So block related source code will generate 3 sets of binary, first is old
ones used in qemu, second and third are non PIC and PIC ones in ./libqblock.
   GCC compiler flag visibility=hidden was used with special macro, to export
only symbols that was marked as PUBLIC.

Signed-off-by: Wenchao Xia xiaw...@linux.vnet.ibm.com
---
  Makefile|   13 -
  Makefile.objs   |6 
  libqblock/Makefile  |   64 +++
  3 files changed, 82 insertions(+), 1 deletions(-)
  create mode 100644 libqblock/Makefile
  create mode 100644 libqblock/libqblock-error.c
  create mode 100644 libqblock/libqblock.c

diff --git a/Makefile b/Makefile
index 971e92f..b0b9b8d 100644
--- a/Makefile
+++ b/Makefile
@@ -164,6 +164,17 @@ qemu-io$(EXESUF): qemu-io.o cmd.o $(tools-obj-y) 
$(block-obj-y)

  qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o

+##
+# Support building shared library libqblock
+ifeq ($(LIBTOOL),)
+$(libqblock-lib-la):
+   @echo libtool is missing, please install and rerun configure; exit 1
+else
+$(libqblock-lib-la):
+   $(call quiet-command,$(MAKE) $(SUBDIR_MAKEFLAGS) -C libqblock V=$(V) 
TARGET_DIR=$*/ $(libqblock-lib-la),)
+endif
+###
+
  vscclient$(EXESUF): $(libcacard-y) $(oslib-obj-y) $(trace-obj-y) 
$(tools-obj-y) qemu-timer-common.o libcacard/vscclient.o
$(call quiet-command,$(CC) $(LDFLAGS) -o $@ $^ $(libcacard_libs) $(LIBS),  
LINK  $@)

@@ -227,7 +238,7 @@ clean:
rm -rf qapi-generated
rm -rf qga/qapi-generated
$(MAKE) -C tests/tcg clean
-   for d in $(ALL_SUBDIRS) $(QEMULIBS) libcacard; do \
+   for d in $(ALL_SUBDIRS) $(QEMULIBS) libcacard libqblock; do \
if test -d $$d; then $(MAKE) -C $$d $@ || exit 1; fi; \
rm -f $$d/qemu-options.def; \
  done
diff --git a/Makefile.objs b/Makefile.objs
index 4412757..8a4c9fc 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -248,3 +248,9 @@ nested-vars += \
common-obj-y \
extra-obj-y
  dummy := $(call unnest-vars)
+
+#
+# libqblock
+
+libqblock-lib-la = libqblock.la
+libqblock-lib-path = libqblock
diff --git a/libqblock/Makefile b/libqblock/Makefile
new file mode 100644
index 000..bf7abcc
--- /dev/null
+++ b/libqblock/Makefile
@@ -0,0 +1,64 @@
+###
+# libqblock Makefile
+# Todo:
+#1 trace related files is generated in this directory, move
+#  them to the root directory.
+##
+-include ../config-host.mak
+-include $(SRC_PATH)/Makefile.objs
+-include $(SRC_PATH)/rules.mak
+
+#
+# Library settings
+#
+$(call set-vpath, $(SRC_PATH))
+
+#expand the foldered vars,especially ./block
+dummy := $(call unnest-vars-1)
+
+#library objects
+tools-obj-y = $(oslib-obj-y) $(trace-obj-y) qemu-tool.o qemu-timer.o \
+   qemu-timer-common.o main-loop.o notify.o \
+   iohandler.o cutils.o iov.o async.o
+tools-obj-$(CONFIG_POSIX) += compatfd.o


Do you really need all of these?

  I guess only some .o are needed, but not sure how to pick them out, 
is there some tools which can help detect which object files are needed

in final linkage?


(BTW, I posted recently a patch to move tools-obj-y to Makefile.objs.
It doesn't apply anymore, I'll repost---but the conflicts are trivial).


  greate, then I can use tools-obj-y directly.


+libqblock-y=libqblock.o libqblock-error.o
+libqblock-lib-y=$(patsubst %.o,%.lo,$(libqblock-y))
+
+QEMU_OBJS=$(tools-obj-y) $(block-obj-y)
+QEMU_OBJS_FILTERED=$(filter %.o,$(QEMU_OBJS))


What does this filter out?


  $(block-obj-y) contains /block, which would cause trouble in building,
so filtered it out.


Paolo


+QEMU_OBJS_LIB=$(patsubst %.o, %.lo,$(QEMU_OBJS_FILTERED))
+
+QEMU_CFLAGS+= -I../ -I../include
+#adding magic macro define for symbol hiding and exposing
+QEMU_CFLAGS+= -fvisibility=hidden -D LIBQB_BUILD
+
+#dependency libraries
+LIBS+=-lz $(LIBS_TOOLS)
+
+#
+# Runtime rules
+#
+clean:
+   rm -f *.lo *.o *.d *.la libqblock-test trace.c trace.c-timestamp
+   rm -rf .libs block trace
+
+all: libqblock-test
+   @true
+
+help:
+   @echo type make libqblock-test at root dirtory, libtool is required
+

Re: [Qemu-devel] [PATCH V3 5/5] libqblock test example code

2012-09-19 Thread Wenchao Xia

于 2012-9-18 18:10, Paolo Bonzini 写道:

Il 18/09/2012 11:01, Wenchao Xia ha scritto:

   In this example, user first create two qcow2 images, and then get the
backing file relationship information of them. Then does write and read
sync IO on them.


Please use gtest so that this can be easily extensible.

Paolo


  OK.


Signed-off-by: Wenchao Xia xiaw...@linux.vnet.ibm.com
---
  tests/libqblock/libqblock-test.c |  233 ++
  1 files changed, 233 insertions(+), 0 deletions(-)

diff --git a/tests/libqblock/libqblock-test.c b/tests/libqblock/libqblock-test.c
index c05c0c4..c0b7963 100644
--- a/tests/libqblock/libqblock-test.c
+++ b/tests/libqblock/libqblock-test.c
@@ -1,4 +1,237 @@
+/*
+ * QEMU block layer library test
+ *
+ * Copyright IBM, Corp. 2012
+ *
+ * Authors:
+ *  Wenchao Xia   xiaw...@linux.vnet.ibm.com
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include stdarg.h
+#include stdio.h
+#include unistd.h
+#include inttypes.h
+#include string.h
+#include stdlib.h
+#include libqblock.h
+
+#define TEST_BUF_SIZE 1024
+static unsigned char buf_r[TEST_BUF_SIZE];
+static unsigned char buf_w[TEST_BUF_SIZE] = {0, 0, 0, 0};
+
+typedef struct VerifyData {
+unsigned char *buf_r;
+unsigned char *buf_w;
+int len;
+} VerifyData;
+
+static void print_loc(QBlockProtInfo *loc)
+{
+if (loc == NULL) {
+printf(backing file is NULL.);
+return;
+}
+switch (loc-prot_type) {
+case QB_PROT_NONE:
+printf(protocol type [none].);
+break;
+case QB_PROT_FILE:
+printf(protocol type [file], filename [%s].,
+   loc-prot_op.o_file.filename);
+break;
+default:
+printf(protocol type not supported.);
+break;
+}
+}
+
+static void print_info_image_static(QBlockStaticInfo *info)
+{
+printf(===image location:\n);
+print_loc(info-loc);
+printf(\nvirtual_size % PRId64 , format type %d [%s],
+   *(info-member_addr-virt_size),
+   info-fmt.fmt_type, qb_fmttype2str(info-fmt.fmt_type));
+printf(\nbacking image location:\n);
+print_loc(info-member_addr-backing_loc);
+printf(\n);
+}
+
+static void test_check(VerifyData *vdata)
+{
+int cmp;
+cmp = memcmp(vdata-buf_r, vdata-buf_w, vdata-len);
+if (cmp == 0) {
+printf(compare succeed, %d.\n, vdata-buf_r[24]);
+} else {
+printf(!!! compare fail, %d.\n, vdata-buf_r[24]);
+exit(1);
+}
+}
+
  int main(int argc, char **argv)
  {
+const char *filename1, *filename2;
+QBroker *broker = NULL;
+QBlockState *qbs = NULL;
+QBlockProtInfo *ol = NULL;
+QBlockFmtInfo *of = NULL;
+QBlockStaticInfo *info_st = NULL;
+int ret, flag;
+int test_offset = 510;
+int test_len = 520;
+VerifyData vdata;
+char err_str[1024];
+
+vdata.buf_r = buf_r;
+vdata.buf_w = buf_w;
+vdata.len = test_len;
+
+filename1 = ./qemu_image1;
+filename2 = ./qemu_image2;
+printf(qemu test, filename1 is %s, filename2 is %s.\n,
+   filename1, filename2);
+
+ret = qb_broker_new(broker);
+if (ret  0) {
+goto free;
+}
+
+ret = qb_state_new(broker, qbs);
+if (ret  0) {
+goto free;
+}
+
+ret = qb_prot_info_new(broker, ol);
+if (ret  0) {
+goto free;
+}
+
+ret = qb_fmt_info_new(broker, of);
+if (ret  0) {
+goto free;
+}
+
+/* create a new image */
+
+ol-prot_type = QB_PROT_FILE;
+ol-prot_op.o_file.filename = filename2;
+of-fmt_type = QB_FMT_QCOW2;
+of-fmt_op.o_qcow2.virt_size = 100 * 1024;
+flag = 0;
+
+ret = qb_create(broker, qbs, ol, of, flag);
+if (ret  0) {
+qb_error_get_human_str(broker, err_str, sizeof(err_str));
+printf(create fail 1. %s.\n, err_str);
+goto unlink;
+}
+
+ol-prot_type = QB_PROT_FILE;
+ol-prot_op.o_file.filename = filename1;
+of-fmt_type = QB_FMT_QCOW2;
+of-fmt_op.o_qcow2.backing_loc.prot_type = QB_PROT_FILE;
+of-fmt_op.o_qcow2.backing_loc.prot_op.o_file.filename = filename2;
+flag = 0;
+ret = qb_create(broker, qbs, ol, of, flag);
+if (ret  0) {
+qb_error_get_human_str(broker, err_str, sizeof(err_str));
+printf(create fail 2. %s.\n, err_str);
+goto unlink;
+}
+
+/* get informations */
+ol-prot_type = QB_PROT_FILE;
+ol-prot_op.o_file.filename = filename1;
+of-fmt_type = QB_FMT_NONE;
+flag = LIBQBLOCK_O_NO_BACKING;
+ret = qb_open(broker, qbs, ol, of, flag);
+if (ret  0) {
+qb_error_get_human_str(broker, err_str, sizeof(err_str));
+printf(info getting, open failed. %s.\n, err_str);
+goto free;
+}
+
+while (1) {
+ret = qb_info_image_static_get(broker, qbs, info_st);
+if (ret  0) {
+qb_error_get_human_str(broker, 

Re: [Qemu-devel] [PATCH V3 4/5] libqblock test build system

2012-09-19 Thread Wenchao Xia

于 2012-9-18 18:10, Paolo Bonzini 写道:

Il 18/09/2012 11:01, Wenchao Xia ha scritto:

   Created a new directory in tests, make chekc-libqblock will build an
executable binrary, make clean will delete it.

Signed-off-by: Wenchao Xia xiaw...@linux.vnet.ibm.com
---
  .gitignore   |1 +
  Makefile |1 +
  tests/Makefile   |3 +++
  tests/libqblock/Makefile |   32 
  tests/libqblock/libqblock-test.c |4 
  5 files changed, 41 insertions(+), 0 deletions(-)
  create mode 100644 tests/libqblock/Makefile
  create mode 100644 tests/libqblock/libqblock-test.c

diff --git a/.gitignore b/.gitignore
index 824c0d2..eccb637 100644
--- a/.gitignore
+++ b/.gitignore
@@ -95,3 +95,4 @@ cscope.*
  tags
  TAGS
  *~
+tests/libqblock/*.bin
diff --git a/Makefile b/Makefile
index b0b9b8d..de8ea17 100644
--- a/Makefile
+++ b/Makefile
@@ -238,6 +238,7 @@ clean:
rm -rf qapi-generated
rm -rf qga/qapi-generated
$(MAKE) -C tests/tcg clean
+   $(MAKE) -C tests/libqblock clean
for d in $(ALL_SUBDIRS) $(QEMULIBS) libcacard libqblock; do \
if test -d $$d; then $(MAKE) -C $$d $@ || exit 1; fi; \
rm -f $$d/qemu-options.def; \
diff --git a/tests/Makefile b/tests/Makefile
index 26a67ce..69af1e2 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -148,4 +148,7 @@ check-unit: $(patsubst %,check-%, $(check-unit-y))
  check-block: $(patsubst %,check-%, $(check-block-y))
  check: check-unit check-qtest

+check-libqblock:
+   $(call quiet-command,$(MAKE) $(SUBDIR_MAKEFLAGS) -C tests/libqblock V=$(V) 
TARGET_DIR=$*/ check-libqblock,)


Please just put everything in tests/Makefile. make check should run it
if LIBTOOL is available.


  OK.


+libqblock-test.bin: $(libqblock-test-objs) $(libqblock-la-path)
+   $(call quiet-command,$(LIBTOOL) --mode=link --quiet --tag=CC $(CC) -shared -rpath 
$(libdir) -o $@ $^,  lt LINK $@)


.bin looks so MS-DOS. :)




Paolo




--
Best Regards

Wenchao Xia




Re: [Qemu-devel] [PATCH] hw/pflash_cfi0[12]: Use host-utils.h ctz32()

2012-09-19 Thread Markus Armbruster
Peter Maydell peter.mayd...@linaro.org writes:

 On 18 September 2012 17:53, Stefan Weil s...@weilnetz.de wrote:
 What about moving the ctz32 function (or even all of host-utils.h) to
 bitops.h?

 I guess that would make some sense, though I'm not generally
 a huge fan of shuffling code around.

 (bitops.h and host-utils.h are not under the same license terms,
 which might complicate things slightly.)

Have a look at hw/hd-geometry.c for a suitable boilerplate comment.



Re: [Qemu-devel] [PATCH] qapi: convert add_client

2012-09-19 Thread Markus Armbruster
Luiz Capitulino lcapitul...@redhat.com writes:

 On Tue, 18 Sep 2012 13:13:16 -0600
 Eric Blake ebl...@redhat.com wrote:

 On 09/18/2012 01:06 PM, Luiz Capitulino wrote:
  Also fixes a few issues while there:
  
   1. The fd returned by monitor_get_fd() leaks in most error conditions
   2. monitor_get_fd() return value is not checked. Best case we get
  an error that is not correctly reported, worse case one of the
  functions using the fd (with value of -1) will explode
   3. A few error conditions aren't reported
  
  Signed-off-by: Luiz Capitulino lcapitul...@redhat.com
  ---
   monitor.c| 39 ---
   qapi-schema.json | 23 +++
   qmp-commands.hx  |  5 +
   qmp.c| 44 
   4 files changed, 68 insertions(+), 43 deletions(-)
  
 
   { 'command': 'screendump', 'data': {'filename': 'str'} }
  +
  +##
  +# @add_client
[...]
 If this were a new command for 1.3, I'd say to name it 'add-client'; but
 since QMP has already been exposing it and you are now just documenting
 it, you can't change the name.

 Yes, we just have to live with that for all old commands.

If the inconsistency bothers us, we can either

* add suitable aliases for every QMP name containing '_', or

* fix the QMP names, and fold '_' to '-' in names received from client.



Re: [Qemu-devel] qmp: dump-guest-memory: -p option has issues, fix it or drop it?

2012-09-19 Thread Markus Armbruster
Anthony Liguori anth...@codemonkey.ws writes:

 Markus Armbruster arm...@redhat.com writes:

 Jan Kiszka jan.kis...@siemens.com writes:

  * The issues discussed in this email plus the fact that the guest
memory may be corrupted, and the guest may be in real-mode even
when paging is enabled


 Yes, there are some limitations with this option. Jan said that he
 always use gdb to deal with vmcore, so he needs such information.

 The point is to overcome the focus on Linux-only dump processing tools.
 
 While I don't care for supporting alternate dump processing tools
 myself, I certainly don't mind supporting them, as long as the code
 satisfies basic safety and reliability requirements.
 
 This code doesn't, as far as I can tell.

 It works, thought not under all circumstances.

 I don't doubt it works often enough to be useful to somebody.  But basic
 safety and reliability requirements are a bit more than that.  They
 include don't explode in ways a reasonable user can't be expected to
 foresee.  I don't think a reasonable user can be expected to see that
 -p is safe only for trusted guests.

 We shipped the API, we're not removing it.  Our compatibility isn't
 whatever libvirt is currently using.

Bah, don't be more catholic than the pope.  It's been out for how many
days?  Have you looked at the code?

 It's perfectly reasonable to ask to document the behavior of the
 method.  It's also a trivial patch to qapi-schema.json.

I'm okay with leaving obscure security holes in upstream QEMU,
indefinitely, as long as they're documented.  I don't think it's a good
idea, but it's something reasonable people can disagree about.

We need to document that -p is unreliable and unsafe, therefore should
only be used with trusted guests, and its result need not be correct
even then.

 It's unreasonable to ask for an interface to be removed just because it
 could be misused when it has a legimitate use-case.

The issue isn't misuse (operator does something stupid), it's abuse
(guest can make it blow up when operator uses it as intended), and
fragility (produces crap when operator uses it as intended, but the
guest isn't in a sufficiently nice state).



Re: [Qemu-devel] [PATCH] qemu-clock: add an alarm timer based on timerfd

2012-09-19 Thread Paolo Bonzini
Il 18/09/2012 22:37, Anthony Liguori ha scritto:
 Unfortunately, there's a lot of Windows code in qemu-timer.c and main-loop.c
 right now otherwise the refactoring would be trivial.  I'll leave that for
 another day.
 
 Cc: Paolo Bonzini pbonz...@redhat.com
 Cc: Jan Kiszka jan.kis...@siemens.com
 Signed-off-by: Anthony Liguori aligu...@us.ibm.com
 ---
 Please note, this is lightly tested.  Since this is such a fundamental change,
 I'd like to do some performance analysis before committing but wanted to share
 early.

Looks good.  I think Peter Portante tested something similar, and found no big
difference between the two.  But it's a good thing and, in my opinion, for
non-timerfd OSes we should simply adjust the select() timeout and not bother
with signals.

I'm not sure if the same can be done for Windows, but I think it's possible as 
long
as you keep the timeBeginPeriod/timeEndPeriod calls.  As a start, Stefan, can 
you
check if the win32 timer works for you with the calls added?  Like this:

diff --git a/qemu-timer.c b/qemu-timer.c
index c7a1551..721c769 100644
--- a/qemu-timer.c
+++ b/qemu-timer.c
@@ -673,6 +673,10 @@ static int win32_start_timer(struct qemu_alarm_timer *t)
 HANDLE hTimer;
 BOOLEAN success;
 
+timeGetDevCaps(mm_tc, sizeof(mm_tc));
+
+timeBeginPeriod(mm_tc.wPeriodMin);
+
 /* If you call ChangeTimerQueueTimer on a one-shot timer (its period
is zero) that has already expired, the timer is not updated.  Since
creating a new timer is relatively expensive, set a bogus one-hour
@@ -688,6 +692,7 @@ static int win32_start_timer(struct qemu_alarm_timer *t)
 if (!success) {
 fprintf(stderr, Failed to initialize win32 alarm timer: %ld\n,
 GetLastError());
+timeEndPeriod(mm_tc.wPeriodMin);
 return -1;
 }
 
@@ -702,6 +707,7 @@ static void win32_stop_timer(struct qemu_alarm_timer *t)
 if (hTimer) {
 DeleteTimerQueueTimer(NULL, hTimer, NULL);
 }
+timeEndPeriod(mm_tc.wPeriodMin);
 }
 
 static void win32_rearm_timer(struct qemu_alarm_timer *t,

Paolo



Re: [Qemu-devel] [PATCH 4/4] vl: add -late-object to create QOM objects after machine init

2012-09-19 Thread Paolo Bonzini
Il 19/09/2012 01:07, Anthony Liguori ha scritto:
 In order to create qdev objects via -late-object, we almost always have to
  specify the parent_bus which is usually created during machine init.  
  Until we
  properly support two stage init, introduce a -late-object option that 
  allows for
  creation of objects post-machine init.
  
  This is only needed for -device, no?  So we can avoid introducing it and
  just use -device.
 
  +1, was thinking along the same lines...
 Sorry for such a delayed response...
 
 The semantics of how -device is different in that it implies realize.

Yes, all I'm saying is, I don't see a need for -late-object since it is
only needed for devices and we have a separate method to create them.

Paolo



Re: [Qemu-devel] [PATCH] qemu-clock: add an alarm timer based on timerfd

2012-09-19 Thread Jan Kiszka
On 2012-09-19 09:26, Paolo Bonzini wrote:
 Il 18/09/2012 22:37, Anthony Liguori ha scritto:
 Unfortunately, there's a lot of Windows code in qemu-timer.c and main-loop.c
 right now otherwise the refactoring would be trivial.  I'll leave that for
 another day.

 Cc: Paolo Bonzini pbonz...@redhat.com
 Cc: Jan Kiszka jan.kis...@siemens.com
 Signed-off-by: Anthony Liguori aligu...@us.ibm.com
 ---
 Please note, this is lightly tested.  Since this is such a fundamental 
 change,
 I'd like to do some performance analysis before committing but wanted to 
 share
 early.
 
 Looks good.  I think Peter Portante tested something similar, and found no big
 difference between the two.  But it's a good thing and, in my opinion, for
 non-timerfd OSes we should simply adjust the select() timeout and not bother
 with signals.

What would be the advantage of timerfd over select? On Linux, both use
hrtimers (and low slack for RT processes). I'm starting to like the
select/WaitForMultipleObjects pattern as it would allow to consolidate
over basically two versions of timers and simplify the code.

Jan

 
 I'm not sure if the same can be done for Windows, but I think it's possible 
 as long
 as you keep the timeBeginPeriod/timeEndPeriod calls.  As a start, Stefan, can 
 you
 check if the win32 timer works for you with the calls added?  Like this:
 
 diff --git a/qemu-timer.c b/qemu-timer.c
 index c7a1551..721c769 100644
 --- a/qemu-timer.c
 +++ b/qemu-timer.c
 @@ -673,6 +673,10 @@ static int win32_start_timer(struct qemu_alarm_timer *t)
  HANDLE hTimer;
  BOOLEAN success;
  
 +timeGetDevCaps(mm_tc, sizeof(mm_tc));
 +
 +timeBeginPeriod(mm_tc.wPeriodMin);
 +
  /* If you call ChangeTimerQueueTimer on a one-shot timer (its period
 is zero) that has already expired, the timer is not updated.  Since
 creating a new timer is relatively expensive, set a bogus one-hour
 @@ -688,6 +692,7 @@ static int win32_start_timer(struct qemu_alarm_timer *t)
  if (!success) {
  fprintf(stderr, Failed to initialize win32 alarm timer: %ld\n,
  GetLastError());
 +timeEndPeriod(mm_tc.wPeriodMin);
  return -1;
  }
  
 @@ -702,6 +707,7 @@ static void win32_stop_timer(struct qemu_alarm_timer *t)
  if (hTimer) {
  DeleteTimerQueueTimer(NULL, hTimer, NULL);
  }
 +timeEndPeriod(mm_tc.wPeriodMin);
  }
  
  static void win32_rearm_timer(struct qemu_alarm_timer *t,
 
 Paolo
 

-- 
Siemens AG, Corporate Technology, CT RTC ITP SDP-DE
Corporate Competence Center Embedded Linux



[Qemu-devel] [PATCH] Adding BAR0 for e500 PCI controller

2012-09-19 Thread Bharat Bhushan
PCI Root complex have TYPE-1 configuration header while PCI endpoint
have type-0 configuration header. The type-1 configuration header have
a BAR (BAR0). In Freescale PCI controller BAR0 is used for mapping pci
address space to CCSR address space. This can used for 2 purposes: 1)
for MSI interrupt generation 2) Allow CCSR registers access when configured
as PCI endpoint, which I am not sure is a use case with QEMU-KVM guest.

What I observed is that when guest read the size of BAR0 of host controller
configuration header (TYPE1 header) then it always reads it as 0. When
looking into the QEMU hw/ppce500_pci.c, I do not find the PCI controller
device registering BAR0. I do not find any other controller also doing so
may they do not use BAR0.

There are two issues when BAR0 is not there (which I can think of):
1) There should be BAR0 emulated for PCI Root comaplex (TYPE1 header) and
when reading the size of BAR0, it should give size as per real h/w.

This patch solves this problem.

2) Do we need this BAR0 inbound address translation?
When BAR0 is of non-zero size then it will be configured for PCI
address space to local address(CCSR) space translation on inbound access.
The primary use case is for MSI interrupt generation. The device is
configured with a address offsets in PCI address space, which will be
translated to MSI interrupt generation MPIC registers. Currently I do
not understand the MSI interrupt generation mechanism in QEMU and also
IIRC we do not use QEMU MSI interrupt mechanism on e500 guest machines.
But this BAR0 will be used when using MSI on e500.

I can see one more issue, There are ATMUs emulated in hw/ppce500_pci.c,
but i do not see these being used for address translation.
So far that works because pci address space and local address space are 1:1
mapped. BAR0 inbound translation + ATMU translation will complete the address
translation of inbound traffic.

Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
---
 hw/pci_host.h|9 +
 hw/ppc/e500.c|   21 +
 hw/ppce500_pci.c |7 +++
 3 files changed, 37 insertions(+), 0 deletions(-)

diff --git a/hw/pci_host.h b/hw/pci_host.h
index 4b9c300..c1ec7eb 100644
--- a/hw/pci_host.h
+++ b/hw/pci_host.h
@@ -41,10 +41,19 @@ struct PCIHostState {
 MemoryRegion data_mem;
 MemoryRegion mmcfg;
 MemoryRegion *address_space;
+MemoryRegion bar0;
 uint32_t config_reg;
 PCIBus *bus;
 };
 
+typedef struct PPCE500CCSRState {
+SysBusDevice *parent;
+MemoryRegion ccsr_space;
+} PPCE500CCSRState;
+
+#define TYPE_CCSR e500-ccsr
+#define CCSR(obj) OBJECT_CHECK(PPCE500CCSRState, (obj), TYPE_CCSR)
+
 /* common internal helpers for PCI/PCIe hosts, cut off overflows */
 void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr,
   uint32_t limit, uint32_t val, uint32_t len);
diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index 6f0de6d..1f5da28 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -31,6 +31,7 @@
 #include hw/loader.h
 #include elf.h
 #include hw/sysbus.h
+#include hw/pci_host.h
 #include exec-memory.h
 #include host-utils.h
 
@@ -587,3 +588,23 @@ void ppce500_init(PPCE500Params *params)
 kvmppc_init();
 }
 }
+
+static void e500_ccsr_type_init(Object *obj)
+{
+PPCE500CCSRState *ccsr = CCSR(obj);
+ccsr-ccsr_space.size = int128_make64(MPC8544_CCSRBAR_SIZE);
+}
+
+static const TypeInfo e500_ccsr_info = {
+.name  = TYPE_CCSR,
+.parent=  TYPE_SYS_BUS_DEVICE,
+.instance_size = sizeof(PPCE500CCSRState),
+.instance_init = e500_ccsr_type_init,
+};
+
+static void e500_ccsr_register_types(void)
+{
+type_register_static(e500_ccsr_info);
+}
+
+type_init(e500_ccsr_register_types)
diff --git a/hw/ppce500_pci.c b/hw/ppce500_pci.c
index 92b1dc0..135f2a6 100644
--- a/hw/ppce500_pci.c
+++ b/hw/ppce500_pci.c
@@ -315,6 +315,8 @@ static int e500_pcihost_initfn(SysBusDevice *dev)
 int i;
 MemoryRegion *address_space_mem = get_system_memory();
 MemoryRegion *address_space_io = get_system_io();
+PPCE500CCSRState *ccsr;
+PCIDevice *pdev;
 
 h = PCI_HOST_BRIDGE(dev);
 s = PPC_E500_PCI_HOST_BRIDGE(dev);
@@ -342,6 +344,11 @@ static int e500_pcihost_initfn(SysBusDevice *dev)
 memory_region_add_subregion(s-container, PCIE500_REG_BASE, s-iomem);
 sysbus_init_mmio(dev, s-container);
 
+ccsr = CCSR(object_new(TYPE_CCSR));
+memory_region_init_io(h-bar0, pci_host_conf_be_ops, h,
+  pci-bar0, memory_region_size(ccsr-ccsr_space));
+pdev = pci_find_device(b, 0, 0);
+pci_register_bar(pdev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, h-bar0);
 return 0;
 }
 
-- 
1.7.0.4





Re: [Qemu-devel] [PATCH V3 10/11] vcpu: introduce lockmap

2012-09-19 Thread Avi Kivity
On 09/19/2012 07:40 AM, Peter Crosthwaite wrote:
 On Wed, Sep 19, 2012 at 2:32 PM, Edgar E. Iglesias
 edgar.igles...@gmail.com wrote:
 On Wed, Sep 19, 2012 at 02:25:48PM +1000, Peter Crosthwaite wrote:
 Ping for PMM,

 This is the root case of your block on the SDHCI series - this is a
 discussion on resolution to bogus infinite looping DMA. For current
 participants in this discussion, heres our thread on the same topic
 over in SD land:

 http://lists.gnu.org/archive/html/qemu-devel/2012-08/msg01017.html

 With the findings here and acknowledgement that this is a general
 problem, we either need to declare this issue of scope for SDHCI, or
 work with these guys (in the immediate future) on the DMA infinite
 loop problem flagged here. I dont mind if SDHCI+ADMA is the guinea pig
 here, but can we get a decisive plan for going forward with this issue
 if it is going to continue to block SDHCI.

 Thanks to Igor for identifying the overlap between these discussions.

 Hi,

 A couple of dumb questions.

 What is the reason for the blocker? that possible guest dos is worse
 than no functionality at all?

 Can't you put the DMA/IO processing into the background?
 
 I dont know a way do do asynchronous DMA unless I am missing something
 here.

You could schedule a bottom half and do the accesses from there.  Solves
nothing though.

 So what happens is we have a device that walks a SG list
 synchronously while holding all the locks and stuff being discussed
 here. If that SG list infinite loops then crash.

Did you mean loop, or recursion into the memory region that initiates DMA?

As this problem already exists I don't think new device models need
block on it.

Maybe a simple fix is:

void my_device_write(...)
{
   if (dma_in_progress) {
   return;
   }
   ...
   s-dma_in_progress = true;
   cpu_physical_memory_rw(...)
   s-dma_in_progress = false;
}

and we could try to figure out how to move this into common code.
-- 

error compiling committee.c: too many arguments to function



Re: [Qemu-devel] [PATCH V3 10/11] vcpu: introduce lockmap

2012-09-19 Thread Jan Kiszka
On 2012-09-19 06:40, Peter Crosthwaite wrote:
 On Wed, Sep 19, 2012 at 2:32 PM, Edgar E. Iglesias
 edgar.igles...@gmail.com wrote:
 On Wed, Sep 19, 2012 at 02:25:48PM +1000, Peter Crosthwaite wrote:
 Ping for PMM,

 This is the root case of your block on the SDHCI series - this is a
 discussion on resolution to bogus infinite looping DMA. For current
 participants in this discussion, heres our thread on the same topic
 over in SD land:

 http://lists.gnu.org/archive/html/qemu-devel/2012-08/msg01017.html

 With the findings here and acknowledgement that this is a general
 problem, we either need to declare this issue of scope for SDHCI, or
 work with these guys (in the immediate future) on the DMA infinite
 loop problem flagged here. I dont mind if SDHCI+ADMA is the guinea pig
 here, but can we get a decisive plan for going forward with this issue
 if it is going to continue to block SDHCI.

 Thanks to Igor for identifying the overlap between these discussions.

 Hi,

 A couple of dumb questions.

 What is the reason for the blocker? that possible guest dos is worse
 than no functionality at all?

 Can't you put the DMA/IO processing into the background?
 
 I dont know a way do do asynchronous DMA unless I am missing something
 here. So what happens is we have a device that walks a SG list
 synchronously while holding all the locks and stuff being discussed
 here. If that SG list infinite loops then crash.

We could switch to async DMA, but most DMA-capable devices aren't
prepared for rescheduling points in the middle of their code. This will
require quite a bit of code review.

 

 what's the diff between setup of bad descriptors and  writing a
 while (1) loop in the guest CPU?

 
 While(1) in guest doesnt freeze all of QEMU (monitor still works and
 stuff), wheras the DMA ops going in circles does, as locks are taken
 by an infinite loop.

That's the point. If we loop, we need at least a way to stop it by
issuing a device or system reset. But I tend to think that detecting
loops and failing/ignoring requests is easier implementable.

Jan

-- 
Siemens AG, Corporate Technology, CT RTC ITP SDP-DE
Corporate Competence Center Embedded Linux



Re: [Qemu-devel] [RFC PATCH 11/13] hmp: add NBD server commands

2012-09-19 Thread Paolo Bonzini
Il 18/09/2012 22:22, Luiz Capitulino ha scritto:
  +if (addr.host == NULL || addr.port == NULL) {
  +error_set(errp, QERR_SOCKET_CREATE_FAILED);
 You can and should use monitor_printf() directly.

Hmm, why? Then I would have two error paths, one with monitor_printf and
one with hmp_handle_error.  But I will change this to add an Error* to
inet_parse.

  +goto exit;
  +}
  +
  +qmp_nbd_server_start(addr, errp);
  +if (errp != NULL) {
 Please, use error_is_set() instead.

I'll just rename errp to local_err, which is the usual convention.

  +goto exit;
  +}
  +
  +/* Then try adding all block devices.  If one fails, close all and
  + * exit.
  + */
  +bs = NULL;
  +while ((bs = bdrv_next(bs))) {
 To keep hmp.c an honest qmp client, you should use qmp_query_block() instead
 of looping through BSs directly. You'll find other examples if you find for
 qmp_query_block() in this file.

Right, thanks!

Paolo



Re: [Qemu-devel] [PATCH V3 10/11] vcpu: introduce lockmap

2012-09-19 Thread Avi Kivity
On 09/17/2012 05:24 AM, liu ping fan wrote:
 On Thu, Sep 13, 2012 at 4:19 PM, Avi Kivity a...@redhat.com wrote:
 On 09/13/2012 09:55 AM, liu ping fan wrote:
 On Tue, Sep 11, 2012 at 8:41 PM, Avi Kivity a...@redhat.com wrote:
 On 09/11/2012 03:24 PM, Avi Kivity wrote:
 On 09/11/2012 12:57 PM, Jan Kiszka wrote:
 On 2012-09-11 11:44, liu ping fan wrote:
 On Tue, Sep 11, 2012 at 4:35 PM, Avi Kivity a...@redhat.com wrote:
 On 09/11/2012 10:51 AM, Liu Ping Fan wrote:
 From: Liu Ping Fan pingf...@linux.vnet.ibm.com

 The func call chain can suffer from recursively hold
 qemu_mutex_lock_iothread. We introduce lockmap to record the
 lock depth.

 What is the root cause?  io handlers initiating I/O?

 cpu_physical_memory_rw() can be called nested, and when called, it can
 be protected by no-lock/device lock/ big-lock.
 I think without big lock, io-dispatcher should face the same issue.
 As to main-loop, have not carefully consider, but at least, dma-helper
 will call cpu_physical_memory_rw() with big-lock.

 That is our core problem: inconsistent invocation of existing services
 /wrt locking. For portio, I was lucky that there is no nesting and I was
 able to drop the big lock around all (x86) call sites. But MMIO is way
 more tricky due to DMA nesting.

 Maybe we need to switch to a continuation style.  Instead of expecting
 cpu_physical_memory_rw() to complete synchronously, it becomes an
 asynchronous call and you provide it with a completion.  That means
 devices which use it are forced to drop the lock in between.  Block and
 network clients will be easy to convert since they already use APIs that
 drop the lock (except for accessing the descriptors).

 We could try to introduce a different version of cpu_physical_memory_rw,
 cpu_physical_memory_rw_unlocked. But the problem remains that an MMIO
 request can trigger the very same access in a nested fashion, and we
 will have to detect this to avoid locking up QEMU (locking up the guest
 might be OK).

 An async version of c_p_m_rw() will just cause a completion to bounce
 around, consuming cpu but not deadlocking anything.  If we can keep a
 count of the bounces, we might be able to stall it indefinitely or at
 least ratelimit it.


 Another option is to require all users of c_p_m_rw() and related to use
 a coroutine or thread.  That makes the programming easier (but still
 required a revalidation after the dropped lock).

 For the nested cpu_physical_memory_rw(), we change it internal but
 keep it sync API as it is.  (Wrapper current cpu_physical_memory_rw()
 into cpu_physical_memory_rw_internal() )


 LOCK()  // can be device or big lock or both, depend on caller.
 ..
 cpu_physical_memory_rw()
 {
UNLOCK() //unlock all the locks
queue_work_on_thread(cpu_physical_memory_rw_internal, completion);
 // cpu_physical_memory_rw_internal can take lock(device,biglock) again
wait_for_completion(completion)
LOCK()
 }
 ..
 UNLOCK()


 This is dangerous.  The caller expects to hold the lock across the call,
 and will not re-validate its state.

 Although cpu_physical_memory_rw_internal() can be freed to use lock,
 but with precondition. -- We still need to trace lock stack taken by
 cpu_physical_memory_rw(), so that it can return to caller correctly.
 Is that OK?

 I'm convinced that we need a recursive lock if we don't convert
 everything at once.

 So how about:

 - make bql a recursive lock (use pthreads, don't invent our own)
 - change c_p_m_rw() calling convention to caller may hold the BQL, but
 no device lock

 this allows devices to DMA each other.  Unconverted device models will
 work as they used to.  Converted device models will need to drop the
 device lock, re-acquire it, and revalidate any state.  That will cause
 
 I think we are cornered by devices to DMA each other, and it raises
 the unavoidable nested lock. Also to avoid deadlock caused by device's
 lock sequence, we should drop the current device lock  to acquire
 another one.   The only little  diverge is about the revalidate, do
 we need to rollback?

It basically means you can't hold contents of device state in local
variables.  You need to read everything again from the device.  That
includes things like DMA enable bits.

(btw: to we respect PCI_COMMAND_MASTER? it seems that we do not.  This
is a trivial example of an iommu, we should get that going).

 Or for converted device, we can just tag it a
 busy flag, that is checkset busy flag at the entry of device's
 mmio-dispatch.  So  when re-acquire device's lock, the device's state
 is intact.

The state can be changed by a parallel access to another register, which
is valid.

 
 Has anybody other suggestion?


-- 
error compiling committee.c: too many arguments to function



Re: [Qemu-devel] [big lock] Discussion about the convention of device's DMA each other after breaking down biglock

2012-09-19 Thread Avi Kivity
On 09/19/2012 06:02 AM, liu ping fan wrote:
 Currently, cpu_physical_memory_rw() can be used directly or indirectly
 by mmio-dispatcher to access other devices' memory region. This can
 cause some problem when adopting device's private lock.
 
 Back ground refer to:
 http://lists.gnu.org/archive/html/qemu-devel/2012-09/msg01481.html
 For lazy, just refer to:
 http://lists.gnu.org/archive/html/qemu-devel/2012-09/msg01878.html
 
 
 --1st. the recursive lock of biglock.
 If we leave c_p_m_rw() as it is, ie, no lock inside. Then we can have
 the following (section of the whole call chain, and with
 private_lockA):
   lockA-mmio-dispatcher   -- hold biglock -- c_p_m_rw() --- 
 Before c_p_m_rw(), we drop private_lockA to anti the possibly of
 deadlock.  But we can not anti the nested of this chain or calling to
 another lockB-mmio-dispatcher. So we can not avoid the possibility of
 nested lock of biglock.  And another important factor is that we break
 the lock sequence: private_lock--biglock.
 All of these require us to push biglock's holding into c_p_m_rw(), the
 wrapper can not give help.

I agree that this is unavoidable.

 
 --2nd. c_p_m_rw(), sync or async?
 
 IF we convert all of the device to be protected by refcount, then we can have
 //no big lock
  c_p_m_rw()
 {
devB-ref++;
{
 ---pushed onto another thread.
lock_privatelock
mr-ops-write();
unlock_privatelock
}
wait_for_completion();
devB-ref--;
 }
 This model can help c_p_m_rw() present as a SYNC API.  But currently,
 we mix biglock and private lock together, and wait_for_completion()
 maybe block the release of big lock, which finally causes deadlock. So
 we can not simply rely on this model.
 Instead, we need to classify the calling scene into three cases:
   case1. lockA--dispatcher --- lockB-dispatcher   //can use
 async+completion model
   case2. lockA--dispatcher --- biglock-dispatcher // sync, but can
 cause the nested lock of biglock
   case3. biglock-dispacher --- lockB-dispatcher  // async to avoid
 the lock sequence problem, (as to completion, it need to be placed
 outside the top level biglock, and it is hard to do so. Suggest to
 change to case 1. Or at present, just leave it async)
 
 This new model will require the biglock can be nested.

I think changing to an async model is too complicated.  It's difficult
enough already.  Isn't dropping private locks + recursive big locks
sufficient?

-- 
error compiling committee.c: too many arguments to function



[Qemu-devel] [PATCH] Revert serial: fix retry logic

2012-09-19 Thread Michael Tokarev
This reverts commit 67c5322d7000fd105a926eec44bc1765b7d70bdd:

I'm not sure if the retry logic has ever worked when not using FIFO mode.  I
found this while writing a test case although code inspection confirms it is
definitely broken.

The TSR retry logic will never actually happen because it is guarded by an
'if (s-tsr_rety  0)' but this is the only place that can ever make the
variable greater than zero.  That effectively makes the retry logic an 'if 
(0)

I believe this is a typo and the intention was = 0.  Once this is fixed 
thoug
I see double transmits with my test case.  This is because in the non FIFO
case, serial_xmit may get invoked while LSR.THRE is still high because the
character was processed but the retransmit timer was still active.

We can handle this by simply checking for LSR.THRE and returning early.  
It's
possible that the FIFO paths also need some attention.

Cc: Stefano Stabellini stefano.stabell...@eu.citrix.com
Signed-off-by: Anthony Liguori aligu...@us.ibm.com

Even if the previous logic was never worked, new logic breaks stuff -
namely,

 qemu -enable-kvm -nographic -kernel /boot/vmlinuz-$(uname -r) -append 
console=ttyS0 -serial pty

the above command will cause the virtual machine to stuck at startup
using 100% CPU till one connects to the pty and sends any char to it.

Note this is rather typical invocation for various headless virtual
machines by libvirt.

So revert this change for now, till a better solution will be found.

Signed-off-by: Michael Tokarev m...@tls.msk.ru
---
 hw/serial.c |4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/hw/serial.c b/hw/serial.c
index a421d1e..df54de2 100644
--- a/hw/serial.c
+++ b/hw/serial.c
@@ -327,8 +327,6 @@ static void serial_xmit(void *opaque)
 s-tsr = fifo_get(s,XMIT_FIFO);
 if (!s-xmit_fifo.count)
 s-lsr |= UART_LSR_THRE;
-} else if ((s-lsr  UART_LSR_THRE)) {
-return;
 } else {
 s-tsr = s-thr;
 s-lsr |= UART_LSR_THRE;
@@ -340,7 +338,7 @@ static void serial_xmit(void *opaque)
 /* in loopback mode, say that we just received a char */
 serial_receive1(s, s-tsr, 1);
 } else if (qemu_chr_fe_write(s-chr, s-tsr, 1) != 1) {
-if ((s-tsr_retry = 0)  (s-tsr_retry = MAX_XMIT_RETRY)) {
+if ((s-tsr_retry  0)  (s-tsr_retry = MAX_XMIT_RETRY)) {
 s-tsr_retry++;
 qemu_mod_timer(s-transmit_timer,  new_xmit_ts + 
s-char_transmit_time);
 return;
-- 
1.7.10.4




Re: [Qemu-devel] [RFC PATCH 09/13] qmp: add NBD server commands

2012-09-19 Thread Paolo Bonzini
Il 18/09/2012 22:11, Luiz Capitulino ha scritto:
 (apart from TODOs of course, specially on error handling).

Yes, those were waiting for error_setg to get in the tree. :)

Paolo



Re: [Qemu-devel] [PATCH v3 3/3] Fix address handling in inet_nonblocking_connect

2012-09-19 Thread Amos Kong

On 14/09/12 02:58, Orit Wasserman wrote:

getaddrinfo can give us a list of addresses, but we only try to
connect to the first one. If that fails we never proceed to
the next one.  This is common on desktop setups that often have ipv6
configured but not actually working.

To fix this make inet_connect_nonblocking retry connection with a different
address.
callers on inet_nonblocking_connect register a callback function that will
be called when connect opertion completes, in case of failure the fd will have
a negative value



Hi Orit,

We almost get to the destination :)


Signed-off-by: Orit Wasserman owass...@redhat.com
Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
  migration-tcp.c |   29 +---
  qemu-char.c |2 +-
  qemu-sockets.c  |   95 +++---
  qemu_socket.h   |   13 ++--
  4 files changed, 102 insertions(+), 37 deletions(-)



...


diff --git a/qemu-sockets.c b/qemu-sockets.c
index 212075d..d321c58 100644
--- a/qemu-sockets.c
+++ b/qemu-sockets.c
@@ -24,6 +24,7 @@

  #include qemu_socket.h
  #include qemu-common.h /* for qemu_isdigit */
+#include main-loop.h

  #ifndef AI_ADDRCONFIG
  # define AI_ADDRCONFIG 0
@@ -217,11 +218,69 @@ listen:
  ((rc) == -EINPROGRESS)
  #endif

+/* Struct to store connect state for non blocking connect */
+typedef struct ConnectState {
+int fd;
+struct addrinfo *addr_list;
+struct addrinfo *current_addr;
+ConnectHandler *callback;
+void *opaque;



+Error *errp;


This member is not used.


+} ConnectState;


# make(gcc-4.4.6-4.el6.x86_64)
  CCqemu-sockets.o
qemu-sockets.c:229: error: redefinition of typedef ‘ConnectState’
qemu_socket.h:46: note: previous declaration of ‘ConnectState’ was here
make: *** [qemu-sockets.o] Error 1


struct ConnectState {
int fd;
struct addrinfo *addr_list;
struct addrinfo *current_addr;
ConnectHandler *callback;
void *opaque;
};



+
  static int inet_connect_addr(struct addrinfo *addr, bool block,
- bool *in_progress)
+ bool *in_progress, ConnectState *connect_state);
+
+static void wait_for_connect(void *opaque)
+{
+ConnectState *s = opaque;
+int val = 0, rc = 0;
+socklen_t valsize = sizeof(val);
+bool in_progress = false;
+
+qemu_set_fd_handler2(s-fd, NULL, NULL, NULL, NULL);
+
+do {
+rc = getsockopt(s-fd, SOL_SOCKET, SO_ERROR, (void *) val, valsize);
+} while (rc == -1  socket_error() == EINTR);


# man getsockopt
RETURN VALUE
   On success, zero is returned.  On error, -1 is returned, and 
errno is set appropriately.


.. original code:
..
.. do {
.. ret = getsockopt(s-fd, SOL_SOCKET, SO_ERROR, (void *) val, 
valsize);

.. } while (ret == -1  (socket_error()) == EINTR);
..
.. if (ret  0) {  // ret equals to -1, and socket_error() != EINTR

If ret  0, just set the error state, but the callback function is not 
set to NULL.
Then tcp_wait_for_connect() will be called again, and retry to checking 
current

socket by getsockopt().

But in this patch, we will abandon current socket, and connect next 
address on the list.
We should reserve that block, and move qemu_set_fd_handler2(...NULL..) 
below it.


..  migrate_fd_error(s);
.. return;
.. }
..
.. qemu_set_fd_handler2(s-fd, NULL, NULL, NULL, NULL);


+/* update rc to contain error details */
+if (!rc  val) {
+rc = -val;


another issue: val = 0 all the time?


+}
+
+/* connect error */
+if (rc  0) {
+closesocket(s-fd);
+s-fd = rc;
+}
+
+/* try to connect to the next address on the list */
+while (s-current_addr-ai_next != NULL  s-fd  0) {
+s-current_addr = s-current_addr-ai_next;
+s-fd = inet_connect_addr(s-current_addr, false, in_progress, s);
+/* connect in progress */
+if (in_progress) {
+return;
+}
+}
+
+freeaddrinfo(s-addr_list);
+if (s-callback) {
+s-callback(s-fd, s-opaque);
+}
+g_free(s);
+return;
+}




--
Amos.



Re: [Qemu-devel] [PATCH v3 1/3] Refactor inet_connect_opts function

2012-09-19 Thread Amos Kong

On 14/09/12 02:58, Orit Wasserman wrote:

From: Michael S. Tsirkin m...@redhat.com

refactor address resolution code to fix nonblocking connect
remove getnameinfo call

Signed-off-by: Michael S. Tsirkin m...@redhat.com
Signed-off-by: Amos Kong ak...@redhat.com
Signed-off-by: Orit Wasserman owass...@redhat.com


Hi Orit,

Happy new year!


---
  qemu-sockets.c |  144 +++
  1 files changed, 81 insertions(+), 63 deletions(-)

diff --git a/qemu-sockets.c b/qemu-sockets.c
index 361d890..939a453 100644


...


+
+int inet_connect_opts(QemuOpts *opts, bool *in_progress, Error **errp)
+{
+struct addrinfo *res, *e;
+int sock = -1;
+bool block = qemu_opt_get_bool(opts, block, 0);
+
+res = inet_parse_connect_opts(opts, errp);
+if (!res) {
+return -1;
+}
+



+if (in_progress) {
+*in_progress = false;
  }


trivial comment:

You moved this block to head of inet_connect_addr() in [patch 3/3],
why not do this in this patch?

I know if *in_progress becomes true, sock is always = 0, for loop 
will exits.

But moving this block to inet_connect_addr() is clearer.



  for (e = res; e != NULL; e = e-ai_next) {
-if (getnameinfo((struct sockaddr*)e-ai_addr,e-ai_addrlen,
-uaddr,INET6_ADDRSTRLEN,uport,32,
-NI_NUMERICHOST | NI_NUMERICSERV) != 0) {
-fprintf(stderr,%s: getnameinfo: oops\n, __FUNCTION__);
-continue;
-}
-sock = qemu_socket(e-ai_family, e-ai_socktype, e-ai_protocol);
-if (sock  0) {
-fprintf(stderr,%s: socket(%s): %s\n, __FUNCTION__,
-inet_strfamily(e-ai_family), strerror(errno));
-continue;
-}
-setsockopt(sock,SOL_SOCKET,SO_REUSEADDR,(void*)on,sizeof(on));
-if (!block) {
-socket_set_nonblock(sock);
+sock = inet_connect_addr(e, block, in_progress);
+if (sock = 0) {
+break;
  }
-/* connect to peer */
-do {
-rc = 0;
-if (connect(sock, e-ai_addr, e-ai_addrlen)  0) {
-rc = -socket_error();
-}
-} while (rc == -EINTR);
-
-  #ifdef _WIN32
-if (!block  (rc == -EINPROGRESS || rc == -EWOULDBLOCK
-   || rc == -WSAEALREADY)) {
-  #else
-if (!block  (rc == -EINPROGRESS)) {
-  #endif
-if (in_progress) {
-*in_progress = true;
-}
-} else if (rc  0) {
-if (NULL == e-ai_next)
-fprintf(stderr, %s: connect(%s,%s,%s,%s): %s\n, __FUNCTION__,
-inet_strfamily(e-ai_family),
-e-ai_canonname, uaddr, uport, strerror(errno));
-closesocket(sock);
-continue;
-}
-freeaddrinfo(res);
-return sock;
  }
-error_set(errp, QERR_SOCKET_CONNECT_FAILED);
+if (sock  0) {
+error_set(errp, QERR_SOCKET_CONNECT_FAILED);
+}
  freeaddrinfo(res);
-return -1;
+return sock;
  }

  int inet_dgram_opts(QemuOpts *opts)



--
Amos.



Re: [Qemu-devel] [PATCH V3 10/11] vcpu: introduce lockmap

2012-09-19 Thread liu ping fan
On Wed, Sep 19, 2012 at 4:01 PM, Avi Kivity a...@redhat.com wrote:
 On 09/17/2012 05:24 AM, liu ping fan wrote:
 On Thu, Sep 13, 2012 at 4:19 PM, Avi Kivity a...@redhat.com wrote:
 On 09/13/2012 09:55 AM, liu ping fan wrote:
 On Tue, Sep 11, 2012 at 8:41 PM, Avi Kivity a...@redhat.com wrote:
 On 09/11/2012 03:24 PM, Avi Kivity wrote:
 On 09/11/2012 12:57 PM, Jan Kiszka wrote:
 On 2012-09-11 11:44, liu ping fan wrote:
 On Tue, Sep 11, 2012 at 4:35 PM, Avi Kivity a...@redhat.com wrote:
 On 09/11/2012 10:51 AM, Liu Ping Fan wrote:
 From: Liu Ping Fan pingf...@linux.vnet.ibm.com

 The func call chain can suffer from recursively hold
 qemu_mutex_lock_iothread. We introduce lockmap to record the
 lock depth.

 What is the root cause?  io handlers initiating I/O?

 cpu_physical_memory_rw() can be called nested, and when called, it can
 be protected by no-lock/device lock/ big-lock.
 I think without big lock, io-dispatcher should face the same issue.
 As to main-loop, have not carefully consider, but at least, dma-helper
 will call cpu_physical_memory_rw() with big-lock.

 That is our core problem: inconsistent invocation of existing services
 /wrt locking. For portio, I was lucky that there is no nesting and I was
 able to drop the big lock around all (x86) call sites. But MMIO is way
 more tricky due to DMA nesting.

 Maybe we need to switch to a continuation style.  Instead of expecting
 cpu_physical_memory_rw() to complete synchronously, it becomes an
 asynchronous call and you provide it with a completion.  That means
 devices which use it are forced to drop the lock in between.  Block and
 network clients will be easy to convert since they already use APIs that
 drop the lock (except for accessing the descriptors).

 We could try to introduce a different version of cpu_physical_memory_rw,
 cpu_physical_memory_rw_unlocked. But the problem remains that an MMIO
 request can trigger the very same access in a nested fashion, and we
 will have to detect this to avoid locking up QEMU (locking up the guest
 might be OK).

 An async version of c_p_m_rw() will just cause a completion to bounce
 around, consuming cpu but not deadlocking anything.  If we can keep a
 count of the bounces, we might be able to stall it indefinitely or at
 least ratelimit it.


 Another option is to require all users of c_p_m_rw() and related to use
 a coroutine or thread.  That makes the programming easier (but still
 required a revalidation after the dropped lock).

 For the nested cpu_physical_memory_rw(), we change it internal but
 keep it sync API as it is.  (Wrapper current cpu_physical_memory_rw()
 into cpu_physical_memory_rw_internal() )


 LOCK()  // can be device or big lock or both, depend on caller.
 ..
 cpu_physical_memory_rw()
 {
UNLOCK() //unlock all the locks
queue_work_on_thread(cpu_physical_memory_rw_internal, completion);
 // cpu_physical_memory_rw_internal can take lock(device,biglock) again
wait_for_completion(completion)
LOCK()
 }
 ..
 UNLOCK()


 This is dangerous.  The caller expects to hold the lock across the call,
 and will not re-validate its state.

 Although cpu_physical_memory_rw_internal() can be freed to use lock,
 but with precondition. -- We still need to trace lock stack taken by
 cpu_physical_memory_rw(), so that it can return to caller correctly.
 Is that OK?

 I'm convinced that we need a recursive lock if we don't convert
 everything at once.

 So how about:

 - make bql a recursive lock (use pthreads, don't invent our own)
 - change c_p_m_rw() calling convention to caller may hold the BQL, but
 no device lock

 this allows devices to DMA each other.  Unconverted device models will
 work as they used to.  Converted device models will need to drop the
 device lock, re-acquire it, and revalidate any state.  That will cause

 I think we are cornered by devices to DMA each other, and it raises
 the unavoidable nested lock. Also to avoid deadlock caused by device's
 lock sequence, we should drop the current device lock  to acquire
 another one.   The only little  diverge is about the revalidate, do
 we need to rollback?

 It basically means you can't hold contents of device state in local
 variables.  You need to read everything again from the device.  That
 includes things like DMA enable bits.

I think that read everything again from the device can not work.
Suppose the following scene: If the device's state contains the change
of a series of internal registers (supposing partA+partB;
partC+partD), after partA changed, the device's lock is broken.  At
this point, another access to this device, it will work on partA+partB
to determine C+D, but since partB is not correctly updated yet. So C+D
may be decided by broken context and be wrong.

 (btw: to we respect PCI_COMMAND_MASTER? it seems that we do not.  This
 is a trivial example of an iommu, we should get that going).

 Or for converted device, we can just tag it a
 busy flag, that is checkset busy flag at 

Re: [Qemu-devel] [hellogcc] Call for Chinese QEMU developers

2012-09-19 Thread Zhi Yong Wu
+1

By the way, forward it to QEMU mail list in order that some qemu guys
can see this.

2012/9/19 陳韋任 (Wei-Ren Chen) che...@cs.nctu.edu.tw:
 Hi all,

   借個場地,我們想知道在 QEMU 上的中國/台灣開發者有哪些人。
 有興趣露個臉的朋友,請回覆此帖。彼此認識一下。

 韋任

 --
 Wei-Ren Chen (陳韋任)
 Computer Systems Lab, Institute of Information Science,
 Academia Sinica, Taiwan (R.O.C.)
 Tel:886-2-2788-3799 #1667
 Homepage: http://people.cs.nctu.edu.tw/~chenwj




-- 
Regards,

Zhi Yong Wu



Re: [Qemu-devel] [PATCH] qemu-clock: add an alarm timer based on timerfd

2012-09-19 Thread Paolo Bonzini
Il 19/09/2012 09:44, Jan Kiszka ha scritto:
  Looks good.  I think Peter Portante tested something similar, and found no 
  big
  difference between the two.  But it's a good thing and, in my opinion, for
  non-timerfd OSes we should simply adjust the select() timeout and not 
  bother
  with signals.
 What would be the advantage of timerfd over select? On Linux, both use
 hrtimers (and low slack for RT processes). I'm starting to like the
 select/WaitForMultipleObjects pattern as it would allow to consolidate
 over basically two versions of timers and simplify the code.

Oh, I didn't know this.  Even better.

Paolo



Re: [Qemu-devel] [PULL 00/11] Block patches

2012-09-19 Thread Kevin Wolf
Am 18.09.2012 19:49, schrieb Michael Tokarev:
 On 14.09.2012 16:39, Kevin Wolf wrote:
 The following changes since commit e0a1e32dbc41e6b2aabb436a9417dfd32177a3dc:

   Merge branch 'usb.64' of git://git.kraxel.org/qemu (2012-09-11 18:06:56 
 +0200)

 are available in the git repository at:

   git://repo.or.cz/qemu/kevin.git for-anthony
 
 Are any of these appropriate for -stable?  I think some are, below:

I think all of your suggestions would be appropriate, even though most
of them fix rather theoretical cases.

 Jason Baron (1):
   ahci: properly reset PxCMD on HBA reset
 
 Most likely this.  Jason?

Yes, this fixes a real visible bug with AHCI CD-ROMs.

Kevin



Re: [Qemu-devel] [big lock] Discussion about the convention of device's DMA each other after breaking down biglock

2012-09-19 Thread liu ping fan
On Wed, Sep 19, 2012 at 4:06 PM, Avi Kivity a...@redhat.com wrote:
 On 09/19/2012 06:02 AM, liu ping fan wrote:
 Currently, cpu_physical_memory_rw() can be used directly or indirectly
 by mmio-dispatcher to access other devices' memory region. This can
 cause some problem when adopting device's private lock.

 Back ground refer to:
 http://lists.gnu.org/archive/html/qemu-devel/2012-09/msg01481.html
 For lazy, just refer to:
 http://lists.gnu.org/archive/html/qemu-devel/2012-09/msg01878.html


 --1st. the recursive lock of biglock.
 If we leave c_p_m_rw() as it is, ie, no lock inside. Then we can have
 the following (section of the whole call chain, and with
 private_lockA):
   lockA-mmio-dispatcher   -- hold biglock -- c_p_m_rw() --- 
 Before c_p_m_rw(), we drop private_lockA to anti the possibly of
 deadlock.  But we can not anti the nested of this chain or calling to
 another lockB-mmio-dispatcher. So we can not avoid the possibility of
 nested lock of biglock.  And another important factor is that we break
 the lock sequence: private_lock--biglock.
 All of these require us to push biglock's holding into c_p_m_rw(), the
 wrapper can not give help.

 I agree that this is unavoidable.


 --2nd. c_p_m_rw(), sync or async?

 IF we convert all of the device to be protected by refcount, then we can have
 //no big lock
  c_p_m_rw()
 {
devB-ref++;
{
 ---pushed onto another thread.
lock_privatelock
mr-ops-write();
unlock_privatelock
}
wait_for_completion();
devB-ref--;
 }
 This model can help c_p_m_rw() present as a SYNC API.  But currently,
 we mix biglock and private lock together, and wait_for_completion()
 maybe block the release of big lock, which finally causes deadlock. So
 we can not simply rely on this model.
 Instead, we need to classify the calling scene into three cases:
   case1. lockA--dispatcher --- lockB-dispatcher   //can use
 async+completion model
   case2. lockA--dispatcher --- biglock-dispatcher // sync, but can
 cause the nested lock of biglock
   case3. biglock-dispacher --- lockB-dispatcher  // async to avoid
 the lock sequence problem, (as to completion, it need to be placed
 outside the top level biglock, and it is hard to do so. Suggest to
 change to case 1. Or at present, just leave it async)

 This new model will require the biglock can be nested.

 I think changing to an async model is too complicated.  It's difficult
 enough already.  Isn't dropping private locks + recursive big locks
 sufficient?

I think that dropping private locks + recursive big locks just cover
case 2. And most of the important, it dont describe case3 which break
the rule of lock sequence private-lock -- biglock. Scene:
devA_lock--(devX_with-biglock---devB_lock).
I just want to classify and post these cases to discuss. Maybe we can
achieve without async.

Thanks and regards,
pingfan

 --
 error compiling committee.c: too many arguments to function



Re: [Qemu-devel] [PATCH V3 10/11] vcpu: introduce lockmap

2012-09-19 Thread Avi Kivity
On 09/19/2012 11:36 AM, liu ping fan wrote:

 It basically means you can't hold contents of device state in local
 variables.  You need to read everything again from the device.  That
 includes things like DMA enable bits.

 I think that read everything again from the device can not work.
 Suppose the following scene: If the device's state contains the change
 of a series of internal registers (supposing partA+partB;
 partC+partD), after partA changed, the device's lock is broken.  At
 this point, another access to this device, it will work on partA+partB
 to determine C+D, but since partB is not correctly updated yet. So C+D
 may be decided by broken context and be wrong.

That's the guest's problem.  Note it could have happened even before the
change, since the writes to A/B/C/D are unordered wrt the DMA.

 
 (btw: to we respect PCI_COMMAND_MASTER? it seems that we do not.  This
 is a trivial example of an iommu, we should get that going).

 Or for converted device, we can just tag it a
 busy flag, that is checkset busy flag at the entry of device's
 mmio-dispatch.  So  when re-acquire device's lock, the device's state
 is intact.

 The state can be changed by a parallel access to another register, which
 is valid.

 Do you mean that the device can be accessed in parallel? But how? we
 use device's lock.

Some DMA is asynchronous already, like networking and IDE DMA.  So we
drop the big lock while doing it.  With the change to drop device locks
around c_p_m_rw(), we have that problem everywhere.

 What my suggestion is:
 lock();
 set_and_test(dev-busy);
 if busy
   unlock and return;
 changing device registers;
 do other things including calling to c_p_m_rw() //here,lock broken,
 but set_and_test() works
 clear(dev-busy);
 unlock();
 
 So changing device registers is protected, and unbreakable.

But the changes may be legitimate.  Maybe you're writing to a completely
unrelated register, from a different vcpu, now that write is lost.


-- 
error compiling committee.c: too many arguments to function



Re: [Qemu-devel] [big lock] Discussion about the convention of device's DMA each other after breaking down biglock

2012-09-19 Thread Avi Kivity
On 09/19/2012 12:00 PM, liu ping fan wrote:
 On Wed, Sep 19, 2012 at 4:06 PM, Avi Kivity a...@redhat.com wrote:
 On 09/19/2012 06:02 AM, liu ping fan wrote:
 Currently, cpu_physical_memory_rw() can be used directly or indirectly
 by mmio-dispatcher to access other devices' memory region. This can
 cause some problem when adopting device's private lock.

 Back ground refer to:
 http://lists.gnu.org/archive/html/qemu-devel/2012-09/msg01481.html
 For lazy, just refer to:
 http://lists.gnu.org/archive/html/qemu-devel/2012-09/msg01878.html


 --1st. the recursive lock of biglock.
 If we leave c_p_m_rw() as it is, ie, no lock inside. Then we can have
 the following (section of the whole call chain, and with
 private_lockA):
   lockA-mmio-dispatcher   -- hold biglock -- c_p_m_rw() --- 
 Before c_p_m_rw(), we drop private_lockA to anti the possibly of
 deadlock.  But we can not anti the nested of this chain or calling to
 another lockB-mmio-dispatcher. So we can not avoid the possibility of
 nested lock of biglock.  And another important factor is that we break
 the lock sequence: private_lock--biglock.
 All of these require us to push biglock's holding into c_p_m_rw(), the
 wrapper can not give help.

 I agree that this is unavoidable.


 --2nd. c_p_m_rw(), sync or async?

 IF we convert all of the device to be protected by refcount, then we can 
 have
 //no big lock
  c_p_m_rw()
 {
devB-ref++;
{
 ---pushed onto another thread.
lock_privatelock
mr-ops-write();
unlock_privatelock
}
wait_for_completion();
devB-ref--;
 }
 This model can help c_p_m_rw() present as a SYNC API.  But currently,
 we mix biglock and private lock together, and wait_for_completion()
 maybe block the release of big lock, which finally causes deadlock. So
 we can not simply rely on this model.
 Instead, we need to classify the calling scene into three cases:
   case1. lockA--dispatcher --- lockB-dispatcher   //can use
 async+completion model
   case2. lockA--dispatcher --- biglock-dispatcher // sync, but can
 cause the nested lock of biglock
   case3. biglock-dispacher --- lockB-dispatcher  // async to avoid
 the lock sequence problem, (as to completion, it need to be placed
 outside the top level biglock, and it is hard to do so. Suggest to
 change to case 1. Or at present, just leave it async)

 This new model will require the biglock can be nested.

 I think changing to an async model is too complicated.  It's difficult
 enough already.  Isn't dropping private locks + recursive big locks
 sufficient?

 I think that dropping private locks + recursive big locks just cover
 case 2. And most of the important, it dont describe case3 which break
 the rule of lock sequence private-lock -- biglock. Scene:
 devA_lock--(devX_with-biglock---devB_lock).

Why not? devA will drop its local lock, devX will retake the big lock
recursively, devB will take its local lock.  In the end, we have biglock
- devB.

 I just want to classify and post these cases to discuss. Maybe we can
 achieve without async.





-- 
error compiling committee.c: too many arguments to function



Re: [Qemu-devel] [big lock] Discussion about the convention of device's DMA each other after breaking down biglock

2012-09-19 Thread liu ping fan
On Wed, Sep 19, 2012 at 5:07 PM, Avi Kivity a...@redhat.com wrote:
 On 09/19/2012 12:00 PM, liu ping fan wrote:
 On Wed, Sep 19, 2012 at 4:06 PM, Avi Kivity a...@redhat.com wrote:
 On 09/19/2012 06:02 AM, liu ping fan wrote:
 Currently, cpu_physical_memory_rw() can be used directly or indirectly
 by mmio-dispatcher to access other devices' memory region. This can
 cause some problem when adopting device's private lock.

 Back ground refer to:
 http://lists.gnu.org/archive/html/qemu-devel/2012-09/msg01481.html
 For lazy, just refer to:
 http://lists.gnu.org/archive/html/qemu-devel/2012-09/msg01878.html


 --1st. the recursive lock of biglock.
 If we leave c_p_m_rw() as it is, ie, no lock inside. Then we can have
 the following (section of the whole call chain, and with
 private_lockA):
   lockA-mmio-dispatcher   -- hold biglock -- c_p_m_rw() --- 
 Before c_p_m_rw(), we drop private_lockA to anti the possibly of
 deadlock.  But we can not anti the nested of this chain or calling to
 another lockB-mmio-dispatcher. So we can not avoid the possibility of
 nested lock of biglock.  And another important factor is that we break
 the lock sequence: private_lock--biglock.
 All of these require us to push biglock's holding into c_p_m_rw(), the
 wrapper can not give help.

 I agree that this is unavoidable.


 --2nd. c_p_m_rw(), sync or async?

 IF we convert all of the device to be protected by refcount, then we can 
 have
 //no big lock
  c_p_m_rw()
 {
devB-ref++;
{
 ---pushed onto another thread.
lock_privatelock
mr-ops-write();
unlock_privatelock
}
wait_for_completion();
devB-ref--;
 }
 This model can help c_p_m_rw() present as a SYNC API.  But currently,
 we mix biglock and private lock together, and wait_for_completion()
 maybe block the release of big lock, which finally causes deadlock. So
 we can not simply rely on this model.
 Instead, we need to classify the calling scene into three cases:
   case1. lockA--dispatcher --- lockB-dispatcher   //can use
 async+completion model
   case2. lockA--dispatcher --- biglock-dispatcher // sync, but can
 cause the nested lock of biglock
   case3. biglock-dispacher --- lockB-dispatcher  // async to avoid
 the lock sequence problem, (as to completion, it need to be placed
 outside the top level biglock, and it is hard to do so. Suggest to
 change to case 1. Or at present, just leave it async)

 This new model will require the biglock can be nested.

 I think changing to an async model is too complicated.  It's difficult
 enough already.  Isn't dropping private locks + recursive big locks
 sufficient?

 I think that dropping private locks + recursive big locks just cover
 case 2. And most of the important, it dont describe case3 which break
 the rule of lock sequence private-lock -- biglock. Scene:
 devA_lock--(devX_with-biglock---devB_lock).

 Why not? devA will drop its local lock, devX will retake the big lock
 recursively, devB will take its local lock.  In the end, we have biglock
 - devB.

But when adopting local lock, we assume take local lock, then biglock.
Otherwise another thread will take biglock then local lock, which
cause the possibility of deadlock.

 I just want to classify and post these cases to discuss. Maybe we can
 achieve without async.





 --
 error compiling committee.c: too many arguments to function



Re: [Qemu-devel] [big lock] Discussion about the convention of device's DMA each other after breaking down biglock

2012-09-19 Thread Paolo Bonzini
Il 19/09/2012 11:11, liu ping fan ha scritto:
  Why not? devA will drop its local lock, devX will retake the big lock
  recursively, devB will take its local lock.  In the end, we have biglock
  - devB.
 
 But when adopting local lock, we assume take local lock, then biglock.

No, because the local lock will be dropped before taking the biglock.
The order must always be coarse-fine.

I don't know if the front-end (device) lock should come before or after
the back-end (e.g. netdev) lock in the hierarchy, but that's another story.

Paolo

 Otherwise another thread will take biglock then local lock, which
 cause the possibility of deadlock.
 




Re: [Qemu-devel] [RfC PATCH] vga: add mmio bar to standard vga

2012-09-19 Thread Avi Kivity
On 09/18/2012 12:51 PM, Gerd Hoffmann wrote:
 This patch adds a mmio bar to the qemu standard vga which allows to
 access the standard vga registers and bochs dispi interface registers
 via mmio.
 
 diff --git a/hw/vga-pci.c b/hw/vga-pci.c
 index 9abbada..e05e2ef 100644
 --- a/hw/vga-pci.c
 +++ b/hw/vga-pci.c
 @@ -30,9 +30,36 @@
  #include qemu-timer.h
  #include loader.h
  
 +/*
 + * QEMU Standard VGA -- MMIO area spec.
 + *
 + * Using PCI bar #2, keeping #1 free, which leaves the
 + * door open to upgrade bar #0 to 64bit.
 + *
 + * mmio area layout:
 + *   0x - 0x03ff  reserved, for possible virtio extension.
 + *   0x0400 - 0x041f  vga ioports (0x3c0 - 0x3df), remapped 1:1

Do they support word accesses to set both index and data?

 + *   0x0500 - 0x0515  bochs dispi interface registers, mapped flat without
 + * index/data ports.  Use (index  1) as offset for
 + * (16bit) register access.
 + */

BAR should disappear with -M old.

 +
 +static const MemoryRegionOps pci_vga_ioport_ops = {
 +.read = pci_vga_ioport_read,
 +.write = pci_vga_ioport_write,
 +.valid.min_access_size = 1,
 +.valid.max_access_size = 4,
 +.impl.min_access_size = 1,
 +.impl.max_access_size = 1,
 +.endianness = DEVICE_LITTLE_ENDIAN,
 +};

Looks like word writes are supported provided the memory API breaks up
writes in little endian order.  Better to make it explicit.



-- 
error compiling committee.c: too many arguments to function



Re: [Qemu-devel] [PATCH] usb-redir: Allow to attach USB 2.0 devices to 1.1 host controller

2012-09-19 Thread Hans de Goede

Hi,

On 09/18/2012 11:18 PM, Anthony Liguori wrote:

Hans de Goede hdego...@redhat.com writes:


Hi,

On 09/17/2012 11:18 AM, Jan Kiszka wrote:

On 2012-09-17 11:08, Hans de Goede wrote:


snip


Although not pretty I'm ok with this, since I actually want to add
similar code to allow usb-3 (superspeed) devices like a usb-3 usb-stick
to work with ehci or uhci controllers :)


Great, that would have been my next question, but I don't have hardware
for that around yet.


I do have hardware for that around, so once you've respun your patch to
address the issues discussed, then that will give me a nice basis to
add usb-3 usb-stick to ehci-controller redirection :)


BTW, I'm facing several incompatibilities with passed-through CDC/ACM
devices (e.g. a Galaxy S2), independent of my patch. Both host-linux and
redir doesn't allow to use them properly but show different symptoms.
Need to analyze and report once time permits.


Hmm, there is (was) one know issues with these devices, which has been fixed
in usbredir, so first of all make sure that the usbredir on your spice-client
/ usbredirserver, has this patch:
http://cgit.freedesktop.org/spice/usbredir/commit/?id=7783d3db61083bbf7f61b1ea8608c666b4c6a1dd

If that does not work, add the debug parameter to the usb-redir device, set it
to 4, collect logs of trying to redirect the device and send me the logs
please, ie:
-device usb-redir,chardev=usbredirchardev1,id=usbredirdev1,debug=4

Also be aware that usb-redir relies on chardev flowcontrol working,
which it does not upstream! See for example here for the chardev flow
control patch set which RHEL / Fedora carry:
http://cgit.freedesktop.org/~jwrdegoede/qemu/log/?h=qemu-kvm-1.2-usbredirofs=50


Those patches are garbage.


sigh, very very deep sigh

No they are not garbage. They:

1) Fix a real issue, which is independent of spice / usb-redir but happens to be
more easily triggerable with them. Note that the original set was developed to
fix a bug which did not involve either!

2) Are disliked by you because they build upon the existing not pretty chardev
code. We all agree that needs a rewrite. But that is not a valid reason to
keep this perfectly fine patchset out of qemu master for years now!

3) You keep saying they are racy, but you've never given a simple step by
step thread 1 does foo, thread 2 does bar replay of such a theoretical race.

4) Have caused no issues in almost 2 years of deployment in the field in both
RHEL and Fedora.

5) Are kept out of master by you for no valid reasons, you seem to use them
as a tool to strongarm people into rewriting the qemu chardev layer. Well it
seems that no one is biting because of -ENOTIME, so can we now please get
these included, after almost 2 years since their first posting ?


Are you saying that usb-redir doesn't work in qemu.git?  So why are we
even carrying it?


It works just as well as virtio-console or any other chardev frontend, any 
chardev
frontent can currently trigger asserts by writing data faster then the backend
can handle. This is just more easily triggerable with USB trafic then with a
serial console.

The original bug-report this patch set once was written for was about 
virtio-console
and is dated 2010-08-05 !

So why are we even carrying virtio-console ?

Regards,

Hans

p.s.

Note that I'm *not* the original author of these patches.



Re: [Qemu-devel] [big lock] Discussion about the convention of device's DMA each other after breaking down biglock

2012-09-19 Thread liu ping fan
On Wed, Sep 19, 2012 at 5:14 PM, Paolo Bonzini pbonz...@redhat.com wrote:
 Il 19/09/2012 11:11, liu ping fan ha scritto:
  Why not? devA will drop its local lock, devX will retake the big lock
  recursively, devB will take its local lock.  In the end, we have biglock
  - devB.
 
 But when adopting local lock, we assume take local lock, then biglock.

 No, because the local lock will be dropped before taking the biglock.
 The order must always be coarse-fine.

But if we takes coarse firstly, then the mmio-dispatcher will still
contend for the big lock against each other.

 I don't know if the front-end (device) lock should come before or after
 the back-end (e.g. netdev) lock in the hierarchy, but that's another story.

I think fine-coarse may be the rule, since for some code path, it is
not necessary to take coarse lock.

Thanks and regards,
pingfan
 Paolo

 Otherwise another thread will take biglock then local lock, which
 cause the possibility of deadlock.





Re: [Qemu-devel] [big lock] Discussion about the convention of device's DMA each other after breaking down biglock

2012-09-19 Thread Avi Kivity
On 09/19/2012 12:14 PM, Paolo Bonzini wrote:

 I don't know if the front-end (device) lock should come before or after
 the back-end (e.g. netdev) lock in the hierarchy, but that's another story.

I would say device - backend.  It's natural if the backend is the timer
subsystem, so extend it from there to the block and network layers.  Of
course callbacks want it to be the other way round.

We could solve the callback problem in the same way we're using for mmio
callbacks.  When providing the callback, give the subsystem -ref() and
-unref() methods.  As long as a callback is pending, the subsystem
holds a reference.  Once the callback has been invoked or cancelled, the
reference can be released.

-- 
error compiling committee.c: too many arguments to function



Re: [Qemu-devel] [big lock] Discussion about the convention of device's DMA each other after breaking down biglock

2012-09-19 Thread Avi Kivity
On 09/19/2012 12:19 PM, liu ping fan wrote:
 On Wed, Sep 19, 2012 at 5:14 PM, Paolo Bonzini pbonz...@redhat.com wrote:
 Il 19/09/2012 11:11, liu ping fan ha scritto:
  Why not? devA will drop its local lock, devX will retake the big lock
  recursively, devB will take its local lock.  In the end, we have biglock
  - devB.
 
 But when adopting local lock, we assume take local lock, then biglock.

 No, because the local lock will be dropped before taking the biglock.
 The order must always be coarse-fine.

 But if we takes coarse firstly, then the mmio-dispatcher will still
 contend for the big lock against each other.

Can you detail the sequence?

 
 I don't know if the front-end (device) lock should come before or after
 the back-end (e.g. netdev) lock in the hierarchy, but that's another story.

 I think fine-coarse may be the rule, since for some code path, it is
 not necessary to take coarse lock.

coarse-fine doesn't mean you have to take the coarse lock.

Valid:
  lock(coarse)
  lock(fine)

Valid:
  lock(find)

Valid:
  lock(coarse)

Invalid:
  lock(fine)
  lock(coarse)


-- 
error compiling committee.c: too many arguments to function



Re: [Qemu-devel] [big lock] Discussion about the convention of device's DMA each other after breaking down biglock

2012-09-19 Thread Jan Kiszka
On 2012-09-19 11:23, Avi Kivity wrote:
 On 09/19/2012 12:19 PM, liu ping fan wrote:
 On Wed, Sep 19, 2012 at 5:14 PM, Paolo Bonzini pbonz...@redhat.com wrote:
 Il 19/09/2012 11:11, liu ping fan ha scritto:
 Why not? devA will drop its local lock, devX will retake the big lock
 recursively, devB will take its local lock.  In the end, we have biglock
 - devB.

 But when adopting local lock, we assume take local lock, then biglock.

 No, because the local lock will be dropped before taking the biglock.
 The order must always be coarse-fine.

 But if we takes coarse firstly, then the mmio-dispatcher will still
 contend for the big lock against each other.
 
 Can you detail the sequence?
 

 I don't know if the front-end (device) lock should come before or after
 the back-end (e.g. netdev) lock in the hierarchy, but that's another story.

 I think fine-coarse may be the rule, since for some code path, it is
 not necessary to take coarse lock.
 
 coarse-fine doesn't mean you have to take the coarse lock.
 
 Valid:
   lock(coarse)
   lock(fine)

This is invalid due to prio inversion issues, independent of potential
deadlocks.

Jan

 
 Valid:
   lock(find)
 
 Valid:
   lock(coarse)
 
 Invalid:
   lock(fine)
   lock(coarse)
 
 

-- 
Siemens AG, Corporate Technology, CT RTC ITP SDP-DE
Corporate Competence Center Embedded Linux



Re: [Qemu-devel] [big lock] Discussion about the convention of device's DMA each other after breaking down biglock

2012-09-19 Thread Jan Kiszka
On 2012-09-19 11:27, Jan Kiszka wrote:
 On 2012-09-19 11:23, Avi Kivity wrote:
 On 09/19/2012 12:19 PM, liu ping fan wrote:
 On Wed, Sep 19, 2012 at 5:14 PM, Paolo Bonzini pbonz...@redhat.com wrote:
 Il 19/09/2012 11:11, liu ping fan ha scritto:
 Why not? devA will drop its local lock, devX will retake the big lock
 recursively, devB will take its local lock.  In the end, we have biglock
 - devB.

 But when adopting local lock, we assume take local lock, then biglock.

 No, because the local lock will be dropped before taking the biglock.
 The order must always be coarse-fine.

 But if we takes coarse firstly, then the mmio-dispatcher will still
 contend for the big lock against each other.

 Can you detail the sequence?


 I don't know if the front-end (device) lock should come before or after
 the back-end (e.g. netdev) lock in the hierarchy, but that's another story.

 I think fine-coarse may be the rule, since for some code path, it is
 not necessary to take coarse lock.

 coarse-fine doesn't mean you have to take the coarse lock.

 Valid:
   lock(coarse)
   lock(fine)
 
 This is invalid due to prio inversion issues, independent of potential
 deadlocks.

Err, forget it - the other way around is the problem.

Sorry,
Jan

-- 
Siemens AG, Corporate Technology, CT RTC ITP SDP-DE
Corporate Competence Center Embedded Linux



Re: [Qemu-devel] [big lock] Discussion about the convention of device's DMA each other after breaking down biglock

2012-09-19 Thread Jan Kiszka
On 2012-09-19 11:00, liu ping fan wrote:
 On Wed, Sep 19, 2012 at 4:06 PM, Avi Kivity a...@redhat.com wrote:
 On 09/19/2012 06:02 AM, liu ping fan wrote:
 Currently, cpu_physical_memory_rw() can be used directly or indirectly
 by mmio-dispatcher to access other devices' memory region. This can
 cause some problem when adopting device's private lock.

 Back ground refer to:
 http://lists.gnu.org/archive/html/qemu-devel/2012-09/msg01481.html
 For lazy, just refer to:
 http://lists.gnu.org/archive/html/qemu-devel/2012-09/msg01878.html


 --1st. the recursive lock of biglock.
 If we leave c_p_m_rw() as it is, ie, no lock inside. Then we can have
 the following (section of the whole call chain, and with
 private_lockA):
   lockA-mmio-dispatcher   -- hold biglock -- c_p_m_rw() --- 
 Before c_p_m_rw(), we drop private_lockA to anti the possibly of
 deadlock.  But we can not anti the nested of this chain or calling to
 another lockB-mmio-dispatcher. So we can not avoid the possibility of
 nested lock of biglock.  And another important factor is that we break
 the lock sequence: private_lock--biglock.
 All of these require us to push biglock's holding into c_p_m_rw(), the
 wrapper can not give help.

 I agree that this is unavoidable.


 --2nd. c_p_m_rw(), sync or async?

 IF we convert all of the device to be protected by refcount, then we can 
 have
 //no big lock
  c_p_m_rw()
 {
devB-ref++;
{
 ---pushed onto another thread.
lock_privatelock
mr-ops-write();
unlock_privatelock
}
wait_for_completion();
devB-ref--;
 }
 This model can help c_p_m_rw() present as a SYNC API.  But currently,
 we mix biglock and private lock together, and wait_for_completion()
 maybe block the release of big lock, which finally causes deadlock. So
 we can not simply rely on this model.
 Instead, we need to classify the calling scene into three cases:
   case1. lockA--dispatcher --- lockB-dispatcher   //can use
 async+completion model
   case2. lockA--dispatcher --- biglock-dispatcher // sync, but can
 cause the nested lock of biglock
   case3. biglock-dispacher --- lockB-dispatcher  // async to avoid
 the lock sequence problem, (as to completion, it need to be placed
 outside the top level biglock, and it is hard to do so. Suggest to
 change to case 1. Or at present, just leave it async)

 This new model will require the biglock can be nested.

 I think changing to an async model is too complicated.  It's difficult
 enough already.  Isn't dropping private locks + recursive big locks
 sufficient?

 I think that dropping private locks + recursive big locks just cover
 case 2. And most of the important, it dont describe case3 which break
 the rule of lock sequence private-lock -- biglock. Scene:
 devA_lock--(devX_with-biglock---devB_lock).
 I just want to classify and post these cases to discuss. Maybe we can
 achieve without async.

What about the following:

What we really need to support in practice is MMIO access triggers RAM
access of device model. Scenarios where a device access triggers another
MMIO access could likely just be rejected without causing troubles.

So, when we dispatch a request to a device, we mark that the current
thread is in a MMIO dispatch and reject any follow-up c_p_m_rw that does
_not_ target RAM, ie. is another, nested MMIO request - independent of
its destination. How much of the known issues would this solve? And what
would remain open?

Jan

-- 
Siemens AG, Corporate Technology, CT RTC ITP SDP-DE
Corporate Competence Center Embedded Linux



Re: [Qemu-devel] [big lock] Discussion about the convention of device's DMA each other after breaking down biglock

2012-09-19 Thread Avi Kivity
On 09/19/2012 12:34 PM, Jan Kiszka wrote:
 
 What about the following:
 
 What we really need to support in practice is MMIO access triggers RAM
 access of device model. Scenarios where a device access triggers another
 MMIO access could likely just be rejected without causing troubles.
 
 So, when we dispatch a request to a device, we mark that the current
 thread is in a MMIO dispatch and reject any follow-up c_p_m_rw that does
 _not_ target RAM, ie. is another, nested MMIO request - independent of
 its destination. How much of the known issues would this solve? And what
 would remain open?

Various iommu-like devices re-dispatch I/O, like changing endianness or
bitband.  I don't know whether it targets I/O rather than RAM.

If they do, we can push the support into the memory API.  I think it's
acceptable as a short term solution (short term meaning as long as needed).



-- 
error compiling committee.c: too many arguments to function



Re: [Qemu-devel] [big lock] Discussion about the convention of device's DMA each other after breaking down biglock

2012-09-19 Thread Paolo Bonzini
Il 19/09/2012 11:21, Avi Kivity ha scritto:
  I don't know if the front-end (device) lock should come before or after
  the back-end (e.g. netdev) lock in the hierarchy, but that's another story.
 I would say device - backend.  It's natural if the backend is the timer
 subsystem, so extend it from there to the block and network layers.  Of
 course callbacks want it to be the other way round.

Yes, that's what I wasn't sure about.  In many cases I believe callbacks
can just release the backend lock.  It works for timers, for example:

for(;;) {
ts = clock-active_timers;
if (!qemu_timer_expired_ns(ts, current_time)) {
break;
}
/* remove timer from the list before calling the callback */
clock-active_timers = ts-next;
ts-next = NULL;

/* run the callback (the timer list can be modified) */
-   ts-cb(ts-opaque);
+   cb = ts-cb;
+   opaque = ts-opaque;
+   unlock();
+   cb(opaque);
+   lock();
}

(The hunch is that ts could be deleted exactly at the moment the
callback is unlocked.  This can be solved with ref/unref on the opaque
value, as you mention below).

Paolo

 We could solve the callback problem in the same way we're using for mmio
 callbacks.  When providing the callback, give the subsystem -ref() and
 -unref() methods.  As long as a callback is pending, the subsystem
 holds a reference.  Once the callback has been invoked or cancelled, the
 reference can be released.





Re: [Qemu-devel] [PATCH V3 1/5] libqblock build system

2012-09-19 Thread Paolo Bonzini
Il 19/09/2012 08:35, Wenchao Xia ha scritto:

 +QEMU_OBJS=$(tools-obj-y) $(block-obj-y)
 +QEMU_OBJS_FILTERED=$(filter %.o,$(QEMU_OBJS))

 What does this filter out?

   $(block-obj-y) contains /block, which would cause trouble in building,
 so filtered it out.

I think that's because you have to call unnest-vars, not unnest-vars-1.

Paolo



Re: [Qemu-devel] [big lock] Discussion about the convention of device's DMA each other after breaking down biglock

2012-09-19 Thread Avi Kivity
On 09/19/2012 12:51 PM, Paolo Bonzini wrote:
 Il 19/09/2012 11:21, Avi Kivity ha scritto:
  I don't know if the front-end (device) lock should come before or after
  the back-end (e.g. netdev) lock in the hierarchy, but that's another 
  story.

 I would say device - backend.  It's natural if the backend is the timer
 subsystem, so extend it from there to the block and network layers.  Of
 course callbacks want it to be the other way round.
 
 Yes, that's what I wasn't sure about.  In many cases I believe callbacks
 can just release the backend lock.  It works for timers, for example:
 
 for(;;) {
 ts = clock-active_timers;
 if (!qemu_timer_expired_ns(ts, current_time)) {
 break;
 }
 /* remove timer from the list before calling the callback */
 clock-active_timers = ts-next;
 ts-next = NULL;
 
 /* run the callback (the timer list can be modified) */
 -   ts-cb(ts-opaque);
 +   cb = ts-cb;
 +   opaque = ts-opaque;
 +   unlock();
 +   cb(opaque);
 +   lock();
 }
 
 (The hunch is that ts could be deleted exactly at the moment the
 callback is unlocked.  This can be solved with ref/unref on the opaque
 value, as you mention below).

Are you saying that this works as is or not?  It does seem broken wrt
deletion; after qemu_del_timer() completes the caller expects the
callback not to be called.

This isn't trivial to guarantee, we need something like

   dispatch_timer():
  pending += 1
  timer.ref()
  drop lock
  timer.cb()
  take lock
  timer.unref()
  pending -= 1
  notify

   del_timer():
  take lock
  timer.unlink()
  while pending:
  wait for notification
  drop lock

but, if del_timer is called with the device lock held, we deadlock. ugh.

-- 
error compiling committee.c: too many arguments to function



Re: [Qemu-devel] [PATCH V9 0/8] memory: unify ioport registration

2012-09-19 Thread Avi Kivity
On 09/04/2012 06:13 PM, Julien Grall wrote:
 This is the nineth version of patch series about ioport registration.
 
 Some part of QEMU still use register_ioport* functions to register ioport.
 These functions doesn't allow to use Memory Listener on it.

Jan't patch that dropped the debug ioports causes conflicts, please
rebase atop upstream.


-- 
error compiling committee.c: too many arguments to function



Re: [Qemu-devel] [RFC PATCH 00/13] Embedded NBD server

2012-09-19 Thread Daniel P. Berrange
On Mon, Aug 27, 2012 at 05:00:13PM +0200, Paolo Bonzini wrote:
 Hi all,
 
 this is an RFC series implementing an NBD server embedded inside QEMU.
 This can be used in various cases, including migration with non-shared
 storage.
 
 Three new commands are introduced at the QMP level
 
   { 'command': 'nbd-server-start',  'data': { 'addr': 'IPSocketAddress' } }

It is probably desirable/required to also have a command where libvirt
can pass in a pre-opened listening socket, since I think SELinux policy
will not want to allow QEMU to create listening sockets itself.

Regards,
Daniel
-- 
|: http://berrange.com  -o-http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org  -o- http://virt-manager.org :|
|: http://autobuild.org   -o- http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org   -o-   http://live.gnome.org/gtk-vnc :|



Re: [Qemu-devel] [big lock] Discussion about the convention of device's DMA each other after breaking down biglock

2012-09-19 Thread Jan Kiszka
On 2012-09-19 11:50, Avi Kivity wrote:
 On 09/19/2012 12:34 PM, Jan Kiszka wrote:

 What about the following:

 What we really need to support in practice is MMIO access triggers RAM
 access of device model. Scenarios where a device access triggers another
 MMIO access could likely just be rejected without causing troubles.

 So, when we dispatch a request to a device, we mark that the current
 thread is in a MMIO dispatch and reject any follow-up c_p_m_rw that does
 _not_ target RAM, ie. is another, nested MMIO request - independent of
 its destination. How much of the known issues would this solve? And what
 would remain open?
 
 Various iommu-like devices re-dispatch I/O, like changing endianness or
 bitband.  I don't know whether it targets I/O rather than RAM.

If such dispatching is lock-less and we validate it's not recursive,
then it should be fine even when not targeting RAM. And we could also
factor out generic dispatchers like MMIO-PIO.

 
 If they do, we can push the support into the memory API.  I think it's
 acceptable as a short term solution (short term meaning as long as needed).

We will always have to avoid loops. I think all we can improve is what
we still allow by better detecting what would be wrong.

Jan

-- 
Siemens AG, Corporate Technology, CT RTC ITP SDP-DE
Corporate Competence Center Embedded Linux



Re: [Qemu-devel] [big lock] Discussion about the convention of device's DMA each other after breaking down biglock

2012-09-19 Thread Paolo Bonzini
Il 19/09/2012 12:06, Avi Kivity ha scritto:
  (The hunch is that ts could be deleted exactly at the moment the
  callback is unlocked.  This can be solved with ref/unref on the opaque
  value, as you mention below).
 Are you saying that this works as is or not?  It does seem broken wrt
 deletion; after qemu_del_timer() completes the caller expects the
 callback not to be called.

Ouch, then I think the only solution is to remove this invariant if you
add fine-grained locking and re-check the enable conditions in the timer
callback.

If you allow calling del_timer to be called with the device lock held,
you cannot fixing without deadlocking.  If you don't, the caller of
del_timer cannot set any expectation because the timer could be run in
the window between dropping the device lock and calling del_timer

Paolo

 This isn't trivial to guarantee, we need something like
 
dispatch_timer():
   pending += 1
   timer.ref()
   drop lock
   timer.cb()
   take lock
   timer.unref()
   pending -= 1
   notify
 
del_timer():
   take lock
   timer.unlink()
   while pending:
   wait for notification
   drop lock
 
 but, if del_timer is called with the device lock held, we deadlock. ugh.

Wait for notification should be a cond_wait that drops the lock.

Alternatively:

dispatch_timer():
   drop lock
   if timer has expired:
   timer.cb()
   take lock
   notify

del_timer():
   take lock
   timer.unlink()
   drop lock




Re: [Qemu-devel] [RFC PATCH 00/13] Embedded NBD server

2012-09-19 Thread Paolo Bonzini
Il 19/09/2012 12:16, Daniel P. Berrange ha scritto:
  Three new commands are introduced at the QMP level
  
{ 'command': 'nbd-server-start',  'data': { 'addr': 'IPSocketAddress' } }
 It is probably desirable/required to also have a command where libvirt
 can pass in a pre-opened listening socket, since I think SELinux policy
 will not want to allow QEMU to create listening sockets itself.

Ok, I'll make it like this:

{ 'type': 'UnixSocketAddress',
  'data': {
'path': 'str' } }

{ 'type': 'String',
  'data': {
'str': 'str' } }

{'union': 'SocketAddress',
  'data': {
   'inet': 'IPSocketAddress',
   'unix': 'UnixSocketAddress',
   'fd': 'String',
   } }

Paolo



Re: [Qemu-devel] [PATCH v2 2/2] Versatile Express: add modelling of NOR flash

2012-09-19 Thread Peter Maydell
On 18 September 2012 21:59, Francesco Lavra francescolavra...@gmail.com wrote:
 On 09/18/2012 03:46 PM, Peter Maydell wrote:
 On 17 September 2012 21:08, Francesco Lavra francescolavra...@gmail.com 
 wrote:
 This patch adds modelling of the two NOR flash banks found on the
 Versatile Express motherboard. Tested with U-Boot running on an emulated
 Versatile Express, with either A9 or A15 CoreTile.

 Signed-off-by: Francesco Lavra francescolavra...@gmail.com
 ---
 Changes in v2:
 Use drive_get_next() instead of drive_get() to get a backing storage for
 each flash bank.

  hw/vexpress.c |   24 ++--
  1 files changed, 22 insertions(+), 2 deletions(-)

 diff --git a/hw/vexpress.c b/hw/vexpress.c
 index 454c2bb..2ffeab1 100644
 --- a/hw/vexpress.c
 +++ b/hw/vexpress.c
 @@ -29,8 +29,12 @@
  #include sysemu.h
  #include boards.h
  #include exec-memory.h
 +#include blockdev.h
 +#include flash.h

  #define VEXPRESS_BOARD_ID 0x8e0
 +#define VEXPRESS_FLASH_SIZE (64 * 1024 * 1024)
 +#define VEXPRESS_FLASH_SECT_SIZE (256 * 1024)

  static struct arm_boot_info vexpress_binfo;

 @@ -355,6 +359,7 @@ static void vexpress_common_init(const VEDBoardInfo
 *daughterboard,

 Something in your email send path is wrapping long lines, which
 means 'git am' doesn't work cleanly. If you're planning on sending
 more QEMU patches you might want to look into getting this fixed.

 Oops, sorry about that, if you want me to re-send the patch with these
 artifacts fixed, just let me know.

  qemu_irq pic[64];
  uint32_t proc_id;
  uint32_t sys_id;
 +DriveInfo *dinfo;
  ram_addr_t vram_size, sram_size;
  MemoryRegion *sysmem = get_system_memory();
  MemoryRegion *vram = g_new(MemoryRegion, 1);
 @@ -410,8 +415,23 @@ static void vexpress_common_init(const VEDBoardInfo
 *daughterboard,

  sysbus_create_simple(pl111, map[VE_CLCD], pic[14]);

 -/* VE_NORFLASH0: not modelled */
 -/* VE_NORFLASH1: not modelled */
 +dinfo = drive_get_next(IF_PFLASH);
 +if (!pflash_cfi01_register(map[VE_NORFLASH0], NULL, vexpress.flash0,
 +VEXPRESS_FLASH_SIZE, dinfo ? dinfo-bdrv : NULL,
 +VEXPRESS_FLASH_SECT_SIZE,
 +VEXPRESS_FLASH_SIZE / VEXPRESS_FLASH_SECT_SIZE, 4,
 +0x00, 0x89, 0x00, 0x18, 0)) {
 +fprintf(stderr, vexpress: error registering flash 0.\n);

 Shouldn't these errors be fatal?

 I checked the existing uses of pflash_cfi_0[1,2]_register() in the code,
 and if I'm not mistaken only in 5 out of 19 devices these errors are
 fatal, in the other 14 cases initialization continues even after flash
 registration failure, with or without an error message. Let me know if
 you still prefer these errors to be fatal.

So the only reason this can fail is if the user specified a file
to back the flash but trying to read it failed (ie, bad filename
or file not the same size as the flash). I think that merits
actually stopping on error.

Ideally in the long term the flash devices should be converted
to proper QOM devices and we could push the error handling back
into the device itself (which is better positioned to distinguish
bad filename from wrong length I suspect).

-- PMM



Re: [Qemu-devel] [big lock] Discussion about the convention of device's DMA each other after breaking down biglock

2012-09-19 Thread Avi Kivity
On 09/19/2012 01:19 PM, Paolo Bonzini wrote:
 Il 19/09/2012 12:06, Avi Kivity ha scritto:
  (The hunch is that ts could be deleted exactly at the moment the
  callback is unlocked.  This can be solved with ref/unref on the opaque
  value, as you mention below).
 Are you saying that this works as is or not?  It does seem broken wrt
 deletion; after qemu_del_timer() completes the caller expects the
 callback not to be called.
 
 Ouch, then I think the only solution is to remove this invariant if you
 add fine-grained locking and re-check the enable conditions in the timer
 callback.

I think so too, I know authors of GUI frameworks suffer from this
problem (the GUI calling into the framework vs. the framework asking the
GUI to repaint).  Don't know what the state of the art solution is.

 
 If you allow calling del_timer to be called with the device lock held,
 you cannot fixing without deadlocking.  If you don't, the caller of
 del_timer cannot set any expectation because the timer could be run in
 the window between dropping the device lock and calling del_timer
 

Right.  Callbacks should check qemu_timer_deleted() after taking their
little lock.  The reference held by the timer subsystem protects against
qemu_free_timer().

-- 
error compiling committee.c: too many arguments to function



[Qemu-devel] [Bug 1042388] Re: qemu: Unsupported syscall: 257 (timer_create)

2012-09-19 Thread Erik de Castro Lopo
I have a fix for this. I can now successfully install ghc and compile
programs with it.

In the process of cleaning up the patch and working on a test for the
test suite.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1042388

Title:
  qemu: Unsupported syscall: 257 (timer_create)

Status in QEMU:
  New

Bug description:
  Running qemu-arm-static for git HEAD. When I try to install ghc from
  debian into my arm chroot I get:

  Setting up ghc (7.4.1-4) ...
  qemu: Unsupported syscall: 257
  ghc: timer_create: Function not implemented
  qemu: Unsupported syscall: 257
  ghc-pkg: timer_create: Function not implemented
  dpkg: error processing ghc (--configure):
   subprocess installed post-installation script returned error exit status 1
  Errors were encountered while processing:
   ghc
  E: Sub-process /usr/bin/dpkg returned an error code (1)

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1042388/+subscriptions



Re: [Qemu-devel] [PATCH] Adding BAR0 for e500 PCI controller

2012-09-19 Thread Alexander Graf

On 19.09.2012, at 09:41, Bharat Bhushan wrote:

 PCI Root complex have TYPE-1 configuration header while PCI endpoint
 have type-0 configuration header. The type-1 configuration header have
 a BAR (BAR0). In Freescale PCI controller BAR0 is used for mapping pci
 address space to CCSR address space. This can used for 2 purposes: 1)
 for MSI interrupt generation 2) Allow CCSR registers access when configured
 as PCI endpoint, which I am not sure is a use case with QEMU-KVM guest.
 
 What I observed is that when guest read the size of BAR0 of host controller
 configuration header (TYPE1 header) then it always reads it as 0. When
 looking into the QEMU hw/ppce500_pci.c, I do not find the PCI controller
 device registering BAR0. I do not find any other controller also doing so
 may they do not use BAR0.
 
 There are two issues when BAR0 is not there (which I can think of):
 1) There should be BAR0 emulated for PCI Root comaplex (TYPE1 header) and
 when reading the size of BAR0, it should give size as per real h/w.
 
 This patch solves this problem.
 
 2) Do we need this BAR0 inbound address translation?
When BAR0 is of non-zero size then it will be configured for PCI
 address space to local address(CCSR) space translation on inbound access.
 The primary use case is for MSI interrupt generation. The device is
 configured with a address offsets in PCI address space, which will be
 translated to MSI interrupt generation MPIC registers. Currently I do
 not understand the MSI interrupt generation mechanism in QEMU and also
 IIRC we do not use QEMU MSI interrupt mechanism on e500 guest machines.
 But this BAR0 will be used when using MSI on e500.
 
 I can see one more issue, There are ATMUs emulated in hw/ppce500_pci.c,
 but i do not see these being used for address translation.
 So far that works because pci address space and local address space are 1:1
 mapped. BAR0 inbound translation + ATMU translation will complete the address
 translation of inbound traffic.
 
 Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
 ---
 hw/pci_host.h|9 +
 hw/ppc/e500.c|   21 +
 hw/ppce500_pci.c |7 +++
 3 files changed, 37 insertions(+), 0 deletions(-)
 
 diff --git a/hw/pci_host.h b/hw/pci_host.h
 index 4b9c300..c1ec7eb 100644
 --- a/hw/pci_host.h
 +++ b/hw/pci_host.h
 @@ -41,10 +41,19 @@ struct PCIHostState {
 MemoryRegion data_mem;
 MemoryRegion mmcfg;
 MemoryRegion *address_space;
 +MemoryRegion bar0;
 uint32_t config_reg;
 PCIBus *bus;
 };
 
 +typedef struct PPCE500CCSRState {
 +SysBusDevice *parent;
 +MemoryRegion ccsr_space;
 +} PPCE500CCSRState;
 +
 +#define TYPE_CCSR e500-ccsr
 +#define CCSR(obj) OBJECT_CHECK(PPCE500CCSRState, (obj), TYPE_CCSR)
 +

All of this is e500 specific, so it should definitely not be in any generic pci 
host header.

 /* common internal helpers for PCI/PCIe hosts, cut off overflows */
 void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr,
   uint32_t limit, uint32_t val, uint32_t len);
 diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
 index 6f0de6d..1f5da28 100644
 --- a/hw/ppc/e500.c
 +++ b/hw/ppc/e500.c
 @@ -31,6 +31,7 @@
 #include hw/loader.h
 #include elf.h
 #include hw/sysbus.h
 +#include hw/pci_host.h
 #include exec-memory.h
 #include host-utils.h
 
 @@ -587,3 +588,23 @@ void ppce500_init(PPCE500Params *params)
 kvmppc_init();
 }
 }
 +
 +static void e500_ccsr_type_init(Object *obj)
 +{
 +PPCE500CCSRState *ccsr = CCSR(obj);
 +ccsr-ccsr_space.size = int128_make64(MPC8544_CCSRBAR_SIZE);
 +}
 +
 +static const TypeInfo e500_ccsr_info = {
 +.name  = TYPE_CCSR,
 +.parent=  TYPE_SYS_BUS_DEVICE,
 +.instance_size = sizeof(PPCE500CCSRState),
 +.instance_init = e500_ccsr_type_init,
 +};
 +
 +static void e500_ccsr_register_types(void)
 +{
 +type_register_static(e500_ccsr_info);
 +}
 +
 +type_init(e500_ccsr_register_types)
 diff --git a/hw/ppce500_pci.c b/hw/ppce500_pci.c
 index 92b1dc0..135f2a6 100644
 --- a/hw/ppce500_pci.c
 +++ b/hw/ppce500_pci.c
 @@ -315,6 +315,8 @@ static int e500_pcihost_initfn(SysBusDevice *dev)
 int i;
 MemoryRegion *address_space_mem = get_system_memory();
 MemoryRegion *address_space_io = get_system_io();
 +PPCE500CCSRState *ccsr;
 +PCIDevice *pdev;
 
 h = PCI_HOST_BRIDGE(dev);
 s = PPC_E500_PCI_HOST_BRIDGE(dev);
 @@ -342,6 +344,11 @@ static int e500_pcihost_initfn(SysBusDevice *dev)
 memory_region_add_subregion(s-container, PCIE500_REG_BASE, s-iomem);
 sysbus_init_mmio(dev, s-container);
 
 +ccsr = CCSR(object_new(TYPE_CCSR));
 +memory_region_init_io(h-bar0, pci_host_conf_be_ops, h,
 +  pci-bar0, memory_region_size(ccsr-ccsr_space));
 +pdev = pci_find_device(b, 0, 0);
 +pci_register_bar(pdev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, h-bar0);

No. The PCI host bridge doesn't create the CCSR object. The machine model 

Re: [Qemu-devel] [PATCH] qapi: convert add_client

2012-09-19 Thread Paolo Bonzini
Il 18/09/2012 21:06, Luiz Capitulino ha scritto:
 +fd = monitor_get_fd(cur_mon, fdname);
 +if (fd  0) {
 +error_setg(errp, file-description '%s' has not been found, fdname);
 +return;
 +}

Please change this to add an Error * argument to monitor_get_fd, and
change non-Error-friendly callers of monitor_get_fd to
monitor_handle_fd_param.

You can then reuse the error in monitor_handle_fd_param (there is
already QERR_FD_NOT_FOUND, by the way).

I'll post the patches later in my reviewed NBD series, please take them
from there and I'll rebase before my pull request.

Paolo



Re: [Qemu-devel] [PATCH 000/126] Rewrite s390x translator

2012-09-19 Thread Alexander Graf

On 19.09.2012, at 02:32, Richard Henderson wrote:

 On 09/18/2012 02:09 PM, Alexander Graf wrote:
 Also, 102-126 are missing for me :).
 
 Yeah, they never made it to the list.  I guess next time I shouldn't
 try to send to many at the same time.  I should stagger them into
 hunks of 25 at a time or so.
 
 I've fixed some bugs since the v1, and I believe I've incorporated
 all of the comments to date.  Is it helpful for you if I re-send, or
 would it simply be easier to examine the branch?

I find email easier to read usually :). Please make sure to test a full Debian 
guest as well as something s390x based (RHEL/SLES).


Alex

 
  git://repo.or.cz/qemu/rth.git rth/s390-reorg-3
 
 
 r~




[Qemu-devel] [PATCH] xen: Fix, no unplug of pt device by platform device.

2012-09-19 Thread Anthony PERARD
The Xen platform device will unplug any NICs if requested by the guest (PVonHVM)
including a NIC that would have been passthrough. This patch makes sure that a
passthrough device will not be unplug.

Reported-by: Zhang, Yang Z yang.z.zh...@intel.com
Signed-off-by: Anthony PERARD anthony.per...@citrix.com

---
 hw/xen_platform.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/hw/xen_platform.c b/hw/xen_platform.c
index 0d6c2ff..956dbfe 100644
--- a/hw/xen_platform.c
+++ b/hw/xen_platform.c
@@ -85,8 +85,10 @@ static void log_writeb(PCIXenPlatformState *s, char val)
 
 static void unplug_nic(PCIBus *b, PCIDevice *d, void *o)
 {
+/* We have to ignore passthrough devices */
 if (pci_get_word(d-config + PCI_CLASS_DEVICE) ==
-PCI_CLASS_NETWORK_ETHERNET) {
+PCI_CLASS_NETWORK_ETHERNET
+ strcmp(d-name, xen-pci-passthrough) != 0) {
 qdev_free(d-qdev);
 }
 }
@@ -98,8 +100,10 @@ static void pci_unplug_nics(PCIBus *bus)
 
 static void unplug_disks(PCIBus *b, PCIDevice *d, void *o)
 {
+/* We have to ignore passthrough devices */
 if (pci_get_word(d-config + PCI_CLASS_DEVICE) ==
-PCI_CLASS_STORAGE_IDE) {
+PCI_CLASS_STORAGE_IDE
+ strcmp(d-name, xen-pci-passthrough) != 0) {
 qdev_unplug((d-qdev), NULL);
 }
 }
-- 
Anthony PERARD



Re: [Qemu-devel] [PATCH 010/126] target-s390: Reorg exception handling

2012-09-19 Thread Alexander Graf

On 19.09.2012, at 02:14, Richard Henderson wrote:

 On 09/18/2012 01:18 PM, Alexander Graf wrote:
 -/* remember what pgm exeption this was */
 +/* Remember what pgm exeption this was.  */
  tmp = tcg_const_i32(code);
  tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUS390XState, int_pgm_code));
  tcg_temp_free_i32(tmp);
  -tmp = tcg_const_i32(ilc);
 +tmp = tcg_const_i32(s-next_pc - s-pc);
 
 Mind to explain this one?
 
 ILC = the size of the insn.  Rather than passing ILC around into
 gen_program_exception, get it back from the s-next_pc value that
 we stored into DisasContext.

Ah, makes sense. Maybe create a small helper that makes it more obvious:

static int current_ilc(DisasContext *s)
{
/* Next pc - current pc = current instruction length. */
return s-next_pc - s-pc;
}


Alex




[Qemu-devel] [PATCH 4/9] fbdev: add linux framebuffer display driver.

2012-09-19 Thread Gerd Hoffmann
Display works, requires truecolor framebuffer with 16 or 32 bpp on the
host.  32bpp is recommended.  The framebuffer is used as-is, qemu
doesn't try to switch modes.  With LCD displays mode switching is pretty
pointless IMHO, also it wouldn't work anyway with the most common
fbdev drivers (vesafb, KMS).  Guest display is centered on the host
screen.

Mouse works, uses /dev/input/mice.

Keyboard works.  Guest screen has whatever keymap you load inside
the guest.  Text windows (monitor, serial, ...) have a simple en-us
keymap.  Good enough to type monitor commands.  Not goot enough to
work seriously on a serial terminal.  But the qemu terminal emulation
isn't good enough for that anyway ;)

Hot keys:
  Ctrl-Alt-Fnr  - host console switching.
  Ctrl-Alt-nr   - qemu console switching.
  Ctrl-Alt-ESC- exit qemu.

Special feature:  Sane console switching.  Switching away stops screen
updates.  Switching back redraws the screen.  When started from the
linux console qemu uses the vt you've started it from (requires just
read/write access to /dev/fb0).  When starting from somewhere else qemu
tries to open a unused virtual terminal and switch to it (usually
requires root privileges to open /dev/ttynr).

Signed-off-by: Gerd Hoffmann kra...@redhat.com
---
 console.h   |4 +
 qemu-options.hx |8 +
 sysemu.h|1 +
 trace-events|   15 +
 ui/Makefile.objs|1 +
 ui/fbdev.c  |  965 +++
 ui/linux-keynames.h |  388 +
 vl.c|   19 +
 8 files changed, 1401 insertions(+), 0 deletions(-)
 create mode 100644 ui/fbdev.c
 create mode 100644 ui/linux-keynames.h

diff --git a/console.h b/console.h
index bef2d2d..aa9959b 100644
--- a/console.h
+++ b/console.h
@@ -417,6 +417,10 @@ void qemu_console_copy(DisplayState *ds, int src_x, int 
src_y,
 /* sdl.c */
 void sdl_display_init(DisplayState *ds, int full_screen, int no_frame);
 
+/* fbdev.c */
+int fbdev_display_init(DisplayState *ds, const char *device, Error **err);
+void fbdev_display_uninit(DisplayState *ds);
+
 /* cocoa.m */
 void cocoa_display_init(DisplayState *ds, int full_screen);
 
diff --git a/qemu-options.hx b/qemu-options.hx
index 09c86c4..3445655 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -947,6 +947,14 @@ Enable/disable spice seamless migration. Default is off.
 @end table
 ETEXI
 
+DEF(fbdev, 0, QEMU_OPTION_fbdev,
+-fbdev  enable fbdev\n, QEMU_ARCH_ALL)
+STEXI
+@item -fbdev
+@findex -fbdev
+Enable fbdev (linux framebuffer).
+ETEXI
+
 DEF(portrait, 0, QEMU_OPTION_portrait,
 -portrait   rotate graphical output 90 deg left (only PXA LCD)\n,
 QEMU_ARCH_ALL)
diff --git a/sysemu.h b/sysemu.h
index 65552ac..34e6bfa 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -93,6 +93,7 @@ typedef enum DisplayType
 DT_DEFAULT,
 DT_CURSES,
 DT_SDL,
+DT_FBDEV,
 DT_NOGRAPHIC,
 DT_NONE,
 } DisplayType;
diff --git a/trace-events b/trace-events
index b48fe2d..0d0b7fa 100644
--- a/trace-events
+++ b/trace-events
@@ -994,3 +994,18 @@ spapr_pci_rtas_ibm_change_msi(unsigned func, unsigned req) 
func %u, requested %
 spapr_pci_rtas_ibm_query_interrupt_source_number(unsigned ioa, unsigned intr) 
queries for #%u, IRQ%u
 spapr_pci_msi_write(uint64_t addr, uint64_t data, uint32_t dt_irq) 
@%PRIx64=%PRIx64 IRQ %u
 spapr_pci_lsi_set(const char *busname, int pin, uint32_t irq) %s PIN%d IRQ %u
+
+# ui/fbdev.c
+fbdev_enabled(void) 
+fbdev_cleanup(void) 
+fbdev_vt_activate(int vtno, int wait) vtno %d, wait %d
+fbdev_vt_activated(void) 
+fbdev_vt_release_request(void) 
+fbdev_vt_released(void) 
+fbdev_vt_aquire_request(void) 
+fbdev_vt_aquired(void) 
+fbdev_kbd_raw(int enable) enable %d
+fbdev_kbd_event(int keycode, const char *kname, int up) keycode 0x%x [%s], 
down %d
+fbdev_dpy_resize(int w, int h) %dx%d
+fbdev_dpy_redraw(void)
+
diff --git a/ui/Makefile.objs b/ui/Makefile.objs
index adc07be..55ddcf2 100644
--- a/ui/Makefile.objs
+++ b/ui/Makefile.objs
@@ -12,3 +12,4 @@ common-obj-$(CONFIG_SDL) += sdl.o sdl_zoom.o x_keymap.o
 common-obj-$(CONFIG_COCOA) += cocoa.o
 common-obj-$(CONFIG_CURSES) += curses.o
 common-obj-$(CONFIG_VNC) += $(vnc-obj-y)
+common-obj-$(CONFIG_LINUX) += fbdev.o
diff --git a/ui/fbdev.c b/ui/fbdev.c
new file mode 100644
index 000..6dfbacc
--- /dev/null
+++ b/ui/fbdev.c
@@ -0,0 +1,965 @@
+/*
+ * linux fbdev output driver.
+ *
+ * Author: Gerd Hoffmann kra...@redhat.com
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+#include stdio.h
+#include stdlib.h
+#include stdbool.h
+#include string.h
+#include unistd.h
+#include fcntl.h
+#include signal.h
+#include termios.h
+
+#include sys/ioctl.h
+#include sys/mman.h
+
+#include linux/kd.h
+#include linux/vt.h
+#include linux/fb.h
+
+#include qemu-common.h
+#include console.h
+#include keymaps.h
+#include sysemu.h
+#include pflib.h
+
+/*
+ * must be last so 

[Qemu-devel] [PATCH 5/9] fbdev: add monitor command to enable/disable

2012-09-19 Thread Gerd Hoffmann
This patch adds a fbdev monitor command to enable/disable
the fbdev display at runtime to both qmp and hmp.

qmp: framebuffer-display enable=on|off
hmp: framebuffer-display on|off

Signed-off-by: Gerd Hoffmann kra...@redhat.com
---
 hmp-commands.hx  |   15 +++
 hmp.c|9 +
 hmp.h|1 +
 qapi-schema.json |   13 +
 qmp-commands.hx  |6 ++
 qmp.c|   19 +++
 6 files changed, 63 insertions(+), 0 deletions(-)

diff --git a/hmp-commands.hx b/hmp-commands.hx
index ed67e99..93e75ab 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1377,6 +1377,21 @@ passed since 1970, i.e. unix epoch.
 ETEXI
 
 {
+.name   = framebuffer-display,
+.args_type  = enable:b,
+.params = on|off,
+.help   = enable/disable linux console framebuffer display,
+.mhandler.cmd = hmp_framebuffer_display,
+},
+
+STEXI
+@item framebuffer-display on | off
+@findex framebuffer-display
+
+enable/disable linux console framebuffer display.
+ETEXI
+
+{
 .name   = info,
 .args_type  = item:s?,
 .params = [subcommand],
diff --git a/hmp.c b/hmp.c
index ba6fbd3..9677ef6 100644
--- a/hmp.c
+++ b/hmp.c
@@ -1168,3 +1168,12 @@ void hmp_screen_dump(Monitor *mon, const QDict *qdict)
 qmp_screendump(filename, err);
 hmp_handle_error(mon, err);
 }
+
+void hmp_framebuffer_display(Monitor *mon, const QDict *qdict)
+{
+int enable = qdict_get_bool(qdict, enable);
+Error *errp = NULL;
+
+qmp_framebuffer_display(enable, errp);
+hmp_handle_error(mon, errp);
+}
diff --git a/hmp.h b/hmp.h
index 48b9c59..20aa08c 100644
--- a/hmp.h
+++ b/hmp.h
@@ -73,5 +73,6 @@ void hmp_getfd(Monitor *mon, const QDict *qdict);
 void hmp_closefd(Monitor *mon, const QDict *qdict);
 void hmp_send_key(Monitor *mon, const QDict *qdict);
 void hmp_screen_dump(Monitor *mon, const QDict *qdict);
+void hmp_framebuffer_display(Monitor *mon, const QDict *qdict);
 
 #endif
diff --git a/qapi-schema.json b/qapi-schema.json
index 14e4419..87c77f7 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -2619,3 +2619,16 @@
 # Since: 0.14.0
 ##
 { 'command': 'screendump', 'data': {'filename': 'str'} }
+
+# @framebuffer-display:
+#
+# Enable/disable linux console framebuffer display.
+#
+# @enable: whenever the framebuffer display should be enabled or disabled.
+#
+# Returns: Nothing.
+#
+# Since: 1.3
+#
+##
+{ 'command': 'framebuffer-display', 'data': {'enable': 'bool'} }
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 6e21ddb..e0a747a 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -2539,3 +2539,9 @@ EQMP
 .args_type  = ,
 .mhandler.cmd_new = qmp_marshal_input_query_target,
 },
+
+{
+.name   = framebuffer-display,
+.args_type  = enable:b,
+.mhandler.cmd_new = qmp_marshal_input_framebuffer_display,
+},
diff --git a/qmp.c b/qmp.c
index 8463922..56e007f 100644
--- a/qmp.c
+++ b/qmp.c
@@ -391,6 +391,25 @@ void qmp_change(const char *device, const char *target,
 }
 }
 
+void qmp_framebuffer_display(bool enable, Error **errp)
+{
+#if defined(CONFIG_LINUX)
+DisplayState *ds = get_displaystate();
+
+if (enable) {
+if (fbdev_display_init(ds, NULL, errp) != 0) {
+if (!error_is_set(errp)) {
+error_setg(errp, fbdev initialization failed);
+}
+}
+} else {
+fbdev_display_uninit(ds);
+}
+#else
+error_setg(errp, fbdev support disabled at compile time);
+#endif
+}
+
 static void qom_list_types_tramp(ObjectClass *klass, void *data)
 {
 ObjectTypeInfoList *e, **pret = data;
-- 
1.7.1




Re: [Qemu-devel] [PATCH 010/126] target-s390: Reorg exception handling

2012-09-19 Thread Peter Maydell
On 19 September 2012 01:14, Richard Henderson r...@twiddle.net wrote:
 On 09/18/2012 01:18 PM, Alexander Graf wrote:
 -/* remember what pgm exeption this was */
 +/* Remember what pgm exeption this was.  */

...if you're changing this comment then at least fix the typo
(exception)...

   tmp = tcg_const_i32(code);
   tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUS390XState, int_pgm_code));
   tcg_temp_free_i32(tmp);
   -tmp = tcg_const_i32(ilc);
 +tmp = tcg_const_i32(s-next_pc - s-pc);

 Mind to explain this one?

 ILC = the size of the insn.  Rather than passing ILC around into
 gen_program_exception, get it back from the s-next_pc value that
 we stored into DisasContext.

...but isn't (s-next_pc - s-pc) ilc*2, not ilc? Compare this hunk
from elsewhere in the patch:

-tmp32_2 = tcg_const_i32(ilc * 2);
-tmp32_3 = tcg_const_i32(EXCP_SVC);
+tmp32_2 = tcg_const_i32(s-next_pc - s-pc);

-- PMM



Re: [Qemu-devel] [PATCH 010/126] target-s390: Reorg exception handling

2012-09-19 Thread Alexander Graf

On 19.09.2012, at 13:29, Peter Maydell wrote:

 On 19 September 2012 01:14, Richard Henderson r...@twiddle.net wrote:
 On 09/18/2012 01:18 PM, Alexander Graf wrote:
 -/* remember what pgm exeption this was */
 +/* Remember what pgm exeption this was.  */
 
 ...if you're changing this comment then at least fix the typo
 (exception)...
 
  tmp = tcg_const_i32(code);
  tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUS390XState, int_pgm_code));
  tcg_temp_free_i32(tmp);
  -tmp = tcg_const_i32(ilc);
 +tmp = tcg_const_i32(s-next_pc - s-pc);
 
 Mind to explain this one?
 
 ILC = the size of the insn.  Rather than passing ILC around into
 gen_program_exception, get it back from the s-next_pc value that
 we stored into DisasContext.
 
 ...but isn't (s-next_pc - s-pc) ilc*2, not ilc? Compare this hunk
 from elsewhere in the patch:
 
 -tmp32_2 = tcg_const_i32(ilc * 2);
 -tmp32_3 = tcg_const_i32(EXCP_SVC);
 +tmp32_2 = tcg_const_i32(s-next_pc - s-pc);

Yes, it is, and hence it's correct. The exception field encodes the real 
instruction length, not the ILC bits in the opcode. Maybe the naming is 
unfortunate...


Alex




Re: [Qemu-devel] [RfC PATCH] vga: add mmio bar to standard vga

2012-09-19 Thread Gerd Hoffmann
 + *   0x0400 - 0x041f  vga ioports (0x3c0 - 0x3df), remapped 1:1
 
 Do they support word accesses to set both index and data?
 
 + *   0x0500 - 0x0515  bochs dispi interface registers, mapped flat without
 + * index/data ports.  Use (index  1) as offset for
 + * (16bit) register access.
 + */
 
 BAR should disappear with -M old.

Sure.  I have a patch in flight which adds the pc-1.3 machine type, once
this is in I can easily add the compat stuff.

 +static const MemoryRegionOps pci_vga_ioport_ops = {
 +.read = pci_vga_ioport_read,
 +.write = pci_vga_ioport_write,
 +.valid.min_access_size = 1,
 +.valid.max_access_size = 4,
 +.impl.min_access_size = 1,
 +.impl.max_access_size = 1,
 +.endianness = DEVICE_LITTLE_ENDIAN,
 +};
 
 Looks like word writes are supported provided the memory API breaks up
 writes in little endian order.  Better to make it explicit.

Like the attached incremental patch?

cheers,
  Gerd

From d46b14dfd74dcc37fe187dc76cd681ad7dbff2d5 Mon Sep 17 00:00:00 2001
From: Gerd Hoffmann kra...@redhat.com
Date: Wed, 19 Sep 2012 13:31:04 +0200
Subject: [PATCH] [fixup] vga mmio

Signed-off-by: Gerd Hoffmann kra...@redhat.com
---
 hw/vga-pci.c |   25 ++---
 1 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/hw/vga-pci.c b/hw/vga-pci.c
index e05e2ef..7fd305d 100644
--- a/hw/vga-pci.c
+++ b/hw/vga-pci.c
@@ -78,14 +78,33 @@ static uint64_t pci_vga_ioport_read(void *ptr, 
target_phys_addr_t addr,
 unsigned size)
 {
 PCIVGAState *d = ptr;
-return vga_ioport_read(d-vga, addr);
+uint64_t ret = 0;
+
+switch (size) {
+case 1:
+ret = vga_ioport_read(d-vga, addr);
+break;
+case 2:
+ret  = vga_ioport_read(d-vga, addr);
+ret |= vga_ioport_read(d-vga, addr+1)  8;
+break;
+}
+return ret;
 }
 
 static void pci_vga_ioport_write(void *ptr, target_phys_addr_t addr,
  uint64_t val, unsigned size)
 {
 PCIVGAState *d = ptr;
-vga_ioport_write(d-vga, addr, val);
+switch (size) {
+case 1:
+vga_ioport_write(d-vga, addr, val);
+break;
+case 2:
+vga_ioport_write(d-vga, addr, val  0xff);
+vga_ioport_write(d-vga, addr+1, (val  8)  0xff);
+break;
+}
 }
 
 static const MemoryRegionOps pci_vga_ioport_ops = {
@@ -94,7 +113,7 @@ static const MemoryRegionOps pci_vga_ioport_ops = {
 .valid.min_access_size = 1,
 .valid.max_access_size = 4,
 .impl.min_access_size = 1,
-.impl.max_access_size = 1,
+.impl.max_access_size = 2,
 .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
-- 
1.7.1



[Qemu-devel] [PATCH 2/9] add unregister_displaychangelistener

2012-09-19 Thread Gerd Hoffmann
Also change the way the gui_timer is initialized: each time a
displaychangelistener is registered or unregistered we'll check
whenever we need a timer (due to dpy_refresh callback being present)
and if so setup a timer, otherwise zap it.  This way the gui timer works
correctly with displaychangelisteners coming and going.

Signed-off-by: Gerd Hoffmann kra...@redhat.com
---
 console.h |   10 ++
 vl.c  |   31 +++
 2 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/console.h b/console.h
index 646ad4b..48fef22 100644
--- a/console.h
+++ b/console.h
@@ -229,9 +229,19 @@ static inline int is_buffer_shared(DisplaySurface *surface)
 !(surface-flags  QEMU_REALPIXELS_FLAG));
 }
 
+void gui_setup_refresh(DisplayState *ds);
+
 static inline void register_displaychangelistener(DisplayState *ds, 
DisplayChangeListener *dcl)
 {
 QLIST_INSERT_HEAD(ds-listeners, dcl, next);
+gui_setup_refresh(ds);
+}
+
+static inline void unregister_displaychangelistener(DisplayState *ds,
+DisplayChangeListener *dcl)
+{
+QLIST_REMOVE(dcl, next);
+gui_setup_refresh(ds);
 }
 
 static inline void dpy_update(DisplayState *s, int x, int y, int w, int h)
diff --git a/vl.c b/vl.c
index 2a7c92a..fbb77fe 100644
--- a/vl.c
+++ b/vl.c
@@ -1288,6 +1288,29 @@ static void gui_update(void *opaque)
 qemu_mod_timer(ds-gui_timer, interval + qemu_get_clock_ms(rt_clock));
 }
 
+void gui_setup_refresh(DisplayState *ds)
+{
+DisplayChangeListener *dcl;
+bool need_timer = false;
+
+QLIST_FOREACH(dcl, ds-listeners, next) {
+if (dcl-dpy_refresh != NULL) {
+need_timer = true;
+break;
+}
+}
+
+if (need_timer  ds-gui_timer == NULL) {
+ds-gui_timer = qemu_new_timer_ms(rt_clock, gui_update, ds);
+qemu_mod_timer(ds-gui_timer, qemu_get_clock_ms(rt_clock));
+}
+if (!need_timer  ds-gui_timer != NULL) {
+qemu_del_timer(ds-gui_timer);
+qemu_free_timer(ds-gui_timer);
+ds-gui_timer = NULL;
+}
+}
+
 struct vm_change_state_entry {
 VMChangeStateHandler *cb;
 void *opaque;
@@ -2350,7 +2373,6 @@ int main(int argc, char **argv, char **envp)
 const char *kernel_filename, *kernel_cmdline;
 char boot_devices[33] = cad; /* default to HD-floppy-CD-ROM */
 DisplayState *ds;
-DisplayChangeListener *dcl;
 int cyls, heads, secs, translation;
 QemuOpts *hda_opts = NULL, *opts, *machine_opts;
 QemuOptsList *olist;
@@ -3698,13 +3720,6 @@ int main(int argc, char **argv, char **envp)
 
 /* display setup */
 dpy_resize(ds);
-QLIST_FOREACH(dcl, ds-listeners, next) {
-if (dcl-dpy_refresh != NULL) {
-ds-gui_timer = qemu_new_timer_ms(rt_clock, gui_update, ds);
-qemu_mod_timer(ds-gui_timer, qemu_get_clock_ms(rt_clock));
-break;
-}
-}
 text_consoles_set_display(ds);
 
 if (foreach_device_config(DEV_GDB, gdbserver_start)  0) {
-- 
1.7.1




[Qemu-devel] [PATCH 9/9] fbdev: add display scaling support

2012-09-19 Thread Gerd Hoffmann
Add support for scaling the guest display.
Ctrl-Alt-S hotkey toggles scaling.

Signed-off-by: Gerd Hoffmann kra...@redhat.com
---
 ui/fbdev.c |   61 ++-
 1 files changed, 51 insertions(+), 10 deletions(-)

diff --git a/ui/fbdev.c b/ui/fbdev.c
index d859d84..0f40259 100644
--- a/ui/fbdev.c
+++ b/ui/fbdev.c
@@ -81,6 +81,7 @@ static pixman_image_t *surface;
 static pixman_image_t *framebuffer;
 static pixman_transform_t transform;
 static pixman_region16_t  dirty;
+static double scale;
 
 static QEMUCursor *ptr_cursor;
 static pixman_image_t *ptr_image;
@@ -88,6 +89,10 @@ static intptr_refresh;
 static intpx, py, pw, ph;
 static intmx, my, mon;
 
+/* options */
+static intuse_scale = 1;
+static pixman_filter_tpfilter = PIXMAN_FILTER_GOOD;
+
 /* fwd decls */
 static int fbdev_activate_vt(int tty, int vtno, bool wait);
 
@@ -182,13 +187,14 @@ static void read_mouse(void *opaque)
 if (ay  0) {
 ay = 0;
 }
-if (ax = cw) {
-ax = cw-1;
+if (ax = cw*scale) {
+ax = cw*scale-1;
 }
-if (ay = ch) {
-ay = ch-1;
+if (ay = ch*scale) {
+ay = ch*scale-1;
 }
-kbd_mouse_event(ax * 0x7FFF / cw, ay * 0x7FFF / ch, 0, b);
+kbd_mouse_event(ax * 0x7FFF / (cw*scale),
+ay * 0x7FFF / (ch*scale), 0, b);
 } else {
 kbd_mouse_event(x, y, 0, b);
 }
@@ -543,6 +549,12 @@ static void read_mediumraw(void *opaque)
 (ctrl-alt-esc) ===\n);
 exit(1);
 }
+if (keycode == KEY_S) {
+use_scale = !use_scale;
+resize_screen++;
+redraw_screen++;
+continue;
+}
 if (keycode = KEY_F1  keycode = KEY_F10) {
 fbdev_activate_vt(tty, keycode+1-KEY_F1, false);
 key_down[keycode] = false;
@@ -913,6 +925,11 @@ static void fbdev_render_ptr(DisplayState *ds)
 pixman_transform_translate(transform, NULL,
pixman_int_to_fixed(-cx),
pixman_int_to_fixed(-cy));
+if (use_scale) {
+pixman_transform_scale(transform, NULL,
+   pixman_double_to_fixed(1/scale),
+   pixman_double_to_fixed(1/scale));
+}
 pixman_transform_translate(transform, NULL,
pixman_int_to_fixed(-px),
pixman_int_to_fixed(-py));
@@ -938,16 +955,32 @@ static void fbdev_update(DisplayState *ds, int x, int y, 
int w, int h)
 }
 
 if (resize_screen) {
+double xs, ys;
+
 trace_fbdev_dpy_resize(ds_get_width(ds), ds_get_height(ds));
 resize_screen = 0;
 cx = 0; cy = 0;
 cw = ds_get_width(ds);
 ch = ds_get_height(ds);
-if (ds_get_width(ds)  fb_var.xres) {
-cx = (fb_var.xres - ds_get_width(ds)) / 2;
-}
-if (ds_get_height(ds)  fb_var.yres) {
-cy = (fb_var.yres - ds_get_height(ds)) / 2;
+
+if (use_scale) {
+xs = (double)fb_var.xres / cw;
+ys = (double)fb_var.yres / ch;
+if (xs  ys) {
+scale = ys;
+cx = (fb_var.xres - ds_get_width(ds)*scale) / 2;
+} else {
+scale = xs;
+cy = (fb_var.yres - ds_get_height(ds)*scale) / 2;
+}
+} else {
+scale = 1;
+if (ds_get_width(ds)  fb_var.xres) {
+cx = (fb_var.xres - ds_get_width(ds)) / 2;
+}
+if (ds_get_height(ds)  fb_var.yres) {
+cy = (fb_var.yres - ds_get_height(ds)) / 2;
+}
 }
 if (surface) {
 pixman_image_unref(surface);
@@ -958,7 +991,14 @@ static void fbdev_update(DisplayState *ds, int x, int y, 
int w, int h)
 pixman_transform_translate(transform, NULL,
pixman_int_to_fixed(-cx),
pixman_int_to_fixed(-cy));
+if (use_scale) {
+pixman_transform_scale(transform, NULL,
+   pixman_double_to_fixed(1/scale),
+   pixman_double_to_fixed(1/scale));
+}
 pixman_image_set_transform(surface, transform);
+
+pixman_image_set_filter(surface, pfilter, NULL, 0);
 }
 
 if (redraw_screen) {
@@ -1050,6 +1090,7 @@ static void fbdev_cursor_define(DisplayState *ds, 
QEMUCursor *cursor)
  cursor-width, cursor-height,
  

[Qemu-devel] [PATCH v4 0/9] linux framebuffer display driver

2012-09-19 Thread Gerd Hoffmann
  Hi,

Next round of the framebuffer display driver patches, including git
tree for your convinience.

Changes:
  - fix bisectability.
  - Use 'struct Error' to report initialization errors, so we can pass
up more detailed errors to the monitor.
  - change monitor command names.

Sneak preview: branch rebase/fbdev has patches to switch over spice
to pixman (http://www.kraxel.org/cgit/qemu/log/?h=rebase/fbdev).

cheers,
  Gerd

The following changes since commit 6b80f7db8a7f84d21e46d01e30c8497733bb23a0:

  Merge remote-tracking branch 'kiszka/queues/slirp' into staging (2012-09-17 
10:23:20 -0500)

are available in the git repository at:

  git://git.kraxel.org/qemu fbdev.2

Gerd Hoffmann (9):
  QLIST-ify display change listeners.
  add unregister_displaychangelistener
  move set_mouse + cursor_define callbacks
  fbdev: add linux framebuffer display driver.
  fbdev: add monitor command to enable/disable
  fbdev: make configurable at compile time.
  fbdev: move to pixman
  fbdev: add mouse pointer support
  fbdev: add display scaling support

 configure   |   24 ++
 console.c   |2 +-
 console.h   |  123 --
 hmp-commands.hx |   15 +
 hmp.c   |9 +
 hmp.h   |1 +
 hw/jazz_led.c   |2 +-
 hw/qxl-render.c |2 +-
 hw/vga.c|   10 +-
 hw/vmware_vga.c |   11 +-
 hw/xenfb.c  |2 +-
 qapi-schema.json|   13 +
 qemu-options.hx |8 +
 qmp-commands.hx |6 +
 qmp.c   |   19 +
 sysemu.h|1 +
 trace-events|   15 +
 ui/Makefile.objs|1 +
 ui/fbdev.c  | 1143 +++
 ui/linux-keynames.h |  388 +
 ui/sdl.c|8 +-
 ui/spice-display.c  |4 +-
 ui/vnc.c|8 +-
 vl.c|   57 ++-
 24 files changed, 1798 insertions(+), 74 deletions(-)
 create mode 100644 ui/fbdev.c
 create mode 100644 ui/linux-keynames.h



[Qemu-devel] [PATCH 6/9] fbdev: make configurable at compile time.

2012-09-19 Thread Gerd Hoffmann
Add CONFIG_FBDEV, add --enable-fbdev and --disable-fbdev
configure switches so fbdev can be enabled/disabled at
compile time.

Signed-off-by: Gerd Hoffmann kra...@redhat.com
---
 configure|   12 
 qmp.c|2 +-
 ui/Makefile.objs |2 +-
 vl.c |4 ++--
 4 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/configure b/configure
index 8564142..c4ba338 100755
--- a/configure
+++ b/configure
@@ -148,6 +148,7 @@ docs=
 fdt=
 nptl=
 sdl=
+fbdev=no
 virtfs=
 vnc=yes
 sparse=no
@@ -527,6 +528,7 @@ Haiku)
   usb=linux
   kvm=yes
   vhost_net=yes
+  fbdev=yes
   if [ $cpu = i386 -o $cpu = x86_64 ] ; then
 audio_possible_drivers=$audio_possible_drivers fmod
   fi
@@ -658,6 +660,10 @@ for opt do
   ;;
   --enable-sdl) sdl=yes
   ;;
+  --disable-fbdev) fbdev=no
+  ;;
+  --enable-fbdev) fbdev=yes
+  ;;
   --disable-virtfs) virtfs=no
   ;;
   --enable-virtfs) virtfs=yes
@@ -1070,6 +1076,8 @@ echo   --disable-strip  disable stripping 
binaries
 echo   --disable-werror disable compilation abort on warning
 echo   --disable-sdldisable SDL
 echo   --enable-sdl enable SDL
+echo   --disable-fbdev  disable linux framebuffer
+echo   --enable-fbdev   enable linux framebuffer
 echo   --disable-virtfs disable VirtFS
 echo   --enable-virtfs  enable VirtFS
 echo   --disable-vncdisable VNC
@@ -3159,6 +3167,7 @@ if test $darwin = yes ; then
 echo Cocoa support $cocoa
 fi
 echo SDL support   $sdl
+echo fbdev support $fbdev
 echo curses support$curses
 echo curl support  $curl
 echo mingw32 support   $mingw32
@@ -3367,6 +3376,9 @@ if test $sdl = yes ; then
   echo CONFIG_SDL=y  $config_host_mak
   echo SDL_CFLAGS=$sdl_cflags  $config_host_mak
 fi
+if test $fbdev = yes ; then
+  echo CONFIG_FBDEV=y  $config_host_mak
+fi
 if test $cocoa = yes ; then
   echo CONFIG_COCOA=y  $config_host_mak
 fi
diff --git a/qmp.c b/qmp.c
index 56e007f..712ab58 100644
--- a/qmp.c
+++ b/qmp.c
@@ -393,7 +393,7 @@ void qmp_change(const char *device, const char *target,
 
 void qmp_framebuffer_display(bool enable, Error **errp)
 {
-#if defined(CONFIG_LINUX)
+#if defined(CONFIG_FBDEV)
 DisplayState *ds = get_displaystate();
 
 if (enable) {
diff --git a/ui/Makefile.objs b/ui/Makefile.objs
index 55ddcf2..479cd01 100644
--- a/ui/Makefile.objs
+++ b/ui/Makefile.objs
@@ -12,4 +12,4 @@ common-obj-$(CONFIG_SDL) += sdl.o sdl_zoom.o x_keymap.o
 common-obj-$(CONFIG_COCOA) += cocoa.o
 common-obj-$(CONFIG_CURSES) += curses.o
 common-obj-$(CONFIG_VNC) += $(vnc-obj-y)
-common-obj-$(CONFIG_LINUX) += fbdev.o
+common-obj-$(CONFIG_FBDEV) += fbdev.o
diff --git a/vl.c b/vl.c
index 85f2d37..4159035 100644
--- a/vl.c
+++ b/vl.c
@@ -3041,7 +3041,7 @@ int main(int argc, char **argv, char **envp)
 fprintf(stderr, SDL support is disabled\n);
 exit(1);
 #endif
-#ifdef CONFIG_LINUX
+#ifdef CONFIG_FBDEV
 case QEMU_OPTION_fbdev:
 display_type = DT_FBDEV;
 break;
@@ -3686,7 +3686,7 @@ int main(int argc, char **argv, char **envp)
 curses_display_init(ds, full_screen);
 break;
 #endif
-#if defined(CONFIG_LINUX)
+#if defined(CONFIG_FBDEV)
 case DT_FBDEV:
 {
 Error *errp = NULL;
-- 
1.7.1




Re: [Qemu-devel] [PATCH V3 10/11] vcpu: introduce lockmap

2012-09-19 Thread Edgar E. Iglesias
On Wed, Sep 19, 2012 at 10:55:30AM +0300, Avi Kivity wrote:
 On 09/19/2012 07:40 AM, Peter Crosthwaite wrote:
  On Wed, Sep 19, 2012 at 2:32 PM, Edgar E. Iglesias
  edgar.igles...@gmail.com wrote:
  On Wed, Sep 19, 2012 at 02:25:48PM +1000, Peter Crosthwaite wrote:
  Ping for PMM,
 
  This is the root case of your block on the SDHCI series - this is a
  discussion on resolution to bogus infinite looping DMA. For current
  participants in this discussion, heres our thread on the same topic
  over in SD land:
 
  http://lists.gnu.org/archive/html/qemu-devel/2012-08/msg01017.html
 
  With the findings here and acknowledgement that this is a general
  problem, we either need to declare this issue of scope for SDHCI, or
  work with these guys (in the immediate future) on the DMA infinite
  loop problem flagged here. I dont mind if SDHCI+ADMA is the guinea pig
  here, but can we get a decisive plan for going forward with this issue
  if it is going to continue to block SDHCI.
 
  Thanks to Igor for identifying the overlap between these discussions.
 
  Hi,
 
  A couple of dumb questions.
 
  What is the reason for the blocker? that possible guest dos is worse
  than no functionality at all?
 
  Can't you put the DMA/IO processing into the background?
  
  I dont know a way do do asynchronous DMA unless I am missing something
  here.
 
 You could schedule a bottom half and do the accesses from there.  Solves
 nothing though.
 
  So what happens is we have a device that walks a SG list
  synchronously while holding all the locks and stuff being discussed
  here. If that SG list infinite loops then crash.
 
 Did you mean loop, or recursion into the memory region that initiates DMA?

I think we were discussing the loop issue (I hadn't thought of recursion
issues before) :)

Jan's point of resetability was interesting.

Processing a finite amount of dma descriptors and scheduling bh's
to continously get back and continue processing should be OK no?

That would avoid the lockups and allow the device to be reset at
any time. Or am I missing something?

IIRC the etrax dma does something like that but only for receive
dma rings...

Cheers,
Edgar



[Qemu-devel] [PATCH 7/9] fbdev: move to pixman

2012-09-19 Thread Gerd Hoffmann
Stop reinventing the wheel, use the pixman library for raster ops.
Also add setdata callback.

Signed-off-by: Gerd Hoffmann kra...@redhat.com
---
 configure  |   12 
 ui/fbdev.c |  182 +---
 2 files changed, 124 insertions(+), 70 deletions(-)

diff --git a/configure b/configure
index c4ba338..d10ff78 100755
--- a/configure
+++ b/configure
@@ -148,6 +148,7 @@ docs=
 fdt=
 nptl=
 sdl=
+pixman=
 fbdev=no
 virtfs=
 vnc=yes
@@ -2153,6 +2154,17 @@ else
 exit 1
 fi
 
+if $pkg_config pixman-1  /dev/null 21
+then
+pixman=yes
+pixman_cflags=`$pkg_config --cflags pixman-1 2/dev/null`
+pixman_libs=`$pkg_config --libs pixman-1 2/dev/null`
+QEMU_CFLAGS=$QEMU_CFLAGS $pixman_cflags
+libs_softmmu=$libs_softmmu $pixman_libs
+else
+fbdev=no
+fi
+
 ##
 # libcap probe
 
diff --git a/ui/fbdev.c b/ui/fbdev.c
index 6dfbacc..d55850e 100644
--- a/ui/fbdev.c
+++ b/ui/fbdev.c
@@ -23,11 +23,12 @@
 #include linux/vt.h
 #include linux/fb.h
 
+#include pixman.h
+
 #include qemu-common.h
 #include console.h
 #include keymaps.h
 #include sysemu.h
-#include pflib.h
 
 /*
  * must be last so we get the linux input layer
@@ -70,19 +71,82 @@ static bool   key_down[KEY_CNT];
 #define FB_ACQ_REQ   3
 static int fb_switch_state;
 
-/* qdev windup */
+/* qemu windup */
 static DisplayChangeListener  *dcl;
-static QemuPfConv *conv;
-static PixelFormatfbpf;
 static intresize_screen;
 static intredraw_screen;
 static intcx, cy, cw, ch;
 static Notifier   exit_notifier;
+static pixman_image_t *surface;
+static pixman_image_t *framebuffer;
+static pixman_transform_t transform;
+static pixman_region16_t  dirty;
 
 /* fwd decls */
 static int fbdev_activate_vt(int tty, int vtno, bool wait);
 
 /*  */
+/* pixman helpers   */
+
+static int pixman_shifts_to_type(int rshift, int gshift, int bshift)
+{
+int type = PIXMAN_TYPE_OTHER;
+
+if (rshift  gshift  gshift  bshift) {
+if (bshift == 0) {
+type = PIXMAN_TYPE_ARGB;
+} else {
+#if PIXMAN_VERSION = PIXMAN_VERSION_ENCODE(0, 21, 8)
+type = PIXMAN_TYPE_RGBA;
+#endif
+}
+} else if (rshift  gshift  gshift  bshift) {
+if (rshift == 0) {
+type = PIXMAN_TYPE_ABGR;
+} else {
+type = PIXMAN_TYPE_BGRA;
+}
+}
+return type;
+}
+
+static pixman_image_t *pixman_from_displaystate(DisplayState *ds)
+{
+PixelFormat *pf = ds-surface-pf;
+pixman_format_code_t format;
+pixman_image_t *image;
+int type;
+
+type = pixman_shifts_to_type(pf-rshift, pf-gshift, pf-bshift);
+format = PIXMAN_FORMAT(pf-bits_per_pixel, type,
+   pf-abits, pf-rbits, pf-gbits, pf-bbits);
+image = pixman_image_create_bits(format, ds_get_width(ds),
+ ds_get_height(ds),
+ (void *)ds_get_data(ds),
+ ds_get_linesize(ds));
+return image;
+}
+
+static pixman_image_t *pixman_from_framebuffer(void)
+{
+pixman_format_code_t format;
+pixman_image_t *image;
+int type;
+
+type = pixman_shifts_to_type(fb_var.red.offset,
+ fb_var.green.offset,
+ fb_var.blue.offset);
+format = PIXMAN_FORMAT(fb_var.bits_per_pixel, type,
+   fb_var.transp.length,
+   fb_var.red.length,
+   fb_var.green.length,
+   fb_var.blue.length);
+image = pixman_image_create_bits(format, fb_var.xres, fb_var.yres,
+ (void *)fb_mem, fb_fix.line_length);
+return image;
+}
+
+/*  */
 /* mouse*/
 
 static void read_mouse(void *opaque)
@@ -529,6 +593,17 @@ static void fbdev_cleanup(void)
 {
 trace_fbdev_cleanup();
 
+/* release pixman stuff */
+pixman_region_fini(dirty);
+if (framebuffer) {
+pixman_image_unref(framebuffer);
+framebuffer = NULL;
+}
+if (surface) {
+pixman_image_unref(surface);
+surface = NULL;
+}
+
 /* restore console */
 if (fb_mem != NULL) {
 munmap(fb_mem, fb_fix.smem_len+fb_mem_offset);
@@ -682,36 +757,8 @@ static int fbdev_init(const char *device, Error **err)
 start_mediumraw(tty);
 qemu_set_fd_handler(tty, read_mediumraw, NULL, NULL);
 
-/* create PixelFormat from fbdev structs */
-fbpf.bits_per_pixel  = fb_var.bits_per_pixel;
-

[Qemu-devel] [PATCH 1/9] QLIST-ify display change listeners.

2012-09-19 Thread Gerd Hoffmann
Signed-off-by: Gerd Hoffmann kra...@redhat.com
---
 console.h  |   72 +++
 hw/xenfb.c |2 +-
 vl.c   |9 ++-
 3 files changed, 42 insertions(+), 41 deletions(-)

diff --git a/console.h b/console.h
index f990684..646ad4b 100644
--- a/console.h
+++ b/console.h
@@ -164,7 +164,7 @@ struct DisplayChangeListener {
  int w, int h, uint32_t c);
 void (*dpy_text_cursor)(struct DisplayState *s, int x, int y);
 
-struct DisplayChangeListener *next;
+QLIST_ENTRY(DisplayChangeListener) next;
 };
 
 struct DisplayAllocator {
@@ -179,7 +179,7 @@ struct DisplayState {
 struct QEMUTimer *gui_timer;
 
 struct DisplayAllocator* allocator;
-struct DisplayChangeListener* listeners;
+QLIST_HEAD(, DisplayChangeListener) listeners;
 
 void (*mouse_set)(int x, int y, int on);
 void (*cursor_define)(QEMUCursor *cursor);
@@ -231,72 +231,76 @@ static inline int is_buffer_shared(DisplaySurface 
*surface)
 
 static inline void register_displaychangelistener(DisplayState *ds, 
DisplayChangeListener *dcl)
 {
-dcl-next = ds-listeners;
-ds-listeners = dcl;
+QLIST_INSERT_HEAD(ds-listeners, dcl, next);
 }
 
 static inline void dpy_update(DisplayState *s, int x, int y, int w, int h)
 {
-struct DisplayChangeListener *dcl = s-listeners;
-while (dcl != NULL) {
+struct DisplayChangeListener *dcl;
+QLIST_FOREACH(dcl, s-listeners, next) {
 dcl-dpy_update(s, x, y, w, h);
-dcl = dcl-next;
 }
 }
 
 static inline void dpy_resize(DisplayState *s)
 {
-struct DisplayChangeListener *dcl = s-listeners;
-while (dcl != NULL) {
+struct DisplayChangeListener *dcl;
+QLIST_FOREACH(dcl, s-listeners, next) {
 dcl-dpy_resize(s);
-dcl = dcl-next;
 }
 }
 
 static inline void dpy_setdata(DisplayState *s)
 {
-struct DisplayChangeListener *dcl = s-listeners;
-while (dcl != NULL) {
-if (dcl-dpy_setdata) dcl-dpy_setdata(s);
-dcl = dcl-next;
+struct DisplayChangeListener *dcl;
+QLIST_FOREACH(dcl, s-listeners, next) {
+if (dcl-dpy_setdata) {
+dcl-dpy_setdata(s);
+}
 }
 }
 
 static inline void dpy_refresh(DisplayState *s)
 {
-struct DisplayChangeListener *dcl = s-listeners;
-while (dcl != NULL) {
-if (dcl-dpy_refresh) dcl-dpy_refresh(s);
-dcl = dcl-next;
+struct DisplayChangeListener *dcl;
+QLIST_FOREACH(dcl, s-listeners, next) {
+if (dcl-dpy_refresh) {
+dcl-dpy_refresh(s);
+}
 }
 }
 
 static inline void dpy_copy(struct DisplayState *s, int src_x, int src_y,
- int dst_x, int dst_y, int w, int h) {
-struct DisplayChangeListener *dcl = s-listeners;
-while (dcl != NULL) {
-if (dcl-dpy_copy)
+ int dst_x, int dst_y, int w, int h)
+{
+struct DisplayChangeListener *dcl;
+QLIST_FOREACH(dcl, s-listeners, next) {
+if (dcl-dpy_copy) {
 dcl-dpy_copy(s, src_x, src_y, dst_x, dst_y, w, h);
-else /* TODO */
+} else { /* TODO */
 dcl-dpy_update(s, dst_x, dst_y, w, h);
-dcl = dcl-next;
+}
 }
 }
 
 static inline void dpy_fill(struct DisplayState *s, int x, int y,
- int w, int h, uint32_t c) {
-struct DisplayChangeListener *dcl = s-listeners;
-while (dcl != NULL) {
-if (dcl-dpy_fill) dcl-dpy_fill(s, x, y, w, h, c);
-dcl = dcl-next;
+ int w, int h, uint32_t c)
+{
+struct DisplayChangeListener *dcl;
+QLIST_FOREACH(dcl, s-listeners, next) {
+if (dcl-dpy_fill) {
+dcl-dpy_fill(s, x, y, w, h, c);
+}
 }
 }
 
-static inline void dpy_cursor(struct DisplayState *s, int x, int y) {
-struct DisplayChangeListener *dcl = s-listeners;
-while (dcl != NULL) {
-if (dcl-dpy_text_cursor) dcl-dpy_text_cursor(s, x, y);
-dcl = dcl-next;
+static inline void dpy_cursor(struct DisplayState *s, int x, int y)
+{
+struct DisplayChangeListener *dcl;
+QLIST_FOREACH(dcl, s-listeners, next) {
+if (dcl-dpy_text_cursor) {
+dcl-dpy_text_cursor(s, x, y);
+}
 }
 }
 
diff --git a/hw/xenfb.c b/hw/xenfb.c
index 338800a..ef24c33 100644
--- a/hw/xenfb.c
+++ b/hw/xenfb.c
@@ -717,7 +717,7 @@ static void xenfb_update(void *opaque)
if (xenfb_queue_full(xenfb))
return;
 
-for (l = xenfb-c.ds-listeners; l != NULL; l = l-next) {
+QLIST_FOREACH(l, xenfb-c.ds-listeners, next) {
 if (l-idle)
 continue;
 idle = 0;
diff --git a/vl.c b/vl.c
index 7c577fa..2a7c92a 100644
--- a/vl.c
+++ b/vl.c
@@ -1276,15 +1276,14 @@ static void gui_update(void *opaque)
 {
 uint64_t interval = GUI_REFRESH_INTERVAL;
 DisplayState *ds = opaque;
-DisplayChangeListener *dcl = ds-listeners;
+DisplayChangeListener *dcl;
 
 

Re: [Qemu-devel] [PATCH] Adding BAR0 for e500 PCI controller

2012-09-19 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Alexander Graf [mailto:ag...@suse.de]
 Sent: Wednesday, September 19, 2012 4:33 PM
 To: Bhushan Bharat-R65777
 Cc: qemu-devel@nongnu.org; qemu-...@nongnu.org; afaer...@suse.de; Bhushan
 Bharat-R65777
 Subject: Re: [PATCH] Adding BAR0 for e500 PCI controller
 
 
 On 19.09.2012, at 09:41, Bharat Bhushan wrote:
 
  PCI Root complex have TYPE-1 configuration header while PCI endpoint
  have type-0 configuration header. The type-1 configuration header have
  a BAR (BAR0). In Freescale PCI controller BAR0 is used for mapping pci
  address space to CCSR address space. This can used for 2 purposes: 1)
  for MSI interrupt generation 2) Allow CCSR registers access when
  configured as PCI endpoint, which I am not sure is a use case with QEMU-KVM
 guest.
 
  What I observed is that when guest read the size of BAR0 of host
  controller configuration header (TYPE1 header) then it always reads it
  as 0. When looking into the QEMU hw/ppce500_pci.c, I do not find the
  PCI controller device registering BAR0. I do not find any other
  controller also doing so may they do not use BAR0.
 
  There are two issues when BAR0 is not there (which I can think of):
  1) There should be BAR0 emulated for PCI Root comaplex (TYPE1 header)
  and when reading the size of BAR0, it should give size as per real h/w.
 
  This patch solves this problem.
 
  2) Do we need this BAR0 inbound address translation?
 When BAR0 is of non-zero size then it will be configured for
  PCI address space to local address(CCSR) space translation on inbound 
  access.
  The primary use case is for MSI interrupt generation. The device is
  configured with a address offsets in PCI address space, which will be
  translated to MSI interrupt generation MPIC registers. Currently I do
  not understand the MSI interrupt generation mechanism in QEMU and also
  IIRC we do not use QEMU MSI interrupt mechanism on e500 guest machines.
  But this BAR0 will be used when using MSI on e500.
 
  I can see one more issue, There are ATMUs emulated in
  hw/ppce500_pci.c, but i do not see these being used for address translation.
  So far that works because pci address space and local address space
  are 1:1 mapped. BAR0 inbound translation + ATMU translation will
  complete the address translation of inbound traffic.
 
  Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
  ---
  hw/pci_host.h|9 +
  hw/ppc/e500.c|   21 +
  hw/ppce500_pci.c |7 +++
  3 files changed, 37 insertions(+), 0 deletions(-)
 
  diff --git a/hw/pci_host.h b/hw/pci_host.h index 4b9c300..c1ec7eb
  100644
  --- a/hw/pci_host.h
  +++ b/hw/pci_host.h
  @@ -41,10 +41,19 @@ struct PCIHostState {
  MemoryRegion data_mem;
  MemoryRegion mmcfg;
  MemoryRegion *address_space;
  +MemoryRegion bar0;
  uint32_t config_reg;
  PCIBus *bus;
  };
 
  +typedef struct PPCE500CCSRState {
  +SysBusDevice *parent;
  +MemoryRegion ccsr_space;
  +} PPCE500CCSRState;
  +
  +#define TYPE_CCSR e500-ccsr
  +#define CCSR(obj) OBJECT_CHECK(PPCE500CCSRState, (obj), TYPE_CCSR)
  +
 
 All of this is e500 specific, so it should definitely not be in any generic 
 pci
 host header.

Yes, ok.

 
  /* common internal helpers for PCI/PCIe hosts, cut off overflows */
  void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr,
uint32_t limit, uint32_t val,
  uint32_t len); diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c index
  6f0de6d..1f5da28 100644
  --- a/hw/ppc/e500.c
  +++ b/hw/ppc/e500.c
  @@ -31,6 +31,7 @@
  #include hw/loader.h
  #include elf.h
  #include hw/sysbus.h
  +#include hw/pci_host.h
  #include exec-memory.h
  #include host-utils.h
 
  @@ -587,3 +588,23 @@ void ppce500_init(PPCE500Params *params)
  kvmppc_init();
  }
  }
  +
  +static void e500_ccsr_type_init(Object *obj) {
  +PPCE500CCSRState *ccsr = CCSR(obj);
  +ccsr-ccsr_space.size = int128_make64(MPC8544_CCSRBAR_SIZE);
  +}
  +
  +static const TypeInfo e500_ccsr_info = {
  +.name  = TYPE_CCSR,
  +.parent=  TYPE_SYS_BUS_DEVICE,
  +.instance_size = sizeof(PPCE500CCSRState),
  +.instance_init = e500_ccsr_type_init, };
  +
  +static void e500_ccsr_register_types(void) {
  +type_register_static(e500_ccsr_info);
  +}
  +
  +type_init(e500_ccsr_register_types)
  diff --git a/hw/ppce500_pci.c b/hw/ppce500_pci.c index
  92b1dc0..135f2a6 100644
  --- a/hw/ppce500_pci.c
  +++ b/hw/ppce500_pci.c
  @@ -315,6 +315,8 @@ static int e500_pcihost_initfn(SysBusDevice *dev)
  int i;
  MemoryRegion *address_space_mem = get_system_memory();
  MemoryRegion *address_space_io = get_system_io();
  +PPCE500CCSRState *ccsr;
  +PCIDevice *pdev;
 
  h = PCI_HOST_BRIDGE(dev);
  s = PPC_E500_PCI_HOST_BRIDGE(dev); @@ -342,6 +344,11 @@ static int
  e500_pcihost_initfn(SysBusDevice *dev)
  memory_region_add_subregion(s-container, PCIE500_REG_BASE, 

[Qemu-devel] [PATCH 8/9] fbdev: add mouse pointer support

2012-09-19 Thread Gerd Hoffmann
Add mouse_set and cursor_define DisplayChangeListener callbacks
and mouse pointer rendering support.

Signed-off-by: Gerd Hoffmann kra...@redhat.com
---
 ui/fbdev.c |   95 
 1 files changed, 95 insertions(+), 0 deletions(-)

diff --git a/ui/fbdev.c b/ui/fbdev.c
index d55850e..d859d84 100644
--- a/ui/fbdev.c
+++ b/ui/fbdev.c
@@ -82,6 +82,12 @@ static pixman_image_t *framebuffer;
 static pixman_transform_t transform;
 static pixman_region16_t  dirty;
 
+static QEMUCursor *ptr_cursor;
+static pixman_image_t *ptr_image;
+static intptr_refresh;
+static intpx, py, pw, ph;
+static intmx, my, mon;
+
 /* fwd decls */
 static int fbdev_activate_vt(int tty, int vtno, bool wait);
 
@@ -877,6 +883,51 @@ static void fbdev_render(DisplayState *ds)
 pixman_region_init(dirty);
 }
 
+static void fbdev_unrender_ptr(DisplayState *ds)
+{
+if (!pw  !ph) {
+return;
+}
+pixman_region_union_rect(dirty, dirty, px, py, pw, ph);
+ph = pw = 0;
+}
+
+static void fbdev_render_ptr(DisplayState *ds)
+{
+pixman_region16_t region;
+pixman_transform_t transform;
+
+if (!mon || !ptr_image) {
+return;
+}
+if (mx  0 || mx = cw || my  0 || my = ch) {
+return;
+}
+
+px = mx - ptr_cursor-hot_x;
+py = my - ptr_cursor-hot_y;
+pw = ptr_cursor-width;
+ph = ptr_cursor-height;
+
+pixman_transform_init_identity(transform);
+pixman_transform_translate(transform, NULL,
+   pixman_int_to_fixed(-cx),
+   pixman_int_to_fixed(-cy));
+pixman_transform_translate(transform, NULL,
+   pixman_int_to_fixed(-px),
+   pixman_int_to_fixed(-py));
+pixman_image_set_transform(ptr_image, transform);
+
+pixman_region_init_rect(region, 0, 0, pw, ph);
+pixman_image_set_clip_region(ptr_image, region);
+
+pixman_image_composite(PIXMAN_OP_OVER, ptr_image, NULL, framebuffer,
+   0, 0, 0, 0, 0, 0, fb_var.xres, fb_var.yres);
+
+pixman_region_fini(region);
+ptr_refresh = 0;
+}
+
 /*  */
 /* qemu interfaces  */
 
@@ -918,6 +969,9 @@ static void fbdev_update(DisplayState *ds, int x, int y, 
int w, int h)
 }
 
 pixman_region_union_rect(dirty, dirty, x, y, w, h);
+if (ptr_image  mon  pw  ph) {
+ptr_refresh++;
+}
 }
 
 static void fbdev_resize(DisplayState *ds)
@@ -954,9 +1008,48 @@ static void fbdev_refresh(DisplayState *ds)
 fbdev_update(ds, 0, 0, 0, 0);
 }
 
+if (ptr_refresh) {
+fbdev_unrender_ptr(ds);
+}
 if (pixman_region_not_empty(dirty)) {
 fbdev_render(ds);
 }
+if (ptr_refresh) {
+fbdev_render_ptr(ds);
+}
+}
+
+static void fbdev_mouse_set(DisplayState *ds, int x, int y, int on)
+{
+ptr_refresh++;
+mx = x;
+my = y;
+mon = on;
+}
+
+static void fbdev_cursor_define(DisplayState *ds, QEMUCursor *cursor)
+{
+ptr_refresh++;
+
+if (ptr_cursor) {
+cursor_put(ptr_cursor);
+ptr_cursor = NULL;
+}
+if (ptr_image) {
+pixman_image_unref(ptr_image);
+ptr_image = NULL;
+}
+
+if (!cursor) {
+return;
+}
+
+ptr_cursor = cursor;
+cursor_get(ptr_cursor);
+ptr_image = pixman_image_create_bits(PIXMAN_a8r8g8b8,
+ cursor-width, cursor-height,
+ cursor-data,
+ cursor-width * 4);
 }
 
 static void fbdev_exit_notifier(Notifier *notifier, void *data)
@@ -985,6 +1078,8 @@ int fbdev_display_init(DisplayState *ds, const char 
*device, Error **err)
 dcl-dpy_resize  = fbdev_resize;
 dcl-dpy_setdata = fbdev_setdata;
 dcl-dpy_refresh = fbdev_refresh;
+dcl-dpy_mouse_set = fbdev_mouse_set;
+dcl-dpy_cursor_define = fbdev_cursor_define;
 register_displaychangelistener(ds, dcl);
 
 trace_fbdev_enabled();
-- 
1.7.1




[Qemu-devel] [PATCH 3/9] move set_mouse + cursor_define callbacks

2012-09-19 Thread Gerd Hoffmann
When adding DisplayChangeListeners the set_mouse and cursor_define
callbacks have been left in DisplayState for some reason.  Fix it.

Signed-off-by: Gerd Hoffmann kra...@redhat.com
---
 console.c  |2 +-
 console.h  |   39 +++
 hw/jazz_led.c  |2 +-
 hw/qxl-render.c|2 +-
 hw/vga.c   |   10 +-
 hw/vmware_vga.c|   11 ++-
 ui/sdl.c   |8 
 ui/spice-display.c |4 ++--
 ui/vnc.c   |8 
 9 files changed, 59 insertions(+), 27 deletions(-)

diff --git a/console.c b/console.c
index a8bcc42..cc0479b 100644
--- a/console.c
+++ b/console.c
@@ -1239,7 +1239,7 @@ static void text_console_update(void *opaque, 
console_ch_t *chardata)
 s-text_y[1] = 0;
 }
 if (s-cursor_invalidate) {
-dpy_cursor(s-ds, s-x, s-y);
+dpy_text_cursor(s-ds, s-x, s-y);
 s-cursor_invalidate = 0;
 }
 }
diff --git a/console.h b/console.h
index 48fef22..bef2d2d 100644
--- a/console.h
+++ b/console.h
@@ -164,6 +164,9 @@ struct DisplayChangeListener {
  int w, int h, uint32_t c);
 void (*dpy_text_cursor)(struct DisplayState *s, int x, int y);
 
+void (*dpy_mouse_set)(struct DisplayState *s, int x, int y, int on);
+void (*dpy_cursor_define)(struct DisplayState *s, QEMUCursor *cursor);
+
 QLIST_ENTRY(DisplayChangeListener) next;
 };
 
@@ -181,9 +184,6 @@ struct DisplayState {
 struct DisplayAllocator* allocator;
 QLIST_HEAD(, DisplayChangeListener) listeners;
 
-void (*mouse_set)(int x, int y, int on);
-void (*cursor_define)(QEMUCursor *cursor);
-
 struct DisplayState *next;
 };
 
@@ -304,7 +304,7 @@ static inline void dpy_fill(struct DisplayState *s, int x, 
int y,
 }
 }
 
-static inline void dpy_cursor(struct DisplayState *s, int x, int y)
+static inline void dpy_text_cursor(struct DisplayState *s, int x, int y)
 {
 struct DisplayChangeListener *dcl;
 QLIST_FOREACH(dcl, s-listeners, next) {
@@ -314,6 +314,37 @@ static inline void dpy_cursor(struct DisplayState *s, int 
x, int y)
 }
 }
 
+static inline void dpy_mouse_set(struct DisplayState *s, int x, int y, int on)
+{
+struct DisplayChangeListener *dcl;
+QLIST_FOREACH(dcl, s-listeners, next) {
+if (dcl-dpy_mouse_set) {
+dcl-dpy_mouse_set(s, x, y, on);
+}
+}
+}
+
+static inline void dpy_cursor_define(struct DisplayState *s, QEMUCursor 
*cursor)
+{
+struct DisplayChangeListener *dcl;
+QLIST_FOREACH(dcl, s-listeners, next) {
+if (dcl-dpy_cursor_define) {
+dcl-dpy_cursor_define(s, cursor);
+}
+}
+}
+
+static inline bool dpy_cursor_define_supported(struct DisplayState *s)
+{
+struct DisplayChangeListener *dcl;
+QLIST_FOREACH(dcl, s-listeners, next) {
+if (dcl-dpy_cursor_define) {
+return true;
+}
+}
+return false;
+}
+
 static inline int ds_get_linesize(DisplayState *ds)
 {
 return ds-surface-linesize;
diff --git a/hw/jazz_led.c b/hw/jazz_led.c
index 6486523..c4d54e2 100644
--- a/hw/jazz_led.c
+++ b/hw/jazz_led.c
@@ -210,7 +210,7 @@ static void jazz_led_text_update(void *opaque, console_ch_t 
*chardata)
 LedState *s = opaque;
 char buf[2];
 
-dpy_cursor(s-ds, -1, -1);
+dpy_text_cursor(s-ds, -1, -1);
 qemu_console_resize(s-ds, 2, 1);
 
 /* TODO: draw the segments */
diff --git a/hw/qxl-render.c b/hw/qxl-render.c
index e2e3fe2..085a090 100644
--- a/hw/qxl-render.c
+++ b/hw/qxl-render.c
@@ -238,7 +238,7 @@ int qxl_render_cursor(PCIQXLDevice *qxl, QXLCommandExt *ext)
 return 1;
 }
 
-if (!qxl-ssd.ds-mouse_set || !qxl-ssd.ds-cursor_define) {
+if (!dpy_cursor_define_supported(qxl-ssd.ds)) {
 return 0;
 }
 
diff --git a/hw/vga.c b/hw/vga.c
index afaef0d..ec4f0c5 100644
--- a/hw/vga.c
+++ b/hw/vga.c
@@ -2081,11 +2081,11 @@ static void vga_update_text(void *opaque, console_ch_t 
*chardata)
 s-cr[VGA_CRTC_CURSOR_END] != s-cursor_end || full_update) {
 cursor_visible = !(s-cr[VGA_CRTC_CURSOR_START]  0x20);
 if (cursor_visible  cursor_offset  size  cursor_offset = 0)
-dpy_cursor(s-ds,
-   TEXTMODE_X(cursor_offset),
-   TEXTMODE_Y(cursor_offset));
+dpy_text_cursor(s-ds,
+TEXTMODE_X(cursor_offset),
+TEXTMODE_Y(cursor_offset));
 else
-dpy_cursor(s-ds, -1, -1);
+dpy_text_cursor(s-ds, -1, -1);
 s-cursor_offset = cursor_offset;
 s-cursor_start = s-cr[VGA_CRTC_CURSOR_START];
 s-cursor_end = s-cr[VGA_CRTC_CURSOR_END];
@@ -2146,7 +2146,7 @@ static void vga_update_text(void *opaque, console_ch_t 
*chardata)
 /* Display a message */
 s-last_width = 60;
 s-last_height = height = 3;
-dpy_cursor(s-ds, -1, -1);
+

[Qemu-devel] Posix aio communication

2012-09-19 Thread kumaran

Can anyone explain me how aio-thread communicates back to io thread.

I could see that it uses SIGUSR2 ,but I couldn't find the corresponding 
signal handler.






Re: [Qemu-devel] qmp: dump-guest-memory: -p option has issues, fix it or drop it?

2012-09-19 Thread Anthony Liguori
Markus Armbruster arm...@redhat.com writes:

 Anthony Liguori anth...@codemonkey.ws writes:

 Markus Armbruster arm...@redhat.com writes:

 Jan Kiszka jan.kis...@siemens.com writes:

  * The issues discussed in this email plus the fact that the guest
memory may be corrupted, and the guest may be in real-mode even
when paging is enabled


 Yes, there are some limitations with this option. Jan said that he
 always use gdb to deal with vmcore, so he needs such information.

 The point is to overcome the focus on Linux-only dump processing tools.
 
 While I don't care for supporting alternate dump processing tools
 myself, I certainly don't mind supporting them, as long as the code
 satisfies basic safety and reliability requirements.
 
 This code doesn't, as far as I can tell.

 It works, thought not under all circumstances.

 I don't doubt it works often enough to be useful to somebody.  But basic
 safety and reliability requirements are a bit more than that.  They
 include don't explode in ways a reasonable user can't be expected to
 foresee.  I don't think a reasonable user can be expected to see that
 -p is safe only for trusted guests.

 We shipped the API, we're not removing it.  Our compatibility isn't
 whatever libvirt is currently using.

 Bah, don't be more catholic than the pope.  It's been out for how many
 days?  Have you looked at the code?

 It's perfectly reasonable to ask to document the behavior of the
 method.  It's also a trivial patch to qapi-schema.json.

 I'm okay with leaving obscure security holes in upstream QEMU,
 indefinitely, as long as they're documented.  I don't think it's a good
 idea, but it's something reasonable people can disagree about.

It's *not* a security hole.

The malicious guest you mention is just a guest with fully populated
PTEs and PDEs, no?

There's nothing malicious about that.

It just so happens that the memory usage of this command is proportional
to the amount of page mappings in the guest.

I think your making a mountain out of a mole hill here.

Regards,

Anthony Liguori


 We need to document that -p is unreliable and unsafe, therefore should
 only be used with trusted guests, and its result need not be correct
 even then.

 It's unreasonable to ask for an interface to be removed just because it
 could be misused when it has a legimitate use-case.

 The issue isn't misuse (operator does something stupid), it's abuse
 (guest can make it blow up when operator uses it as intended), and
 fragility (produces crap when operator uses it as intended, but the
 guest isn't in a sufficiently nice state).



Re: [Qemu-devel] [PATCH V3 10/11] vcpu: introduce lockmap

2012-09-19 Thread Avi Kivity
On 09/19/2012 02:46 PM, Edgar E. Iglesias wrote:
 On Wed, Sep 19, 2012 at 10:55:30AM +0300, Avi Kivity wrote:
 On 09/19/2012 07:40 AM, Peter Crosthwaite wrote:
  On Wed, Sep 19, 2012 at 2:32 PM, Edgar E. Iglesias
  edgar.igles...@gmail.com wrote:
  On Wed, Sep 19, 2012 at 02:25:48PM +1000, Peter Crosthwaite wrote:
  Ping for PMM,
 
  This is the root case of your block on the SDHCI series - this is a
  discussion on resolution to bogus infinite looping DMA. For current
  participants in this discussion, heres our thread on the same topic
  over in SD land:
 
  http://lists.gnu.org/archive/html/qemu-devel/2012-08/msg01017.html
 
  With the findings here and acknowledgement that this is a general
  problem, we either need to declare this issue of scope for SDHCI, or
  work with these guys (in the immediate future) on the DMA infinite
  loop problem flagged here. I dont mind if SDHCI+ADMA is the guinea pig
  here, but can we get a decisive plan for going forward with this issue
  if it is going to continue to block SDHCI.
 
  Thanks to Igor for identifying the overlap between these discussions.
 
  Hi,
 
  A couple of dumb questions.
 
  What is the reason for the blocker? that possible guest dos is worse
  than no functionality at all?
 
  Can't you put the DMA/IO processing into the background?
  
  I dont know a way do do asynchronous DMA unless I am missing something
  here.
 
 You could schedule a bottom half and do the accesses from there.  Solves
 nothing though.
 
  So what happens is we have a device that walks a SG list
  synchronously while holding all the locks and stuff being discussed
  here. If that SG list infinite loops then crash.
 
 Did you mean loop, or recursion into the memory region that initiates DMA?
 
 I think we were discussing the loop issue (I hadn't thought of recursion
 issues before) :)
 
 Jan's point of resetability was interesting.
 
 Processing a finite amount of dma descriptors and scheduling bh's
 to continously get back and continue processing should be OK no?
 

Yes.

 That would avoid the lockups and allow the device to be reset at
 any time. Or am I missing something?
 

Not that I can see.  If real hardware can be looped, so can qemu.  I'm
only worried about recursion and deadlocks (while real hardware can
deadlock, we'd prefer to avoid that).


-- 
error compiling committee.c: too many arguments to function



Re: [Qemu-devel] [0/13] pseries patch queue

2012-09-19 Thread Alexander Graf

On 13.09.2012, at 04:57, David Gibson wrote:

 Hi Alex,
 
 Here's my current set of ready-to-merge pseries related patches.  This
 includes the latest revisions of the patches to fix system reset which
 I posted before, plus a number of other small bugfixes and cleanups.

Thanks, applied all to ppc-next.


Alex




Re: [Qemu-devel] [RfC PATCH] vga: add mmio bar to standard vga

2012-09-19 Thread Avi Kivity
On 09/19/2012 02:35 PM, Gerd Hoffmann wrote:

 Looks like word writes are supported provided the memory API breaks up
 writes in little endian order.  Better to make it explicit.
 
 Like the attached incremental patch?

Very like.


-- 
error compiling committee.c: too many arguments to function



Re: [Qemu-devel] [PATCH V3 10/11] vcpu: introduce lockmap

2012-09-19 Thread Edgar E. Iglesias
On Wed, Sep 19, 2012 at 03:12:12PM +0300, Avi Kivity wrote:
 On 09/19/2012 02:46 PM, Edgar E. Iglesias wrote:
  On Wed, Sep 19, 2012 at 10:55:30AM +0300, Avi Kivity wrote:
  On 09/19/2012 07:40 AM, Peter Crosthwaite wrote:
   On Wed, Sep 19, 2012 at 2:32 PM, Edgar E. Iglesias
   edgar.igles...@gmail.com wrote:
   On Wed, Sep 19, 2012 at 02:25:48PM +1000, Peter Crosthwaite wrote:
   Ping for PMM,
  
   This is the root case of your block on the SDHCI series - this is a
   discussion on resolution to bogus infinite looping DMA. For current
   participants in this discussion, heres our thread on the same topic
   over in SD land:
  
   http://lists.gnu.org/archive/html/qemu-devel/2012-08/msg01017.html
  
   With the findings here and acknowledgement that this is a general
   problem, we either need to declare this issue of scope for SDHCI, or
   work with these guys (in the immediate future) on the DMA infinite
   loop problem flagged here. I dont mind if SDHCI+ADMA is the guinea pig
   here, but can we get a decisive plan for going forward with this issue
   if it is going to continue to block SDHCI.
  
   Thanks to Igor for identifying the overlap between these discussions.
  
   Hi,
  
   A couple of dumb questions.
  
   What is the reason for the blocker? that possible guest dos is worse
   than no functionality at all?
  
   Can't you put the DMA/IO processing into the background?
   
   I dont know a way do do asynchronous DMA unless I am missing something
   here.
  
  You could schedule a bottom half and do the accesses from there.  Solves
  nothing though.
  
   So what happens is we have a device that walks a SG list
   synchronously while holding all the locks and stuff being discussed
   here. If that SG list infinite loops then crash.
  
  Did you mean loop, or recursion into the memory region that initiates DMA?
  
  I think we were discussing the loop issue (I hadn't thought of recursion
  issues before) :)
  
  Jan's point of resetability was interesting.
  
  Processing a finite amount of dma descriptors and scheduling bh's
  to continously get back and continue processing should be OK no?
  
 
 Yes.
 
  That would avoid the lockups and allow the device to be reset at
  any time. Or am I missing something?
  
 
 Not that I can see.  If real hardware can be looped, so can qemu.  I'm
 only worried about recursion and deadlocks (while real hardware can
 deadlock, we'd prefer to avoid that).

Agreed, thanks



Re: [Qemu-devel] Shifts, ppc[64], xtensa

2012-09-19 Thread malc
On Tue, 18 Sep 2012, Richard Henderson wrote:

 On 09/18/2012 12:52 PM, malc wrote:
   case INDEX_op_shl_i32:
   if (const_args[2]) {
  +if (args[2]  31) {
  +tcg_out_movi (s, TCG_TYPE_I32, 0, args[2]);
  +tcg_out32 (s, SLW | SAB (args[1], args[0], 0));
  +}
 
 What's this bit for?

This bit is to offset for the fact that i haven't slept in a while, and...

 
 AFAIK all you should need are the added  31 below.
 

Not really - due to the 0 corner case, following should be better:

diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index 26c4b33..784b157 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -409,6 +409,7 @@ static int tcg_target_const_match(tcg_target_long val,
 
 #define TW XO31(4)
 #define TRAP   (TW | TO (31))
+#define NOP0x6000
 
 #define RT(r) ((r)21)
 #define RS(r) ((r)21)
@@ -1306,10 +1307,10 @@ void ppc_tb_set_jmp_target (unsigned long jmp_addr, 
unsigned long addr)
 *ptr = 0x4800 | (disp  0x03fc); /* b disp */
 patch_size = 4;
 } else {
-ptr[0] = 0x6000; /* nop */
-ptr[1] = 0x6000;
-ptr[2] = 0x6000;
-ptr[3] = 0x6000;
+ptr[0] = NOP;
+ptr[1] = NOP;
+ptr[2] = NOP;
+ptr[3] = NOP;
 patch_size = 16;
 }
 }
@@ -1330,7 +1331,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, 
const TCGArg *args,
 /* direct jump method */
 
 s-tb_jmp_offset[args[0]] = s-code_ptr - s-code_buf;
-s-code_ptr += 16;
+tcg_out32 (s, NOP);
+tcg_out32 (s, NOP);
+tcg_out32 (s, NOP);
+tcg_out32 (s, NOP);
 }
 else {
 tcg_abort ();
@@ -1565,35 +1569,54 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, 
const TCGArg *args,
 
 case INDEX_op_shl_i32:
 if (const_args[2]) {
-tcg_out32 (s, (RLWINM
-   | RA (args[0])
-   | RS (args[1])
-   | SH (args[2])
-   | MB (0)
-   | ME (31 - args[2])
-   )
-);
+int sh = args[2]  31;
+if (!sh) {
+tcg_out_mov (s, TCG_TYPE_I32, args[0], args[1]);
+}
+else {
+tcg_out32 (s, (RLWINM
+   | RA (args[0])
+   | RS (args[1])
+   | SH (sh)
+   | MB (0)
+   | ME (31 - sh)
+   )
+);
+}
 }
 else
 tcg_out32 (s, SLW | SAB (args[1], args[0], args[2]));
 break;
 case INDEX_op_shr_i32:
 if (const_args[2]) {
-tcg_out32 (s, (RLWINM
-   | RA (args[0])
-   | RS (args[1])
-   | SH (32 - args[2])
-   | MB (args[2])
-   | ME (31)
-   )
-);
+int sh = args[2]  31;
+if (!sh) {
+tcg_out_mov (s, TCG_TYPE_I32, args[0], args[1]);
+}
+else {
+tcg_out32 (s, (RLWINM
+   | RA (args[0])
+   | RS (args[1])
+   | SH (32 - sh)
+   | MB (sh)
+   | ME (31)
+   )
+);
+}
 }
 else
 tcg_out32 (s, SRW | SAB (args[1], args[0], args[2]));
 break;
 case INDEX_op_sar_i32:
-if (const_args[2])
-tcg_out32 (s, SRAWI | RS (args[1]) | RA (args[0]) | SH (args[2]));
+if (const_args[2]) {
+int sh = args[2]  31;
+if (!sh) {
+tcg_out_mov (s, TCG_TYPE_I32, args[0], args[1]);
+}
+else {
+tcg_out32 (s, SRAWI | RS (args[1]) | RA (args[0]) | SH (sh));
+}
+}
 else
 tcg_out32 (s, SRAW | SAB (args[1], args[0], args[2]));
 break;
@@ -1604,7 +1627,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, 
const TCGArg *args,
 | RS (args[1])
 | MB (0)
 | ME (31)
-| (const_args[2] ? RLWINM | SH (args[2])
+| (const_args[2] ? RLWINM | SH (args[2]  31)
  : RLWNM | RB (args[2]))
 ;
 tcg_out32 (s, op);


This expects deposit/rotr constant arguments to be sane,
i'm no longer certain this assumption is a sane one though...

-- 
mailto:av1...@comtv.ru



Re: [Qemu-devel] Shifts, ppc[64], xtensa

2012-09-19 Thread malc
On Wed, 19 Sep 2012, Max Filippov wrote:

 On Tue, Sep 18, 2012 at 11:52 PM, malc av1...@comtv.ru wrote:
 
  Looks like PPC/PPC64 is also hit by shift issues, on top of that xtensa
 
 malc, could you please expand a little bit what are these shift issues?
 (sounds like a modern trend, I must have missed something)
 

Just audit op_opt output of extensa on 32bit host for shr_i32, i'm getting
things like:

OP after optimization and liveness analysis:
 movi_i32 tmp0,$0xd00793f4
 movi_i32 tmp1,$0x1
 movi_i32 tmp2,$0x1
 movi_i32 tmp3,$advance_ccount
 call tmp3,$0x0,$0,env,tmp2
 movi_i32 tmp2,$window_check
 call tmp2,$0x0,$0,env,tmp0,tmp1
 movi_i32 tmp0,$0x1
 add_i32 ar4,ar4,tmp0
 movi_i32 tmp1,$0xd00793f6
 movi_i32 tmp2,$0x3
 movi_i32 tmp3,$0x1
 movi_i32 tmp4,$advance_ccount
 call tmp4,$0x0,$0,env,tmp3
 movi_i32 tmp3,$window_check
 call tmp3,$0x0,$0,env,tmp1,tmp2
 mov_i32 tmp0,ar4
 qemu_ld8u ar12,tmp0,$0x0
 movi_i32 tmp0,$0xffe0
 add_i32 ar9,ar12,tmp0

 movi_i32 tmp1,$0x40
 shr_i32 tmp0,ar9,tmp1


# I don't get the above
 movi_i32 tmp1,$0xff
 and_i32 ar9,tmp0,tmp1
# Nor the continuation

 movi_i32 tmp0,$0x3
 movi_i32 tmp1,$advance_ccount
 call tmp1,$0x0,$0,env,tmp0
 brcond_i32 ar6,ar9,ltu,$0x0
 nopn $0x2,$0x2
 movi_i32 pc,$0xd0079402
 goto_tb $0x0
 exit_tb $0x4a036c68
 set_label $0x0
 nopn $0x2,$0x2
 movi_i32 pc,$0xd0079430
 goto_tb $0x1
 exit_tb $0x4a036c69
 end

Just adding some debugging printfs to constant shift paths in tcg target
can also be useful.

  exposed another bug in power's tcg - gototb's target was expected to be
  always filled via tb_set_jmp_target (even though it's clearly not what
  tcg/README prescribes, sorry about that).
 
 

-- 
mailto:av1...@comtv.ru



Re: [Qemu-devel] directory hierarchy

2012-09-19 Thread Avi Kivity
On 09/14/2012 10:51 PM, Blue Swirl wrote:

 exec:
 
 These files need cleanup so that TCG code gets into tcg/. Maybe also
 TB and CPUTLB handling.

Some of that could be done by adding a separate MemoryListener for tcg.


-- 
error compiling committee.c: too many arguments to function



Re: [Qemu-devel] Shifts, ppc[64], xtensa

2012-09-19 Thread Peter Maydell
On 18 September 2012 20:52, malc av1...@comtv.ru wrote:

 Looks like PPC/PPC64 is also hit by shift issues, on top of that xtensa
 exposed another bug in power's tcg - gototb's target was expected to be
 always filled via tb_set_jmp_target (even though it's clearly not what
 tcg/README prescribes, sorry about that).

 Thanks to Max Filippov for pointing to xtensa test suite that helped to
 narrow the search to gototb.

 Testing of the following with other targets on ppc flavours is welcome..

 P.S. Xtensa does mighty weird things with shifts i must say...

 diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
 index 26c4b33..08f62fa 100644
 --- a/tcg/ppc/tcg-target.c
 +++ b/tcg/ppc/tcg-target.c
 @@ -409,6 +409,7 @@ static int tcg_target_const_match(tcg_target_long val,

  #define TW XO31(4)
  #define TRAP   (TW | TO (31))
 +#define NOP0x6000

  #define RT(r) ((r)21)
  #define RS(r) ((r)21)
 @@ -1306,10 +1307,10 @@ void ppc_tb_set_jmp_target (unsigned long jmp_addr, 
 unsigned long addr)
  *ptr = 0x4800 | (disp  0x03fc); /* b disp */
  patch_size = 4;
  } else {
 -ptr[0] = 0x6000; /* nop */
 -ptr[1] = 0x6000;
 -ptr[2] = 0x6000;
 -ptr[3] = 0x6000;
 +ptr[0] = NOP;
 +ptr[1] = NOP;
 +ptr[2] = NOP;
 +ptr[3] = NOP;
  patch_size = 16;
  }
  }
 @@ -1330,7 +1331,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, 
 const TCGArg *args,
  /* direct jump method */

  s-tb_jmp_offset[args[0]] = s-code_ptr - s-code_buf;
 -s-code_ptr += 16;
 +tcg_out32 (s, NOP);
 +tcg_out32 (s, NOP);
 +tcg_out32 (s, NOP);
 +tcg_out32 (s, NOP);

Not too familiar with the PPC backend, but doesn't this mean that
in the retranslation case we will overwrite a correct jump destination
with these NOP words and then rewrite it again with the correct
destination? That can cause problems with cache incoherency;
compare the fix applied in commit c69806ab8276 for ARM.

thanks
-- PMM



Re: [Qemu-devel] [PATCH V3 10/11] vcpu: introduce lockmap

2012-09-19 Thread Igor Mitsyanko

On 09/19/2012 04:12 PM, Avi Kivity wrote:

On 09/19/2012 02:46 PM, Edgar E. Iglesias wrote:

On Wed, Sep 19, 2012 at 10:55:30AM +0300, Avi Kivity wrote:

On 09/19/2012 07:40 AM, Peter Crosthwaite wrote:

On Wed, Sep 19, 2012 at 2:32 PM, Edgar E. Iglesias
edgar.igles...@gmail.com wrote:

On Wed, Sep 19, 2012 at 02:25:48PM +1000, Peter Crosthwaite wrote:

Ping for PMM,

This is the root case of your block on the SDHCI series - this is a
discussion on resolution to bogus infinite looping DMA. For current
participants in this discussion, heres our thread on the same topic
over in SD land:

http://lists.gnu.org/archive/html/qemu-devel/2012-08/msg01017.html

With the findings here and acknowledgement that this is a general
problem, we either need to declare this issue of scope for SDHCI, or
work with these guys (in the immediate future) on the DMA infinite
loop problem flagged here. I dont mind if SDHCI+ADMA is the guinea pig
here, but can we get a decisive plan for going forward with this issue
if it is going to continue to block SDHCI.

Thanks to Igor for identifying the overlap between these discussions.

Hi,

A couple of dumb questions.

What is the reason for the blocker? that possible guest dos is worse
than no functionality at all?

Can't you put the DMA/IO processing into the background?

I dont know a way do do asynchronous DMA unless I am missing something
here.

You could schedule a bottom half and do the accesses from there.  Solves
nothing though.


So what happens is we have a device that walks a SG list
synchronously while holding all the locks and stuff being discussed
here. If that SG list infinite loops then crash.

Did you mean loop, or recursion into the memory region that initiates DMA?

I think we were discussing the loop issue (I hadn't thought of recursion
issues before) :)

Jan's point of resetability was interesting.

Processing a finite amount of dma descriptors and scheduling bh's
to continously get back and continue processing should be OK no?


Yes.


Bottom half idea sounds good.
Also, for this particular device, rather then limiting amount of 
processed descriptor entries in bottom half, it seems reasonable to 
interrupt bh handler if we encounter a LINK descriptor since only 
recursive LINKs can generate infinite loops. In that case, a very long 
descriptor table without a single LINK entry could potentially hung QEMU 
for a long time, but that time will be finite anyway. Long term, this 
problem will disappear when/if someone converts QEMU SD card model to 
use async io.

That would avoid the lockups and allow the device to be reset at
any time. Or am I missing something?


Not that I can see.  If real hardware can be looped, so can qemu.  I'm
only worried about recursion and deadlocks (while real hardware can
deadlock, we'd prefer to avoid that).




So, I think the idea here is that if real hardware can be locked we 
should lock too, but provide a guest CPU a possibility to abort locked 
operation. For this particular example, SD card controller can deadlock 
on recursive descriptor LINK entries, but CPU can abort ongoing 
transaction at any time by issuing ABORT command. And if guest CPU 
busy-waits in infinite while() loop for a TRANSFER OVER flag to be set, 
then it had it coming.





Re: [Qemu-devel] [PATCH 010/126] target-s390: Reorg exception handling

2012-09-19 Thread Alexander Graf

On 19.09.2012, at 13:34, Alexander Graf wrote:

 
 On 19.09.2012, at 13:29, Peter Maydell wrote:
 
 On 19 September 2012 01:14, Richard Henderson r...@twiddle.net wrote:
 On 09/18/2012 01:18 PM, Alexander Graf wrote:
 -/* remember what pgm exeption this was */
 +/* Remember what pgm exeption this was.  */
 
 ...if you're changing this comment then at least fix the typo
 (exception)...
 
 tmp = tcg_const_i32(code);
 tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUS390XState, int_pgm_code));
 tcg_temp_free_i32(tmp);
 -tmp = tcg_const_i32(ilc);
 +tmp = tcg_const_i32(s-next_pc - s-pc);
 
 Mind to explain this one?
 
 ILC = the size of the insn.  Rather than passing ILC around into
 gen_program_exception, get it back from the s-next_pc value that
 we stored into DisasContext.
 
 ...but isn't (s-next_pc - s-pc) ilc*2, not ilc? Compare this hunk
 from elsewhere in the patch:
 
 -tmp32_2 = tcg_const_i32(ilc * 2);
 -tmp32_3 = tcg_const_i32(EXCP_SVC);
 +tmp32_2 = tcg_const_i32(s-next_pc - s-pc);
 
 Yes, it is, and hence it's correct. The exception field encodes the real 
 instruction length, not the ILC bits in the opcode. Maybe the naming is 
 unfortunate...

Or maybe not. For SVC we seem to pass in the full instruction length, while PGM 
only gets ILC. So the above hunk is wrong.

Richard, please double-check all the ilc passing again with the spec.


Alex




Re: [Qemu-devel] [PATCH 0/5] tcg: movcond

2012-09-19 Thread malc
On Tue, 18 Sep 2012, Aurelien Jarno wrote:

 On Tue, Sep 18, 2012 at 07:23:55AM -0700, Richard Henderson wrote:
  As recently discussed, with the optional fallback to setcond.
  
  I include a patch to target-alpha to test correctness both
  before and after implementing the opcode in the i386 backend,
  as well as in the optimizations.
  
 
 Thanks for this new implementation, it's a lot cleaner than the previous
 one, especially the fallback to setcond if not implemented, and the
 argument alias for the x86 backend, deferring the issue to the register
 allocator instead of trying to do that in the backend.
 
 From my side, I am ok with it. That said I'd also like to have at least
 the opinion of Malc and Blue (Cc:ed). Comments from others are also
 welcome.

I don't have objections (philosphical or otherwise) to this, the only
request is for a detailed directions on how to exercise non emulated
version (i.e. where to get the alpha image from how to run it exactly)

[..snip..]

P.S. The only thing i've noticed is somewhat unusual naming convention
 in the README, nothing else uses cN and vN naming of arguments,
 perhaps sticking to t0, t1, t2, t3 is better to avoid confusion?

-- 
mailto:av1...@comtv.ru



Re: [Qemu-devel] [PATCH V3 10/11] vcpu: introduce lockmap

2012-09-19 Thread Avi Kivity
On 09/19/2012 04:01 PM, Igor Mitsyanko wrote:
 That would avoid the lockups and allow the device to be reset at
 any time. Or am I missing something?

 Not that I can see.  If real hardware can be looped, so can qemu.  I'm
 only worried about recursion and deadlocks (while real hardware can
 deadlock, we'd prefer to avoid that).


 
 So, I think the idea here is that if real hardware can be locked we
 should lock too, but provide a guest CPU a possibility to abort locked
 operation. For this particular example, SD card controller can deadlock
 on recursive descriptor LINK entries, but CPU can abort ongoing
 transaction at any time by issuing ABORT command. And if guest CPU
 busy-waits in infinite while() loop for a TRANSFER OVER flag to be set,
 then it had it coming.

If real hardware would lock up so should we (in the guest's eyes) but
the qemu monitor should remain responsive.  It is not part of emulated
hardware.


-- 
error compiling committee.c: too many arguments to function



Re: [Qemu-devel] [PATCH] xen: Fix, no unplug of pt device by platform device.

2012-09-19 Thread Stefano Stabellini
On Wed, 19 Sep 2012, Anthony PERARD wrote:
 The Xen platform device will unplug any NICs if requested by the guest 
 (PVonHVM)
 including a NIC that would have been passthrough. This patch makes sure that a
 passthrough device will not be unplug.
 
 Reported-by: Zhang, Yang Z yang.z.zh...@intel.com
 Signed-off-by: Anthony PERARD anthony.per...@citrix.com
 

Zhang, does it this patch fix the problem for you?


  hw/xen_platform.c | 8 ++--
  1 file changed, 6 insertions(+), 2 deletions(-)
 
 diff --git a/hw/xen_platform.c b/hw/xen_platform.c
 index 0d6c2ff..956dbfe 100644
 --- a/hw/xen_platform.c
 +++ b/hw/xen_platform.c
 @@ -85,8 +85,10 @@ static void log_writeb(PCIXenPlatformState *s, char val)
  
  static void unplug_nic(PCIBus *b, PCIDevice *d, void *o)
  {
 +/* We have to ignore passthrough devices */
  if (pci_get_word(d-config + PCI_CLASS_DEVICE) ==
 -PCI_CLASS_NETWORK_ETHERNET) {
 +PCI_CLASS_NETWORK_ETHERNET
 + strcmp(d-name, xen-pci-passthrough) != 0) {
  qdev_free(d-qdev);
  }
  }
 @@ -98,8 +100,10 @@ static void pci_unplug_nics(PCIBus *bus)
  
  static void unplug_disks(PCIBus *b, PCIDevice *d, void *o)
  {
 +/* We have to ignore passthrough devices */
  if (pci_get_word(d-config + PCI_CLASS_DEVICE) ==
 -PCI_CLASS_STORAGE_IDE) {
 +PCI_CLASS_STORAGE_IDE
 + strcmp(d-name, xen-pci-passthrough) != 0) {
  qdev_unplug((d-qdev), NULL);
  }
  }
 -- 
 Anthony PERARD
 



Re: [Qemu-devel] [PATCH V3 10/11] vcpu: introduce lockmap

2012-09-19 Thread Igor Mitsyanko

On 09/19/2012 11:57 AM, Jan Kiszka wrote:

On 2012-09-19 06:40, Peter Crosthwaite wrote:

On Wed, Sep 19, 2012 at 2:32 PM, Edgar E. Iglesias
edgar.igles...@gmail.com wrote:

On Wed, Sep 19, 2012 at 02:25:48PM +1000, Peter Crosthwaite wrote:

Ping for PMM,

This is the root case of your block on the SDHCI series - this is a
discussion on resolution to bogus infinite looping DMA. For current
participants in this discussion, heres our thread on the same topic
over in SD land:

http://lists.gnu.org/archive/html/qemu-devel/2012-08/msg01017.html

With the findings here and acknowledgement that this is a general
problem, we either need to declare this issue of scope for SDHCI, or
work with these guys (in the immediate future) on the DMA infinite
loop problem flagged here. I dont mind if SDHCI+ADMA is the guinea pig
here, but can we get a decisive plan for going forward with this issue
if it is going to continue to block SDHCI.

Thanks to Igor for identifying the overlap between these discussions.

Hi,

A couple of dumb questions.

What is the reason for the blocker? that possible guest dos is worse
than no functionality at all?

Can't you put the DMA/IO processing into the background?

I dont know a way do do asynchronous DMA unless I am missing something
here. So what happens is we have a device that walks a SG list
synchronously while holding all the locks and stuff being discussed
here. If that SG list infinite loops then crash.

We could switch to async DMA, but most DMA-capable devices aren't
prepared for rescheduling points in the middle of their code. This will
require quite a bit of code review.


From what I understand, we can't switch to async DMA while SD card 
model doesn't support it. And introducing this support would require to 
convert every existing SD card user in QEMU.
I can recall that such a conversion was already suggested in a mailing 
list a while ago, but no one (including me) volunteered to do it :)



what's the diff between setup of bad descriptors and  writing a
while (1) loop in the guest CPU?


While(1) in guest doesnt freeze all of QEMU (monitor still works and
stuff), wheras the DMA ops going in circles does, as locks are taken
by an infinite loop.

That's the point. If we loop, we need at least a way to stop it by
issuing a device or system reset. But I tend to think that detecting
loops and failing/ignoring requests is easier implementable.

Jan






Re: [Qemu-devel] qmp: dump-guest-memory: -p option has issues, fix it or drop it?

2012-09-19 Thread Markus Armbruster
Anthony Liguori anth...@codemonkey.ws writes:

 Markus Armbruster arm...@redhat.com writes:

 Anthony Liguori anth...@codemonkey.ws writes:

 Markus Armbruster arm...@redhat.com writes:

 Jan Kiszka jan.kis...@siemens.com writes:

  * The issues discussed in this email plus the fact that the guest
memory may be corrupted, and the guest may be in real-mode even
when paging is enabled


 Yes, there are some limitations with this option. Jan said that he
 always use gdb to deal with vmcore, so he needs such information.

 The point is to overcome the focus on Linux-only dump processing tools.
 
 While I don't care for supporting alternate dump processing tools
 myself, I certainly don't mind supporting them, as long as the code
 satisfies basic safety and reliability requirements.
 
 This code doesn't, as far as I can tell.

 It works, thought not under all circumstances.

 I don't doubt it works often enough to be useful to somebody.  But basic
 safety and reliability requirements are a bit more than that.  They
 include don't explode in ways a reasonable user can't be expected to
 foresee.  I don't think a reasonable user can be expected to see that
 -p is safe only for trusted guests.

 We shipped the API, we're not removing it.  Our compatibility isn't
 whatever libvirt is currently using.

 Bah, don't be more catholic than the pope.  It's been out for how many
 days?  Have you looked at the code?

 It's perfectly reasonable to ask to document the behavior of the
 method.  It's also a trivial patch to qapi-schema.json.

 I'm okay with leaving obscure security holes in upstream QEMU,
 indefinitely, as long as they're documented.  I don't think it's a good
 idea, but it's something reasonable people can disagree about.

 It's *not* a security hole.

Since we agree that documenting the existing crappy behavior is okay for
stable, let's not argue what exactly to call it.

 The malicious guest you mention is just a guest with fully populated
 PTEs and PDEs, no?

A normal guest uses a small fraction of its RAM for page tables.  QEMU
allocates 10x of that fraction for dump -p.  Not so nice.

A not-so-normal guest uses most of its RAM for page tables.  QEMU
allocates in the order of 10x of the guest RAM for -p.  Oops.

[...]



Re: [Qemu-devel] [PATCH V3 01/11] atomic: introduce atomic operations

2012-09-19 Thread Jamie Lokier
Avi Kivity wrote:
 On 09/13/2012 09:54 AM, liu ping fan wrote:
 
  +typedef struct Atomic {
  +int counter;
  +} Atomic;
 
  Best to mark counter 'volatile'.
 
  +
  +static inline void atomic_set(Atomic *v, int i)
  +{
  +v-counter = i;
  +}
  +
  +static inline int atomic_read(Atomic *v)
  +{
  +return v-counter;
  +}
 
 
  So these two operations don't get mangled by the optimizer.
 
  Browsing linux code and reading lkml, find some similar material. But
  they have moved volatile from -counter to function - atomic_read().
  As to atomic_read(), I think it need to prevent optimizer from
  refetching issue, but as to atomic_set(), do we need ?
 
 I think so, to prevent reordering.

Hi,

I don't think volatile makes any difference to reordering here.

The compiler is not going to move the atomic_set() store before or
after another instruction on the same atomic variable anyway, just
like it wouldn't do that for an ordinary assignment.

If you're concerned about ordering with respect to other memory, then
volatile wouldn't make much difference.  barrier() before and after would.

If you're copying Linux's semantics, Linux's atomic_set() doesn't
include any barriers, nor imply any.  atomic_read() uses volatile to
ensure that each call re-reads the value, for example in a loop.
(Same as ACCESS_ONCE()).  If there was a call to atomic_set() in a
loop, it doesn't guarantee that will be written each time.

-- Jamie



Re: [Qemu-devel] [PATCH 2/2] target-i386: Fix default Hypervisor level for hypervisor-vendor=kvm.

2012-09-19 Thread Eduardo Habkost
On Tue, Sep 18, 2012 at 03:32:04PM -0400, Don Slutz wrote:
 On 09/18/12 13:00, Eduardo Habkost wrote:
 On Tue, Sep 18, 2012 at 10:49:53AM -0400, Don Slutz wrote:
  From http://lkml.indiana.edu/hypermail/linux/kernel/1205.0/00100.html
 EAX should be KVM_CPUID_FEATURES (0x4001) not 0.
 
 If kvm is not configured, the additional option of hypervisor-level=1
 (or hypervisor-level=0x4001) needs to be specified to get this.
 ---
   target-i386/cpu.c |   12 +++-
   1 files changed, 11 insertions(+), 1 deletions(-)
 
 diff --git a/target-i386/cpu.c b/target-i386/cpu.c
 index 6e43eff..d73b0a8 100644
 --- a/target-i386/cpu.c
 +++ b/target-i386/cpu.c
 @@ -1248,7 +1248,12 @@ static char *x86_cpuid_get_hv_vendor(Object *obj, 
 Error **errp)
  env-cpuid_hv_level == CPUID_HV_LEVEL_XEN) {
   pstrcpy(value, sizeof(value), xen);
   } else if (!strcmp(value, CPUID_HV_VENDOR_KVM) 
 -   env-cpuid_hv_level == 0) {
 +#if defined(CONFIG_KVM)
 +   env-cpuid_hv_level == KVM_CPUID_FEATURES
 +#else
 +   env-cpuid_hv_level == 0
 +#endif
 +   ) {
   pstrcpy(value, sizeof(value), kvm);
   }
   return value;
 @@ -1281,6 +1286,11 @@ static void x86_cpuid_set_hv_vendor(Object *obj, 
 const char *value,
   }
   pstrcpy(adj_value, sizeof(adj_value), CPUID_HV_VENDOR_XEN);
   } else if (!strcmp(value, kvm)) {
 +#if defined(CONFIG_KVM)
 +if (env-cpuid_hv_level == 0) {
 +env-cpuid_hv_level = KVM_CPUID_FEATURES;
 +}
 +#endif
 If CPUID[0x4000].EAX set to 0 is documented as equivalent to having
 it set to 0x4001 (KVM_CPUID_FEATURES), why the confusing checks for
 CONFIG_KVM? Why not always set it to KVM_CPUID_FEATURES?
 
 At line 36 of the file:
 
 #if defined(CONFIG_KVM)
 #include linux/kvm_para.h
 #endif
 
 So without the check you get compile failures (i386-linux-user).
 

Right. We can't include linux/kvm_para.h on a non-Linux build host, but
on the other hand I don't see any reason to not have
cpuid_hv_level=0x4001 on builds without CONFIG_KVM too.

If we want to allow fake KVM CPUID leaves on non-CONFIG_KVM builds like
the code above does, QEMU needs its own constant for
KVM_CPUID_FEATURES/0x4001, so it doesn't depend on the Linux header.


 I have no issue with removing the check(s) for 0.  Currently
 hypervisor-level=0 will not force the old values; I can make a change
 so that works.
  -Don Slutz

-- 
Eduardo



Re: [Qemu-devel] qmp: dump-guest-memory: -p option has issues, fix it or drop it?

2012-09-19 Thread Luiz Capitulino
On Wed, 19 Sep 2012 11:26:51 +0900 (JST)
HATAYAMA Daisuke d.hatay...@jp.fujitsu.com wrote:

 From: Wen Congyang we...@cn.fujitsu.com
 Subject: Re: [Qemu-devel] qmp: dump-guest-memory: -p option has issues, fix 
 it or drop it?
 Date: Wed, 19 Sep 2012 10:07:04 +0800
 
  At 09/19/2012 08:18 AM, Luiz Capitulino Wrote:
  On Tue, 18 Sep 2012 16:13:30 -0500
  Anthony Liguori anth...@codemonkey.ws wrote:
  
  Markus Armbruster arm...@redhat.com writes:
 
  Jan Kiszka jan.kis...@siemens.com writes:
 
   * The issues discussed in this email plus the fact that the guest
 memory may be corrupted, and the guest may be in real-mode even
 when paging is enabled
 
 
  Yes, there are some limitations with this option. Jan said that he
  always use gdb to deal with vmcore, so he needs such information.
 
  The point is to overcome the focus on Linux-only dump processing 
  tools.
 
  While I don't care for supporting alternate dump processing tools
  myself, I certainly don't mind supporting them, as long as the code
  satisfies basic safety and reliability requirements.
 
  This code doesn't, as far as I can tell.
 
  It works, thought not under all circumstances.
 
  I don't doubt it works often enough to be useful to somebody.  But basic
  safety and reliability requirements are a bit more than that.  They
  include don't explode in ways a reasonable user can't be expected to
  foresee.  I don't think a reasonable user can be expected to see that
  -p is safe only for trusted guests.
 
  We shipped the API, we're not removing it.  Our compatibility isn't
  whatever libvirt is currently using.
 
  It's perfectly reasonable to ask to document the behavior of the
  method.  It's also a trivial patch to qapi-schema.json.
  
  I feel that documenting it is not enough. It would be fine to do that
  if the worst case was a bad dump file, but the worst case as the
  code stands right now will affect the host.
  
  It's unreasonable to ask for an interface to be removed just because it
  could be misused when it has a legimitate use-case.
  
  The point is not that it can be misused. The issue we're concerned about
  is that a malicious guest could cause qemu to allocate dozens of
  gigabytes of RAM.
  
  Hmm, I guess the malicious guest's page table provides wrong information.
  We allocate memory to store memory mapping. Each memory mapping needs
  less than 40 bytes memory. The num of memory mapping is less than
  (2^48) / (2^12) = 2^36. And 2^36 * 40 = 64G * 40, too many memory
  
  What about this:
  1. if the num of memory mapping  10, we only store 10 memory
 mappings.
  
  2. The memory mapping which has smaller virtual address will be dropped?
  
  In this case, the memory we need is less than 10MB. So we will not allocate
  too many memory.

The problem of artificially limiting it is that we can reach the limit
in legal usage (ie. a very large machine).

 How about dropping making a whole list of memory maps at the same
 time, and how about rewriting the code so that it always has at most
 one memory mapping by merging virtually consequtive chunks? If
 possible, only 40 bytes is needed.

It already merges contiguous addresses and addresses that fall in
the same range. It can also skip addresses due to a few reasons (I/O
page, not present page, etc), which makes the problem very unlikely
in practice.

Our concern is with guests intentionally trying to make qemu allocate
more memory.



Re: [Qemu-devel] [libvirt] [PATCH v2 1/4] config: Introduce migration for SPICE graphics

2012-09-19 Thread Michal Privoznik
On 15.09.2012 17:10, Daniel P. Berrange wrote:
 On Fri, Sep 14, 2012 at 05:23:16PM -0600, Eric Blake wrote:
 [adding qemu]

 On 09/14/2012 11:47 AM, Daniel P. Berrange wrote:
 On Fri, Sep 14, 2012 at 07:34:50PM +0200, Michal Privoznik wrote:
 With this element users will control how SPICE
 server behaves upon migration. For now, there's
 just one attribute 'seamless' turning seamless
 migration on/off/default.

 Ewww, no. This information is a related to a API operation,
 not the VM configuration. It should be either auto-detected
 by libvirt to the best compatible setting, or passed as a
 flag to the virDomainMigrate API call if auto-detection is
 not possible.

 But with the current qemu implementation, there's no way to know if the
 destination supports this until after you've started the source, and the
 current implementation in qemu is that you must declare the semantics at
 the time you start qemu, not at the time you send the 'migrate' monitor
 command.  For libvirt autodetection to work without polluting the domain
 XML, we'd need to be able to auto-detect at the time we start migration.

 This sounds like we need to enhance the 'migrate-set-capabilities'
 command to enable or disable this feature on the fly, according to what
 libvirt detects from the remote end, rather than hard-coding it to the
 startup state of qemu on the source side.
 
 Hmm, my understanding of the QEMU flag was different. Based on
 the commit message:
 
 spice: adding seamless-migration option to the command line
 
 The seamless-migration flag is required in order to identify
 whether libvirt supports the new QEVENT_SPICE_MIGRATE_COMPLETED or not
 (by default the flag is off).
 New libvirt versions that wait for QEVENT_SPICE_MIGRATE_COMPLETED should 
 turn on this flag.
 When this flag is off, spice fallbacks to its old migration method, which
 can result in data loss.
 
 
 This says to me that any libvirt which knows about the new
 SPICE_MIGRATE_COMPLETED event, should set the seamless-migration
 flag unconditionally, to indicate that it can handle the event
 and thus the new migration method. It says nothing about only
 setting this flag if the destination QEMU also supports it.
 As such, IMHO, we can  should set this flag unconditonally
 on all QEMUs we run which support it.
 
 If it turns out that this flag does indeed require that the
 destination QEMU also has the same setting, then IMHO this
 flag is a fatally flawed design. At time of starting any QEMU
 instance, we can't know whether the destination QEMU we want
 to migrate to will have the support or not. Compatibility
 checks of this kind can only be decided at time the migrate
 command is actually issued.
 
 
 Daniel
 

From my investigation I was able to migrate between qemu where one had
seamless_migration=on and the other one didn't support such argument at
all. Literally, on source I've checked out 27af778828db9 (the last
commit in Yonit's patchset) on destination f5bb039c6d97e (the last
before the patchset). Then I've migrated and receive both event and flag
set in query-spice response. So I guess the design is okay.

Having said that - what's desired solution here? Unconditionally set
seamless_migration=on on enough new qemu and don't expose it in XML at all?

Michal



Re: [Qemu-devel] [PATCH V3 01/11] atomic: introduce atomic operations

2012-09-19 Thread Jamie Lokier
liu ping fan wrote:
  +static inline void atomic_set(Atomic *v, int i)
  +{
  +v-counter = i;
  +}

Hi,

When running on ARM Linux kernels prior to 2.6.32, userspace
atomic_set() needs to use clrex or strex too.

See Linux commit 200b812d, Clear the exclusive monitor when returning
from an exception.

You can see ARM's atomic_set() used to use strex, and warns it's
important.  The kernel patch allows atomic_set() to be simplified, and
that includes for userspace, by putting clrex/strex in the exception
return path instead.

However, someone may run QEMU on a kernel before 2.6.32, which isn't
that old.  (E.g. my phone is running 2.6.28).

Otherwise you can have this situation:

Initially: a = 0.

Thread
  atomic_inc(a, 1)
  = ldrex, add, [strex interrupted]

 Interrupted by signal handler
  atomic_set(a, 3)
  = str
 Signal return

Resume thread
  = strex (succeeds because CPU-local exclusive-flag still set)

Result: a = 1, should be impossible when the signal triggered, and
information about the signal is lost.

A more realistic example would use atomic_compare_exchange(), to
atomic-read-and-clear, atomic-read-and-dec-if-not-zero a variable set
in a signal handler, however I've used atomic_inc() to illustrate
because that's in your patch.

Best,
-- Jamie



[Qemu-devel] [PATCH] qxl/update_area_io: cleanup invalid parameters handling

2012-09-19 Thread Michael Tokarev
This cleans up two additions of almost the same code in commits
511b13e2c9 and ccc2960d654.  While at it, make error paths
consistent (always use 'break' instead of 'return').

Signed-off-by: Michael Tokarev m...@tls.msk.ru
Cc: Dunrong Huang riegama...@gmail.com
Cc: Alon Levy al...@redhat.com
---
 hw/qxl.c |   13 +++--
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/hw/qxl.c b/hw/qxl.c
index 33169f3..3c82c2a 100644
--- a/hw/qxl.c
+++ b/hw/qxl.c
@@ -1547,20 +1547,13 @@ async_common:
 if (d-ram-update_surface  d-ssd.num_surfaces) {
 qxl_set_guest_bug(d, QXL_IO_UPDATE_AREA: invalid surface id %d\n,
   d-ram-update_surface);
-return;
+break;
 }
-if (update.left = update.right || update.top = update.bottom) {
+if (update.left = update.right || update.top = update.bottom ||
+update.left  0 || update.top  0) {
 qxl_set_guest_bug(d,
 QXL_IO_UPDATE_AREA: invalid area (%ux%u)x(%ux%u)\n,
 update.left, update.top, update.right, update.bottom);
-return;
-}
-
-if (update.left  0 || update.top  0 || update.left = update.right ||
-update.top = update.bottom) {
-qxl_set_guest_bug(d, QXL_IO_UPDATE_AREA: 
-  invalid area(%d,%d,%d,%d)\n, update.left,
-  update.right, update.top, update.bottom);
 break;
 }
 if (async == QXL_ASYNC) {
-- 
1.7.10.4




Re: [Qemu-devel] [PATCH 05/10] qxl: dont update invalid area

2012-09-19 Thread Michael Tokarev
On 13.09.2012 12:45, Gerd Hoffmann wrote:
 From: Dunrong Huang riegama...@gmail.com
 
 This patch fixes the following error:
 
 $ ~/usr/bin/qemu-system-x86_64 -enable-kvm -m 1024 -spice 
 port=5900,disable-ticketing -vga qxl -cdrom 
 ~/Images/linuxmint-13-mate-dvd-32bit.iso
 (/home/mathslinux/usr/bin/qemu-system-x86_64:10068): SpiceWorker-CRITICAL **: 
 red_worker.c:4599:red_update_area: condition `area-left = 0  area-top = 
 0  area-left  area-right  area-top  area-bottom' failed
 Aborted
 
 spice server terminates QEMU process if we pass invalid area to it,
 so dont update those invalid areas.
 
 Signed-off-by: Dunrong Huang riegama...@gmail.com
 Signed-off-by: Gerd Hoffmann kra...@redhat.com
 ---
  hw/qxl.c |7 +++
  1 files changed, 7 insertions(+), 0 deletions(-)
 
 diff --git a/hw/qxl.c b/hw/qxl.c
 index 257a37d..0176b1a 100644
 --- a/hw/qxl.c
 +++ b/hw/qxl.c
 @@ -1470,6 +1470,13 @@ async_common:
  return;
  }
  
 +if (update.left  0 || update.top  0 || update.left = update.right 
 ||
 +update.top = update.bottom) {
 +qxl_set_guest_bug(d, QXL_IO_UPDATE_AREA: 
 +  invalid area(%d,%d,%d,%d)\n, update.left,
 +  update.right, update.top, update.bottom);
 +break;
 +}

Please take a look at the previous chunk of code, which was
added in 511b13e2c9b426b3c56060909693de5097f0b496
qxl/update_area_io: guest_bug on invalid parameters by alevy:

+if (d-ram-update_surface  NUM_SURFACES) {
+qxl_set_guest_bug(d, QXL_IO_UPDATE_AREA: invalid surface id %d\n,
+  d-ram-update_surface);
+return;
+}
+if (update.left = update.right || update.top = update.bottom) {
+qxl_set_guest_bug(d,
+QXL_IO_UPDATE_AREA: invalid area (%ux%u)x(%ux%u)\n,
+update.left, update.top, update.right, update.bottom);
+return;
+}
+

Now, this place looks.. well.. funny.

A (trivial) cleanup patch is on the way.

Thanks,

/mjt



Re: [Qemu-devel] [PATCH v2 2/2] Versatile Express: add modelling of NOR flash

2012-09-19 Thread Francesco Lavra
On 09/19/2012 12:26 PM, Peter Maydell wrote:
 On 18 September 2012 21:59, Francesco Lavra francescolavra...@gmail.com 
 wrote:
 On 09/18/2012 03:46 PM, Peter Maydell wrote:
 On 17 September 2012 21:08, Francesco Lavra francescolavra...@gmail.com 
 wrote:
  qemu_irq pic[64];
  uint32_t proc_id;
  uint32_t sys_id;
 +DriveInfo *dinfo;
  ram_addr_t vram_size, sram_size;
  MemoryRegion *sysmem = get_system_memory();
  MemoryRegion *vram = g_new(MemoryRegion, 1);
 @@ -410,8 +415,23 @@ static void vexpress_common_init(const VEDBoardInfo
 *daughterboard,

  sysbus_create_simple(pl111, map[VE_CLCD], pic[14]);

 -/* VE_NORFLASH0: not modelled */
 -/* VE_NORFLASH1: not modelled */
 +dinfo = drive_get_next(IF_PFLASH);
 +if (!pflash_cfi01_register(map[VE_NORFLASH0], NULL, vexpress.flash0,
 +VEXPRESS_FLASH_SIZE, dinfo ? dinfo-bdrv : NULL,
 +VEXPRESS_FLASH_SECT_SIZE,
 +VEXPRESS_FLASH_SIZE / VEXPRESS_FLASH_SECT_SIZE, 4,
 +0x00, 0x89, 0x00, 0x18, 0)) {
 +fprintf(stderr, vexpress: error registering flash 0.\n);

 Shouldn't these errors be fatal?

 I checked the existing uses of pflash_cfi_0[1,2]_register() in the code,
 and if I'm not mistaken only in 5 out of 19 devices these errors are
 fatal, in the other 14 cases initialization continues even after flash
 registration failure, with or without an error message. Let me know if
 you still prefer these errors to be fatal.
 
 So the only reason this can fail is if the user specified a file
 to back the flash but trying to read it failed (ie, bad filename
 or file not the same size as the flash). I think that merits
 actually stopping on error.
 
 Ideally in the long term the flash devices should be converted
 to proper QOM devices and we could push the error handling back
 into the device itself (which is better positioned to distinguish
 bad filename from wrong length I suspect).

Ok, I will make these errors fatal in v3.

--
Francesco



[Qemu-devel] [PATCH 01/12] nbd: add more constants

2012-09-19 Thread Paolo Bonzini
Avoid magic numbers and magic size computations; hide them behind
constants.

Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 nbd.c | 17 ++---
 1 file modificato, 10 inserzioni(+), 7 rimozioni(-)

diff --git a/nbd.c b/nbd.c
index 0dd60c5..8201b7a 100644
--- a/nbd.c
+++ b/nbd.c
@@ -57,9 +57,12 @@
 
 /* This is all part of the official NBD API */
 
+#define NBD_REQUEST_SIZE(4 + 4 + 8 + 8 + 4)
 #define NBD_REPLY_SIZE  (4 + 4 + 8)
 #define NBD_REQUEST_MAGIC   0x25609513
 #define NBD_REPLY_MAGIC 0x67446698
+#define NBD_OPTS_MAGIC  0x49484156454F5054LL
+#define NBD_CLIENT_MAGIC0x420281861253LL
 
 #define NBD_SET_SOCK_IO(0xab, 0)
 #define NBD_SET_BLKSIZE _IO(0xab, 1)
@@ -213,7 +216,7 @@ static int nbd_send_negotiate(int csock, off_t size, 
uint32_t flags)
 
 /* Negotiate
 [ 0 ..   7]   passwd   (NBDMAGIC)
-[ 8 ..  15]   magic(0x00420281861253)
+[ 8 ..  15]   magic(NBD_CLIENT_MAGIC)
 [16 ..  23]   size
 [24 ..  27]   flags
 [28 .. 151]   reserved (0)
@@ -224,7 +227,7 @@ static int nbd_send_negotiate(int csock, off_t size, 
uint32_t flags)
 
 TRACE(Beginning negotiation.);
 memcpy(buf, NBDMAGIC, 8);
-cpu_to_be64w((uint64_t*)(buf + 8), 0x00420281861253LL);
+cpu_to_be64w((uint64_t*)(buf + 8), NBD_CLIENT_MAGIC);
 cpu_to_be64w((uint64_t*)(buf + 16), size);
 cpu_to_be32w((uint32_t*)(buf + 24),
  flags | NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
@@ -295,7 +298,7 @@ int nbd_receive_negotiate(int csock, const char *name, 
uint32_t *flags,
 uint32_t namesize;
 
 TRACE(Checking magic (opts_magic));
-if (magic != 0x49484156454F5054LL) {
+if (magic != NBD_OPTS_MAGIC) {
 LOG(Bad magic received);
 goto fail;
 }
@@ -334,7 +337,7 @@ int nbd_receive_negotiate(int csock, const char *name, 
uint32_t *flags,
 } else {
 TRACE(Checking magic (cli_magic));
 
-if (magic != 0x00420281861253LL) {
+if (magic != NBD_CLIENT_MAGIC) {
 LOG(Bad magic received);
 goto fail;
 }
@@ -477,7 +480,7 @@ int nbd_client(int fd)
 
 ssize_t nbd_send_request(int csock, struct nbd_request *request)
 {
-uint8_t buf[4 + 4 + 8 + 8 + 4];
+uint8_t buf[NBD_REQUEST_SIZE];
 ssize_t ret;
 
 cpu_to_be32w((uint32_t*)buf, NBD_REQUEST_MAGIC);
@@ -504,7 +507,7 @@ ssize_t nbd_send_request(int csock, struct nbd_request 
*request)
 
 static ssize_t nbd_receive_request(int csock, struct nbd_request *request)
 {
-uint8_t buf[4 + 4 + 8 + 8 + 4];
+uint8_t buf[NBD_REQUEST_SIZE];
 uint32_t magic;
 ssize_t ret;
 
@@ -582,7 +585,7 @@ ssize_t nbd_receive_reply(int csock, struct nbd_reply 
*reply)
 
 static ssize_t nbd_send_reply(int csock, struct nbd_reply *reply)
 {
-uint8_t buf[4 + 4 + 8];
+uint8_t buf[NBD_REPLY_SIZE];
 ssize_t ret;
 
 /* Reply
-- 
1.7.12





[Qemu-devel] [PATCH] qemu-timer: simplify qemu_run_timers

2012-09-19 Thread Paolo Bonzini
ptimer_head is an invariant pointer to clock-active_timers.
Remove it, and just reference clock-active_timers directly.

Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 qemu-timer.c | 7 +++
 1 file modificato, 3 inserzioni(+), 4 rimozioni(-)

diff --git a/qemu-timer.c b/qemu-timer.c
index c7a1551..908a103 100644
--- a/qemu-timer.c
+++ b/qemu-timer.c
@@ -372,21 +372,20 @@ bool qemu_timer_expired(QEMUTimer *timer_head, int64_t 
current_time)
 
 void qemu_run_timers(QEMUClock *clock)
 {
-QEMUTimer **ptimer_head, *ts;
+QEMUTimer *ts;
 int64_t current_time;

 if (!clock-enabled)
 return;
 
 current_time = qemu_get_clock_ns(clock);
-ptimer_head = clock-active_timers;
 for(;;) {
-ts = *ptimer_head;
+ts = clock-active_timers;
 if (!qemu_timer_expired_ns(ts, current_time)) {
 break;
 }
 /* remove timer from the list before calling the callback */
-*ptimer_head = ts-next;
+clock-active_timers = ts-next;
 ts-next = NULL;
 
 /* run the callback (the timer list can be modified) */
-- 
1.7.12




Re: [Qemu-devel] [PATCH] qapi: convert add_client

2012-09-19 Thread Luiz Capitulino
On Wed, 19 Sep 2012 13:04:22 +0200
Paolo Bonzini pbonz...@redhat.com wrote:

 Il 18/09/2012 21:06, Luiz Capitulino ha scritto:
  +fd = monitor_get_fd(cur_mon, fdname);
  +if (fd  0) {
  +error_setg(errp, file-description '%s' has not been found, 
  fdname);
  +return;
  +}
 
 Please change this to add an Error * argument to monitor_get_fd, and
 change non-Error-friendly callers of monitor_get_fd to
 monitor_handle_fd_param.

Good point.

 You can then reuse the error in monitor_handle_fd_param (there is
 already QERR_FD_NOT_FOUND, by the way).
 
 I'll post the patches later in my reviewed NBD series, please take them
 from there and I'll rebase before my pull request.

Ok, although I wouldn't mind having this patch on top of yours.



[Qemu-devel] [PULL 00/12] NBD patches for 2012-09-22

2012-09-19 Thread Paolo Bonzini
The following changes since commit 89c7fd21930de671a6e34793e8b1ee257e2e:

  Remove unused CONFIG_TCG_PASS_AREG0 and dead code (2012-09-15 17:51:14 +)

are available in the git repository at:

  git://github.com/bonzini/qemu.git nbd-next

for you to fetch changes up to be3d30e144dbb99cd39ae4cedfb802337b5b172f:

  nbd: add nbd_export_get_blockdev (2012-09-19 14:03:15 +0200)

These patches merge the first part of the embedded NBD server.  The actual
QEMU implementation needs some refactoring of qemu-sockets, and might even
go in through Luiz's tree.


Paolo Bonzini (12):
  nbd: add more constants
  nbd: pass NBDClient to nbd_send_negotiate
  nbd: do not close BlockDriverState in nbd_export_close
  nbd: make refcount interface public
  nbd: do not leak nbd_trip coroutines when a connection is torn down
  nbd: add reference counting to NBDExport
  nbd: track clients into NBDExport
  nbd: add notification for closing an NBDExport
  qemu-nbd: rewrite termination conditions to use a state machine
  nbd: register named exports
  nbd: negotiate with named exports
  nbd: add nbd_export_get_blockdev

 nbd.c  | 396 ++---
 nbd.h  |  15 ++-
 qemu-nbd.c |  36 --
 3 file modificati, 367 inserzioni(+), 80 rimozioni(-)
-- 
1.7.12




[Qemu-devel] [PATCH 10/12] nbd: register named exports

2012-09-19 Thread Paolo Bonzini
Add an API to register and find named exports.

Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 nbd.c | 49 +
 nbd.h |  4 
 2 file modificati, 53 inserzioni(+)

diff --git a/nbd.c b/nbd.c
index 2e9de70..2d2221c 100644
--- a/nbd.c
+++ b/nbd.c
@@ -93,13 +93,17 @@ struct NBDExport {
 void (*close)(NBDExport *exp);
 
 BlockDriverState *bs;
+char *name;
 off_t dev_offset;
 off_t size;
 uint32_t nbdflags;
 QTAILQ_HEAD(, NBDClient) clients;
 QSIMPLEQ_HEAD(, NBDRequest) requests;
+QTAILQ_ENTRY(NBDExport) next;
 };
 
+static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
+
 struct NBDClient {
 int refcount;
 void (*close)(NBDClient *client);
@@ -740,6 +744,39 @@ NBDExport *nbd_export_new(BlockDriverState *bs, off_t 
dev_offset,
 return exp;
 }
 
+NBDExport *nbd_export_find(const char *name)
+{
+NBDExport *exp;
+QTAILQ_FOREACH(exp, exports, next) {
+if (strcmp(name, exp-name) == 0) {
+return exp;
+}
+}
+
+return NULL;
+}
+
+void nbd_export_set_name(NBDExport *exp, const char *name)
+{
+if (exp-name == name) {
+return;
+}
+
+nbd_export_get(exp);
+if (exp-name != NULL) {
+g_free(exp-name);
+exp-name = NULL;
+QTAILQ_REMOVE(exports, exp, next);
+nbd_export_put(exp);
+}
+if (name != NULL) {
+nbd_export_get(exp);
+exp-name = g_strdup(name);
+QTAILQ_INSERT_TAIL(exports, exp, next);
+}
+nbd_export_put(exp);
+}
+
 void nbd_export_close(NBDExport *exp)
 {
 NBDClient *client, *next;
@@ -765,6 +802,8 @@ void nbd_export_put(NBDExport *exp)
 }
 
 if (--exp-refcount == 0) {
+assert(exp-name == NULL);
+
 if (exp-close) {
 exp-close(exp);
 }
@@ -780,6 +819,16 @@ void nbd_export_put(NBDExport *exp)
 }
 }
 
+void nbd_export_close_all(void)
+{
+NBDExport *exp, *next;
+
+QTAILQ_FOREACH_SAFE(exp, exports, next, next) {
+nbd_export_close(exp);
+nbd_export_set_name(exp, NULL);
+}
+}
+
 static int nbd_can_read(void *opaque);
 static void nbd_read(void *opaque);
 static void nbd_restart_write(void *opaque);
diff --git a/nbd.h b/nbd.h
index 895820b..f0edb9c 100644
--- a/nbd.h
+++ b/nbd.h
@@ -85,6 +85,10 @@ void nbd_export_close(NBDExport *exp);
 void nbd_export_get(NBDExport *exp);
 void nbd_export_put(NBDExport *exp);
 
+NBDExport *nbd_export_find(const char *name);
+void nbd_export_set_name(NBDExport *exp, const char *name);
+void nbd_export_close_all(void);
+
 NBDClient *nbd_client_new(NBDExport *exp, int csock,
   void (*close)(NBDClient *));
 void nbd_client_close(NBDClient *client);
-- 
1.7.12





Re: [Qemu-devel] [PATCH] arch_init.c: Improve '-soundhw help' for non-HAS_AUDIO_CHOICE archs

2012-09-19 Thread malc
On Wed, 19 Sep 2012, Peter Maydell wrote:

 For architectures which don't set HAS_AUDIO_CHOICE, improve the
 '-soundhw help' message so that it doesn't simply print an empty
 list, implying no sound support at all.
 

[..snip..]

Applied, thanks.

-- 
mailto:av1...@comtv.ru



  1   2   3   >