[Qemu-devel] [PATCH v2] target-mips: Use TCG registers for the FPU.
With normal FP, this doesn't have much affect on the generated code, because most of the FP operations are not CONST/PURE, and so we spill registers in about the same frequency as the explicit load/stores. But with Loongson multimedia instructions, which are all integral and whose helpers are in fact CONST+PURE, this greatly improves the code. Signed-off-by: Richard Henderson r...@twiddle.net --- As requested, only generating 64-bit fp registers now. The generated code looks quite good on i386. It could be a tad better for x86_64, but it's still better than it was. r~ target-mips/translate.c | 96 +++-- 1 file changed, 54 insertions(+), 42 deletions(-) diff --git a/target-mips/translate.c b/target-mips/translate.c index 52eeb2b..4ae15aa 100644 --- a/target-mips/translate.c +++ b/target-mips/translate.c @@ -478,6 +478,7 @@ static TCGv cpu_HI[MIPS_DSP_ACC], cpu_LO[MIPS_DSP_ACC], cpu_ACX[MIPS_DSP_ACC]; static TCGv cpu_dspctrl, btarget, bcond; static TCGv_i32 hflags; static TCGv_i32 fpu_fcr0, fpu_fcr31; +static TCGv_i64 fpu_f64[32]; static uint32_t gen_opc_hflags[OPC_BUF_SIZE]; @@ -545,26 +546,31 @@ enum { BS_EXCP = 3, /* We reached an exception condition */ }; -static const char *regnames[] = -{ r0, at, v0, v1, a0, a1, a2, a3, - t0, t1, t2, t3, t4, t5, t6, t7, - s0, s1, s2, s3, s4, s5, s6, s7, - t8, t9, k0, k1, gp, sp, s8, ra, }; +static const char * const regnames[] = { +r0, at, v0, v1, a0, a1, a2, a3, +t0, t1, t2, t3, t4, t5, t6, t7, +s0, s1, s2, s3, s4, s5, s6, s7, +t8, t9, k0, k1, gp, sp, s8, ra, +}; -static const char *regnames_HI[] = -{ HI0, HI1, HI2, HI3, }; +static const char * const regnames_HI[] = { +HI0, HI1, HI2, HI3, +}; -static const char *regnames_LO[] = -{ LO0, LO1, LO2, LO3, }; +static const char * const regnames_LO[] = { +LO0, LO1, LO2, LO3, +}; -static const char *regnames_ACX[] = -{ ACX0, ACX1, ACX2, ACX3, }; +static const char * const regnames_ACX[] = { +ACX0, ACX1, ACX2, ACX3, +}; -static const char *fregnames[] = -{ f0, f1, f2, f3, f4, f5, f6, f7, - f8, f9, f10, f11, f12, f13, f14, f15, - f16, f17, f18, f19, f20, f21, f22, f23, - f24, f25, f26, f27, f28, f29, f30, f31, }; +static const char * const fregnames[] = { +f0, f1, f2, f3, f4, f5, f6, f7, +f8, f9, f10, f11, f12, f13, f14, f15, +f16, f17, f18, f19, f20, f21, f22, f23, +f24, f25, f26, f27, f28, f29, f30, f31, +}; #ifdef MIPS_DEBUG_DISAS #define MIPS_DEBUG(fmt, ...) \ @@ -658,54 +664,54 @@ static inline void gen_store_srsgpr (int from, int to) } /* Floating point register moves. */ -static inline void gen_load_fpr32 (TCGv_i32 t, int reg) +static void gen_load_fpr32(TCGv_i32 t, int reg) { -tcg_gen_ld_i32(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].w[FP_ENDIAN_IDX])); +tcg_gen_trunc_i64_i32(t, fpu_f64[reg]); } -static inline void gen_store_fpr32 (TCGv_i32 t, int reg) +static void gen_store_fpr32(TCGv_i32 t, int reg) { -tcg_gen_st_i32(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].w[FP_ENDIAN_IDX])); +TCGv_i64 t64 = tcg_temp_new_i64(); +tcg_gen_extu_i32_i64(t64, t); +tcg_gen_deposit_i64(fpu_f64[reg], fpu_f64[reg], t64, 0, 32); +tcg_temp_free_i64(t64); } -static inline void gen_load_fpr32h (TCGv_i32 t, int reg) +static void gen_load_fpr32h(TCGv_i32 t, int reg) { -tcg_gen_ld_i32(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].w[!FP_ENDIAN_IDX])); +TCGv_i64 t64 = tcg_temp_new_i64(); +tcg_gen_shri_i64(t64, fpu_f64[reg], 32); +tcg_gen_trunc_i64_i32(t, t64); +tcg_temp_free_i64(t64); } -static inline void gen_store_fpr32h (TCGv_i32 t, int reg) +static void gen_store_fpr32h(TCGv_i32 t, int reg) { -tcg_gen_st_i32(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].w[!FP_ENDIAN_IDX])); +TCGv_i64 t64 = tcg_temp_new_i64(); +tcg_gen_extu_i32_i64(t64, t); +tcg_gen_deposit_i64(fpu_f64[reg], fpu_f64[reg], t64, 32, 32); +tcg_temp_free_i64(t64); } -static inline void gen_load_fpr64 (DisasContext *ctx, TCGv_i64 t, int reg) +static void gen_load_fpr64(DisasContext *ctx, TCGv_i64 t, int reg) { if (ctx-hflags MIPS_HFLAG_F64) { -tcg_gen_ld_i64(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].d)); +tcg_gen_mov_i64(t, fpu_f64[reg]); } else { -TCGv_i32 t0 = tcg_temp_new_i32(); -TCGv_i32 t1 = tcg_temp_new_i32(); -gen_load_fpr32(t0, reg ~1); -gen_load_fpr32(t1, reg | 1); -tcg_gen_concat_i32_i64(t, t0, t1); -tcg_temp_free_i32(t0); -tcg_temp_free_i32(t1); +tcg_gen_concat32_i64(t, fpu_f64[reg ~1], fpu_f64[reg | 1]); } } -static inline void gen_store_fpr64 (DisasContext *ctx, TCGv_i64 t, int reg) +static void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg) { if (ctx-hflags MIPS_HFLAG_F64) { -
Re: [Qemu-devel] [PATCH V3 1/5] libqblock build system
于 2012-9-18 18:05, Paolo Bonzini 写道: Il 18/09/2012 11:01, Wenchao Xia ha scritto: Libqblock was placed in new directory ./libqblock, libtool will build dynamic library there, source files of block layer remains in ./block. So block related source code will generate 3 sets of binary, first is old ones used in qemu, second and third are non PIC and PIC ones in ./libqblock. GCC compiler flag visibility=hidden was used with special macro, to export only symbols that was marked as PUBLIC. Signed-off-by: Wenchao Xia xiaw...@linux.vnet.ibm.com --- Makefile| 13 - Makefile.objs |6 libqblock/Makefile | 64 +++ 3 files changed, 82 insertions(+), 1 deletions(-) create mode 100644 libqblock/Makefile create mode 100644 libqblock/libqblock-error.c create mode 100644 libqblock/libqblock.c diff --git a/Makefile b/Makefile index 971e92f..b0b9b8d 100644 --- a/Makefile +++ b/Makefile @@ -164,6 +164,17 @@ qemu-io$(EXESUF): qemu-io.o cmd.o $(tools-obj-y) $(block-obj-y) qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o +## +# Support building shared library libqblock +ifeq ($(LIBTOOL),) +$(libqblock-lib-la): + @echo libtool is missing, please install and rerun configure; exit 1 +else +$(libqblock-lib-la): + $(call quiet-command,$(MAKE) $(SUBDIR_MAKEFLAGS) -C libqblock V=$(V) TARGET_DIR=$*/ $(libqblock-lib-la),) +endif +### + vscclient$(EXESUF): $(libcacard-y) $(oslib-obj-y) $(trace-obj-y) $(tools-obj-y) qemu-timer-common.o libcacard/vscclient.o $(call quiet-command,$(CC) $(LDFLAGS) -o $@ $^ $(libcacard_libs) $(LIBS), LINK $@) @@ -227,7 +238,7 @@ clean: rm -rf qapi-generated rm -rf qga/qapi-generated $(MAKE) -C tests/tcg clean - for d in $(ALL_SUBDIRS) $(QEMULIBS) libcacard; do \ + for d in $(ALL_SUBDIRS) $(QEMULIBS) libcacard libqblock; do \ if test -d $$d; then $(MAKE) -C $$d $@ || exit 1; fi; \ rm -f $$d/qemu-options.def; \ done diff --git a/Makefile.objs b/Makefile.objs index 4412757..8a4c9fc 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -248,3 +248,9 @@ nested-vars += \ common-obj-y \ extra-obj-y dummy := $(call unnest-vars) + +# +# libqblock + +libqblock-lib-la = libqblock.la +libqblock-lib-path = libqblock diff --git a/libqblock/Makefile b/libqblock/Makefile new file mode 100644 index 000..bf7abcc --- /dev/null +++ b/libqblock/Makefile @@ -0,0 +1,64 @@ +### +# libqblock Makefile +# Todo: +#1 trace related files is generated in this directory, move +# them to the root directory. +## +-include ../config-host.mak +-include $(SRC_PATH)/Makefile.objs +-include $(SRC_PATH)/rules.mak + +# +# Library settings +# +$(call set-vpath, $(SRC_PATH)) + +#expand the foldered vars,especially ./block +dummy := $(call unnest-vars-1) + +#library objects +tools-obj-y = $(oslib-obj-y) $(trace-obj-y) qemu-tool.o qemu-timer.o \ + qemu-timer-common.o main-loop.o notify.o \ + iohandler.o cutils.o iov.o async.o +tools-obj-$(CONFIG_POSIX) += compatfd.o Do you really need all of these? I guess only some .o are needed, but not sure how to pick them out, is there some tools which can help detect which object files are needed in final linkage? (BTW, I posted recently a patch to move tools-obj-y to Makefile.objs. It doesn't apply anymore, I'll repost---but the conflicts are trivial). greate, then I can use tools-obj-y directly. +libqblock-y=libqblock.o libqblock-error.o +libqblock-lib-y=$(patsubst %.o,%.lo,$(libqblock-y)) + +QEMU_OBJS=$(tools-obj-y) $(block-obj-y) +QEMU_OBJS_FILTERED=$(filter %.o,$(QEMU_OBJS)) What does this filter out? $(block-obj-y) contains /block, which would cause trouble in building, so filtered it out. Paolo +QEMU_OBJS_LIB=$(patsubst %.o, %.lo,$(QEMU_OBJS_FILTERED)) + +QEMU_CFLAGS+= -I../ -I../include +#adding magic macro define for symbol hiding and exposing +QEMU_CFLAGS+= -fvisibility=hidden -D LIBQB_BUILD + +#dependency libraries +LIBS+=-lz $(LIBS_TOOLS) + +# +# Runtime rules +# +clean: + rm -f *.lo *.o *.d *.la libqblock-test trace.c trace.c-timestamp + rm -rf .libs block trace + +all: libqblock-test + @true + +help: + @echo type make libqblock-test at root dirtory, libtool is required +
Re: [Qemu-devel] [PATCH V3 5/5] libqblock test example code
于 2012-9-18 18:10, Paolo Bonzini 写道: Il 18/09/2012 11:01, Wenchao Xia ha scritto: In this example, user first create two qcow2 images, and then get the backing file relationship information of them. Then does write and read sync IO on them. Please use gtest so that this can be easily extensible. Paolo OK. Signed-off-by: Wenchao Xia xiaw...@linux.vnet.ibm.com --- tests/libqblock/libqblock-test.c | 233 ++ 1 files changed, 233 insertions(+), 0 deletions(-) diff --git a/tests/libqblock/libqblock-test.c b/tests/libqblock/libqblock-test.c index c05c0c4..c0b7963 100644 --- a/tests/libqblock/libqblock-test.c +++ b/tests/libqblock/libqblock-test.c @@ -1,4 +1,237 @@ +/* + * QEMU block layer library test + * + * Copyright IBM, Corp. 2012 + * + * Authors: + * Wenchao Xia xiaw...@linux.vnet.ibm.com + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include stdarg.h +#include stdio.h +#include unistd.h +#include inttypes.h +#include string.h +#include stdlib.h +#include libqblock.h + +#define TEST_BUF_SIZE 1024 +static unsigned char buf_r[TEST_BUF_SIZE]; +static unsigned char buf_w[TEST_BUF_SIZE] = {0, 0, 0, 0}; + +typedef struct VerifyData { +unsigned char *buf_r; +unsigned char *buf_w; +int len; +} VerifyData; + +static void print_loc(QBlockProtInfo *loc) +{ +if (loc == NULL) { +printf(backing file is NULL.); +return; +} +switch (loc-prot_type) { +case QB_PROT_NONE: +printf(protocol type [none].); +break; +case QB_PROT_FILE: +printf(protocol type [file], filename [%s]., + loc-prot_op.o_file.filename); +break; +default: +printf(protocol type not supported.); +break; +} +} + +static void print_info_image_static(QBlockStaticInfo *info) +{ +printf(===image location:\n); +print_loc(info-loc); +printf(\nvirtual_size % PRId64 , format type %d [%s], + *(info-member_addr-virt_size), + info-fmt.fmt_type, qb_fmttype2str(info-fmt.fmt_type)); +printf(\nbacking image location:\n); +print_loc(info-member_addr-backing_loc); +printf(\n); +} + +static void test_check(VerifyData *vdata) +{ +int cmp; +cmp = memcmp(vdata-buf_r, vdata-buf_w, vdata-len); +if (cmp == 0) { +printf(compare succeed, %d.\n, vdata-buf_r[24]); +} else { +printf(!!! compare fail, %d.\n, vdata-buf_r[24]); +exit(1); +} +} + int main(int argc, char **argv) { +const char *filename1, *filename2; +QBroker *broker = NULL; +QBlockState *qbs = NULL; +QBlockProtInfo *ol = NULL; +QBlockFmtInfo *of = NULL; +QBlockStaticInfo *info_st = NULL; +int ret, flag; +int test_offset = 510; +int test_len = 520; +VerifyData vdata; +char err_str[1024]; + +vdata.buf_r = buf_r; +vdata.buf_w = buf_w; +vdata.len = test_len; + +filename1 = ./qemu_image1; +filename2 = ./qemu_image2; +printf(qemu test, filename1 is %s, filename2 is %s.\n, + filename1, filename2); + +ret = qb_broker_new(broker); +if (ret 0) { +goto free; +} + +ret = qb_state_new(broker, qbs); +if (ret 0) { +goto free; +} + +ret = qb_prot_info_new(broker, ol); +if (ret 0) { +goto free; +} + +ret = qb_fmt_info_new(broker, of); +if (ret 0) { +goto free; +} + +/* create a new image */ + +ol-prot_type = QB_PROT_FILE; +ol-prot_op.o_file.filename = filename2; +of-fmt_type = QB_FMT_QCOW2; +of-fmt_op.o_qcow2.virt_size = 100 * 1024; +flag = 0; + +ret = qb_create(broker, qbs, ol, of, flag); +if (ret 0) { +qb_error_get_human_str(broker, err_str, sizeof(err_str)); +printf(create fail 1. %s.\n, err_str); +goto unlink; +} + +ol-prot_type = QB_PROT_FILE; +ol-prot_op.o_file.filename = filename1; +of-fmt_type = QB_FMT_QCOW2; +of-fmt_op.o_qcow2.backing_loc.prot_type = QB_PROT_FILE; +of-fmt_op.o_qcow2.backing_loc.prot_op.o_file.filename = filename2; +flag = 0; +ret = qb_create(broker, qbs, ol, of, flag); +if (ret 0) { +qb_error_get_human_str(broker, err_str, sizeof(err_str)); +printf(create fail 2. %s.\n, err_str); +goto unlink; +} + +/* get informations */ +ol-prot_type = QB_PROT_FILE; +ol-prot_op.o_file.filename = filename1; +of-fmt_type = QB_FMT_NONE; +flag = LIBQBLOCK_O_NO_BACKING; +ret = qb_open(broker, qbs, ol, of, flag); +if (ret 0) { +qb_error_get_human_str(broker, err_str, sizeof(err_str)); +printf(info getting, open failed. %s.\n, err_str); +goto free; +} + +while (1) { +ret = qb_info_image_static_get(broker, qbs, info_st); +if (ret 0) { +qb_error_get_human_str(broker,
Re: [Qemu-devel] [PATCH V3 4/5] libqblock test build system
于 2012-9-18 18:10, Paolo Bonzini 写道: Il 18/09/2012 11:01, Wenchao Xia ha scritto: Created a new directory in tests, make chekc-libqblock will build an executable binrary, make clean will delete it. Signed-off-by: Wenchao Xia xiaw...@linux.vnet.ibm.com --- .gitignore |1 + Makefile |1 + tests/Makefile |3 +++ tests/libqblock/Makefile | 32 tests/libqblock/libqblock-test.c |4 5 files changed, 41 insertions(+), 0 deletions(-) create mode 100644 tests/libqblock/Makefile create mode 100644 tests/libqblock/libqblock-test.c diff --git a/.gitignore b/.gitignore index 824c0d2..eccb637 100644 --- a/.gitignore +++ b/.gitignore @@ -95,3 +95,4 @@ cscope.* tags TAGS *~ +tests/libqblock/*.bin diff --git a/Makefile b/Makefile index b0b9b8d..de8ea17 100644 --- a/Makefile +++ b/Makefile @@ -238,6 +238,7 @@ clean: rm -rf qapi-generated rm -rf qga/qapi-generated $(MAKE) -C tests/tcg clean + $(MAKE) -C tests/libqblock clean for d in $(ALL_SUBDIRS) $(QEMULIBS) libcacard libqblock; do \ if test -d $$d; then $(MAKE) -C $$d $@ || exit 1; fi; \ rm -f $$d/qemu-options.def; \ diff --git a/tests/Makefile b/tests/Makefile index 26a67ce..69af1e2 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -148,4 +148,7 @@ check-unit: $(patsubst %,check-%, $(check-unit-y)) check-block: $(patsubst %,check-%, $(check-block-y)) check: check-unit check-qtest +check-libqblock: + $(call quiet-command,$(MAKE) $(SUBDIR_MAKEFLAGS) -C tests/libqblock V=$(V) TARGET_DIR=$*/ check-libqblock,) Please just put everything in tests/Makefile. make check should run it if LIBTOOL is available. OK. +libqblock-test.bin: $(libqblock-test-objs) $(libqblock-la-path) + $(call quiet-command,$(LIBTOOL) --mode=link --quiet --tag=CC $(CC) -shared -rpath $(libdir) -o $@ $^, lt LINK $@) .bin looks so MS-DOS. :) Paolo -- Best Regards Wenchao Xia
Re: [Qemu-devel] [PATCH] hw/pflash_cfi0[12]: Use host-utils.h ctz32()
Peter Maydell peter.mayd...@linaro.org writes: On 18 September 2012 17:53, Stefan Weil s...@weilnetz.de wrote: What about moving the ctz32 function (or even all of host-utils.h) to bitops.h? I guess that would make some sense, though I'm not generally a huge fan of shuffling code around. (bitops.h and host-utils.h are not under the same license terms, which might complicate things slightly.) Have a look at hw/hd-geometry.c for a suitable boilerplate comment.
Re: [Qemu-devel] [PATCH] qapi: convert add_client
Luiz Capitulino lcapitul...@redhat.com writes: On Tue, 18 Sep 2012 13:13:16 -0600 Eric Blake ebl...@redhat.com wrote: On 09/18/2012 01:06 PM, Luiz Capitulino wrote: Also fixes a few issues while there: 1. The fd returned by monitor_get_fd() leaks in most error conditions 2. monitor_get_fd() return value is not checked. Best case we get an error that is not correctly reported, worse case one of the functions using the fd (with value of -1) will explode 3. A few error conditions aren't reported Signed-off-by: Luiz Capitulino lcapitul...@redhat.com --- monitor.c| 39 --- qapi-schema.json | 23 +++ qmp-commands.hx | 5 + qmp.c| 44 4 files changed, 68 insertions(+), 43 deletions(-) { 'command': 'screendump', 'data': {'filename': 'str'} } + +## +# @add_client [...] If this were a new command for 1.3, I'd say to name it 'add-client'; but since QMP has already been exposing it and you are now just documenting it, you can't change the name. Yes, we just have to live with that for all old commands. If the inconsistency bothers us, we can either * add suitable aliases for every QMP name containing '_', or * fix the QMP names, and fold '_' to '-' in names received from client.
Re: [Qemu-devel] qmp: dump-guest-memory: -p option has issues, fix it or drop it?
Anthony Liguori anth...@codemonkey.ws writes: Markus Armbruster arm...@redhat.com writes: Jan Kiszka jan.kis...@siemens.com writes: * The issues discussed in this email plus the fact that the guest memory may be corrupted, and the guest may be in real-mode even when paging is enabled Yes, there are some limitations with this option. Jan said that he always use gdb to deal with vmcore, so he needs such information. The point is to overcome the focus on Linux-only dump processing tools. While I don't care for supporting alternate dump processing tools myself, I certainly don't mind supporting them, as long as the code satisfies basic safety and reliability requirements. This code doesn't, as far as I can tell. It works, thought not under all circumstances. I don't doubt it works often enough to be useful to somebody. But basic safety and reliability requirements are a bit more than that. They include don't explode in ways a reasonable user can't be expected to foresee. I don't think a reasonable user can be expected to see that -p is safe only for trusted guests. We shipped the API, we're not removing it. Our compatibility isn't whatever libvirt is currently using. Bah, don't be more catholic than the pope. It's been out for how many days? Have you looked at the code? It's perfectly reasonable to ask to document the behavior of the method. It's also a trivial patch to qapi-schema.json. I'm okay with leaving obscure security holes in upstream QEMU, indefinitely, as long as they're documented. I don't think it's a good idea, but it's something reasonable people can disagree about. We need to document that -p is unreliable and unsafe, therefore should only be used with trusted guests, and its result need not be correct even then. It's unreasonable to ask for an interface to be removed just because it could be misused when it has a legimitate use-case. The issue isn't misuse (operator does something stupid), it's abuse (guest can make it blow up when operator uses it as intended), and fragility (produces crap when operator uses it as intended, but the guest isn't in a sufficiently nice state).
Re: [Qemu-devel] [PATCH] qemu-clock: add an alarm timer based on timerfd
Il 18/09/2012 22:37, Anthony Liguori ha scritto: Unfortunately, there's a lot of Windows code in qemu-timer.c and main-loop.c right now otherwise the refactoring would be trivial. I'll leave that for another day. Cc: Paolo Bonzini pbonz...@redhat.com Cc: Jan Kiszka jan.kis...@siemens.com Signed-off-by: Anthony Liguori aligu...@us.ibm.com --- Please note, this is lightly tested. Since this is such a fundamental change, I'd like to do some performance analysis before committing but wanted to share early. Looks good. I think Peter Portante tested something similar, and found no big difference between the two. But it's a good thing and, in my opinion, for non-timerfd OSes we should simply adjust the select() timeout and not bother with signals. I'm not sure if the same can be done for Windows, but I think it's possible as long as you keep the timeBeginPeriod/timeEndPeriod calls. As a start, Stefan, can you check if the win32 timer works for you with the calls added? Like this: diff --git a/qemu-timer.c b/qemu-timer.c index c7a1551..721c769 100644 --- a/qemu-timer.c +++ b/qemu-timer.c @@ -673,6 +673,10 @@ static int win32_start_timer(struct qemu_alarm_timer *t) HANDLE hTimer; BOOLEAN success; +timeGetDevCaps(mm_tc, sizeof(mm_tc)); + +timeBeginPeriod(mm_tc.wPeriodMin); + /* If you call ChangeTimerQueueTimer on a one-shot timer (its period is zero) that has already expired, the timer is not updated. Since creating a new timer is relatively expensive, set a bogus one-hour @@ -688,6 +692,7 @@ static int win32_start_timer(struct qemu_alarm_timer *t) if (!success) { fprintf(stderr, Failed to initialize win32 alarm timer: %ld\n, GetLastError()); +timeEndPeriod(mm_tc.wPeriodMin); return -1; } @@ -702,6 +707,7 @@ static void win32_stop_timer(struct qemu_alarm_timer *t) if (hTimer) { DeleteTimerQueueTimer(NULL, hTimer, NULL); } +timeEndPeriod(mm_tc.wPeriodMin); } static void win32_rearm_timer(struct qemu_alarm_timer *t, Paolo
Re: [Qemu-devel] [PATCH 4/4] vl: add -late-object to create QOM objects after machine init
Il 19/09/2012 01:07, Anthony Liguori ha scritto: In order to create qdev objects via -late-object, we almost always have to specify the parent_bus which is usually created during machine init. Until we properly support two stage init, introduce a -late-object option that allows for creation of objects post-machine init. This is only needed for -device, no? So we can avoid introducing it and just use -device. +1, was thinking along the same lines... Sorry for such a delayed response... The semantics of how -device is different in that it implies realize. Yes, all I'm saying is, I don't see a need for -late-object since it is only needed for devices and we have a separate method to create them. Paolo
Re: [Qemu-devel] [PATCH] qemu-clock: add an alarm timer based on timerfd
On 2012-09-19 09:26, Paolo Bonzini wrote: Il 18/09/2012 22:37, Anthony Liguori ha scritto: Unfortunately, there's a lot of Windows code in qemu-timer.c and main-loop.c right now otherwise the refactoring would be trivial. I'll leave that for another day. Cc: Paolo Bonzini pbonz...@redhat.com Cc: Jan Kiszka jan.kis...@siemens.com Signed-off-by: Anthony Liguori aligu...@us.ibm.com --- Please note, this is lightly tested. Since this is such a fundamental change, I'd like to do some performance analysis before committing but wanted to share early. Looks good. I think Peter Portante tested something similar, and found no big difference between the two. But it's a good thing and, in my opinion, for non-timerfd OSes we should simply adjust the select() timeout and not bother with signals. What would be the advantage of timerfd over select? On Linux, both use hrtimers (and low slack for RT processes). I'm starting to like the select/WaitForMultipleObjects pattern as it would allow to consolidate over basically two versions of timers and simplify the code. Jan I'm not sure if the same can be done for Windows, but I think it's possible as long as you keep the timeBeginPeriod/timeEndPeriod calls. As a start, Stefan, can you check if the win32 timer works for you with the calls added? Like this: diff --git a/qemu-timer.c b/qemu-timer.c index c7a1551..721c769 100644 --- a/qemu-timer.c +++ b/qemu-timer.c @@ -673,6 +673,10 @@ static int win32_start_timer(struct qemu_alarm_timer *t) HANDLE hTimer; BOOLEAN success; +timeGetDevCaps(mm_tc, sizeof(mm_tc)); + +timeBeginPeriod(mm_tc.wPeriodMin); + /* If you call ChangeTimerQueueTimer on a one-shot timer (its period is zero) that has already expired, the timer is not updated. Since creating a new timer is relatively expensive, set a bogus one-hour @@ -688,6 +692,7 @@ static int win32_start_timer(struct qemu_alarm_timer *t) if (!success) { fprintf(stderr, Failed to initialize win32 alarm timer: %ld\n, GetLastError()); +timeEndPeriod(mm_tc.wPeriodMin); return -1; } @@ -702,6 +707,7 @@ static void win32_stop_timer(struct qemu_alarm_timer *t) if (hTimer) { DeleteTimerQueueTimer(NULL, hTimer, NULL); } +timeEndPeriod(mm_tc.wPeriodMin); } static void win32_rearm_timer(struct qemu_alarm_timer *t, Paolo -- Siemens AG, Corporate Technology, CT RTC ITP SDP-DE Corporate Competence Center Embedded Linux
[Qemu-devel] [PATCH] Adding BAR0 for e500 PCI controller
PCI Root complex have TYPE-1 configuration header while PCI endpoint have type-0 configuration header. The type-1 configuration header have a BAR (BAR0). In Freescale PCI controller BAR0 is used for mapping pci address space to CCSR address space. This can used for 2 purposes: 1) for MSI interrupt generation 2) Allow CCSR registers access when configured as PCI endpoint, which I am not sure is a use case with QEMU-KVM guest. What I observed is that when guest read the size of BAR0 of host controller configuration header (TYPE1 header) then it always reads it as 0. When looking into the QEMU hw/ppce500_pci.c, I do not find the PCI controller device registering BAR0. I do not find any other controller also doing so may they do not use BAR0. There are two issues when BAR0 is not there (which I can think of): 1) There should be BAR0 emulated for PCI Root comaplex (TYPE1 header) and when reading the size of BAR0, it should give size as per real h/w. This patch solves this problem. 2) Do we need this BAR0 inbound address translation? When BAR0 is of non-zero size then it will be configured for PCI address space to local address(CCSR) space translation on inbound access. The primary use case is for MSI interrupt generation. The device is configured with a address offsets in PCI address space, which will be translated to MSI interrupt generation MPIC registers. Currently I do not understand the MSI interrupt generation mechanism in QEMU and also IIRC we do not use QEMU MSI interrupt mechanism on e500 guest machines. But this BAR0 will be used when using MSI on e500. I can see one more issue, There are ATMUs emulated in hw/ppce500_pci.c, but i do not see these being used for address translation. So far that works because pci address space and local address space are 1:1 mapped. BAR0 inbound translation + ATMU translation will complete the address translation of inbound traffic. Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com --- hw/pci_host.h|9 + hw/ppc/e500.c| 21 + hw/ppce500_pci.c |7 +++ 3 files changed, 37 insertions(+), 0 deletions(-) diff --git a/hw/pci_host.h b/hw/pci_host.h index 4b9c300..c1ec7eb 100644 --- a/hw/pci_host.h +++ b/hw/pci_host.h @@ -41,10 +41,19 @@ struct PCIHostState { MemoryRegion data_mem; MemoryRegion mmcfg; MemoryRegion *address_space; +MemoryRegion bar0; uint32_t config_reg; PCIBus *bus; }; +typedef struct PPCE500CCSRState { +SysBusDevice *parent; +MemoryRegion ccsr_space; +} PPCE500CCSRState; + +#define TYPE_CCSR e500-ccsr +#define CCSR(obj) OBJECT_CHECK(PPCE500CCSRState, (obj), TYPE_CCSR) + /* common internal helpers for PCI/PCIe hosts, cut off overflows */ void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr, uint32_t limit, uint32_t val, uint32_t len); diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c index 6f0de6d..1f5da28 100644 --- a/hw/ppc/e500.c +++ b/hw/ppc/e500.c @@ -31,6 +31,7 @@ #include hw/loader.h #include elf.h #include hw/sysbus.h +#include hw/pci_host.h #include exec-memory.h #include host-utils.h @@ -587,3 +588,23 @@ void ppce500_init(PPCE500Params *params) kvmppc_init(); } } + +static void e500_ccsr_type_init(Object *obj) +{ +PPCE500CCSRState *ccsr = CCSR(obj); +ccsr-ccsr_space.size = int128_make64(MPC8544_CCSRBAR_SIZE); +} + +static const TypeInfo e500_ccsr_info = { +.name = TYPE_CCSR, +.parent= TYPE_SYS_BUS_DEVICE, +.instance_size = sizeof(PPCE500CCSRState), +.instance_init = e500_ccsr_type_init, +}; + +static void e500_ccsr_register_types(void) +{ +type_register_static(e500_ccsr_info); +} + +type_init(e500_ccsr_register_types) diff --git a/hw/ppce500_pci.c b/hw/ppce500_pci.c index 92b1dc0..135f2a6 100644 --- a/hw/ppce500_pci.c +++ b/hw/ppce500_pci.c @@ -315,6 +315,8 @@ static int e500_pcihost_initfn(SysBusDevice *dev) int i; MemoryRegion *address_space_mem = get_system_memory(); MemoryRegion *address_space_io = get_system_io(); +PPCE500CCSRState *ccsr; +PCIDevice *pdev; h = PCI_HOST_BRIDGE(dev); s = PPC_E500_PCI_HOST_BRIDGE(dev); @@ -342,6 +344,11 @@ static int e500_pcihost_initfn(SysBusDevice *dev) memory_region_add_subregion(s-container, PCIE500_REG_BASE, s-iomem); sysbus_init_mmio(dev, s-container); +ccsr = CCSR(object_new(TYPE_CCSR)); +memory_region_init_io(h-bar0, pci_host_conf_be_ops, h, + pci-bar0, memory_region_size(ccsr-ccsr_space)); +pdev = pci_find_device(b, 0, 0); +pci_register_bar(pdev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, h-bar0); return 0; } -- 1.7.0.4
Re: [Qemu-devel] [PATCH V3 10/11] vcpu: introduce lockmap
On 09/19/2012 07:40 AM, Peter Crosthwaite wrote: On Wed, Sep 19, 2012 at 2:32 PM, Edgar E. Iglesias edgar.igles...@gmail.com wrote: On Wed, Sep 19, 2012 at 02:25:48PM +1000, Peter Crosthwaite wrote: Ping for PMM, This is the root case of your block on the SDHCI series - this is a discussion on resolution to bogus infinite looping DMA. For current participants in this discussion, heres our thread on the same topic over in SD land: http://lists.gnu.org/archive/html/qemu-devel/2012-08/msg01017.html With the findings here and acknowledgement that this is a general problem, we either need to declare this issue of scope for SDHCI, or work with these guys (in the immediate future) on the DMA infinite loop problem flagged here. I dont mind if SDHCI+ADMA is the guinea pig here, but can we get a decisive plan for going forward with this issue if it is going to continue to block SDHCI. Thanks to Igor for identifying the overlap between these discussions. Hi, A couple of dumb questions. What is the reason for the blocker? that possible guest dos is worse than no functionality at all? Can't you put the DMA/IO processing into the background? I dont know a way do do asynchronous DMA unless I am missing something here. You could schedule a bottom half and do the accesses from there. Solves nothing though. So what happens is we have a device that walks a SG list synchronously while holding all the locks and stuff being discussed here. If that SG list infinite loops then crash. Did you mean loop, or recursion into the memory region that initiates DMA? As this problem already exists I don't think new device models need block on it. Maybe a simple fix is: void my_device_write(...) { if (dma_in_progress) { return; } ... s-dma_in_progress = true; cpu_physical_memory_rw(...) s-dma_in_progress = false; } and we could try to figure out how to move this into common code. -- error compiling committee.c: too many arguments to function
Re: [Qemu-devel] [PATCH V3 10/11] vcpu: introduce lockmap
On 2012-09-19 06:40, Peter Crosthwaite wrote: On Wed, Sep 19, 2012 at 2:32 PM, Edgar E. Iglesias edgar.igles...@gmail.com wrote: On Wed, Sep 19, 2012 at 02:25:48PM +1000, Peter Crosthwaite wrote: Ping for PMM, This is the root case of your block on the SDHCI series - this is a discussion on resolution to bogus infinite looping DMA. For current participants in this discussion, heres our thread on the same topic over in SD land: http://lists.gnu.org/archive/html/qemu-devel/2012-08/msg01017.html With the findings here and acknowledgement that this is a general problem, we either need to declare this issue of scope for SDHCI, or work with these guys (in the immediate future) on the DMA infinite loop problem flagged here. I dont mind if SDHCI+ADMA is the guinea pig here, but can we get a decisive plan for going forward with this issue if it is going to continue to block SDHCI. Thanks to Igor for identifying the overlap between these discussions. Hi, A couple of dumb questions. What is the reason for the blocker? that possible guest dos is worse than no functionality at all? Can't you put the DMA/IO processing into the background? I dont know a way do do asynchronous DMA unless I am missing something here. So what happens is we have a device that walks a SG list synchronously while holding all the locks and stuff being discussed here. If that SG list infinite loops then crash. We could switch to async DMA, but most DMA-capable devices aren't prepared for rescheduling points in the middle of their code. This will require quite a bit of code review. what's the diff between setup of bad descriptors and writing a while (1) loop in the guest CPU? While(1) in guest doesnt freeze all of QEMU (monitor still works and stuff), wheras the DMA ops going in circles does, as locks are taken by an infinite loop. That's the point. If we loop, we need at least a way to stop it by issuing a device or system reset. But I tend to think that detecting loops and failing/ignoring requests is easier implementable. Jan -- Siemens AG, Corporate Technology, CT RTC ITP SDP-DE Corporate Competence Center Embedded Linux
Re: [Qemu-devel] [RFC PATCH 11/13] hmp: add NBD server commands
Il 18/09/2012 22:22, Luiz Capitulino ha scritto: +if (addr.host == NULL || addr.port == NULL) { +error_set(errp, QERR_SOCKET_CREATE_FAILED); You can and should use monitor_printf() directly. Hmm, why? Then I would have two error paths, one with monitor_printf and one with hmp_handle_error. But I will change this to add an Error* to inet_parse. +goto exit; +} + +qmp_nbd_server_start(addr, errp); +if (errp != NULL) { Please, use error_is_set() instead. I'll just rename errp to local_err, which is the usual convention. +goto exit; +} + +/* Then try adding all block devices. If one fails, close all and + * exit. + */ +bs = NULL; +while ((bs = bdrv_next(bs))) { To keep hmp.c an honest qmp client, you should use qmp_query_block() instead of looping through BSs directly. You'll find other examples if you find for qmp_query_block() in this file. Right, thanks! Paolo
Re: [Qemu-devel] [PATCH V3 10/11] vcpu: introduce lockmap
On 09/17/2012 05:24 AM, liu ping fan wrote: On Thu, Sep 13, 2012 at 4:19 PM, Avi Kivity a...@redhat.com wrote: On 09/13/2012 09:55 AM, liu ping fan wrote: On Tue, Sep 11, 2012 at 8:41 PM, Avi Kivity a...@redhat.com wrote: On 09/11/2012 03:24 PM, Avi Kivity wrote: On 09/11/2012 12:57 PM, Jan Kiszka wrote: On 2012-09-11 11:44, liu ping fan wrote: On Tue, Sep 11, 2012 at 4:35 PM, Avi Kivity a...@redhat.com wrote: On 09/11/2012 10:51 AM, Liu Ping Fan wrote: From: Liu Ping Fan pingf...@linux.vnet.ibm.com The func call chain can suffer from recursively hold qemu_mutex_lock_iothread. We introduce lockmap to record the lock depth. What is the root cause? io handlers initiating I/O? cpu_physical_memory_rw() can be called nested, and when called, it can be protected by no-lock/device lock/ big-lock. I think without big lock, io-dispatcher should face the same issue. As to main-loop, have not carefully consider, but at least, dma-helper will call cpu_physical_memory_rw() with big-lock. That is our core problem: inconsistent invocation of existing services /wrt locking. For portio, I was lucky that there is no nesting and I was able to drop the big lock around all (x86) call sites. But MMIO is way more tricky due to DMA nesting. Maybe we need to switch to a continuation style. Instead of expecting cpu_physical_memory_rw() to complete synchronously, it becomes an asynchronous call and you provide it with a completion. That means devices which use it are forced to drop the lock in between. Block and network clients will be easy to convert since they already use APIs that drop the lock (except for accessing the descriptors). We could try to introduce a different version of cpu_physical_memory_rw, cpu_physical_memory_rw_unlocked. But the problem remains that an MMIO request can trigger the very same access in a nested fashion, and we will have to detect this to avoid locking up QEMU (locking up the guest might be OK). An async version of c_p_m_rw() will just cause a completion to bounce around, consuming cpu but not deadlocking anything. If we can keep a count of the bounces, we might be able to stall it indefinitely or at least ratelimit it. Another option is to require all users of c_p_m_rw() and related to use a coroutine or thread. That makes the programming easier (but still required a revalidation after the dropped lock). For the nested cpu_physical_memory_rw(), we change it internal but keep it sync API as it is. (Wrapper current cpu_physical_memory_rw() into cpu_physical_memory_rw_internal() ) LOCK() // can be device or big lock or both, depend on caller. .. cpu_physical_memory_rw() { UNLOCK() //unlock all the locks queue_work_on_thread(cpu_physical_memory_rw_internal, completion); // cpu_physical_memory_rw_internal can take lock(device,biglock) again wait_for_completion(completion) LOCK() } .. UNLOCK() This is dangerous. The caller expects to hold the lock across the call, and will not re-validate its state. Although cpu_physical_memory_rw_internal() can be freed to use lock, but with precondition. -- We still need to trace lock stack taken by cpu_physical_memory_rw(), so that it can return to caller correctly. Is that OK? I'm convinced that we need a recursive lock if we don't convert everything at once. So how about: - make bql a recursive lock (use pthreads, don't invent our own) - change c_p_m_rw() calling convention to caller may hold the BQL, but no device lock this allows devices to DMA each other. Unconverted device models will work as they used to. Converted device models will need to drop the device lock, re-acquire it, and revalidate any state. That will cause I think we are cornered by devices to DMA each other, and it raises the unavoidable nested lock. Also to avoid deadlock caused by device's lock sequence, we should drop the current device lock to acquire another one. The only little diverge is about the revalidate, do we need to rollback? It basically means you can't hold contents of device state in local variables. You need to read everything again from the device. That includes things like DMA enable bits. (btw: to we respect PCI_COMMAND_MASTER? it seems that we do not. This is a trivial example of an iommu, we should get that going). Or for converted device, we can just tag it a busy flag, that is checkset busy flag at the entry of device's mmio-dispatch. So when re-acquire device's lock, the device's state is intact. The state can be changed by a parallel access to another register, which is valid. Has anybody other suggestion? -- error compiling committee.c: too many arguments to function
Re: [Qemu-devel] [big lock] Discussion about the convention of device's DMA each other after breaking down biglock
On 09/19/2012 06:02 AM, liu ping fan wrote: Currently, cpu_physical_memory_rw() can be used directly or indirectly by mmio-dispatcher to access other devices' memory region. This can cause some problem when adopting device's private lock. Back ground refer to: http://lists.gnu.org/archive/html/qemu-devel/2012-09/msg01481.html For lazy, just refer to: http://lists.gnu.org/archive/html/qemu-devel/2012-09/msg01878.html --1st. the recursive lock of biglock. If we leave c_p_m_rw() as it is, ie, no lock inside. Then we can have the following (section of the whole call chain, and with private_lockA): lockA-mmio-dispatcher -- hold biglock -- c_p_m_rw() --- Before c_p_m_rw(), we drop private_lockA to anti the possibly of deadlock. But we can not anti the nested of this chain or calling to another lockB-mmio-dispatcher. So we can not avoid the possibility of nested lock of biglock. And another important factor is that we break the lock sequence: private_lock--biglock. All of these require us to push biglock's holding into c_p_m_rw(), the wrapper can not give help. I agree that this is unavoidable. --2nd. c_p_m_rw(), sync or async? IF we convert all of the device to be protected by refcount, then we can have //no big lock c_p_m_rw() { devB-ref++; { ---pushed onto another thread. lock_privatelock mr-ops-write(); unlock_privatelock } wait_for_completion(); devB-ref--; } This model can help c_p_m_rw() present as a SYNC API. But currently, we mix biglock and private lock together, and wait_for_completion() maybe block the release of big lock, which finally causes deadlock. So we can not simply rely on this model. Instead, we need to classify the calling scene into three cases: case1. lockA--dispatcher --- lockB-dispatcher //can use async+completion model case2. lockA--dispatcher --- biglock-dispatcher // sync, but can cause the nested lock of biglock case3. biglock-dispacher --- lockB-dispatcher // async to avoid the lock sequence problem, (as to completion, it need to be placed outside the top level biglock, and it is hard to do so. Suggest to change to case 1. Or at present, just leave it async) This new model will require the biglock can be nested. I think changing to an async model is too complicated. It's difficult enough already. Isn't dropping private locks + recursive big locks sufficient? -- error compiling committee.c: too many arguments to function
[Qemu-devel] [PATCH] Revert serial: fix retry logic
This reverts commit 67c5322d7000fd105a926eec44bc1765b7d70bdd: I'm not sure if the retry logic has ever worked when not using FIFO mode. I found this while writing a test case although code inspection confirms it is definitely broken. The TSR retry logic will never actually happen because it is guarded by an 'if (s-tsr_rety 0)' but this is the only place that can ever make the variable greater than zero. That effectively makes the retry logic an 'if (0) I believe this is a typo and the intention was = 0. Once this is fixed thoug I see double transmits with my test case. This is because in the non FIFO case, serial_xmit may get invoked while LSR.THRE is still high because the character was processed but the retransmit timer was still active. We can handle this by simply checking for LSR.THRE and returning early. It's possible that the FIFO paths also need some attention. Cc: Stefano Stabellini stefano.stabell...@eu.citrix.com Signed-off-by: Anthony Liguori aligu...@us.ibm.com Even if the previous logic was never worked, new logic breaks stuff - namely, qemu -enable-kvm -nographic -kernel /boot/vmlinuz-$(uname -r) -append console=ttyS0 -serial pty the above command will cause the virtual machine to stuck at startup using 100% CPU till one connects to the pty and sends any char to it. Note this is rather typical invocation for various headless virtual machines by libvirt. So revert this change for now, till a better solution will be found. Signed-off-by: Michael Tokarev m...@tls.msk.ru --- hw/serial.c |4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/hw/serial.c b/hw/serial.c index a421d1e..df54de2 100644 --- a/hw/serial.c +++ b/hw/serial.c @@ -327,8 +327,6 @@ static void serial_xmit(void *opaque) s-tsr = fifo_get(s,XMIT_FIFO); if (!s-xmit_fifo.count) s-lsr |= UART_LSR_THRE; -} else if ((s-lsr UART_LSR_THRE)) { -return; } else { s-tsr = s-thr; s-lsr |= UART_LSR_THRE; @@ -340,7 +338,7 @@ static void serial_xmit(void *opaque) /* in loopback mode, say that we just received a char */ serial_receive1(s, s-tsr, 1); } else if (qemu_chr_fe_write(s-chr, s-tsr, 1) != 1) { -if ((s-tsr_retry = 0) (s-tsr_retry = MAX_XMIT_RETRY)) { +if ((s-tsr_retry 0) (s-tsr_retry = MAX_XMIT_RETRY)) { s-tsr_retry++; qemu_mod_timer(s-transmit_timer, new_xmit_ts + s-char_transmit_time); return; -- 1.7.10.4
Re: [Qemu-devel] [RFC PATCH 09/13] qmp: add NBD server commands
Il 18/09/2012 22:11, Luiz Capitulino ha scritto: (apart from TODOs of course, specially on error handling). Yes, those were waiting for error_setg to get in the tree. :) Paolo
Re: [Qemu-devel] [PATCH v3 3/3] Fix address handling in inet_nonblocking_connect
On 14/09/12 02:58, Orit Wasserman wrote: getaddrinfo can give us a list of addresses, but we only try to connect to the first one. If that fails we never proceed to the next one. This is common on desktop setups that often have ipv6 configured but not actually working. To fix this make inet_connect_nonblocking retry connection with a different address. callers on inet_nonblocking_connect register a callback function that will be called when connect opertion completes, in case of failure the fd will have a negative value Hi Orit, We almost get to the destination :) Signed-off-by: Orit Wasserman owass...@redhat.com Signed-off-by: Michael S. Tsirkin m...@redhat.com --- migration-tcp.c | 29 +--- qemu-char.c |2 +- qemu-sockets.c | 95 +++--- qemu_socket.h | 13 ++-- 4 files changed, 102 insertions(+), 37 deletions(-) ... diff --git a/qemu-sockets.c b/qemu-sockets.c index 212075d..d321c58 100644 --- a/qemu-sockets.c +++ b/qemu-sockets.c @@ -24,6 +24,7 @@ #include qemu_socket.h #include qemu-common.h /* for qemu_isdigit */ +#include main-loop.h #ifndef AI_ADDRCONFIG # define AI_ADDRCONFIG 0 @@ -217,11 +218,69 @@ listen: ((rc) == -EINPROGRESS) #endif +/* Struct to store connect state for non blocking connect */ +typedef struct ConnectState { +int fd; +struct addrinfo *addr_list; +struct addrinfo *current_addr; +ConnectHandler *callback; +void *opaque; +Error *errp; This member is not used. +} ConnectState; # make(gcc-4.4.6-4.el6.x86_64) CCqemu-sockets.o qemu-sockets.c:229: error: redefinition of typedef ‘ConnectState’ qemu_socket.h:46: note: previous declaration of ‘ConnectState’ was here make: *** [qemu-sockets.o] Error 1 struct ConnectState { int fd; struct addrinfo *addr_list; struct addrinfo *current_addr; ConnectHandler *callback; void *opaque; }; + static int inet_connect_addr(struct addrinfo *addr, bool block, - bool *in_progress) + bool *in_progress, ConnectState *connect_state); + +static void wait_for_connect(void *opaque) +{ +ConnectState *s = opaque; +int val = 0, rc = 0; +socklen_t valsize = sizeof(val); +bool in_progress = false; + +qemu_set_fd_handler2(s-fd, NULL, NULL, NULL, NULL); + +do { +rc = getsockopt(s-fd, SOL_SOCKET, SO_ERROR, (void *) val, valsize); +} while (rc == -1 socket_error() == EINTR); # man getsockopt RETURN VALUE On success, zero is returned. On error, -1 is returned, and errno is set appropriately. .. original code: .. .. do { .. ret = getsockopt(s-fd, SOL_SOCKET, SO_ERROR, (void *) val, valsize); .. } while (ret == -1 (socket_error()) == EINTR); .. .. if (ret 0) { // ret equals to -1, and socket_error() != EINTR If ret 0, just set the error state, but the callback function is not set to NULL. Then tcp_wait_for_connect() will be called again, and retry to checking current socket by getsockopt(). But in this patch, we will abandon current socket, and connect next address on the list. We should reserve that block, and move qemu_set_fd_handler2(...NULL..) below it. .. migrate_fd_error(s); .. return; .. } .. .. qemu_set_fd_handler2(s-fd, NULL, NULL, NULL, NULL); +/* update rc to contain error details */ +if (!rc val) { +rc = -val; another issue: val = 0 all the time? +} + +/* connect error */ +if (rc 0) { +closesocket(s-fd); +s-fd = rc; +} + +/* try to connect to the next address on the list */ +while (s-current_addr-ai_next != NULL s-fd 0) { +s-current_addr = s-current_addr-ai_next; +s-fd = inet_connect_addr(s-current_addr, false, in_progress, s); +/* connect in progress */ +if (in_progress) { +return; +} +} + +freeaddrinfo(s-addr_list); +if (s-callback) { +s-callback(s-fd, s-opaque); +} +g_free(s); +return; +} -- Amos.
Re: [Qemu-devel] [PATCH v3 1/3] Refactor inet_connect_opts function
On 14/09/12 02:58, Orit Wasserman wrote: From: Michael S. Tsirkin m...@redhat.com refactor address resolution code to fix nonblocking connect remove getnameinfo call Signed-off-by: Michael S. Tsirkin m...@redhat.com Signed-off-by: Amos Kong ak...@redhat.com Signed-off-by: Orit Wasserman owass...@redhat.com Hi Orit, Happy new year! --- qemu-sockets.c | 144 +++ 1 files changed, 81 insertions(+), 63 deletions(-) diff --git a/qemu-sockets.c b/qemu-sockets.c index 361d890..939a453 100644 ... + +int inet_connect_opts(QemuOpts *opts, bool *in_progress, Error **errp) +{ +struct addrinfo *res, *e; +int sock = -1; +bool block = qemu_opt_get_bool(opts, block, 0); + +res = inet_parse_connect_opts(opts, errp); +if (!res) { +return -1; +} + +if (in_progress) { +*in_progress = false; } trivial comment: You moved this block to head of inet_connect_addr() in [patch 3/3], why not do this in this patch? I know if *in_progress becomes true, sock is always = 0, for loop will exits. But moving this block to inet_connect_addr() is clearer. for (e = res; e != NULL; e = e-ai_next) { -if (getnameinfo((struct sockaddr*)e-ai_addr,e-ai_addrlen, -uaddr,INET6_ADDRSTRLEN,uport,32, -NI_NUMERICHOST | NI_NUMERICSERV) != 0) { -fprintf(stderr,%s: getnameinfo: oops\n, __FUNCTION__); -continue; -} -sock = qemu_socket(e-ai_family, e-ai_socktype, e-ai_protocol); -if (sock 0) { -fprintf(stderr,%s: socket(%s): %s\n, __FUNCTION__, -inet_strfamily(e-ai_family), strerror(errno)); -continue; -} -setsockopt(sock,SOL_SOCKET,SO_REUSEADDR,(void*)on,sizeof(on)); -if (!block) { -socket_set_nonblock(sock); +sock = inet_connect_addr(e, block, in_progress); +if (sock = 0) { +break; } -/* connect to peer */ -do { -rc = 0; -if (connect(sock, e-ai_addr, e-ai_addrlen) 0) { -rc = -socket_error(); -} -} while (rc == -EINTR); - - #ifdef _WIN32 -if (!block (rc == -EINPROGRESS || rc == -EWOULDBLOCK - || rc == -WSAEALREADY)) { - #else -if (!block (rc == -EINPROGRESS)) { - #endif -if (in_progress) { -*in_progress = true; -} -} else if (rc 0) { -if (NULL == e-ai_next) -fprintf(stderr, %s: connect(%s,%s,%s,%s): %s\n, __FUNCTION__, -inet_strfamily(e-ai_family), -e-ai_canonname, uaddr, uport, strerror(errno)); -closesocket(sock); -continue; -} -freeaddrinfo(res); -return sock; } -error_set(errp, QERR_SOCKET_CONNECT_FAILED); +if (sock 0) { +error_set(errp, QERR_SOCKET_CONNECT_FAILED); +} freeaddrinfo(res); -return -1; +return sock; } int inet_dgram_opts(QemuOpts *opts) -- Amos.
Re: [Qemu-devel] [PATCH V3 10/11] vcpu: introduce lockmap
On Wed, Sep 19, 2012 at 4:01 PM, Avi Kivity a...@redhat.com wrote: On 09/17/2012 05:24 AM, liu ping fan wrote: On Thu, Sep 13, 2012 at 4:19 PM, Avi Kivity a...@redhat.com wrote: On 09/13/2012 09:55 AM, liu ping fan wrote: On Tue, Sep 11, 2012 at 8:41 PM, Avi Kivity a...@redhat.com wrote: On 09/11/2012 03:24 PM, Avi Kivity wrote: On 09/11/2012 12:57 PM, Jan Kiszka wrote: On 2012-09-11 11:44, liu ping fan wrote: On Tue, Sep 11, 2012 at 4:35 PM, Avi Kivity a...@redhat.com wrote: On 09/11/2012 10:51 AM, Liu Ping Fan wrote: From: Liu Ping Fan pingf...@linux.vnet.ibm.com The func call chain can suffer from recursively hold qemu_mutex_lock_iothread. We introduce lockmap to record the lock depth. What is the root cause? io handlers initiating I/O? cpu_physical_memory_rw() can be called nested, and when called, it can be protected by no-lock/device lock/ big-lock. I think without big lock, io-dispatcher should face the same issue. As to main-loop, have not carefully consider, but at least, dma-helper will call cpu_physical_memory_rw() with big-lock. That is our core problem: inconsistent invocation of existing services /wrt locking. For portio, I was lucky that there is no nesting and I was able to drop the big lock around all (x86) call sites. But MMIO is way more tricky due to DMA nesting. Maybe we need to switch to a continuation style. Instead of expecting cpu_physical_memory_rw() to complete synchronously, it becomes an asynchronous call and you provide it with a completion. That means devices which use it are forced to drop the lock in between. Block and network clients will be easy to convert since they already use APIs that drop the lock (except for accessing the descriptors). We could try to introduce a different version of cpu_physical_memory_rw, cpu_physical_memory_rw_unlocked. But the problem remains that an MMIO request can trigger the very same access in a nested fashion, and we will have to detect this to avoid locking up QEMU (locking up the guest might be OK). An async version of c_p_m_rw() will just cause a completion to bounce around, consuming cpu but not deadlocking anything. If we can keep a count of the bounces, we might be able to stall it indefinitely or at least ratelimit it. Another option is to require all users of c_p_m_rw() and related to use a coroutine or thread. That makes the programming easier (but still required a revalidation after the dropped lock). For the nested cpu_physical_memory_rw(), we change it internal but keep it sync API as it is. (Wrapper current cpu_physical_memory_rw() into cpu_physical_memory_rw_internal() ) LOCK() // can be device or big lock or both, depend on caller. .. cpu_physical_memory_rw() { UNLOCK() //unlock all the locks queue_work_on_thread(cpu_physical_memory_rw_internal, completion); // cpu_physical_memory_rw_internal can take lock(device,biglock) again wait_for_completion(completion) LOCK() } .. UNLOCK() This is dangerous. The caller expects to hold the lock across the call, and will not re-validate its state. Although cpu_physical_memory_rw_internal() can be freed to use lock, but with precondition. -- We still need to trace lock stack taken by cpu_physical_memory_rw(), so that it can return to caller correctly. Is that OK? I'm convinced that we need a recursive lock if we don't convert everything at once. So how about: - make bql a recursive lock (use pthreads, don't invent our own) - change c_p_m_rw() calling convention to caller may hold the BQL, but no device lock this allows devices to DMA each other. Unconverted device models will work as they used to. Converted device models will need to drop the device lock, re-acquire it, and revalidate any state. That will cause I think we are cornered by devices to DMA each other, and it raises the unavoidable nested lock. Also to avoid deadlock caused by device's lock sequence, we should drop the current device lock to acquire another one. The only little diverge is about the revalidate, do we need to rollback? It basically means you can't hold contents of device state in local variables. You need to read everything again from the device. That includes things like DMA enable bits. I think that read everything again from the device can not work. Suppose the following scene: If the device's state contains the change of a series of internal registers (supposing partA+partB; partC+partD), after partA changed, the device's lock is broken. At this point, another access to this device, it will work on partA+partB to determine C+D, but since partB is not correctly updated yet. So C+D may be decided by broken context and be wrong. (btw: to we respect PCI_COMMAND_MASTER? it seems that we do not. This is a trivial example of an iommu, we should get that going). Or for converted device, we can just tag it a busy flag, that is checkset busy flag at
Re: [Qemu-devel] [hellogcc] Call for Chinese QEMU developers
+1 By the way, forward it to QEMU mail list in order that some qemu guys can see this. 2012/9/19 陳韋任 (Wei-Ren Chen) che...@cs.nctu.edu.tw: Hi all, 借個場地,我們想知道在 QEMU 上的中國/台灣開發者有哪些人。 有興趣露個臉的朋友,請回覆此帖。彼此認識一下。 韋任 -- Wei-Ren Chen (陳韋任) Computer Systems Lab, Institute of Information Science, Academia Sinica, Taiwan (R.O.C.) Tel:886-2-2788-3799 #1667 Homepage: http://people.cs.nctu.edu.tw/~chenwj -- Regards, Zhi Yong Wu
Re: [Qemu-devel] [PATCH] qemu-clock: add an alarm timer based on timerfd
Il 19/09/2012 09:44, Jan Kiszka ha scritto: Looks good. I think Peter Portante tested something similar, and found no big difference between the two. But it's a good thing and, in my opinion, for non-timerfd OSes we should simply adjust the select() timeout and not bother with signals. What would be the advantage of timerfd over select? On Linux, both use hrtimers (and low slack for RT processes). I'm starting to like the select/WaitForMultipleObjects pattern as it would allow to consolidate over basically two versions of timers and simplify the code. Oh, I didn't know this. Even better. Paolo
Re: [Qemu-devel] [PULL 00/11] Block patches
Am 18.09.2012 19:49, schrieb Michael Tokarev: On 14.09.2012 16:39, Kevin Wolf wrote: The following changes since commit e0a1e32dbc41e6b2aabb436a9417dfd32177a3dc: Merge branch 'usb.64' of git://git.kraxel.org/qemu (2012-09-11 18:06:56 +0200) are available in the git repository at: git://repo.or.cz/qemu/kevin.git for-anthony Are any of these appropriate for -stable? I think some are, below: I think all of your suggestions would be appropriate, even though most of them fix rather theoretical cases. Jason Baron (1): ahci: properly reset PxCMD on HBA reset Most likely this. Jason? Yes, this fixes a real visible bug with AHCI CD-ROMs. Kevin
Re: [Qemu-devel] [big lock] Discussion about the convention of device's DMA each other after breaking down biglock
On Wed, Sep 19, 2012 at 4:06 PM, Avi Kivity a...@redhat.com wrote: On 09/19/2012 06:02 AM, liu ping fan wrote: Currently, cpu_physical_memory_rw() can be used directly or indirectly by mmio-dispatcher to access other devices' memory region. This can cause some problem when adopting device's private lock. Back ground refer to: http://lists.gnu.org/archive/html/qemu-devel/2012-09/msg01481.html For lazy, just refer to: http://lists.gnu.org/archive/html/qemu-devel/2012-09/msg01878.html --1st. the recursive lock of biglock. If we leave c_p_m_rw() as it is, ie, no lock inside. Then we can have the following (section of the whole call chain, and with private_lockA): lockA-mmio-dispatcher -- hold biglock -- c_p_m_rw() --- Before c_p_m_rw(), we drop private_lockA to anti the possibly of deadlock. But we can not anti the nested of this chain or calling to another lockB-mmio-dispatcher. So we can not avoid the possibility of nested lock of biglock. And another important factor is that we break the lock sequence: private_lock--biglock. All of these require us to push biglock's holding into c_p_m_rw(), the wrapper can not give help. I agree that this is unavoidable. --2nd. c_p_m_rw(), sync or async? IF we convert all of the device to be protected by refcount, then we can have //no big lock c_p_m_rw() { devB-ref++; { ---pushed onto another thread. lock_privatelock mr-ops-write(); unlock_privatelock } wait_for_completion(); devB-ref--; } This model can help c_p_m_rw() present as a SYNC API. But currently, we mix biglock and private lock together, and wait_for_completion() maybe block the release of big lock, which finally causes deadlock. So we can not simply rely on this model. Instead, we need to classify the calling scene into three cases: case1. lockA--dispatcher --- lockB-dispatcher //can use async+completion model case2. lockA--dispatcher --- biglock-dispatcher // sync, but can cause the nested lock of biglock case3. biglock-dispacher --- lockB-dispatcher // async to avoid the lock sequence problem, (as to completion, it need to be placed outside the top level biglock, and it is hard to do so. Suggest to change to case 1. Or at present, just leave it async) This new model will require the biglock can be nested. I think changing to an async model is too complicated. It's difficult enough already. Isn't dropping private locks + recursive big locks sufficient? I think that dropping private locks + recursive big locks just cover case 2. And most of the important, it dont describe case3 which break the rule of lock sequence private-lock -- biglock. Scene: devA_lock--(devX_with-biglock---devB_lock). I just want to classify and post these cases to discuss. Maybe we can achieve without async. Thanks and regards, pingfan -- error compiling committee.c: too many arguments to function
Re: [Qemu-devel] [PATCH V3 10/11] vcpu: introduce lockmap
On 09/19/2012 11:36 AM, liu ping fan wrote: It basically means you can't hold contents of device state in local variables. You need to read everything again from the device. That includes things like DMA enable bits. I think that read everything again from the device can not work. Suppose the following scene: If the device's state contains the change of a series of internal registers (supposing partA+partB; partC+partD), after partA changed, the device's lock is broken. At this point, another access to this device, it will work on partA+partB to determine C+D, but since partB is not correctly updated yet. So C+D may be decided by broken context and be wrong. That's the guest's problem. Note it could have happened even before the change, since the writes to A/B/C/D are unordered wrt the DMA. (btw: to we respect PCI_COMMAND_MASTER? it seems that we do not. This is a trivial example of an iommu, we should get that going). Or for converted device, we can just tag it a busy flag, that is checkset busy flag at the entry of device's mmio-dispatch. So when re-acquire device's lock, the device's state is intact. The state can be changed by a parallel access to another register, which is valid. Do you mean that the device can be accessed in parallel? But how? we use device's lock. Some DMA is asynchronous already, like networking and IDE DMA. So we drop the big lock while doing it. With the change to drop device locks around c_p_m_rw(), we have that problem everywhere. What my suggestion is: lock(); set_and_test(dev-busy); if busy unlock and return; changing device registers; do other things including calling to c_p_m_rw() //here,lock broken, but set_and_test() works clear(dev-busy); unlock(); So changing device registers is protected, and unbreakable. But the changes may be legitimate. Maybe you're writing to a completely unrelated register, from a different vcpu, now that write is lost. -- error compiling committee.c: too many arguments to function
Re: [Qemu-devel] [big lock] Discussion about the convention of device's DMA each other after breaking down biglock
On 09/19/2012 12:00 PM, liu ping fan wrote: On Wed, Sep 19, 2012 at 4:06 PM, Avi Kivity a...@redhat.com wrote: On 09/19/2012 06:02 AM, liu ping fan wrote: Currently, cpu_physical_memory_rw() can be used directly or indirectly by mmio-dispatcher to access other devices' memory region. This can cause some problem when adopting device's private lock. Back ground refer to: http://lists.gnu.org/archive/html/qemu-devel/2012-09/msg01481.html For lazy, just refer to: http://lists.gnu.org/archive/html/qemu-devel/2012-09/msg01878.html --1st. the recursive lock of biglock. If we leave c_p_m_rw() as it is, ie, no lock inside. Then we can have the following (section of the whole call chain, and with private_lockA): lockA-mmio-dispatcher -- hold biglock -- c_p_m_rw() --- Before c_p_m_rw(), we drop private_lockA to anti the possibly of deadlock. But we can not anti the nested of this chain or calling to another lockB-mmio-dispatcher. So we can not avoid the possibility of nested lock of biglock. And another important factor is that we break the lock sequence: private_lock--biglock. All of these require us to push biglock's holding into c_p_m_rw(), the wrapper can not give help. I agree that this is unavoidable. --2nd. c_p_m_rw(), sync or async? IF we convert all of the device to be protected by refcount, then we can have //no big lock c_p_m_rw() { devB-ref++; { ---pushed onto another thread. lock_privatelock mr-ops-write(); unlock_privatelock } wait_for_completion(); devB-ref--; } This model can help c_p_m_rw() present as a SYNC API. But currently, we mix biglock and private lock together, and wait_for_completion() maybe block the release of big lock, which finally causes deadlock. So we can not simply rely on this model. Instead, we need to classify the calling scene into three cases: case1. lockA--dispatcher --- lockB-dispatcher //can use async+completion model case2. lockA--dispatcher --- biglock-dispatcher // sync, but can cause the nested lock of biglock case3. biglock-dispacher --- lockB-dispatcher // async to avoid the lock sequence problem, (as to completion, it need to be placed outside the top level biglock, and it is hard to do so. Suggest to change to case 1. Or at present, just leave it async) This new model will require the biglock can be nested. I think changing to an async model is too complicated. It's difficult enough already. Isn't dropping private locks + recursive big locks sufficient? I think that dropping private locks + recursive big locks just cover case 2. And most of the important, it dont describe case3 which break the rule of lock sequence private-lock -- biglock. Scene: devA_lock--(devX_with-biglock---devB_lock). Why not? devA will drop its local lock, devX will retake the big lock recursively, devB will take its local lock. In the end, we have biglock - devB. I just want to classify and post these cases to discuss. Maybe we can achieve without async. -- error compiling committee.c: too many arguments to function
Re: [Qemu-devel] [big lock] Discussion about the convention of device's DMA each other after breaking down biglock
On Wed, Sep 19, 2012 at 5:07 PM, Avi Kivity a...@redhat.com wrote: On 09/19/2012 12:00 PM, liu ping fan wrote: On Wed, Sep 19, 2012 at 4:06 PM, Avi Kivity a...@redhat.com wrote: On 09/19/2012 06:02 AM, liu ping fan wrote: Currently, cpu_physical_memory_rw() can be used directly or indirectly by mmio-dispatcher to access other devices' memory region. This can cause some problem when adopting device's private lock. Back ground refer to: http://lists.gnu.org/archive/html/qemu-devel/2012-09/msg01481.html For lazy, just refer to: http://lists.gnu.org/archive/html/qemu-devel/2012-09/msg01878.html --1st. the recursive lock of biglock. If we leave c_p_m_rw() as it is, ie, no lock inside. Then we can have the following (section of the whole call chain, and with private_lockA): lockA-mmio-dispatcher -- hold biglock -- c_p_m_rw() --- Before c_p_m_rw(), we drop private_lockA to anti the possibly of deadlock. But we can not anti the nested of this chain or calling to another lockB-mmio-dispatcher. So we can not avoid the possibility of nested lock of biglock. And another important factor is that we break the lock sequence: private_lock--biglock. All of these require us to push biglock's holding into c_p_m_rw(), the wrapper can not give help. I agree that this is unavoidable. --2nd. c_p_m_rw(), sync or async? IF we convert all of the device to be protected by refcount, then we can have //no big lock c_p_m_rw() { devB-ref++; { ---pushed onto another thread. lock_privatelock mr-ops-write(); unlock_privatelock } wait_for_completion(); devB-ref--; } This model can help c_p_m_rw() present as a SYNC API. But currently, we mix biglock and private lock together, and wait_for_completion() maybe block the release of big lock, which finally causes deadlock. So we can not simply rely on this model. Instead, we need to classify the calling scene into three cases: case1. lockA--dispatcher --- lockB-dispatcher //can use async+completion model case2. lockA--dispatcher --- biglock-dispatcher // sync, but can cause the nested lock of biglock case3. biglock-dispacher --- lockB-dispatcher // async to avoid the lock sequence problem, (as to completion, it need to be placed outside the top level biglock, and it is hard to do so. Suggest to change to case 1. Or at present, just leave it async) This new model will require the biglock can be nested. I think changing to an async model is too complicated. It's difficult enough already. Isn't dropping private locks + recursive big locks sufficient? I think that dropping private locks + recursive big locks just cover case 2. And most of the important, it dont describe case3 which break the rule of lock sequence private-lock -- biglock. Scene: devA_lock--(devX_with-biglock---devB_lock). Why not? devA will drop its local lock, devX will retake the big lock recursively, devB will take its local lock. In the end, we have biglock - devB. But when adopting local lock, we assume take local lock, then biglock. Otherwise another thread will take biglock then local lock, which cause the possibility of deadlock. I just want to classify and post these cases to discuss. Maybe we can achieve without async. -- error compiling committee.c: too many arguments to function
Re: [Qemu-devel] [big lock] Discussion about the convention of device's DMA each other after breaking down biglock
Il 19/09/2012 11:11, liu ping fan ha scritto: Why not? devA will drop its local lock, devX will retake the big lock recursively, devB will take its local lock. In the end, we have biglock - devB. But when adopting local lock, we assume take local lock, then biglock. No, because the local lock will be dropped before taking the biglock. The order must always be coarse-fine. I don't know if the front-end (device) lock should come before or after the back-end (e.g. netdev) lock in the hierarchy, but that's another story. Paolo Otherwise another thread will take biglock then local lock, which cause the possibility of deadlock.
Re: [Qemu-devel] [RfC PATCH] vga: add mmio bar to standard vga
On 09/18/2012 12:51 PM, Gerd Hoffmann wrote: This patch adds a mmio bar to the qemu standard vga which allows to access the standard vga registers and bochs dispi interface registers via mmio. diff --git a/hw/vga-pci.c b/hw/vga-pci.c index 9abbada..e05e2ef 100644 --- a/hw/vga-pci.c +++ b/hw/vga-pci.c @@ -30,9 +30,36 @@ #include qemu-timer.h #include loader.h +/* + * QEMU Standard VGA -- MMIO area spec. + * + * Using PCI bar #2, keeping #1 free, which leaves the + * door open to upgrade bar #0 to 64bit. + * + * mmio area layout: + * 0x - 0x03ff reserved, for possible virtio extension. + * 0x0400 - 0x041f vga ioports (0x3c0 - 0x3df), remapped 1:1 Do they support word accesses to set both index and data? + * 0x0500 - 0x0515 bochs dispi interface registers, mapped flat without + * index/data ports. Use (index 1) as offset for + * (16bit) register access. + */ BAR should disappear with -M old. + +static const MemoryRegionOps pci_vga_ioport_ops = { +.read = pci_vga_ioport_read, +.write = pci_vga_ioport_write, +.valid.min_access_size = 1, +.valid.max_access_size = 4, +.impl.min_access_size = 1, +.impl.max_access_size = 1, +.endianness = DEVICE_LITTLE_ENDIAN, +}; Looks like word writes are supported provided the memory API breaks up writes in little endian order. Better to make it explicit. -- error compiling committee.c: too many arguments to function
Re: [Qemu-devel] [PATCH] usb-redir: Allow to attach USB 2.0 devices to 1.1 host controller
Hi, On 09/18/2012 11:18 PM, Anthony Liguori wrote: Hans de Goede hdego...@redhat.com writes: Hi, On 09/17/2012 11:18 AM, Jan Kiszka wrote: On 2012-09-17 11:08, Hans de Goede wrote: snip Although not pretty I'm ok with this, since I actually want to add similar code to allow usb-3 (superspeed) devices like a usb-3 usb-stick to work with ehci or uhci controllers :) Great, that would have been my next question, but I don't have hardware for that around yet. I do have hardware for that around, so once you've respun your patch to address the issues discussed, then that will give me a nice basis to add usb-3 usb-stick to ehci-controller redirection :) BTW, I'm facing several incompatibilities with passed-through CDC/ACM devices (e.g. a Galaxy S2), independent of my patch. Both host-linux and redir doesn't allow to use them properly but show different symptoms. Need to analyze and report once time permits. Hmm, there is (was) one know issues with these devices, which has been fixed in usbredir, so first of all make sure that the usbredir on your spice-client / usbredirserver, has this patch: http://cgit.freedesktop.org/spice/usbredir/commit/?id=7783d3db61083bbf7f61b1ea8608c666b4c6a1dd If that does not work, add the debug parameter to the usb-redir device, set it to 4, collect logs of trying to redirect the device and send me the logs please, ie: -device usb-redir,chardev=usbredirchardev1,id=usbredirdev1,debug=4 Also be aware that usb-redir relies on chardev flowcontrol working, which it does not upstream! See for example here for the chardev flow control patch set which RHEL / Fedora carry: http://cgit.freedesktop.org/~jwrdegoede/qemu/log/?h=qemu-kvm-1.2-usbredirofs=50 Those patches are garbage. sigh, very very deep sigh No they are not garbage. They: 1) Fix a real issue, which is independent of spice / usb-redir but happens to be more easily triggerable with them. Note that the original set was developed to fix a bug which did not involve either! 2) Are disliked by you because they build upon the existing not pretty chardev code. We all agree that needs a rewrite. But that is not a valid reason to keep this perfectly fine patchset out of qemu master for years now! 3) You keep saying they are racy, but you've never given a simple step by step thread 1 does foo, thread 2 does bar replay of such a theoretical race. 4) Have caused no issues in almost 2 years of deployment in the field in both RHEL and Fedora. 5) Are kept out of master by you for no valid reasons, you seem to use them as a tool to strongarm people into rewriting the qemu chardev layer. Well it seems that no one is biting because of -ENOTIME, so can we now please get these included, after almost 2 years since their first posting ? Are you saying that usb-redir doesn't work in qemu.git? So why are we even carrying it? It works just as well as virtio-console or any other chardev frontend, any chardev frontent can currently trigger asserts by writing data faster then the backend can handle. This is just more easily triggerable with USB trafic then with a serial console. The original bug-report this patch set once was written for was about virtio-console and is dated 2010-08-05 ! So why are we even carrying virtio-console ? Regards, Hans p.s. Note that I'm *not* the original author of these patches.
Re: [Qemu-devel] [big lock] Discussion about the convention of device's DMA each other after breaking down biglock
On Wed, Sep 19, 2012 at 5:14 PM, Paolo Bonzini pbonz...@redhat.com wrote: Il 19/09/2012 11:11, liu ping fan ha scritto: Why not? devA will drop its local lock, devX will retake the big lock recursively, devB will take its local lock. In the end, we have biglock - devB. But when adopting local lock, we assume take local lock, then biglock. No, because the local lock will be dropped before taking the biglock. The order must always be coarse-fine. But if we takes coarse firstly, then the mmio-dispatcher will still contend for the big lock against each other. I don't know if the front-end (device) lock should come before or after the back-end (e.g. netdev) lock in the hierarchy, but that's another story. I think fine-coarse may be the rule, since for some code path, it is not necessary to take coarse lock. Thanks and regards, pingfan Paolo Otherwise another thread will take biglock then local lock, which cause the possibility of deadlock.
Re: [Qemu-devel] [big lock] Discussion about the convention of device's DMA each other after breaking down biglock
On 09/19/2012 12:14 PM, Paolo Bonzini wrote: I don't know if the front-end (device) lock should come before or after the back-end (e.g. netdev) lock in the hierarchy, but that's another story. I would say device - backend. It's natural if the backend is the timer subsystem, so extend it from there to the block and network layers. Of course callbacks want it to be the other way round. We could solve the callback problem in the same way we're using for mmio callbacks. When providing the callback, give the subsystem -ref() and -unref() methods. As long as a callback is pending, the subsystem holds a reference. Once the callback has been invoked or cancelled, the reference can be released. -- error compiling committee.c: too many arguments to function
Re: [Qemu-devel] [big lock] Discussion about the convention of device's DMA each other after breaking down biglock
On 09/19/2012 12:19 PM, liu ping fan wrote: On Wed, Sep 19, 2012 at 5:14 PM, Paolo Bonzini pbonz...@redhat.com wrote: Il 19/09/2012 11:11, liu ping fan ha scritto: Why not? devA will drop its local lock, devX will retake the big lock recursively, devB will take its local lock. In the end, we have biglock - devB. But when adopting local lock, we assume take local lock, then biglock. No, because the local lock will be dropped before taking the biglock. The order must always be coarse-fine. But if we takes coarse firstly, then the mmio-dispatcher will still contend for the big lock against each other. Can you detail the sequence? I don't know if the front-end (device) lock should come before or after the back-end (e.g. netdev) lock in the hierarchy, but that's another story. I think fine-coarse may be the rule, since for some code path, it is not necessary to take coarse lock. coarse-fine doesn't mean you have to take the coarse lock. Valid: lock(coarse) lock(fine) Valid: lock(find) Valid: lock(coarse) Invalid: lock(fine) lock(coarse) -- error compiling committee.c: too many arguments to function
Re: [Qemu-devel] [big lock] Discussion about the convention of device's DMA each other after breaking down biglock
On 2012-09-19 11:23, Avi Kivity wrote: On 09/19/2012 12:19 PM, liu ping fan wrote: On Wed, Sep 19, 2012 at 5:14 PM, Paolo Bonzini pbonz...@redhat.com wrote: Il 19/09/2012 11:11, liu ping fan ha scritto: Why not? devA will drop its local lock, devX will retake the big lock recursively, devB will take its local lock. In the end, we have biglock - devB. But when adopting local lock, we assume take local lock, then biglock. No, because the local lock will be dropped before taking the biglock. The order must always be coarse-fine. But if we takes coarse firstly, then the mmio-dispatcher will still contend for the big lock against each other. Can you detail the sequence? I don't know if the front-end (device) lock should come before or after the back-end (e.g. netdev) lock in the hierarchy, but that's another story. I think fine-coarse may be the rule, since for some code path, it is not necessary to take coarse lock. coarse-fine doesn't mean you have to take the coarse lock. Valid: lock(coarse) lock(fine) This is invalid due to prio inversion issues, independent of potential deadlocks. Jan Valid: lock(find) Valid: lock(coarse) Invalid: lock(fine) lock(coarse) -- Siemens AG, Corporate Technology, CT RTC ITP SDP-DE Corporate Competence Center Embedded Linux
Re: [Qemu-devel] [big lock] Discussion about the convention of device's DMA each other after breaking down biglock
On 2012-09-19 11:27, Jan Kiszka wrote: On 2012-09-19 11:23, Avi Kivity wrote: On 09/19/2012 12:19 PM, liu ping fan wrote: On Wed, Sep 19, 2012 at 5:14 PM, Paolo Bonzini pbonz...@redhat.com wrote: Il 19/09/2012 11:11, liu ping fan ha scritto: Why not? devA will drop its local lock, devX will retake the big lock recursively, devB will take its local lock. In the end, we have biglock - devB. But when adopting local lock, we assume take local lock, then biglock. No, because the local lock will be dropped before taking the biglock. The order must always be coarse-fine. But if we takes coarse firstly, then the mmio-dispatcher will still contend for the big lock against each other. Can you detail the sequence? I don't know if the front-end (device) lock should come before or after the back-end (e.g. netdev) lock in the hierarchy, but that's another story. I think fine-coarse may be the rule, since for some code path, it is not necessary to take coarse lock. coarse-fine doesn't mean you have to take the coarse lock. Valid: lock(coarse) lock(fine) This is invalid due to prio inversion issues, independent of potential deadlocks. Err, forget it - the other way around is the problem. Sorry, Jan -- Siemens AG, Corporate Technology, CT RTC ITP SDP-DE Corporate Competence Center Embedded Linux
Re: [Qemu-devel] [big lock] Discussion about the convention of device's DMA each other after breaking down biglock
On 2012-09-19 11:00, liu ping fan wrote: On Wed, Sep 19, 2012 at 4:06 PM, Avi Kivity a...@redhat.com wrote: On 09/19/2012 06:02 AM, liu ping fan wrote: Currently, cpu_physical_memory_rw() can be used directly or indirectly by mmio-dispatcher to access other devices' memory region. This can cause some problem when adopting device's private lock. Back ground refer to: http://lists.gnu.org/archive/html/qemu-devel/2012-09/msg01481.html For lazy, just refer to: http://lists.gnu.org/archive/html/qemu-devel/2012-09/msg01878.html --1st. the recursive lock of biglock. If we leave c_p_m_rw() as it is, ie, no lock inside. Then we can have the following (section of the whole call chain, and with private_lockA): lockA-mmio-dispatcher -- hold biglock -- c_p_m_rw() --- Before c_p_m_rw(), we drop private_lockA to anti the possibly of deadlock. But we can not anti the nested of this chain or calling to another lockB-mmio-dispatcher. So we can not avoid the possibility of nested lock of biglock. And another important factor is that we break the lock sequence: private_lock--biglock. All of these require us to push biglock's holding into c_p_m_rw(), the wrapper can not give help. I agree that this is unavoidable. --2nd. c_p_m_rw(), sync or async? IF we convert all of the device to be protected by refcount, then we can have //no big lock c_p_m_rw() { devB-ref++; { ---pushed onto another thread. lock_privatelock mr-ops-write(); unlock_privatelock } wait_for_completion(); devB-ref--; } This model can help c_p_m_rw() present as a SYNC API. But currently, we mix biglock and private lock together, and wait_for_completion() maybe block the release of big lock, which finally causes deadlock. So we can not simply rely on this model. Instead, we need to classify the calling scene into three cases: case1. lockA--dispatcher --- lockB-dispatcher //can use async+completion model case2. lockA--dispatcher --- biglock-dispatcher // sync, but can cause the nested lock of biglock case3. biglock-dispacher --- lockB-dispatcher // async to avoid the lock sequence problem, (as to completion, it need to be placed outside the top level biglock, and it is hard to do so. Suggest to change to case 1. Or at present, just leave it async) This new model will require the biglock can be nested. I think changing to an async model is too complicated. It's difficult enough already. Isn't dropping private locks + recursive big locks sufficient? I think that dropping private locks + recursive big locks just cover case 2. And most of the important, it dont describe case3 which break the rule of lock sequence private-lock -- biglock. Scene: devA_lock--(devX_with-biglock---devB_lock). I just want to classify and post these cases to discuss. Maybe we can achieve without async. What about the following: What we really need to support in practice is MMIO access triggers RAM access of device model. Scenarios where a device access triggers another MMIO access could likely just be rejected without causing troubles. So, when we dispatch a request to a device, we mark that the current thread is in a MMIO dispatch and reject any follow-up c_p_m_rw that does _not_ target RAM, ie. is another, nested MMIO request - independent of its destination. How much of the known issues would this solve? And what would remain open? Jan -- Siemens AG, Corporate Technology, CT RTC ITP SDP-DE Corporate Competence Center Embedded Linux
Re: [Qemu-devel] [big lock] Discussion about the convention of device's DMA each other after breaking down biglock
On 09/19/2012 12:34 PM, Jan Kiszka wrote: What about the following: What we really need to support in practice is MMIO access triggers RAM access of device model. Scenarios where a device access triggers another MMIO access could likely just be rejected without causing troubles. So, when we dispatch a request to a device, we mark that the current thread is in a MMIO dispatch and reject any follow-up c_p_m_rw that does _not_ target RAM, ie. is another, nested MMIO request - independent of its destination. How much of the known issues would this solve? And what would remain open? Various iommu-like devices re-dispatch I/O, like changing endianness or bitband. I don't know whether it targets I/O rather than RAM. If they do, we can push the support into the memory API. I think it's acceptable as a short term solution (short term meaning as long as needed). -- error compiling committee.c: too many arguments to function
Re: [Qemu-devel] [big lock] Discussion about the convention of device's DMA each other after breaking down biglock
Il 19/09/2012 11:21, Avi Kivity ha scritto: I don't know if the front-end (device) lock should come before or after the back-end (e.g. netdev) lock in the hierarchy, but that's another story. I would say device - backend. It's natural if the backend is the timer subsystem, so extend it from there to the block and network layers. Of course callbacks want it to be the other way round. Yes, that's what I wasn't sure about. In many cases I believe callbacks can just release the backend lock. It works for timers, for example: for(;;) { ts = clock-active_timers; if (!qemu_timer_expired_ns(ts, current_time)) { break; } /* remove timer from the list before calling the callback */ clock-active_timers = ts-next; ts-next = NULL; /* run the callback (the timer list can be modified) */ - ts-cb(ts-opaque); + cb = ts-cb; + opaque = ts-opaque; + unlock(); + cb(opaque); + lock(); } (The hunch is that ts could be deleted exactly at the moment the callback is unlocked. This can be solved with ref/unref on the opaque value, as you mention below). Paolo We could solve the callback problem in the same way we're using for mmio callbacks. When providing the callback, give the subsystem -ref() and -unref() methods. As long as a callback is pending, the subsystem holds a reference. Once the callback has been invoked or cancelled, the reference can be released.
Re: [Qemu-devel] [PATCH V3 1/5] libqblock build system
Il 19/09/2012 08:35, Wenchao Xia ha scritto: +QEMU_OBJS=$(tools-obj-y) $(block-obj-y) +QEMU_OBJS_FILTERED=$(filter %.o,$(QEMU_OBJS)) What does this filter out? $(block-obj-y) contains /block, which would cause trouble in building, so filtered it out. I think that's because you have to call unnest-vars, not unnest-vars-1. Paolo
Re: [Qemu-devel] [big lock] Discussion about the convention of device's DMA each other after breaking down biglock
On 09/19/2012 12:51 PM, Paolo Bonzini wrote: Il 19/09/2012 11:21, Avi Kivity ha scritto: I don't know if the front-end (device) lock should come before or after the back-end (e.g. netdev) lock in the hierarchy, but that's another story. I would say device - backend. It's natural if the backend is the timer subsystem, so extend it from there to the block and network layers. Of course callbacks want it to be the other way round. Yes, that's what I wasn't sure about. In many cases I believe callbacks can just release the backend lock. It works for timers, for example: for(;;) { ts = clock-active_timers; if (!qemu_timer_expired_ns(ts, current_time)) { break; } /* remove timer from the list before calling the callback */ clock-active_timers = ts-next; ts-next = NULL; /* run the callback (the timer list can be modified) */ - ts-cb(ts-opaque); + cb = ts-cb; + opaque = ts-opaque; + unlock(); + cb(opaque); + lock(); } (The hunch is that ts could be deleted exactly at the moment the callback is unlocked. This can be solved with ref/unref on the opaque value, as you mention below). Are you saying that this works as is or not? It does seem broken wrt deletion; after qemu_del_timer() completes the caller expects the callback not to be called. This isn't trivial to guarantee, we need something like dispatch_timer(): pending += 1 timer.ref() drop lock timer.cb() take lock timer.unref() pending -= 1 notify del_timer(): take lock timer.unlink() while pending: wait for notification drop lock but, if del_timer is called with the device lock held, we deadlock. ugh. -- error compiling committee.c: too many arguments to function
Re: [Qemu-devel] [PATCH V9 0/8] memory: unify ioport registration
On 09/04/2012 06:13 PM, Julien Grall wrote: This is the nineth version of patch series about ioport registration. Some part of QEMU still use register_ioport* functions to register ioport. These functions doesn't allow to use Memory Listener on it. Jan't patch that dropped the debug ioports causes conflicts, please rebase atop upstream. -- error compiling committee.c: too many arguments to function
Re: [Qemu-devel] [RFC PATCH 00/13] Embedded NBD server
On Mon, Aug 27, 2012 at 05:00:13PM +0200, Paolo Bonzini wrote: Hi all, this is an RFC series implementing an NBD server embedded inside QEMU. This can be used in various cases, including migration with non-shared storage. Three new commands are introduced at the QMP level { 'command': 'nbd-server-start', 'data': { 'addr': 'IPSocketAddress' } } It is probably desirable/required to also have a command where libvirt can pass in a pre-opened listening socket, since I think SELinux policy will not want to allow QEMU to create listening sockets itself. Regards, Daniel -- |: http://berrange.com -o-http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|
Re: [Qemu-devel] [big lock] Discussion about the convention of device's DMA each other after breaking down biglock
On 2012-09-19 11:50, Avi Kivity wrote: On 09/19/2012 12:34 PM, Jan Kiszka wrote: What about the following: What we really need to support in practice is MMIO access triggers RAM access of device model. Scenarios where a device access triggers another MMIO access could likely just be rejected without causing troubles. So, when we dispatch a request to a device, we mark that the current thread is in a MMIO dispatch and reject any follow-up c_p_m_rw that does _not_ target RAM, ie. is another, nested MMIO request - independent of its destination. How much of the known issues would this solve? And what would remain open? Various iommu-like devices re-dispatch I/O, like changing endianness or bitband. I don't know whether it targets I/O rather than RAM. If such dispatching is lock-less and we validate it's not recursive, then it should be fine even when not targeting RAM. And we could also factor out generic dispatchers like MMIO-PIO. If they do, we can push the support into the memory API. I think it's acceptable as a short term solution (short term meaning as long as needed). We will always have to avoid loops. I think all we can improve is what we still allow by better detecting what would be wrong. Jan -- Siemens AG, Corporate Technology, CT RTC ITP SDP-DE Corporate Competence Center Embedded Linux
Re: [Qemu-devel] [big lock] Discussion about the convention of device's DMA each other after breaking down biglock
Il 19/09/2012 12:06, Avi Kivity ha scritto: (The hunch is that ts could be deleted exactly at the moment the callback is unlocked. This can be solved with ref/unref on the opaque value, as you mention below). Are you saying that this works as is or not? It does seem broken wrt deletion; after qemu_del_timer() completes the caller expects the callback not to be called. Ouch, then I think the only solution is to remove this invariant if you add fine-grained locking and re-check the enable conditions in the timer callback. If you allow calling del_timer to be called with the device lock held, you cannot fixing without deadlocking. If you don't, the caller of del_timer cannot set any expectation because the timer could be run in the window between dropping the device lock and calling del_timer Paolo This isn't trivial to guarantee, we need something like dispatch_timer(): pending += 1 timer.ref() drop lock timer.cb() take lock timer.unref() pending -= 1 notify del_timer(): take lock timer.unlink() while pending: wait for notification drop lock but, if del_timer is called with the device lock held, we deadlock. ugh. Wait for notification should be a cond_wait that drops the lock. Alternatively: dispatch_timer(): drop lock if timer has expired: timer.cb() take lock notify del_timer(): take lock timer.unlink() drop lock
Re: [Qemu-devel] [RFC PATCH 00/13] Embedded NBD server
Il 19/09/2012 12:16, Daniel P. Berrange ha scritto: Three new commands are introduced at the QMP level { 'command': 'nbd-server-start', 'data': { 'addr': 'IPSocketAddress' } } It is probably desirable/required to also have a command where libvirt can pass in a pre-opened listening socket, since I think SELinux policy will not want to allow QEMU to create listening sockets itself. Ok, I'll make it like this: { 'type': 'UnixSocketAddress', 'data': { 'path': 'str' } } { 'type': 'String', 'data': { 'str': 'str' } } {'union': 'SocketAddress', 'data': { 'inet': 'IPSocketAddress', 'unix': 'UnixSocketAddress', 'fd': 'String', } } Paolo
Re: [Qemu-devel] [PATCH v2 2/2] Versatile Express: add modelling of NOR flash
On 18 September 2012 21:59, Francesco Lavra francescolavra...@gmail.com wrote: On 09/18/2012 03:46 PM, Peter Maydell wrote: On 17 September 2012 21:08, Francesco Lavra francescolavra...@gmail.com wrote: This patch adds modelling of the two NOR flash banks found on the Versatile Express motherboard. Tested with U-Boot running on an emulated Versatile Express, with either A9 or A15 CoreTile. Signed-off-by: Francesco Lavra francescolavra...@gmail.com --- Changes in v2: Use drive_get_next() instead of drive_get() to get a backing storage for each flash bank. hw/vexpress.c | 24 ++-- 1 files changed, 22 insertions(+), 2 deletions(-) diff --git a/hw/vexpress.c b/hw/vexpress.c index 454c2bb..2ffeab1 100644 --- a/hw/vexpress.c +++ b/hw/vexpress.c @@ -29,8 +29,12 @@ #include sysemu.h #include boards.h #include exec-memory.h +#include blockdev.h +#include flash.h #define VEXPRESS_BOARD_ID 0x8e0 +#define VEXPRESS_FLASH_SIZE (64 * 1024 * 1024) +#define VEXPRESS_FLASH_SECT_SIZE (256 * 1024) static struct arm_boot_info vexpress_binfo; @@ -355,6 +359,7 @@ static void vexpress_common_init(const VEDBoardInfo *daughterboard, Something in your email send path is wrapping long lines, which means 'git am' doesn't work cleanly. If you're planning on sending more QEMU patches you might want to look into getting this fixed. Oops, sorry about that, if you want me to re-send the patch with these artifacts fixed, just let me know. qemu_irq pic[64]; uint32_t proc_id; uint32_t sys_id; +DriveInfo *dinfo; ram_addr_t vram_size, sram_size; MemoryRegion *sysmem = get_system_memory(); MemoryRegion *vram = g_new(MemoryRegion, 1); @@ -410,8 +415,23 @@ static void vexpress_common_init(const VEDBoardInfo *daughterboard, sysbus_create_simple(pl111, map[VE_CLCD], pic[14]); -/* VE_NORFLASH0: not modelled */ -/* VE_NORFLASH1: not modelled */ +dinfo = drive_get_next(IF_PFLASH); +if (!pflash_cfi01_register(map[VE_NORFLASH0], NULL, vexpress.flash0, +VEXPRESS_FLASH_SIZE, dinfo ? dinfo-bdrv : NULL, +VEXPRESS_FLASH_SECT_SIZE, +VEXPRESS_FLASH_SIZE / VEXPRESS_FLASH_SECT_SIZE, 4, +0x00, 0x89, 0x00, 0x18, 0)) { +fprintf(stderr, vexpress: error registering flash 0.\n); Shouldn't these errors be fatal? I checked the existing uses of pflash_cfi_0[1,2]_register() in the code, and if I'm not mistaken only in 5 out of 19 devices these errors are fatal, in the other 14 cases initialization continues even after flash registration failure, with or without an error message. Let me know if you still prefer these errors to be fatal. So the only reason this can fail is if the user specified a file to back the flash but trying to read it failed (ie, bad filename or file not the same size as the flash). I think that merits actually stopping on error. Ideally in the long term the flash devices should be converted to proper QOM devices and we could push the error handling back into the device itself (which is better positioned to distinguish bad filename from wrong length I suspect). -- PMM
Re: [Qemu-devel] [big lock] Discussion about the convention of device's DMA each other after breaking down biglock
On 09/19/2012 01:19 PM, Paolo Bonzini wrote: Il 19/09/2012 12:06, Avi Kivity ha scritto: (The hunch is that ts could be deleted exactly at the moment the callback is unlocked. This can be solved with ref/unref on the opaque value, as you mention below). Are you saying that this works as is or not? It does seem broken wrt deletion; after qemu_del_timer() completes the caller expects the callback not to be called. Ouch, then I think the only solution is to remove this invariant if you add fine-grained locking and re-check the enable conditions in the timer callback. I think so too, I know authors of GUI frameworks suffer from this problem (the GUI calling into the framework vs. the framework asking the GUI to repaint). Don't know what the state of the art solution is. If you allow calling del_timer to be called with the device lock held, you cannot fixing without deadlocking. If you don't, the caller of del_timer cannot set any expectation because the timer could be run in the window between dropping the device lock and calling del_timer Right. Callbacks should check qemu_timer_deleted() after taking their little lock. The reference held by the timer subsystem protects against qemu_free_timer(). -- error compiling committee.c: too many arguments to function
[Qemu-devel] [Bug 1042388] Re: qemu: Unsupported syscall: 257 (timer_create)
I have a fix for this. I can now successfully install ghc and compile programs with it. In the process of cleaning up the patch and working on a test for the test suite. -- You received this bug notification because you are a member of qemu- devel-ml, which is subscribed to QEMU. https://bugs.launchpad.net/bugs/1042388 Title: qemu: Unsupported syscall: 257 (timer_create) Status in QEMU: New Bug description: Running qemu-arm-static for git HEAD. When I try to install ghc from debian into my arm chroot I get: Setting up ghc (7.4.1-4) ... qemu: Unsupported syscall: 257 ghc: timer_create: Function not implemented qemu: Unsupported syscall: 257 ghc-pkg: timer_create: Function not implemented dpkg: error processing ghc (--configure): subprocess installed post-installation script returned error exit status 1 Errors were encountered while processing: ghc E: Sub-process /usr/bin/dpkg returned an error code (1) To manage notifications about this bug go to: https://bugs.launchpad.net/qemu/+bug/1042388/+subscriptions
Re: [Qemu-devel] [PATCH] Adding BAR0 for e500 PCI controller
On 19.09.2012, at 09:41, Bharat Bhushan wrote: PCI Root complex have TYPE-1 configuration header while PCI endpoint have type-0 configuration header. The type-1 configuration header have a BAR (BAR0). In Freescale PCI controller BAR0 is used for mapping pci address space to CCSR address space. This can used for 2 purposes: 1) for MSI interrupt generation 2) Allow CCSR registers access when configured as PCI endpoint, which I am not sure is a use case with QEMU-KVM guest. What I observed is that when guest read the size of BAR0 of host controller configuration header (TYPE1 header) then it always reads it as 0. When looking into the QEMU hw/ppce500_pci.c, I do not find the PCI controller device registering BAR0. I do not find any other controller also doing so may they do not use BAR0. There are two issues when BAR0 is not there (which I can think of): 1) There should be BAR0 emulated for PCI Root comaplex (TYPE1 header) and when reading the size of BAR0, it should give size as per real h/w. This patch solves this problem. 2) Do we need this BAR0 inbound address translation? When BAR0 is of non-zero size then it will be configured for PCI address space to local address(CCSR) space translation on inbound access. The primary use case is for MSI interrupt generation. The device is configured with a address offsets in PCI address space, which will be translated to MSI interrupt generation MPIC registers. Currently I do not understand the MSI interrupt generation mechanism in QEMU and also IIRC we do not use QEMU MSI interrupt mechanism on e500 guest machines. But this BAR0 will be used when using MSI on e500. I can see one more issue, There are ATMUs emulated in hw/ppce500_pci.c, but i do not see these being used for address translation. So far that works because pci address space and local address space are 1:1 mapped. BAR0 inbound translation + ATMU translation will complete the address translation of inbound traffic. Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com --- hw/pci_host.h|9 + hw/ppc/e500.c| 21 + hw/ppce500_pci.c |7 +++ 3 files changed, 37 insertions(+), 0 deletions(-) diff --git a/hw/pci_host.h b/hw/pci_host.h index 4b9c300..c1ec7eb 100644 --- a/hw/pci_host.h +++ b/hw/pci_host.h @@ -41,10 +41,19 @@ struct PCIHostState { MemoryRegion data_mem; MemoryRegion mmcfg; MemoryRegion *address_space; +MemoryRegion bar0; uint32_t config_reg; PCIBus *bus; }; +typedef struct PPCE500CCSRState { +SysBusDevice *parent; +MemoryRegion ccsr_space; +} PPCE500CCSRState; + +#define TYPE_CCSR e500-ccsr +#define CCSR(obj) OBJECT_CHECK(PPCE500CCSRState, (obj), TYPE_CCSR) + All of this is e500 specific, so it should definitely not be in any generic pci host header. /* common internal helpers for PCI/PCIe hosts, cut off overflows */ void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr, uint32_t limit, uint32_t val, uint32_t len); diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c index 6f0de6d..1f5da28 100644 --- a/hw/ppc/e500.c +++ b/hw/ppc/e500.c @@ -31,6 +31,7 @@ #include hw/loader.h #include elf.h #include hw/sysbus.h +#include hw/pci_host.h #include exec-memory.h #include host-utils.h @@ -587,3 +588,23 @@ void ppce500_init(PPCE500Params *params) kvmppc_init(); } } + +static void e500_ccsr_type_init(Object *obj) +{ +PPCE500CCSRState *ccsr = CCSR(obj); +ccsr-ccsr_space.size = int128_make64(MPC8544_CCSRBAR_SIZE); +} + +static const TypeInfo e500_ccsr_info = { +.name = TYPE_CCSR, +.parent= TYPE_SYS_BUS_DEVICE, +.instance_size = sizeof(PPCE500CCSRState), +.instance_init = e500_ccsr_type_init, +}; + +static void e500_ccsr_register_types(void) +{ +type_register_static(e500_ccsr_info); +} + +type_init(e500_ccsr_register_types) diff --git a/hw/ppce500_pci.c b/hw/ppce500_pci.c index 92b1dc0..135f2a6 100644 --- a/hw/ppce500_pci.c +++ b/hw/ppce500_pci.c @@ -315,6 +315,8 @@ static int e500_pcihost_initfn(SysBusDevice *dev) int i; MemoryRegion *address_space_mem = get_system_memory(); MemoryRegion *address_space_io = get_system_io(); +PPCE500CCSRState *ccsr; +PCIDevice *pdev; h = PCI_HOST_BRIDGE(dev); s = PPC_E500_PCI_HOST_BRIDGE(dev); @@ -342,6 +344,11 @@ static int e500_pcihost_initfn(SysBusDevice *dev) memory_region_add_subregion(s-container, PCIE500_REG_BASE, s-iomem); sysbus_init_mmio(dev, s-container); +ccsr = CCSR(object_new(TYPE_CCSR)); +memory_region_init_io(h-bar0, pci_host_conf_be_ops, h, + pci-bar0, memory_region_size(ccsr-ccsr_space)); +pdev = pci_find_device(b, 0, 0); +pci_register_bar(pdev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, h-bar0); No. The PCI host bridge doesn't create the CCSR object. The machine model
Re: [Qemu-devel] [PATCH] qapi: convert add_client
Il 18/09/2012 21:06, Luiz Capitulino ha scritto: +fd = monitor_get_fd(cur_mon, fdname); +if (fd 0) { +error_setg(errp, file-description '%s' has not been found, fdname); +return; +} Please change this to add an Error * argument to monitor_get_fd, and change non-Error-friendly callers of monitor_get_fd to monitor_handle_fd_param. You can then reuse the error in monitor_handle_fd_param (there is already QERR_FD_NOT_FOUND, by the way). I'll post the patches later in my reviewed NBD series, please take them from there and I'll rebase before my pull request. Paolo
Re: [Qemu-devel] [PATCH 000/126] Rewrite s390x translator
On 19.09.2012, at 02:32, Richard Henderson wrote: On 09/18/2012 02:09 PM, Alexander Graf wrote: Also, 102-126 are missing for me :). Yeah, they never made it to the list. I guess next time I shouldn't try to send to many at the same time. I should stagger them into hunks of 25 at a time or so. I've fixed some bugs since the v1, and I believe I've incorporated all of the comments to date. Is it helpful for you if I re-send, or would it simply be easier to examine the branch? I find email easier to read usually :). Please make sure to test a full Debian guest as well as something s390x based (RHEL/SLES). Alex git://repo.or.cz/qemu/rth.git rth/s390-reorg-3 r~
[Qemu-devel] [PATCH] xen: Fix, no unplug of pt device by platform device.
The Xen platform device will unplug any NICs if requested by the guest (PVonHVM) including a NIC that would have been passthrough. This patch makes sure that a passthrough device will not be unplug. Reported-by: Zhang, Yang Z yang.z.zh...@intel.com Signed-off-by: Anthony PERARD anthony.per...@citrix.com --- hw/xen_platform.c | 8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/hw/xen_platform.c b/hw/xen_platform.c index 0d6c2ff..956dbfe 100644 --- a/hw/xen_platform.c +++ b/hw/xen_platform.c @@ -85,8 +85,10 @@ static void log_writeb(PCIXenPlatformState *s, char val) static void unplug_nic(PCIBus *b, PCIDevice *d, void *o) { +/* We have to ignore passthrough devices */ if (pci_get_word(d-config + PCI_CLASS_DEVICE) == -PCI_CLASS_NETWORK_ETHERNET) { +PCI_CLASS_NETWORK_ETHERNET + strcmp(d-name, xen-pci-passthrough) != 0) { qdev_free(d-qdev); } } @@ -98,8 +100,10 @@ static void pci_unplug_nics(PCIBus *bus) static void unplug_disks(PCIBus *b, PCIDevice *d, void *o) { +/* We have to ignore passthrough devices */ if (pci_get_word(d-config + PCI_CLASS_DEVICE) == -PCI_CLASS_STORAGE_IDE) { +PCI_CLASS_STORAGE_IDE + strcmp(d-name, xen-pci-passthrough) != 0) { qdev_unplug((d-qdev), NULL); } } -- Anthony PERARD
Re: [Qemu-devel] [PATCH 010/126] target-s390: Reorg exception handling
On 19.09.2012, at 02:14, Richard Henderson wrote: On 09/18/2012 01:18 PM, Alexander Graf wrote: -/* remember what pgm exeption this was */ +/* Remember what pgm exeption this was. */ tmp = tcg_const_i32(code); tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUS390XState, int_pgm_code)); tcg_temp_free_i32(tmp); -tmp = tcg_const_i32(ilc); +tmp = tcg_const_i32(s-next_pc - s-pc); Mind to explain this one? ILC = the size of the insn. Rather than passing ILC around into gen_program_exception, get it back from the s-next_pc value that we stored into DisasContext. Ah, makes sense. Maybe create a small helper that makes it more obvious: static int current_ilc(DisasContext *s) { /* Next pc - current pc = current instruction length. */ return s-next_pc - s-pc; } Alex
[Qemu-devel] [PATCH 4/9] fbdev: add linux framebuffer display driver.
Display works, requires truecolor framebuffer with 16 or 32 bpp on the host. 32bpp is recommended. The framebuffer is used as-is, qemu doesn't try to switch modes. With LCD displays mode switching is pretty pointless IMHO, also it wouldn't work anyway with the most common fbdev drivers (vesafb, KMS). Guest display is centered on the host screen. Mouse works, uses /dev/input/mice. Keyboard works. Guest screen has whatever keymap you load inside the guest. Text windows (monitor, serial, ...) have a simple en-us keymap. Good enough to type monitor commands. Not goot enough to work seriously on a serial terminal. But the qemu terminal emulation isn't good enough for that anyway ;) Hot keys: Ctrl-Alt-Fnr - host console switching. Ctrl-Alt-nr - qemu console switching. Ctrl-Alt-ESC- exit qemu. Special feature: Sane console switching. Switching away stops screen updates. Switching back redraws the screen. When started from the linux console qemu uses the vt you've started it from (requires just read/write access to /dev/fb0). When starting from somewhere else qemu tries to open a unused virtual terminal and switch to it (usually requires root privileges to open /dev/ttynr). Signed-off-by: Gerd Hoffmann kra...@redhat.com --- console.h |4 + qemu-options.hx |8 + sysemu.h|1 + trace-events| 15 + ui/Makefile.objs|1 + ui/fbdev.c | 965 +++ ui/linux-keynames.h | 388 + vl.c| 19 + 8 files changed, 1401 insertions(+), 0 deletions(-) create mode 100644 ui/fbdev.c create mode 100644 ui/linux-keynames.h diff --git a/console.h b/console.h index bef2d2d..aa9959b 100644 --- a/console.h +++ b/console.h @@ -417,6 +417,10 @@ void qemu_console_copy(DisplayState *ds, int src_x, int src_y, /* sdl.c */ void sdl_display_init(DisplayState *ds, int full_screen, int no_frame); +/* fbdev.c */ +int fbdev_display_init(DisplayState *ds, const char *device, Error **err); +void fbdev_display_uninit(DisplayState *ds); + /* cocoa.m */ void cocoa_display_init(DisplayState *ds, int full_screen); diff --git a/qemu-options.hx b/qemu-options.hx index 09c86c4..3445655 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -947,6 +947,14 @@ Enable/disable spice seamless migration. Default is off. @end table ETEXI +DEF(fbdev, 0, QEMU_OPTION_fbdev, +-fbdev enable fbdev\n, QEMU_ARCH_ALL) +STEXI +@item -fbdev +@findex -fbdev +Enable fbdev (linux framebuffer). +ETEXI + DEF(portrait, 0, QEMU_OPTION_portrait, -portrait rotate graphical output 90 deg left (only PXA LCD)\n, QEMU_ARCH_ALL) diff --git a/sysemu.h b/sysemu.h index 65552ac..34e6bfa 100644 --- a/sysemu.h +++ b/sysemu.h @@ -93,6 +93,7 @@ typedef enum DisplayType DT_DEFAULT, DT_CURSES, DT_SDL, +DT_FBDEV, DT_NOGRAPHIC, DT_NONE, } DisplayType; diff --git a/trace-events b/trace-events index b48fe2d..0d0b7fa 100644 --- a/trace-events +++ b/trace-events @@ -994,3 +994,18 @@ spapr_pci_rtas_ibm_change_msi(unsigned func, unsigned req) func %u, requested % spapr_pci_rtas_ibm_query_interrupt_source_number(unsigned ioa, unsigned intr) queries for #%u, IRQ%u spapr_pci_msi_write(uint64_t addr, uint64_t data, uint32_t dt_irq) @%PRIx64=%PRIx64 IRQ %u spapr_pci_lsi_set(const char *busname, int pin, uint32_t irq) %s PIN%d IRQ %u + +# ui/fbdev.c +fbdev_enabled(void) +fbdev_cleanup(void) +fbdev_vt_activate(int vtno, int wait) vtno %d, wait %d +fbdev_vt_activated(void) +fbdev_vt_release_request(void) +fbdev_vt_released(void) +fbdev_vt_aquire_request(void) +fbdev_vt_aquired(void) +fbdev_kbd_raw(int enable) enable %d +fbdev_kbd_event(int keycode, const char *kname, int up) keycode 0x%x [%s], down %d +fbdev_dpy_resize(int w, int h) %dx%d +fbdev_dpy_redraw(void) + diff --git a/ui/Makefile.objs b/ui/Makefile.objs index adc07be..55ddcf2 100644 --- a/ui/Makefile.objs +++ b/ui/Makefile.objs @@ -12,3 +12,4 @@ common-obj-$(CONFIG_SDL) += sdl.o sdl_zoom.o x_keymap.o common-obj-$(CONFIG_COCOA) += cocoa.o common-obj-$(CONFIG_CURSES) += curses.o common-obj-$(CONFIG_VNC) += $(vnc-obj-y) +common-obj-$(CONFIG_LINUX) += fbdev.o diff --git a/ui/fbdev.c b/ui/fbdev.c new file mode 100644 index 000..6dfbacc --- /dev/null +++ b/ui/fbdev.c @@ -0,0 +1,965 @@ +/* + * linux fbdev output driver. + * + * Author: Gerd Hoffmann kra...@redhat.com + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ +#include stdio.h +#include stdlib.h +#include stdbool.h +#include string.h +#include unistd.h +#include fcntl.h +#include signal.h +#include termios.h + +#include sys/ioctl.h +#include sys/mman.h + +#include linux/kd.h +#include linux/vt.h +#include linux/fb.h + +#include qemu-common.h +#include console.h +#include keymaps.h +#include sysemu.h +#include pflib.h + +/* + * must be last so
[Qemu-devel] [PATCH 5/9] fbdev: add monitor command to enable/disable
This patch adds a fbdev monitor command to enable/disable the fbdev display at runtime to both qmp and hmp. qmp: framebuffer-display enable=on|off hmp: framebuffer-display on|off Signed-off-by: Gerd Hoffmann kra...@redhat.com --- hmp-commands.hx | 15 +++ hmp.c|9 + hmp.h|1 + qapi-schema.json | 13 + qmp-commands.hx |6 ++ qmp.c| 19 +++ 6 files changed, 63 insertions(+), 0 deletions(-) diff --git a/hmp-commands.hx b/hmp-commands.hx index ed67e99..93e75ab 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -1377,6 +1377,21 @@ passed since 1970, i.e. unix epoch. ETEXI { +.name = framebuffer-display, +.args_type = enable:b, +.params = on|off, +.help = enable/disable linux console framebuffer display, +.mhandler.cmd = hmp_framebuffer_display, +}, + +STEXI +@item framebuffer-display on | off +@findex framebuffer-display + +enable/disable linux console framebuffer display. +ETEXI + +{ .name = info, .args_type = item:s?, .params = [subcommand], diff --git a/hmp.c b/hmp.c index ba6fbd3..9677ef6 100644 --- a/hmp.c +++ b/hmp.c @@ -1168,3 +1168,12 @@ void hmp_screen_dump(Monitor *mon, const QDict *qdict) qmp_screendump(filename, err); hmp_handle_error(mon, err); } + +void hmp_framebuffer_display(Monitor *mon, const QDict *qdict) +{ +int enable = qdict_get_bool(qdict, enable); +Error *errp = NULL; + +qmp_framebuffer_display(enable, errp); +hmp_handle_error(mon, errp); +} diff --git a/hmp.h b/hmp.h index 48b9c59..20aa08c 100644 --- a/hmp.h +++ b/hmp.h @@ -73,5 +73,6 @@ void hmp_getfd(Monitor *mon, const QDict *qdict); void hmp_closefd(Monitor *mon, const QDict *qdict); void hmp_send_key(Monitor *mon, const QDict *qdict); void hmp_screen_dump(Monitor *mon, const QDict *qdict); +void hmp_framebuffer_display(Monitor *mon, const QDict *qdict); #endif diff --git a/qapi-schema.json b/qapi-schema.json index 14e4419..87c77f7 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -2619,3 +2619,16 @@ # Since: 0.14.0 ## { 'command': 'screendump', 'data': {'filename': 'str'} } + +# @framebuffer-display: +# +# Enable/disable linux console framebuffer display. +# +# @enable: whenever the framebuffer display should be enabled or disabled. +# +# Returns: Nothing. +# +# Since: 1.3 +# +## +{ 'command': 'framebuffer-display', 'data': {'enable': 'bool'} } diff --git a/qmp-commands.hx b/qmp-commands.hx index 6e21ddb..e0a747a 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -2539,3 +2539,9 @@ EQMP .args_type = , .mhandler.cmd_new = qmp_marshal_input_query_target, }, + +{ +.name = framebuffer-display, +.args_type = enable:b, +.mhandler.cmd_new = qmp_marshal_input_framebuffer_display, +}, diff --git a/qmp.c b/qmp.c index 8463922..56e007f 100644 --- a/qmp.c +++ b/qmp.c @@ -391,6 +391,25 @@ void qmp_change(const char *device, const char *target, } } +void qmp_framebuffer_display(bool enable, Error **errp) +{ +#if defined(CONFIG_LINUX) +DisplayState *ds = get_displaystate(); + +if (enable) { +if (fbdev_display_init(ds, NULL, errp) != 0) { +if (!error_is_set(errp)) { +error_setg(errp, fbdev initialization failed); +} +} +} else { +fbdev_display_uninit(ds); +} +#else +error_setg(errp, fbdev support disabled at compile time); +#endif +} + static void qom_list_types_tramp(ObjectClass *klass, void *data) { ObjectTypeInfoList *e, **pret = data; -- 1.7.1
Re: [Qemu-devel] [PATCH 010/126] target-s390: Reorg exception handling
On 19 September 2012 01:14, Richard Henderson r...@twiddle.net wrote: On 09/18/2012 01:18 PM, Alexander Graf wrote: -/* remember what pgm exeption this was */ +/* Remember what pgm exeption this was. */ ...if you're changing this comment then at least fix the typo (exception)... tmp = tcg_const_i32(code); tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUS390XState, int_pgm_code)); tcg_temp_free_i32(tmp); -tmp = tcg_const_i32(ilc); +tmp = tcg_const_i32(s-next_pc - s-pc); Mind to explain this one? ILC = the size of the insn. Rather than passing ILC around into gen_program_exception, get it back from the s-next_pc value that we stored into DisasContext. ...but isn't (s-next_pc - s-pc) ilc*2, not ilc? Compare this hunk from elsewhere in the patch: -tmp32_2 = tcg_const_i32(ilc * 2); -tmp32_3 = tcg_const_i32(EXCP_SVC); +tmp32_2 = tcg_const_i32(s-next_pc - s-pc); -- PMM
Re: [Qemu-devel] [PATCH 010/126] target-s390: Reorg exception handling
On 19.09.2012, at 13:29, Peter Maydell wrote: On 19 September 2012 01:14, Richard Henderson r...@twiddle.net wrote: On 09/18/2012 01:18 PM, Alexander Graf wrote: -/* remember what pgm exeption this was */ +/* Remember what pgm exeption this was. */ ...if you're changing this comment then at least fix the typo (exception)... tmp = tcg_const_i32(code); tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUS390XState, int_pgm_code)); tcg_temp_free_i32(tmp); -tmp = tcg_const_i32(ilc); +tmp = tcg_const_i32(s-next_pc - s-pc); Mind to explain this one? ILC = the size of the insn. Rather than passing ILC around into gen_program_exception, get it back from the s-next_pc value that we stored into DisasContext. ...but isn't (s-next_pc - s-pc) ilc*2, not ilc? Compare this hunk from elsewhere in the patch: -tmp32_2 = tcg_const_i32(ilc * 2); -tmp32_3 = tcg_const_i32(EXCP_SVC); +tmp32_2 = tcg_const_i32(s-next_pc - s-pc); Yes, it is, and hence it's correct. The exception field encodes the real instruction length, not the ILC bits in the opcode. Maybe the naming is unfortunate... Alex
Re: [Qemu-devel] [RfC PATCH] vga: add mmio bar to standard vga
+ * 0x0400 - 0x041f vga ioports (0x3c0 - 0x3df), remapped 1:1 Do they support word accesses to set both index and data? + * 0x0500 - 0x0515 bochs dispi interface registers, mapped flat without + * index/data ports. Use (index 1) as offset for + * (16bit) register access. + */ BAR should disappear with -M old. Sure. I have a patch in flight which adds the pc-1.3 machine type, once this is in I can easily add the compat stuff. +static const MemoryRegionOps pci_vga_ioport_ops = { +.read = pci_vga_ioport_read, +.write = pci_vga_ioport_write, +.valid.min_access_size = 1, +.valid.max_access_size = 4, +.impl.min_access_size = 1, +.impl.max_access_size = 1, +.endianness = DEVICE_LITTLE_ENDIAN, +}; Looks like word writes are supported provided the memory API breaks up writes in little endian order. Better to make it explicit. Like the attached incremental patch? cheers, Gerd From d46b14dfd74dcc37fe187dc76cd681ad7dbff2d5 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann kra...@redhat.com Date: Wed, 19 Sep 2012 13:31:04 +0200 Subject: [PATCH] [fixup] vga mmio Signed-off-by: Gerd Hoffmann kra...@redhat.com --- hw/vga-pci.c | 25 ++--- 1 files changed, 22 insertions(+), 3 deletions(-) diff --git a/hw/vga-pci.c b/hw/vga-pci.c index e05e2ef..7fd305d 100644 --- a/hw/vga-pci.c +++ b/hw/vga-pci.c @@ -78,14 +78,33 @@ static uint64_t pci_vga_ioport_read(void *ptr, target_phys_addr_t addr, unsigned size) { PCIVGAState *d = ptr; -return vga_ioport_read(d-vga, addr); +uint64_t ret = 0; + +switch (size) { +case 1: +ret = vga_ioport_read(d-vga, addr); +break; +case 2: +ret = vga_ioport_read(d-vga, addr); +ret |= vga_ioport_read(d-vga, addr+1) 8; +break; +} +return ret; } static void pci_vga_ioport_write(void *ptr, target_phys_addr_t addr, uint64_t val, unsigned size) { PCIVGAState *d = ptr; -vga_ioport_write(d-vga, addr, val); +switch (size) { +case 1: +vga_ioport_write(d-vga, addr, val); +break; +case 2: +vga_ioport_write(d-vga, addr, val 0xff); +vga_ioport_write(d-vga, addr+1, (val 8) 0xff); +break; +} } static const MemoryRegionOps pci_vga_ioport_ops = { @@ -94,7 +113,7 @@ static const MemoryRegionOps pci_vga_ioport_ops = { .valid.min_access_size = 1, .valid.max_access_size = 4, .impl.min_access_size = 1, -.impl.max_access_size = 1, +.impl.max_access_size = 2, .endianness = DEVICE_LITTLE_ENDIAN, }; -- 1.7.1
[Qemu-devel] [PATCH 2/9] add unregister_displaychangelistener
Also change the way the gui_timer is initialized: each time a displaychangelistener is registered or unregistered we'll check whenever we need a timer (due to dpy_refresh callback being present) and if so setup a timer, otherwise zap it. This way the gui timer works correctly with displaychangelisteners coming and going. Signed-off-by: Gerd Hoffmann kra...@redhat.com --- console.h | 10 ++ vl.c | 31 +++ 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/console.h b/console.h index 646ad4b..48fef22 100644 --- a/console.h +++ b/console.h @@ -229,9 +229,19 @@ static inline int is_buffer_shared(DisplaySurface *surface) !(surface-flags QEMU_REALPIXELS_FLAG)); } +void gui_setup_refresh(DisplayState *ds); + static inline void register_displaychangelistener(DisplayState *ds, DisplayChangeListener *dcl) { QLIST_INSERT_HEAD(ds-listeners, dcl, next); +gui_setup_refresh(ds); +} + +static inline void unregister_displaychangelistener(DisplayState *ds, +DisplayChangeListener *dcl) +{ +QLIST_REMOVE(dcl, next); +gui_setup_refresh(ds); } static inline void dpy_update(DisplayState *s, int x, int y, int w, int h) diff --git a/vl.c b/vl.c index 2a7c92a..fbb77fe 100644 --- a/vl.c +++ b/vl.c @@ -1288,6 +1288,29 @@ static void gui_update(void *opaque) qemu_mod_timer(ds-gui_timer, interval + qemu_get_clock_ms(rt_clock)); } +void gui_setup_refresh(DisplayState *ds) +{ +DisplayChangeListener *dcl; +bool need_timer = false; + +QLIST_FOREACH(dcl, ds-listeners, next) { +if (dcl-dpy_refresh != NULL) { +need_timer = true; +break; +} +} + +if (need_timer ds-gui_timer == NULL) { +ds-gui_timer = qemu_new_timer_ms(rt_clock, gui_update, ds); +qemu_mod_timer(ds-gui_timer, qemu_get_clock_ms(rt_clock)); +} +if (!need_timer ds-gui_timer != NULL) { +qemu_del_timer(ds-gui_timer); +qemu_free_timer(ds-gui_timer); +ds-gui_timer = NULL; +} +} + struct vm_change_state_entry { VMChangeStateHandler *cb; void *opaque; @@ -2350,7 +2373,6 @@ int main(int argc, char **argv, char **envp) const char *kernel_filename, *kernel_cmdline; char boot_devices[33] = cad; /* default to HD-floppy-CD-ROM */ DisplayState *ds; -DisplayChangeListener *dcl; int cyls, heads, secs, translation; QemuOpts *hda_opts = NULL, *opts, *machine_opts; QemuOptsList *olist; @@ -3698,13 +3720,6 @@ int main(int argc, char **argv, char **envp) /* display setup */ dpy_resize(ds); -QLIST_FOREACH(dcl, ds-listeners, next) { -if (dcl-dpy_refresh != NULL) { -ds-gui_timer = qemu_new_timer_ms(rt_clock, gui_update, ds); -qemu_mod_timer(ds-gui_timer, qemu_get_clock_ms(rt_clock)); -break; -} -} text_consoles_set_display(ds); if (foreach_device_config(DEV_GDB, gdbserver_start) 0) { -- 1.7.1
[Qemu-devel] [PATCH 9/9] fbdev: add display scaling support
Add support for scaling the guest display. Ctrl-Alt-S hotkey toggles scaling. Signed-off-by: Gerd Hoffmann kra...@redhat.com --- ui/fbdev.c | 61 ++- 1 files changed, 51 insertions(+), 10 deletions(-) diff --git a/ui/fbdev.c b/ui/fbdev.c index d859d84..0f40259 100644 --- a/ui/fbdev.c +++ b/ui/fbdev.c @@ -81,6 +81,7 @@ static pixman_image_t *surface; static pixman_image_t *framebuffer; static pixman_transform_t transform; static pixman_region16_t dirty; +static double scale; static QEMUCursor *ptr_cursor; static pixman_image_t *ptr_image; @@ -88,6 +89,10 @@ static intptr_refresh; static intpx, py, pw, ph; static intmx, my, mon; +/* options */ +static intuse_scale = 1; +static pixman_filter_tpfilter = PIXMAN_FILTER_GOOD; + /* fwd decls */ static int fbdev_activate_vt(int tty, int vtno, bool wait); @@ -182,13 +187,14 @@ static void read_mouse(void *opaque) if (ay 0) { ay = 0; } -if (ax = cw) { -ax = cw-1; +if (ax = cw*scale) { +ax = cw*scale-1; } -if (ay = ch) { -ay = ch-1; +if (ay = ch*scale) { +ay = ch*scale-1; } -kbd_mouse_event(ax * 0x7FFF / cw, ay * 0x7FFF / ch, 0, b); +kbd_mouse_event(ax * 0x7FFF / (cw*scale), +ay * 0x7FFF / (ch*scale), 0, b); } else { kbd_mouse_event(x, y, 0, b); } @@ -543,6 +549,12 @@ static void read_mediumraw(void *opaque) (ctrl-alt-esc) ===\n); exit(1); } +if (keycode == KEY_S) { +use_scale = !use_scale; +resize_screen++; +redraw_screen++; +continue; +} if (keycode = KEY_F1 keycode = KEY_F10) { fbdev_activate_vt(tty, keycode+1-KEY_F1, false); key_down[keycode] = false; @@ -913,6 +925,11 @@ static void fbdev_render_ptr(DisplayState *ds) pixman_transform_translate(transform, NULL, pixman_int_to_fixed(-cx), pixman_int_to_fixed(-cy)); +if (use_scale) { +pixman_transform_scale(transform, NULL, + pixman_double_to_fixed(1/scale), + pixman_double_to_fixed(1/scale)); +} pixman_transform_translate(transform, NULL, pixman_int_to_fixed(-px), pixman_int_to_fixed(-py)); @@ -938,16 +955,32 @@ static void fbdev_update(DisplayState *ds, int x, int y, int w, int h) } if (resize_screen) { +double xs, ys; + trace_fbdev_dpy_resize(ds_get_width(ds), ds_get_height(ds)); resize_screen = 0; cx = 0; cy = 0; cw = ds_get_width(ds); ch = ds_get_height(ds); -if (ds_get_width(ds) fb_var.xres) { -cx = (fb_var.xres - ds_get_width(ds)) / 2; -} -if (ds_get_height(ds) fb_var.yres) { -cy = (fb_var.yres - ds_get_height(ds)) / 2; + +if (use_scale) { +xs = (double)fb_var.xres / cw; +ys = (double)fb_var.yres / ch; +if (xs ys) { +scale = ys; +cx = (fb_var.xres - ds_get_width(ds)*scale) / 2; +} else { +scale = xs; +cy = (fb_var.yres - ds_get_height(ds)*scale) / 2; +} +} else { +scale = 1; +if (ds_get_width(ds) fb_var.xres) { +cx = (fb_var.xres - ds_get_width(ds)) / 2; +} +if (ds_get_height(ds) fb_var.yres) { +cy = (fb_var.yres - ds_get_height(ds)) / 2; +} } if (surface) { pixman_image_unref(surface); @@ -958,7 +991,14 @@ static void fbdev_update(DisplayState *ds, int x, int y, int w, int h) pixman_transform_translate(transform, NULL, pixman_int_to_fixed(-cx), pixman_int_to_fixed(-cy)); +if (use_scale) { +pixman_transform_scale(transform, NULL, + pixman_double_to_fixed(1/scale), + pixman_double_to_fixed(1/scale)); +} pixman_image_set_transform(surface, transform); + +pixman_image_set_filter(surface, pfilter, NULL, 0); } if (redraw_screen) { @@ -1050,6 +1090,7 @@ static void fbdev_cursor_define(DisplayState *ds, QEMUCursor *cursor) cursor-width, cursor-height,
[Qemu-devel] [PATCH v4 0/9] linux framebuffer display driver
Hi, Next round of the framebuffer display driver patches, including git tree for your convinience. Changes: - fix bisectability. - Use 'struct Error' to report initialization errors, so we can pass up more detailed errors to the monitor. - change monitor command names. Sneak preview: branch rebase/fbdev has patches to switch over spice to pixman (http://www.kraxel.org/cgit/qemu/log/?h=rebase/fbdev). cheers, Gerd The following changes since commit 6b80f7db8a7f84d21e46d01e30c8497733bb23a0: Merge remote-tracking branch 'kiszka/queues/slirp' into staging (2012-09-17 10:23:20 -0500) are available in the git repository at: git://git.kraxel.org/qemu fbdev.2 Gerd Hoffmann (9): QLIST-ify display change listeners. add unregister_displaychangelistener move set_mouse + cursor_define callbacks fbdev: add linux framebuffer display driver. fbdev: add monitor command to enable/disable fbdev: make configurable at compile time. fbdev: move to pixman fbdev: add mouse pointer support fbdev: add display scaling support configure | 24 ++ console.c |2 +- console.h | 123 -- hmp-commands.hx | 15 + hmp.c |9 + hmp.h |1 + hw/jazz_led.c |2 +- hw/qxl-render.c |2 +- hw/vga.c| 10 +- hw/vmware_vga.c | 11 +- hw/xenfb.c |2 +- qapi-schema.json| 13 + qemu-options.hx |8 + qmp-commands.hx |6 + qmp.c | 19 + sysemu.h|1 + trace-events| 15 + ui/Makefile.objs|1 + ui/fbdev.c | 1143 +++ ui/linux-keynames.h | 388 + ui/sdl.c|8 +- ui/spice-display.c |4 +- ui/vnc.c|8 +- vl.c| 57 ++- 24 files changed, 1798 insertions(+), 74 deletions(-) create mode 100644 ui/fbdev.c create mode 100644 ui/linux-keynames.h
[Qemu-devel] [PATCH 6/9] fbdev: make configurable at compile time.
Add CONFIG_FBDEV, add --enable-fbdev and --disable-fbdev configure switches so fbdev can be enabled/disabled at compile time. Signed-off-by: Gerd Hoffmann kra...@redhat.com --- configure| 12 qmp.c|2 +- ui/Makefile.objs |2 +- vl.c |4 ++-- 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/configure b/configure index 8564142..c4ba338 100755 --- a/configure +++ b/configure @@ -148,6 +148,7 @@ docs= fdt= nptl= sdl= +fbdev=no virtfs= vnc=yes sparse=no @@ -527,6 +528,7 @@ Haiku) usb=linux kvm=yes vhost_net=yes + fbdev=yes if [ $cpu = i386 -o $cpu = x86_64 ] ; then audio_possible_drivers=$audio_possible_drivers fmod fi @@ -658,6 +660,10 @@ for opt do ;; --enable-sdl) sdl=yes ;; + --disable-fbdev) fbdev=no + ;; + --enable-fbdev) fbdev=yes + ;; --disable-virtfs) virtfs=no ;; --enable-virtfs) virtfs=yes @@ -1070,6 +1076,8 @@ echo --disable-strip disable stripping binaries echo --disable-werror disable compilation abort on warning echo --disable-sdldisable SDL echo --enable-sdl enable SDL +echo --disable-fbdev disable linux framebuffer +echo --enable-fbdev enable linux framebuffer echo --disable-virtfs disable VirtFS echo --enable-virtfs enable VirtFS echo --disable-vncdisable VNC @@ -3159,6 +3167,7 @@ if test $darwin = yes ; then echo Cocoa support $cocoa fi echo SDL support $sdl +echo fbdev support $fbdev echo curses support$curses echo curl support $curl echo mingw32 support $mingw32 @@ -3367,6 +3376,9 @@ if test $sdl = yes ; then echo CONFIG_SDL=y $config_host_mak echo SDL_CFLAGS=$sdl_cflags $config_host_mak fi +if test $fbdev = yes ; then + echo CONFIG_FBDEV=y $config_host_mak +fi if test $cocoa = yes ; then echo CONFIG_COCOA=y $config_host_mak fi diff --git a/qmp.c b/qmp.c index 56e007f..712ab58 100644 --- a/qmp.c +++ b/qmp.c @@ -393,7 +393,7 @@ void qmp_change(const char *device, const char *target, void qmp_framebuffer_display(bool enable, Error **errp) { -#if defined(CONFIG_LINUX) +#if defined(CONFIG_FBDEV) DisplayState *ds = get_displaystate(); if (enable) { diff --git a/ui/Makefile.objs b/ui/Makefile.objs index 55ddcf2..479cd01 100644 --- a/ui/Makefile.objs +++ b/ui/Makefile.objs @@ -12,4 +12,4 @@ common-obj-$(CONFIG_SDL) += sdl.o sdl_zoom.o x_keymap.o common-obj-$(CONFIG_COCOA) += cocoa.o common-obj-$(CONFIG_CURSES) += curses.o common-obj-$(CONFIG_VNC) += $(vnc-obj-y) -common-obj-$(CONFIG_LINUX) += fbdev.o +common-obj-$(CONFIG_FBDEV) += fbdev.o diff --git a/vl.c b/vl.c index 85f2d37..4159035 100644 --- a/vl.c +++ b/vl.c @@ -3041,7 +3041,7 @@ int main(int argc, char **argv, char **envp) fprintf(stderr, SDL support is disabled\n); exit(1); #endif -#ifdef CONFIG_LINUX +#ifdef CONFIG_FBDEV case QEMU_OPTION_fbdev: display_type = DT_FBDEV; break; @@ -3686,7 +3686,7 @@ int main(int argc, char **argv, char **envp) curses_display_init(ds, full_screen); break; #endif -#if defined(CONFIG_LINUX) +#if defined(CONFIG_FBDEV) case DT_FBDEV: { Error *errp = NULL; -- 1.7.1
Re: [Qemu-devel] [PATCH V3 10/11] vcpu: introduce lockmap
On Wed, Sep 19, 2012 at 10:55:30AM +0300, Avi Kivity wrote: On 09/19/2012 07:40 AM, Peter Crosthwaite wrote: On Wed, Sep 19, 2012 at 2:32 PM, Edgar E. Iglesias edgar.igles...@gmail.com wrote: On Wed, Sep 19, 2012 at 02:25:48PM +1000, Peter Crosthwaite wrote: Ping for PMM, This is the root case of your block on the SDHCI series - this is a discussion on resolution to bogus infinite looping DMA. For current participants in this discussion, heres our thread on the same topic over in SD land: http://lists.gnu.org/archive/html/qemu-devel/2012-08/msg01017.html With the findings here and acknowledgement that this is a general problem, we either need to declare this issue of scope for SDHCI, or work with these guys (in the immediate future) on the DMA infinite loop problem flagged here. I dont mind if SDHCI+ADMA is the guinea pig here, but can we get a decisive plan for going forward with this issue if it is going to continue to block SDHCI. Thanks to Igor for identifying the overlap between these discussions. Hi, A couple of dumb questions. What is the reason for the blocker? that possible guest dos is worse than no functionality at all? Can't you put the DMA/IO processing into the background? I dont know a way do do asynchronous DMA unless I am missing something here. You could schedule a bottom half and do the accesses from there. Solves nothing though. So what happens is we have a device that walks a SG list synchronously while holding all the locks and stuff being discussed here. If that SG list infinite loops then crash. Did you mean loop, or recursion into the memory region that initiates DMA? I think we were discussing the loop issue (I hadn't thought of recursion issues before) :) Jan's point of resetability was interesting. Processing a finite amount of dma descriptors and scheduling bh's to continously get back and continue processing should be OK no? That would avoid the lockups and allow the device to be reset at any time. Or am I missing something? IIRC the etrax dma does something like that but only for receive dma rings... Cheers, Edgar
[Qemu-devel] [PATCH 7/9] fbdev: move to pixman
Stop reinventing the wheel, use the pixman library for raster ops. Also add setdata callback. Signed-off-by: Gerd Hoffmann kra...@redhat.com --- configure | 12 ui/fbdev.c | 182 +--- 2 files changed, 124 insertions(+), 70 deletions(-) diff --git a/configure b/configure index c4ba338..d10ff78 100755 --- a/configure +++ b/configure @@ -148,6 +148,7 @@ docs= fdt= nptl= sdl= +pixman= fbdev=no virtfs= vnc=yes @@ -2153,6 +2154,17 @@ else exit 1 fi +if $pkg_config pixman-1 /dev/null 21 +then +pixman=yes +pixman_cflags=`$pkg_config --cflags pixman-1 2/dev/null` +pixman_libs=`$pkg_config --libs pixman-1 2/dev/null` +QEMU_CFLAGS=$QEMU_CFLAGS $pixman_cflags +libs_softmmu=$libs_softmmu $pixman_libs +else +fbdev=no +fi + ## # libcap probe diff --git a/ui/fbdev.c b/ui/fbdev.c index 6dfbacc..d55850e 100644 --- a/ui/fbdev.c +++ b/ui/fbdev.c @@ -23,11 +23,12 @@ #include linux/vt.h #include linux/fb.h +#include pixman.h + #include qemu-common.h #include console.h #include keymaps.h #include sysemu.h -#include pflib.h /* * must be last so we get the linux input layer @@ -70,19 +71,82 @@ static bool key_down[KEY_CNT]; #define FB_ACQ_REQ 3 static int fb_switch_state; -/* qdev windup */ +/* qemu windup */ static DisplayChangeListener *dcl; -static QemuPfConv *conv; -static PixelFormatfbpf; static intresize_screen; static intredraw_screen; static intcx, cy, cw, ch; static Notifier exit_notifier; +static pixman_image_t *surface; +static pixman_image_t *framebuffer; +static pixman_transform_t transform; +static pixman_region16_t dirty; /* fwd decls */ static int fbdev_activate_vt(int tty, int vtno, bool wait); /* */ +/* pixman helpers */ + +static int pixman_shifts_to_type(int rshift, int gshift, int bshift) +{ +int type = PIXMAN_TYPE_OTHER; + +if (rshift gshift gshift bshift) { +if (bshift == 0) { +type = PIXMAN_TYPE_ARGB; +} else { +#if PIXMAN_VERSION = PIXMAN_VERSION_ENCODE(0, 21, 8) +type = PIXMAN_TYPE_RGBA; +#endif +} +} else if (rshift gshift gshift bshift) { +if (rshift == 0) { +type = PIXMAN_TYPE_ABGR; +} else { +type = PIXMAN_TYPE_BGRA; +} +} +return type; +} + +static pixman_image_t *pixman_from_displaystate(DisplayState *ds) +{ +PixelFormat *pf = ds-surface-pf; +pixman_format_code_t format; +pixman_image_t *image; +int type; + +type = pixman_shifts_to_type(pf-rshift, pf-gshift, pf-bshift); +format = PIXMAN_FORMAT(pf-bits_per_pixel, type, + pf-abits, pf-rbits, pf-gbits, pf-bbits); +image = pixman_image_create_bits(format, ds_get_width(ds), + ds_get_height(ds), + (void *)ds_get_data(ds), + ds_get_linesize(ds)); +return image; +} + +static pixman_image_t *pixman_from_framebuffer(void) +{ +pixman_format_code_t format; +pixman_image_t *image; +int type; + +type = pixman_shifts_to_type(fb_var.red.offset, + fb_var.green.offset, + fb_var.blue.offset); +format = PIXMAN_FORMAT(fb_var.bits_per_pixel, type, + fb_var.transp.length, + fb_var.red.length, + fb_var.green.length, + fb_var.blue.length); +image = pixman_image_create_bits(format, fb_var.xres, fb_var.yres, + (void *)fb_mem, fb_fix.line_length); +return image; +} + +/* */ /* mouse*/ static void read_mouse(void *opaque) @@ -529,6 +593,17 @@ static void fbdev_cleanup(void) { trace_fbdev_cleanup(); +/* release pixman stuff */ +pixman_region_fini(dirty); +if (framebuffer) { +pixman_image_unref(framebuffer); +framebuffer = NULL; +} +if (surface) { +pixman_image_unref(surface); +surface = NULL; +} + /* restore console */ if (fb_mem != NULL) { munmap(fb_mem, fb_fix.smem_len+fb_mem_offset); @@ -682,36 +757,8 @@ static int fbdev_init(const char *device, Error **err) start_mediumraw(tty); qemu_set_fd_handler(tty, read_mediumraw, NULL, NULL); -/* create PixelFormat from fbdev structs */ -fbpf.bits_per_pixel = fb_var.bits_per_pixel; -
[Qemu-devel] [PATCH 1/9] QLIST-ify display change listeners.
Signed-off-by: Gerd Hoffmann kra...@redhat.com --- console.h | 72 +++ hw/xenfb.c |2 +- vl.c |9 ++- 3 files changed, 42 insertions(+), 41 deletions(-) diff --git a/console.h b/console.h index f990684..646ad4b 100644 --- a/console.h +++ b/console.h @@ -164,7 +164,7 @@ struct DisplayChangeListener { int w, int h, uint32_t c); void (*dpy_text_cursor)(struct DisplayState *s, int x, int y); -struct DisplayChangeListener *next; +QLIST_ENTRY(DisplayChangeListener) next; }; struct DisplayAllocator { @@ -179,7 +179,7 @@ struct DisplayState { struct QEMUTimer *gui_timer; struct DisplayAllocator* allocator; -struct DisplayChangeListener* listeners; +QLIST_HEAD(, DisplayChangeListener) listeners; void (*mouse_set)(int x, int y, int on); void (*cursor_define)(QEMUCursor *cursor); @@ -231,72 +231,76 @@ static inline int is_buffer_shared(DisplaySurface *surface) static inline void register_displaychangelistener(DisplayState *ds, DisplayChangeListener *dcl) { -dcl-next = ds-listeners; -ds-listeners = dcl; +QLIST_INSERT_HEAD(ds-listeners, dcl, next); } static inline void dpy_update(DisplayState *s, int x, int y, int w, int h) { -struct DisplayChangeListener *dcl = s-listeners; -while (dcl != NULL) { +struct DisplayChangeListener *dcl; +QLIST_FOREACH(dcl, s-listeners, next) { dcl-dpy_update(s, x, y, w, h); -dcl = dcl-next; } } static inline void dpy_resize(DisplayState *s) { -struct DisplayChangeListener *dcl = s-listeners; -while (dcl != NULL) { +struct DisplayChangeListener *dcl; +QLIST_FOREACH(dcl, s-listeners, next) { dcl-dpy_resize(s); -dcl = dcl-next; } } static inline void dpy_setdata(DisplayState *s) { -struct DisplayChangeListener *dcl = s-listeners; -while (dcl != NULL) { -if (dcl-dpy_setdata) dcl-dpy_setdata(s); -dcl = dcl-next; +struct DisplayChangeListener *dcl; +QLIST_FOREACH(dcl, s-listeners, next) { +if (dcl-dpy_setdata) { +dcl-dpy_setdata(s); +} } } static inline void dpy_refresh(DisplayState *s) { -struct DisplayChangeListener *dcl = s-listeners; -while (dcl != NULL) { -if (dcl-dpy_refresh) dcl-dpy_refresh(s); -dcl = dcl-next; +struct DisplayChangeListener *dcl; +QLIST_FOREACH(dcl, s-listeners, next) { +if (dcl-dpy_refresh) { +dcl-dpy_refresh(s); +} } } static inline void dpy_copy(struct DisplayState *s, int src_x, int src_y, - int dst_x, int dst_y, int w, int h) { -struct DisplayChangeListener *dcl = s-listeners; -while (dcl != NULL) { -if (dcl-dpy_copy) + int dst_x, int dst_y, int w, int h) +{ +struct DisplayChangeListener *dcl; +QLIST_FOREACH(dcl, s-listeners, next) { +if (dcl-dpy_copy) { dcl-dpy_copy(s, src_x, src_y, dst_x, dst_y, w, h); -else /* TODO */ +} else { /* TODO */ dcl-dpy_update(s, dst_x, dst_y, w, h); -dcl = dcl-next; +} } } static inline void dpy_fill(struct DisplayState *s, int x, int y, - int w, int h, uint32_t c) { -struct DisplayChangeListener *dcl = s-listeners; -while (dcl != NULL) { -if (dcl-dpy_fill) dcl-dpy_fill(s, x, y, w, h, c); -dcl = dcl-next; + int w, int h, uint32_t c) +{ +struct DisplayChangeListener *dcl; +QLIST_FOREACH(dcl, s-listeners, next) { +if (dcl-dpy_fill) { +dcl-dpy_fill(s, x, y, w, h, c); +} } } -static inline void dpy_cursor(struct DisplayState *s, int x, int y) { -struct DisplayChangeListener *dcl = s-listeners; -while (dcl != NULL) { -if (dcl-dpy_text_cursor) dcl-dpy_text_cursor(s, x, y); -dcl = dcl-next; +static inline void dpy_cursor(struct DisplayState *s, int x, int y) +{ +struct DisplayChangeListener *dcl; +QLIST_FOREACH(dcl, s-listeners, next) { +if (dcl-dpy_text_cursor) { +dcl-dpy_text_cursor(s, x, y); +} } } diff --git a/hw/xenfb.c b/hw/xenfb.c index 338800a..ef24c33 100644 --- a/hw/xenfb.c +++ b/hw/xenfb.c @@ -717,7 +717,7 @@ static void xenfb_update(void *opaque) if (xenfb_queue_full(xenfb)) return; -for (l = xenfb-c.ds-listeners; l != NULL; l = l-next) { +QLIST_FOREACH(l, xenfb-c.ds-listeners, next) { if (l-idle) continue; idle = 0; diff --git a/vl.c b/vl.c index 7c577fa..2a7c92a 100644 --- a/vl.c +++ b/vl.c @@ -1276,15 +1276,14 @@ static void gui_update(void *opaque) { uint64_t interval = GUI_REFRESH_INTERVAL; DisplayState *ds = opaque; -DisplayChangeListener *dcl = ds-listeners; +DisplayChangeListener *dcl;
Re: [Qemu-devel] [PATCH] Adding BAR0 for e500 PCI controller
-Original Message- From: Alexander Graf [mailto:ag...@suse.de] Sent: Wednesday, September 19, 2012 4:33 PM To: Bhushan Bharat-R65777 Cc: qemu-devel@nongnu.org; qemu-...@nongnu.org; afaer...@suse.de; Bhushan Bharat-R65777 Subject: Re: [PATCH] Adding BAR0 for e500 PCI controller On 19.09.2012, at 09:41, Bharat Bhushan wrote: PCI Root complex have TYPE-1 configuration header while PCI endpoint have type-0 configuration header. The type-1 configuration header have a BAR (BAR0). In Freescale PCI controller BAR0 is used for mapping pci address space to CCSR address space. This can used for 2 purposes: 1) for MSI interrupt generation 2) Allow CCSR registers access when configured as PCI endpoint, which I am not sure is a use case with QEMU-KVM guest. What I observed is that when guest read the size of BAR0 of host controller configuration header (TYPE1 header) then it always reads it as 0. When looking into the QEMU hw/ppce500_pci.c, I do not find the PCI controller device registering BAR0. I do not find any other controller also doing so may they do not use BAR0. There are two issues when BAR0 is not there (which I can think of): 1) There should be BAR0 emulated for PCI Root comaplex (TYPE1 header) and when reading the size of BAR0, it should give size as per real h/w. This patch solves this problem. 2) Do we need this BAR0 inbound address translation? When BAR0 is of non-zero size then it will be configured for PCI address space to local address(CCSR) space translation on inbound access. The primary use case is for MSI interrupt generation. The device is configured with a address offsets in PCI address space, which will be translated to MSI interrupt generation MPIC registers. Currently I do not understand the MSI interrupt generation mechanism in QEMU and also IIRC we do not use QEMU MSI interrupt mechanism on e500 guest machines. But this BAR0 will be used when using MSI on e500. I can see one more issue, There are ATMUs emulated in hw/ppce500_pci.c, but i do not see these being used for address translation. So far that works because pci address space and local address space are 1:1 mapped. BAR0 inbound translation + ATMU translation will complete the address translation of inbound traffic. Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com --- hw/pci_host.h|9 + hw/ppc/e500.c| 21 + hw/ppce500_pci.c |7 +++ 3 files changed, 37 insertions(+), 0 deletions(-) diff --git a/hw/pci_host.h b/hw/pci_host.h index 4b9c300..c1ec7eb 100644 --- a/hw/pci_host.h +++ b/hw/pci_host.h @@ -41,10 +41,19 @@ struct PCIHostState { MemoryRegion data_mem; MemoryRegion mmcfg; MemoryRegion *address_space; +MemoryRegion bar0; uint32_t config_reg; PCIBus *bus; }; +typedef struct PPCE500CCSRState { +SysBusDevice *parent; +MemoryRegion ccsr_space; +} PPCE500CCSRState; + +#define TYPE_CCSR e500-ccsr +#define CCSR(obj) OBJECT_CHECK(PPCE500CCSRState, (obj), TYPE_CCSR) + All of this is e500 specific, so it should definitely not be in any generic pci host header. Yes, ok. /* common internal helpers for PCI/PCIe hosts, cut off overflows */ void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr, uint32_t limit, uint32_t val, uint32_t len); diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c index 6f0de6d..1f5da28 100644 --- a/hw/ppc/e500.c +++ b/hw/ppc/e500.c @@ -31,6 +31,7 @@ #include hw/loader.h #include elf.h #include hw/sysbus.h +#include hw/pci_host.h #include exec-memory.h #include host-utils.h @@ -587,3 +588,23 @@ void ppce500_init(PPCE500Params *params) kvmppc_init(); } } + +static void e500_ccsr_type_init(Object *obj) { +PPCE500CCSRState *ccsr = CCSR(obj); +ccsr-ccsr_space.size = int128_make64(MPC8544_CCSRBAR_SIZE); +} + +static const TypeInfo e500_ccsr_info = { +.name = TYPE_CCSR, +.parent= TYPE_SYS_BUS_DEVICE, +.instance_size = sizeof(PPCE500CCSRState), +.instance_init = e500_ccsr_type_init, }; + +static void e500_ccsr_register_types(void) { +type_register_static(e500_ccsr_info); +} + +type_init(e500_ccsr_register_types) diff --git a/hw/ppce500_pci.c b/hw/ppce500_pci.c index 92b1dc0..135f2a6 100644 --- a/hw/ppce500_pci.c +++ b/hw/ppce500_pci.c @@ -315,6 +315,8 @@ static int e500_pcihost_initfn(SysBusDevice *dev) int i; MemoryRegion *address_space_mem = get_system_memory(); MemoryRegion *address_space_io = get_system_io(); +PPCE500CCSRState *ccsr; +PCIDevice *pdev; h = PCI_HOST_BRIDGE(dev); s = PPC_E500_PCI_HOST_BRIDGE(dev); @@ -342,6 +344,11 @@ static int e500_pcihost_initfn(SysBusDevice *dev) memory_region_add_subregion(s-container, PCIE500_REG_BASE,
[Qemu-devel] [PATCH 8/9] fbdev: add mouse pointer support
Add mouse_set and cursor_define DisplayChangeListener callbacks and mouse pointer rendering support. Signed-off-by: Gerd Hoffmann kra...@redhat.com --- ui/fbdev.c | 95 1 files changed, 95 insertions(+), 0 deletions(-) diff --git a/ui/fbdev.c b/ui/fbdev.c index d55850e..d859d84 100644 --- a/ui/fbdev.c +++ b/ui/fbdev.c @@ -82,6 +82,12 @@ static pixman_image_t *framebuffer; static pixman_transform_t transform; static pixman_region16_t dirty; +static QEMUCursor *ptr_cursor; +static pixman_image_t *ptr_image; +static intptr_refresh; +static intpx, py, pw, ph; +static intmx, my, mon; + /* fwd decls */ static int fbdev_activate_vt(int tty, int vtno, bool wait); @@ -877,6 +883,51 @@ static void fbdev_render(DisplayState *ds) pixman_region_init(dirty); } +static void fbdev_unrender_ptr(DisplayState *ds) +{ +if (!pw !ph) { +return; +} +pixman_region_union_rect(dirty, dirty, px, py, pw, ph); +ph = pw = 0; +} + +static void fbdev_render_ptr(DisplayState *ds) +{ +pixman_region16_t region; +pixman_transform_t transform; + +if (!mon || !ptr_image) { +return; +} +if (mx 0 || mx = cw || my 0 || my = ch) { +return; +} + +px = mx - ptr_cursor-hot_x; +py = my - ptr_cursor-hot_y; +pw = ptr_cursor-width; +ph = ptr_cursor-height; + +pixman_transform_init_identity(transform); +pixman_transform_translate(transform, NULL, + pixman_int_to_fixed(-cx), + pixman_int_to_fixed(-cy)); +pixman_transform_translate(transform, NULL, + pixman_int_to_fixed(-px), + pixman_int_to_fixed(-py)); +pixman_image_set_transform(ptr_image, transform); + +pixman_region_init_rect(region, 0, 0, pw, ph); +pixman_image_set_clip_region(ptr_image, region); + +pixman_image_composite(PIXMAN_OP_OVER, ptr_image, NULL, framebuffer, + 0, 0, 0, 0, 0, 0, fb_var.xres, fb_var.yres); + +pixman_region_fini(region); +ptr_refresh = 0; +} + /* */ /* qemu interfaces */ @@ -918,6 +969,9 @@ static void fbdev_update(DisplayState *ds, int x, int y, int w, int h) } pixman_region_union_rect(dirty, dirty, x, y, w, h); +if (ptr_image mon pw ph) { +ptr_refresh++; +} } static void fbdev_resize(DisplayState *ds) @@ -954,9 +1008,48 @@ static void fbdev_refresh(DisplayState *ds) fbdev_update(ds, 0, 0, 0, 0); } +if (ptr_refresh) { +fbdev_unrender_ptr(ds); +} if (pixman_region_not_empty(dirty)) { fbdev_render(ds); } +if (ptr_refresh) { +fbdev_render_ptr(ds); +} +} + +static void fbdev_mouse_set(DisplayState *ds, int x, int y, int on) +{ +ptr_refresh++; +mx = x; +my = y; +mon = on; +} + +static void fbdev_cursor_define(DisplayState *ds, QEMUCursor *cursor) +{ +ptr_refresh++; + +if (ptr_cursor) { +cursor_put(ptr_cursor); +ptr_cursor = NULL; +} +if (ptr_image) { +pixman_image_unref(ptr_image); +ptr_image = NULL; +} + +if (!cursor) { +return; +} + +ptr_cursor = cursor; +cursor_get(ptr_cursor); +ptr_image = pixman_image_create_bits(PIXMAN_a8r8g8b8, + cursor-width, cursor-height, + cursor-data, + cursor-width * 4); } static void fbdev_exit_notifier(Notifier *notifier, void *data) @@ -985,6 +1078,8 @@ int fbdev_display_init(DisplayState *ds, const char *device, Error **err) dcl-dpy_resize = fbdev_resize; dcl-dpy_setdata = fbdev_setdata; dcl-dpy_refresh = fbdev_refresh; +dcl-dpy_mouse_set = fbdev_mouse_set; +dcl-dpy_cursor_define = fbdev_cursor_define; register_displaychangelistener(ds, dcl); trace_fbdev_enabled(); -- 1.7.1
[Qemu-devel] [PATCH 3/9] move set_mouse + cursor_define callbacks
When adding DisplayChangeListeners the set_mouse and cursor_define callbacks have been left in DisplayState for some reason. Fix it. Signed-off-by: Gerd Hoffmann kra...@redhat.com --- console.c |2 +- console.h | 39 +++ hw/jazz_led.c |2 +- hw/qxl-render.c|2 +- hw/vga.c | 10 +- hw/vmware_vga.c| 11 ++- ui/sdl.c |8 ui/spice-display.c |4 ++-- ui/vnc.c |8 9 files changed, 59 insertions(+), 27 deletions(-) diff --git a/console.c b/console.c index a8bcc42..cc0479b 100644 --- a/console.c +++ b/console.c @@ -1239,7 +1239,7 @@ static void text_console_update(void *opaque, console_ch_t *chardata) s-text_y[1] = 0; } if (s-cursor_invalidate) { -dpy_cursor(s-ds, s-x, s-y); +dpy_text_cursor(s-ds, s-x, s-y); s-cursor_invalidate = 0; } } diff --git a/console.h b/console.h index 48fef22..bef2d2d 100644 --- a/console.h +++ b/console.h @@ -164,6 +164,9 @@ struct DisplayChangeListener { int w, int h, uint32_t c); void (*dpy_text_cursor)(struct DisplayState *s, int x, int y); +void (*dpy_mouse_set)(struct DisplayState *s, int x, int y, int on); +void (*dpy_cursor_define)(struct DisplayState *s, QEMUCursor *cursor); + QLIST_ENTRY(DisplayChangeListener) next; }; @@ -181,9 +184,6 @@ struct DisplayState { struct DisplayAllocator* allocator; QLIST_HEAD(, DisplayChangeListener) listeners; -void (*mouse_set)(int x, int y, int on); -void (*cursor_define)(QEMUCursor *cursor); - struct DisplayState *next; }; @@ -304,7 +304,7 @@ static inline void dpy_fill(struct DisplayState *s, int x, int y, } } -static inline void dpy_cursor(struct DisplayState *s, int x, int y) +static inline void dpy_text_cursor(struct DisplayState *s, int x, int y) { struct DisplayChangeListener *dcl; QLIST_FOREACH(dcl, s-listeners, next) { @@ -314,6 +314,37 @@ static inline void dpy_cursor(struct DisplayState *s, int x, int y) } } +static inline void dpy_mouse_set(struct DisplayState *s, int x, int y, int on) +{ +struct DisplayChangeListener *dcl; +QLIST_FOREACH(dcl, s-listeners, next) { +if (dcl-dpy_mouse_set) { +dcl-dpy_mouse_set(s, x, y, on); +} +} +} + +static inline void dpy_cursor_define(struct DisplayState *s, QEMUCursor *cursor) +{ +struct DisplayChangeListener *dcl; +QLIST_FOREACH(dcl, s-listeners, next) { +if (dcl-dpy_cursor_define) { +dcl-dpy_cursor_define(s, cursor); +} +} +} + +static inline bool dpy_cursor_define_supported(struct DisplayState *s) +{ +struct DisplayChangeListener *dcl; +QLIST_FOREACH(dcl, s-listeners, next) { +if (dcl-dpy_cursor_define) { +return true; +} +} +return false; +} + static inline int ds_get_linesize(DisplayState *ds) { return ds-surface-linesize; diff --git a/hw/jazz_led.c b/hw/jazz_led.c index 6486523..c4d54e2 100644 --- a/hw/jazz_led.c +++ b/hw/jazz_led.c @@ -210,7 +210,7 @@ static void jazz_led_text_update(void *opaque, console_ch_t *chardata) LedState *s = opaque; char buf[2]; -dpy_cursor(s-ds, -1, -1); +dpy_text_cursor(s-ds, -1, -1); qemu_console_resize(s-ds, 2, 1); /* TODO: draw the segments */ diff --git a/hw/qxl-render.c b/hw/qxl-render.c index e2e3fe2..085a090 100644 --- a/hw/qxl-render.c +++ b/hw/qxl-render.c @@ -238,7 +238,7 @@ int qxl_render_cursor(PCIQXLDevice *qxl, QXLCommandExt *ext) return 1; } -if (!qxl-ssd.ds-mouse_set || !qxl-ssd.ds-cursor_define) { +if (!dpy_cursor_define_supported(qxl-ssd.ds)) { return 0; } diff --git a/hw/vga.c b/hw/vga.c index afaef0d..ec4f0c5 100644 --- a/hw/vga.c +++ b/hw/vga.c @@ -2081,11 +2081,11 @@ static void vga_update_text(void *opaque, console_ch_t *chardata) s-cr[VGA_CRTC_CURSOR_END] != s-cursor_end || full_update) { cursor_visible = !(s-cr[VGA_CRTC_CURSOR_START] 0x20); if (cursor_visible cursor_offset size cursor_offset = 0) -dpy_cursor(s-ds, - TEXTMODE_X(cursor_offset), - TEXTMODE_Y(cursor_offset)); +dpy_text_cursor(s-ds, +TEXTMODE_X(cursor_offset), +TEXTMODE_Y(cursor_offset)); else -dpy_cursor(s-ds, -1, -1); +dpy_text_cursor(s-ds, -1, -1); s-cursor_offset = cursor_offset; s-cursor_start = s-cr[VGA_CRTC_CURSOR_START]; s-cursor_end = s-cr[VGA_CRTC_CURSOR_END]; @@ -2146,7 +2146,7 @@ static void vga_update_text(void *opaque, console_ch_t *chardata) /* Display a message */ s-last_width = 60; s-last_height = height = 3; -dpy_cursor(s-ds, -1, -1); +
[Qemu-devel] Posix aio communication
Can anyone explain me how aio-thread communicates back to io thread. I could see that it uses SIGUSR2 ,but I couldn't find the corresponding signal handler.
Re: [Qemu-devel] qmp: dump-guest-memory: -p option has issues, fix it or drop it?
Markus Armbruster arm...@redhat.com writes: Anthony Liguori anth...@codemonkey.ws writes: Markus Armbruster arm...@redhat.com writes: Jan Kiszka jan.kis...@siemens.com writes: * The issues discussed in this email plus the fact that the guest memory may be corrupted, and the guest may be in real-mode even when paging is enabled Yes, there are some limitations with this option. Jan said that he always use gdb to deal with vmcore, so he needs such information. The point is to overcome the focus on Linux-only dump processing tools. While I don't care for supporting alternate dump processing tools myself, I certainly don't mind supporting them, as long as the code satisfies basic safety and reliability requirements. This code doesn't, as far as I can tell. It works, thought not under all circumstances. I don't doubt it works often enough to be useful to somebody. But basic safety and reliability requirements are a bit more than that. They include don't explode in ways a reasonable user can't be expected to foresee. I don't think a reasonable user can be expected to see that -p is safe only for trusted guests. We shipped the API, we're not removing it. Our compatibility isn't whatever libvirt is currently using. Bah, don't be more catholic than the pope. It's been out for how many days? Have you looked at the code? It's perfectly reasonable to ask to document the behavior of the method. It's also a trivial patch to qapi-schema.json. I'm okay with leaving obscure security holes in upstream QEMU, indefinitely, as long as they're documented. I don't think it's a good idea, but it's something reasonable people can disagree about. It's *not* a security hole. The malicious guest you mention is just a guest with fully populated PTEs and PDEs, no? There's nothing malicious about that. It just so happens that the memory usage of this command is proportional to the amount of page mappings in the guest. I think your making a mountain out of a mole hill here. Regards, Anthony Liguori We need to document that -p is unreliable and unsafe, therefore should only be used with trusted guests, and its result need not be correct even then. It's unreasonable to ask for an interface to be removed just because it could be misused when it has a legimitate use-case. The issue isn't misuse (operator does something stupid), it's abuse (guest can make it blow up when operator uses it as intended), and fragility (produces crap when operator uses it as intended, but the guest isn't in a sufficiently nice state).
Re: [Qemu-devel] [PATCH V3 10/11] vcpu: introduce lockmap
On 09/19/2012 02:46 PM, Edgar E. Iglesias wrote: On Wed, Sep 19, 2012 at 10:55:30AM +0300, Avi Kivity wrote: On 09/19/2012 07:40 AM, Peter Crosthwaite wrote: On Wed, Sep 19, 2012 at 2:32 PM, Edgar E. Iglesias edgar.igles...@gmail.com wrote: On Wed, Sep 19, 2012 at 02:25:48PM +1000, Peter Crosthwaite wrote: Ping for PMM, This is the root case of your block on the SDHCI series - this is a discussion on resolution to bogus infinite looping DMA. For current participants in this discussion, heres our thread on the same topic over in SD land: http://lists.gnu.org/archive/html/qemu-devel/2012-08/msg01017.html With the findings here and acknowledgement that this is a general problem, we either need to declare this issue of scope for SDHCI, or work with these guys (in the immediate future) on the DMA infinite loop problem flagged here. I dont mind if SDHCI+ADMA is the guinea pig here, but can we get a decisive plan for going forward with this issue if it is going to continue to block SDHCI. Thanks to Igor for identifying the overlap between these discussions. Hi, A couple of dumb questions. What is the reason for the blocker? that possible guest dos is worse than no functionality at all? Can't you put the DMA/IO processing into the background? I dont know a way do do asynchronous DMA unless I am missing something here. You could schedule a bottom half and do the accesses from there. Solves nothing though. So what happens is we have a device that walks a SG list synchronously while holding all the locks and stuff being discussed here. If that SG list infinite loops then crash. Did you mean loop, or recursion into the memory region that initiates DMA? I think we were discussing the loop issue (I hadn't thought of recursion issues before) :) Jan's point of resetability was interesting. Processing a finite amount of dma descriptors and scheduling bh's to continously get back and continue processing should be OK no? Yes. That would avoid the lockups and allow the device to be reset at any time. Or am I missing something? Not that I can see. If real hardware can be looped, so can qemu. I'm only worried about recursion and deadlocks (while real hardware can deadlock, we'd prefer to avoid that). -- error compiling committee.c: too many arguments to function
Re: [Qemu-devel] [0/13] pseries patch queue
On 13.09.2012, at 04:57, David Gibson wrote: Hi Alex, Here's my current set of ready-to-merge pseries related patches. This includes the latest revisions of the patches to fix system reset which I posted before, plus a number of other small bugfixes and cleanups. Thanks, applied all to ppc-next. Alex
Re: [Qemu-devel] [RfC PATCH] vga: add mmio bar to standard vga
On 09/19/2012 02:35 PM, Gerd Hoffmann wrote: Looks like word writes are supported provided the memory API breaks up writes in little endian order. Better to make it explicit. Like the attached incremental patch? Very like. -- error compiling committee.c: too many arguments to function
Re: [Qemu-devel] [PATCH V3 10/11] vcpu: introduce lockmap
On Wed, Sep 19, 2012 at 03:12:12PM +0300, Avi Kivity wrote: On 09/19/2012 02:46 PM, Edgar E. Iglesias wrote: On Wed, Sep 19, 2012 at 10:55:30AM +0300, Avi Kivity wrote: On 09/19/2012 07:40 AM, Peter Crosthwaite wrote: On Wed, Sep 19, 2012 at 2:32 PM, Edgar E. Iglesias edgar.igles...@gmail.com wrote: On Wed, Sep 19, 2012 at 02:25:48PM +1000, Peter Crosthwaite wrote: Ping for PMM, This is the root case of your block on the SDHCI series - this is a discussion on resolution to bogus infinite looping DMA. For current participants in this discussion, heres our thread on the same topic over in SD land: http://lists.gnu.org/archive/html/qemu-devel/2012-08/msg01017.html With the findings here and acknowledgement that this is a general problem, we either need to declare this issue of scope for SDHCI, or work with these guys (in the immediate future) on the DMA infinite loop problem flagged here. I dont mind if SDHCI+ADMA is the guinea pig here, but can we get a decisive plan for going forward with this issue if it is going to continue to block SDHCI. Thanks to Igor for identifying the overlap between these discussions. Hi, A couple of dumb questions. What is the reason for the blocker? that possible guest dos is worse than no functionality at all? Can't you put the DMA/IO processing into the background? I dont know a way do do asynchronous DMA unless I am missing something here. You could schedule a bottom half and do the accesses from there. Solves nothing though. So what happens is we have a device that walks a SG list synchronously while holding all the locks and stuff being discussed here. If that SG list infinite loops then crash. Did you mean loop, or recursion into the memory region that initiates DMA? I think we were discussing the loop issue (I hadn't thought of recursion issues before) :) Jan's point of resetability was interesting. Processing a finite amount of dma descriptors and scheduling bh's to continously get back and continue processing should be OK no? Yes. That would avoid the lockups and allow the device to be reset at any time. Or am I missing something? Not that I can see. If real hardware can be looped, so can qemu. I'm only worried about recursion and deadlocks (while real hardware can deadlock, we'd prefer to avoid that). Agreed, thanks
Re: [Qemu-devel] Shifts, ppc[64], xtensa
On Tue, 18 Sep 2012, Richard Henderson wrote: On 09/18/2012 12:52 PM, malc wrote: case INDEX_op_shl_i32: if (const_args[2]) { +if (args[2] 31) { +tcg_out_movi (s, TCG_TYPE_I32, 0, args[2]); +tcg_out32 (s, SLW | SAB (args[1], args[0], 0)); +} What's this bit for? This bit is to offset for the fact that i haven't slept in a while, and... AFAIK all you should need are the added 31 below. Not really - due to the 0 corner case, following should be better: diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c index 26c4b33..784b157 100644 --- a/tcg/ppc/tcg-target.c +++ b/tcg/ppc/tcg-target.c @@ -409,6 +409,7 @@ static int tcg_target_const_match(tcg_target_long val, #define TW XO31(4) #define TRAP (TW | TO (31)) +#define NOP0x6000 #define RT(r) ((r)21) #define RS(r) ((r)21) @@ -1306,10 +1307,10 @@ void ppc_tb_set_jmp_target (unsigned long jmp_addr, unsigned long addr) *ptr = 0x4800 | (disp 0x03fc); /* b disp */ patch_size = 4; } else { -ptr[0] = 0x6000; /* nop */ -ptr[1] = 0x6000; -ptr[2] = 0x6000; -ptr[3] = 0x6000; +ptr[0] = NOP; +ptr[1] = NOP; +ptr[2] = NOP; +ptr[3] = NOP; patch_size = 16; } } @@ -1330,7 +1331,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, /* direct jump method */ s-tb_jmp_offset[args[0]] = s-code_ptr - s-code_buf; -s-code_ptr += 16; +tcg_out32 (s, NOP); +tcg_out32 (s, NOP); +tcg_out32 (s, NOP); +tcg_out32 (s, NOP); } else { tcg_abort (); @@ -1565,35 +1569,54 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, case INDEX_op_shl_i32: if (const_args[2]) { -tcg_out32 (s, (RLWINM - | RA (args[0]) - | RS (args[1]) - | SH (args[2]) - | MB (0) - | ME (31 - args[2]) - ) -); +int sh = args[2] 31; +if (!sh) { +tcg_out_mov (s, TCG_TYPE_I32, args[0], args[1]); +} +else { +tcg_out32 (s, (RLWINM + | RA (args[0]) + | RS (args[1]) + | SH (sh) + | MB (0) + | ME (31 - sh) + ) +); +} } else tcg_out32 (s, SLW | SAB (args[1], args[0], args[2])); break; case INDEX_op_shr_i32: if (const_args[2]) { -tcg_out32 (s, (RLWINM - | RA (args[0]) - | RS (args[1]) - | SH (32 - args[2]) - | MB (args[2]) - | ME (31) - ) -); +int sh = args[2] 31; +if (!sh) { +tcg_out_mov (s, TCG_TYPE_I32, args[0], args[1]); +} +else { +tcg_out32 (s, (RLWINM + | RA (args[0]) + | RS (args[1]) + | SH (32 - sh) + | MB (sh) + | ME (31) + ) +); +} } else tcg_out32 (s, SRW | SAB (args[1], args[0], args[2])); break; case INDEX_op_sar_i32: -if (const_args[2]) -tcg_out32 (s, SRAWI | RS (args[1]) | RA (args[0]) | SH (args[2])); +if (const_args[2]) { +int sh = args[2] 31; +if (!sh) { +tcg_out_mov (s, TCG_TYPE_I32, args[0], args[1]); +} +else { +tcg_out32 (s, SRAWI | RS (args[1]) | RA (args[0]) | SH (sh)); +} +} else tcg_out32 (s, SRAW | SAB (args[1], args[0], args[2])); break; @@ -1604,7 +1627,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, | RS (args[1]) | MB (0) | ME (31) -| (const_args[2] ? RLWINM | SH (args[2]) +| (const_args[2] ? RLWINM | SH (args[2] 31) : RLWNM | RB (args[2])) ; tcg_out32 (s, op); This expects deposit/rotr constant arguments to be sane, i'm no longer certain this assumption is a sane one though... -- mailto:av1...@comtv.ru
Re: [Qemu-devel] Shifts, ppc[64], xtensa
On Wed, 19 Sep 2012, Max Filippov wrote: On Tue, Sep 18, 2012 at 11:52 PM, malc av1...@comtv.ru wrote: Looks like PPC/PPC64 is also hit by shift issues, on top of that xtensa malc, could you please expand a little bit what are these shift issues? (sounds like a modern trend, I must have missed something) Just audit op_opt output of extensa on 32bit host for shr_i32, i'm getting things like: OP after optimization and liveness analysis: movi_i32 tmp0,$0xd00793f4 movi_i32 tmp1,$0x1 movi_i32 tmp2,$0x1 movi_i32 tmp3,$advance_ccount call tmp3,$0x0,$0,env,tmp2 movi_i32 tmp2,$window_check call tmp2,$0x0,$0,env,tmp0,tmp1 movi_i32 tmp0,$0x1 add_i32 ar4,ar4,tmp0 movi_i32 tmp1,$0xd00793f6 movi_i32 tmp2,$0x3 movi_i32 tmp3,$0x1 movi_i32 tmp4,$advance_ccount call tmp4,$0x0,$0,env,tmp3 movi_i32 tmp3,$window_check call tmp3,$0x0,$0,env,tmp1,tmp2 mov_i32 tmp0,ar4 qemu_ld8u ar12,tmp0,$0x0 movi_i32 tmp0,$0xffe0 add_i32 ar9,ar12,tmp0 movi_i32 tmp1,$0x40 shr_i32 tmp0,ar9,tmp1 # I don't get the above movi_i32 tmp1,$0xff and_i32 ar9,tmp0,tmp1 # Nor the continuation movi_i32 tmp0,$0x3 movi_i32 tmp1,$advance_ccount call tmp1,$0x0,$0,env,tmp0 brcond_i32 ar6,ar9,ltu,$0x0 nopn $0x2,$0x2 movi_i32 pc,$0xd0079402 goto_tb $0x0 exit_tb $0x4a036c68 set_label $0x0 nopn $0x2,$0x2 movi_i32 pc,$0xd0079430 goto_tb $0x1 exit_tb $0x4a036c69 end Just adding some debugging printfs to constant shift paths in tcg target can also be useful. exposed another bug in power's tcg - gototb's target was expected to be always filled via tb_set_jmp_target (even though it's clearly not what tcg/README prescribes, sorry about that). -- mailto:av1...@comtv.ru
Re: [Qemu-devel] directory hierarchy
On 09/14/2012 10:51 PM, Blue Swirl wrote: exec: These files need cleanup so that TCG code gets into tcg/. Maybe also TB and CPUTLB handling. Some of that could be done by adding a separate MemoryListener for tcg. -- error compiling committee.c: too many arguments to function
Re: [Qemu-devel] Shifts, ppc[64], xtensa
On 18 September 2012 20:52, malc av1...@comtv.ru wrote: Looks like PPC/PPC64 is also hit by shift issues, on top of that xtensa exposed another bug in power's tcg - gototb's target was expected to be always filled via tb_set_jmp_target (even though it's clearly not what tcg/README prescribes, sorry about that). Thanks to Max Filippov for pointing to xtensa test suite that helped to narrow the search to gototb. Testing of the following with other targets on ppc flavours is welcome.. P.S. Xtensa does mighty weird things with shifts i must say... diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c index 26c4b33..08f62fa 100644 --- a/tcg/ppc/tcg-target.c +++ b/tcg/ppc/tcg-target.c @@ -409,6 +409,7 @@ static int tcg_target_const_match(tcg_target_long val, #define TW XO31(4) #define TRAP (TW | TO (31)) +#define NOP0x6000 #define RT(r) ((r)21) #define RS(r) ((r)21) @@ -1306,10 +1307,10 @@ void ppc_tb_set_jmp_target (unsigned long jmp_addr, unsigned long addr) *ptr = 0x4800 | (disp 0x03fc); /* b disp */ patch_size = 4; } else { -ptr[0] = 0x6000; /* nop */ -ptr[1] = 0x6000; -ptr[2] = 0x6000; -ptr[3] = 0x6000; +ptr[0] = NOP; +ptr[1] = NOP; +ptr[2] = NOP; +ptr[3] = NOP; patch_size = 16; } } @@ -1330,7 +1331,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, /* direct jump method */ s-tb_jmp_offset[args[0]] = s-code_ptr - s-code_buf; -s-code_ptr += 16; +tcg_out32 (s, NOP); +tcg_out32 (s, NOP); +tcg_out32 (s, NOP); +tcg_out32 (s, NOP); Not too familiar with the PPC backend, but doesn't this mean that in the retranslation case we will overwrite a correct jump destination with these NOP words and then rewrite it again with the correct destination? That can cause problems with cache incoherency; compare the fix applied in commit c69806ab8276 for ARM. thanks -- PMM
Re: [Qemu-devel] [PATCH V3 10/11] vcpu: introduce lockmap
On 09/19/2012 04:12 PM, Avi Kivity wrote: On 09/19/2012 02:46 PM, Edgar E. Iglesias wrote: On Wed, Sep 19, 2012 at 10:55:30AM +0300, Avi Kivity wrote: On 09/19/2012 07:40 AM, Peter Crosthwaite wrote: On Wed, Sep 19, 2012 at 2:32 PM, Edgar E. Iglesias edgar.igles...@gmail.com wrote: On Wed, Sep 19, 2012 at 02:25:48PM +1000, Peter Crosthwaite wrote: Ping for PMM, This is the root case of your block on the SDHCI series - this is a discussion on resolution to bogus infinite looping DMA. For current participants in this discussion, heres our thread on the same topic over in SD land: http://lists.gnu.org/archive/html/qemu-devel/2012-08/msg01017.html With the findings here and acknowledgement that this is a general problem, we either need to declare this issue of scope for SDHCI, or work with these guys (in the immediate future) on the DMA infinite loop problem flagged here. I dont mind if SDHCI+ADMA is the guinea pig here, but can we get a decisive plan for going forward with this issue if it is going to continue to block SDHCI. Thanks to Igor for identifying the overlap between these discussions. Hi, A couple of dumb questions. What is the reason for the blocker? that possible guest dos is worse than no functionality at all? Can't you put the DMA/IO processing into the background? I dont know a way do do asynchronous DMA unless I am missing something here. You could schedule a bottom half and do the accesses from there. Solves nothing though. So what happens is we have a device that walks a SG list synchronously while holding all the locks and stuff being discussed here. If that SG list infinite loops then crash. Did you mean loop, or recursion into the memory region that initiates DMA? I think we were discussing the loop issue (I hadn't thought of recursion issues before) :) Jan's point of resetability was interesting. Processing a finite amount of dma descriptors and scheduling bh's to continously get back and continue processing should be OK no? Yes. Bottom half idea sounds good. Also, for this particular device, rather then limiting amount of processed descriptor entries in bottom half, it seems reasonable to interrupt bh handler if we encounter a LINK descriptor since only recursive LINKs can generate infinite loops. In that case, a very long descriptor table without a single LINK entry could potentially hung QEMU for a long time, but that time will be finite anyway. Long term, this problem will disappear when/if someone converts QEMU SD card model to use async io. That would avoid the lockups and allow the device to be reset at any time. Or am I missing something? Not that I can see. If real hardware can be looped, so can qemu. I'm only worried about recursion and deadlocks (while real hardware can deadlock, we'd prefer to avoid that). So, I think the idea here is that if real hardware can be locked we should lock too, but provide a guest CPU a possibility to abort locked operation. For this particular example, SD card controller can deadlock on recursive descriptor LINK entries, but CPU can abort ongoing transaction at any time by issuing ABORT command. And if guest CPU busy-waits in infinite while() loop for a TRANSFER OVER flag to be set, then it had it coming.
Re: [Qemu-devel] [PATCH 010/126] target-s390: Reorg exception handling
On 19.09.2012, at 13:34, Alexander Graf wrote: On 19.09.2012, at 13:29, Peter Maydell wrote: On 19 September 2012 01:14, Richard Henderson r...@twiddle.net wrote: On 09/18/2012 01:18 PM, Alexander Graf wrote: -/* remember what pgm exeption this was */ +/* Remember what pgm exeption this was. */ ...if you're changing this comment then at least fix the typo (exception)... tmp = tcg_const_i32(code); tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUS390XState, int_pgm_code)); tcg_temp_free_i32(tmp); -tmp = tcg_const_i32(ilc); +tmp = tcg_const_i32(s-next_pc - s-pc); Mind to explain this one? ILC = the size of the insn. Rather than passing ILC around into gen_program_exception, get it back from the s-next_pc value that we stored into DisasContext. ...but isn't (s-next_pc - s-pc) ilc*2, not ilc? Compare this hunk from elsewhere in the patch: -tmp32_2 = tcg_const_i32(ilc * 2); -tmp32_3 = tcg_const_i32(EXCP_SVC); +tmp32_2 = tcg_const_i32(s-next_pc - s-pc); Yes, it is, and hence it's correct. The exception field encodes the real instruction length, not the ILC bits in the opcode. Maybe the naming is unfortunate... Or maybe not. For SVC we seem to pass in the full instruction length, while PGM only gets ILC. So the above hunk is wrong. Richard, please double-check all the ilc passing again with the spec. Alex
Re: [Qemu-devel] [PATCH 0/5] tcg: movcond
On Tue, 18 Sep 2012, Aurelien Jarno wrote: On Tue, Sep 18, 2012 at 07:23:55AM -0700, Richard Henderson wrote: As recently discussed, with the optional fallback to setcond. I include a patch to target-alpha to test correctness both before and after implementing the opcode in the i386 backend, as well as in the optimizations. Thanks for this new implementation, it's a lot cleaner than the previous one, especially the fallback to setcond if not implemented, and the argument alias for the x86 backend, deferring the issue to the register allocator instead of trying to do that in the backend. From my side, I am ok with it. That said I'd also like to have at least the opinion of Malc and Blue (Cc:ed). Comments from others are also welcome. I don't have objections (philosphical or otherwise) to this, the only request is for a detailed directions on how to exercise non emulated version (i.e. where to get the alpha image from how to run it exactly) [..snip..] P.S. The only thing i've noticed is somewhat unusual naming convention in the README, nothing else uses cN and vN naming of arguments, perhaps sticking to t0, t1, t2, t3 is better to avoid confusion? -- mailto:av1...@comtv.ru
Re: [Qemu-devel] [PATCH V3 10/11] vcpu: introduce lockmap
On 09/19/2012 04:01 PM, Igor Mitsyanko wrote: That would avoid the lockups and allow the device to be reset at any time. Or am I missing something? Not that I can see. If real hardware can be looped, so can qemu. I'm only worried about recursion and deadlocks (while real hardware can deadlock, we'd prefer to avoid that). So, I think the idea here is that if real hardware can be locked we should lock too, but provide a guest CPU a possibility to abort locked operation. For this particular example, SD card controller can deadlock on recursive descriptor LINK entries, but CPU can abort ongoing transaction at any time by issuing ABORT command. And if guest CPU busy-waits in infinite while() loop for a TRANSFER OVER flag to be set, then it had it coming. If real hardware would lock up so should we (in the guest's eyes) but the qemu monitor should remain responsive. It is not part of emulated hardware. -- error compiling committee.c: too many arguments to function
Re: [Qemu-devel] [PATCH] xen: Fix, no unplug of pt device by platform device.
On Wed, 19 Sep 2012, Anthony PERARD wrote: The Xen platform device will unplug any NICs if requested by the guest (PVonHVM) including a NIC that would have been passthrough. This patch makes sure that a passthrough device will not be unplug. Reported-by: Zhang, Yang Z yang.z.zh...@intel.com Signed-off-by: Anthony PERARD anthony.per...@citrix.com Zhang, does it this patch fix the problem for you? hw/xen_platform.c | 8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/hw/xen_platform.c b/hw/xen_platform.c index 0d6c2ff..956dbfe 100644 --- a/hw/xen_platform.c +++ b/hw/xen_platform.c @@ -85,8 +85,10 @@ static void log_writeb(PCIXenPlatformState *s, char val) static void unplug_nic(PCIBus *b, PCIDevice *d, void *o) { +/* We have to ignore passthrough devices */ if (pci_get_word(d-config + PCI_CLASS_DEVICE) == -PCI_CLASS_NETWORK_ETHERNET) { +PCI_CLASS_NETWORK_ETHERNET + strcmp(d-name, xen-pci-passthrough) != 0) { qdev_free(d-qdev); } } @@ -98,8 +100,10 @@ static void pci_unplug_nics(PCIBus *bus) static void unplug_disks(PCIBus *b, PCIDevice *d, void *o) { +/* We have to ignore passthrough devices */ if (pci_get_word(d-config + PCI_CLASS_DEVICE) == -PCI_CLASS_STORAGE_IDE) { +PCI_CLASS_STORAGE_IDE + strcmp(d-name, xen-pci-passthrough) != 0) { qdev_unplug((d-qdev), NULL); } } -- Anthony PERARD
Re: [Qemu-devel] [PATCH V3 10/11] vcpu: introduce lockmap
On 09/19/2012 11:57 AM, Jan Kiszka wrote: On 2012-09-19 06:40, Peter Crosthwaite wrote: On Wed, Sep 19, 2012 at 2:32 PM, Edgar E. Iglesias edgar.igles...@gmail.com wrote: On Wed, Sep 19, 2012 at 02:25:48PM +1000, Peter Crosthwaite wrote: Ping for PMM, This is the root case of your block on the SDHCI series - this is a discussion on resolution to bogus infinite looping DMA. For current participants in this discussion, heres our thread on the same topic over in SD land: http://lists.gnu.org/archive/html/qemu-devel/2012-08/msg01017.html With the findings here and acknowledgement that this is a general problem, we either need to declare this issue of scope for SDHCI, or work with these guys (in the immediate future) on the DMA infinite loop problem flagged here. I dont mind if SDHCI+ADMA is the guinea pig here, but can we get a decisive plan for going forward with this issue if it is going to continue to block SDHCI. Thanks to Igor for identifying the overlap between these discussions. Hi, A couple of dumb questions. What is the reason for the blocker? that possible guest dos is worse than no functionality at all? Can't you put the DMA/IO processing into the background? I dont know a way do do asynchronous DMA unless I am missing something here. So what happens is we have a device that walks a SG list synchronously while holding all the locks and stuff being discussed here. If that SG list infinite loops then crash. We could switch to async DMA, but most DMA-capable devices aren't prepared for rescheduling points in the middle of their code. This will require quite a bit of code review. From what I understand, we can't switch to async DMA while SD card model doesn't support it. And introducing this support would require to convert every existing SD card user in QEMU. I can recall that such a conversion was already suggested in a mailing list a while ago, but no one (including me) volunteered to do it :) what's the diff between setup of bad descriptors and writing a while (1) loop in the guest CPU? While(1) in guest doesnt freeze all of QEMU (monitor still works and stuff), wheras the DMA ops going in circles does, as locks are taken by an infinite loop. That's the point. If we loop, we need at least a way to stop it by issuing a device or system reset. But I tend to think that detecting loops and failing/ignoring requests is easier implementable. Jan
Re: [Qemu-devel] qmp: dump-guest-memory: -p option has issues, fix it or drop it?
Anthony Liguori anth...@codemonkey.ws writes: Markus Armbruster arm...@redhat.com writes: Anthony Liguori anth...@codemonkey.ws writes: Markus Armbruster arm...@redhat.com writes: Jan Kiszka jan.kis...@siemens.com writes: * The issues discussed in this email plus the fact that the guest memory may be corrupted, and the guest may be in real-mode even when paging is enabled Yes, there are some limitations with this option. Jan said that he always use gdb to deal with vmcore, so he needs such information. The point is to overcome the focus on Linux-only dump processing tools. While I don't care for supporting alternate dump processing tools myself, I certainly don't mind supporting them, as long as the code satisfies basic safety and reliability requirements. This code doesn't, as far as I can tell. It works, thought not under all circumstances. I don't doubt it works often enough to be useful to somebody. But basic safety and reliability requirements are a bit more than that. They include don't explode in ways a reasonable user can't be expected to foresee. I don't think a reasonable user can be expected to see that -p is safe only for trusted guests. We shipped the API, we're not removing it. Our compatibility isn't whatever libvirt is currently using. Bah, don't be more catholic than the pope. It's been out for how many days? Have you looked at the code? It's perfectly reasonable to ask to document the behavior of the method. It's also a trivial patch to qapi-schema.json. I'm okay with leaving obscure security holes in upstream QEMU, indefinitely, as long as they're documented. I don't think it's a good idea, but it's something reasonable people can disagree about. It's *not* a security hole. Since we agree that documenting the existing crappy behavior is okay for stable, let's not argue what exactly to call it. The malicious guest you mention is just a guest with fully populated PTEs and PDEs, no? A normal guest uses a small fraction of its RAM for page tables. QEMU allocates 10x of that fraction for dump -p. Not so nice. A not-so-normal guest uses most of its RAM for page tables. QEMU allocates in the order of 10x of the guest RAM for -p. Oops. [...]
Re: [Qemu-devel] [PATCH V3 01/11] atomic: introduce atomic operations
Avi Kivity wrote: On 09/13/2012 09:54 AM, liu ping fan wrote: +typedef struct Atomic { +int counter; +} Atomic; Best to mark counter 'volatile'. + +static inline void atomic_set(Atomic *v, int i) +{ +v-counter = i; +} + +static inline int atomic_read(Atomic *v) +{ +return v-counter; +} So these two operations don't get mangled by the optimizer. Browsing linux code and reading lkml, find some similar material. But they have moved volatile from -counter to function - atomic_read(). As to atomic_read(), I think it need to prevent optimizer from refetching issue, but as to atomic_set(), do we need ? I think so, to prevent reordering. Hi, I don't think volatile makes any difference to reordering here. The compiler is not going to move the atomic_set() store before or after another instruction on the same atomic variable anyway, just like it wouldn't do that for an ordinary assignment. If you're concerned about ordering with respect to other memory, then volatile wouldn't make much difference. barrier() before and after would. If you're copying Linux's semantics, Linux's atomic_set() doesn't include any barriers, nor imply any. atomic_read() uses volatile to ensure that each call re-reads the value, for example in a loop. (Same as ACCESS_ONCE()). If there was a call to atomic_set() in a loop, it doesn't guarantee that will be written each time. -- Jamie
Re: [Qemu-devel] [PATCH 2/2] target-i386: Fix default Hypervisor level for hypervisor-vendor=kvm.
On Tue, Sep 18, 2012 at 03:32:04PM -0400, Don Slutz wrote: On 09/18/12 13:00, Eduardo Habkost wrote: On Tue, Sep 18, 2012 at 10:49:53AM -0400, Don Slutz wrote: From http://lkml.indiana.edu/hypermail/linux/kernel/1205.0/00100.html EAX should be KVM_CPUID_FEATURES (0x4001) not 0. If kvm is not configured, the additional option of hypervisor-level=1 (or hypervisor-level=0x4001) needs to be specified to get this. --- target-i386/cpu.c | 12 +++- 1 files changed, 11 insertions(+), 1 deletions(-) diff --git a/target-i386/cpu.c b/target-i386/cpu.c index 6e43eff..d73b0a8 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -1248,7 +1248,12 @@ static char *x86_cpuid_get_hv_vendor(Object *obj, Error **errp) env-cpuid_hv_level == CPUID_HV_LEVEL_XEN) { pstrcpy(value, sizeof(value), xen); } else if (!strcmp(value, CPUID_HV_VENDOR_KVM) - env-cpuid_hv_level == 0) { +#if defined(CONFIG_KVM) + env-cpuid_hv_level == KVM_CPUID_FEATURES +#else + env-cpuid_hv_level == 0 +#endif + ) { pstrcpy(value, sizeof(value), kvm); } return value; @@ -1281,6 +1286,11 @@ static void x86_cpuid_set_hv_vendor(Object *obj, const char *value, } pstrcpy(adj_value, sizeof(adj_value), CPUID_HV_VENDOR_XEN); } else if (!strcmp(value, kvm)) { +#if defined(CONFIG_KVM) +if (env-cpuid_hv_level == 0) { +env-cpuid_hv_level = KVM_CPUID_FEATURES; +} +#endif If CPUID[0x4000].EAX set to 0 is documented as equivalent to having it set to 0x4001 (KVM_CPUID_FEATURES), why the confusing checks for CONFIG_KVM? Why not always set it to KVM_CPUID_FEATURES? At line 36 of the file: #if defined(CONFIG_KVM) #include linux/kvm_para.h #endif So without the check you get compile failures (i386-linux-user). Right. We can't include linux/kvm_para.h on a non-Linux build host, but on the other hand I don't see any reason to not have cpuid_hv_level=0x4001 on builds without CONFIG_KVM too. If we want to allow fake KVM CPUID leaves on non-CONFIG_KVM builds like the code above does, QEMU needs its own constant for KVM_CPUID_FEATURES/0x4001, so it doesn't depend on the Linux header. I have no issue with removing the check(s) for 0. Currently hypervisor-level=0 will not force the old values; I can make a change so that works. -Don Slutz -- Eduardo
Re: [Qemu-devel] qmp: dump-guest-memory: -p option has issues, fix it or drop it?
On Wed, 19 Sep 2012 11:26:51 +0900 (JST) HATAYAMA Daisuke d.hatay...@jp.fujitsu.com wrote: From: Wen Congyang we...@cn.fujitsu.com Subject: Re: [Qemu-devel] qmp: dump-guest-memory: -p option has issues, fix it or drop it? Date: Wed, 19 Sep 2012 10:07:04 +0800 At 09/19/2012 08:18 AM, Luiz Capitulino Wrote: On Tue, 18 Sep 2012 16:13:30 -0500 Anthony Liguori anth...@codemonkey.ws wrote: Markus Armbruster arm...@redhat.com writes: Jan Kiszka jan.kis...@siemens.com writes: * The issues discussed in this email plus the fact that the guest memory may be corrupted, and the guest may be in real-mode even when paging is enabled Yes, there are some limitations with this option. Jan said that he always use gdb to deal with vmcore, so he needs such information. The point is to overcome the focus on Linux-only dump processing tools. While I don't care for supporting alternate dump processing tools myself, I certainly don't mind supporting them, as long as the code satisfies basic safety and reliability requirements. This code doesn't, as far as I can tell. It works, thought not under all circumstances. I don't doubt it works often enough to be useful to somebody. But basic safety and reliability requirements are a bit more than that. They include don't explode in ways a reasonable user can't be expected to foresee. I don't think a reasonable user can be expected to see that -p is safe only for trusted guests. We shipped the API, we're not removing it. Our compatibility isn't whatever libvirt is currently using. It's perfectly reasonable to ask to document the behavior of the method. It's also a trivial patch to qapi-schema.json. I feel that documenting it is not enough. It would be fine to do that if the worst case was a bad dump file, but the worst case as the code stands right now will affect the host. It's unreasonable to ask for an interface to be removed just because it could be misused when it has a legimitate use-case. The point is not that it can be misused. The issue we're concerned about is that a malicious guest could cause qemu to allocate dozens of gigabytes of RAM. Hmm, I guess the malicious guest's page table provides wrong information. We allocate memory to store memory mapping. Each memory mapping needs less than 40 bytes memory. The num of memory mapping is less than (2^48) / (2^12) = 2^36. And 2^36 * 40 = 64G * 40, too many memory What about this: 1. if the num of memory mapping 10, we only store 10 memory mappings. 2. The memory mapping which has smaller virtual address will be dropped? In this case, the memory we need is less than 10MB. So we will not allocate too many memory. The problem of artificially limiting it is that we can reach the limit in legal usage (ie. a very large machine). How about dropping making a whole list of memory maps at the same time, and how about rewriting the code so that it always has at most one memory mapping by merging virtually consequtive chunks? If possible, only 40 bytes is needed. It already merges contiguous addresses and addresses that fall in the same range. It can also skip addresses due to a few reasons (I/O page, not present page, etc), which makes the problem very unlikely in practice. Our concern is with guests intentionally trying to make qemu allocate more memory.
Re: [Qemu-devel] [libvirt] [PATCH v2 1/4] config: Introduce migration for SPICE graphics
On 15.09.2012 17:10, Daniel P. Berrange wrote: On Fri, Sep 14, 2012 at 05:23:16PM -0600, Eric Blake wrote: [adding qemu] On 09/14/2012 11:47 AM, Daniel P. Berrange wrote: On Fri, Sep 14, 2012 at 07:34:50PM +0200, Michal Privoznik wrote: With this element users will control how SPICE server behaves upon migration. For now, there's just one attribute 'seamless' turning seamless migration on/off/default. Ewww, no. This information is a related to a API operation, not the VM configuration. It should be either auto-detected by libvirt to the best compatible setting, or passed as a flag to the virDomainMigrate API call if auto-detection is not possible. But with the current qemu implementation, there's no way to know if the destination supports this until after you've started the source, and the current implementation in qemu is that you must declare the semantics at the time you start qemu, not at the time you send the 'migrate' monitor command. For libvirt autodetection to work without polluting the domain XML, we'd need to be able to auto-detect at the time we start migration. This sounds like we need to enhance the 'migrate-set-capabilities' command to enable or disable this feature on the fly, according to what libvirt detects from the remote end, rather than hard-coding it to the startup state of qemu on the source side. Hmm, my understanding of the QEMU flag was different. Based on the commit message: spice: adding seamless-migration option to the command line The seamless-migration flag is required in order to identify whether libvirt supports the new QEVENT_SPICE_MIGRATE_COMPLETED or not (by default the flag is off). New libvirt versions that wait for QEVENT_SPICE_MIGRATE_COMPLETED should turn on this flag. When this flag is off, spice fallbacks to its old migration method, which can result in data loss. This says to me that any libvirt which knows about the new SPICE_MIGRATE_COMPLETED event, should set the seamless-migration flag unconditionally, to indicate that it can handle the event and thus the new migration method. It says nothing about only setting this flag if the destination QEMU also supports it. As such, IMHO, we can should set this flag unconditonally on all QEMUs we run which support it. If it turns out that this flag does indeed require that the destination QEMU also has the same setting, then IMHO this flag is a fatally flawed design. At time of starting any QEMU instance, we can't know whether the destination QEMU we want to migrate to will have the support or not. Compatibility checks of this kind can only be decided at time the migrate command is actually issued. Daniel From my investigation I was able to migrate between qemu where one had seamless_migration=on and the other one didn't support such argument at all. Literally, on source I've checked out 27af778828db9 (the last commit in Yonit's patchset) on destination f5bb039c6d97e (the last before the patchset). Then I've migrated and receive both event and flag set in query-spice response. So I guess the design is okay. Having said that - what's desired solution here? Unconditionally set seamless_migration=on on enough new qemu and don't expose it in XML at all? Michal
Re: [Qemu-devel] [PATCH V3 01/11] atomic: introduce atomic operations
liu ping fan wrote: +static inline void atomic_set(Atomic *v, int i) +{ +v-counter = i; +} Hi, When running on ARM Linux kernels prior to 2.6.32, userspace atomic_set() needs to use clrex or strex too. See Linux commit 200b812d, Clear the exclusive monitor when returning from an exception. You can see ARM's atomic_set() used to use strex, and warns it's important. The kernel patch allows atomic_set() to be simplified, and that includes for userspace, by putting clrex/strex in the exception return path instead. However, someone may run QEMU on a kernel before 2.6.32, which isn't that old. (E.g. my phone is running 2.6.28). Otherwise you can have this situation: Initially: a = 0. Thread atomic_inc(a, 1) = ldrex, add, [strex interrupted] Interrupted by signal handler atomic_set(a, 3) = str Signal return Resume thread = strex (succeeds because CPU-local exclusive-flag still set) Result: a = 1, should be impossible when the signal triggered, and information about the signal is lost. A more realistic example would use atomic_compare_exchange(), to atomic-read-and-clear, atomic-read-and-dec-if-not-zero a variable set in a signal handler, however I've used atomic_inc() to illustrate because that's in your patch. Best, -- Jamie
[Qemu-devel] [PATCH] qxl/update_area_io: cleanup invalid parameters handling
This cleans up two additions of almost the same code in commits 511b13e2c9 and ccc2960d654. While at it, make error paths consistent (always use 'break' instead of 'return'). Signed-off-by: Michael Tokarev m...@tls.msk.ru Cc: Dunrong Huang riegama...@gmail.com Cc: Alon Levy al...@redhat.com --- hw/qxl.c | 13 +++-- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/hw/qxl.c b/hw/qxl.c index 33169f3..3c82c2a 100644 --- a/hw/qxl.c +++ b/hw/qxl.c @@ -1547,20 +1547,13 @@ async_common: if (d-ram-update_surface d-ssd.num_surfaces) { qxl_set_guest_bug(d, QXL_IO_UPDATE_AREA: invalid surface id %d\n, d-ram-update_surface); -return; +break; } -if (update.left = update.right || update.top = update.bottom) { +if (update.left = update.right || update.top = update.bottom || +update.left 0 || update.top 0) { qxl_set_guest_bug(d, QXL_IO_UPDATE_AREA: invalid area (%ux%u)x(%ux%u)\n, update.left, update.top, update.right, update.bottom); -return; -} - -if (update.left 0 || update.top 0 || update.left = update.right || -update.top = update.bottom) { -qxl_set_guest_bug(d, QXL_IO_UPDATE_AREA: - invalid area(%d,%d,%d,%d)\n, update.left, - update.right, update.top, update.bottom); break; } if (async == QXL_ASYNC) { -- 1.7.10.4
Re: [Qemu-devel] [PATCH 05/10] qxl: dont update invalid area
On 13.09.2012 12:45, Gerd Hoffmann wrote: From: Dunrong Huang riegama...@gmail.com This patch fixes the following error: $ ~/usr/bin/qemu-system-x86_64 -enable-kvm -m 1024 -spice port=5900,disable-ticketing -vga qxl -cdrom ~/Images/linuxmint-13-mate-dvd-32bit.iso (/home/mathslinux/usr/bin/qemu-system-x86_64:10068): SpiceWorker-CRITICAL **: red_worker.c:4599:red_update_area: condition `area-left = 0 area-top = 0 area-left area-right area-top area-bottom' failed Aborted spice server terminates QEMU process if we pass invalid area to it, so dont update those invalid areas. Signed-off-by: Dunrong Huang riegama...@gmail.com Signed-off-by: Gerd Hoffmann kra...@redhat.com --- hw/qxl.c |7 +++ 1 files changed, 7 insertions(+), 0 deletions(-) diff --git a/hw/qxl.c b/hw/qxl.c index 257a37d..0176b1a 100644 --- a/hw/qxl.c +++ b/hw/qxl.c @@ -1470,6 +1470,13 @@ async_common: return; } +if (update.left 0 || update.top 0 || update.left = update.right || +update.top = update.bottom) { +qxl_set_guest_bug(d, QXL_IO_UPDATE_AREA: + invalid area(%d,%d,%d,%d)\n, update.left, + update.right, update.top, update.bottom); +break; +} Please take a look at the previous chunk of code, which was added in 511b13e2c9b426b3c56060909693de5097f0b496 qxl/update_area_io: guest_bug on invalid parameters by alevy: +if (d-ram-update_surface NUM_SURFACES) { +qxl_set_guest_bug(d, QXL_IO_UPDATE_AREA: invalid surface id %d\n, + d-ram-update_surface); +return; +} +if (update.left = update.right || update.top = update.bottom) { +qxl_set_guest_bug(d, +QXL_IO_UPDATE_AREA: invalid area (%ux%u)x(%ux%u)\n, +update.left, update.top, update.right, update.bottom); +return; +} + Now, this place looks.. well.. funny. A (trivial) cleanup patch is on the way. Thanks, /mjt
Re: [Qemu-devel] [PATCH v2 2/2] Versatile Express: add modelling of NOR flash
On 09/19/2012 12:26 PM, Peter Maydell wrote: On 18 September 2012 21:59, Francesco Lavra francescolavra...@gmail.com wrote: On 09/18/2012 03:46 PM, Peter Maydell wrote: On 17 September 2012 21:08, Francesco Lavra francescolavra...@gmail.com wrote: qemu_irq pic[64]; uint32_t proc_id; uint32_t sys_id; +DriveInfo *dinfo; ram_addr_t vram_size, sram_size; MemoryRegion *sysmem = get_system_memory(); MemoryRegion *vram = g_new(MemoryRegion, 1); @@ -410,8 +415,23 @@ static void vexpress_common_init(const VEDBoardInfo *daughterboard, sysbus_create_simple(pl111, map[VE_CLCD], pic[14]); -/* VE_NORFLASH0: not modelled */ -/* VE_NORFLASH1: not modelled */ +dinfo = drive_get_next(IF_PFLASH); +if (!pflash_cfi01_register(map[VE_NORFLASH0], NULL, vexpress.flash0, +VEXPRESS_FLASH_SIZE, dinfo ? dinfo-bdrv : NULL, +VEXPRESS_FLASH_SECT_SIZE, +VEXPRESS_FLASH_SIZE / VEXPRESS_FLASH_SECT_SIZE, 4, +0x00, 0x89, 0x00, 0x18, 0)) { +fprintf(stderr, vexpress: error registering flash 0.\n); Shouldn't these errors be fatal? I checked the existing uses of pflash_cfi_0[1,2]_register() in the code, and if I'm not mistaken only in 5 out of 19 devices these errors are fatal, in the other 14 cases initialization continues even after flash registration failure, with or without an error message. Let me know if you still prefer these errors to be fatal. So the only reason this can fail is if the user specified a file to back the flash but trying to read it failed (ie, bad filename or file not the same size as the flash). I think that merits actually stopping on error. Ideally in the long term the flash devices should be converted to proper QOM devices and we could push the error handling back into the device itself (which is better positioned to distinguish bad filename from wrong length I suspect). Ok, I will make these errors fatal in v3. -- Francesco
[Qemu-devel] [PATCH 01/12] nbd: add more constants
Avoid magic numbers and magic size computations; hide them behind constants. Signed-off-by: Paolo Bonzini pbonz...@redhat.com --- nbd.c | 17 ++--- 1 file modificato, 10 inserzioni(+), 7 rimozioni(-) diff --git a/nbd.c b/nbd.c index 0dd60c5..8201b7a 100644 --- a/nbd.c +++ b/nbd.c @@ -57,9 +57,12 @@ /* This is all part of the official NBD API */ +#define NBD_REQUEST_SIZE(4 + 4 + 8 + 8 + 4) #define NBD_REPLY_SIZE (4 + 4 + 8) #define NBD_REQUEST_MAGIC 0x25609513 #define NBD_REPLY_MAGIC 0x67446698 +#define NBD_OPTS_MAGIC 0x49484156454F5054LL +#define NBD_CLIENT_MAGIC0x420281861253LL #define NBD_SET_SOCK_IO(0xab, 0) #define NBD_SET_BLKSIZE _IO(0xab, 1) @@ -213,7 +216,7 @@ static int nbd_send_negotiate(int csock, off_t size, uint32_t flags) /* Negotiate [ 0 .. 7] passwd (NBDMAGIC) -[ 8 .. 15] magic(0x00420281861253) +[ 8 .. 15] magic(NBD_CLIENT_MAGIC) [16 .. 23] size [24 .. 27] flags [28 .. 151] reserved (0) @@ -224,7 +227,7 @@ static int nbd_send_negotiate(int csock, off_t size, uint32_t flags) TRACE(Beginning negotiation.); memcpy(buf, NBDMAGIC, 8); -cpu_to_be64w((uint64_t*)(buf + 8), 0x00420281861253LL); +cpu_to_be64w((uint64_t*)(buf + 8), NBD_CLIENT_MAGIC); cpu_to_be64w((uint64_t*)(buf + 16), size); cpu_to_be32w((uint32_t*)(buf + 24), flags | NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM | @@ -295,7 +298,7 @@ int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags, uint32_t namesize; TRACE(Checking magic (opts_magic)); -if (magic != 0x49484156454F5054LL) { +if (magic != NBD_OPTS_MAGIC) { LOG(Bad magic received); goto fail; } @@ -334,7 +337,7 @@ int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags, } else { TRACE(Checking magic (cli_magic)); -if (magic != 0x00420281861253LL) { +if (magic != NBD_CLIENT_MAGIC) { LOG(Bad magic received); goto fail; } @@ -477,7 +480,7 @@ int nbd_client(int fd) ssize_t nbd_send_request(int csock, struct nbd_request *request) { -uint8_t buf[4 + 4 + 8 + 8 + 4]; +uint8_t buf[NBD_REQUEST_SIZE]; ssize_t ret; cpu_to_be32w((uint32_t*)buf, NBD_REQUEST_MAGIC); @@ -504,7 +507,7 @@ ssize_t nbd_send_request(int csock, struct nbd_request *request) static ssize_t nbd_receive_request(int csock, struct nbd_request *request) { -uint8_t buf[4 + 4 + 8 + 8 + 4]; +uint8_t buf[NBD_REQUEST_SIZE]; uint32_t magic; ssize_t ret; @@ -582,7 +585,7 @@ ssize_t nbd_receive_reply(int csock, struct nbd_reply *reply) static ssize_t nbd_send_reply(int csock, struct nbd_reply *reply) { -uint8_t buf[4 + 4 + 8]; +uint8_t buf[NBD_REPLY_SIZE]; ssize_t ret; /* Reply -- 1.7.12
[Qemu-devel] [PATCH] qemu-timer: simplify qemu_run_timers
ptimer_head is an invariant pointer to clock-active_timers. Remove it, and just reference clock-active_timers directly. Signed-off-by: Paolo Bonzini pbonz...@redhat.com --- qemu-timer.c | 7 +++ 1 file modificato, 3 inserzioni(+), 4 rimozioni(-) diff --git a/qemu-timer.c b/qemu-timer.c index c7a1551..908a103 100644 --- a/qemu-timer.c +++ b/qemu-timer.c @@ -372,21 +372,20 @@ bool qemu_timer_expired(QEMUTimer *timer_head, int64_t current_time) void qemu_run_timers(QEMUClock *clock) { -QEMUTimer **ptimer_head, *ts; +QEMUTimer *ts; int64_t current_time; if (!clock-enabled) return; current_time = qemu_get_clock_ns(clock); -ptimer_head = clock-active_timers; for(;;) { -ts = *ptimer_head; +ts = clock-active_timers; if (!qemu_timer_expired_ns(ts, current_time)) { break; } /* remove timer from the list before calling the callback */ -*ptimer_head = ts-next; +clock-active_timers = ts-next; ts-next = NULL; /* run the callback (the timer list can be modified) */ -- 1.7.12
Re: [Qemu-devel] [PATCH] qapi: convert add_client
On Wed, 19 Sep 2012 13:04:22 +0200 Paolo Bonzini pbonz...@redhat.com wrote: Il 18/09/2012 21:06, Luiz Capitulino ha scritto: +fd = monitor_get_fd(cur_mon, fdname); +if (fd 0) { +error_setg(errp, file-description '%s' has not been found, fdname); +return; +} Please change this to add an Error * argument to monitor_get_fd, and change non-Error-friendly callers of monitor_get_fd to monitor_handle_fd_param. Good point. You can then reuse the error in monitor_handle_fd_param (there is already QERR_FD_NOT_FOUND, by the way). I'll post the patches later in my reviewed NBD series, please take them from there and I'll rebase before my pull request. Ok, although I wouldn't mind having this patch on top of yours.
[Qemu-devel] [PULL 00/12] NBD patches for 2012-09-22
The following changes since commit 89c7fd21930de671a6e34793e8b1ee257e2e: Remove unused CONFIG_TCG_PASS_AREG0 and dead code (2012-09-15 17:51:14 +) are available in the git repository at: git://github.com/bonzini/qemu.git nbd-next for you to fetch changes up to be3d30e144dbb99cd39ae4cedfb802337b5b172f: nbd: add nbd_export_get_blockdev (2012-09-19 14:03:15 +0200) These patches merge the first part of the embedded NBD server. The actual QEMU implementation needs some refactoring of qemu-sockets, and might even go in through Luiz's tree. Paolo Bonzini (12): nbd: add more constants nbd: pass NBDClient to nbd_send_negotiate nbd: do not close BlockDriverState in nbd_export_close nbd: make refcount interface public nbd: do not leak nbd_trip coroutines when a connection is torn down nbd: add reference counting to NBDExport nbd: track clients into NBDExport nbd: add notification for closing an NBDExport qemu-nbd: rewrite termination conditions to use a state machine nbd: register named exports nbd: negotiate with named exports nbd: add nbd_export_get_blockdev nbd.c | 396 ++--- nbd.h | 15 ++- qemu-nbd.c | 36 -- 3 file modificati, 367 inserzioni(+), 80 rimozioni(-) -- 1.7.12
[Qemu-devel] [PATCH 10/12] nbd: register named exports
Add an API to register and find named exports. Signed-off-by: Paolo Bonzini pbonz...@redhat.com --- nbd.c | 49 + nbd.h | 4 2 file modificati, 53 inserzioni(+) diff --git a/nbd.c b/nbd.c index 2e9de70..2d2221c 100644 --- a/nbd.c +++ b/nbd.c @@ -93,13 +93,17 @@ struct NBDExport { void (*close)(NBDExport *exp); BlockDriverState *bs; +char *name; off_t dev_offset; off_t size; uint32_t nbdflags; QTAILQ_HEAD(, NBDClient) clients; QSIMPLEQ_HEAD(, NBDRequest) requests; +QTAILQ_ENTRY(NBDExport) next; }; +static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports); + struct NBDClient { int refcount; void (*close)(NBDClient *client); @@ -740,6 +744,39 @@ NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset, return exp; } +NBDExport *nbd_export_find(const char *name) +{ +NBDExport *exp; +QTAILQ_FOREACH(exp, exports, next) { +if (strcmp(name, exp-name) == 0) { +return exp; +} +} + +return NULL; +} + +void nbd_export_set_name(NBDExport *exp, const char *name) +{ +if (exp-name == name) { +return; +} + +nbd_export_get(exp); +if (exp-name != NULL) { +g_free(exp-name); +exp-name = NULL; +QTAILQ_REMOVE(exports, exp, next); +nbd_export_put(exp); +} +if (name != NULL) { +nbd_export_get(exp); +exp-name = g_strdup(name); +QTAILQ_INSERT_TAIL(exports, exp, next); +} +nbd_export_put(exp); +} + void nbd_export_close(NBDExport *exp) { NBDClient *client, *next; @@ -765,6 +802,8 @@ void nbd_export_put(NBDExport *exp) } if (--exp-refcount == 0) { +assert(exp-name == NULL); + if (exp-close) { exp-close(exp); } @@ -780,6 +819,16 @@ void nbd_export_put(NBDExport *exp) } } +void nbd_export_close_all(void) +{ +NBDExport *exp, *next; + +QTAILQ_FOREACH_SAFE(exp, exports, next, next) { +nbd_export_close(exp); +nbd_export_set_name(exp, NULL); +} +} + static int nbd_can_read(void *opaque); static void nbd_read(void *opaque); static void nbd_restart_write(void *opaque); diff --git a/nbd.h b/nbd.h index 895820b..f0edb9c 100644 --- a/nbd.h +++ b/nbd.h @@ -85,6 +85,10 @@ void nbd_export_close(NBDExport *exp); void nbd_export_get(NBDExport *exp); void nbd_export_put(NBDExport *exp); +NBDExport *nbd_export_find(const char *name); +void nbd_export_set_name(NBDExport *exp, const char *name); +void nbd_export_close_all(void); + NBDClient *nbd_client_new(NBDExport *exp, int csock, void (*close)(NBDClient *)); void nbd_client_close(NBDClient *client); -- 1.7.12
Re: [Qemu-devel] [PATCH] arch_init.c: Improve '-soundhw help' for non-HAS_AUDIO_CHOICE archs
On Wed, 19 Sep 2012, Peter Maydell wrote: For architectures which don't set HAS_AUDIO_CHOICE, improve the '-soundhw help' message so that it doesn't simply print an empty list, implying no sound support at all. [..snip..] Applied, thanks. -- mailto:av1...@comtv.ru