date:20170802

Re: [Qemu-devel] [PATCH 3/6] tests/tcg/linux-test.c: include definitions for getrusage()

2017-08-02 Thread Philippe Mathieu-Daudé

On Wed, Aug 2, 2017 at 6:28 PM, Philippe Mathieu-Daudé  wrote:
> Hi Cleber,
>
> On 08/02/2017 05:15 PM, Cleber Rosa wrote:
>>
>> A include for  is missing, and prevents
>> tests/tcg/linux-test from compiling.
>
>
> getrusage() I presume, don't know if worth adding in commit message.

Sorry I missed it from the commit subject :/

>
>>
>> Signed-off-by: Cleber Rosa 
>
>
> Reviewed-by: Philippe Mathieu-Daudé 
>
>
>> ---
>>   tests/tcg/linux-test.c | 1 +
>>   1 file changed, 1 insertion(+)
>>
>> diff --git a/tests/tcg/linux-test.c b/tests/tcg/linux-test.c
>> index 1c6c013..15c9d7f 100644
>> --- a/tests/tcg/linux-test.c
>> +++ b/tests/tcg/linux-test.c
>> @@ -39,6 +39,7 @@
>>   #include 
>>   #include 
>>   #include 
>> +#include 
>> #define TESTPATH "/tmp/linux-test.tmp"
>>   #define TESTPORT 7654
>>
>

Re: [Qemu-devel] [PATCH for-2.10 2/3] target/mips: Drop redundant gen_io_start/stop()

2017-08-02 Thread Philippe Mathieu-Daudé


On 08/02/2017 06:59 AM, James Hogan wrote:

DMTC0 CP0_Cause does a redundant gen_io_start() and gen_io_end() pair,
even though this is done for all DMTC0 operations outside of the switch
statement. Remove these redundant calls.

Fixes: 5dc5d9f055c5 ("mips: more fixes to the MIPS interrupt glue logic")
Signed-off-by: James Hogan 


Reviewed-by: Philippe Mathieu-Daudé 


Cc: Yongbok Kim 
Cc: Aurelien Jarno 
---
  target/mips/translate.c | 8 
  1 file changed, 0 insertions(+), 8 deletions(-)

diff --git a/target/mips/translate.c b/target/mips/translate.c
index 6b41f7b65e00..6e724ac71dcd 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -7403,15 +7403,7 @@ static void gen_dmtc0(DisasContext *ctx, TCGv arg, int 
reg, int sel)
  switch (sel) {
  case 0:
  save_cpu_state(ctx, 1);
-/* Mark as an IO operation because we may trigger a software
-   interrupt.  */
-if (ctx->tb->cflags & CF_USE_ICOUNT) {
-gen_io_start();
-}
  gen_helper_mtc0_cause(cpu_env, arg);
-if (ctx->tb->cflags & CF_USE_ICOUNT) {
-gen_io_end();
-}
  /* Stop translation as we may have triggered an intetrupt. BS_STOP
   * isn't sufficient, we need to ensure we break out of translated
   * code to check for pending interrupts.  */

Re: [Qemu-devel] [Qemu-arm] [PATCH 01/15] target/arm: Use MMUAccessType enum rather than int

2017-08-02 Thread Philippe Mathieu-Daudé


On 08/02/2017 01:43 PM, Peter Maydell wrote:

In the ARM get_phys_addr() code, switch to using the MMUAccessType
enum and its MMU_* values rather than int and literal 0/1/2.

Signed-off-by: Peter Maydell 


Reviewed-by: Philippe Mathieu-Daudé 


---
  target/arm/helper.c| 30 +++---
  target/arm/internals.h |  3 ++-
  2 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/target/arm/helper.c b/target/arm/helper.c
index fa60040..b78d277 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -20,13 +20,13 @@
  
  #ifndef CONFIG_USER_ONLY

  static bool get_phys_addr(CPUARMState *env, target_ulong address,
-  int access_type, ARMMMUIdx mmu_idx,
+  MMUAccessType access_type, ARMMMUIdx mmu_idx,
hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot,
target_ulong *page_size, uint32_t *fsr,
ARMMMUFaultInfo *fi);
  
  static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,

-   int access_type, ARMMMUIdx mmu_idx,
+   MMUAccessType access_type, ARMMMUIdx mmu_idx,
 hwaddr *phys_ptr, MemTxAttrs *txattrs, int 
*prot,
 target_ulong *page_size_ptr, uint32_t *fsr,
 ARMMMUFaultInfo *fi);
@@ -2135,7 +2135,7 @@ static CPAccessResult ats_access(CPUARMState *env, const 
ARMCPRegInfo *ri,
  }
  
  static uint64_t do_ats_write(CPUARMState *env, uint64_t value,

- int access_type, ARMMMUIdx mmu_idx)
+ MMUAccessType access_type, ARMMMUIdx mmu_idx)
  {
  hwaddr phys_addr;
  target_ulong page_size;
@@ -2194,7 +2194,7 @@ static uint64_t do_ats_write(CPUARMState *env, uint64_t 
value,
  
  static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)

  {
-int access_type = ri->opc2 & 1;
+MMUAccessType access_type = ri->opc2 & 1 ? MMU_DATA_STORE : MMU_DATA_LOAD;
  uint64_t par64;
  ARMMMUIdx mmu_idx;
  int el = arm_current_el(env);
@@ -2253,7 +2253,7 @@ static void ats_write(CPUARMState *env, const 
ARMCPRegInfo *ri, uint64_t value)
  static void ats1h_write(CPUARMState *env, const ARMCPRegInfo *ri,
  uint64_t value)
  {
-int access_type = ri->opc2 & 1;
+MMUAccessType access_type = ri->opc2 & 1 ? MMU_DATA_STORE : MMU_DATA_LOAD;
  uint64_t par64;
  
  par64 = do_ats_write(env, value, access_type, ARMMMUIdx_S2NS);

@@ -2273,7 +2273,7 @@ static CPAccessResult at_s1e2_access(CPUARMState *env, 
const ARMCPRegInfo *ri,
  static void ats_write64(CPUARMState *env, const ARMCPRegInfo *ri,
  uint64_t value)
  {
-int access_type = ri->opc2 & 1;
+MMUAccessType access_type = ri->opc2 & 1 ? MMU_DATA_STORE : MMU_DATA_LOAD;
  ARMMMUIdx mmu_idx;
  int secure = arm_is_secure_below_el3(env);
  
@@ -7510,7 +7510,7 @@ static uint64_t arm_ldq_ptw(CPUState *cs, hwaddr addr, bool is_secure,

  }
  
  static bool get_phys_addr_v5(CPUARMState *env, uint32_t address,

- int access_type, ARMMMUIdx mmu_idx,
+ MMUAccessType access_type, ARMMMUIdx mmu_idx,
   hwaddr *phys_ptr, int *prot,
   target_ulong *page_size, uint32_t *fsr,
   ARMMMUFaultInfo *fi)
@@ -7626,7 +7626,7 @@ do_fault:
  }
  
  static bool get_phys_addr_v6(CPUARMState *env, uint32_t address,

- int access_type, ARMMMUIdx mmu_idx,
+ MMUAccessType access_type, ARMMMUIdx mmu_idx,
   hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot,
   target_ulong *page_size, uint32_t *fsr,
   ARMMMUFaultInfo *fi)
@@ -7733,7 +7733,7 @@ static bool get_phys_addr_v6(CPUARMState *env, uint32_t 
address,
  if (pxn && !regime_is_user(env, mmu_idx)) {
  xn = 1;
  }
-if (xn && access_type == 2)
+if (xn && access_type == MMU_INST_FETCH)
  goto do_fault;
  
  if (arm_feature(env, ARM_FEATURE_V6K) &&

@@ -7848,7 +7848,7 @@ static bool check_s2_mmu_setup(ARMCPU *cpu, bool is_aa64, 
int level,
  }
  
  static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,

-   int access_type, ARMMMUIdx mmu_idx,
+   MMUAccessType access_type, ARMMMUIdx mmu_idx,
 hwaddr *phys_ptr, MemTxAttrs *txattrs, int 
*prot,
 target_ulong *page_size_ptr, uint32_t *fsr,
 ARMMMUFaultInfo *fi)
@@ -8256,7 +8256,7 @@ static inline bool m_is_system_region(CPUARMState *env, 
uint32_t address)
  }
  
  static bool

Re: [Qemu-devel] [PATCH 1/6] tests/tcg/test_path.c: include utils/bufferiszero.c

2017-08-02 Thread Cleber Rosa



On 08/02/2017 05:36 PM, Philippe Mathieu-Daudé wrote:
> Hi Cleber,
> 
> On 08/02/2017 05:15 PM, Cleber Rosa wrote:
>> Which contains one specific function used by iov.c.
>>
>> Without this, "make -C tests/tcg test_path" (and consequently
>> "make -C tests/tcg" or simply "make test") fails quite early.
>>
>> Signed-off-by: Cleber Rosa 
>> ---
>>   tests/tcg/test_path.c | 1 +
>>   1 file changed, 1 insertion(+)
>>
>> diff --git a/tests/tcg/test_path.c b/tests/tcg/test_path.c
>> index 1c29bce..74dbdaf 100644
>> --- a/tests/tcg/test_path.c
>> +++ b/tests/tcg/test_path.c
>> @@ -3,6 +3,7 @@
>>   #include "util/cutils.c"
>>   #include "util/hexdump.c"
>>   #include "util/iov.c"
>> +#include "util/bufferiszero.c"
> 
> This fixes the build, however why not take this opportunity to fix it
> through the Makefile instead of including .c?
> 

Do you mean for all of the .c includes?  I just took a baby step, which
seemed more consistent with the mission (get it to build), without
changing a lot.

> Regards,
> 
> Phil.
> 

Thanks for the review!
- Cleber.

>>   #include "util/path.c"
>>   #include "util/qemu-timer-common.c"
>>   #include 
>>

-- 
Cleber Rosa
[ Sr Software Engineer - Virtualization Team - Red Hat ]
[ Avocado Test Framework - avocado-framework.github.io ]
[  7ABB 96EB 8B46 B94D 5E0F  E9BB 657E 8D33 A5F2 09F3  ]



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] How to make ELF headers/symbol sections available for multiboot?

2017-08-02 Thread Richard Henderson

On 08/02/2017 03:00 PM, Anatol Pomozov wrote:
> What ELF specification says about it? Does it tell a loader to load
> only PT_LOAD segments?

Yes.  In https://refspecs.linuxfoundation.org/ there is a link to "System V ABI
Edition 4.1", which AFAIK is the latest version of the ELF "gABI" spec.
Section 5 describes program loading.

> Here is my current linker script:
> 
> ==
> ENTRY(start)
> 
> SECTIONS {
> . = 1M;
> 
> .text : {
> KEEP(*(.data.multiboot))
> *(.text .text*)
> }
> 
> .rodata : {
> *(.rodata .rodata*)
> }
> 
> .data : {
> *(.data .data.*)
> }
> 
> .bss : {
> __bss_start = .;
> *(.bss .bss*)
> . = ALIGN(8);
> __bss_end = .;
> }
> }
> ===

In the gnu ld manual, read about the PHDRS command, which describes all of the
ways you can manipulate the program header table.

Re-reading that now, I see FILEHDR and PHDR as keywords that can be used to
induce those portions of the file into the loadable segment, but I do not see
anything that could be used to load the section header.

You could force the symbol table and string table to be loaded, by referencing
their sections, but that won't affect the section header itself.

r~

Re: [Qemu-devel] [PATCH 3/6] tests/tcg/linux-test.c: include definitions for getrusage()

2017-08-02 Thread Philippe Mathieu-Daudé


Hi Cleber,

On 08/02/2017 05:15 PM, Cleber Rosa wrote:

A include for  is missing, and prevents
tests/tcg/linux-test from compiling.


getrusage() I presume, don't know if worth adding in commit message.



Signed-off-by: Cleber Rosa 


Reviewed-by: Philippe Mathieu-Daudé 


---
  tests/tcg/linux-test.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/tests/tcg/linux-test.c b/tests/tcg/linux-test.c
index 1c6c013..15c9d7f 100644
--- a/tests/tcg/linux-test.c
+++ b/tests/tcg/linux-test.c
@@ -39,6 +39,7 @@
  #include 
  #include 
  #include 
+#include 
  
  #define TESTPATH "/tmp/linux-test.tmp"

  #define TESTPORT 7654

Re: [Qemu-devel] [Qemu-arm] [PATCH 12/15] target/arm: Don't calculate lr in arm_v7m_cpu_do_interrupt() until needed

2017-08-02 Thread Philippe Mathieu-Daudé


On 08/02/2017 01:43 PM, Peter Maydell wrote:

Move the code in arm_v7m_cpu_do_interrupt() that calculates the
magic LR value down to when we're actually going to use it.
Having the calculation and use so far apart makes the code
a little harder to understand than it needs to be.

Signed-off-by: Peter Maydell 


Reviewed-by: Philippe Mathieu-Daudé 


---
  target/arm/helper.c | 15 ---
  1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/target/arm/helper.c b/target/arm/helper.c
index b64ddb1..0ecc8f1 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -6311,13 +6311,6 @@ void arm_v7m_cpu_do_interrupt(CPUState *cs)
  
  arm_log_exception(cs->exception_index);
  
-lr = 0xfff1;

-if (env->v7m.control & R_V7M_CONTROL_SPSEL_MASK) {
-lr |= 4;
-}
-if (env->v7m.exception == 0)
-lr |= 8;
-
  /* For exceptions we just mark as pending on the NVIC, and let that
 handle it.  */
  switch (cs->exception_index) {
@@ -6408,6 +6401,14 @@ void arm_v7m_cpu_do_interrupt(CPUState *cs)
  return; /* Never happens.  Keep compiler happy.  */
  }
  
+lr = 0xfff1;

+if (env->v7m.control & R_V7M_CONTROL_SPSEL_MASK) {
+lr |= 4;
+}
+if (env->v7m.exception == 0) {
+lr |= 8;
+}
+
  v7m_push_stack(cpu);
  v7m_exception_taken(cpu, lr);
  qemu_log_mask(CPU_LOG_INT, "... as %d\n", env->v7m.exception);

Re: [Qemu-devel] [Qemu-arm] [PATCH 13/15] target/arm: Create and use new function arm_v7m_is_handler_mode()

2017-08-02 Thread Philippe Mathieu-Daudé


On 08/02/2017 01:43 PM, Peter Maydell wrote:

Add a utility function for testing whether the CPU is in Handler
mode; this is just a check whether v7m.exception is non-zero, but
we do it in several places and it makes the code a bit easier
to read to not have to mentally figure out what the test is testing.


<3 <3 <3



Signed-off-by: Peter Maydell 


Reviewed-by: Philippe Mathieu-Daudé 


---
  target/arm/cpu.h| 10 --
  target/arm/helper.c |  8 
  2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index da90b7a..a3b4b78 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -1630,13 +1630,19 @@ static inline int arm_highest_el(CPUARMState *env)
  return 1;
  }
  
+/* Return true if a v7M CPU is in Handler mode */

+static inline bool arm_v7m_is_handler_mode(CPUARMState *env)
+{
+return env->v7m.exception != 0;
+}
+
  /* Return the current Exception Level (as per ARMv8; note that this differs
   * from the ARMv7 Privilege Level).
   */
  static inline int arm_current_el(CPUARMState *env)
  {
  if (arm_feature(env, ARM_FEATURE_M)) {
-return !((env->v7m.exception == 0) && (env->v7m.control & 1));
+return arm_v7m_is_handler_mode(env) || !(env->v7m.control & 1);
  }
  
  if (is_a64(env)) {

@@ -2636,7 +2642,7 @@ static inline void cpu_get_tb_cpu_state(CPUARMState *env, 
target_ulong *pc,
  }
  *flags |= fp_exception_el(env) << ARM_TBFLAG_FPEXC_EL_SHIFT;
  
-if (env->v7m.exception != 0) {

+if (arm_v7m_is_handler_mode(env)) {
  *flags |= ARM_TBFLAG_HANDLER_MASK;
  }
  
diff --git a/target/arm/helper.c b/target/arm/helper.c

index 0ecc8f1..7920153 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -6147,7 +6147,7 @@ static void do_v7m_exception_exit(ARMCPU *cpu)
   * that jumps to magic addresses don't have magic behaviour unless
   * we're in Handler mode (compare pseudocode BXWritePC()).
   */
-assert(env->v7m.exception != 0);
+assert(arm_v7m_is_handler_mode(env));
  
  /* In the spec pseudocode ExceptionReturn() is called directly

   * from BXWritePC() and gets the full target PC value including
@@ -6254,7 +6254,7 @@ static void do_v7m_exception_exit(ARMCPU *cpu)
   * resuming in Thread mode. If that doesn't match what the
   * exception return type specified then this is a UsageFault.
   */
-if (return_to_handler == (env->v7m.exception == 0)) {
+if (return_to_handler != arm_v7m_is_handler_mode(env)) {
  /* Take an INVPC UsageFault by pushing the stack again. */
  armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE);
  env->v7m.cfsr |= R_V7M_CFSR_INVPC_MASK;
@@ -6405,7 +6405,7 @@ void arm_v7m_cpu_do_interrupt(CPUState *cs)
  if (env->v7m.control & R_V7M_CONTROL_SPSEL_MASK) {
  lr |= 4;
  }
-if (env->v7m.exception == 0) {
+if (!arm_v7m_is_handler_mode(env)) {
  lr |= 8;
  }
  
@@ -8798,7 +8798,7 @@ void HELPER(v7m_msr)(CPUARMState *env, uint32_t maskreg, uint32_t val)

   * switch_v7m_sp() deals with updating the SPSEL bit in
   * env->v7m.control, so we only need update the others.
   */
-if (env->v7m.exception == 0) {
+if (!arm_v7m_is_handler_mode(env)) {
  switch_v7m_sp(env, (val & R_V7M_CONTROL_SPSEL_MASK) != 0);
  }
  env->v7m.control &= ~R_V7M_CONTROL_NPRIV_MASK;

Re: [Qemu-devel] How to make ELF headers/symbol sections available for multiboot?

2017-08-02 Thread Anatol Pomozov

Hello Richard

Thank you for this useful information. I still learning about ELF and
a lot of things are still unclear for me.

On Mon, Jul 31, 2017 at 11:20 AM, Richard Henderson  wrote:
> On 07/31/2017 10:21 AM, Anatol Pomozov wrote:
>> ELF sections info is needed for an OS to map address space properly.
>
> No, ELF *program header* info is needed for an OS to map the address space
> properly.  For example:
>
> $ readelf -hl vmlinux-4.9.0-3-5kc-malta

Thanks for this information about program headers. I reread elf_ops.h
and now I see that QEMU loads all PT_LOAD segments. I wonder why GRUB
bootloader loads also sections that are not in this segment (e.g. GRUB
loads content of .shstrtab into target's memory despite my elf does
not keep it in PT_LOAD segment).

What ELF specification says about it? Does it tell a loader to load
only PT_LOAD segments? In this case bootloaders should follow it as
well and current QEMU behavior is correct.

But multiboot expects section headers info and .shstrtab section to be
loaded to the target memory. I believe I need to modify my linker
script and add this information into the loadable segment.

Here is my current linker script:

==
ENTRY(start)

SECTIONS {
. = 1M;

.text : {
KEEP(*(.data.multiboot))
*(.text .text*)
}

.rodata : {
*(.rodata .rodata*)
}

.data : {
*(.data .data.*)
}

.bss : {
__bss_start = .;
*(.bss .bss*)
. = ALIGN(8);
__bss_end = .;
}
}
===

With my linker script only .text .rodata .data and .bss are included
into the loadable segment.


So my questions: how to tell the linker to include "section headers"
and ".shstrtab" section into loadable segment? Once it is done I can
try to modify QEMU to pass its addresses to the target.

>
> Using a mips kernel binary I happend to have handy; it would be the same for
> x86_64, prior to being bundled in the (imo superfluous bzImage) format.
>
> ELF Header:
>   Magic:   7f 45 4c 46 02 01 01 00 00 00 00 00 00 00 00 00
>   Class: ELF64
>   Data:  2's complement, little endian
>   Version:   1 (current)
>   OS/ABI:UNIX - System V
>   ABI Version:   0
>   Type:  EXEC (Executable file)
>   Machine:   MIPS R3000
>   Version:   0x1
>   Entry point address:   0x806ed590
>   Start of program headers:  64 (bytes into file)
>   Start of section headers:  13351328 (bytes into file)
>   Flags: 0x8001, noreorder, mips64r2
>   Size of this header:   64 (bytes)
>   Size of program headers:   56 (bytes)
>   Number of program headers: 2
>   Size of section headers:   64 (bytes)
>   Number of section headers: 28
>   Section header string table index: 27
>
> The ELF file header, always at file offset 0.  The relevant info is the
> encoding (64-bit little-endian), cpu (mips), and start of program headers.
>
> Program Headers:
>   Type   Offset VirtAddr   PhysAddr
>  FileSizMemSiz  Flags  Align
>   LOAD   0x1000 0x8010 0x8010
>  0x009fefb4 0x00a5a150  RWE1000
>   NOTE   0x005fecb0 0x806fdcb0 0x806fdcb0
>  0x0024 0x0024  R  4
>
> The ELF program header.  There is one segment to be loaded, at a given 
> physical
> address (which happens to match the virtual address, but that need not have
> been so).
>
> The segment consists of 0x9fefb4 bytes of data to be loaded from the file, and
> occupies 0xa5a150 in memory.  The difference between the two sizes is "bss",
> and should be zeroed.
>
> A proper ELF loader will process *all* LOAD segments, however many are 
> required
> by the binary.  Though in practice there will probably only be 1 or 2.
>
> Section to Segment mapping:
>   Segment Sections...
>   00 .text __ex_table __dbe_table .notes .rodata .pci_fixup __ksymtab
> __ksymtab_gpl __kcrctab __kcrctab_gpl __ksymtab_strings __param __modver .data
> .data..page_aligned .init.text .init.data .exit.text .data.reloc .sbss .bss
>   01 __dbe_table .notes
>
> This mapping is provided by readelf for convenience, not actually present in
> the ELF file.  But it is handy when debugging a linker script.
>
>> Quoting Multiboot specification
>> https://www.gnu.org/software/grub/manual/multiboot/multiboot.html
>
> I'm not sure why someone felt the need to re-invent the wheel, especially
> considering its introduction section talks about trying to stop reinventing 
> the
> wheel...
>
> But... whatever.  I'm not sure why this is relevant to $SUBJECT, since it does

[Qemu-devel] [PATCH] libqtest: Fix typo in comments

2017-08-02 Thread Eric Blake

s/continuosly/continuously/

Signed-off-by: Eric Blake 
---
 tests/libqtest.h | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/libqtest.h b/tests/libqtest.h
index 38bc1e9953..3ae570927a 100644
--- a/tests/libqtest.h
+++ b/tests/libqtest.h
@@ -117,7 +117,7 @@ QDict *qtest_qmp_receive(QTestState *s);
  * @s: #QTestState instance to operate on.
  * @s: #event event to wait for.
  *
- * Continuosly polls for QMP responses until it receives the desired event.
+ * Continuously polls for QMP responses until it receives the desired event.
  */
 void qtest_qmp_eventwait(QTestState *s, const char *event);

@@ -126,7 +126,7 @@ void qtest_qmp_eventwait(QTestState *s, const char *event);
  * @s: #QTestState instance to operate on.
  * @s: #event event to wait for.
  *
- * Continuosly polls for QMP responses until it receives the desired event.
+ * Continuously polls for QMP responses until it receives the desired event.
  * Returns a copy of the event for further investigation.
  */
 QDict *qtest_qmp_eventwait_ref(QTestState *s, const char *event);
@@ -571,7 +571,7 @@ static inline QDict *qmp_receive(void)
  * qmp_eventwait:
  * @s: #event event to wait for.
  *
- * Continuosly polls for QMP responses until it receives the desired event.
+ * Continuously polls for QMP responses until it receives the desired event.
  */
 static inline void qmp_eventwait(const char *event)
 {
@@ -582,7 +582,7 @@ static inline void qmp_eventwait(const char *event)
  * qmp_eventwait_ref:
  * @s: #event event to wait for.
  *
- * Continuosly polls for QMP responses until it receives the desired event.
+ * Continuously polls for QMP responses until it receives the desired event.
  * Returns a copy of the event for further investigation.
  */
 static inline QDict *qmp_eventwait_ref(const char *event)
-- 
2.13.3

Re: [Qemu-devel] [PATCH for-2.10 0/5] tests: acpi: make sure FADT is compared to reference table

2017-08-02 Thread Michael S. Tsirkin

On Wed, Aug 02, 2017 at 09:51:22AM +0200, Igor Mammedov wrote:
> On Wed, 2 Aug 2017 00:14:18 +0300
> "Michael S. Tsirkin"  wrote:
> 
> > On Mon, Jul 31, 2017 at 05:40:47PM +0200, Igor Mammedov wrote:
> > > While refactoring i386/FADT generation to build_append_int_noprefix() 
> > >
> > > and testing it, It turned out that FADT is only tested for valid  
> > >
> > > checksum but actual test for unintended changes isn't applied to it   
> > >
> > > even though we have reference tables in tree. 
> > >
> > > So here goes a couple of cleanups to reflect what fuctions do +   
> > >
> > > some comments and actual fix. 
> > >
> > >   
> > >
> > > Note to maintainer:   
> > >
> > >   FADT reference table is out of sync and should be updated along with
> > >
> > >   series applied. 
> > >
> > >   
> > >
> > > CC: "Michael S. Tsirkin" 
> > > 
> > > CC: Marcel Apfelbaum    
> > 
> > Absolutely good stuff, but not a bugfix as such (it's not that the
> > test is wrong, it's that we skip FADT for now)
> > so I don't think this is 2.10 material.
> Agreed, it could go in when 2.11 merge window is open.

thanks,pls remember to repost or ping then.

> > 
> > > Igor Mammedov (5):
> > >   tests: acpi: move tested tables array allocation outside of
> > > test_acpi_dsdt_table()
> > >   tests: acpi: init table descriptor in test_dst_table()
> > >   tests: acpi: rename test_acpi_tables()/test_dst_table() to reflect its
> > > usage
> > >   tests: acpi: add comments to fetch_rsdt_referenced_tables/data->tables
> > > usage
> > >   tests: acpi: fix FADT not being compared to reference table
> > > 
> > >  tests/bios-tables-test.c | 30 ++
> > >  1 file changed, 18 insertions(+), 12 deletions(-)
> > > 
> > > -- 
> > > 2.7.4

[Qemu-devel] [PATCH 6/6] tests/tcg/test-i386-fprem.c: compilation fix for -Werror=unused-const-variable=

2017-08-02 Thread Cleber Rosa

A clean up of unused code to make the compiler happy.

Signed-off-by: Cleber Rosa 
---
 tests/tcg/test-i386-fprem.c | 15 ---
 1 file changed, 15 deletions(-)

diff --git a/tests/tcg/test-i386-fprem.c b/tests/tcg/test-i386-fprem.c
index f70363d..1dceda0 100644
--- a/tests/tcg/test-i386-fprem.c
+++ b/tests/tcg/test-i386-fprem.c
@@ -54,14 +54,6 @@ union float80u {
 
 #define IEEE854_LONG_DOUBLE_BIAS 0x3fff
 
-static const union float80u q_nan = {
-.ieee_nan.negative = 0,  /* X */
-.ieee_nan.exponent = 0x7fff,
-.ieee_nan.one = 1,
-.ieee_nan.quiet_nan = 1,
-.ieee_nan.mantissa = 0,
-};
-
 static const union float80u s_nan = {
 .ieee_nan.negative = 0,  /* X */
 .ieee_nan.exponent = 0x7fff,
@@ -91,13 +83,6 @@ static const union float80u pos_denorm = {
 .ieee.mantissa = 1,
 };
 
-static const union float80u smallest_positive_norm = {
-.ieee.negative = 0,
-.ieee.exponent = 1,
-.ieee.one = 1,
-.ieee.mantissa = 0,
-};
-
 static void fninit(void)
 {
 asm volatile ("fninit\n");
-- 
2.9.4

Re: [Qemu-devel] [PATCH for-2.10?] test-qga: Kill broken and dead QGA_TEST_SIDE_EFFECTING code

2017-08-02 Thread Eric Blake

On 08/02/2017 03:19 PM, Eric Blake wrote:
> Back when the test was introduced, in commit 62c39b307, the
> test was set up to run qemu-ga directly on the host performing
> the test, and defaults to limiting itself to safe commands.  At
> the time, it was envisioned that setting QGA_TEST_SIDE_EFFECTING
> in the environment could cover a few more commands, while noting
> the potential danger of those side effects running in the host.

> Rather than leave this untested time-bomb in place, rip it out.

Oh, I meant to add that I'm wondering if this should be considered 2.10
material.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [Qemu-arm] [PATCH 14/15] armv7m_nvic.h: Move from include/hw/arm to include/hw/intc

2017-08-02 Thread Philippe Mathieu-Daudé


On 08/02/2017 01:44 PM, Peter Maydell wrote:

The armv7m_nvic.h header file was accidentally placed in
include/hw/arm; move it to include/hw/intc to match where
its corresponding .c file lives.

Signed-off-by: Peter Maydell 


Reviewed-by: Philippe Mathieu-Daudé 


---
  hw/intc/armv7m_nvic.c  | 2 +-
  include/hw/arm/armv7m.h| 2 +-
  include/hw/{arm => intc}/armv7m_nvic.h | 0
  3 files changed, 2 insertions(+), 2 deletions(-)
  rename include/hw/{arm => intc}/armv7m_nvic.h (100%)

diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c
index 343bc16..5a18025 100644
--- a/hw/intc/armv7m_nvic.c
+++ b/hw/intc/armv7m_nvic.c
@@ -17,7 +17,7 @@
  #include "hw/sysbus.h"
  #include "qemu/timer.h"
  #include "hw/arm/arm.h"
-#include "hw/arm/armv7m_nvic.h"
+#include "hw/intc/armv7m_nvic.h"
  #include "target/arm/cpu.h"
  #include "exec/exec-all.h"
  #include "qemu/log.h"
diff --git a/include/hw/arm/armv7m.h b/include/hw/arm/armv7m.h
index a9b3f2a..10eb058 100644
--- a/include/hw/arm/armv7m.h
+++ b/include/hw/arm/armv7m.h
@@ -11,7 +11,7 @@
  #define HW_ARM_ARMV7M_H
  
  #include "hw/sysbus.h"

-#include "hw/arm/armv7m_nvic.h"
+#include "hw/intc/armv7m_nvic.h"
  
  #define TYPE_BITBAND "ARM,bitband-memory"

  #define BITBAND(obj) OBJECT_CHECK(BitBandState, (obj), TYPE_BITBAND)
diff --git a/include/hw/arm/armv7m_nvic.h b/include/hw/intc/armv7m_nvic.h
similarity index 100%
rename from include/hw/arm/armv7m_nvic.h
rename to include/hw/intc/armv7m_nvic.h

[Qemu-devel] [PATCH v3 0/2] ERC cleanup and CRW bugfix

2017-08-02 Thread Dong Jia Shi

This series is trying to:
1. clear up ERC related code
2. bugfix for channel path related CRW generation

Change log
--
v2->v3:
Added Halil's R-B on both patches.
Patch #1:
  Added ERC "installed parameters restored".

v1->v2:
Patch #1:
  Add all ERCs.
  Commit message update.
Patch #2:
  Rename the new added parameter to more speaking name -- solicited.

Dong Jia Shi (2):
  s390x/css: use macro for event-information pending error recover code
  s390x/css: generate solicited crw for rchp completion signaling

 hw/s390x/css.c| 16 ++--
 include/hw/s390x/css.h|  3 ++-
 include/hw/s390x/ioinst.h | 12 ++--
 3 files changed, 22 insertions(+), 9 deletions(-)

-- 
2.11.2

Re: [Qemu-devel] [PATCH v2 1/2] build-sys: add --disable-vhost-user

2017-08-02 Thread Michael S. Tsirkin

On Fri, Jul 28, 2017 at 04:13:08PM +0200, Marc-André Lureau wrote:
> Learn to compile out vhost-user. Keep it enabled by default on
> non-win32, that is assumed to be POSIX. Fail if trying to enable it on
> win32.
> 
> When trying to make a vhost-user netdev, it gives the following error:
> 
> -netdev vhost-user,id=foo,chardev=chr-test: Parameter 'type' expects a netdev 
> backend type
> 
> And similar error with the HMP/QMP monitors.
> 
> While at it, rename CONFIG_VHOST_NET_TEST CONFIG_VHOST_USER_NET_TEST
> since it's a vhost-user specific variable.
> 
> Signed-off-by: Marc-André Lureau 

OK so pls address Cornelia's comment and post just patch 1 as
it seems appropriate for 2.10.

> ---
>  hw/virtio/virtio-pci.c|  4 ++--
>  configure | 29 +++--
>  default-configs/pci.mak   |  2 +-
>  default-configs/s390x-softmmu.mak |  2 +-
>  tests/Makefile.include|  6 +++---
>  5 files changed, 34 insertions(+), 9 deletions(-)
> 
> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> index 5d14bd66dc..8b0d6b69cd 100644
> --- a/hw/virtio/virtio-pci.c
> +++ b/hw/virtio/virtio-pci.c
> @@ -2135,7 +2135,7 @@ static const TypeInfo vhost_scsi_pci_info = {
>  };
>  #endif
>  
> -#ifdef CONFIG_LINUX
> +#if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX)
>  /* vhost-user-scsi-pci */
>  static Property vhost_user_scsi_pci_properties[] = {
>  DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors,
> @@ -2665,7 +2665,7 @@ static void virtio_pci_register_types(void)
>  #ifdef CONFIG_VHOST_SCSI
>  type_register_static(_scsi_pci_info);
>  #endif
> -#ifdef CONFIG_LINUX
> +#if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX)
>  type_register_static(_user_scsi_pci_info);
>  #endif
>  #ifdef CONFIG_VHOST_VSOCK
> diff --git a/configure b/configure
> index 987f59ba88..efec1a613e 100755
> --- a/configure
> +++ b/configure
> @@ -306,6 +306,7 @@ tcg="yes"
>  vhost_net="no"
>  vhost_scsi="no"
>  vhost_vsock="no"
> +vhost_user=""
>  kvm="no"
>  hax="no"
>  rdma=""
> @@ -1282,6 +1283,15 @@ for opt do
>;;
>--enable-vxhs) vxhs="yes"
>;;
> +  --disable-vhost-user) vhost_user="no"
> +  ;;
> +  --enable-vhost-user)
> +  vhost_user="yes"
> +  if test "$mingw32" = "yes" ; then
> +  echo "ERROR: vhost-user isn't available on win32"
> +  exit 1
> +  fi
> +  ;;
>*)
>echo "ERROR: unknown option $opt"
>echo "Try '$0 --help' for more information"
> @@ -1290,6 +1300,14 @@ for opt do
>esac
>  done
>  
> +if test "$vhost_user" = ""; then
> +if test "$mingw32" = "yes" ; then
> +vhost_user="no"
> +else
> +vhost_user="yes"
> +fi
> +fi
> +
>  case "$cpu" in
>  ppc)
> CPU_CFLAGS="-m32"
> @@ -1518,6 +1536,7 @@ disabled with --disable-FEATURE, default is enabled if 
> available:
>tools   build qemu-io, qemu-nbd and qemu-image tools
>vxhsVeritas HyperScale vDisk backend support
>crypto-afalgLinux AF_ALG crypto backend driver
> +  vhost-user  vhost-user support
>  
>  NOTE: The object files are built at the place where configure is launched
>  EOF
> @@ -5348,6 +5367,7 @@ echo "libcap-ng support $cap_ng"
>  echo "vhost-net support $vhost_net"
>  echo "vhost-scsi support $vhost_scsi"
>  echo "vhost-vsock support $vhost_vsock"
> +echo "vhost-user support $vhost_user"
>  echo "Trace backends$trace_backends"
>  if have_backend "simple"; then
>  echo "Trace output file $trace_file-"
> @@ -5757,12 +5777,15 @@ fi
>  if test "$vhost_scsi" = "yes" ; then
>echo "CONFIG_VHOST_SCSI=y" >> $config_host_mak
>  fi
> -if test "$vhost_net" = "yes" ; then
> +if test "$vhost_net" = "yes" -a "$vhost_user" = "yes"; then
>echo "CONFIG_VHOST_NET_USED=y" >> $config_host_mak
>  fi
>  if test "$vhost_vsock" = "yes" ; then
>echo "CONFIG_VHOST_VSOCK=y" >> $config_host_mak
>  fi
> +if test "$vhost_user" = "yes" ; then
> +  echo "CONFIG_VHOST_USER=y" >> $config_host_mak
> +fi
>  if test "$blobs" = "yes" ; then
>echo "INSTALL_BLOBS=yes" >> $config_host_mak
>  fi
> @@ -6358,7 +6381,9 @@ if supported_kvm_target $target; then
>  echo "CONFIG_KVM=y" >> $config_target_mak
>  if test "$vhost_net" = "yes" ; then
>  echo "CONFIG_VHOST_NET=y" >> $config_target_mak
> -echo "CONFIG_VHOST_NET_TEST_$target_name=y" >> $config_host_mak
> +if test "$vhost_user" = "yes" ; then
> +echo "CONFIG_VHOST_USER_NET_TEST_$target_name=y" >> 
> $config_host_mak
> +fi
>  fi
>  fi
>  if supported_hax_target $target; then
> diff --git a/default-configs/pci.mak b/default-configs/pci.mak
> index acaa70301a..a758630d30 100644
> --- a/default-configs/pci.mak
> +++ b/default-configs/pci.mak
> @@ -43,4 +43,4 @@ CONFIG_VGA=y
>  CONFIG_VGA_PCI=y
>  CONFIG_IVSHMEM_DEVICE=$(CONFIG_IVSHMEM)
>  CONFIG_ROCKER=y
> -CONFIG_VHOST_USER_SCSI=$(CONFIG_LINUX)
> +CONFIG_VHOST_USER_SCSI=$(and

Re: [Qemu-devel] [PATCH] docs/pcie.txt: Replace ioh3420 with pcie-root-port

2017-08-02 Thread Michael S. Tsirkin

On Wed, Aug 02, 2017 at 06:51:13PM +0300, Marcel Apfelbaum wrote:
> Do not mention ioh3420 in the "how to" doc.
> The device still works and can be used by already
> existing setups, but no need to be mentioned.
> 
> Suggested-by: Andrew Jones 
> Signed-off-by: Marcel Apfelbaum 
> ---

Do we need this in 2.10? I'm inclined to say no, it can wait
until 2.11. Thoughts?

>  docs/pcie.txt | 16 
>  1 file changed, 8 insertions(+), 8 deletions(-)
> 
> diff --git a/docs/pcie.txt b/docs/pcie.txt
> index 5bada24..f990033 100644
> --- a/docs/pcie.txt
> +++ b/docs/pcie.txt
> @@ -43,8 +43,8 @@ Place only the following kinds of devices directly on the 
> Root Complex:
>  strangely when PCI Express devices are integrated
>  with the Root Complex.
>  
> -(2) PCI Express Root Ports (ioh3420), for starting exclusively PCI 
> Express
> -hierarchies.
> +(2) PCI Express Root Ports (pcie-root-port), for starting exclusively
> +PCI Express hierarchies.
>  
>  (3) DMI-PCI Bridges (i82801b11-bridge), for starting legacy PCI
>  hierarchies.
> @@ -65,7 +65,7 @@ Place only the following kinds of devices directly on the 
> Root Complex:
>-device pxb-pcie,id=pcie.1,bus_nr=x[,numa_node=y][,addr=z]
>Only PCI Express Root Ports and DMI-PCI bridges can be connected
>to the pcie.1 bus:
> -  -device 
> ioh3420,id=root_port1[,bus=pcie.1][,chassis=x][,slot=y][,addr=z]  
>\
> +  -device 
> pcie-root-port,id=root_port1[,bus=pcie.1][,chassis=x][,slot=y][,addr=z]   
>   \
>-device i82801b11-bridge,id=dmi_pci_bridge1,bus=pcie.1
>  
>  
> @@ -107,14 +107,14 @@ Plug only PCI Express devices into PCI Express Ports.
>   
>  
>  2.2.1 Plugging a PCI Express device into a PCI Express Root Port:
> -  -device 
> ioh3420,id=root_port1,chassis=x,slot=y[,bus=pcie.0][,addr=z]  \
> +  -device 
> pcie-root-port,id=root_port1,chassis=x,slot=y[,bus=pcie.0][,addr=z]  \
>-device ,bus=root_port1
>  2.2.2 Using multi-function PCI Express Root Ports:
> -  -device 
> ioh3420,id=root_port1,multifunction=on,chassis=x,addr=z.0[,slot=y][,bus=pcie.0]
>  \
> -  -device 
> ioh3420,id=root_port2,chassis=x1,addr=z.1[,slot=y1][,bus=pcie.0] \
> -  -device 
> ioh3420,id=root_port3,chassis=x2,addr=z.2[,slot=y2][,bus=pcie.0] \
> +  -device 
> pcie-root-port,id=root_port1,multifunction=on,chassis=x,addr=z.0[,slot=y][,bus=pcie.0]
>  \
> +  -device 
> pcie-root-port,id=root_port2,chassis=x1,addr=z.1[,slot=y1][,bus=pcie.0] \
> +  -device 
> pcie-root-port,id=root_port3,chassis=x2,addr=z.2[,slot=y2][,bus=pcie.0] \
>  2.2.3 Plugging a PCI Express device into a Switch:
> -  -device ioh3420,id=root_port1,chassis=x,slot=y[,bus=pcie.0][,addr=z]  \
> +  -device 
> pcie-root-port,id=root_port1,chassis=x,slot=y[,bus=pcie.0][,addr=z]  \
>-device x3130-upstream,id=upstream_port1,bus=root_port1[,addr=x]   
>\
>-device 
> xio3130-downstream,id=downstream_port1,bus=upstream_port1,chassis=x1,slot=y1[,addr=z1]]
>  \
>-device ,bus=downstream_port1
> -- 
> 2.9.4

Re: [Qemu-devel] [PATCH] test-qga: Kill broken and dead QGA_TEST_SIDE_EFFECTING code

2017-08-02 Thread Marc-André Lureau



- Original Message -
> Back when the test was introduced, in commit 62c39b307, the
> test was set up to run qemu-ga directly on the host performing
> the test, and defaults to limiting itself to safe commands.  At
> the time, it was envisioned that setting QGA_TEST_SIDE_EFFECTING
> in the environment could cover a few more commands, while noting
> the potential danger of those side effects running in the host.
> 
> But this has NEVER been tested: if you enable the environment
> variable, the test WILL fail.  One obvious reason: if you are not
> running as root, you'll probably get a permission failure when
> trying to freeze the file systems, or when changing system time.
> Less obvious: if you run the test as root (wow, you're brave), you
> could end up hanging if the test tries to log things to a
> temporarily frozen filesystem.  But the cutest reason of all: if
> you get past the above hurdles, the test uses invalid JSON in
> test_qga_fstrim() (missing '' around the dictionary key 'minimum'),
> and will thus fail an assertion in qmp_fd().
> 
> Rather than leave this untested time-bomb in place, rip it out.
> Hopefully, as originally envisioned, we can find an opportunity
> to test an actual sandboxed guest where the guest-agent has
> full permissions and will not unduly affect the host running
> the test - if so, 'git revert' can be used if desired, for
> salvaging any useful parts of this attempt.
> 
> Signed-off-by: Eric Blake 

I think it used to work in a vm (as qemu-ga user), but I don't mind to revert 
it if we need it back:

Signed-off-by: Marc-André Lureau 


> ---
>  tests/test-qga.c | 90
>  
>  1 file changed, 90 deletions(-)
> 
> diff --git a/tests/test-qga.c b/tests/test-qga.c
> index 06783e7585..fd6bc7690f 100644
> --- a/tests/test-qga.c
> +++ b/tests/test-qga.c
> @@ -642,65 +642,6 @@ static void test_qga_get_time(gconstpointer fix)
>  QDECREF(ret);
>  }
> 
> -static void test_qga_set_time(gconstpointer fix)
> -{
> -const TestFixture *fixture = fix;
> -QDict *ret;
> -int64_t current, time;
> -gchar *cmd;
> -
> -/* get current time */
> -ret = qmp_fd(fixture->fd, "{'execute': 'guest-get-time'}");
> -g_assert_nonnull(ret);
> -qmp_assert_no_error(ret);
> -current = qdict_get_int(ret, "return");
> -g_assert_cmpint(current, >, 0);
> -QDECREF(ret);
> -
> -/* set some old time */
> -ret = qmp_fd(fixture->fd, "{'execute': 'guest-set-time',"
> - " 'arguments': { 'time': 1000 } }");
> -g_assert_nonnull(ret);
> -qmp_assert_no_error(ret);
> -QDECREF(ret);
> -
> -/* check old time */
> -ret = qmp_fd(fixture->fd, "{'execute': 'guest-get-time'}");
> -g_assert_nonnull(ret);
> -qmp_assert_no_error(ret);
> -time = qdict_get_int(ret, "return");
> -g_assert_cmpint(time / 1000, <, G_USEC_PER_SEC * 10);
> -QDECREF(ret);
> -
> -/* set back current time */
> -cmd = g_strdup_printf("{'execute': 'guest-set-time',"
> -  " 'arguments': { 'time': %" PRId64 " } }",
> -  current + time * 1000);
> -ret = qmp_fd(fixture->fd, cmd);
> -g_free(cmd);
> -g_assert_nonnull(ret);
> -qmp_assert_no_error(ret);
> -QDECREF(ret);
> -}
> -
> -static void test_qga_fstrim(gconstpointer fix)
> -{
> -const TestFixture *fixture = fix;
> -QDict *ret;
> -QList *list;
> -const QListEntry *entry;
> -
> -ret = qmp_fd(fixture->fd, "{'execute': 'guest-fstrim',"
> - " arguments: { minimum: 4194304 } }");
> -g_assert_nonnull(ret);
> -qmp_assert_no_error(ret);
> -list = qdict_get_qlist(ret, "return");
> -entry = qlist_first(list);
> -g_assert(qdict_haskey(qobject_to_qdict(entry->value), "paths"));
> -
> -QDECREF(ret);
> -}
> -
>  static void test_qga_blacklist(gconstpointer data)
>  {
>  TestFixture fix;
> @@ -831,30 +772,6 @@ static void test_qga_fsfreeze_status(gconstpointer fix)
>  QDECREF(ret);
>  }
> 
> -static void test_qga_fsfreeze_and_thaw(gconstpointer fix)
> -{
> -const TestFixture *fixture = fix;
> -QDict *ret;
> -const gchar *status;
> -
> -ret = qmp_fd(fixture->fd, "{'execute': 'guest-fsfreeze-freeze'}");
> -g_assert_nonnull(ret);
> -qmp_assert_no_error(ret);
> -QDECREF(ret);
> -
> -ret = qmp_fd(fixture->fd, "{'execute': 'guest-fsfreeze-status'}");
> -g_assert_nonnull(ret);
> -qmp_assert_no_error(ret);
> -status = qdict_get_try_str(ret, "return");
> -g_assert_cmpstr(status, ==, "frozen");
> -QDECREF(ret);
> -
> -ret = qmp_fd(fixture->fd, "{'execute': 'guest-fsfreeze-thaw'}");
> -g_assert_nonnull(ret);
> -qmp_assert_no_error(ret);
> -QDECREF(ret);
> -}
> -
>  static void test_qga_guest_exec(gconstpointer fix)
>  {
>  const TestFixture *fixture = fix;
> @@ -1029,13 +946,6 @@ int main(int argc, char **argv)

[Qemu-devel] [PULL 1/2] slirp: fill error when failing to initialize user network

2017-08-02 Thread Samuel Thibault

From: Hervé Poussineau 

With "-netdev user,id=net0,dns=1.2.3.4"
error was:
qemu-system-i386: -netdev user,id=net0,dns=1.2.3.4: Device 'user' could not be 
initialized

Error is now:
qemu-system-i386: -netdev user,id=net0,dns=1.2.3.4: DNS doesn't belong to 
network

Signed-off-by: Hervé Poussineau 
Signed-off-by: Samuel Thibault 
---
 net/slirp.c | 134 ++--
 1 file changed, 94 insertions(+), 40 deletions(-)

diff --git a/net/slirp.c b/net/slirp.c
index 9fbc949e81..01ed21c006 100644
--- a/net/slirp.c
+++ b/net/slirp.c
@@ -91,15 +91,15 @@ static QTAILQ_HEAD(slirp_stacks, SlirpState) slirp_stacks =
 QTAILQ_HEAD_INITIALIZER(slirp_stacks);
 
 static int slirp_hostfwd(SlirpState *s, const char *redir_str,
- int legacy_format);
+ int legacy_format, Error **errp);
 static int slirp_guestfwd(SlirpState *s, const char *config_str,
-  int legacy_format);
+  int legacy_format, Error **errp);
 
 #ifndef _WIN32
 static const char *legacy_smb_export;
 
 static int slirp_smb(SlirpState *s, const char *exported_dir,
- struct in_addr vserver_addr);
+ struct in_addr vserver_addr, Error **errp);
 static void slirp_smb_cleanup(SlirpState *s);
 #else
 static inline void slirp_smb_cleanup(SlirpState *s) { }
@@ -155,7 +155,7 @@ static int net_slirp_init(NetClientState *peer, const char 
*model,
   const char *bootfile, const char *vdhcp_start,
   const char *vnameserver, const char *vnameserver6,
   const char *smb_export, const char *vsmbserver,
-  const char **dnssearch)
+  const char **dnssearch, Error **errp)
 {
 /* default settings according to historic slirp */
 struct in_addr net  = { .s_addr = htonl(0x0a000200) }; /* 10.0.2.0 */
@@ -178,15 +178,18 @@ static int net_slirp_init(NetClientState *peer, const 
char *model,
 struct slirp_config_str *config;
 
 if (!ipv4 && (vnetwork || vhost || vnameserver)) {
+error_setg(errp, "IPv4 disabled but netmask/host/dns provided");
 return -1;
 }
 
 if (!ipv6 && (vprefix6 || vhost6 || vnameserver6)) {
+error_setg(errp, "IPv6 disabled but prefix/host6/dns6 provided");
 return -1;
 }
 
 if (!ipv4 && !ipv6) {
 /* It doesn't make sense to disable both */
+error_setg(errp, "IPv4 and IPv6 disabled");
 return -1;
 }
 
@@ -200,6 +203,7 @@ static int net_slirp_init(NetClientState *peer, const char 
*model,
 if (vnetwork) {
 if (get_str_sep(buf, sizeof(buf), , '/') < 0) {
 if (!inet_aton(vnetwork, )) {
+error_setg(errp, "Failed to parse netmask");
 return -1;
 }
 addr = ntohl(net.s_addr);
@@ -220,14 +224,19 @@ static int net_slirp_init(NetClientState *peer, const 
char *model,
 }
 } else {
 if (!inet_aton(buf, )) {
+error_setg(errp, "Failed to parse netmask");
 return -1;
 }
 shift = strtol(vnetwork, , 10);
 if (*end != '\0') {
 if (!inet_aton(vnetwork, )) {
+error_setg(errp,
+   "Failed to parse netmask (trailing chars)");
 return -1;
 }
 } else if (shift < 4 || shift > 32) {
+error_setg(errp,
+   "Invalid netmask provided (must be in range 4-32)");
 return -1;
 } else {
 mask.s_addr = htonl(0x << (32 - shift));
@@ -240,30 +249,43 @@ static int net_slirp_init(NetClientState *peer, const 
char *model,
 }
 
 if (vhost && !inet_aton(vhost, )) {
+error_setg(errp, "Failed to parse host");
 return -1;
 }
 if ((host.s_addr & mask.s_addr) != net.s_addr) {
+error_setg(errp, "Host doesn't belong to network");
 return -1;
 }
 
 if (vnameserver && !inet_aton(vnameserver, )) {
+error_setg(errp, "Failed to parse DNS");
 return -1;
 }
-if ((dns.s_addr & mask.s_addr) != net.s_addr ||
-dns.s_addr == host.s_addr) {
+if ((dns.s_addr & mask.s_addr) != net.s_addr) {
+error_setg(errp, "DNS doesn't belong to network");
+return -1;
+}
+if (dns.s_addr == host.s_addr) {
+error_setg(errp, "DNS must be different from host");
 return -1;
 }
 
 if (vdhcp_start && !inet_aton(vdhcp_start, )) {
+error_setg(errp, "Failed to parse DHCP start address");
 return -1;
 }
-if ((dhcp.s_addr & mask.s_addr) != net.s_addr ||
-dhcp.s_addr == host.s_addr || dhcp.s_addr == dns.s_addr) {
+if ((dhcp.s_addr &

[Qemu-devel] [PULL 2/2] slirp: check len against dhcp options array end

2017-08-02 Thread Samuel Thibault

From: Prasad J Pandit 

While parsing dhcp options string in 'dhcp_decode', if an options'
length 'len' appeared towards the end of 'bp_vend' array, ensuing
read could lead to an OOB memory access issue. Add check to avoid it.

This is CVE-2017-11434.

Reported-by: Reno Robert 
Signed-off-by: Prasad J Pandit 
Signed-off-by: Samuel Thibault 
---
 slirp/bootp.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/slirp/bootp.c b/slirp/bootp.c
index 5a4646c182..5dd1a415b5 100644
--- a/slirp/bootp.c
+++ b/slirp/bootp.c
@@ -123,6 +123,9 @@ static void dhcp_decode(const struct bootp_t *bp, int 
*pmsg_type,
 if (p >= p_end)
 break;
 len = *p++;
+if (p + len > p_end) {
+break;
+}
 DPRINTF("dhcp: tag=%d len=%d\n", tag, len);
 
 switch(tag) {
-- 
2.13.2

[Qemu-devel] [PATCH v3 2/2] s390x/css: generate solicited crw for rchp completion signaling

2017-08-02 Thread Dong Jia Shi

A successful completion of rchp should signal a solicited channel path
initialized CRW (channel report word), while the current implementation
always generates an un-solicited one. Let's fix this.

Reported-by: Halil Pasic 
Signed-off-by: Dong Jia Shi 
Reviewed-by: Halil Pasic 
---
 hw/s390x/css.c | 16 ++--
 include/hw/s390x/css.h |  3 ++-
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/hw/s390x/css.c b/hw/s390x/css.c
index 5321ca016b..bfa56f4b12 100644
--- a/hw/s390x/css.c
+++ b/hw/s390x/css.c
@@ -1745,10 +1745,10 @@ int css_do_rchp(uint8_t cssid, uint8_t chpid)
 }
 
 /* We don't really use a channel path, so we're done here. */
-css_queue_crw(CRW_RSC_CHP, CRW_ERC_INIT,
+css_queue_crw(CRW_RSC_CHP, CRW_ERC_INIT, 1,
   channel_subsys.max_cssid > 0 ? 1 : 0, chpid);
 if (channel_subsys.max_cssid > 0) {
-css_queue_crw(CRW_RSC_CHP, CRW_ERC_INIT, 0, real_cssid << 8);
+css_queue_crw(CRW_RSC_CHP, CRW_ERC_INIT, 1, 0, real_cssid << 8);
 }
 return 0;
 }
@@ -2028,7 +2028,8 @@ void css_subch_assign(uint8_t cssid, uint8_t ssid, 
uint16_t schid,
 }
 }
 
-void css_queue_crw(uint8_t rsc, uint8_t erc, int chain, uint16_t rsid)
+void css_queue_crw(uint8_t rsc, uint8_t erc, int solicited,
+   int chain, uint16_t rsid)
 {
 CrwContainer *crw_cont;
 
@@ -2040,6 +2041,9 @@ void css_queue_crw(uint8_t rsc, uint8_t erc, int chain, 
uint16_t rsid)
 return;
 }
 crw_cont->crw.flags = (rsc << 8) | erc;
+if (solicited) {
+crw_cont->crw.flags |= CRW_FLAGS_MASK_S;
+}
 if (chain) {
 crw_cont->crw.flags |= CRW_FLAGS_MASK_C;
 }
@@ -2086,9 +2090,9 @@ void css_generate_sch_crws(uint8_t cssid, uint8_t ssid, 
uint16_t schid,
 }
 chain_crw = (channel_subsys.max_ssid > 0) ||
 (channel_subsys.max_cssid > 0);
-css_queue_crw(CRW_RSC_SUBCH, CRW_ERC_IPI, chain_crw ? 1 : 0, schid);
+css_queue_crw(CRW_RSC_SUBCH, CRW_ERC_IPI, 0, chain_crw ? 1 : 0, schid);
 if (chain_crw) {
-css_queue_crw(CRW_RSC_SUBCH, CRW_ERC_IPI, 0,
+css_queue_crw(CRW_RSC_SUBCH, CRW_ERC_IPI, 0, 0,
   (guest_cssid << 8) | (ssid << 4));
 }
 /* RW_ERC_IPI --> clear pending interrupts */
@@ -2103,7 +2107,7 @@ void css_generate_chp_crws(uint8_t cssid, uint8_t chpid)
 void css_generate_css_crws(uint8_t cssid)
 {
 if (!channel_subsys.sei_pending) {
-css_queue_crw(CRW_RSC_CSS, CRW_ERC_EVENT, 0, cssid);
+css_queue_crw(CRW_RSC_CSS, CRW_ERC_EVENT, 0, 0, cssid);
 }
 channel_subsys.sei_pending = true;
 }
diff --git a/include/hw/s390x/css.h b/include/hw/s390x/css.h
index 5c5fe6b202..5b017e1fc3 100644
--- a/include/hw/s390x/css.h
+++ b/include/hw/s390x/css.h
@@ -150,7 +150,8 @@ void copy_scsw_to_guest(SCSW *dest, const SCSW *src);
 void css_inject_io_interrupt(SubchDev *sch);
 void css_reset(void);
 void css_reset_sch(SubchDev *sch);
-void css_queue_crw(uint8_t rsc, uint8_t erc, int chain, uint16_t rsid);
+void css_queue_crw(uint8_t rsc, uint8_t erc, int solicited,
+   int chain, uint16_t rsid);
 void css_generate_sch_crws(uint8_t cssid, uint8_t ssid, uint16_t schid,
int hotplugged, int add);
 void css_generate_chp_crws(uint8_t cssid, uint8_t chpid);
-- 
2.11.2

[Qemu-devel] [PATCH v3 1/2] s390x/css: use macro for event-information pending error recover code

2017-08-02 Thread Dong Jia Shi

Let's use a macro for the ERC (error recover code) when generating a
Channel Subsystem Event-information pending CRW (channel report word).

While we are at it, let's also add all other ERCs.

Signed-off-by: Dong Jia Shi 
Reviewed-by: Halil Pasic 
---
 hw/s390x/css.c|  2 +-
 include/hw/s390x/ioinst.h | 12 ++--
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/hw/s390x/css.c b/hw/s390x/css.c
index 6a42b95cee..5321ca016b 100644
--- a/hw/s390x/css.c
+++ b/hw/s390x/css.c
@@ -2103,7 +2103,7 @@ void css_generate_chp_crws(uint8_t cssid, uint8_t chpid)
 void css_generate_css_crws(uint8_t cssid)
 {
 if (!channel_subsys.sei_pending) {
-css_queue_crw(CRW_RSC_CSS, 0, 0, cssid);
+css_queue_crw(CRW_RSC_CSS, CRW_ERC_EVENT, 0, cssid);
 }
 channel_subsys.sei_pending = true;
 }
diff --git a/include/hw/s390x/ioinst.h b/include/hw/s390x/ioinst.h
index 92d15655e4..5f2db6949d 100644
--- a/include/hw/s390x/ioinst.h
+++ b/include/hw/s390x/ioinst.h
@@ -201,8 +201,16 @@ typedef struct CRW {
 #define CRW_FLAGS_MASK_A 0x0080
 #define CRW_FLAGS_MASK_ERC 0x003f
 
-#define CRW_ERC_INIT 0x02
-#define CRW_ERC_IPI  0x04
+#define CRW_ERC_EVENT0x00 /* event information pending */
+#define CRW_ERC_AVAIL0x01 /* available */
+#define CRW_ERC_INIT 0x02 /* initialized */
+#define CRW_ERC_TERROR   0x03 /* temporary error */
+#define CRW_ERC_IPI  0x04 /* installed parm initialized */
+#define CRW_ERC_TERM 0x05 /* terminal */
+#define CRW_ERC_PERRN0x06 /* perm. error, facility not init */
+#define CRW_ERC_PERRI0x07 /* perm. error, facility init */
+#define CRW_ERC_PMOD 0x08 /* installed parameters modified */
+#define CRW_ERC_IPR  0x0A /* installed parameters restored */
 
 #define CRW_RSC_SUBCH 0x3
 #define CRW_RSC_CHP   0x4
-- 
2.11.2

Re: [Qemu-devel] [PATCH 1/6] tests/tcg/test_path.c: include utils/bufferiszero.c

2017-08-02 Thread Philippe Mathieu-Daudé


Hi Cleber,

On 08/02/2017 05:15 PM, Cleber Rosa wrote:

Which contains one specific function used by iov.c.

Without this, "make -C tests/tcg test_path" (and consequently
"make -C tests/tcg" or simply "make test") fails quite early.

Signed-off-by: Cleber Rosa 
---
  tests/tcg/test_path.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/tests/tcg/test_path.c b/tests/tcg/test_path.c
index 1c29bce..74dbdaf 100644
--- a/tests/tcg/test_path.c
+++ b/tests/tcg/test_path.c
@@ -3,6 +3,7 @@
  #include "util/cutils.c"
  #include "util/hexdump.c"
  #include "util/iov.c"
+#include "util/bufferiszero.c"


This fixes the build, however why not take this opportunity to fix it 
through the Makefile instead of including .c?


Regards,

Phil.


  #include "util/path.c"
  #include "util/qemu-timer-common.c"
  #include

Re: [Qemu-devel] [PATCH 03/15] target/arm: Consolidate PMSA handling in get_phys_addr()

2017-08-02 Thread Philippe Mathieu-Daudé


On 08/02/2017 01:43 PM, Peter Maydell wrote:

Currently get_phys_addr() has PMSAv7 handling before the
"is translation disabled?" check, and then PMSAv5 after it.
Tidy this up by making the PMSAv5 code handle the "MPU disabled"
case itself, so that we have all the PMSA code in one place.
This will make adding the PMSAv8 code slightly cleaner, and
also means that pre-v7 PMSA cores benefit from the MPU lookup
logging that the PMSAv7 codepath had.

Signed-off-by: Peter Maydell 


Reviewed-by: Philippe Mathieu-Daudé 


---
  target/arm/helper.c | 38 ++
  1 file changed, 22 insertions(+), 16 deletions(-)

diff --git a/target/arm/helper.c b/target/arm/helper.c
index b78d277..fd83a21 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -8423,6 +8423,13 @@ static bool get_phys_addr_pmsav5(CPUARMState *env, 
uint32_t address,
  uint32_t base;
  bool is_user = regime_is_user(env, mmu_idx);
  
+if (regime_translation_disabled(env, mmu_idx)) {

+/* MPU disabled.  */
+*phys_ptr = address;
+*prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
+return false;
+}
+
  *phys_ptr = address;
  for (n = 7; n >= 0; n--) {
  base = env->cp15.c6_region[n];
@@ -8572,16 +8579,20 @@ static bool get_phys_addr(CPUARMState *env, 
target_ulong address,
  }
  }
  
-/* pmsav7 has special handling for when MPU is disabled so call it before

- * the common MMU/MPU disabled check below.
- */
-if (arm_feature(env, ARM_FEATURE_PMSA) &&
-arm_feature(env, ARM_FEATURE_V7)) {
+if (arm_feature(env, ARM_FEATURE_PMSA)) {
  bool ret;
  *page_size = TARGET_PAGE_SIZE;
-ret = get_phys_addr_pmsav7(env, address, access_type, mmu_idx,
-   phys_ptr, prot, fsr);
-qemu_log_mask(CPU_LOG_MMU, "PMSAv7 MPU lookup for %s at 0x%08" PRIx32
+
+if (arm_feature(env, ARM_FEATURE_V7)) {
+/* PMSAv7 */
+ret = get_phys_addr_pmsav7(env, address, access_type, mmu_idx,
+   phys_ptr, prot, fsr);
+} else {
+/* Pre-v7 MPU */
+ret = get_phys_addr_pmsav5(env, address, access_type, mmu_idx,
+   phys_ptr, prot, fsr);
+}
+qemu_log_mask(CPU_LOG_MMU, "PMSA MPU lookup for %s at 0x%08" PRIx32
" mmu_idx %u -> %s (prot %c%c%c)\n",
access_type == MMU_DATA_LOAD ? "reading" :
(access_type == MMU_DATA_STORE ? "writing" : "execute"),
@@ -8594,21 +8605,16 @@ static bool get_phys_addr(CPUARMState *env, 
target_ulong address,
  return ret;
  }
  
+/* Definitely a real MMU, not an MPU */

+
  if (regime_translation_disabled(env, mmu_idx)) {
-/* MMU/MPU disabled.  */
+/* MMU disabled. */
  *phys_ptr = address;
  *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
  *page_size = TARGET_PAGE_SIZE;
  return 0;
  }
  
-if (arm_feature(env, ARM_FEATURE_PMSA)) {

-/* Pre-v7 MPU */
-*page_size = TARGET_PAGE_SIZE;
-return get_phys_addr_pmsav5(env, address, access_type, mmu_idx,
-phys_ptr, prot, fsr);
-}
-
  if (regime_using_lpae_format(env, mmu_idx)) {
  return get_phys_addr_lpae(env, address, access_type, mmu_idx, 
phys_ptr,
attrs, prot, page_size, fsr, fi);

[Qemu-devel] [PULL 1/2] slirp: fill error when failing to initialize user network

2017-08-02 Thread Samuel Thibault

From: Hervé Poussineau 

With "-netdev user,id=net0,dns=1.2.3.4"
error was:
qemu-system-i386: -netdev user,id=net0,dns=1.2.3.4: Device 'user' could not be 
initialized

Error is now:
qemu-system-i386: -netdev user,id=net0,dns=1.2.3.4: DNS doesn't belong to 
network

Signed-off-by: Hervé Poussineau 
Signed-off-by: Samuel Thibault 
---
 net/slirp.c | 134 ++--
 1 file changed, 94 insertions(+), 40 deletions(-)

diff --git a/net/slirp.c b/net/slirp.c
index 9fbc949e81..01ed21c006 100644
--- a/net/slirp.c
+++ b/net/slirp.c
@@ -91,15 +91,15 @@ static QTAILQ_HEAD(slirp_stacks, SlirpState) slirp_stacks =
 QTAILQ_HEAD_INITIALIZER(slirp_stacks);
 
 static int slirp_hostfwd(SlirpState *s, const char *redir_str,
- int legacy_format);
+ int legacy_format, Error **errp);
 static int slirp_guestfwd(SlirpState *s, const char *config_str,
-  int legacy_format);
+  int legacy_format, Error **errp);
 
 #ifndef _WIN32
 static const char *legacy_smb_export;
 
 static int slirp_smb(SlirpState *s, const char *exported_dir,
- struct in_addr vserver_addr);
+ struct in_addr vserver_addr, Error **errp);
 static void slirp_smb_cleanup(SlirpState *s);
 #else
 static inline void slirp_smb_cleanup(SlirpState *s) { }
@@ -155,7 +155,7 @@ static int net_slirp_init(NetClientState *peer, const char 
*model,
   const char *bootfile, const char *vdhcp_start,
   const char *vnameserver, const char *vnameserver6,
   const char *smb_export, const char *vsmbserver,
-  const char **dnssearch)
+  const char **dnssearch, Error **errp)
 {
 /* default settings according to historic slirp */
 struct in_addr net  = { .s_addr = htonl(0x0a000200) }; /* 10.0.2.0 */
@@ -178,15 +178,18 @@ static int net_slirp_init(NetClientState *peer, const 
char *model,
 struct slirp_config_str *config;
 
 if (!ipv4 && (vnetwork || vhost || vnameserver)) {
+error_setg(errp, "IPv4 disabled but netmask/host/dns provided");
 return -1;
 }
 
 if (!ipv6 && (vprefix6 || vhost6 || vnameserver6)) {
+error_setg(errp, "IPv6 disabled but prefix/host6/dns6 provided");
 return -1;
 }
 
 if (!ipv4 && !ipv6) {
 /* It doesn't make sense to disable both */
+error_setg(errp, "IPv4 and IPv6 disabled");
 return -1;
 }
 
@@ -200,6 +203,7 @@ static int net_slirp_init(NetClientState *peer, const char 
*model,
 if (vnetwork) {
 if (get_str_sep(buf, sizeof(buf), , '/') < 0) {
 if (!inet_aton(vnetwork, )) {
+error_setg(errp, "Failed to parse netmask");
 return -1;
 }
 addr = ntohl(net.s_addr);
@@ -220,14 +224,19 @@ static int net_slirp_init(NetClientState *peer, const 
char *model,
 }
 } else {
 if (!inet_aton(buf, )) {
+error_setg(errp, "Failed to parse netmask");
 return -1;
 }
 shift = strtol(vnetwork, , 10);
 if (*end != '\0') {
 if (!inet_aton(vnetwork, )) {
+error_setg(errp,
+   "Failed to parse netmask (trailing chars)");
 return -1;
 }
 } else if (shift < 4 || shift > 32) {
+error_setg(errp,
+   "Invalid netmask provided (must be in range 4-32)");
 return -1;
 } else {
 mask.s_addr = htonl(0x << (32 - shift));
@@ -240,30 +249,43 @@ static int net_slirp_init(NetClientState *peer, const 
char *model,
 }
 
 if (vhost && !inet_aton(vhost, )) {
+error_setg(errp, "Failed to parse host");
 return -1;
 }
 if ((host.s_addr & mask.s_addr) != net.s_addr) {
+error_setg(errp, "Host doesn't belong to network");
 return -1;
 }
 
 if (vnameserver && !inet_aton(vnameserver, )) {
+error_setg(errp, "Failed to parse DNS");
 return -1;
 }
-if ((dns.s_addr & mask.s_addr) != net.s_addr ||
-dns.s_addr == host.s_addr) {
+if ((dns.s_addr & mask.s_addr) != net.s_addr) {
+error_setg(errp, "DNS doesn't belong to network");
+return -1;
+}
+if (dns.s_addr == host.s_addr) {
+error_setg(errp, "DNS must be different from host");
 return -1;
 }
 
 if (vdhcp_start && !inet_aton(vdhcp_start, )) {
+error_setg(errp, "Failed to parse DHCP start address");
 return -1;
 }
-if ((dhcp.s_addr & mask.s_addr) != net.s_addr ||
-dhcp.s_addr == host.s_addr || dhcp.s_addr == dns.s_addr) {
+if ((dhcp.s_addr &

Re: [Qemu-devel] [PULL 2/2] slirp: check len against dhcp options array end

2017-08-02 Thread Samuel Thibault

Samuel Thibault, on jeu. 03 août 2017 00:26:07 +0200, wrote:
> From: Prasad J Pandit 
> 
> While parsing dhcp options string in 'dhcp_decode', if an options'
> length 'len' appeared towards the end of 'bp_vend' array, ensuing
> read could lead to an OOB memory access issue. Add check to avoid it.
> 
> Reported-by: Reno Robert 
> Signed-off-by: Prasad J Pandit 
> Signed-off-by: Samuel Thibault 

Oops, this should have mentioned the CVE, I'll redo the pull request,
sorry.

> ---
>  slirp/bootp.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/slirp/bootp.c b/slirp/bootp.c
> index 5a4646c182..5dd1a415b5 100644
> --- a/slirp/bootp.c
> +++ b/slirp/bootp.c
> @@ -123,6 +123,9 @@ static void dhcp_decode(const struct bootp_t *bp, int 
> *pmsg_type,
>  if (p >= p_end)
>  break;
>  len = *p++;
> +if (p + len > p_end) {
> +break;
> +}
>  DPRINTF("dhcp: tag=%d len=%d\n", tag, len);
>  
>  switch(tag) {
> -- 
> 2.13.2
> 

-- 
Samuel
"...[Linux's] capacity to talk via any medium except smoke signals."
(By Dr. Greg Wettstein, Roger Maris Cancer Center)

Re: [Qemu-devel] [PATCH] firmware: add const to bin_attribute structures

2017-08-02 Thread Michael S. Tsirkin

On Wed, Aug 02, 2017 at 02:11:35PM +0530, Bhumika Goyal wrote:
> Add const to bin_attribute structures as they are only passed to the
> functions sysfs_{remove/create}_bin_file. The arguments passed are of
> type const, so declare the structures to be const.
> 
> Done using Coccinelle.
> 
> @m disable optional_qualifier@
> identifier s;
> position p;
> @@
> static struct bin_attribute s@p={...};
> 
> @okay1@
> position p;
> identifier m.s;
> @@
> (
> sysfs_create_bin_file(...,@p,...)
> |
> sysfs_remove_bin_file(...,@p,...)
> )
> 
> @bad@
> position p!={m.p,okay1.p};
> identifier m.s;
> @@
> s@p
> 
> @change depends on !bad disable optional_qualifier@
> identifier m.s;
> @@
> static
> +const
> struct bin_attribute s={...};
> 
> Signed-off-by: Bhumika Goyal 

For qemu bits:

Acked-by: Michael S. Tsirkin 

Feel free to merge.,

> ---
>  drivers/firmware/dell_rbu.c  | 6 +++---
>  drivers/firmware/dmi-sysfs.c | 2 +-
>  drivers/firmware/google/gsmi.c   | 2 +-
>  drivers/firmware/google/memconsole.c | 2 +-
>  drivers/firmware/qemu_fw_cfg.c   | 2 +-
>  5 files changed, 7 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/firmware/dell_rbu.c b/drivers/firmware/dell_rbu.c
> index 2f452f1..a771360 100644
> --- a/drivers/firmware/dell_rbu.c
> +++ b/drivers/firmware/dell_rbu.c
> @@ -675,18 +675,18 @@ static ssize_t write_rbu_packet_size(struct file *filp, 
> struct kobject *kobj,
>   return count;
>  }
>  
> -static struct bin_attribute rbu_data_attr = {
> +static const struct bin_attribute rbu_data_attr = {
>   .attr = {.name = "data", .mode = 0444},
>   .read = read_rbu_data,
>  };
>  
> -static struct bin_attribute rbu_image_type_attr = {
> +static const struct bin_attribute rbu_image_type_attr = {
>   .attr = {.name = "image_type", .mode = 0644},
>   .read = read_rbu_image_type,
>   .write = write_rbu_image_type,
>  };
>  
> -static struct bin_attribute rbu_packet_size_attr = {
> +static const struct bin_attribute rbu_packet_size_attr = {
>   .attr = {.name = "packet_size", .mode = 0644},
>   .read = read_rbu_packet_size,
>   .write = write_rbu_packet_size,
> diff --git a/drivers/firmware/dmi-sysfs.c b/drivers/firmware/dmi-sysfs.c
> index d5de6ee..936e656 100644
> --- a/drivers/firmware/dmi-sysfs.c
> +++ b/drivers/firmware/dmi-sysfs.c
> @@ -440,7 +440,7 @@ static ssize_t dmi_sel_raw_read(struct file *filp, struct 
> kobject *kobj,
>   return find_dmi_entry(entry, dmi_sel_raw_read_helper, );
>  }
>  
> -static struct bin_attribute dmi_sel_raw_attr = {
> +static const struct bin_attribute dmi_sel_raw_attr = {
>   .attr = {.name = "raw_event_log", .mode = 0400},
>   .read = dmi_sel_raw_read,
>  };
> diff --git a/drivers/firmware/google/gsmi.c b/drivers/firmware/google/gsmi.c
> index c8f169b..1ad29f7 100644
> --- a/drivers/firmware/google/gsmi.c
> +++ b/drivers/firmware/google/gsmi.c
> @@ -508,7 +508,7 @@ static ssize_t eventlog_write(struct file *filp, struct 
> kobject *kobj,
>  
>  }
>  
> -static struct bin_attribute eventlog_bin_attr = {
> +static const struct bin_attribute eventlog_bin_attr = {
>   .attr = {.name = "append_to_eventlog", .mode = 0200},
>   .write = eventlog_write,
>  };
> diff --git a/drivers/firmware/google/memconsole.c 
> b/drivers/firmware/google/memconsole.c
> index 166f07c..b11a02c 100644
> --- a/drivers/firmware/google/memconsole.c
> +++ b/drivers/firmware/google/memconsole.c
> @@ -33,7 +33,7 @@ static ssize_t memconsole_read(struct file *filp, struct 
> kobject *kobp,
>   return memconsole_read_func(buf, pos, count);
>  }
>  
> -static struct bin_attribute memconsole_bin_attr = {
> +static const struct bin_attribute memconsole_bin_attr = {
>   .attr = {.name = "log", .mode = 0444},
>   .read = memconsole_read,
>  };
> diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
> index 0e20116..f254977 100644
> --- a/drivers/firmware/qemu_fw_cfg.c
> +++ b/drivers/firmware/qemu_fw_cfg.c
> @@ -343,7 +343,7 @@ static ssize_t fw_cfg_sysfs_read_raw(struct file *filp, 
> struct kobject *kobj,
>   return count;
>  }
>  
> -static struct bin_attribute fw_cfg_sysfs_attr_raw = {
> +static const struct bin_attribute fw_cfg_sysfs_attr_raw = {
>   .attr = { .name = "raw", .mode = S_IRUSR },
>   .read = fw_cfg_sysfs_read_raw,
>  };
> -- 
> 1.9.1

Re: [Qemu-devel] [PATCH] cpu: don't allow negative core id

2017-08-02 Thread Eduardo Habkost

On Wed, Aug 02, 2017 at 03:50:36PM +0200, Laurent Vivier wrote:
> On 02/08/2017 15:42, Philippe Mathieu-Daudé wrote:
> > Hi Laurent,
> > 
> > On Wed, Aug 2, 2017 at 7:32 AM, Laurent Vivier  wrote:
> >> With pseries machine type a negative core-id is not managed properly:
> >> -1 gives an inaccurate error message ("core -1 already populated"),
> >> -2 crashes QEMU (core dump)
> >>
> >> As it seems a negative value is invalid for any architecture,
> >> instead of checking this in spapr_core_pre_plug() I think it's better
> >> to check this in the generic part, core_prop_set_core_id()
> > 
> > Why is this property signed? If there is not reason to use it negative,
> > is it possible to use object_property_add(.."uint"..)?
> 
> You should be right:
> 
> { 'struct': 'NumaNodeOptions',
>   'data': {
>'*nodeid': 'uint16',
>'*cpus':   ['uint16'],
>'*mem':'size',
>'*memdev': 'str' }}
> 
> but
> 
> { 'struct': 'CpuInstanceProperties',
>   'data': { '*node-id': 'int',
> '*socket-id': 'int',
> '*core-id': 'int',
> '*thread-id': 'int'
>   }
> }
> 
> But I'm not sure it's a good idea to change the API now.

Property parsing is not affected by the QAPI schema at all, so
touching the schema wouldn't fix the bug.

The same applies to the 'type' argument to object_property_add():
it is ignored everywhere.

However, the property setter can simply use a visitor for unsigned values, and
it will reject negative values automatically, e.g.:

  diff --git a/hw/cpu/core.c b/hw/cpu/core.c
  index 2bf960d..b5af2bf 100644
  --- a/hw/cpu/core.c
  +++ b/hw/cpu/core.c
  @@ -25,9 +25,9 @@ static void core_prop_set_core_id(Object *obj, Visitor *v, 
const char *name,
   {
   CPUCore *core = CPU_CORE(obj);
   Error *local_err = NULL;
  -int64_t value;
  +uint32_t value;
   
  -visit_type_int(v, name, , _err);
  +visit_type_uint32(v, name, , _err);
   if (local_err) {
   error_propagate(errp, local_err);
   return;


  $ ./ppc64-softmmu/qemu-system-ppc64 -device 
POWER8_v2.0-spapr-cpu-core,core-id=-2
  qemu-system-ppc64: -device POWER8_v2.0-spapr-cpu-core,core-id=-2: Parameter 
'core-id' expects uint32_t


I would suggest changing the CPUCore struct fields to uint32_t or
uint64_t, but it would be more intrusive and we're past hard
freeze.  Your patch looks good for 2.10.

Reviewed-by: Eduardo Habkost 

I'm queueing it on machine-next.

-- 
Eduardo

[Qemu-devel] [PULL 0/2] slirp updates

2017-08-02 Thread Samuel Thibault

warning: redirection vers https://people.debian.org/~sthibault/qemu.git/
The following changes since commit aaaec6acad7cf97372d48c1b09126a09697519c8:

  Update version for v2.10.0-rc1 release (2017-08-02 16:36:32 +0100)

are available in the git repository at:

  http://people.debian.org/~sthibault/qemu.git tags/samuel-thibault

for you to fetch changes up to 2fe00b96b750f8c7a48dd3a98c5cb7370947264e:

  slirp: check len against dhcp options array end (2017-08-03 00:24:31 +0200)


slirp updates


Hervé Poussineau (1):
  slirp: fill error when failing to initialize user network

Prasad J Pandit (1):
  slirp: check len against dhcp options array end

 net/slirp.c   | 134 --
 slirp/bootp.c |   3 ++
 2 files changed, 97 insertions(+), 40 deletions(-)

[Qemu-devel] [PULL 2/2] slirp: check len against dhcp options array end

2017-08-02 Thread Samuel Thibault

From: Prasad J Pandit 

While parsing dhcp options string in 'dhcp_decode', if an options'
length 'len' appeared towards the end of 'bp_vend' array, ensuing
read could lead to an OOB memory access issue. Add check to avoid it.

Reported-by: Reno Robert 
Signed-off-by: Prasad J Pandit 
Signed-off-by: Samuel Thibault 
---
 slirp/bootp.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/slirp/bootp.c b/slirp/bootp.c
index 5a4646c182..5dd1a415b5 100644
--- a/slirp/bootp.c
+++ b/slirp/bootp.c
@@ -123,6 +123,9 @@ static void dhcp_decode(const struct bootp_t *bp, int 
*pmsg_type,
 if (p >= p_end)
 break;
 len = *p++;
+if (p + len > p_end) {
+break;
+}
 DPRINTF("dhcp: tag=%d len=%d\n", tag, len);
 
 switch(tag) {
-- 
2.13.2

[Qemu-devel] [PULL 0/2] slirp updates

2017-08-02 Thread Samuel Thibault

warning: redirection vers https://people.debian.org/~sthibault/qemu.git/
The following changes since commit aaaec6acad7cf97372d48c1b09126a09697519c8:

  Update version for v2.10.0-rc1 release (2017-08-02 16:36:32 +0100)

are available in the git repository at:

  http://people.debian.org/~sthibault/qemu.git tags/samuel-thibault

for you to fetch changes up to 413d463f43fbc4dd3a601e80a5724aa384a265a0:

  slirp: check len against dhcp options array end (2017-08-03 00:26:44 +0200)


slirp updates


Hervé Poussineau (1):
  slirp: fill error when failing to initialize user network

Prasad J Pandit (1):
  slirp: check len against dhcp options array end

 net/slirp.c   | 134 --
 slirp/bootp.c |   3 ++
 2 files changed, 97 insertions(+), 40 deletions(-)

Re: [Qemu-devel] [PULL 02/17] cpu_physical_memory_sync_dirty_bitmap: Fix alignment check

2017-08-02 Thread Paolo Bonzini

On 01/08/2017 20:04, Dr. David Alan Gilbert wrote:
> * Peter Maydell (peter.mayd...@linaro.org) wrote:
>> On 1 August 2017 at 17:17, Paolo Bonzini  wrote:
>>> From: "Dr. David Alan Gilbert" 
>>>
>>> This code has an optimised, word aligned version, and a boring
>>> unaligned version.  Recently 084140bd498909 fixed a missing offset
>>> addition from the core of both versions.  However, the offset isn't
>>> necessarily aligned and thus the choice between the two versions
>>> needs fixing up to also include the offset.
>>>
>>> Symptom:
>>>   A few stuck unsent pages during migration; not normally noticed
>>> unless under very low bandwidth in which case the migration may get
>>> stuck never ending and never performing a 2nd sync; noticed by
>>> a hanging postcopy-test on a very heavily loaded system.
>>>
>>> Fixes: 084140bd498909
>>>
>>> Signed-off-by: Dr. David Alan Gilbert 
>>> Reported-by: Alex BenneÃ© 
>>> Tested-by: Alex BenneÃ© 
>>>
>>> --
>>> v2
>>>   Move 'page' inside the if (Comment from Paolo)
>>> Message-Id: <20170724165125.29887-1-dgilb...@redhat.com>
>>> Signed-off-by: Paolo Bonzini 
>>
>> Something somewhere along the line seems to have mangled the
>> unicode characters in Alex's name :-(
>> Also, Alex's email address is typoed, and what should be the
>> '---' marker has been written as '--' so the below-the-fold waffle
>> is still hanging around in the commit.
>>
>> This doesn't seem worth rejecting the pull request on the
>> eve of rc1 for, but it's still a bit sad :-(
> 
> Hmm yes, I think some of that's mine - the missing 'n' is certainly
> my fault; and I may have got his two 'e's in the wrong order,
> but I don't know what changed the encoding, that seems right on:
>   https://lists.nongnu.org/archive/html/qemu-devel/2017-07/msg07417.html

The encoding is me.

Paolo

Re: [Qemu-devel] [RFC 17/29] migration: rebuild channel on source

2017-08-02 Thread Peter Xu

On Tue, Aug 01, 2017 at 11:59:07AM +0100, Dr. David Alan Gilbert wrote:
> * Peter Xu (pet...@redhat.com) wrote:
> > This patch detects the "resume" flag of migration command, rebuild the
> > channels only if the flag is set.
> > 
> > Signed-off-by: Peter Xu 
> > ---
> >  migration/migration.c | 52 
> > ---
> >  1 file changed, 41 insertions(+), 11 deletions(-)
> > 
> > diff --git a/migration/migration.c b/migration/migration.c
> > index 36ff8c3..64de0ee 100644
> > --- a/migration/migration.c
> > +++ b/migration/migration.c
> > @@ -1244,6 +1244,15 @@ void qmp_migrate(const char *uri, bool has_blk, bool 
> > blk,
> >  MigrationState *s = migrate_get_current();
> >  const char *p;
> >  
> > +if (has_resume && resume) {
> > +if (s->state != MIGRATION_STATUS_POSTCOPY_PAUSED) {
> > +error_setg(errp, "Cannot resume if there is no "
> > +   "paused migration");
> > +return;
> > +}
> > +goto do_resume;
> > +}
> > +
> >  if (migration_is_setup_or_active(s->state) ||
> >  s->state == MIGRATION_STATUS_CANCELLING ||
> >  s->state == MIGRATION_STATUS_COLO) {
> > @@ -1279,6 +1288,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool 
> > blk,
> >  
> >  s = migrate_init();
> >  
> > +do_resume:
> 
> Can we find a way to avoid this label?
> Perhaps split the bottom half of this function out into a separate
> function?

Yes this label can indeed be avoided (sorry for my laziness). Will
take the suggestion.  Thanks,

-- 
Peter Xu

Re: [Qemu-devel] [for-2.11 PATCH 26/26] spapr: add hotplug hooks for PHB hotplug

2017-08-02 Thread Greg Kurz

On Wed, 2 Aug 2017 12:39:12 +1000
David Gibson  wrote:

> On Tue, Aug 01, 2017 at 05:30:46PM +0200, Greg Kurz wrote:
> > On Fri, 28 Jul 2017 14:24:03 +1000
> > David Gibson  wrote:
> >   
> > > On Thu, Jul 27, 2017 at 07:09:55PM +0200, Greg Kurz wrote:  
> > > > On Thu, 27 Jul 2017 14:41:31 +1000
> > > > Alexey Kardashevskiy  wrote:
> > > > 
> > > > > On 26/07/17 18:40, Greg Kurz wrote:
> > > > > > Hotplugging PHBs is a machine-level operation, but PHBs reside on 
> > > > > > the
> > > > > > main system bus, so we register spapr machine as the handler for the
> > > > > > main system bus.
> > > > > > 
> > > > > > Signed-off-by: Michael Roth 
> > > > > > Signed-off-by: Greg Kurz 
> > > > > > ---
> > > > > > - rebased against ppc-for-2.10
> > > > > > - converted to unplug_request
> > > > > > - handle drc_id at pre-plug
> > > > > > - reset hotplugged PHB at plug
> > > > > > - compatibility with older machine types
> > > > > > ---
> > > > > >  hw/ppc/spapr.c  |  114 
> > > > > > +++
> > > > > >  hw/ppc/spapr_drc.c  |1 
> > > > > >  hw/ppc/spapr_pci.c  |2 -
> > > > > >  include/hw/pci-host/spapr.h |2 +
> > > > > >  include/hw/ppc/spapr.h  |1 
> > > > > >  5 files changed, 118 insertions(+), 2 deletions(-)
> > > > > > 
> > > > > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > > > > > index 90485054c2e7..589f76ef9fb8 100644
> > > > > > --- a/hw/ppc/spapr.c
> > > > > > +++ b/hw/ppc/spapr.c
> > > > > > @@ -2540,6 +2540,10 @@ static void ppc_spapr_init(MachineState 
> > > > > > *machine)
> > > > > >  register_savevm_live(NULL, "spapr/htab", -1, 1,
> > > > > >   _htab_handlers, spapr);
> > > > > >  
> > > > > > +if (spapr->dr_phb_enabled) {
> > > > > > +qbus_set_hotplug_handler(sysbus_get_default(), 
> > > > > > OBJECT(machine), NULL);
> > > > > > +}
> > > > > > +
> > > > > >  qemu_register_boot_set(spapr_boot_set, spapr);
> > > > > >  
> > > > > >  if (kvm_enabled()) {
> > > > > > @@ -3238,6 +3242,103 @@ out:
> > > > > >  error_propagate(errp, local_err);
> > > > > >  }
> > > > > >  
> > > > > > +static void spapr_phb_pre_plug(HotplugHandler *hotplug_dev, 
> > > > > > DeviceState *dev,
> > > > > > +   Error **errp)
> > > > > > +{
> > > > > > +sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(dev);
> > > > > > +
> > > > > > +if (sphb->drc_id == (uint32_t)-1) {
> > > > > > +sphb->drc_id = sphb->index;
> > > > > > +}
> > > > > > +
> > > > > > +if (sphb->drc_id >= SPAPR_DRC_MAX_PHB) {
> > > > > > +error_setg(errp, "PHB id %d out of range", sphb->drc_id);
> > > > > > +}  
> > > > > 
> > > > > 
> > > > > sphb->index in considered 16bits in the existing code (even though it 
> > > > > is
> > > > > defined as 32bit) and SPAPR_DRC_MAX_PHB is just 256. I'd suggest 
> > > > > using the
> > > > > same limit for both, either 256 or 65536 is fine for me.
> > > > > 
> > > > > It is actually a bit weird - it is possible to completely configure 
> > > > > few
> > > > > PHBs in the command line so they will have index==-1 but PCI hotplug 
> > > > > code -
> > > > > spapr_phb_get_pci_func_drc() and spapr_phb_realize() - does not check 
> > > > > for
> > > > > this and just does (sphb->index << 16).
> > > > 
> > > > You're right and this looks like a bug... I'll try to come up with a 
> > > > fix.
> > > > 
> > > > > May be just ditch drc_id, enforce index not to be -1 and use it as 
> > > > > drc_id?
> > > > > 
> > > > 
> > > > This was how Mike did it in the original patchset but David suggested
> > > > to introduce drc_id (to preserve existing setups I guess):
> > > > 
> > > > http://patchwork.ozlabs.org/patch/466262/
> > > 
> > > Huh.  So I did.  But.. sorry, I've changed my mind.
> > > 
> > > The fact that needing a DRC forces us to have a reasonable small id
> > > for each PHB seems like a good excuse to make index mandatory - I'm
> > > not convinced anyone was actually creating PHBs without index, and
> > > this does allow us to simplify a bunch of things.
> > > 
> > > I'd like to see that done as a preliminary cleanup patch, though.
> > >   
> > 
> > Just to be sure. I could verify that the weirdness reported by Alexey
> > causes QEMU to misbehave. Only the first "index-less" PHB has realized
> > DRCs:
> >   
> > => subsequent "index-less" PHBs silently ignore hotplugging of PCI devices  
> >   
> > => QEMU won't even start with coldplugged devices in these "index-less"  
> >PHBs
> > 
> > This preliminary cleanup for hotpluggable PHBs is hence also a bug fix
> > for current PHBs.  
> 
> Ok.
> 
> > Do we want to fix this long-standing bug in 2.10 ?  
> 
> No, not worth pushing in this late.
> 

Cool. This will give us enough time to do it right.

> > Do we want to preserve the

Re: [Qemu-devel] [PATCH] migration: fix small leaks

2017-08-02 Thread Peter Xu

On Tue, Aug 01, 2017 at 05:04:18PM +0100, Marc-André Lureau wrote:
> Spotted thanks to valgrind and tests/device-introspect-test:
> 
> ==11711== 1 bytes in 1 blocks are definitely lost in loss record 6 of 14,537
> ==11711==at 0x4C2EB6B: malloc (vg_replace_malloc.c:299)
> ==11711==by 0x1E0CDBD8: g_malloc (gmem.c:94)
> ==11711==by 0x1E0E696E: g_strdup (gstrfuncs.c:363)
> ==11711==by 0x695693: migration_instance_init (migration.c:2226)
> ==11711==by 0x717C4B: object_init_with_type (object.c:344)
> ==11711==by 0x717E80: object_initialize_with_type (object.c:375)
> ==11711==by 0x7182EB: object_new_with_type (object.c:483)
> ==11711==by 0x718328: object_new (object.c:493)
> ==11711==by 0x4B8A29: qmp_device_list_properties (qmp.c:542)
> ==11711==by 0x4A9561: qmp_marshal_device_list_properties 
> (qmp-marshal.c:1425)
> ==11711==by 0x819D4A: do_qmp_dispatch (qmp-dispatch.c:104)
> ==11711==by 0x819E82: qmp_dispatch (qmp-dispatch.c:131)
> 
> Signed-off-by: Marc-André Lureau 

Reviewed-by: Peter Xu 

Thanks,

-- 
Peter Xu

Re: [Qemu-devel] [RFC 19/29] migration: let dst listen on port always

2017-08-02 Thread Peter Xu

On Tue, Aug 01, 2017 at 11:56:10AM +0100, Daniel P. Berrange wrote:
> On Fri, Jul 28, 2017 at 04:06:28PM +0800, Peter Xu wrote:
> > Signed-off-by: Peter Xu 
> > ---
> >  migration/exec.c   | 2 +-
> >  migration/fd.c | 2 +-
> >  migration/socket.c | 4 ++--
> >  3 files changed, 4 insertions(+), 4 deletions(-)
> > 
> > diff --git a/migration/exec.c b/migration/exec.c
> > index 08b599e..b4412db 100644
> > --- a/migration/exec.c
> > +++ b/migration/exec.c
> > @@ -49,7 +49,7 @@ static gboolean exec_accept_incoming_migration(QIOChannel 
> > *ioc,
> >  {
> >  migration_channel_process_incoming(ioc);
> >  object_unref(OBJECT(ioc));
> > -return FALSE; /* unregister */
> > +return TRUE; /* keep it registered */
> >  }
> >  
> >  void exec_start_incoming_migration(const char *command, Error **errp)
> > diff --git a/migration/fd.c b/migration/fd.c
> > index 30f5258..865277a 100644
> > --- a/migration/fd.c
> > +++ b/migration/fd.c
> > @@ -49,7 +49,7 @@ static gboolean fd_accept_incoming_migration(QIOChannel 
> > *ioc,
> >  {
> >  migration_channel_process_incoming(ioc);
> >  object_unref(OBJECT(ioc));
> > -return FALSE; /* unregister */
> > +return TRUE; /* keep it registered */
> >  }
> >  
> >  void fd_start_incoming_migration(const char *infd, Error **errp)
> > diff --git a/migration/socket.c b/migration/socket.c
> > index 757d382..f2c2d01 100644
> > --- a/migration/socket.c
> > +++ b/migration/socket.c
> > @@ -153,8 +153,8 @@ static gboolean 
> > socket_accept_incoming_migration(QIOChannel *ioc,
> >  
> >  out:
> >  /* Close listening socket as its no longer needed */
> > -qio_channel_close(ioc, NULL);
> > -return FALSE; /* unregister */
> > +// qio_channel_close(ioc, NULL);
> > +return TRUE; /* keep it registered */
> >  }
> 
> 
> This is not a very desirable approach IMHO.
> 
> There are two separate things at play - first we have the listener socket,
> and second we have the I/O watch that monitors for incoming clients.
> 
> The current code here closes the listener, and returns FALSE to unregister
> the event loop watch.
> 
> You're reversing both of these so that we keep the listener open and we
> keep monitoring for incoming clients. Ignoring migration resume for a
> minute, this means that the destination QEMU will now accept arbitrarily
> many incoming clients and keep trying to start a new incoming migration.
> 
> The behaviour we need is diferent. We *want* to unregister the event
> loop watch once we've accepted a client. We should only keep the socket
> listener in existance, but *not* accept any more clients. Only once we
> have hit a problem and want to accept a new client to do migration
> recovery, should we be re-adding the event loop watch.

I replied with another approach in the other thread: how about we
re-enable the listen port duing "postcopy-pause" state, and disable
the listen port when get out of that migration state?

Here "listen port" I mean both the IO watch and the socket object. Now
what I can think of: we keep these objects always there, meanwhile we
introduce a new bit for migration, say, "accept_incoming", to decide
whether we will really accept one connection. Then we drop the new
connection if that bit is not set.

(Or a new QIOChannelFeature to temporarily refuse incoming
 connection? E.g., QIO_CHANNEL_FEATURE_LISTEN_REFUSE?)

Thanks,

-- 
Peter Xu

Re: [Qemu-devel] [PATCH] migration: fix small leaks

2017-08-02 Thread Juan Quintela

Marc-André Lureau  wrote:
> Spotted thanks to valgrind and tests/device-introspect-test:
>
> ==11711== 1 bytes in 1 blocks are definitely lost in loss record 6 of 14,537
> ==11711==at 0x4C2EB6B: malloc (vg_replace_malloc.c:299)
> ==11711==by 0x1E0CDBD8: g_malloc (gmem.c:94)
> ==11711==by 0x1E0E696E: g_strdup (gstrfuncs.c:363)
> ==11711==by 0x695693: migration_instance_init (migration.c:2226)
> ==11711==by 0x717C4B: object_init_with_type (object.c:344)
> ==11711==by 0x717E80: object_initialize_with_type (object.c:375)
> ==11711==by 0x7182EB: object_new_with_type (object.c:483)
> ==11711==by 0x718328: object_new (object.c:493)
> ==11711==by 0x4B8A29: qmp_device_list_properties (qmp.c:542)
> ==11711==by 0x4A9561: qmp_marshal_device_list_properties 
> (qmp-marshal.c:1425)
> ==11711==by 0x819D4A: do_qmp_dispatch (qmp-dispatch.c:104)
> ==11711==by 0x819E82: qmp_dispatch (qmp-dispatch.c:131)
>
> Signed-off-by: Marc-André Lureau 

Reviewed-by: Juan Quintela

Re: [Qemu-devel] [PATCH] vfio/pci-quirks: Set non-zero GMS memory size for IGD

2017-08-02 Thread Dmitry Fleytman


> On 2 Aug 2017, at 24:24 AM, Alex Williamson  
> wrote:
> 
> On Fri, 28 Jul 2017 08:22:45 +0300
> Dmitry Fleytman > wrote:
> 
>>> On 28 Jul 2017, at 07:51 AM, Zhang, Xiong Y  wrote:
>>> 
> On 26 Jul 2017, at 08:22 AM, Zhang, Xiong Y   
 wrote:  
> 
> Sorry, we indeed found Intel windows guest graphic driver couldn't be 
> bind  
 when GMS memory size is zero. And we have fixed it and the next intel
 windows driver release will contain this fix.  
> So currently please use x-igd-gms in legacy mode to work around this.
> 
> BTW, how did you know window driver allocate extra ~4G memory when  
 GMS size set to 0 ?
 
 We noticed that with new IGD driver memory usage on VMs raised by around
 4G.
 
 Then we examined memory allocations with poolmon
 (https://docs.microsoft.com/en-us/windows-hardware/drivers/devtest/poolm
 on)
 and saw that around 4G of memory is allocated with IGD driver pool tag.
 Additionally we noticed that on VMs with less than 5G of memory IGD driver
 does not load and system does fallback to standard VGA driver.
 
>>> [Zhang, Xiong Y]  I tried our internal driver and didn't found the above 
>>> two issues, please wait for the next intel graphic driver release.  
>> 
>> Thanks!
> 
> So to close on this, we should just be recommending that users
> encountering this problem add the x-igd-gms=1 option to the device
> until the next Intel graphics release.  I won't plan to push this
> change.  Thanks all,

Hi Alex,

Agree, that makes sense.

Thanks you,
Dmitry

> 
> Alex

Re: [Qemu-devel] [RFC 18/29] migration: new state "postcopy-recover"

2017-08-02 Thread Peter Xu

On Tue, Aug 01, 2017 at 12:36:22PM +0100, Dr. David Alan Gilbert wrote:
> * Peter Xu (pet...@redhat.com) wrote:

[...]
> > @@ -2043,9 +2054,32 @@ static bool postcopy_pause(MigrationState *s)
> >  qemu_sem_wait(>postcopy_pause_sem);
> >  }
> >  
> > -trace_postcopy_pause_continued();
> > +if (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
> > +/* We were waken up by a recover procedure. Give it a shot */
> >  
> > -return true;
> > +/*
> > + * Firstly, let's wake up the return path now, with a new
> > + * return path channel.
> > + */
> > +qemu_sem_post(>postcopy_pause_rp_sem);
> > +
> > +/* Do the resume logic */
> > +if (postcopy_do_resume(s) == 0) {
> > +/* Let's continue! */
> > +trace_postcopy_pause_continued();
> > +return true;
> > +} else {
> > +/*
> > + * Something wrong happened during the recovery, let's
> > + * pause again. Pause is always better than throwing data
> > + * away.
> > + */
> > +goto do_pause;
> 
> You should be able to turn this around into a do {} while or similar
> rather than goto.

Indeed. Fixing up.  Thanks,

-- 
Peter Xu

[Qemu-devel] [PATCH] target-i386 : reduce rtc 0x70 access vm-exit time

2017-08-02 Thread Peng Hao

some versions of windows guest access rtc frequently because of 
rtc as system tick.guest access rtc like this: write register index 
to 0x70, then write or read data from 0x71. writing 0x70 port is 
just as index and do nothing else. So writing 0x70 is not necessory 
to exit to userspace every time and caching rtc register index in kvm 
can reduce VM-EXIT time.
without my patch, get the vm-exit time of accessing rtc 0x70 using
perf tools: (guest OS : windows 7 64bit)
IO Port Access  Samples Samples%   Time%Min Time  Max Time  Avg time
0x70:POUT 86 30.99%74.59%   9us   29us 10.75us ( +- 
3.41% )

with my patch
IO Port Access  Samples Samples%  Time%   Min Time  Max Time   Avg time
 0x70:POUT   10632.02%29.47%0us  10us 1.57us ( +- 7.38% 
)

the patch is a part of optimizing rtc 0x70 port access.another is in
kernel.

Signed-off-by: Peng Hao 
Reviewed-by: Liu Yi 
---
 accel/kvm/kvm-all.c   | 17 +
 linux-headers/linux/kvm.h |  3 ++-
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 46ce479..683274d 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -1483,6 +1483,21 @@ void kvm_irqchip_set_qemuirq_gsi(KVMState *s, qemu_irq 
irq, int gsi)
 g_hash_table_insert(s->gsimap, irq, GINT_TO_POINTER(gsi));
 }
 
+static void kvm_vrtc_create(KVMState *s)
+{
+int ret;
+if (kvm_check_extension(s, KVM_CAP_VRTC)) {
+return;
+}
+
+ret = kvm_vm_ioctl(s, KVM_CREATE_VRTC);
+
+if (ret < 0) {
+fprintf(stderr, "Create kernel vrtc failed: %s\n", strerror(-ret));
+exit(1);
+}
+}
+
 static void kvm_irqchip_create(MachineState *machine, KVMState *s)
 {
 int ret;
@@ -1750,6 +1765,8 @@ static int kvm_init(MachineState *ms)
 if (machine_kernel_irqchip_allowed(ms)) {
 kvm_irqchip_create(ms, s);
 }
+
+kvm_vrtc_create(s);
 
 if (kvm_eventfds_allowed) {
 s->memory_listener.listener.eventfd_add = kvm_mem_ioeventfd_add;
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index 7971a4f..784179a 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -929,6 +929,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_PPC_SMT_POSSIBLE 147
 #define KVM_CAP_HYPERV_SYNIC2 148
 #define KVM_CAP_HYPERV_VP_INDEX 149
+#define KVM_CAP_VRTC 150
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1258,7 +1259,7 @@ struct kvm_s390_ucas_mapping {
 #define KVM_PPC_CONFIGURE_V3_MMU  _IOW(KVMIO,  0xaf, struct kvm_ppc_mmuv3_cfg)
 /* Available with KVM_CAP_PPC_RADIX_MMU */
 #define KVM_PPC_GET_RMMU_INFO_IOW(KVMIO,  0xb0, struct kvm_ppc_rmmu_info)
-
+#define KVM_CREATE_VRTC   _IO(KVMIO,  0xba)
 /* ioctl for vm fd */
 #define KVM_CREATE_DEVICE_IOWR(KVMIO,  0xe0, struct kvm_create_device)
 
-- 
1.8.3.1

Re: [Qemu-devel] [PATCH for-2.10 0/5] tests: acpi: make sure FADT is compared to reference table

2017-08-02 Thread Igor Mammedov

On Wed, 2 Aug 2017 00:14:18 +0300
"Michael S. Tsirkin"  wrote:

> On Mon, Jul 31, 2017 at 05:40:47PM +0200, Igor Mammedov wrote:
> > While refactoring i386/FADT generation to build_append_int_noprefix()   
> >  
> > and testing it, It turned out that FADT is only tested for valid
> >  
> > checksum but actual test for unintended changes isn't applied to it 
> >  
> > even though we have reference tables in tree.   
> >  
> > So here goes a couple of cleanups to reflect what fuctions do + 
> >  
> > some comments and actual fix.   
> >  
> > 
> >  
> > Note to maintainer: 
> >  
> >   FADT reference table is out of sync and should be updated along with  
> >  
> >   series applied.   
> >  
> > 
> >  
> > CC: "Michael S. Tsirkin"   
> >   
> > CC: Marcel Apfelbaum    
> 
> Absolutely good stuff, but not a bugfix as such (it's not that the
> test is wrong, it's that we skip FADT for now)
> so I don't think this is 2.10 material.
Agreed, it could go in when 2.11 merge window is open.

> 
> > Igor Mammedov (5):
> >   tests: acpi: move tested tables array allocation outside of
> > test_acpi_dsdt_table()
> >   tests: acpi: init table descriptor in test_dst_table()
> >   tests: acpi: rename test_acpi_tables()/test_dst_table() to reflect its
> > usage
> >   tests: acpi: add comments to fetch_rsdt_referenced_tables/data->tables
> > usage
> >   tests: acpi: fix FADT not being compared to reference table
> > 
> >  tests/bios-tables-test.c | 30 ++
> >  1 file changed, 18 insertions(+), 12 deletions(-)
> > 
> > -- 
> > 2.7.4

Re: [Qemu-devel] [for-2.10 PATCH] spapr_drc: fix realize and unrealize

2017-08-02 Thread Greg Kurz

On Fri, 28 Jul 2017 14:27:45 +1000
David Gibson  wrote:

> On Thu, Jul 27, 2017 at 03:50:37PM -0500, Michael Roth wrote:
> > Quoting Greg Kurz (2017-07-27 08:45:47)  
> > > If object_property_add_alias() returns an error in realize(), we should
> > > propagate it to the caller and certainly not unref the DRC.  
> > 
> > Indeed. I think that was the result of this part of the code
> > originally living in spapr_dr_connector_new() during development,
> > where it had previously made sense to free the object if there was a
> > failure and then return NULL. We definitely shouldn't be during it
> > now...  
> 
> Yes, I think that's right.
> 
> I've applied this fix to ppc-for-2.10.
> 
> >   
> > > 
> > > Same thing goes for unrealize(). Since object_property_del() is the last
> > > call, we can even get rid of the intermediate Error *.
> > > 
> > > And finally, unrealize() should undo all registrations performed by
> > > realize().
> > > 
> > > Signed-off-by: Greg Kurz 
> > > ---
> > > 
> > > David,
> > > 
> > > As agreed on the list, here's the version of the fix for 2.10. I'll respin
> > > my PHB hotplug series on top of it.
> > >   
> > 
> > 
> > Since spapr_dr_connector_new() calls realize() with errp = NULL, we
> > basically lose the error now. I think maybe we should at least report it
> > still, but even better would be to have _new() call object_property_* with
> > error_abort, since callers of spapr_dr_connector_new() don't all check
> > for the return value and even if they did the appropriate action would
> > always be to report+abort() anyway.  
> 
> I agree.  Well, except I think it should be _fatal, not
> _abort.  But that can be a follow up patch.
> 

Hmmm... when we'll hotplug a PHB, an error during the realization of
a PCI DRC would then terminate QEMU. I'd rather add an errp argument
to spapr_dr_connector_new() and propagate the error instead.

> > 
> > Looks good otherwise.
> >   
> > >  }
> > > -g_free(child_name);
> > >  vmstate_register(DEVICE(drc), spapr_drc_index(drc), 
> > > _spapr_drc,
> > >   drc);
> > >  qemu_register_reset(drc_reset, drc);
> > > @@ -522,16 +522,13 @@ static void unrealize(DeviceState *d, Error **errp)
> > >  sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(d);
> > >  Object *root_container;
> > >  char name[256];
> > > -Error *err = NULL;
> > > 
> > >  trace_spapr_drc_unrealize(spapr_drc_index(drc));
> > > +qemu_unregister_reset(drc_reset, drc);
> > > +vmstate_unregister(DEVICE(drc), _spapr_drc, drc);
> > >  root_container = container_get(object_get_root(), 
> > > DRC_CONTAINER_PATH);
> > >  snprintf(name, sizeof(name), "%x", spapr_drc_index(drc));
> > > -object_property_del(root_container, name, );
> > > -if (err) {
> > > -error_report_err(err);
> > > -object_unref(OBJECT(drc));
> > > -}
> > > +object_property_del(root_container, name, errp);
> > >  }
> > > 
> > >  sPAPRDRConnector *spapr_dr_connector_new(Object *owner, const char *type,
> > > 
> > >   
> >   
> 



pgp9yr7_FXL26.pgp
Description: OpenPGP digital signature

Re: [Qemu-devel] [ANNOUNCE] QEMU 2.10.0-rc1 is now available

2017-08-02 Thread Michael Roth

Quoting Michael Roth (2017-08-02 14:25:48)
> Hello,
> 
> On behalf of the QEMU Team, I'd like to announce the availability of the
> second release candidate for the QEMU 2.10 release.  This release is meant
> for testing purposes and should not be used in a production environment.
> 
>   http://download.qemu-project.org/qemu-2.10.0-rc1.tar.xz
>   http://download.qemu-project.org/qemu-2.10.0-rc1.tar.xz.sig
> 
> You can help improve the quality of the QEMU 2.10 release by testing this
> release and reporting bugs on Launchpad:
> 
>   https://bugs.launchpad.net/qemu/
> 
> The release plan, as well a documented known issues for release
> candidates, are available at:
> 
>   http://wiki.qemu.org/Planning/2.10
> 
> Please add entries to the ChangeLog for the 2.10 release below:
> 
>   http://wiki.qemu.org/ChangeLog/2.10
> 
> 

Changes since rc0:

aaaec6a: Update version for v2.10.0-rc1 release (Peter Maydell)
bbcee37: tests/hmp: Fix typo in the 'chardev-send-break' test (Thomas Huth)
8bd9c4e: io: fix qio_channel_socket_accept err handling (Peter Xu)
2dfaf12: migration: fix comment disorder in RAMState (Peter Xu)
b91bf5e: migration: fix small leaks (Marc-André Lureau)
3a3fcc7: pc: acpi: force FADT rev1 for 440fx based machine types (Igor Mammedov)
208fa0e: pc: make 'pc.rom' readonly when machine has PCI enabled (Igor Mammedov)
41d4e5e: vhost-user: fix watcher need be removed when vhost-user hotplug 
(Yunjian Wang)
91c38e0: tests/bios-tables-test: Compiler warning fix (Dr. David Alan Gilbert)
2f6b38d: accel: cleanup error output (Laurent Vivier)
07f7b73: intel_iommu: use access_flags for iotlb (Peter Xu)
892721d: intel_iommu: fix iova for pt (Peter Xu)
5df04f1: vhost-user: fix legacy cross-endian configurations (Felipe Franciosi)
08b9e0b: vhost: fix a memory leak (Peng Hao)
2cef91c: tests: switch pxe and vm gen id tests to use kvm (Michael S. Tsirkin)
8e8eb0a: block/qapi: Remove redundant NULL check to silence Coverity (Kevin 
Wolf)
59fa68f: qemu-iotests/059: Fix leaked image files (Kevin Wolf)
1803f3f: qemu-iotests/063: Fix leaked image (Kevin Wolf)
a8e9c84: qemu-iotests/162: Fix leaked temporary files (Kevin Wolf)
6a1e909: qemu-iotests/153: Fix leaked scratch images (Kevin Wolf)
0a1505d: qemu-iotests/141: Fix image cleanup (Kevin Wolf)
0e59607: qemu-iotests: Remove blkdebug.conf after tests (Kevin Wolf)
db11d1e: qemu-iotests/041: Fix leaked scratch images (Kevin Wolf)
180ca19: block: fix leaks in bdrv_open_driver() (Manos Pitsidianakis)
998cbd6: block: fix dangling bs->explicit_options in block.c (Manos 
Pitsidianakis)
b81b74b: iotests: Add test of recent fix to 'qemu-img measure' (Eric Blake)
1e2b1f6: iotests: Check dirty bitmap statistics in 124 (Eric Blake)
c09bd34: iotests: Redirect stderr to stdout in 186 (Max Reitz)
645acdc: iotests: Fix test 156 (Max Reitz)
33f21e4: mc146818rtc: implement UIP latching as intended (Paolo Bonzini)
6a51d83: mc146818rtc: simplify check_update_timer (Paolo Bonzini)
da3a392: rtc-test: introduce more update tests (Paolo Bonzini)
bc706fa: rtc-test: cleanup register_b_set_flag test (Paolo Bonzini)
fafeb41: hw/scsi/vmw_pvscsi: Convert to realize (Mao Zhongyi)
dfaea0c: hw/scsi/vmw_pvscsi: Remove the dead error handling (Mao Zhongyi)
1931076: migration: optimize the downtime (Jay Zhou)
8bfce83: qemu-options: document existance of versioned machine types (Daniel P. 
Berrange)
393c13b: bt: stop the sdp memory allocation craziness (Paolo Bonzini)
f5aa69b: exec: Add lock parameter to qemu_ram_ptr_length (Anthony PERARD)
4fadfa0: target-i386: kvm_get/put_vcpu_events don't handle sipi_vector (Peng 
Hao)
eb22aec: docs: document deprecation policy & deprecated features in appendix 
(Daniel P. Berrange)
0ec846b: char: don't exit on hmp 'chardev-add help' (Anton Nefedov)
4db0db1: char-fd: remove useless chr pointer (Marc-André Lureau)
d73f024: accel: cleanup error output (Laurent Vivier)
f70d345: cpu_physical_memory_sync_dirty_bitmap: Fix alignment check (Dr. David 
Alan Gilbert)
452589b: vl.c/exit: pause cpus before closing block devices (Dr. David Alan 
Gilbert)
bd6952a: monitor: Reduce handle_qmp_command() tracing overhead (Denis V. Lunev)
8908eb1: trace-events: fix code style: print 0x before hex numbers (Vladimir 
Sementsov-Ogievskiy)
c3e5875: checkpatch: check trace-events code style (Vladimir 
Sementsov-Ogievskiy)
db73ee4: trace-events: fix code style: %# -> 0x% (Vladimir Sementsov-Ogievskiy)
44c6d63: coding_style: add point about 0x in trace-events (Vladimir 
Sementsov-Ogievskiy)
d87aa13: trace: add trace_event_get_state_backends() (Stefan Hajnoczi)
3932ef3: trace: add TRACE__BACKEND_DSTATE() (Stefan Hajnoczi)
ea1ff54: trace: ensure unique function / variable names per .stp file (Daniel 
P. Berrange)
ed86505: trace: ensure .stp files are rebuilt if trace tool source changes 
(Daniel P. Berrange)
bdf211f: Revert "syscall: fix dereference of undefined pointer" (Peter Maydell)
89cbc37: hw/mps2_scc: fix incorrect properties (Philippe Mathieu-Daudé)
f1a4694: target/arm: Migrate MPU_RNR

[Qemu-devel] [PATCH 1/6] tests/tcg/test_path.c: include utils/bufferiszero.c

2017-08-02 Thread Cleber Rosa

Which contains one specific function used by iov.c.

Without this, "make -C tests/tcg test_path" (and consequently
"make -C tests/tcg" or simply "make test") fails quite early.

Signed-off-by: Cleber Rosa 
---
 tests/tcg/test_path.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/tcg/test_path.c b/tests/tcg/test_path.c
index 1c29bce..74dbdaf 100644
--- a/tests/tcg/test_path.c
+++ b/tests/tcg/test_path.c
@@ -3,6 +3,7 @@
 #include "util/cutils.c"
 #include "util/hexdump.c"
 #include "util/iov.c"
+#include "util/bufferiszero.c"
 #include "util/path.c"
 #include "util/qemu-timer-common.c"
 #include 
-- 
2.9.4

[Qemu-devel] [PATCH 3/6] tests/tcg/linux-test.c: include definitions for getrusage()

2017-08-02 Thread Cleber Rosa

A include for  is missing, and prevents
tests/tcg/linux-test from compiling.

Signed-off-by: Cleber Rosa 
---
 tests/tcg/linux-test.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/tcg/linux-test.c b/tests/tcg/linux-test.c
index 1c6c013..15c9d7f 100644
--- a/tests/tcg/linux-test.c
+++ b/tests/tcg/linux-test.c
@@ -39,6 +39,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define TESTPATH "/tmp/linux-test.tmp"
 #define TESTPORT 7654
-- 
2.9.4

[Qemu-devel] [PATCH 2/6] tests/tcg/linux-test.c: remove unused include of "qemu/cutils.h"

2017-08-02 Thread Cleber Rosa

Building tests/tcg/linux-test is not currently possible because
$(QEMU_INCLUDES) is not being passed to $(CC_I386).  But, since
it's not really used, instead of adding the $(QEMU_INCLUDES),
let's remove the "qemu/ctuils.h" include instead.

Signed-off-by: Cleber Rosa 
---
 tests/tcg/linux-test.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/tcg/linux-test.c b/tests/tcg/linux-test.c
index 5070d31..1c6c013 100644
--- a/tests/tcg/linux-test.c
+++ b/tests/tcg/linux-test.c
@@ -39,7 +39,6 @@
 #include 
 #include 
 #include 
-#include "qemu/cutils.h"
 
 #define TESTPATH "/tmp/linux-test.tmp"
 #define TESTPORT 7654
-- 
2.9.4

[Qemu-devel] [PATCH] test-qga: Kill broken and dead QGA_TEST_SIDE_EFFECTING code

2017-08-02 Thread Eric Blake

Back when the test was introduced, in commit 62c39b307, the
test was set up to run qemu-ga directly on the host performing
the test, and defaults to limiting itself to safe commands.  At
the time, it was envisioned that setting QGA_TEST_SIDE_EFFECTING
in the environment could cover a few more commands, while noting
the potential danger of those side effects running in the host.

But this has NEVER been tested: if you enable the environment
variable, the test WILL fail.  One obvious reason: if you are not
running as root, you'll probably get a permission failure when
trying to freeze the file systems, or when changing system time.
Less obvious: if you run the test as root (wow, you're brave), you
could end up hanging if the test tries to log things to a
temporarily frozen filesystem.  But the cutest reason of all: if
you get past the above hurdles, the test uses invalid JSON in
test_qga_fstrim() (missing '' around the dictionary key 'minimum'),
and will thus fail an assertion in qmp_fd().

Rather than leave this untested time-bomb in place, rip it out.
Hopefully, as originally envisioned, we can find an opportunity
to test an actual sandboxed guest where the guest-agent has
full permissions and will not unduly affect the host running
the test - if so, 'git revert' can be used if desired, for
salvaging any useful parts of this attempt.

Signed-off-by: Eric Blake 
---
 tests/test-qga.c | 90 
 1 file changed, 90 deletions(-)

diff --git a/tests/test-qga.c b/tests/test-qga.c
index 06783e7585..fd6bc7690f 100644
--- a/tests/test-qga.c
+++ b/tests/test-qga.c
@@ -642,65 +642,6 @@ static void test_qga_get_time(gconstpointer fix)
 QDECREF(ret);
 }

-static void test_qga_set_time(gconstpointer fix)
-{
-const TestFixture *fixture = fix;
-QDict *ret;
-int64_t current, time;
-gchar *cmd;
-
-/* get current time */
-ret = qmp_fd(fixture->fd, "{'execute': 'guest-get-time'}");
-g_assert_nonnull(ret);
-qmp_assert_no_error(ret);
-current = qdict_get_int(ret, "return");
-g_assert_cmpint(current, >, 0);
-QDECREF(ret);
-
-/* set some old time */
-ret = qmp_fd(fixture->fd, "{'execute': 'guest-set-time',"
- " 'arguments': { 'time': 1000 } }");
-g_assert_nonnull(ret);
-qmp_assert_no_error(ret);
-QDECREF(ret);
-
-/* check old time */
-ret = qmp_fd(fixture->fd, "{'execute': 'guest-get-time'}");
-g_assert_nonnull(ret);
-qmp_assert_no_error(ret);
-time = qdict_get_int(ret, "return");
-g_assert_cmpint(time / 1000, <, G_USEC_PER_SEC * 10);
-QDECREF(ret);
-
-/* set back current time */
-cmd = g_strdup_printf("{'execute': 'guest-set-time',"
-  " 'arguments': { 'time': %" PRId64 " } }",
-  current + time * 1000);
-ret = qmp_fd(fixture->fd, cmd);
-g_free(cmd);
-g_assert_nonnull(ret);
-qmp_assert_no_error(ret);
-QDECREF(ret);
-}
-
-static void test_qga_fstrim(gconstpointer fix)
-{
-const TestFixture *fixture = fix;
-QDict *ret;
-QList *list;
-const QListEntry *entry;
-
-ret = qmp_fd(fixture->fd, "{'execute': 'guest-fstrim',"
- " arguments: { minimum: 4194304 } }");
-g_assert_nonnull(ret);
-qmp_assert_no_error(ret);
-list = qdict_get_qlist(ret, "return");
-entry = qlist_first(list);
-g_assert(qdict_haskey(qobject_to_qdict(entry->value), "paths"));
-
-QDECREF(ret);
-}
-
 static void test_qga_blacklist(gconstpointer data)
 {
 TestFixture fix;
@@ -831,30 +772,6 @@ static void test_qga_fsfreeze_status(gconstpointer fix)
 QDECREF(ret);
 }

-static void test_qga_fsfreeze_and_thaw(gconstpointer fix)
-{
-const TestFixture *fixture = fix;
-QDict *ret;
-const gchar *status;
-
-ret = qmp_fd(fixture->fd, "{'execute': 'guest-fsfreeze-freeze'}");
-g_assert_nonnull(ret);
-qmp_assert_no_error(ret);
-QDECREF(ret);
-
-ret = qmp_fd(fixture->fd, "{'execute': 'guest-fsfreeze-status'}");
-g_assert_nonnull(ret);
-qmp_assert_no_error(ret);
-status = qdict_get_try_str(ret, "return");
-g_assert_cmpstr(status, ==, "frozen");
-QDECREF(ret);
-
-ret = qmp_fd(fixture->fd, "{'execute': 'guest-fsfreeze-thaw'}");
-g_assert_nonnull(ret);
-qmp_assert_no_error(ret);
-QDECREF(ret);
-}
-
 static void test_qga_guest_exec(gconstpointer fix)
 {
 const TestFixture *fixture = fix;
@@ -1029,13 +946,6 @@ int main(int argc, char **argv)
 g_test_add_data_func("/qga/guest-get-osinfo", ,
  test_qga_guest_get_osinfo);

-if (g_getenv("QGA_TEST_SIDE_EFFECTING")) {
-g_test_add_data_func("/qga/fsfreeze-and-thaw", ,
- test_qga_fsfreeze_and_thaw);
-g_test_add_data_func("/qga/set-time", , test_qga_set_time);
-g_test_add_data_func("/qga/fstrim", , test_qga_fstrim);
-}
-
 ret = g_test_run();

 fixture_tear_down(,

Re: [Qemu-devel] [PATCH] ppc: fix double-free in cpu_post_load()

2017-08-02 Thread Philippe Mathieu-Daudé


On 08/02/2017 02:34 PM, Greg Kurz wrote:

When running nested with KVM PR, ppc_set_compat() fails and QEMU crashes
because of "double free or corruption (!prev)". The crash happens because
error_report_err() has already called error_free().

Signed-off-by: Greg Kurz 


Reviewed-by: Philippe Mathieu-Daudé 


---
  target/ppc/machine.c |1 -
  1 file changed, 1 deletion(-)

diff --git a/target/ppc/machine.c b/target/ppc/machine.c
index f578156dd411..abe0a1cdf021 100644
--- a/target/ppc/machine.c
+++ b/target/ppc/machine.c
@@ -239,7 +239,6 @@ static int cpu_post_load(void *opaque, int version_id)
  ppc_set_compat(cpu, cpu->compat_pvr, _err);
  if (local_err) {
  error_report_err(local_err);
-error_free(local_err);
  return -1;
  }
  } else

[Qemu-devel] [ANNOUNCE] QEMU 2.10.0-rc1 is now available

2017-08-02 Thread Michael Roth

Hello,

On behalf of the QEMU Team, I'd like to announce the availability of the
second release candidate for the QEMU 2.10 release.  This release is meant
for testing purposes and should not be used in a production environment.

  http://download.qemu-project.org/qemu-2.10.0-rc1.tar.xz
  http://download.qemu-project.org/qemu-2.10.0-rc1.tar.xz.sig

You can help improve the quality of the QEMU 2.10 release by testing this
release and reporting bugs on Launchpad:

  https://bugs.launchpad.net/qemu/

The release plan, as well a documented known issues for release
candidates, are available at:

  http://wiki.qemu.org/Planning/2.10

Please add entries to the ChangeLog for the 2.10 release below:

  http://wiki.qemu.org/ChangeLog/2.10

[Qemu-devel] [PATCH 0/6] Enable building and running tcg tests on i386

2017-08-02 Thread Cleber Rosa

The primary motivation of this patch series is to get "make -C
tests/tcg", on i386, to run.  Having all of the individual i386 tcg
tests passing is beyond the scope of this patch series, though.

The secondary motivation is to gather feedback on the status of the
tcg tests.  If you have strong opinions about it, be that they need to
be nourished and cared for, that they should be eliminated completely,
or anything in between, I'd love to hear about it.

Cleber Rosa(6):
   tests/tcg/test_path.c: include utils/bufferiszero.c
   tests/tcg/linux-test.c: remove unused include of "qemu/cutils.h"
   tests/tcg/linux-test.c: include definitions for getrusage()
   tests/tcg/test-i386-fprem: build with $(QEMU_CFLAGS)
   tests/tcg/test-i386-fprem.c: compilation fix for -Werror=strict-prototype
   tests/tcg/test-i386-fprem.c: compilation fix for 
-Werror=unused-const-variable=

 tests/tcg/Makefile  |  2 +-
 tests/tcg/linux-test.c  |  2 +-
 tests/tcg/test-i386-fprem.c | 17 +
 tests/tcg/test_path.c   |  1 +
4 files changed, 4 insertions(+), 18 deletions(-)

Re: [Qemu-devel] [PATCH] target-mips: apply CP0.PageMask before writing into TLB entry

2017-08-02 Thread Philippe Mathieu-Daudé


Hi Leon,

On 08/02/2017 10:58 AM, Yongbok Kim wrote:

From: Leon Alrae 

PFN0 and PFN1 have to be masked out with PageMask_Mask.

Signed-off-by: Leon Alrae 
Reviewed-by: Yongbok Kim 
[Yongbok Kim:
   Added commit message]
Signed-off-by: Yongbok Kim 
---
  target/mips/op_helper.c | 5 +++--
  1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/target/mips/op_helper.c b/target/mips/op_helper.c
index 526f8e4..320f2b0 100644
--- a/target/mips/op_helper.c
+++ b/target/mips/op_helper.c
@@ -2008,6 +2008,7 @@ static inline uint64_t get_tlb_pfn_from_entrylo(uint64_t 
entrylo)
  static void r4k_fill_tlb(CPUMIPSState *env, int idx)
  {
  r4k_tlb_t *tlb;
+uint64_t mask = env->CP0_PageMask >> (TARGET_PAGE_BITS + 1);
  
  /* XXX: detect conflicting TLBs and raise a MCHECK exception when needed */

  tlb = >tlb->mmu.r4k.tlb[idx];
@@ -2028,13 +2029,13 @@ static void r4k_fill_tlb(CPUMIPSState *env, int idx)
  tlb->C0 = (env->CP0_EntryLo0 >> 3) & 0x7;
  tlb->XI0 = (env->CP0_EntryLo0 >> CP0EnLo_XI) & 1;
  tlb->RI0 = (env->CP0_EntryLo0 >> CP0EnLo_RI) & 1;
-tlb->PFN[0] = get_tlb_pfn_from_entrylo(env->CP0_EntryLo0) << 12;
+tlb->PFN[0] = (get_tlb_pfn_from_entrylo(env->CP0_EntryLo0) & ~mask) << 12;
  tlb->V1 = (env->CP0_EntryLo1 & 2) != 0;
  tlb->D1 = (env->CP0_EntryLo1 & 4) != 0;
  tlb->C1 = (env->CP0_EntryLo1 >> 3) & 0x7;
  tlb->XI1 = (env->CP0_EntryLo1 >> CP0EnLo_XI) & 1;
  tlb->RI1 = (env->CP0_EntryLo1 >> CP0EnLo_RI) & 1;
-tlb->PFN[1] = get_tlb_pfn_from_entrylo(env->CP0_EntryLo1) << 12;
+tlb->PFN[1] = (get_tlb_pfn_from_entrylo(env->CP0_EntryLo1) & ~mask) << 12;
  }
  
  void r4k_helper_tlbinv(CPUMIPSState *env)




What about refactoring get_tlb_pfn_from_entrylo(uint64_t entrylo) -> 
r4k_get_tlb_pfn_from_entrylo(uint64_t entrylo, uint64_t pagemask) to 
directly masked pfn?


Regards,

Phil.

[Qemu-devel] [PATCH 5/6] tests/tcg/test-i386-fprem.c: compilation fix for -Werror=strict-prototype

2017-08-02 Thread Cleber Rosa

A trivial fix to make the compiler happy.

Signed-off-by: Cleber Rosa 
---
 tests/tcg/test-i386-fprem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/tcg/test-i386-fprem.c b/tests/tcg/test-i386-fprem.c
index 1a71623..f70363d 100644
--- a/tests/tcg/test-i386-fprem.c
+++ b/tests/tcg/test-i386-fprem.c
@@ -98,7 +98,7 @@ static const union float80u smallest_positive_norm = {
 .ieee.mantissa = 0,
 };
 
-static void fninit()
+static void fninit(void)
 {
 asm volatile ("fninit\n");
 }
-- 
2.9.4

[Qemu-devel] [PATCH 4/6] tests/tcg/test-i386-fprem: build with $(QEMU_CFLAGS)

2017-08-02 Thread Cleber Rosa

So that glib.h can be found.

Signed-off-by: Cleber Rosa 
---
 tests/tcg/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/tcg/Makefile b/tests/tcg/Makefile
index 89e3342..c946fde 100644
--- a/tests/tcg/Makefile
+++ b/tests/tcg/Makefile
@@ -98,7 +98,7 @@ test-i386: test-i386.c test-i386-code16.S test-i386-vm86.S \
   $(

Re: [Qemu-devel] [RFC 19/29] migration: let dst listen on port always

2017-08-02 Thread Daniel P. Berrange

On Wed, Aug 02, 2017 at 03:02:24PM +0800, Peter Xu wrote:
> On Tue, Aug 01, 2017 at 11:56:10AM +0100, Daniel P. Berrange wrote:
> > On Fri, Jul 28, 2017 at 04:06:28PM +0800, Peter Xu wrote:
> > > Signed-off-by: Peter Xu 
> > > ---
> > >  migration/exec.c   | 2 +-
> > >  migration/fd.c | 2 +-
> > >  migration/socket.c | 4 ++--
> > >  3 files changed, 4 insertions(+), 4 deletions(-)
> > > 
> > > diff --git a/migration/exec.c b/migration/exec.c
> > > index 08b599e..b4412db 100644
> > > --- a/migration/exec.c
> > > +++ b/migration/exec.c
> > > @@ -49,7 +49,7 @@ static gboolean 
> > > exec_accept_incoming_migration(QIOChannel *ioc,
> > >  {
> > >  migration_channel_process_incoming(ioc);
> > >  object_unref(OBJECT(ioc));
> > > -return FALSE; /* unregister */
> > > +return TRUE; /* keep it registered */
> > >  }
> > >  
> > >  void exec_start_incoming_migration(const char *command, Error **errp)
> > > diff --git a/migration/fd.c b/migration/fd.c
> > > index 30f5258..865277a 100644
> > > --- a/migration/fd.c
> > > +++ b/migration/fd.c
> > > @@ -49,7 +49,7 @@ static gboolean fd_accept_incoming_migration(QIOChannel 
> > > *ioc,
> > >  {
> > >  migration_channel_process_incoming(ioc);
> > >  object_unref(OBJECT(ioc));
> > > -return FALSE; /* unregister */
> > > +return TRUE; /* keep it registered */
> > >  }
> > >  
> > >  void fd_start_incoming_migration(const char *infd, Error **errp)
> > > diff --git a/migration/socket.c b/migration/socket.c
> > > index 757d382..f2c2d01 100644
> > > --- a/migration/socket.c
> > > +++ b/migration/socket.c
> > > @@ -153,8 +153,8 @@ static gboolean 
> > > socket_accept_incoming_migration(QIOChannel *ioc,
> > >  
> > >  out:
> > >  /* Close listening socket as its no longer needed */
> > > -qio_channel_close(ioc, NULL);
> > > -return FALSE; /* unregister */
> > > +// qio_channel_close(ioc, NULL);
> > > +return TRUE; /* keep it registered */
> > >  }
> > 
> > 
> > This is not a very desirable approach IMHO.
> > 
> > There are two separate things at play - first we have the listener socket,
> > and second we have the I/O watch that monitors for incoming clients.
> > 
> > The current code here closes the listener, and returns FALSE to unregister
> > the event loop watch.
> > 
> > You're reversing both of these so that we keep the listener open and we
> > keep monitoring for incoming clients. Ignoring migration resume for a
> > minute, this means that the destination QEMU will now accept arbitrarily
> > many incoming clients and keep trying to start a new incoming migration.
> > 
> > The behaviour we need is diferent. We *want* to unregister the event
> > loop watch once we've accepted a client. We should only keep the socket
> > listener in existance, but *not* accept any more clients. Only once we
> > have hit a problem and want to accept a new client to do migration
> > recovery, should we be re-adding the event loop watch.
> 
> I replied with another approach in the other thread: how about we
> re-enable the listen port duing "postcopy-pause" state, and disable
> the listen port when get out of that migration state?

Thinking about this agan, I realize I only considered the socket
migration backend.  If we are using the 'fd' backend, then we
*must* have an explicit monitor command invoked on the target
host to obtain the new client connection. This is because the
'fd' that QEMU has is the actual client connection, not a listener
socket, so libvirt needs to be able to pass in a new fd.


> Here "listen port" I mean both the IO watch and the socket object. Now
> what I can think of: we keep these objects always there, meanwhile we
> introduce a new bit for migration, say, "accept_incoming", to decide
> whether we will really accept one connection. Then we drop the new
> connection if that bit is not set.
> 
> (Or a new QIOChannelFeature to temporarily refuse incoming
>  connection? E.g., QIO_CHANNEL_FEATURE_LISTEN_REFUSE?)

That feature already exists - you just unregister the event loop
watch and re-register it when you're ready again. The chardev
socket code works this way already, since it only allows a single
client at a time

Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

Re: [Qemu-devel] [PATCH 2/2] io: fix qio_channel_socket_accept err handling

2017-08-02 Thread Peter Xu

On Wed, Aug 02, 2017 at 10:30:20AM +0100, Daniel P. Berrange wrote:
> On Wed, Aug 02, 2017 at 11:25:21AM +0800, Peter Xu wrote:
> > When accept failed, we should setup errp with the reason. More
> > importantly, the caller may assume errp be non-NULL when error happens,
> > and not setting the errp may crash QEMU.
> > 
> > At the same time, move the trace_qio_channel_socket_accept_fail() after
> > the if check on EINTR. Two reasons:
> > 
> > 1. when EINTR happened, it's not really a fault (we should just try
> >again), so we should not log with an "accept failure".
> > 
> > 2. trace_*() functions may overwrite errno, then the old errno will be
> >missing. We need to either check errno before trace_*() calls, or
> >reserve the errno.
> > 
> > Signed-off-by: Peter Xu 
> > ---
> >  io/channel-socket.c | 3 ++-
> >  1 file changed, 2 insertions(+), 1 deletion(-)
> > 
> > diff --git a/io/channel-socket.c b/io/channel-socket.c
> > index 53386b7..442f230 100644
> > --- a/io/channel-socket.c
> > +++ b/io/channel-socket.c
> > @@ -340,10 +340,11 @@ qio_channel_socket_accept(QIOChannelSocket *ioc,
> >  cioc->fd = qemu_accept(ioc->fd, (struct sockaddr *)>remoteAddr,
> > >remoteAddrLen);
> >  if (cioc->fd < 0) {
> > -trace_qio_channel_socket_accept_fail(ioc);
> >  if (errno == EINTR) {
> >  goto retry;
> >  }
> > +trace_qio_channel_socket_accept_fail(ioc);
> > +error_setg_errno(errp, errno, "Unable to accept connection");
> 
> Err, you're still clobbering errno in trace_qio_channel_socket_accept_fail
> before calling error_setg_errno

Oops! I'll do a quick respin.  Thanks for pointing out.

-- 
Peter Xu

Re: [Qemu-devel] [PATCH 2/3] block: skip implicit nodes in snapshots, blockjobs

2017-08-02 Thread Stefan Hajnoczi

On Tue, Aug 01, 2017 at 04:49:06PM +0300, Manos Pitsidianakis wrote:
> diff --git a/block.c b/block.c
> index 886a457ab0..9ebdba28b0 100644
> --- a/block.c
> +++ b/block.c
> @@ -4947,3 +4947,20 @@ bool bdrv_can_store_new_dirty_bitmap(BlockDriverState 
> *bs, const char *name,
>  
>  return drv->bdrv_can_store_new_dirty_bitmap(bs, name, granularity, errp);
>  }
> +
> +/* Get first non-implicit node down a bs chain. */
> +BlockDriverState *bdrv_get_first_non_implicit(BlockDriverState *bs)

Linguistically non-implicit == explicit and that would be simpler.

Alternatives to implicit/explicit are:

  transient/permanent
  internal/external
  self-created/user-created
  worker/user

Let's agree on one and use it consistently.  I like the worker nodes vs
user nodes because it reflects their function.

implicit/explicit is fine by me too.  I just think non-implicit is
clunky.

signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH v3 4/7] block: convert ThrottleGroup to object with QOM

2017-08-02 Thread Stefan Hajnoczi

On Tue, Aug 01, 2017 at 07:49:33PM +0300, Manos Pitsidianakis wrote:
> On Tue, Aug 01, 2017 at 04:47:03PM +0100, Stefan Hajnoczi wrote:
> > On Mon, Jul 31, 2017 at 12:54:40PM +0300, Manos Pitsidianakis wrote:
> > > ThrottleGroup is converted to an object. This will allow the future
> > > throttle block filter drive easy creation and configuration of throttle
> > > groups in QMP and cli.
> > > 
> > > A new QAPI struct, ThrottleLimits, is introduced to provide a shared
> > > struct for all throttle configuration needs in QMP.
> > > 
> > > ThrottleGroups can be created via CLI as
> > > -object throttle-group,id=foo,x-iops-total=100,x-..
> > > where x-* are individual limit properties. Since we can't add non-scalar
> > > properties in -object this interface must be used instead. However,
> > > setting these properties must be disabled after initialization because
> > > certain combinations of limits are forbidden and thus configuration
> > > changes should be done in one transaction. The individual properties
> > > will go away when support for non-scalar values in CLI is implemented
> > > and thus are marked as experimental.
> > > 
> > > ThrottleGroup also has a `limits` property that uses the ThrottleLimits
> > > struct.  It can be used to create ThrottleGroups or set the
> > > configuration in existing groups as follows:
> > > 
> > > { "execute": "object-add",
> > >   "arguments": {
> > > "qom-type": "throttle-group",
> > > "id": "foo",
> > > "props" : {
> > >   "limits": {
> > >   "iops-total": 100
> > >   }
> > > }
> > >   }
> > > }
> > > { "execute" : "qom-set",
> > > "arguments" : {
> > > "path" : "foo",
> > > "property" : "limits",
> > > "value" : {
> > > "iops-total" : 99
> > > }
> > > }
> > > }
> > > 
> > > This also means a group's configuration can be fetched with qom-get.
> > > 
> > > ThrottleGroups can be anonymous which means they can't get accessed by
> > > other users ie they will always be units instead of group (Because they
> > > have one ThrottleGroupMember).
> > 
> > blockdev.c automatically assigns -drive id= to the group name if
> > throttling.group= wasn't given.  So who will use anonymous single-drive
> > mode?
> 
> Manual filter nodes. It's possible to not pass a group name value and the
> resulting group will be anonymous. Are you suggesting to move this change to
> the throttle filter patch?

What is the use case?  Human users will stick to the legacy syntax
because it's convenient.  Management tools will use the filter
explicitly in the future, and it's easy for them to choose a name.

Unless there is a need for this case I'd prefer to make the group name
mandatory.  That way there are less code paths to worry about.

> > > @@ -87,32 +99,30 @@ ThrottleState *throttle_group_incref(const char *name)
> > > 
> > >  qemu_mutex_lock(_groups_lock);
> > > 
> > > -/* Look for an existing group with that name */
> > > -QTAILQ_FOREACH(iter, _groups, list) {
> > > -if (!strcmp(name, iter->name)) {
> > > -tg = iter;
> > > -break;
> > > +if (name) {
> > > +/* Look for an existing group with that name */
> > > +QTAILQ_FOREACH(iter, _groups, list) {
> > > +if (!g_strcmp0(name, iter->name)) {
> > > +tg = iter;
> > > +break;
> > > +}
> > >  }
> > >  }
> > > 
> > >  /* Create a new one if not found */
> > >  if (!tg) {
> > > -tg = g_new0(ThrottleGroup, 1);
> > > +/* new ThrottleGroup obj will have a refcnt = 1 */
> > > +tg = THROTTLE_GROUP(object_new(TYPE_THROTTLE_GROUP));
> > >  tg->name = g_strdup(name);
> > > -tg->clock_type = QEMU_CLOCK_REALTIME;
> > > -
> > > -if (qtest_enabled()) {
> > > -/* For testing block IO throttling only */
> > > -tg->clock_type = QEMU_CLOCK_VIRTUAL;
> > > -}
> > > -qemu_mutex_init(>lock);
> > > -throttle_init(>ts);
> > > -QLIST_INIT(>head);
> > > -
> > > -QTAILQ_INSERT_TAIL(_groups, tg, list);
> > > +throttle_group_obj_complete((UserCreatable *)tg, _abort);
> > >  }
> > > 
> > > -tg->refcount++;
> > > +qemu_mutex_lock(>lock);
> > > +if (!QLIST_EMPTY(>head)) {
> > > +/* only ref if the group is not empty */
> > > +object_ref(OBJECT(tg));
> > > +}
> > > +qemu_mutex_unlock(>lock);
> > 
> > How do the refcounts work in various cases (legacy -drive
> > throttling.group and -object throttle-group with 0, 1, or multiple
> > drives)?
> > 
> > It's not obvious to me that this code works in all cases.
> 
> Object is created with object_new(): ref count is 1
> Each time we call throttle_group_incref() to add a new member to the group,
> we increase the ref count by 1. We skip the first time we do that because
> there's already a reference. When all members are removed, object is
> deleted.

If the

Re: [Qemu-devel] [RFC] block-insert-node and block-job-delete

2017-08-02 Thread Stefan Hajnoczi

On Fri, Jul 28, 2017 at 01:55:38PM +0200, Kevin Wolf wrote:
> Am 28.07.2017 um 00:09 hat John Snow geschrieben:
> > On 07/26/2017 02:23 PM, Manos Pitsidianakis wrote:
> > > On Wed, Jul 26, 2017 at 04:12:21PM +0100, Stefan Hajnoczi wrote:
> > > > On Wed, Jul 26, 2017 at 05:19:24PM +0300, Manos Pitsidianakis wrote:
> > I think I agree with Stefan's concerns that this is more of a
> > temporary workaround for the benefit of jobs, but that there may well
> > be other reasons (especially in the future) where graph manipulations
> > may occur invisibly to the user.
> 
> We should do our best to avoid this. Implicit graph changes only make
> libvirt's life hard. I agree that we'll have many more graph changes in
> the future, but let's keep them as explicit as possible.

Speaking of implicit graph changes: the COLO replication driver
(block/replication.c) uses the backup job internally and that requires
before write notifiers.  That means there is an implicit graph change
when COLO decides to complete the backup job.

Stefan


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH 1/2] migration: fix comment disorder in RAMState

2017-08-02 Thread Juan Quintela

Peter Xu  wrote:
> Comments for "migration_dirty_pages" and "bitmap_mutex" are switched.
> Fix it.
>
> Reviewed-by: Dr. David Alan Gilbert 
> Signed-off-by: Peter Xu 

Reviewed-by: Juan Quintela

Re: [Qemu-devel] [PATCH 1/3] block: add options parameter to bdrv_new_open_driver()

2017-08-02 Thread Stefan Hajnoczi

On Tue, Aug 01, 2017 at 04:49:05PM +0300, Manos Pitsidianakis wrote:
> diff --git a/block.c b/block.c
> index 37e72b7a96..886a457ab0 100644
> --- a/block.c
> +++ b/block.c
> @@ -1149,16 +1149,26 @@ free_and_fail:
>  return ret;
>  }
>  
> +/*
> + * If options is not NULL it is cloned (which adds another reference to the
> + * option entries). If the call to bdrv_new_open_driver() is successful, the
> + * caller should unref options to pass ownership.
> + * */

Please follow the ownership convention for the options argument.
Existing block.c function take ownership of options:

  * options is a QDict of options to pass to the block drivers, or NULL for an
  * empty set of options. The reference to the QDict belongs to the block layer
  * after the call (even on failure), so if the caller intends to reuse the
  * dictionary, it needs to use QINCREF() before calling bdrv_open.

>  BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char 
> *node_name,
> -   int flags, Error **errp)
> +   int flags, QDict *options, Error 
> **errp)

signature.asc
Description: PGP signature

[Qemu-devel] [PATCH for-2.10 2/3] target/mips: Drop redundant gen_io_start/stop()

2017-08-02 Thread James Hogan

DMTC0 CP0_Cause does a redundant gen_io_start() and gen_io_end() pair,
even though this is done for all DMTC0 operations outside of the switch
statement. Remove these redundant calls.

Fixes: 5dc5d9f055c5 ("mips: more fixes to the MIPS interrupt glue logic")
Signed-off-by: James Hogan 
Cc: Yongbok Kim 
Cc: Aurelien Jarno 
---
 target/mips/translate.c | 8 
 1 file changed, 0 insertions(+), 8 deletions(-)

diff --git a/target/mips/translate.c b/target/mips/translate.c
index 6b41f7b65e00..6e724ac71dcd 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -7403,15 +7403,7 @@ static void gen_dmtc0(DisasContext *ctx, TCGv arg, int 
reg, int sel)
 switch (sel) {
 case 0:
 save_cpu_state(ctx, 1);
-/* Mark as an IO operation because we may trigger a software
-   interrupt.  */
-if (ctx->tb->cflags & CF_USE_ICOUNT) {
-gen_io_start();
-}
 gen_helper_mtc0_cause(cpu_env, arg);
-if (ctx->tb->cflags & CF_USE_ICOUNT) {
-gen_io_end();
-}
 /* Stop translation as we may have triggered an intetrupt. BS_STOP
  * isn't sufficient, we need to ensure we break out of translated
  * code to check for pending interrupts.  */
-- 
git-series 0.8.10

Re: [Qemu-devel] [PULL 00/10] pc, acpi, virtio: fixes, test speedup for rc1

2017-08-02 Thread Peter Maydell

On 1 August 2017 at 22:37, Michael S. Tsirkin  wrote:
> The following changes since commit 7d48cf8102a10e4a54333811bafb5eb566509268:
>
>   Merge remote-tracking branch 'remotes/stefanha/tags/tracing-pull-request' 
> into staging (2017-08-01 14:33:56 +0100)
>
> are available in the git repository at:
>
>   git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream
>
> for you to fetch changes up to 3a3fcc75f92ab0d71ba0e7511af7dc575c4b4f6c:
>
>   pc: acpi: force FADT rev1 for 440fx based machine types (2017-08-02 
> 00:13:26 +0300)
>
> 
> pc, acpi, virtio: fixes, test speedup for rc1
>
> Some fixes all over the place. Notably vhost-user gained a new message
> to set endian-ness. Borderline for 2.10 but seems to be the only way to
> fix legacy guests.  Also pc tests are run on kvm now. Not a fix at all
> but doesn't touch qemu itself, so I merged it since I had to run these a
> lot and I just got tired of waiting for these to finish.
>
> Signed-off-by: Michael S. Tsirkin 

Applied, thanks.

-- PMM

Re: [Qemu-devel] [PATCH v3 4/7] block: convert ThrottleGroup to object with QOM

2017-08-02 Thread Manos Pitsidianakis


On Wed, Aug 02, 2017 at 11:39:22AM +0100, Stefan Hajnoczi wrote:

On Tue, Aug 01, 2017 at 07:49:33PM +0300, Manos Pitsidianakis wrote:

On Tue, Aug 01, 2017 at 04:47:03PM +0100, Stefan Hajnoczi wrote:
> On Mon, Jul 31, 2017 at 12:54:40PM +0300, Manos Pitsidianakis wrote:
> > ThrottleGroup is converted to an object. This will allow the future
> > throttle block filter drive easy creation and configuration of throttle
> > groups in QMP and cli.
> >
> > A new QAPI struct, ThrottleLimits, is introduced to provide a shared
> > struct for all throttle configuration needs in QMP.
> >
> > ThrottleGroups can be created via CLI as
> > -object throttle-group,id=foo,x-iops-total=100,x-..
> > where x-* are individual limit properties. Since we can't add non-scalar
> > properties in -object this interface must be used instead. However,
> > setting these properties must be disabled after initialization because
> > certain combinations of limits are forbidden and thus configuration
> > changes should be done in one transaction. The individual properties
> > will go away when support for non-scalar values in CLI is implemented
> > and thus are marked as experimental.
> >
> > ThrottleGroup also has a `limits` property that uses the ThrottleLimits
> > struct.  It can be used to create ThrottleGroups or set the
> > configuration in existing groups as follows:
> >
> > { "execute": "object-add",
> >   "arguments": {
> > "qom-type": "throttle-group",
> > "id": "foo",
> > "props" : {
> >   "limits": {
> >   "iops-total": 100
> >   }
> > }
> >   }
> > }
> > { "execute" : "qom-set",
> > "arguments" : {
> > "path" : "foo",
> > "property" : "limits",
> > "value" : {
> > "iops-total" : 99
> > }
> > }
> > }
> >
> > This also means a group's configuration can be fetched with qom-get.
> >
> > ThrottleGroups can be anonymous which means they can't get accessed by
> > other users ie they will always be units instead of group (Because they
> > have one ThrottleGroupMember).
>
> blockdev.c automatically assigns -drive id= to the group name if
> throttling.group= wasn't given.  So who will use anonymous single-drive
> mode?

Manual filter nodes. It's possible to not pass a group name value and the
resulting group will be anonymous. Are you suggesting to move this change to
the throttle filter patch?


What is the use case?  Human users will stick to the legacy syntax
because it's convenient.  Management tools will use the filter
explicitly in the future, and it's easy for them to choose a name.

Unless there is a need for this case I'd prefer to make the group name
mandatory.  That way there are less code paths to worry about.


I think Kevin requested this though I don't really remember the use 
case.



> > @@ -87,32 +99,30 @@ ThrottleState *throttle_group_incref(const char *name)
> >
> >  qemu_mutex_lock(_groups_lock);
> >
> > -/* Look for an existing group with that name */
> > -QTAILQ_FOREACH(iter, _groups, list) {
> > -if (!strcmp(name, iter->name)) {
> > -tg = iter;
> > -break;
> > +if (name) {
> > +/* Look for an existing group with that name */
> > +QTAILQ_FOREACH(iter, _groups, list) {
> > +if (!g_strcmp0(name, iter->name)) {
> > +tg = iter;
> > +break;
> > +}
> >  }
> >  }
> >
> >  /* Create a new one if not found */
> >  if (!tg) {
> > -tg = g_new0(ThrottleGroup, 1);
> > +/* new ThrottleGroup obj will have a refcnt = 1 */
> > +tg = THROTTLE_GROUP(object_new(TYPE_THROTTLE_GROUP));
> >  tg->name = g_strdup(name);
> > -tg->clock_type = QEMU_CLOCK_REALTIME;
> > -
> > -if (qtest_enabled()) {
> > -/* For testing block IO throttling only */
> > -tg->clock_type = QEMU_CLOCK_VIRTUAL;
> > -}
> > -qemu_mutex_init(>lock);
> > -throttle_init(>ts);
> > -QLIST_INIT(>head);
> > -
> > -QTAILQ_INSERT_TAIL(_groups, tg, list);
> > +throttle_group_obj_complete((UserCreatable *)tg, _abort);
> >  }
> >
> > -tg->refcount++;
> > +qemu_mutex_lock(>lock);
> > +if (!QLIST_EMPTY(>head)) {
> > +/* only ref if the group is not empty */
> > +object_ref(OBJECT(tg));
> > +}
> > +qemu_mutex_unlock(>lock);
>
> How do the refcounts work in various cases (legacy -drive
> throttling.group and -object throttle-group with 0, 1, or multiple
> drives)?
>
> It's not obvious to me that this code works in all cases.

Object is created with object_new(): ref count is 1
Each time we call throttle_group_incref() to add a new member to the group,
we increase the ref count by 1. We skip the first time we do that because
there's already a reference. When all members are removed, object is
deleted.


If the ThrottleGroup was created with -object throttle-group it
shouldn't disappear when the last

Re: [Qemu-devel] [PATCH 2/2] io: fix qio_channel_socket_accept err handling

2017-08-02 Thread Juan Quintela

Peter Xu  wrote:
> When accept failed, we should setup errp with the reason. More
> importantly, the caller may assume errp be non-NULL when error happens,
> and not setting the errp may crash QEMU.
>
> At the same time, move the trace_qio_channel_socket_accept_fail() after
> the if check on EINTR. Two reasons:
>
> 1. when EINTR happened, it's not really a fault (we should just try
>again), so we should not log with an "accept failure".
>
> 2. trace_*() functions may overwrite errno, then the old errno will be
>missing. We need to either check errno before trace_*() calls, or
>reserve the errno.
>
> Signed-off-by: Peter Xu 

Reviewed-by: Juan Quintela

[Qemu-devel] [PATCH for-2.10 0/3] target/mips: Fix fallout from indirect branch optimisation

2017-08-02 Thread James Hogan

Primarily patch 1 fixes some fallout from the recent indirect branch
optimisations in v2.10.0-rc0, where interrupt handling may be delayed
after interrupts are enabled until it is too late.

Patch 2 and 3 then fix a couple of other loosely related issues spotted
in the process or in testing.

Please give a good look (I'm only roughly familiar with icount stuff),
and consider for v2.10.

Cc: Aurelien Jarno 
Cc: Yongbok Kim 
Cc: Richard Henderson 

James Hogan (3):
  target/mips: Use BS_EXCP where interrupts are expected
  target/mips: Drop redundant gen_io_start/stop()
  target/mips: Fix RDHWR CC with icount

 target/mips/translate.c | 66 --
 1 file changed, 45 insertions(+), 21 deletions(-)

-- 
git-series 0.8.10

[Qemu-devel] [PATCH for-2.10 3/3] target/mips: Fix RDHWR CC with icount

2017-08-02 Thread James Hogan

RDHWR CC reads the CPU timer like MFC0 CP0_Count, so with icount enabled
it must set can_do_io while it calls the helper to avoid the "Bad icount
read" error. It should also break out of the translation loop to ensure
that timer interrupts are immediately handled.

Fixes: 2e70f6efa8b9 ("Add instruction counter.")
Signed-off-by: James Hogan 
Cc: Aurelien Jarno 
Cc: Yongbok Kim 
---
I've based this on MFC0 Count, but this instruction is also available to
usermode (e.g. CONFIG_USER_ONLY), which I presume is still fine.
---
 target/mips/translate.c | 11 +++
 1 file changed, 11 insertions(+), 0 deletions(-)

diff --git a/target/mips/translate.c b/target/mips/translate.c
index 6e724ac71dcd..f29092f6d4ac 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -10759,8 +10759,19 @@ static void gen_rdhwr(DisasContext *ctx, int rt, int 
rd, int sel)
 gen_store_gpr(t0, rt);
 break;
 case 2:
+if (ctx->tb->cflags & CF_USE_ICOUNT) {
+gen_io_start();
+}
 gen_helper_rdhwr_cc(t0, cpu_env);
+if (ctx->tb->cflags & CF_USE_ICOUNT) {
+gen_io_end();
+}
 gen_store_gpr(t0, rt);
+/* Break the TB to be able to take timer interrupts immediately
+   after reading count. BS_STOP isn't sufficient, we need to ensure
+   we break completely out of translated code.  */
+gen_save_pc(ctx->pc + 4);
+ctx->bstate = BS_EXCP;
 break;
 case 3:
 gen_helper_rdhwr_ccres(t0, cpu_env);
-- 
git-series 0.8.10

Re: [Qemu-devel] [PATCH 3/3] block: remove legacy I/O throttling

2017-08-02 Thread Stefan Hajnoczi

On Tue, Aug 01, 2017 at 04:49:07PM +0300, Manos Pitsidianakis wrote:
> diff --git a/block.c b/block.c
> index 9ebdba28b0..c6aad25286 100644
> --- a/block.c
> +++ b/block.c
> @@ -1975,6 +1975,7 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState 
> *child_bs,
>  child = g_new(BdrvChild, 1);
>  *child = (BdrvChild) {
>  .bs = NULL,
> +.parent_bs  = NULL,
>  .name   = g_strdup(child_name),
>  .role   = child_role,
>  .perm   = perm,
> @@ -2009,6 +2010,7 @@ BdrvChild *bdrv_attach_child(BlockDriverState 
> *parent_bs,
>  if (child == NULL) {
>  return NULL;
>  }
> +child->parent_bs = parent_bs;
>  
>  QLIST_INSERT_HEAD(_bs->children, child, next);
>  return child;
> @@ -3729,6 +3731,12 @@ const char *bdrv_get_parent_name(const 
> BlockDriverState *bs)
>  return name;
>  }
>  }
> +if (c->parent_bs && c->parent_bs->implicit) {
> +name = bdrv_get_parent_name(c->parent_bs);
> +if (name && *name) {
> +return name;
> +}
> +}
>  }
>  
>  return NULL;

This should be a separate patch.

Who updates parent_bs if the parent is changed (e.g.
bdrv_replace_node())?

We already have bs->parents.  Why is BdrvChild->parent_bs needed?

> -void blk_io_limits_disable(BlockBackend *blk)
> +void blk_io_limits_disable(BlockBackend *blk, Error **errp)
>  {
> -assert(blk->public.throttle_group_member.throttle_state);
> -bdrv_drained_begin(blk_bs(blk));

Is it safe to drop drained_begin?  We must ensure that no I/O requests
run during this function.

> -throttle_group_unregister_tgm(>public.throttle_group_member);
> -bdrv_drained_end(blk_bs(blk));
> +BlockDriverState *bs, *throttle_node;
> +
> +throttle_node = blk_get_public(blk)->throttle_node;

Is blk_get_public() still necessary?  Perhaps we can do away with the
concept of the public struct now.  It doesn't need to be done in this
patch though.

> +
> +assert(throttle_node && throttle_node->refcnt == 1);

Are you sure the throttle_node->refcnt == 1 assertion holds?  For
example, does the built-in NBD server have a reference to the throttle
node if nbd-server-add is called after throttling has been enabled?

Since we have the blk->throttle_node pointer we know we're the owner.
Others may be using the node too but we may choose to remove it at any
time.

signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH] migration: fix small leaks

2017-08-02 Thread Dr. David Alan Gilbert

* Marc-André Lureau (marcandre.lur...@redhat.com) wrote:
> Spotted thanks to valgrind and tests/device-introspect-test:
> 
> ==11711== 1 bytes in 1 blocks are definitely lost in loss record 6 of 14,537
> ==11711==at 0x4C2EB6B: malloc (vg_replace_malloc.c:299)
> ==11711==by 0x1E0CDBD8: g_malloc (gmem.c:94)
> ==11711==by 0x1E0E696E: g_strdup (gstrfuncs.c:363)
> ==11711==by 0x695693: migration_instance_init (migration.c:2226)
> ==11711==by 0x717C4B: object_init_with_type (object.c:344)
> ==11711==by 0x717E80: object_initialize_with_type (object.c:375)
> ==11711==by 0x7182EB: object_new_with_type (object.c:483)
> ==11711==by 0x718328: object_new (object.c:493)
> ==11711==by 0x4B8A29: qmp_device_list_properties (qmp.c:542)
> ==11711==by 0x4A9561: qmp_marshal_device_list_properties 
> (qmp-marshal.c:1425)
> ==11711==by 0x819D4A: do_qmp_dispatch (qmp-dispatch.c:104)
> ==11711==by 0x819E82: qmp_dispatch (qmp-dispatch.c:131)
> 
> Signed-off-by: Marc-André Lureau 

Queued for migration

> ---
>  migration/migration.c | 10 ++
>  1 file changed, 10 insertions(+)
> 
> diff --git a/migration/migration.c b/migration/migration.c
> index 085c32c994..c3fe0ed9ca 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -2214,6 +2214,15 @@ static void migration_class_init(ObjectClass *klass, 
> void *data)
>  dc->props = migration_properties;
>  }
>  
> +static void migration_instance_finalize(Object *obj)
> +{
> +MigrationState *ms = MIGRATION_OBJ(obj);
> +MigrationParameters *params = >parameters;
> +
> +g_free(params->tls_hostname);
> +g_free(params->tls_creds);
> +}
> +
>  static void migration_instance_init(Object *obj)
>  {
>  MigrationState *ms = MIGRATION_OBJ(obj);
> @@ -2282,6 +2291,7 @@ static const TypeInfo migration_type = {
>  .class_size = sizeof(MigrationClass),
>  .instance_size = sizeof(MigrationState),
>  .instance_init = migration_instance_init,
> +.instance_finalize = migration_instance_finalize,
>  };
>  
>  static void register_migration_types(void)
> -- 
> 2.14.0.rc0.1.g40ca67566
> 
--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

Re: [Qemu-devel] [PATCH 3/3] block: remove legacy I/O throttling

2017-08-02 Thread Kevin Wolf

Am 02.08.2017 um 12:07 hat Stefan Hajnoczi geschrieben:
> On Tue, Aug 01, 2017 at 04:49:07PM +0300, Manos Pitsidianakis wrote:
> > diff --git a/block.c b/block.c
> > index 9ebdba28b0..c6aad25286 100644
> > --- a/block.c
> > +++ b/block.c
> > @@ -1975,6 +1975,7 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState 
> > *child_bs,
> >  child = g_new(BdrvChild, 1);
> >  *child = (BdrvChild) {
> >  .bs = NULL,
> > +.parent_bs  = NULL,
> >  .name   = g_strdup(child_name),
> >  .role   = child_role,
> >  .perm   = perm,
> > @@ -2009,6 +2010,7 @@ BdrvChild *bdrv_attach_child(BlockDriverState 
> > *parent_bs,
> >  if (child == NULL) {
> >  return NULL;
> >  }
> > +child->parent_bs = parent_bs;
> >  
> >  QLIST_INSERT_HEAD(_bs->children, child, next);
> >  return child;
> > @@ -3729,6 +3731,12 @@ const char *bdrv_get_parent_name(const 
> > BlockDriverState *bs)
> >  return name;
> >  }
> >  }
> > +if (c->parent_bs && c->parent_bs->implicit) {
> > +name = bdrv_get_parent_name(c->parent_bs);
> > +if (name && *name) {
> > +return name;
> > +}
> > +}
> >  }
> >  
> >  return NULL;
> 
> This should be a separate patch.
> 
> Who updates parent_bs if the parent is changed (e.g.
> bdrv_replace_node())?
> 
> We already have bs->parents.  Why is BdrvChild->parent_bs needed?

I haven't look at the whole patch yet, but BdrvChild->parent_bs is a
thing that intentionally doesn't exist. A node simply has no business
knowing its parents - which may or may not be BlockDriverStates (the
obvious example where they aren't BDSes are BlockBackends, but block
jobs own some BdrvChild objects, too).

Usually the replacement is a BdrvChildRole callback.

Kevin


signature.asc
Description: PGP signature

[Qemu-devel] [PATCH] cpu: don't allow negative core id

2017-08-02 Thread Laurent Vivier

With pseries machine type a negative core-id is not managed properly:
-1 gives an inaccurate error message ("core -1 already populated"),
-2 crashes QEMU (core dump)

As it seems a negative value is invalid for any architecture,
instead of checking this in spapr_core_pre_plug() I think it's better
to check this in the generic part, core_prop_set_core_id()

Signed-off-by: Laurent Vivier 
---
 hw/cpu/core.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/hw/cpu/core.c b/hw/cpu/core.c
index 2bf960d..bd578ab 100644
--- a/hw/cpu/core.c
+++ b/hw/cpu/core.c
@@ -33,6 +33,11 @@ static void core_prop_set_core_id(Object *obj, Visitor *v, 
const char *name,
 return;
 }
 
+if (value < 0) {
+error_setg(errp, "Invalid core id %"PRId64, value);
+return;
+}
+
 core->core_id = value;
 }
 
-- 
2.9.4

Re: [Qemu-devel] [RFC 19/29] migration: let dst listen on port always

2017-08-02 Thread Peter Xu

On Wed, Aug 02, 2017 at 10:26:48AM +0100, Daniel P. Berrange wrote:
> On Wed, Aug 02, 2017 at 03:02:24PM +0800, Peter Xu wrote:
> > On Tue, Aug 01, 2017 at 11:56:10AM +0100, Daniel P. Berrange wrote:
> > > On Fri, Jul 28, 2017 at 04:06:28PM +0800, Peter Xu wrote:
> > > > Signed-off-by: Peter Xu 
> > > > ---
> > > >  migration/exec.c   | 2 +-
> > > >  migration/fd.c | 2 +-
> > > >  migration/socket.c | 4 ++--
> > > >  3 files changed, 4 insertions(+), 4 deletions(-)
> > > > 
> > > > diff --git a/migration/exec.c b/migration/exec.c
> > > > index 08b599e..b4412db 100644
> > > > --- a/migration/exec.c
> > > > +++ b/migration/exec.c
> > > > @@ -49,7 +49,7 @@ static gboolean 
> > > > exec_accept_incoming_migration(QIOChannel *ioc,
> > > >  {
> > > >  migration_channel_process_incoming(ioc);
> > > >  object_unref(OBJECT(ioc));
> > > > -return FALSE; /* unregister */
> > > > +return TRUE; /* keep it registered */
> > > >  }
> > > >  
> > > >  void exec_start_incoming_migration(const char *command, Error **errp)
> > > > diff --git a/migration/fd.c b/migration/fd.c
> > > > index 30f5258..865277a 100644
> > > > --- a/migration/fd.c
> > > > +++ b/migration/fd.c
> > > > @@ -49,7 +49,7 @@ static gboolean 
> > > > fd_accept_incoming_migration(QIOChannel *ioc,
> > > >  {
> > > >  migration_channel_process_incoming(ioc);
> > > >  object_unref(OBJECT(ioc));
> > > > -return FALSE; /* unregister */
> > > > +return TRUE; /* keep it registered */
> > > >  }
> > > >  
> > > >  void fd_start_incoming_migration(const char *infd, Error **errp)
> > > > diff --git a/migration/socket.c b/migration/socket.c
> > > > index 757d382..f2c2d01 100644
> > > > --- a/migration/socket.c
> > > > +++ b/migration/socket.c
> > > > @@ -153,8 +153,8 @@ static gboolean 
> > > > socket_accept_incoming_migration(QIOChannel *ioc,
> > > >  
> > > >  out:
> > > >  /* Close listening socket as its no longer needed */
> > > > -qio_channel_close(ioc, NULL);
> > > > -return FALSE; /* unregister */
> > > > +// qio_channel_close(ioc, NULL);
> > > > +return TRUE; /* keep it registered */
> > > >  }
> > > 
> > > 
> > > This is not a very desirable approach IMHO.
> > > 
> > > There are two separate things at play - first we have the listener socket,
> > > and second we have the I/O watch that monitors for incoming clients.
> > > 
> > > The current code here closes the listener, and returns FALSE to unregister
> > > the event loop watch.
> > > 
> > > You're reversing both of these so that we keep the listener open and we
> > > keep monitoring for incoming clients. Ignoring migration resume for a
> > > minute, this means that the destination QEMU will now accept arbitrarily
> > > many incoming clients and keep trying to start a new incoming migration.
> > > 
> > > The behaviour we need is diferent. We *want* to unregister the event
> > > loop watch once we've accepted a client. We should only keep the socket
> > > listener in existance, but *not* accept any more clients. Only once we
> > > have hit a problem and want to accept a new client to do migration
> > > recovery, should we be re-adding the event loop watch.
> > 
> > I replied with another approach in the other thread: how about we
> > re-enable the listen port duing "postcopy-pause" state, and disable
> > the listen port when get out of that migration state?
> 
> Thinking about this agan, I realize I only considered the socket
> migration backend.  If we are using the 'fd' backend, then we
> *must* have an explicit monitor command invoked on the target
> host to obtain the new client connection. This is because the
> 'fd' that QEMU has is the actual client connection, not a listener
> socket, so libvirt needs to be able to pass in a new fd.

Hmm right... So looks like I cannot really ignore this issue even for
the first version (I thought I could).

Not sure whether we can do this: just allow the "migrate_incoming URL"
to work even for not "delayed" cases? Then when we receive that
command, we'll do:

- if there is no existing listening port (when the old URL is one of
  "defer", "exec", "fd"), we create a new listening port using the new
  URL provided

- if there is existing listening port (when the old URL is one of
  "tcp", "sock", "rdma"), we first release the old listening port and
  resources, then create a new port using the new URL

> 
> 
> > Here "listen port" I mean both the IO watch and the socket object. Now
> > what I can think of: we keep these objects always there, meanwhile we
> > introduce a new bit for migration, say, "accept_incoming", to decide
> > whether we will really accept one connection. Then we drop the new
> > connection if that bit is not set.
> > 
> > (Or a new QIOChannelFeature to temporarily refuse incoming
> >  connection? E.g., QIO_CHANNEL_FEATURE_LISTEN_REFUSE?)
> 
> That feature already exists - you just unregister the event loop
> watch and re-register it when you're ready again. The chardev
>

Re: [Qemu-devel] [PATCH] watchdog: wdt_aspeed: Add support for the reset width register

2017-08-02 Thread Andrew Jeffery

On Tue, 2017-08-01 at 09:21 +0200, Cédric Le Goater wrote:
> On 08/01/2017 03:04 AM, Andrew Jeffery wrote:
> > The reset width register controls how the pulse on the SoC's WDTRST{1,2}
> > pins behaves. A pulse is emitted if the external reset bit is set in
> > WDT_CTRL. WDT_RESET_WIDTH requires magic bit patterns to configure both
> > push-pull/open-drain and active-high/active-low behaviours and thus
> > needs some special handling in the write path.
> > 
> > > > Signed-off-by: Andrew Jeffery 
> > ---
> > I understand that we're in stabilisation mode, but I thought I'd send this 
> > out
> > to provoke any feedback. Happy to resend after the 2.10 release if required.
> > 
> >  hw/watchdog/wdt_aspeed.c | 47 
> > +--
> >  1 file changed, 37 insertions(+), 10 deletions(-)
> > 
> > diff --git a/hw/watchdog/wdt_aspeed.c b/hw/watchdog/wdt_aspeed.c
> > index 8bbe579b6b66..4ef1412e99fc 100644
> > --- a/hw/watchdog/wdt_aspeed.c
> > +++ b/hw/watchdog/wdt_aspeed.c
> > @@ -14,10 +14,10 @@
> >  #include "qemu/timer.h"
> >  #include "hw/watchdog/wdt_aspeed.h"
> >  
> > -#define WDT_STATUS  (0x00 / 4)
> > -#define WDT_RELOAD_VALUE(0x04 / 4)
> > -#define WDT_RESTART (0x08 / 4)
> > -#define WDT_CTRL(0x0C / 4)
> > +#define WDT_STATUS  (0x00 / 4)
> > +#define WDT_RELOAD_VALUE(0x04 / 4)
> > +#define WDT_RESTART (0x08 / 4)
> > +#define WDT_CTRL(0x0C / 4)
> >  #define   WDT_CTRL_RESET_MODE_SOC   (0x00 << 5)
> >  #define   WDT_CTRL_RESET_MODE_FULL_CHIP (0x01 << 5)
> >  #define   WDT_CTRL_1MHZ_CLK BIT(4)
> > @@ -25,12 +25,21 @@
> >  #define   WDT_CTRL_WDT_INTR BIT(2)
> >  #define   WDT_CTRL_RESET_SYSTEM BIT(1)
> >  #define   WDT_CTRL_ENABLE   BIT(0)
> > +#define WDT_RESET_WIDTH (0x18 / 4)
> > +#define   WDT_RESET_WIDTH_ACTIVE_HIGH   BIT(31)
> > +#define WDT_POLARITY_MASK   (0xFF << 24)
> > +#define WDT_ACTIVE_HIGH_MAGIC   (0xA5 << 24)
> > +#define WDT_ACTIVE_LOW_MAGIC(0x5A << 24)
> > +#define   WDT_RESET_WIDTH_PUSH_PULL BIT(30)
> > +#define WDT_DRIVE_TYPE_MASK (0xFF << 24)
> > +#define WDT_PUSH_PULL_MAGIC (0xA8 << 24)
> > +#define WDT_OPEN_DRAIN_MAGIC(0x8A << 24)
> > +#define   WDT_RESET_WIDTH_DURATION  0xFFF;
> 
> I would call this define WDT_RESET_WIDTH_DEFAULT (0xFF) and 
> use it also in the aspeed_wdt_reset()

This WDT_RESET_WIDTH_DURATION is intended as a mask. It's probably
poorly named. As I found when replying to Phil, the width actually
varies between the AST2400 and AST2500, and on the AST2500 is actually
20 bits wide, not 12 (and is 8 bits on the AST2400). I'll need to
respin to fix that. On the otherhand, 0xFF is the default value for the
field on both the AST2400 and AST2500.

> 
> I have checked the specs and the bits definitions are correct.
> What else could we model ? Would the pulse be interesting ? 

Hrm, we could. In Witherspoon (and Romulus?) systems the pulse is being
used to drive the FAULT pin on the MAX31785 fan controller. I've got
some very hacky patches lying around adding PMBus support and a basic
MAX31785 model to QEMU - with a bit of extra work we could hook up the
external watchdog signal to the FAULT pin and drive our virtual fans to
100% PWM duty as per the hardware.

It's not high on my todo list though :)

Andrew

> 
> C.
> 
> 
> >  
> > -#define WDT_TIMEOUT_STATUS  (0x10 / 4)
> > -#define WDT_TIMEOUT_CLEAR   (0x14 / 4)
> > -#define WDT_RESET_WDITH (0x18 / 4)
> > +#define WDT_TIMEOUT_STATUS  (0x10 / 4)
> > +#define WDT_TIMEOUT_CLEAR   (0x14 / 4)
> >  
> > -#define WDT_RESTART_MAGIC   0x4755
> > +#define WDT_RESTART_MAGIC   0x4755
> >  
> >  static bool aspeed_wdt_is_enabled(const AspeedWDTState *s)
> >  {
> > @@ -55,9 +64,10 @@ static uint64_t aspeed_wdt_read(void *opaque, hwaddr 
> > offset, unsigned size)
> >  return 0;
> >  case WDT_CTRL:
> >  return s->regs[WDT_CTRL];
> > +case WDT_RESET_WIDTH:
> > +return s->regs[WDT_RESET_WIDTH];
> >  case WDT_TIMEOUT_STATUS:
> >  case WDT_TIMEOUT_CLEAR:
> > -case WDT_RESET_WDITH:
> >  qemu_log_mask(LOG_UNIMP,
> >    "%s: uninmplemented read at offset 0x%" HWADDR_PRIx 
> > "\n",
> >    __func__, offset);
> > @@ -119,9 +129,25 @@ static void aspeed_wdt_write(void *opaque, hwaddr 
> > offset, uint64_t data,
> >  timer_del(s->timer);
> >  }
> >  break;
> > +case WDT_RESET_WIDTH:
> > +{
> > +uint32_t property = data & WDT_POLARITY_MASK;
> > +
> > +if (property == WDT_ACTIVE_HIGH_MAGIC) {
> > +s->regs[WDT_RESET_WIDTH] |= WDT_RESET_WIDTH_ACTIVE_HIGH;
> > +} else if (property == WDT_ACTIVE_LOW_MAGIC) {
> > +

Re: [Qemu-devel] [RFC 16/29] qmp: hmp: add migrate "resume" option

2017-08-02 Thread Daniel P. Berrange

On Wed, Aug 02, 2017 at 01:56:46PM +0800, Peter Xu wrote:
> On Tue, Aug 01, 2017 at 12:03:48PM +0100, Daniel P. Berrange wrote:
> > On Fri, Jul 28, 2017 at 04:06:25PM +0800, Peter Xu wrote:
> > > It will be used when we want to resume one paused migration.
> > > 
> > > Signed-off-by: Peter Xu 
> > > ---
> > >  hmp-commands.hx   | 7 ---
> > >  hmp.c | 4 +++-
> > >  migration/migration.c | 2 +-
> > >  qapi-schema.json  | 5 -
> > >  4 files changed, 12 insertions(+), 6 deletions(-)
> > 
> > I'm not seeing explicit info about how we handle the original failure
> > and how it relates to this resume command, but this feels like a
> > potentially racy approach to me.
> > 
> > If we have a network problem between source & target, we could see
> > two results. Either the TCP stream will simply hang (it'll still
> > appear open to QEMU but no traffic will be flowing),
> 
> (let's say this is the "1st condition")
> 
> > or the connection
> > may actually break such that we get EOF and end up closing the file
> > descriptor.
> 
> (let's say this is the "2nd condition")
> 
> > 
> > In the latter case, we're ok because the original channel is now
> > gone and we can safely establish the new one by issuing the new
> > 'migrate --resume URI' command.
> > 
> > In the former case, however, there is the possibility that the
> > hang may come back to life at some point, concurrently with us
> > trying to do 'migrate --resume URI' and I'm unclear on the
> > semantics if that happens.
> > 
> > Should the original connection carry on, and thus cause the
> > 'migrate --resume' command to fail, or will we forcably terminate
> > the original connection no matter what and use the new "resumed"
> > connection.
> 
> Hmm yes, this is a good question. Currently this series is only
> handling the 2nd condition, say, when we can detect the error via
> system calls (IIUC we can know nothing when the 1st condition is
> encountered, we just e.g. block at the system calls as usual when
> reading the file handle). And currently the "resume" command is only
> allowed if the 2nd condition is detected (so it will never destroy an
> existing channel).
> 
> If you see the next following patch, there is something like:
> 
> if (has_resume && resume) {
> if (s->state != MIGRATION_STATUS_POSTCOPY_PAUSED) {
> error_setg(errp, "Cannot resume if there is no "
>"paused migration");
> return;
> }
> goto do_resume;
> }
> 
> And here MIGRATION_STATUS_POSTCOPY_PAUSED will only be set when the
> 2nd condition is met.
> 
> > 
> > There's also synchronization with the target host - at the time we
> > want to recover, we need to be able to tell the target to accept
> > new incoming clients again, but we don't want to do that if the
> > original connection comes back to life.
> 
> Yeah, I hacked this part in this v1 series (as you may have seen) to
> keep the ports open-forever. I am not sure whether that is acceptable,
> but looks not. :)
> 
> How about this: when destination detected 2nd condition, it firstly
> switch to "postcopy-pause" state, then re-opens the accept channels.
> And it can turns the accept channels off when the state moves out of
> "postcopy-pause".
> 
> > 
> > It feels to me that if the mgmt app or admin believes the migration
> > is in a stuck state, we should be able to explicitly terminate the
> > existing connection via a monitor command. Then setup the target
> > host to accept new client, and then issue this migrate resume on
> > the source.
> 
> Totally agree. That should be the only way to handle 1st condition
> well. However, would you mind if I postpone it a bit? IMHO as long as
> we can solve the 2nd condition nicely (which is the goal of this
> series), then it won't be too hard to continue support the 1st
> condition.

Sure, the 1st scenario is an easy bolt on to the second scenario. I
just wanted to be clear about what the target of these patches is,
because I think the 1st scenario is probably the most common one.

I guess if you have TCP keepalives enabled with a reasonably short
timeout, the 1st scenario will turn into the 2nd scenario fairly
quickly.

Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

[Qemu-devel] [PATCH v2 1/2] migration: fix comment disorder in RAMState

2017-08-02 Thread Peter Xu

Comments for "migration_dirty_pages" and "bitmap_mutex" are switched.
Fix it.

Reviewed-by: Dr. David Alan Gilbert 
Reviewed-by: Juan Quintela 
Signed-off-by: Peter Xu 
---
 migration/ram.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/migration/ram.c b/migration/ram.c
index 1b08296..e18b3e2 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -188,9 +188,9 @@ struct RAMState {
 uint64_t iterations_prev;
 /* Iterations since start */
 uint64_t iterations;
-/* protects modification of the bitmap */
-uint64_t migration_dirty_pages;
 /* number of dirty bits in the bitmap */
+uint64_t migration_dirty_pages;
+/* protects modification of the bitmap */
 QemuMutex bitmap_mutex;
 /* The RAMBlock used in the last src_page_requests */
 RAMBlock *last_req_rb;
-- 
2.7.4

Re: [Qemu-devel] [PATCH v2 2/2] s390x/css: generate solicited crw for rchp completion signaling

2017-08-02 Thread Cornelia Huck

On Tue, 1 Aug 2017 17:16:37 +0200
Halil Pasic  wrote:

> On 08/01/2017 09:57 AM, Dong Jia Shi wrote:
> [..]
> > --- a/hw/s390x/css.c
> > +++ b/hw/s390x/css.c
> > @@ -1745,10 +1745,10 @@ int css_do_rchp(uint8_t cssid, uint8_t chpid)
> >  }
> > 
> >  /* We don't really use a channel path, so we're done here. */
> > -css_queue_crw(CRW_RSC_CHP, CRW_ERC_INIT,
> > +css_queue_crw(CRW_RSC_CHP, CRW_ERC_INIT, 1,
> >channel_subsys.max_cssid > 0 ? 1 : 0, chpid);
> >  if (channel_subsys.max_cssid > 0) {
> > -css_queue_crw(CRW_RSC_CHP, CRW_ERC_INIT, 0, real_cssid << 8);
> > +css_queue_crw(CRW_RSC_CHP, CRW_ERC_INIT, 1, 0, real_cssid << 8);
> >  }
> >  return 0;
> >  }
> > @@ -2028,7 +2028,8 @@ void css_subch_assign(uint8_t cssid, uint8_t ssid, 
> > uint16_t schid,
> >  }
> >  }
> > 
> > -void css_queue_crw(uint8_t rsc, uint8_t erc, int chain, uint16_t rsid)
> > +void css_queue_crw(uint8_t rsc, uint8_t erc, int solicited,
> > +   int chain, uint16_t rsid)  
> 
> I think you could make the parameters solicited and chain bool (AFAIU
> they are conceptually bool) for clearer semantic. If you go with that
> you could also get rid of the superfluous ternary operator ( we have
> stuff like some_cond ? 1 : 0 for the chain parameter in more than
> one place.

Just adding the new parameter is the minimum change, and we should keep
it consistent. I'm not convinced about converting to bool yet.

> 
> Btw. I find bool flags easy to mix up and difficult to read. I have no better
> idea how to write this (in C) though. I was considering throwing chain and
> solicited together into a single flags parameter, but looking at the client 
> code
> it does not look like a good idea.

Yes, combining them would do nothing for the code.

Re: [Qemu-devel] [PATCH 00/13] instrument: Add basic event instrumentation

2017-08-02 Thread Peter Maydell

On 2 August 2017 at 12:04, Stefan Hajnoczi  wrote:
> On Tue, Aug 01, 2017 at 02:54:29PM +0100, Peter Maydell wrote:
>> and I don't need the TCG engine to be a library to do that...
>
> You do need TCG APIs if you want TCG-level instrumentation, tuning
> options, callbacks, etc.

I need an API; that doesn't necessarily look like the kind
of API you want to be able to embed the TCG engine into
other things, I think.

>> I agree that we want to provide something that is at least
>> closer to a stable API than "just expose trace events",
>> though.
>
> libqemu has at least three parts:
>
> 1. VM API (i.e. qemu_init(argc, argv), qemu_run(), qemu_vcpu_get_reg32())
> 2. TCG engine
> 3. Device models
>
> Like I said in my email, start with what matters for the instrumentation
> use case (VM API at a minimum to control guest execution).  Other people
> can flesh out the other parts later, as needed.
>
> Other attempts to provide a stable API will be essentially the same
> thing as libqemu.

I don't think this is the case -- you could have a stable
instrumentation API without it looking anything like
libqemu. In particular I don't think you need to have
something that sits at the top level and says 'run'.

In particular I think that pulling TCG out of QEMU
is an enormous and painful undertaking that you just
don't need to do at all to allow this kind of
instrumentation API.

thanks
-- PMM

[Qemu-devel] [PATCH v2 0/2] migration: fixes for 2.10

2017-08-02 Thread Peter Xu

v2:
- patch 2: move trace_*() after error_setg_errno(). [Dan]

Two patches isolated from the postcopy recovery series, which may be
good for 2.10.

Peter Xu (2):
  migration: fix comment disorder in RAMState
  io: fix qio_channel_socket_accept err handling

 io/channel-socket.c | 3 ++-
 migration/ram.c | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

-- 
2.7.4

[Qemu-devel] [PATCH v2 2/2] io: fix qio_channel_socket_accept err handling

2017-08-02 Thread Peter Xu

When accept failed, we should setup errp with the reason. More
importantly, the caller may assume errp be non-NULL when error happens,
and not setting the errp may crash QEMU.

At the same time, move the trace_qio_channel_socket_accept_fail() after
the if check on EINTR. Two reasons:

1. when EINTR happened, it's not really a fault (we should just try
   again), so we should not log with an "accept failure".

2. trace_*() functions may overwrite errno, then the old errno will be
   missing. We need to either check errno before trace_*() calls, or
   reserve the errno.

Signed-off-by: Peter Xu 
---
 io/channel-socket.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/io/channel-socket.c b/io/channel-socket.c
index 53386b7..591d27e 100644
--- a/io/channel-socket.c
+++ b/io/channel-socket.c
@@ -340,10 +340,11 @@ qio_channel_socket_accept(QIOChannelSocket *ioc,
 cioc->fd = qemu_accept(ioc->fd, (struct sockaddr *)>remoteAddr,
>remoteAddrLen);
 if (cioc->fd < 0) {
-trace_qio_channel_socket_accept_fail(ioc);
 if (errno == EINTR) {
 goto retry;
 }
+error_setg_errno(errp, errno, "Unable to accept connection");
+trace_qio_channel_socket_accept_fail(ioc);
 goto error;
 }
 
-- 
2.7.4

Re: [Qemu-devel] [PATCH v2 2/2] io: fix qio_channel_socket_accept err handling

2017-08-02 Thread Daniel P. Berrange

On Wed, Aug 02, 2017 at 05:41:20PM +0800, Peter Xu wrote:
> When accept failed, we should setup errp with the reason. More
> importantly, the caller may assume errp be non-NULL when error happens,
> and not setting the errp may crash QEMU.
> 
> At the same time, move the trace_qio_channel_socket_accept_fail() after
> the if check on EINTR. Two reasons:
> 
> 1. when EINTR happened, it's not really a fault (we should just try
>again), so we should not log with an "accept failure".
> 
> 2. trace_*() functions may overwrite errno, then the old errno will be
>missing. We need to either check errno before trace_*() calls, or
>reserve the errno.
> 
> Signed-off-by: Peter Xu 
> ---
>  io/channel-socket.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/io/channel-socket.c b/io/channel-socket.c
> index 53386b7..591d27e 100644
> --- a/io/channel-socket.c
> +++ b/io/channel-socket.c
> @@ -340,10 +340,11 @@ qio_channel_socket_accept(QIOChannelSocket *ioc,
>  cioc->fd = qemu_accept(ioc->fd, (struct sockaddr *)>remoteAddr,
> >remoteAddrLen);
>  if (cioc->fd < 0) {
> -trace_qio_channel_socket_accept_fail(ioc);
>  if (errno == EINTR) {
>  goto retry;
>  }
> +error_setg_errno(errp, errno, "Unable to accept connection");
> +trace_qio_channel_socket_accept_fail(ioc);
>  goto error;
>  }

Reviewed-by: Daniel P. Berrange 

Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

Re: [Qemu-devel] [PATCH v2 1/2] s390x/css: use macro for event-information pending error recover code

2017-08-02 Thread Cornelia Huck

On Wed, 2 Aug 2017 09:15:18 +0800
Dong Jia Shi  wrote:

> * Halil Pasic  [2017-08-01 17:24:10 +0200]:
> 
> > 
> > 
> > On 08/01/2017 09:57 AM, Dong Jia Shi wrote:  
> > > Let's use a macro for the ERC (error recover code) when generating a
> > > Channel Subsystem Event-information pending CRW (channel report word).
> > > 
> > > While we are at it, let's also add all other ERCs.
> > > 
> > > Signed-off-by: Dong Jia Shi 
> > > ---
> > >  hw/s390x/css.c|  2 +-
> > >  include/hw/s390x/ioinst.h | 11 +--
> > >  2 files changed, 10 insertions(+), 3 deletions(-)

(...)

> > > diff --git a/include/hw/s390x/ioinst.h b/include/hw/s390x/ioinst.h
> > > index 92d15655e4..f89019f78f 100644
> > > --- a/include/hw/s390x/ioinst.h
> > > +++ b/include/hw/s390x/ioinst.h
> > > @@ -201,8 +201,15 @@ typedef struct CRW {
> > >  #define CRW_FLAGS_MASK_A 0x0080
> > >  #define CRW_FLAGS_MASK_ERC 0x003f
> > > 
> > > -#define CRW_ERC_INIT 0x02
> > > -#define CRW_ERC_IPI  0x04
> > > +#define CRW_ERC_EVENT0x00 /* event information pending */
> > > +#define CRW_ERC_AVAIL0x01 /* available */
> > > +#define CRW_ERC_INIT 0x02 /* initialized */
> > > +#define CRW_ERC_TERROR   0x03 /* temporary error */
> > > +#define CRW_ERC_IPI  0x04 /* installed parm initialized */
> > > +#define CRW_ERC_TERM 0x05 /* terminal */
> > > +#define CRW_ERC_PERRN0x06 /* perm. error, facility not init */
> > > +#define CRW_ERC_PERRI0x07 /* perm. error, facility init */
> > > +#define CRW_ERC_PMOD 0x08 /* installed parameters modified */  
> > 
> > You have missed installed parameters restored from the PoP (above
> > you say add all other).  
> Ok. Will add:
> #define CRW_ERC_IPR  0x0A /* installed parameters restored */

Makes sense.

> 
> > 
> > Other than that.
> > 
> > LGTM
> >   
> > > 
> > >  #define CRW_RSC_SUBCH 0x3
> > >  #define CRW_RSC_CHP   0x4
> > >   
>

[Qemu-devel] [PATCH for-2.10 1/3] target/mips: Use BS_EXCP where interrupts are expected

2017-08-02 Thread James Hogan

Commit e350d8ca3ac7 ("target/mips: optimize indirect branches") made
indirect branches able to directly find the next TB and jump straight to
it without breaking out of translated code and going around the main
execution loop. This breaks the assumption in target/mips/translate.c
that BS_STOP is sufficient to cause pending interrupts to be handled,
since interrupts are only checked in the main loop.

Fix a few of these assumptions by using gen_save_pc to update the saved
PC and using BS_EXCP instead of BS_STOP:

 - [D]MFC0 CP0_Count may trigger a timer interrupt which should be
   immediately handled.

 - [D]MTC0 CP0_Cause may trigger an interrupt (but in fact translation
   was only even being stopped in the DMTC0 case).

 - [D]MTC0 CP0_ when icount is used is assumed could potentially
   cause interrupts.

 - EI may trigger an interrupt which was pending. I specifically hit
   this case when running KVM nested in mipsel-softmmu. A timer
   interrupt while the 2nd guest was executing is caught by KVM which
   switches back to the normal Linux exception base and re-enables
   interrupts with EI. Since the above commit QEMU doesn't leave
   translated code until the nested KVM has already restored the KVM
   exception base and returned to the 2nd guest, at which point it is
   too late to check for pending interrupts and it gets stuck in an
   infinite loop of unhandled interrupts.

Something similar was needed for ARM in commit b29fd33db578
("target/arm: use DISAS_EXIT for eret handling").

Fixes: e350d8ca3ac7 ("target/mips: optimize indirect branches")
Signed-off-by: James Hogan 
Cc: Aurelien Jarno 
Cc: Yongbok Kim 
Cc: Richard Henderson 
---
Although I've given this a bit of testing, I only actually hit the two
EI cases at the end of the patch, and the other cases are mainly just
from auditing translate.c for similar issues with BS_STOP, so review
appreciated for those other cases.
---
 target/mips/translate.c | 47 ++
 1 file changed, 34 insertions(+), 13 deletions(-)

diff --git a/target/mips/translate.c b/target/mips/translate.c
index 51626aead32c..6b41f7b65e00 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -5334,8 +5334,10 @@ static void gen_mfc0(DisasContext *ctx, TCGv arg, int 
reg, int sel)
 gen_io_end();
 }
 /* Break the TB to be able to take timer interrupts immediately
-   after reading count.  */
-ctx->bstate = BS_STOP;
+   after reading count. BS_STOP isn't sufficient, we need to ensure
+   we break completely out of translated code.  */
+gen_save_pc(ctx->pc + 4);
+ctx->bstate = BS_EXCP;
 rn = "Count";
 break;
 /* 6,7 are implementation dependent */
@@ -6061,6 +6063,11 @@ static void gen_mtc0(DisasContext *ctx, TCGv arg, int 
reg, int sel)
 case 0:
 save_cpu_state(ctx, 1);
 gen_helper_mtc0_cause(cpu_env, arg);
+/* Stop translation as we may have triggered an interrupt. BS_STOP
+ * isn't sufficient, we need to ensure we break out of translated
+ * code to check for pending interrupts.  */
+gen_save_pc(ctx->pc + 4);
+ctx->bstate = BS_EXCP;
 rn = "Cause";
 break;
 default:
@@ -6397,7 +6404,10 @@ static void gen_mtc0(DisasContext *ctx, TCGv arg, int 
reg, int sel)
 /* For simplicity assume that all writes can cause interrupts.  */
 if (ctx->tb->cflags & CF_USE_ICOUNT) {
 gen_io_end();
-ctx->bstate = BS_STOP;
+/* BS_STOP isn't sufficient, we need to ensure we break out of
+ * translated code to check for pending interrupts.  */
+gen_save_pc(ctx->pc + 4);
+ctx->bstate = BS_EXCP;
 }
 return;
 
@@ -6678,8 +6688,10 @@ static void gen_dmfc0(DisasContext *ctx, TCGv arg, int 
reg, int sel)
 gen_io_end();
 }
 /* Break the TB to be able to take timer interrupts immediately
-   after reading count.  */
-ctx->bstate = BS_STOP;
+   after reading count. BS_STOP isn't sufficient, we need to ensure
+   we break completely out of translated code.  */
+gen_save_pc(ctx->pc + 4);
+ctx->bstate = BS_EXCP;
 rn = "Count";
 break;
 /* 6,7 are implementation dependent */
@@ -7400,8 +7412,11 @@ static void gen_dmtc0(DisasContext *ctx, TCGv arg, int 
reg, int sel)
 if (ctx->tb->cflags & CF_USE_ICOUNT) {
 gen_io_end();
 }
-/* Stop translation as we may have triggered an intetrupt */
-ctx->bstate = BS_STOP;
+/* Stop translation as we may have triggered an intetrupt. BS_STOP
+ * isn't sufficient, we need to ensure we break

Re: [Qemu-devel] [PATCH v2 0/2] migration: fixes for 2.10

2017-08-02 Thread Dr. David Alan Gilbert

* Peter Xu (pet...@redhat.com) wrote:
> v2:
> - patch 2: move trace_*() after error_setg_errno(). [Dan]
> 
> Two patches isolated from the postcopy recovery series, which may be
> good for 2.10.

Queued

> Peter Xu (2):
>   migration: fix comment disorder in RAMState
>   io: fix qio_channel_socket_accept err handling
> 
>  io/channel-socket.c | 3 ++-
>  migration/ram.c | 4 ++--
>  2 files changed, 4 insertions(+), 3 deletions(-)
> 
> -- 
> 2.7.4
> 
--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

Re: [Qemu-devel] [PATCH 1/1] qemu-iotests/109: Fix lock race condition

2017-08-02 Thread Kevin Wolf

Am 01.08.2017 um 23:31 hat Cleber Rosa geschrieben:
> A race condition is currently present between the clean up attempt of
> the QEMU process and the execution of qemu-img.  The actual (bad)
> output is:
> 
>  -Warning: Image size mismatch!
>  -Images are identical.
>  +qemu-img: Could not open '/tests/qemu-iotests/scratch/t.raw': 
> Failed to get "consistent read" lock
>  +Is another process using the image?
> 
> A KILL signal is sent to the QEMU process, but qemu-img may begin to
> run before the QEMU process is really gone.  qemu-img will then
> attempt to open the TEST_IMG file before it can secure a lock on it.
> 
> This attempts a more graceful shutdown, and waits for the QEMU process
> to exit.
> 
> Signed-off-by: Cleber Rosa 

Thanks, applied to the block branch.

Kevin

Re: [Qemu-devel] [PATCH 00/13] instrument: Add basic event instrumentation

2017-08-02 Thread Stefan Hajnoczi

On Tue, Aug 01, 2017 at 02:54:29PM +0100, Peter Maydell wrote:
> On 1 August 2017 at 14:48, Stefan Hajnoczi  wrote:
> > Thanks for sharing the requirements.  A stable API is necessary for
> > providing these features.
> >
> > We're essentially talking about libqemu.  That means QEMU in library
> > form with an API for JIT engine, reverse engineering, instrumentation,
> > etc tasks.
> 
> > Maintaining libqemu will take ongoing effort and no one has committed.
> > The last discussion about libqemu was here:
> > https://lists.gnu.org/archive/html/qemu-devel/2016-11/msg04847.html
> 
> That thread seems to be focused on trying to extract TCG
> from the rest of QEMU, which definitely isn't a requirement
> for instrumentation, and I would suggest is something
> of a distraction from it.

You are right, it's a different use case.  I just wanted to reference
previous discussion about libqemu.  There have been more in the past for
different use cases.

> I want to be able to say "just
> instrument this setup that QEMU already provides as a
> board model", not have to write a driver that duplicates
> all the work vl.c and our board models do for us today,

Calling qemu_init(argc, argv) and qemu_run() isn't too onerous.  For the
price of that you get an environment where we can offer stable APIs.

> and I don't need the TCG engine to be a library to do that...

You do need TCG APIs if you want TCG-level instrumentation, tuning
options, callbacks, etc.

> I agree that we want to provide something that is at least
> closer to a stable API than "just expose trace events",
> though.

libqemu has at least three parts:

1. VM API (i.e. qemu_init(argc, argv), qemu_run(), qemu_vcpu_get_reg32())
2. TCG engine
3. Device models

Like I said in my email, start with what matters for the instrumentation
use case (VM API at a minimum to control guest execution).  Other people
can flesh out the other parts later, as needed.

Other attempts to provide a stable API will be essentially the same
thing as libqemu.

Stefan

signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH 00/13] instrument: Add basic event instrumentation

2017-08-02 Thread Stefan Hajnoczi

On Fri, Jul 28, 2017 at 07:21:04PM +0300, Lluís Vilanova wrote:
> Stefan Hajnoczi writes:
> 
> > On Thu, Jul 27, 2017 at 11:40:17AM +0100, Peter Maydell wrote:
> >> On 27 July 2017 at 11:32, Stefan Hajnoczi  wrote:
> >> > On Wed, Jul 26, 2017 at 03:44:39PM +0300, Lluís Vilanova wrote:
> >> >> And why exactly is this a threat? Because it can be used to "extend" 
> >> >> QEMU
> >> >> without touching its sources? Is this a realistic threat? (it's a 
> >> >> rather brittle
> >> >> way to do it, so I'm not sure it's practical)
> >> >
> >> > Unfortunately it is a problem.  I recently came across a product that
> >> > was using LD_PRELOAD= to "integrate" with QEMU.  People really abuse
> >> > these interfaces instead of integrating their features cleanly into
> >> > QEMU.
> >> 
> >> ...if people who want to do this kind of thing already can and
> >> do use LD_PRELOAD for it, I don't think we should worry too much
> >> about trying to make the instrumentation plugin API bulletproof
> >> against similar abuse.
> >> 
> >> > I see the use cases that Peter has been describing and am sure we can
> >> > come up with good solutions.  What I care about is that it doesn't allow
> >> > loading a .so that connects to arbitrary trace events.
> >> 
> >> That said, I agree that we don't really need an arbitrary-trace-event
> >> setup here, and we should probably design our API so that it isn't
> >> handing the trace plugin hooks pointers into QEMU's internals.
> >> We want an API that makes it easy for people to do things based on
> >> changes of the guest binary's state (registers, insns, etc etc)
> >> and which makes it hard for them to accidentally trip themselves up
> >> (eg by prodding around in QEMU internal data structures).
> >> This will have the secondary benefit that it's unlikely that future
> >> changes to QEMU will break plugin code.
> >> 
> >> >> As a side note, I find instrumentation to be most useful for guest code 
> >> >> events,
> >> >> which mostly contain non-pointer values (except for the CPUState*).
> >> 
> >> For instance we definitely should not be passing a CPUState* to
> >> any plugin function.
> 
> > The gdbstub protocol has relevant features for accessing guest memory,
> > registers, etc.  Perhaps a set of QEMU-specific events can be added
> > (e.g. tb generated) so it's possible to instrument and control the
> > guest from an instrumentation program (written in any language).
> 
> > Perhaps there is a fundamental reason why this isn't possible due to the
> > protocol design, because using gdbstub halts all vcpus, etc.  I don't
> > know.
> 
> > Do you think this is an interesting direction?  It definitely seems like
> > a powerful approach though performance would be less than running native
> > code inside the QEMU process.
> 
> That's the same approach someone else dubbed as using a fifo with 
> "synchronous"
> events, right? I have some measurements on this using a pipe, and overheads 
> are
> 1000x to 2300x for each communication event (compared to a function call, and
> depending on whether each process/thread is pinned to the same or different
> CPU).

You are right.  I understand the need for native code without
interprocess communication now.

Stefan


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH 4/6] hw/block: Fix the return type

2017-08-02 Thread Mao Zhongyi


Hi

On 08/01/2017 10:29 PM, Markus Armbruster wrote:

Stefan Hajnoczi  writes:


On Wed, Jul 26, 2017 at 08:02:53PM +0800, Mao Zhongyi wrote:

When the function no success value to transmit, it usually make the
function return void. It has turned out not to be a success, because
it means that the extra local_err variable and error_propagate() will
be needed. It leads to cumbersome code, therefore, transmit success/
failure in the return value is worth.

So fix the return type of blkconf_apply_backend_options(),
blkconf_geometry() and virtio_blk_data_plane_create() to avoid it.

Cc: John Snow 
Cc: Kevin Wolf 
Cc: Max Reitz 
Cc: Stefan Hajnoczi 

Signed-off-by: Mao Zhongyi 
---
 hw/block/block.c| 21 -
 hw/block/dataplane/virtio-blk.c | 16 +---
 hw/block/dataplane/virtio-blk.h |  6 +++---
 include/hw/block/block.h| 10 +-
 4 files changed, 29 insertions(+), 24 deletions(-)

diff --git a/hw/block/block.c b/hw/block/block.c
index 27878d0..717bd0e 100644
--- a/hw/block/block.c
+++ b/hw/block/block.c
@@ -51,8 +51,8 @@ void blkconf_blocksizes(BlockConf *conf)
 }
 }

-void blkconf_apply_backend_options(BlockConf *conf, bool readonly,
-   bool resizable, Error **errp)
+int blkconf_apply_backend_options(BlockConf *conf, bool readonly,
+  bool resizable, Error **errp)


I'm not a fan of these changes because it makes inconsistencies between
the return value and the errp argument possible (e.g. returning success
but setting errp, or returning failure without setting errp).


Opinions and practice vary on this one, and we've discussed the
tradeoffs a few times.

Having both an Error parameter and an error return value poses the
question whether the two agree.  When there's no success value to
transmit, you avoid the problem by making the function return void.  The
problem remains for all the function that return a value on success, and
therefore must return some error value on failure.  A related problem
even remains for functions returning void: consistency between side
effects and the Error parameter.

Returning void leads to awkward boilerplate:

Error err = NULL;

foo(..., err);
if (err) {
error_propagate(errp, err);
... bail out ...
}

Compare:

if (!foo(..., errp)) {
... bail out ...
}

Much easier on the eyes.

For what it's worth, GLib wants you to transmit success / failure in the
return value, too:

https://developer.gnome.org/glib/unstable/glib-Error-Reporting.html#gerror-rules

Plenty of older code returns void, some newer code doesn't, because
returning void is just too awkward.  We willfully deviated from GLib's
convention, and we're now paying the price.

See also
Subject: Re: [RFC 00/15] Error API: Flag errors in *errp even if errors are 
being ignored
Message-ID: <87o9t8qy7d@dusky.pond.sub.org>
https://lists.gnu.org/archive/html/qemu-devel/2017-06/msg06191.html


If you really want to do this please use bool as the return type instead
of int.  int can be abused to return error information that should
really be in the Error object.


For what it's worth, GLib wants bool[*].  Let's stick to that unless we
have a compelling reason to differ.


[*] Except being GLib, it wants its very own homemade version of bool.
Which is inferior to stdbool.h's in pretty much every conceivable way.



Thanks for pointing that out！
I will use bool as the return type instead of int.

Thanks,
Mao

Re: [Qemu-devel] [PATCH 3/3] block: remove legacy I/O throttling

2017-08-02 Thread Manos Pitsidianakis


On Wed, Aug 02, 2017 at 11:07:24AM +0100, Stefan Hajnoczi wrote:

On Tue, Aug 01, 2017 at 04:49:07PM +0300, Manos Pitsidianakis wrote:

diff --git a/block.c b/block.c
index 9ebdba28b0..c6aad25286 100644
--- a/block.c
+++ b/block.c
@@ -1975,6 +1975,7 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState 
*child_bs,
 child = g_new(BdrvChild, 1);
 *child = (BdrvChild) {
 .bs = NULL,
+.parent_bs  = NULL,
 .name   = g_strdup(child_name),
 .role   = child_role,
 .perm   = perm,
@@ -2009,6 +2010,7 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
 if (child == NULL) {
 return NULL;
 }
+child->parent_bs = parent_bs;

 QLIST_INSERT_HEAD(_bs->children, child, next);
 return child;
@@ -3729,6 +3731,12 @@ const char *bdrv_get_parent_name(const BlockDriverState 
*bs)
 return name;
 }
 }
+if (c->parent_bs && c->parent_bs->implicit) {
+name = bdrv_get_parent_name(c->parent_bs);
+if (name && *name) {
+return name;
+}
+}
 }

 return NULL;


This should be a separate patch.

Who updates parent_bs if the parent is changed (e.g.
bdrv_replace_node())?

We already have bs->parents.  Why is BdrvChild->parent_bs needed?



If I haven't misunderstood this, BdrvChild holds only the child part of 
the parent-child relationship and there's no way to access a parent from 
bs->parents. bdrv_replace_node() will thus only replace the child part 
in BdrvChild from the aspect of the parent. In the old child bs's 
perspective, one of the nodes of bs->parents is removed and in the new 
child bs's perspective a new node in bs->parents was inserted. parent_bs 
thus remains immutable.


child->parent_bs is needed in this patch because in jobs if a job-ID is 
not specified the parent name is used, but this fails if the parent is 
an implicit node instead of BlockBackend and causes a regression 
(certain job setups suddenly need an explicit job ID instead of just 
working).



-void blk_io_limits_disable(BlockBackend *blk)
+void blk_io_limits_disable(BlockBackend *blk, Error **errp)
 {
-assert(blk->public.throttle_group_member.throttle_state);
-bdrv_drained_begin(blk_bs(blk));


Is it safe to drop drained_begin?  We must ensure that no I/O requests
run during this function.


Thanks, I will put it back in.


-throttle_group_unregister_tgm(>public.throttle_group_member);
-bdrv_drained_end(blk_bs(blk));
+BlockDriverState *bs, *throttle_node;
+
+throttle_node = blk_get_public(blk)->throttle_node;


Is blk_get_public() still necessary?  Perhaps we can do away with the
concept of the public struct now.  It doesn't need to be done in this
patch though.


I can include a patch to move throttle_node to BlockBackend and remove 
all BlockBackendPublic code, is that okay?





+
+assert(throttle_node && throttle_node->refcnt == 1);


Are you sure the throttle_node->refcnt == 1 assertion holds?  For
example, does the built-in NBD server have a reference to the throttle
node if nbd-server-add is called after throttling has been enabled?

Since we have the blk->throttle_node pointer we know we're the owner.
Others may be using the node too but we may choose to remove it at any
time.


Hm.. If that's possible I guess we want the removal to be visible to the 
nbd server too. I will use bdrv_replace_node() instead.


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH] ppc: fix double-free in cpu_post_load()

2017-08-02 Thread David Gibson

On Wed, Aug 02, 2017 at 07:34:16PM +0200, Greg Kurz wrote:
> When running nested with KVM PR, ppc_set_compat() fails and QEMU crashes
> because of "double free or corruption (!prev)". The crash happens because
> error_report_err() has already called error_free().
> 
> Signed-off-by: Greg Kurz 

Oops, that's a bit embarassing.  Applied to ppc-for-2.10.

> ---
>  target/ppc/machine.c |1 -
>  1 file changed, 1 deletion(-)
> 
> diff --git a/target/ppc/machine.c b/target/ppc/machine.c
> index f578156dd411..abe0a1cdf021 100644
> --- a/target/ppc/machine.c
> +++ b/target/ppc/machine.c
> @@ -239,7 +239,6 @@ static int cpu_post_load(void *opaque, int version_id)
>  ppc_set_compat(cpu, cpu->compat_pvr, _err);
>  if (local_err) {
>  error_report_err(local_err);
> -error_free(local_err);
>  return -1;
>  }
>  } else
> 

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH v3 5/5] docs: update documentation considering PCIE-PCI bridge

2017-08-02 Thread Laine Stump

On 08/02/2017 01:58 PM, Marcel Apfelbaum wrote:
> On 02/08/2017 19:26, Michael S. Tsirkin wrote:
>> On Wed, Aug 02, 2017 at 06:36:29PM +0300, Marcel Apfelbaum wrote:
 Can dmi-pci support shpc? why doesn't it? For compatibility?
>>>
>>> I don't know why, but the fact that it doesn't is the reason libvirt
>>> settled on auto-creating a dmi-pci bridge and a pci-pci bridge under
>>> that for Q35. The reasoning was (IIRC Laine's words correctly)
>>> that the
>>> dmi-pci bridge cannot receive hotplugged devices, while the pci-pci
>>> bridge cannot be connected to the root complex. So both were needed.

At least that's what I was told :-) (seriously, 50% of the convoluted
rules encoded into libvirt's PCI bus topology construction and
connection rules come from trial and error, and the other 50% come from
advice and recommendations from others who (unlike me) actually know
something about PCI.)

Of course the whole setup of plugging a pci-bridge into a
dmi-to-pci-bridge was (at the time at least) an exercise in futility,
since hotplug didn't work properly on pci-bridge+Q35 anyway (that
initially wasn't explained to me; it was only after I had constructed
the odd bus topology and it was in released code that someone told me
"Oh, by the way, hotplug to pci-bridge doesn't work on Q35". At first it
was described as a bug, then later reclassified as a future feature.)

(I guess the upside is that all of the horrible complex/confusing code
needed to automatically add two controllers just to plug in a single
endpoint is now already in the code, and will "just work" if/when needed).

Now that I go back to look at this thread (qemu-devel is just too much
for me to try and read unless something has been Cc'ed to me - I really
don't know how you guys manage it!), I see that pcie-pci-bridge has been
implemented, and we (libvirt) will want to use that instead of
dmi-to-pci-bridge when available. And pcie-pci-bridge itself can have
endpoints hotplugged into it, correct? This means there will need to be
patches for libvirt that check for the presence of pcie-pci-bridge, and
if it's found they will replace any auto-added
dmi-to-pci-bridge+pci-bridge with a long pcie-pci-bridge.

>>>
>>> Thanks
>>> Laszlo
>>
>> OK. Is it true that dmi-pci + pci-pci under it will allow hotplug
>> on Q35 if we just flip the bit in _OSC?
>
> Marcel, what say you?... :)
>>>
>>> Good news, works with:
>>> -device i82801b11-bridge,id=b1
>>> -device pci-bridge,id=b2,bus=b1,chassis_nr=1,msi=off
>>
>> And presumably it works for modern windows?
>> OK, so it looks like patch 1 is merely a bugfix, I'll merge it for 2.10.
>>
> 
> Tested with Win10, I think is OK to merge if for 2.10.
> 
>>> Notice bridge's msi=off until the following kernel bug will be merged:
>>>https://www.spinics.net/lists/linux-pci/msg63052.html
>>
>> Does libvirt support msi=off as a work-around?

We have no explicit setting for msi on pci controllers. The only place
we explicitly set that is on the ivshmem device.

That doesn't mean that we couldn't add it. However, if we were going to
do it manually, that would mean adding another knob that we have to
support forever. And even if we wanted to do it automatically, we would
not only need to find something in qemu to key off of when deciding
whether or not to set it, but we would *still* have to explicitly store
the setting in the config so that migrations between hosts using
differing versions of qemu would preserve guest ABI. Are there really
enough people demanding (with actual concrete plans of *using*) hotplug
of legacy PCI devices on Q35 guests *immediately* that we want to
permanently pollute libvirt's code in this manner just for an interim
workaround?

I didn't have enough time/energy to fully parse all the rest of this
thread - is msi=off currently required for pcie-pci-bridge hotplug as
well? (not that it changes my opinion - just as we can tell people
"upgrade to a new qemu and libvirt if you want to hotplug legacy PCI
devices on Q35 guests", we can also tell them "Oh, and wait X weeks and
upgrade to a new kernel too".

Re: [Qemu-devel] [PATCH] libqtest: Fix typo in comments

2017-08-02 Thread Jeff Cody

On Wed, Aug 02, 2017 at 08:08:33PM -0500, Eric Blake wrote:
> s/continuosly/continuously/
> 
> Signed-off-by: Eric Blake 

Hardly seems like a trivial patch like this should need an R-b, but what the
heck:

Reviewed-by: Jeff Cody 


> ---
>  tests/libqtest.h | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/tests/libqtest.h b/tests/libqtest.h
> index 38bc1e9953..3ae570927a 100644
> --- a/tests/libqtest.h
> +++ b/tests/libqtest.h
> @@ -117,7 +117,7 @@ QDict *qtest_qmp_receive(QTestState *s);
>   * @s: #QTestState instance to operate on.
>   * @s: #event event to wait for.
>   *
> - * Continuosly polls for QMP responses until it receives the desired event.
> + * Continuously polls for QMP responses until it receives the desired event.
>   */
>  void qtest_qmp_eventwait(QTestState *s, const char *event);
> 
> @@ -126,7 +126,7 @@ void qtest_qmp_eventwait(QTestState *s, const char 
> *event);
>   * @s: #QTestState instance to operate on.
>   * @s: #event event to wait for.
>   *
> - * Continuosly polls for QMP responses until it receives the desired event.
> + * Continuously polls for QMP responses until it receives the desired event.
>   * Returns a copy of the event for further investigation.
>   */
>  QDict *qtest_qmp_eventwait_ref(QTestState *s, const char *event);
> @@ -571,7 +571,7 @@ static inline QDict *qmp_receive(void)
>   * qmp_eventwait:
>   * @s: #event event to wait for.
>   *
> - * Continuosly polls for QMP responses until it receives the desired event.
> + * Continuously polls for QMP responses until it receives the desired event.
>   */
>  static inline void qmp_eventwait(const char *event)
>  {
> @@ -582,7 +582,7 @@ static inline void qmp_eventwait(const char *event)
>   * qmp_eventwait_ref:
>   * @s: #event event to wait for.
>   *
> - * Continuosly polls for QMP responses until it receives the desired event.
> + * Continuously polls for QMP responses until it receives the desired event.
>   * Returns a copy of the event for further investigation.
>   */
>  static inline QDict *qmp_eventwait_ref(const char *event)
> -- 
> 2.13.3
> 
>

Re: [Qemu-devel] [PATCH] virtio: Mark virtio-device as non-user-creatable

2017-08-02 Thread Halil Pasic



On 08/02/2017 01:01 AM, Eduardo Habkost wrote:
> TYPE_VIRTIO_DEVICE devices are already not usable with -device
> and device_add, but they are reported as user-creatable on
> "-device help" and through monitor interfaces.

I've tried -device virtio-rng on s390x and from what I see, it
seems we 'auto-magically' create the 'controlling' virtio-rng-ccw
device. So I have to ask what do you mean by 'already not usable'?

> 
> Mark them as not user-creatable to avoid confusing users, and to
> allow automated testing (e.g. scripts/device-crash-test) to skip
> them.
> 
> Before this patch, device-crash-test will try to test
> virtio-device devices with all machine-types:
> 
>   $ time ./scripts/device-crash-test -D virtio-device -v 
> ./x86_64-softmmu/qemu-system-x86_64
>   [...]
>   INFO: Total: 1088 test cases
>   INFO: Skipped 408 test cases
> 
>   real0m49.775s
> 
> After this patch, the script won't try to test virtio-device
> devices:
> 
>   $ time ./scripts/device-crash-test -D virtio-device -v 
> ./x86_64-softmmu/qemu-system-x86_64
>   INFO: Total: 0 test cases
> 
>   real0m0.092s
> 
> Signed-off-by: Eduardo Habkost 
> ---
>  hw/virtio/virtio.c | 11 +++
>  1 file changed, 11 insertions(+)
> 
> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> index 464947f..c4bdb94 100644
> --- a/hw/virtio/virtio.c
> +++ b/hw/virtio/virtio.c
> @@ -2653,6 +2653,17 @@ static void virtio_device_class_init(ObjectClass 
> *klass, void *data)
>  dc->unrealize = virtio_device_unrealize;
>  dc->bus_type = TYPE_VIRTIO_BUS;
>  dc->props = virtio_properties;
> +/*
> + * Reason:
> + * - TYPE_VIRTIO_DEVICE devices are not visible to guests
> + *   unless they are created and controlled by transport-specific
> + *   devices (virtio-pci, virtio-mmio, and virtio-ccw).
> + * - A TYPE_VIRTIO_BUS bus is never available for plugging
> + *   using -device/device_add, as virtio-bus buses are
> + *   created on the fly and immediately populated by the
> + *   transport-specific devices' realize methods.
> + */
> +dc->user_creatable = false;
>  vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
>  vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;
>

Re: [Qemu-devel] [PATCH v3 2/6] seccomp: add obsolete argument to command line

2017-08-02 Thread Daniel P. Berrange

On Wed, Aug 02, 2017 at 01:33:56PM +0100, Daniel P. Berrange wrote:
> On Fri, Jul 28, 2017 at 02:10:36PM +0200, Eduardo Otubo wrote:
> > This patch introduces the argument [,obsolete=allow] to the `-sandbox on'
> > option. It allows Qemu to run safely on old system that still relies on
> > old system calls.
> > 
> > Signed-off-by: Eduardo Otubo 
> > ---
> >  include/sysemu/seccomp.h |  4 +++-
> >  qemu-options.hx  |  9 +++--
> >  qemu-seccomp.c   | 32 +++-
> >  vl.c | 16 +++-
> >  4 files changed, 56 insertions(+), 5 deletions(-)


> > @@ -1032,7 +1036,17 @@ static int parse_sandbox(void *opaque, QemuOpts 
> > *opts, Error **errp)
> >  {
> >  if (qemu_opt_get_bool(opts, "enable", false)) {
> >  #ifdef CONFIG_SECCOMP
> > -if (seccomp_start() < 0) {
> > +uint8_t seccomp_opts = 0x;
> > +const char *value = NULL;
> > +
> > +value = qemu_opt_get(opts, "obsolete");
> > +if (value) {
> > +if (strcmp(value, "allow") == 0) {
> > +seccomp_opts |= OBSOLETE;
> > +}
> > +}
> 
> IIUC, the values will all be booleans, so we should just use
> 
>if (qemu_opt_get_bool(opts, "obsolete", false))
>seccomp_opts |= OBSOLETE;

Oh ignore this. I see from the next patch, we can't treat it as a boolean.

We should however explicitly look for 'value == deny', and then reject
all other values with an error message

> 
> > +
> > +if (seccomp_start(seccomp_opts) < 0) {
> >  error_report("failed to install seccomp syscall filter "
> >   "in the kernel");
> >  return -1;

Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

Re: [Qemu-devel] [PATCH v2 4/4] maint: Include bug-reporting info in --help output.

2017-08-02 Thread Daniel P. Berrange

On Fri, Jul 28, 2017 at 11:47:08AM -0500, Eric Blake wrote:
> These days, many programs are including a bug-reporting address,
> or better yet, a link to the project web site, at the tail of
> their --help output.  However, we were not very consistent at
> doing so: only qemu-nbd and qemu-qa mentioned anything, with the
> latter pointing to an individual person instead of the project.
> 
> Add a new #define that sets up a uniform string, mentioning both
> bug reporting instructions and overall project details, and which
> a downstream vendor could tweak if they want bugs to go to a
> downstream database.  Then use it in all of our binaries which
> have --help output.
> 
> The canned text intentionally references http:// instead of https://
> because our https website currently causes certificate errors in
> some browsers.  That can be tweaked later once we have resolved the
> web site issued.
> 
> Signed-off-by: Eric Blake 
> ---
> 
> v2: tweak text to capitalize QEMU and use consistent trailing .
> 
>  include/qemu-common.h | 5 +
>  vl.c  | 4 +++-
>  bsd-user/main.c   | 2 ++
>  linux-user/main.c | 4 +++-
>  qemu-img.c| 2 +-
>  qemu-io.c | 5 +++--
>  qemu-nbd.c| 2 +-
>  qga/main.c| 2 +-
>  8 files changed, 19 insertions(+), 7 deletions(-)

Reviewed-by: Daniel P. Berrange 


Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

Re: [Qemu-devel] Where is the VM live migration code?

2017-08-02 Thread Dr. David Alan Gilbert

* Aditya Bhardwaj (adityaf...@gmail.com) wrote:
> My lab setup is: 3 HP Servers each of 4 GB RAM. Server 1 and Server 2:
> installed with Virtualization using KVM on Redhat 6.5 System. On top of
> this I am using libvirtd for managing virtual machines. NFS server is
> installed on Server 3 for shared disk image. VM migration is working
> correctly. Now i need to do some changes in "migration.c" but did not find
> any source code of KVM-QEMU migration.
> 
> Where I can find the migration source code for KVM-QEMU. All are requested
> to help me for this issue.

Migration is quite big - so take a step back first and understand how
a migration works before diving into the code too far.
First I'd suggest going for something newer than 6.5 - it has a very
very old QEMU on it;  If you can upgrade to a 7.x it'll make your life a
lot easier for building and working with current code.

The current QEMU has a whole migration/ subdirectory with quite a bit of
code, and there's also some kernel code that helps, and code in
individual devices.

Then there's also the code in libvirt that drives it.

A lot of code - so the trick is to figure out what you want to change
and then find the right place.

Dave
> 
> 
> 
> 
> 
> *Thank youWith Regards:Aditya Bhardwaj*
> *Dept. of CSE *
--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

Re: [Qemu-devel] [PATCH for-2.10 3/5] tests: acpi: rename test_acpi_tables()/test_dst_table() to reflect its usage

2017-08-02 Thread Marcel Apfelbaum


On 31/07/2017 18:40, Igor Mammedov wrote:

Main purpose of test_dst_table() is loading a table from QEMU
with checking that checksum in header matches actual one,
rename it reflect main action it performs.

Likewise test_acpi_tables() name is to broad, while the function
only loads tables referenced by RSDT, rename it to reflect it.

Signed-off-by: Igor Mammedov 
---
  tests/bios-tables-test.c | 15 ++-
  1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c
index adbcc55..ed32e9a 100644
--- a/tests/bios-tables-test.c
+++ b/tests/bios-tables-test.c
@@ -206,7 +206,11 @@ static void test_acpi_facs_table(test_data *data)
  ACPI_ASSERT_CMP(facs_table->signature, "FACS");
  }
  
-static void test_dst_table(AcpiSdtTable *sdt_table, uint32_t addr)

+/** fetch_table
+ *   load ACPI table at @addr into table descriptor @sdt_table
+ *   and check that header checksum matches actual one.
+ */
+static void fetch_table(AcpiSdtTable *sdt_table, uint32_t addr)
  {
  uint8_t checksum;
  
@@ -229,14 +233,15 @@ static void test_acpi_dsdt_table(test_data *data)

  AcpiSdtTable dsdt_table;
  uint32_t addr = data->fadt_table.dsdt;
  
-test_dst_table(_table, addr);

+fetch_table(_table, addr);
  ACPI_ASSERT_CMP(dsdt_table.header.signature, "DSDT");
  
  /* Since DSDT isn't in RSDT, add DSDT to ASL test tables list manually */

  g_array_append_val(data->tables, dsdt_table);
  }
  
-static void test_acpi_tables(test_data *data)

+/* Load all tables and add to test list directly RSDT referenced tables */
+static void fetch_rsdt_referenced_tables(test_data *data)
  {
  int tables_nr = data->rsdt_tables_nr - 1; /* fadt is first */
  int i;
@@ -245,7 +250,7 @@ static void test_acpi_tables(test_data *data)
  AcpiSdtTable ssdt_table;
  
  uint32_t addr = data->rsdt_tables_addr[i + 1]; /* fadt is first */

-test_dst_table(_table, addr);
+fetch_table(_table, addr);
  g_array_append_val(data->tables, ssdt_table);
  }
  }
@@ -638,7 +643,7 @@ static void test_acpi_one(const char *params, test_data 
*data)
  test_acpi_fadt_table(data);
  test_acpi_facs_table(data);
  test_acpi_dsdt_table(data);
-test_acpi_tables(data);
+fetch_rsdt_referenced_tables(data);
  
  if (iasl) {

  if (getenv(ACPI_REBUILD_EXPECTED_AML)) {




Reviewed-by: Marcel Apfelbaum 

Thanks,
Marcel

Re: [Qemu-devel] [PATCH for-2.10 4/5] tests: acpi: add comments to fetch_rsdt_referenced_tables/data->tables usage

2017-08-02 Thread Marcel Apfelbaum


On 31/07/2017 18:40, Igor Mammedov wrote:

Signed-off-by: Igor Mammedov 
---
  tests/bios-tables-test.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c
index ed32e9a..a2a90d7 100644
--- a/tests/bios-tables-test.c
+++ b/tests/bios-tables-test.c
@@ -251,6 +251,8 @@ static void fetch_rsdt_referenced_tables(test_data *data)
  
  uint32_t addr = data->rsdt_tables_addr[i + 1]; /* fadt is first */

  fetch_table(_table, addr);
+
+/* Add table to ASL test tables list */
  g_array_append_val(data->tables, ssdt_table);
  }
  }
@@ -425,6 +427,7 @@ try_again:
  return exp_tables;
  }
  
+/* test the list of tables in @data->tables against reference tables */

  static void test_acpi_asl(test_data *data)
  {
  int i;




Reviewed-by: Marcel Apfelbaum 

Thanks,
Marcel

Re: [Qemu-devel] [PATCH v6 2/7] qemu.py: fix is_running() return before first launch()

2017-08-02 Thread Stefan Hajnoczi

On Tue, Aug 01, 2017 at 02:56:55PM +0200, Amador Pahim wrote:
> On Tue, Aug 1, 2017 at 12:50 PM, Eduardo Habkost  wrote:
> > On Tue, Aug 01, 2017 at 11:09:25AM +0100, Stefan Hajnoczi wrote:
> >> On Mon, Jul 31, 2017 at 10:51:05AM +0200, Amador Pahim wrote:
> >> > is_running() returns None when called before the first time we
> >> > call launch():
> >> >
> >> > >>> import qemu
> >> > >>> vm = qemu.QEMUMachine('qemu-system-x86_64')
> >> > >>> vm.is_running()
> >> > >>>
> >> >
> >> > It should retunt False instead. This patch fixes that.
> >>
> >> s/retunt/return/
> >>
> 
> Ack
> 
> >> >
> >> > Signed-off-by: Amador Pahim 
> >> > ---
> >> >  scripts/qemu.py | 2 +-
> >> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >> >
> >> > diff --git a/scripts/qemu.py b/scripts/qemu.py
> >> > index 2f1984c93c..77565eb092 100644
> >> > --- a/scripts/qemu.py
> >> > +++ b/scripts/qemu.py
> >> > @@ -86,7 +86,7 @@ class QEMUMachine(object):
> >> >  raise
> >> >
> >> >  def is_running(self):
> >> > -return self._popen and (self._popen.poll() is None)
> >> > +return self._popen is not None and (self._popen.poll() is None)
> >>
> >> The parentheses are inconsistent:
> >>
> >>   return (self._popen is not None) and (self._popen.poll() is None)
> 
> Parentheses are not needed here. I can remove the other one, if you want.

That would be fine.  There are no other instances in the file and PEP8
doesn't cover this either.

> >>
> >> An alternative:
> >>
> >>   return bool(self._popen and self._popen.poll())
> >
> > is_running() should be True only if self._popen.poll() is None
> > (and not if it's 0), so the "self._popen.poll() is None" part is
> > necessary.
> 
> +1

oops :)

Stefan


signature.asc
Description: PGP signature

[Qemu-devel] [PATCH] firmware: add const to bin_attribute structures

2017-08-02 Thread Bhumika Goyal

Add const to bin_attribute structures as they are only passed to the
functions sysfs_{remove/create}_bin_file. The arguments passed are of
type const, so declare the structures to be const.

Done using Coccinelle.

@m disable optional_qualifier@
identifier s;
position p;
@@
static struct bin_attribute s@p={...};

@okay1@
position p;
identifier m.s;
@@
(
sysfs_create_bin_file(...,@p,...)
|
sysfs_remove_bin_file(...,@p,...)
)

@bad@
position p!={m.p,okay1.p};
identifier m.s;
@@
s@p

@change depends on !bad disable optional_qualifier@
identifier m.s;
@@
static
+const
struct bin_attribute s={...};

Signed-off-by: Bhumika Goyal 
---
 drivers/firmware/dell_rbu.c  | 6 +++---
 drivers/firmware/dmi-sysfs.c | 2 +-
 drivers/firmware/google/gsmi.c   | 2 +-
 drivers/firmware/google/memconsole.c | 2 +-
 drivers/firmware/qemu_fw_cfg.c   | 2 +-
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/firmware/dell_rbu.c b/drivers/firmware/dell_rbu.c
index 2f452f1..a771360 100644
--- a/drivers/firmware/dell_rbu.c
+++ b/drivers/firmware/dell_rbu.c
@@ -675,18 +675,18 @@ static ssize_t write_rbu_packet_size(struct file *filp, 
struct kobject *kobj,
return count;
 }
 
-static struct bin_attribute rbu_data_attr = {
+static const struct bin_attribute rbu_data_attr = {
.attr = {.name = "data", .mode = 0444},
.read = read_rbu_data,
 };
 
-static struct bin_attribute rbu_image_type_attr = {
+static const struct bin_attribute rbu_image_type_attr = {
.attr = {.name = "image_type", .mode = 0644},
.read = read_rbu_image_type,
.write = write_rbu_image_type,
 };
 
-static struct bin_attribute rbu_packet_size_attr = {
+static const struct bin_attribute rbu_packet_size_attr = {
.attr = {.name = "packet_size", .mode = 0644},
.read = read_rbu_packet_size,
.write = write_rbu_packet_size,
diff --git a/drivers/firmware/dmi-sysfs.c b/drivers/firmware/dmi-sysfs.c
index d5de6ee..936e656 100644
--- a/drivers/firmware/dmi-sysfs.c
+++ b/drivers/firmware/dmi-sysfs.c
@@ -440,7 +440,7 @@ static ssize_t dmi_sel_raw_read(struct file *filp, struct 
kobject *kobj,
return find_dmi_entry(entry, dmi_sel_raw_read_helper, );
 }
 
-static struct bin_attribute dmi_sel_raw_attr = {
+static const struct bin_attribute dmi_sel_raw_attr = {
.attr = {.name = "raw_event_log", .mode = 0400},
.read = dmi_sel_raw_read,
 };
diff --git a/drivers/firmware/google/gsmi.c b/drivers/firmware/google/gsmi.c
index c8f169b..1ad29f7 100644
--- a/drivers/firmware/google/gsmi.c
+++ b/drivers/firmware/google/gsmi.c
@@ -508,7 +508,7 @@ static ssize_t eventlog_write(struct file *filp, struct 
kobject *kobj,
 
 }
 
-static struct bin_attribute eventlog_bin_attr = {
+static const struct bin_attribute eventlog_bin_attr = {
.attr = {.name = "append_to_eventlog", .mode = 0200},
.write = eventlog_write,
 };
diff --git a/drivers/firmware/google/memconsole.c 
b/drivers/firmware/google/memconsole.c
index 166f07c..b11a02c 100644
--- a/drivers/firmware/google/memconsole.c
+++ b/drivers/firmware/google/memconsole.c
@@ -33,7 +33,7 @@ static ssize_t memconsole_read(struct file *filp, struct 
kobject *kobp,
return memconsole_read_func(buf, pos, count);
 }
 
-static struct bin_attribute memconsole_bin_attr = {
+static const struct bin_attribute memconsole_bin_attr = {
.attr = {.name = "log", .mode = 0444},
.read = memconsole_read,
 };
diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 0e20116..f254977 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -343,7 +343,7 @@ static ssize_t fw_cfg_sysfs_read_raw(struct file *filp, 
struct kobject *kobj,
return count;
 }
 
-static struct bin_attribute fw_cfg_sysfs_attr_raw = {
+static const struct bin_attribute fw_cfg_sysfs_attr_raw = {
.attr = { .name = "raw", .mode = S_IRUSR },
.read = fw_cfg_sysfs_read_raw,
 };
-- 
1.9.1

Re: [Qemu-devel] Where is the VM live migration code?

2017-08-02 Thread Eric Blake

On 08/02/2017 07:18 AM, Daniel P. Berrange wrote:
> On Wed, Aug 02, 2017 at 04:25:08PM +0530, Aditya Bhardwaj wrote:
>> My lab setup is: 3 HP Servers each of 4 GB RAM. Server 1 and Server 2:
>> installed with Virtualization using KVM on Redhat 6.5 System. On top of
>> this I am using libvirtd for managing virtual machines. NFS server is
>> installed on Server 3 for shared disk image. VM migration is working
>> correctly. Now i need to do some changes in "migration.c" but did not find
>> any source code of KVM-QEMU migration.
>>
>> Where I can find the migration source code for KVM-QEMU. All are requested
>> to help me for this issue.
> 
> QEMU source code is in GIT
> 
>   https://git.qemu.org/gitweb.cgi?p=qemu.git;a=summary
> 
> in the 'migration' directory.
> 
> If you're new writing code for QEMU then see this page too
> 
> https://wiki.qemu.org/index.php/Documentation/GettingStartedDevelopers

I've also got a patch pending to make the various qemu executables
provide a link to qemu.org as part of their --help output (which will in
turn make it easier to find links to the source code):

https://lists.gnu.org/archive/html/qemu-devel/2017-07/msg09049.html

It needs some review if it's going to be part of 2.10.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH v3 2/6] seccomp: add obsolete argument to command line

2017-08-02 Thread Daniel P. Berrange

On Fri, Jul 28, 2017 at 02:10:36PM +0200, Eduardo Otubo wrote:
> This patch introduces the argument [,obsolete=allow] to the `-sandbox on'
> option. It allows Qemu to run safely on old system that still relies on
> old system calls.
> 
> Signed-off-by: Eduardo Otubo 
> ---
>  include/sysemu/seccomp.h |  4 +++-
>  qemu-options.hx  |  9 +++--
>  qemu-seccomp.c   | 32 +++-
>  vl.c | 16 +++-
>  4 files changed, 56 insertions(+), 5 deletions(-)
> 
> diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h
> index cfc06008cb..7a7bde246b 100644
> --- a/include/sysemu/seccomp.h
> +++ b/include/sysemu/seccomp.h
> @@ -15,7 +15,9 @@
>  #ifndef QEMU_SECCOMP_H
>  #define QEMU_SECCOMP_H
>  
> +#define OBSOLETE0x0001

Please namespace this - its far too generic a term to expose to other
source files. I'd suggest 

  QEMU_SECCOMP_SET_OBSOLETE

> -int seccomp_start(void);
> +int seccomp_start(uint8_t seccomp_opts);

This only allows for 8 sets. Perhaps its enough, but I'd suggest
just using a uint32_t straight away.

> diff --git a/qemu-options.hx b/qemu-options.hx
> index 746b5fa75d..54e492f36a 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -4004,13 +4004,18 @@ Old param mode (ARM only).
>  ETEXI
>  
>  DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \
> -"-sandbox   Enable seccomp mode 2 system call filter (default 
> 'off').\n",
> +"-sandbox on[,obsolete=allow]  Enable seccomp mode 2 system call filter 
> (default 'off').\n" \
> +"obsolete: Allow obsolete system calls\n",
>  QEMU_ARCH_ALL)
>  STEXI
> -@item -sandbox @var{arg}
> +@item -sandbox @var{arg}[,obsolete=@var{string}]
>  @findex -sandbox
>  Enable Seccomp mode 2 system call filter. 'on' will enable syscall filtering 
> and 'off' will
>  disable it.  The default is 'off'.
> +@table @option
> +@item obsolete=@var{string}
> +Enable Obsolete system calls

Lets explain this a bit more.

E obsolete system calls that are provided by the kernel, but typically no
longer used by modern C library implementations. 

> +@end table
>  ETEXI
>  
>  DEF("readconfig", HAS_ARG, QEMU_OPTION_readconfig,
> diff --git a/qemu-seccomp.c b/qemu-seccomp.c
> index f8877b07b5..c6a8b28260 100644
> --- a/qemu-seccomp.c
> +++ b/qemu-seccomp.c
> @@ -31,6 +31,20 @@ struct QemuSeccompSyscall {
>  uint8_t priority;
>  };
>  
> +static const struct QemuSeccompSyscall obsolete[] = {
> +{ SCMP_SYS(readdir), 255 },
> +{ SCMP_SYS(_sysctl), 255 },
> +{ SCMP_SYS(bdflush), 255 },
> +{ SCMP_SYS(create_module), 255 },
> +{ SCMP_SYS(get_kernel_syms), 255 },
> +{ SCMP_SYS(query_module), 255 },
> +{ SCMP_SYS(sgetmask), 255 },
> +{ SCMP_SYS(ssetmask), 255 },
> +{ SCMP_SYS(sysfs), 255 },
> +{ SCMP_SYS(uselib), 255 },
> +{ SCMP_SYS(ustat), 255 },
> +};
> +
>  static const struct QemuSeccompSyscall blacklist[] = {
>  { SCMP_SYS(reboot), 255 },
>  { SCMP_SYS(swapon), 255 },
> @@ -56,7 +70,20 @@ static const struct QemuSeccompSyscall blacklist[] = {
>  { SCMP_SYS(vserver), 255 },
>  };
>  
> -int seccomp_start(void)
> +static int is_obsolete(int syscall)
> +{
> +unsigned int i = 0;
> +
> +for (i = 0; i < ARRAY_SIZE(obsolete); i++) {
> +if (syscall == obsolete[i].num) {
> +return 1;
> +}
> +}
> +
> +return 0;
> +}
> +
> +int seccomp_start(uint8_t seccomp_opts)
>  {
>  int rc = 0;
>  unsigned int i = 0;
> @@ -69,6 +96,9 @@ int seccomp_start(void)
>  }
>  
>  for (i = 0; i < ARRAY_SIZE(blacklist); i++) {
> +if ((seccomp_opts & OBSOLETE) && is_obsolete(blacklist[i].num)) {
> +continue;
> +}

IMHO this is leading to a rather inefficient approach. Why not extend
QemuSeccompSyscall struct so that it has another field to list which
set it belongs to. Then you can do


  static const struct QemuSeccompSyscall blacklist[] = {
{ SCMP_SYS(reboot), 255, QEMU_SECCOMP_SET_DEFAULT },
{ SCMP_SYS(swapon), 255, QEMU_SECCOMP_SET_DEFAULT },
 
{ SCMP_SYS(readdir), 255, QEMU_SECCOMP_SET_OBSOLETE },
{ SCMP_SYS(_sysctl), 255, QEMU_SECCOMP_SET_OBSOLETE },
...

And then to process this you can do

  for (i = 0; i < ARRAY_SIZE(blacklist); i++) {
  if (blacklist[i].set != QEMU_SECCOMP_SET_OBSOLETE &&
  blacklist[i].set & seccomp_opts) {
  continue;
  }


>  rc = seccomp_rule_add(ctx, SCMP_ACT_KILL, blacklist[i].num, 0);
>  if (rc < 0) {
>  goto seccomp_return;
> diff --git a/vl.c b/vl.c
> index 15b98800e9..cbe09c94af 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -271,6 +271,10 @@ static QemuOptsList qemu_sandbox_opts = {
>  .name = "enable",
>  .type = QEMU_OPT_BOOL,
>  },
> +{
> +.name = "obsolete",
> +.type = QEMU_OPT_STRING,
> +},
>  { /* end of list */ }
>  },
>  };
> @@

1 2 >

1 - 100 of 176 matches

Mail list logo