date:20151104

[Qemu-devel] [kvm-unit-test RFC] x86: Memory instructions test case

2015-11-04 Thread Eduardo Habkost

Quickly hacked test case for memory instructions (clflush, mfence,
sfence, lfence, clflushopt, clwb, pcommit), that simply checks for #UD
exceptions.

This was useful to test TCG handling of those instructions.

The "fake clwb" part will probably break once a new instruction use
those opcodes.

Signed-off-by: Eduardo Habkost 
---
 config/config-x86-common.mak |  2 +
 config/config-x86_64.mak |  2 +-
 x86/memory.c | 88 
 3 files changed, 91 insertions(+), 1 deletion(-)
 create mode 100644 x86/memory.c

diff --git a/config/config-x86-common.mak b/config/config-x86-common.mak
index c2f9908..b89684d 100644
--- a/config/config-x86-common.mak
+++ b/config/config-x86-common.mak
@@ -108,6 +108,8 @@ $(TEST_DIR)/vmx.elf: $(cstart.o) $(TEST_DIR)/vmx.o 
$(TEST_DIR)/vmx_tests.o
 
 $(TEST_DIR)/debug.elf: $(cstart.o) $(TEST_DIR)/debug.o
 
+$(TEST_DIR)/memory.elf: $(cstart.o) $(TEST_DIR)/memory.o
+
 arch_clean:
$(RM) $(TEST_DIR)/*.o $(TEST_DIR)/*.flat $(TEST_DIR)/*.elf \
$(TEST_DIR)/.*.d lib/x86/.*.d
diff --git a/config/config-x86_64.mak b/config/config-x86_64.mak
index 7d4eb34..ec4bded 100644
--- a/config/config-x86_64.mak
+++ b/config/config-x86_64.mak
@@ -7,7 +7,7 @@ tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \
  $(TEST_DIR)/emulator.flat $(TEST_DIR)/idt_test.flat \
  $(TEST_DIR)/xsave.flat $(TEST_DIR)/rmap_chain.flat \
  $(TEST_DIR)/pcid.flat $(TEST_DIR)/debug.flat \
- $(TEST_DIR)/ioapic.flat
+ $(TEST_DIR)/ioapic.flat $(TEST_DIR)/memory.flat
 tests += $(TEST_DIR)/svm.flat
 tests += $(TEST_DIR)/vmx.flat
 tests += $(TEST_DIR)/tscdeadline_latency.flat
diff --git a/x86/memory.c b/x86/memory.c
new file mode 100644
index 000..cd1eb46
--- /dev/null
+++ b/x86/memory.c
@@ -0,0 +1,88 @@
+/*
+ * Test for x86 cache and memory instructions
+ *
+ * Copyright (c) 2015 Red Hat Inc
+ *
+ * Authors:
+ *  Eduardo Habkost 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#include "libcflat.h"
+#include "desc.h"
+#include "processor.h"
+
+static long target;
+static volatile int ud;
+static volatile int isize;
+
+static void handle_ud(struct ex_regs *regs)
+{
+   ud = 1;
+   regs->rip += isize;
+}
+
+int main(int ac, char **av)
+{
+   struct cpuid cpuid7, cpuid1;
+   int xfail;
+
+   setup_idt();
+   handle_exception(UD_VECTOR, handle_ud);
+
+   cpuid1 = cpuid(1);
+   cpuid7 = cpuid_indexed(7, 0);
+
+   /* 3-byte instructions: */
+   isize = 3;
+
+   xfail = !(cpuid1.d & (1U << 19)); /* CLFLUSH */
+   ud = 0;
+   asm volatile("clflush (%0)" : : "b" ());
+   report_xfail("clflush", xfail, ud == 0);
+
+   xfail = !(cpuid1.d & (1U << 25)); /* SSE */
+   ud = 0;
+   asm volatile("sfence");
+   report_xfail("sfence", xfail, ud == 0);
+
+   xfail = !(cpuid1.d & (1U << 26)); /* SSE2 */
+   ud = 0;
+   asm volatile("lfence");
+   report_xfail("lfence", xfail, ud == 0);
+
+   ud = 0;
+   asm volatile("mfence");
+   report_xfail("mfence", xfail, ud == 0);
+
+   /* 4-byte instructions: */
+   isize = 4;
+
+   xfail = !(cpuid7.b & (1U << 23)); /* CLFLUSHOPT */
+   ud = 0;
+   /* clflushopt (%rbx): */
+   asm volatile(".byte 0x66, 0x0f, 0xae, 0x3b" : : "b" ());
+   report_xfail("clflushopt", xfail, ud == 0);
+
+   xfail = !(cpuid7.b & (1U << 24)); /* CLWB */
+   ud = 0;
+   /* clwb (%rbx): */
+   asm volatile(".byte 0x66, 0x0f, 0xae, 0x33" : : "b" ());
+   report_xfail("clwb", xfail, ud == 0);
+
+   ud = 0;
+   /* clwb requires a memory operand, the following is NOT a valid
+* CLWB instruction (modrm == 0xF0).
+*/
+   asm volatile(".byte 0x66, 0x0f, 0xae, 0xf0");
+   report("fake clwb", ud);
+
+   xfail = !(cpuid7.b & (1U << 22)); /* PCOMMIT */
+   ud = 0;
+   /* pcommit: */
+   asm volatile(".byte 0x66, 0x0f, 0xae, 0xf8");
+   report_xfail("pcommit", xfail, ud == 0);
+
+   return report_summary();
+}
-- 
2.1.0

Re: [Qemu-devel] [PATCH 04/13] cuda.c: fix CUDA ADB error packet format

2015-11-04 Thread Mark Cave-Ayland

On 04/11/15 03:12, David Gibson wrote:

> On Fri, Oct 23, 2015 at 02:56:29PM +0100, Mark Cave-Ayland wrote:
>> ADB error packets should be of the form (type, status, cmd) rather than just
>> (type, status). This fixes ADB device detection under MacOS 9.
> 
> Hmm.. are there any public doc on these ADB / CUDA things that we can
> look up to compare to?

Sadly no. The main process for working during GSoC was to add logging
into various areas to see the point at which execution stopped, then
compare with MOL, code up the difference and see if this helps boot
progress.

Pretty much every other OS will read in the entire CUDA packet based
upon the length and check the status for errors, whereas it seems MacOS
9 additionally checks the right number of bytes are received in the
reply (even if they are never used).

>> Signed-off-by: Mark Cave-Ayland 
>> ---
>>  hw/misc/macio/cuda.c |6 --
>>  1 file changed, 4 insertions(+), 2 deletions(-)
>>
>> diff --git a/hw/misc/macio/cuda.c b/hw/misc/macio/cuda.c
>> index 5d7043e..9ec19af 100644
>> --- a/hw/misc/macio/cuda.c
>> +++ b/hw/misc/macio/cuda.c
>> @@ -560,19 +560,21 @@ static void cuda_receive_packet_from_host(CUDAState *s,
>>  switch(data[0]) {
>>  case ADB_PACKET:
>>  {
>> -uint8_t obuf[ADB_MAX_OUT_LEN + 2];
>> +uint8_t obuf[ADB_MAX_OUT_LEN + 3];
>>  int olen;
>>  olen = adb_request(>adb_bus, obuf + 2, data + 1, len - 1);
>>  if (olen > 0) {
>>  obuf[0] = ADB_PACKET;
>>  obuf[1] = 0x00;
>> +cuda_send_packet_to_host(s, obuf, olen + 2);
>>  } else {
>>  /* error */
>>  obuf[0] = ADB_PACKET;
>>  obuf[1] = -olen;
>> +obuf[2] = data[1];
>>  olen = 0;
>> +cuda_send_packet_to_host(s, obuf, olen + 3);
> 
> Using 'olen + 3' seems confusing here, rather than just '3', since you
> just set olen = 0.

I do prefer the way it is at the moment, because all the calls to
cuda_send_packet_to_host() use a length of the form "olen + X" which
tells you the payload length (0) + the header length (X). So from this
we know that there are just the 3 standard header bytes and no
additional payload being returned by this particular transaction.

>>  }
>> -cuda_send_packet_to_host(s, obuf, olen + 2);
>>  }
>>  break;
>>  case CUDA_PACKET:
> 

ATB,

Mark.

[Qemu-devel] [PATCH 2/2] target-i386: tcg: Check right CPUID bits for clflushopt/pcommit

2015-11-04 Thread Eduardo Habkost

Detect the clflushopt and pcommit instructions and check their
corresponding feature flags, instead of checking CPUID_SSE and
CPUID_CLFLUSH.

Signed-off-by: Eduardo Habkost 
---
 target-i386/translate.c | 28 
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/target-i386/translate.c b/target-i386/translate.c
index bac1685..a938967 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -7731,16 +7731,28 @@ static target_ulong disas_insn(CPUX86State *env, 
DisasContext *s,
 goto illegal_op;
 }
 break;
-case 7: /* sfence / clflush */
+case 7: /* sfence / clflush / clflushopt / pcommit */
 if ((modrm & 0xc7) == 0xc0) {
-/* sfence */
-/* XXX: also check for cpuid_ext2_features & CPUID_EXT2_EMMX */
-if (!(s->cpuid_features & CPUID_SSE))
-goto illegal_op;
+if (s->prefix & PREFIX_DATA) {
+/* pcommit */
+if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_PCOMMIT))
+goto illegal_op;
+} else {
+/* sfence */
+/* XXX: also check for cpuid_ext2_features & 
CPUID_EXT2_EMMX */
+if (!(s->cpuid_features & CPUID_SSE))
+goto illegal_op;
+}
 } else {
-/* clflush */
-if (!(s->cpuid_features & CPUID_CLFLUSH))
-goto illegal_op;
+if (s->prefix & PREFIX_DATA) {
+/* clflushopt */
+if (!(s->cpuid_7_0_ebx_features & 
CPUID_7_0_EBX_CLFLUSHOPT))
+goto illegal_op;
+} else {
+/* clflush */
+if (!(s->cpuid_features & CPUID_CLFLUSH))
+goto illegal_op;
+}
 gen_lea_modrm(env, s, modrm);
 }
 break;
-- 
2.1.0

Re: [Qemu-devel] [PATCH v8 10/17] qapi: Simplify visiting of alternate types

2015-11-04 Thread Eric Blake

On 11/04/2015 09:03 AM, Markus Armbruster wrote:

> Conclusions:
> 
> * Having two different name manglers is a headache we could do without,
>   especially since the second one camel_to_upper() is pretty magic.
> 
>   We have it only to get
> 
> typedef enum BlockDeviceIoStatus {
> BLOCK_DEVICE_IO_STATUS_OK = 0,
> BLOCK_DEVICE_IO_STATUS_FAILED = 1,
> BLOCK_DEVICE_IO_STATUS_NOSPACE = 2,
> BLOCK_DEVICE_IO_STATUS_MAX = 3,
> } BlockDeviceIoStatus;
> 
>   instead of
> 
> typedef enum BlockDeviceIoStatus {
> BlockDeviceIoStatus_ok = 0,
> BlockDeviceIoStatus_failed = 1,
> BlockDeviceIoStatus_nospace = 2,
> BlockDeviceIoStatus_MAX = 3,
> } BlockDeviceIoStatus;
> 
>   Bah!  CODING_STYLE doesn't even ask for shouting enumeration
>   constants.  Can't see why we do.

Interesting idea.  In fact, if we went one step further:

BlockDeviceIoStatus_ok = 0,
...
BlockDeviceIoStatusMAX = 3.

(that is, typename + '_' + value for user values, and typename + 'MAX'
for the sentinel), then the max value _cannot_ collide with any of the
other values.

> 
> * Keeping the complexity of the rules under control is good both for
>   qapi.py and for the QAPI schema language.
> 
>   To that end, I think we should consider reserving the same set of
>   names both for members and tag values.  It gets rid of complications
>   like enumerations you can't use as flat union tags.
> 
>   Additionally, the question whether to keep the door open for
>   generating an enum for the alternate cases becomes moot.
> 
> What do you think?

I like the idea. Don't know if it's too late for 2.5, though.

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature

[Qemu-devel] Implementing a Runtime Shadow Stack

2015-11-04 Thread Jhong Chung, Juan


Hi qemu-devel,

I’m trying to start a project and I need some guidance. I want to 
develop a runtime shadow stack for any binary executed under qemu 
regardless of the guest or host architecture. This will be a proof of 
concept for a course project. For now I don’t need to worry about 
overhead or optimization.


The basic functionality that I'm trying to implement in QEMU is to to 
save the stack pointer when I enter a function and when a function 
returns, compare the current stack pointer to the saved one in case it’s 
been modified.


I apologize in advance for my limited knowledge of QEMU development. 
Newbie here. I've been reading the documentation as much as I can.



What I understand is that for any binary executed in qemu, the guest 
architecture instructions are turned into tiny code by the TCG. Tiny 
code operations are then able to run in the host architecture.


I have two ideas to implement a runtime shadow stack:

1. Modify the binary running under QEMU and add extra guest instructions 
to save the stack pointer after “call” and add a compare instructions 
after “ret”. After that the TCG will take care of translate my newly 
added instructions to tiny code and in theory they will be executed with 
no problem in the host. Does QEMU allow modifying binaries?


2. Modify the TCG: Is it possible to modify the TCG so that when a 
“call” instruction is encounted, it generates tiny code as usual, but it 
also adds an extra function to save the stack pointer? The same idea 
applies to when the TCG encounters a “ret” guest instruction. I want to 
add an additional function that compares the return address and the 
saved stack pointer.


Which approach do you think is better/easier or possible to begin with? 
I’m leaning toward making changes to the TCG. My team and I will open 
source this project. Any pointers are appreciated!


Thanks!

Juan Jhong-Chung

[Qemu-devel] [PATCH 0/2] target-i386: tcg: Handle clflushopt/clwb/pcommit instructions

2015-11-04 Thread Eduardo Habkost

This series makes TCG accept the clwb instruction, and make it check the right
CPUID bits when handling clflushopt and pcommit.

Tested using the kvm-unit-test patch I sent earlier today:
  Subject: [kvm-unit-test RFC] x86: Memory instructions test case
  Message-Id: <1446672079-8549-1-git-send-email-ehabk...@redhat.com>

Eduardo Habkost (2):
  target-i386: tcg: Accept clwb instruction
  target-i386: tcg: Check right CPUID bits for clflushopt/pcommit

 target-i386/translate.c | 39 +++
 1 file changed, 31 insertions(+), 8 deletions(-)

-- 
2.1.0

[Qemu-devel] [PATCH 1/2] target-i386: tcg: Accept clwb instruction

2015-11-04 Thread Eduardo Habkost

Accept the clwb instruction (66 0F AE /6) if its corresponding feature
flag is enabled on CPUID[7].

Signed-off-by: Eduardo Habkost 
---
 target-i386/translate.c | 13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/target-i386/translate.c b/target-i386/translate.c
index b400d24..bac1685 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -7716,10 +7716,21 @@ static target_ulong disas_insn(CPUX86State *env, 
DisasContext *s,
 }
 break;
 case 5: /* lfence */
-case 6: /* mfence */
 if ((modrm & 0xc7) != 0xc0 || !(s->cpuid_features & CPUID_SSE2))
 goto illegal_op;
 break;
+case 6: /* mfence/clwb */
+if (s->prefix & PREFIX_DATA) {
+/* clwb */
+if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLWB))
+goto illegal_op;
+gen_lea_modrm(env, s, modrm);
+} else {
+/* mfence */
+if ((modrm & 0xc7) != 0xc0 || !(s->cpuid_features & 
CPUID_SSE2))
+goto illegal_op;
+}
+break;
 case 7: /* sfence / clflush */
 if ((modrm & 0xc7) == 0xc0) {
 /* sfence */
-- 
2.1.0

Re: [Qemu-devel] [PATCH v3 0/4] SysFS driver for QEMU fw_cfg device

2015-11-04 Thread Gabriel L. Somlo

On Mon, Oct 05, 2015 at 03:18:05PM +0200, Paolo Bonzini wrote:
> 
> 
> On 05/10/2015 14:50, Peter Maydell wrote:
> > If you want to try to support "firmware might also be reading
> > fw_cfg at the same time as the kernel" this is a (painful)
> > problem regardless of how the kernel figures out whether a
> > fw_cfg device is present. I had assumed that one of the design
> > assumptions of this series was that firmware would only
> > read the fw_cfg before booting the guest kernel and never touch
> > it afterwards. If it might touch it later then letting the
> > guest kernel also mess with fw_cfg seems like a really bad idea.
> 
> The idea of tinkering with fw_cfg from the AML code (DSDT/SSDT) has been
> proposed many times, and always dropped.  One of the reasons was that
> the OS could have a driver for fw_cfg.
> 
> So I think that we can define the QEMU0002 id as owned by the OSPM,
> similar to the various standard ACPI ids that are usually found in the
> x86 world (e.g. PNP0B00 is a mc146818 RTC, PNP0303 is an 8042 keyboard
> controller, PNP0501 is a 16550 or similar UART, and so on).  This
> basically sanctions _CRS as the way to pass information from the
> firmware to the OSPM, also similarly to those standard PNP ids.

OK, so I don't expect to go the "pure ACPI" route in the final
version, mainly because I'm still hoping to fill the requirement
of writing a driver which can use either ACPI or DT to detect the
presence of fw_cfg (how I'm going to compile this on kernels with
no ACPI or no DT support is still TBD, and probably will have to
involve #ifdef, but I digress :)

However, Michael's idea of having ACPI supply an "accessor method" for
the guest kernel driver to call, without having to worry about the
specific details in _CRS, sounded intriguing enough to at least explore
in further detail.

So, assuming we have the following fw_cfg AML in ssdt (i386) or
dsdt (arm) (using cpp-style #ifdef to highlight the arch-specific
bits):

Scope (\_SB)
{
Device (FWCF)
{
Name (_HID, EisaId ("QMU0002"))  // _HID: Hardware ID
Name (_STA, 0x0B)  // _STA: Status

#ifdef ARCH_X86

Name (_CRS, ResourceTemplate ()
{
IO (Decode16,
0x0510, // Range Minimum
0x0510, // Range Maximum
0x01,   // Alignment
0x02,   // Length
)
})

#else /* ARCH_ARM */

NAME (_CRS, ResourceTemplate ()
{
Memory32Fixed (ReadWrite,
   0x0902,  // Address Base
   0x000a,  // Address Length
  )
})

#endif

}
}

I can easily patch QEMU to additionally insert the following into
"Device (FWCF)":

#ifdef ARCH_X86

OperationRegion (FWCR, SystemIO, 0x0510, 0x02)
Field (FWCR, WordAcc, NoLock, Preserve)
{
FWCC,   16
}
Field (FWCR, ByteAcc, NoLock, Preserve)
{
Offset (0x01),
FWCD,   8
}

#else /* ARCH_ARM */

OperationRegion (FWCR, SystemMemory, 0x0902, 0x0a)
Field (FWCR, WordAcc, NoLock, Preserve)
{
Offset (0x08),
FWCC,   16  // not sure about endianness on ARM here, but
// I can still address this from the userspace
// kernel driver if needed
}
Field (FWCR, ByteAcc, NoLock, Preserve)
{
FWCD,   8
}

#endif

Method (RDBL, 3, Serialized) // (Arg0 = key, Arg1 = skip, Arg2 = count)
{
FWCC = Arg0
Local0 = Zero
While ((Local0 < Arg1))
{
Local1 = FWCD
Local0++
}
Name (BBUF, Buffer (Arg2) {})
Local0 = Zero
While ((Local0 < Arg2))
{
Index (BBUF, Local0) = FWCD /* \_SB_.FWCF.FWCD */
Local0++
}
Return (BBUF) /* \_SB_.FWCF.RDBL.BBUF */
}

With the host generating the additional RDBL method above, I could
write a "pure ACPI" driver for the guest kernel, where instead of the
current fw_cfg_read_blob() logic:


  static DEFINE_MUTEX(fw_cfg_dev_lock);
  static bool fw_cfg_is_mmio;

  static inline u16 fw_cfg_sel_endianness(u16 key)
  {
  return fw_cfg_is_mmio ? cpu_to_be16(key) : cpu_to_le16(key);
  }

  static inline void fw_cfg_read_blob(u16 key,
  void *buf, loff_t pos, size_t count)
  {
  mutex_lock(_cfg_dev_lock);
  iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
  while (pos-- > 0)
  ioread8(fw_cfg_reg_data);
  ioread8_rep(fw_cfg_reg_data, buf, count);
  mutex_unlock(_cfg_dev_lock);
  }


I could instead write something like this:


  struct acpi_device *dev;/* set during module init.  */

  static inline

Re: [Qemu-devel] [PATCH v9 04/27] qapi: Simplify error testing in test-qmp-*

2015-11-04 Thread Eric Blake

On 11/04/2015 01:40 AM, Markus Armbruster wrote:

> 
>> By moving err into data, we can let test teardown take care
>> of cleaning up any collected error; it also gives us fewer
>> lines of code between repeated tests where init runs teardown
>> on our behalf.
> 
> This part isn't as obvious.
> 
> Having two parts of differing obviousness indicates patch splitting
> could make sense.  Especially when the parts are large and mechanical,
> because reviewing large mechanical changes is much easier when there's
> just one kind of it.

Will split.

>> @@ -364,21 +341,17 @@ static void 
>> test_visitor_in_alternate_number(TestInputVisitorData *data,
>>  /* Parsing an int */
>>
>>  v = visitor_input_test_init(data, "42");
>> -visit_type_AltStrBool(v, , NULL, );
>> -g_assert(err);
>> -error_free(err);
>> -err = NULL;
>> +visit_type_AltStrBool(v, , NULL, >err);
>> +g_assert(data->err);
>>  qapi_free_AltStrBool(asb);
> 
> This leaves data->err non-null.
> 
>>
>>  /* FIXME: Order of alternate should not affect semantics; asn should
>>   * parse the same as ans */
>>  v = visitor_input_test_init(data, "42");

And this part wipes out data, so that data->err is once again NULL.

>> -visit_type_AltStrNum(v, , NULL, );
>> +visit_type_AltStrNum(v, , NULL, >err);
> 
> If visit_type_AltStrNum() errors out, its error will be thrown away by
> its error_propagate(), because data->err is already non-null.

No, you missed that this patch is relying on the magic in 3/27 that
wipes out data on every new test.

> 
> If it fails to error out, its error_propagate() does nothing, and we
> won't detect the test failure.
> 
> Your patch makes forgetting to reset err an even easier mistake to make,
> because it removes most of the error_free() that serve as reminders.
> 
> Is it a good idea anyway?  Let's discuss before I check the remainder of
> this patch.

The point was that by moving err _into_ data, then the resetting of err
becomes as simple as resetting data, rather than having to be verbose at
every use of data->err.  We just need a visitor_input_test_init() in
between.

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [RFC v2] Add support for KVM_CAP_SPLIT_IRQCHIP

2015-11-04 Thread Paolo Bonzini



On 02/11/2015 22:19, Matt Gingell wrote:
> Hi Jan, 
> 
> Would you be able to look this over? I’d like to get this into shape to 
> commit, 
> either now or once the corresponding kernel patch goes in.
> 
> Thanks,
> Matt
> 
> Add support for KVM_CAP_SPLIT_IRQCHIP
> 
> Adds a new alternative 'split' to the -machine kernel-irqchip option.
> When split mode is specified:
> 
>  1.) KVM_CAP_SPLIT_IRQCHIP is enabled.
> 
>  2.) The PIC, PIT, and IOAPIC are implemented in userspace while the
>  LAPIC is implemented by KVM.
> 
>  3.) The software IOAPIC delivers interrupts to the KVM LAPIC via
>  kvm_set_irq. Interrupt delivery is configured via the MSI routing
>  table, for which routes are reserved in target-i386/kvm.c then
>  configured in hw/intc/ioapic.c
> 
>  4.) KVM delivers IOAPIC EOIs via a new exit KVM_EXIT_IOAPIC_EOI,
>  which is handled in target-i386/kvm.c and relayed to the software
>  IOAPIC via ioapic_eoi_broadcast.

I had looked at v1, and there was nothing surprising.  I was travelling
and couldn't comment immediately.

I only have a couple of notes:

1) the kvm_*_in_kernel() rename should be a separate patch;

2) this:

+if (machine_kernel_irqchip_split(machine)) {
+ret = kvm_vm_enable_cap(s, KVM_CAP_SPLIT_IRQCHIP, 0, 24);
+if (ret) {
+error_report("Could not enable split irqchip mode: %s\n",
+ strerror(-ret));

and "bool kvm_split_irqchip" should go in target-i386/kvm.c, using the
existing kvm_arch_irqchip_create hook;

3) other arches than i386 should reject kernel_irqchip=split.

More importantly, I would like to understand what is the kernel issue
that you are getting.  It should be resolved before 4.4 goes out in two
months, of course.

Paolo

[Qemu-devel] [Bug 1513234] [NEW] Cannot ping guest from host after closing laptop lid, and re-opening

2015-11-04 Thread Steven Combs

Public bug reported:

I am running Ubuntu 15.10 (this issue also exists on 15.04) x64. Desktop
environment to re-produce is either GNOME 3.16 or OpenBox-3.

I have a Windows 8.1 VM that I run with QEMU and I will work out of that
for my job most of the day. When I am going to leave I like to just
close my laptop lid, come home, and then get back at it. Unfortunately
whenever I get home and open back up my laptop, I can no longer RDP into
my VM and can no longer ping it from the host.

If I open up Virt-Manager I can see the desktop via the Console page but
cannot RDP into it with FreeRDP (I use FreeRDP all day on this machine
so I know this works fine).

If I use the Console tab to login to the Windows VM again, I notice that
I can ping the host from the guest and am connected to the internet.
Just can't seem to communicate with the VM via its IP anymore.

I have a NIC NAT virtual card and am using a Bridge

** Affects: qemu
 Importance: Undecided
 Status: New

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1513234

Title:
  Cannot ping guest from host after closing laptop lid, and re-opening

Status in QEMU:
  New

Bug description:
  I am running Ubuntu 15.10 (this issue also exists on 15.04) x64.
  Desktop environment to re-produce is either GNOME 3.16 or OpenBox-3.

  I have a Windows 8.1 VM that I run with QEMU and I will work out of
  that for my job most of the day. When I am going to leave I like to
  just close my laptop lid, come home, and then get back at it.
  Unfortunately whenever I get home and open back up my laptop, I can no
  longer RDP into my VM and can no longer ping it from the host.

  If I open up Virt-Manager I can see the desktop via the Console page
  but cannot RDP into it with FreeRDP (I use FreeRDP all day on this
  machine so I know this works fine).

  If I use the Console tab to login to the Windows VM again, I notice
  that I can ping the host from the guest and am connected to the
  internet. Just can't seem to communicate with the VM via its IP
  anymore.

  I have a NIC NAT virtual card and am using a Bridge

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1513234/+subscriptions

Re: [Qemu-devel] [PATCH 05/13] cuda.c: fix CUDA_PACKET response packet format

2015-11-04 Thread Mark Cave-Ayland

On 04/11/15 03:15, David Gibson wrote:

> On Fri, Oct 23, 2015 at 02:56:30PM +0100, Mark Cave-Ayland wrote:
>> According to comments in MOL, the response to a CUDA_PACKET should be one of
>> the following:
>>
>> Reply: CUDA_PACKET, status, cmd
>> Error: ERROR_PACKET, status, CUDA_PACKET, cmd
>>
>> Update cuda_receive_packet() accordingly to reflect this.
>>
>> Signed-off-by: Mark Cave-Ayland 
> 
> Code seems to match the description, but I don't have another source
> to check what the right thing to do is for CUDA.

Again there is no official documentation for this other than the
comments in MOL's src/drivers/via-cuda.c cuda_packet() function :(

>> ---
>>  hw/misc/macio/cuda.c |   24 +---
>>  1 file changed, 5 insertions(+), 19 deletions(-)
>>
>> diff --git a/hw/misc/macio/cuda.c b/hw/misc/macio/cuda.c
>> index 9ec19af..88a0999 100644
>> --- a/hw/misc/macio/cuda.c
>> +++ b/hw/misc/macio/cuda.c
>> @@ -480,7 +480,7 @@ static void cuda_adb_poll(void *opaque)
>>  static void cuda_receive_packet(CUDAState *s,
>>  const uint8_t *data, int len)
>>  {
>> -uint8_t obuf[16];
>> +uint8_t obuf[16] = { CUDA_PACKET, 0, data[0] };
>>  int autopoll;
>>  uint32_t ti;
>>  
>> @@ -497,23 +497,15 @@ static void cuda_receive_packet(CUDAState *s,
>>  timer_del(s->adb_poll_timer);
>>  }
>>  }
>> -obuf[0] = CUDA_PACKET;
>> -obuf[1] = data[1];
>> -cuda_send_packet_to_host(s, obuf, 2);
>> +cuda_send_packet_to_host(s, obuf, 3);
>>  break;
>>  case CUDA_SET_TIME:
>>  ti = (((uint32_t)data[1]) << 24) + (((uint32_t)data[2]) << 16) + 
>> (((uint32_t)data[3]) << 8) + data[4];
>>  s->tick_offset = ti - (qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) / 
>> get_ticks_per_sec());
>> -obuf[0] = CUDA_PACKET;
>> -obuf[1] = 0;
>> -obuf[2] = 0;
>>  cuda_send_packet_to_host(s, obuf, 3);
>>  break;
>>  case CUDA_GET_TIME:
>>  ti = s->tick_offset + (qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) / 
>> get_ticks_per_sec());
>> -obuf[0] = CUDA_PACKET;
>> -obuf[1] = 0;
>> -obuf[2] = 0;
>>  obuf[3] = ti >> 24;
>>  obuf[4] = ti >> 16;
>>  obuf[5] = ti >> 8;
>> @@ -524,20 +516,14 @@ static void cuda_receive_packet(CUDAState *s,
>>  case CUDA_SET_DEVICE_LIST:
>>  case CUDA_SET_AUTO_RATE:
>>  case CUDA_SET_POWER_MESSAGES:
>> -obuf[0] = CUDA_PACKET;
>> -obuf[1] = 0;
>> -cuda_send_packet_to_host(s, obuf, 2);
>> +cuda_send_packet_to_host(s, obuf, 3);
>>  break;
>>  case CUDA_POWERDOWN:
>> -obuf[0] = CUDA_PACKET;
>> -obuf[1] = 0;
>> -cuda_send_packet_to_host(s, obuf, 2);
>> +cuda_send_packet_to_host(s, obuf, 3);
>>  qemu_system_shutdown_request();
>>  break;
>>  case CUDA_RESET_SYSTEM:
>> -obuf[0] = CUDA_PACKET;
>> -obuf[1] = 0;
>> -cuda_send_packet_to_host(s, obuf, 2);
>> +cuda_send_packet_to_host(s, obuf, 3);
>>  qemu_system_reset_request();
>>  break;
>>  default:


ATB,

Mark.

Re: [Qemu-devel] [PATCH v3 2/3] target-i386: calculate vcpu's TSC rate to be migrated

2015-11-04 Thread Eduardo Habkost

On Mon, Nov 02, 2015 at 05:26:42PM +0800, Haozhong Zhang wrote:
> The value of the migrated vcpu's TSC rate is determined as below.
>  1. If a TSC rate is specified by the cpu option 'tsc-freq', then this
> user-specified value will be used.
>  2. If neither a user-specified TSC rate nor a migrated TSC rate is
> present, we will use the TSC rate from KVM (returned by
> KVM_GET_TSC_KHZ).
>  3. Otherwise, we will use the migrated TSC rate.
> 
> Signed-off-by: Haozhong Zhang 
[...]
> diff --git a/target-i386/kvm.c b/target-i386/kvm.c
> index 64046cb..aae5e58 100644
> --- a/target-i386/kvm.c
> +++ b/target-i386/kvm.c
> @@ -3034,3 +3034,36 @@ int kvm_arch_msi_data_to_gsi(uint32_t data)
>  {
>  abort();
>  }
> +
> +int kvm_arch_setup_tsc_khz(CPUState *cs)
> +{
> +X86CPU *cpu = X86_CPU(cs);
> +CPUX86State *env = >env;
> +int r;
> +
> +/*
> + * Prepare vcpu's TSC rate to be migrated.
> + *
> + * - If the user specifies the TSC rate by cpu option 'tsc-freq',
> + *   we will use the user-specified value.
> + *
> + * - If there is neither user-specified TSC rate nor migrated TSC
> + *   rate, we will ask KVM for the TSC rate by calling
> + *   KVM_GET_TSC_KHZ.
> + *
> + * - Otherwise, if there is a migrated TSC rate, we will use the
> + *   migrated value.
> + */
> +if (env->tsc_khz) {
> +env->tsc_khz_saved = env->tsc_khz;
> +} else if (!env->tsc_khz_saved) {
> +r = kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ);
> +if (r < 0) {
> +fprintf(stderr, "KVM_GET_TSC_KHZ failed\n");
> +return r;
> +}

The lack of KVM_CAP_GET_TSC_KHZ should make QEMU abort, unless the user
is explicitly requesting a more strict mode where the TSC frequency will
be guaranteed to never change.

> +env->tsc_khz_saved = r;
> +}

Why do you need a separate tsc_khz_saved field, and don't simply use
tsc_khz? It would have the additional feature of letting QMP clients
query the current TSC rate by asking for the tsc-freq property on CPU
objects.


> +
> +return 0;
> +}

-- 
Eduardo

[Qemu-devel] Could not add PCI device with big memory to aarch64 VMs

2015-11-04 Thread liang yan


Hello, Laszlo,

(1)I am trying to add ivshmem device(PCI device with big memory) to my 
aarch64 vm.
So far, I could find device information from vm. But it seems vm did not 
create
correct resource file for this device. Do you have any idea that this 
happens?


I used the upstream EDK2 to build my UEFI firmware.

There are three BARs for this device, and memory map is assigned too, 
but only one

resource file is created.

My qemu supports ACPI 5.1 and the command line is :

  -device ivshmem,size=256M,chardev=ivshmem,msi=on,ioeventfd=on \
  -chardev socket,path=/tmp/ivshmem_socket,id=ivshmem \

The lspci information:

00:00.0 Host bridge: Red Hat, Inc. Device 0008
Subsystem: Red Hat, Inc Device 1100
Control: I/O- Mem- BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr- 
Stepping- SERR- FastB2B- DisINTx-
Status: Cap- 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- 
SERR- 

00:01.0 RAM memory: Red Hat, Inc Inter-VM shared memory
Subsystem: Red Hat, Inc QEMU Virtual Machine
Control: I/O- Mem- BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr- 
Stepping- SERR- FastB2B- DisINTx-
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- 
SERR- 
Interrupt: pin A routed to IRQ 255
Region 0: Memory at 20001000 (32-bit, non-prefetchable) [disabled] 
[size=256]
Region 1: Memory at 2000 (32-bit, non-prefetchable) [disabled] 
[size=4K]
Region 2: Memory at 1000 (64-bit, prefetchable) [disabled] 
[size=256M]

Capabilities: [40] MSI-X: Enable- Count=1 Masked-
Vector table: BAR=1 offset=
PBA: BAR=1 offset=0800

Boot information:

[2.380924] pci :00:01.0: BAR 2: assigned [mem 
0x1000-0x1fff 64bit pref]

[2.382836] pci :00:01.0: BAR 1: assigned [mem 0x2000-0x2fff]
[2.383557] pci :00:01.0: BAR 0: assigned [mem 0x20001000-0x200010ff]


Files under /sys/devices/pci:00/:00:01.0

broken_parity_status  devspec   local_cpus  resource
class  dma_mask_bitsmodaliassubsystem
config  driver_override  msi_bus subsystem_device
consistent_dma_mask_bits  enable   power   subsystem_vendor
d3cold_allowed  irq   remove  uevent
device  local_cpulistrescan  vendor

Information for resource:

0x20001000 0x200010ff 0x00040200
0x2000 0x2fff 0x00040200
0x1000 0x1fff 0x0014220c
0x 0x 0x
0x 0x 0x
0x 0x 0x
0x 0x 0x




(2)It also has a problem that once I use a memory bigger than 256M for 
ivshmem, it could not get through UEFI,

the error message is

PciBus: Discovered PCI @ [00|01|00]
   BAR[0]: Type =  Mem32; Alignment = 0xFFF;Length = 0x100; Offset 
= 0x10
   BAR[1]: Type =  Mem32; Alignment = 0xFFF;Length = 0x1000; Offset 
= 0x14
   BAR[2]: Type = PMem64; Alignment = 0x3FFF;Length = 
0x4000;Offset = 0x18


PciBus: HostBridge->SubmitResources() - Success
ASSERT 
/home/liang/studio/edk2/ArmVirtPkg/PciHostBridgeDxe/PciHostBridge.c(449): ((BOOLEAN)(0==1))


I am wandering if there are memory limitation for pcie devices under 
Qemu environment?



Just thank you in advance and any information would be appreciated.



Best,
Liang

Re: [Qemu-devel] [PATCH v9 17/27] qapi: Clean up after previous commit

2015-11-04 Thread Eric Blake

On 11/04/2015 06:43 AM, Markus Armbruster wrote:
> Eric Blake  writes:
> 
>> From: Markus Armbruster 
>>
>> QAPISchemaObjectTypeVariants.check() parameter members is no
>> longer used, drop it.
>>
>> Signed-off-by: Markus Armbruster 
>> Message-Id: <1446559499-26984-3-git-send-email-arm...@redhat.com>
>> [Variant.check(seen) is used after all, so reword and reduce scope
>> of this patch; rearrange later in the series]
> 
> Don't you need to update the subject?  My "previous commit" was "qapi:
> Simplify QAPISchemaObjectTypeMember.check()", while yours is "qapi:
> Eliminate QAPISchemaObjectType.check() variable members".
> 
> Not sure what moving my two patches apart buys you :)

I'm not quite sure either.  [Can I blame late-night coding?]  For
reference, this was your 3/7 patch.  I was trying to get to the point of
my 'qapi: Check for qapi collisions of flat union branches' (ended up as
19/27) as soon as possible after my tweaks to your 'qapi: Drop obsolete
tag value collision assertions' (your 1/7), so that there was less of a
gap where avoiding churn on passing vseen(dict) to Variant.check()
looked like an unused variable.

In my first attempt, I tried floating my patch right after yours.  But I
quickly discovered that my patch worked better if I built it on top of
your 'qapi: Factor out QAPISchemaObjectTypeMember.check_clash()' (your
6/7), which in turn depended on several of your other patches.  So the
end result of what I posted happens to be whatever order worked for all
my cherry-picking, and I still ended up having to tweak both your 1/7
and 3/7 after all.

For v10, I may just go back to the order that you first supplied patches
in (if for no other reason than to make your commit message more
accurate about being a cleanup of the previous patch, with the meaning
that you had given it).

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org

signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH 07/13] cuda.c: implement dummy IIC access commands

2015-11-04 Thread Mark Cave-Ayland

On 04/11/15 03:17, David Gibson wrote:

> On Fri, Oct 23, 2015 at 02:56:32PM +0100, Mark Cave-Ayland wrote:
>> These are used by MacOS 9 on boot. Here we return an error except for 4-byte
>> commands which write to the IIC bus.
> 
> A bit of rationale about why some of these give errors and some don't
> would be nice.

This was another change inspired by the relevant code in MOL. There are
comments suggesting that the 3 byte packet format is a (iicAddr, iicReg,
iicData) tuple which I could add in if required? Then again it seems the
MacOS 9 code just needs to receive some kind of reply in order for boot
to proceed, rather than the values actually doing anything.

>>
>> Signed-off-by: Mark Cave-Ayland 
>> ---
>>  hw/misc/macio/cuda.c |   18 ++
>>  1 file changed, 18 insertions(+)
>>
>> diff --git a/hw/misc/macio/cuda.c b/hw/misc/macio/cuda.c
>> index 4fe901b..d3ec58a 100644
>> --- a/hw/misc/macio/cuda.c
>> +++ b/hw/misc/macio/cuda.c
>> @@ -529,6 +529,24 @@ static void cuda_receive_packet(CUDAState *s,
>>  cuda_send_packet_to_host(s, obuf, 3);
>>  qemu_system_reset_request();
>>  break;
>> +case CUDA_COMBINED_FORMAT_IIC:
>> +obuf[0] = ERROR_PACKET;
>> +obuf[1] = 0x5;
>> +obuf[2] = CUDA_PACKET;
>> +obuf[3] = data[0];
>> +cuda_send_packet_to_host(s, obuf, 4);
>> +break;
>> +case CUDA_GET_SET_IIC:
>> +if (len == 4) {
>> +cuda_send_packet_to_host(s, obuf, 3);
>> +} else {
>> +obuf[0] = ERROR_PACKET;
>> +obuf[1] = 0x2;
>> +obuf[2] = CUDA_PACKET;
>> +obuf[3] = data[0];
>> +cuda_send_packet_to_host(s, obuf, 4);
>> +}
>> +break;
>>  default:
>>  break;
>>  }
> 

ATB,

Mark.

Re: [Qemu-devel] [PATCH v9 20/27] qapi: Simplify QAPISchemaObjectTypeVariants.check()

2015-11-04 Thread Eric Blake

On 11/04/2015 12:02 PM, Markus Armbruster wrote:
> Eric Blake  writes:
> 
>> From: Markus Armbruster 
>>
>> Avoid the 'if seen' conditional by doing just the essential work
>> here, namely setting self.tag_member for flat unions.
>> Move the rest to callers.
>>
>> Signed-off-by: Markus Armbruster 
>> Message-Id: <1446559499-26984-7-git-send-email-arm...@redhat.com>
>> [reword commit, rebase to earlier AlternateType.check() changes]
>> Signed-off-by: Eric Blake 
>>
>> ---
>> v9: new patch
>> ---
>>  scripts/qapi.py | 5 ++---
>>  1 file changed, 2 insertions(+), 3 deletions(-)
>>
>> diff --git a/scripts/qapi.py b/scripts/qapi.py
>> index 3054628..6653c70 100644
>> --- a/scripts/qapi.py
>> +++ b/scripts/qapi.py
>> @@ -988,9 +988,10 @@ class QAPISchemaObjectType(QAPISchemaType):
>>  for m in self.local_members:
>>  m.check(schema)
>>  m.check_clash(seen)
>> +self.members = seen.values()
>>  if self.variants:
>>  self.variants.check(schema, seen)
>> -self.members = seen.values()
>> +assert self.variants.tag_member in self.members
>>
>>  def is_implicit(self):
>>  # See QAPISchema._make_implicit_object_type()
>> @@ -1053,8 +1054,6 @@ class QAPISchemaObjectTypeVariants(object):
>>  # seen is non-empty for unions, empty for alternates
> 
> You added this comment in PATCH 10 to explain the conditional I'm
> dropping here.  Let's drop the comment, too.

Except the comment becomes relevant again in 23/27.  Or maybe I just
move the comment to that point.

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature

[Qemu-devel] [PATCH v2 1/1] target-ppc: Implement rtas_get_sysparm(PROCESSOR_MODULE_INFO)

2015-11-04 Thread Sukadev Bhattiprolu

Implement RTAS_SYSPARM_PROCESSOR_MODULE_INFO parameter to rtas_get_sysparm()
call in qemu. This call returns the processor module (socket), chip and core
information as specified in section 7.3.16.18 of PAPR v2.7.

We walk the /proc/device-tree to determine the number of chips, cores and
modules in the _host_ system and return this info to the guest application
that makes the rtas_get_sysparm() call.

We currently hard code the number of module_types to 1, but we should determine
that dynamically somehow later.

Thanks to input from Nishanth Aravamudan and Alexey Kardashevsk.

Signed-off-by: Sukadev Bhattiprolu 
---
Changelog[v2]:
[Alexey Kardashevsk] Use existing interfaces like ldl_be_p(),
stw_be_phys(), g_hash_table_new_full(), error_report() rather
than re-inventing; fix indentation, function prottypes etc;
Drop the fts() interface (which doesn't seem to be available
on mingw32/mingw64) and use opendir() to walk specific
directories in the directory tree.
---
 hw/ppc/Makefile.objs|   1 +
 hw/ppc/spapr_rtas.c |  35 +++
 hw/ppc/spapr_rtas_modinfo.c | 230 
 hw/ppc/spapr_rtas_modinfo.h |  12 +++
 include/hw/ppc/spapr.h  |   1 +
 5 files changed, 279 insertions(+)
 create mode 100644 hw/ppc/spapr_rtas_modinfo.c
 create mode 100644 hw/ppc/spapr_rtas_modinfo.h

diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
index c1ffc77..57c6b02 100644
--- a/hw/ppc/Makefile.objs
+++ b/hw/ppc/Makefile.objs
@@ -4,6 +4,7 @@ obj-y += ppc.o ppc_booke.o
 obj-$(CONFIG_PSERIES) += spapr.o spapr_vio.o spapr_events.o
 obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o
 obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o
+obj-$(CONFIG_PSERIES) += spapr_rtas_modinfo.o
 ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
 obj-y += spapr_pci_vfio.o
 endif
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index 34b12a3..41fd8a6 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -34,6 +34,8 @@
 #include "hw/ppc/spapr.h"
 #include "hw/ppc/spapr_vio.h"
 #include "qapi-event.h"
+
+#include "spapr_rtas_modinfo.h"
 #include "hw/boards.h"
 
 #include 
@@ -240,6 +242,39 @@ static void rtas_ibm_get_system_parameter(PowerPCCPU *cpu,
 target_ulong ret = RTAS_OUT_SUCCESS;
 
 switch (parameter) {
+case RTAS_SYSPARM_PROCESSOR_MODULE_INFO: {
+int i;
+int offset = 0;
+int size;
+struct rtas_module_info modinfo;
+
+if (rtas_get_module_info()) {
+break;
+}
+
+size = sizeof(modinfo);
+size += (modinfo.module_types - 1) * sizeof(struct rtas_socket_info);
+
+stw_be_phys(_space_memory, buffer+offset, size);
+offset += 2;
+
+stw_be_phys(_space_memory, buffer+offset, 
modinfo.module_types);
+offset += 2;
+
+for (i = 0; i < modinfo.module_types; i++) {
+stw_be_phys(_space_memory, buffer+offset,
+modinfo.si[i].sockets);
+offset += 2;
+stw_be_phys(_space_memory, buffer+offset,
+modinfo.si[i].chips);
+offset += 2;
+stw_be_phys(_space_memory, buffer+offset,
+modinfo.si[i].cores_per_chip);
+offset += 2;
+}
+break;
+}
+
 case RTAS_SYSPARM_SPLPAR_CHARACTERISTICS: {
 char *param_val = g_strdup_printf("MaxEntCap=%d,"
   "DesMem=%llu,"
diff --git a/hw/ppc/spapr_rtas_modinfo.c b/hw/ppc/spapr_rtas_modinfo.c
new file mode 100644
index 000..068fc2c
--- /dev/null
+++ b/hw/ppc/spapr_rtas_modinfo.c
@@ -0,0 +1,230 @@
+/*
+ * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator
+ *
+ * Hypercall based emulated RTAS
+ *
+ * Copyright (c) 2015 Sukadev Bhattiprolu, IBM Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR

Re: [Qemu-devel] [PATCH 12/13] cuda.c: fix T2 timer and enable its interrupt

2015-11-04 Thread Mark Cave-Ayland

On 04/11/15 03:40, David Gibson wrote:

> On Fri, Oct 23, 2015 at 02:56:37PM +0100, Mark Cave-Ayland wrote:
>> Fix the counter loading logic and enable the T2 interrupt when the timer
>> expires.
> 
> A mention of what uses T2, and therefore why this is useful would be
> good.

There is a good chance that nothing has used T2 before MacOS 9 since
before this patch, it is impossible for the T2 timer interrupt to fire.
It can be seen that MacOS 9 does write to the relevant registers during
boot, and if the T2 interrupt is disabled then boot will hang.

>>
>> Signed-off-by: Mark Cave-Ayland 
>> ---
>>  hw/misc/macio/cuda.c |   30 +-
>>  1 file changed, 21 insertions(+), 9 deletions(-)
>>
>> diff --git a/hw/misc/macio/cuda.c b/hw/misc/macio/cuda.c
>> index 687cb54..d864b24 100644
>> --- a/hw/misc/macio/cuda.c
>> +++ b/hw/misc/macio/cuda.c
>> @@ -136,7 +136,7 @@ static void cuda_timer_update(CUDAState *s, CUDATimer 
>> *ti,
>>  
>>  static void cuda_update_irq(CUDAState *s)
>>  {
>> -if (s->ifr & s->ier & (SR_INT | T1_INT)) {
>> +if (s->ifr & s->ier & (SR_INT | T1_INT | T2_INT)) {
>>  qemu_irq_raise(s->irq);
>>  } else {
>>  qemu_irq_lower(s->irq);
>> @@ -175,7 +175,7 @@ static unsigned int get_counter(CUDATimer *ti)
>>  
>>  static void set_counter(CUDAState *s, CUDATimer *ti, unsigned int val)
>>  {
>> -CUDA_DPRINTF("T%d.counter=%d\n", 1 + (ti->timer == NULL), val);
>> +CUDA_DPRINTF("T%d.counter=%d\n", 1 + ti->index, val);
>>  ti->load_time = get_tb(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
>> s->frequency);
>>  ti->counter_value = val;
>> @@ -220,7 +220,7 @@ static void cuda_timer_update(CUDAState *s, CUDATimer 
>> *ti,
>>  {
>>  if (!ti->timer)
>>  return;
>> -if ((s->acr & T1MODE) != T1MODE_CONT) {
>> +if (ti->index == 0 && (s->acr & T1MODE) != T1MODE_CONT) {
>>  timer_del(ti->timer);
>>  } else {
>>  ti->next_irq_time = get_next_irq_time(ti, current_time);
>> @@ -238,6 +238,16 @@ static void cuda_timer1(void *opaque)
>>  cuda_update_irq(s);
>>  }
>>  
>> +static void cuda_timer2(void *opaque)
>> +{
>> +CUDAState *s = opaque;
>> +CUDATimer *ti = >timers[1];
>> +
>> +cuda_timer_update(s, ti, ti->next_irq_time);
>> +s->ifr |= T2_INT;
>> +cuda_update_irq(s);
>> +}
>> +
>>  static uint32_t cuda_readb(void *opaque, hwaddr addr)
>>  {
>>  CUDAState *s = opaque;
>> @@ -276,6 +286,7 @@ static uint32_t cuda_readb(void *opaque, hwaddr addr)
>>  case CUDA_REG_T2CL:
>>  val = get_counter(>timers[1]) & 0xff;
>>  s->ifr &= ~T2_INT;
>> +cuda_update_irq(s);
>>  break;
>>  case CUDA_REG_T2CH:
>>  val = get_counter(>timers[1]) >> 8;
>> @@ -352,11 +363,12 @@ static void cuda_writeb(void *opaque, hwaddr addr, 
>> uint32_t val)
>>  cuda_timer_update(s, >timers[0], 
>> qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL));
>>  break;
>>  case CUDA_REG_T2CL:
>> -s->timers[1].latch = val;
>> -set_counter(s, >timers[1], val);
>> +s->timers[1].latch = (s->timers[1].latch & 0xff00) | val;
>>  break;
>>  case CUDA_REG_T2CH:
>> -set_counter(s, >timers[1], (val << 8) | s->timers[1].latch);
>> +s->timers[1].latch = (s->timers[1].latch & 0xff) | (val << 8);
>> +s->ifr &= ~T2_INT;
>> +set_counter(s, >timers[1], s->timers[1].latch);
> 
> So the new code appears to be like that for T1CL / T1CH, which makes
> sense.  However, T1CL has a cuda_timer_update() call.  Do you also
> need that for T2CL?

This is a side-effect of combining the T1 and T2 code. Unlike T1, T2
appears to be free-running from its written value but generating an
interrupt just after zero-crossing. So in order to get the correct
interval, we write the value to the latch instead of the counter to get
the same effect with the shared timer code.

>>  break;
>>  case CUDA_REG_SR:
>>  s->sr = val;
>> @@ -719,8 +731,7 @@ static void cuda_reset(DeviceState *dev)
>>  s->timers[0].latch = 0x;
>>  set_counter(s, >timers[0], 0x);
>>  
>> -s->timers[1].latch = 0;
>> -set_counter(s, >timers[1], 0x);
>> +s->timers[1].latch = 0x;
>>  }
>>  
>>  static void cuda_realizefn(DeviceState *dev, Error **errp)
>> @@ -730,7 +741,8 @@ static void cuda_realizefn(DeviceState *dev, Error 
>> **errp)
>>  
>>  s->timers[0].timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, cuda_timer1, s);
>>  s->timers[0].frequency = s->frequency;
>> -s->timers[1].frequency = s->frequency;
>> +s->timers[1].timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, cuda_timer2, s);
>> +s->timers[1].frequency = (SCALE_US * 6000) / 4700;
> 
> Where does this T2 frequency come from?

My understanding of this is that with the shared timer code, the IRQ
timing is calculated based upon CUDA_TIMER_FREQ (4.7MHz / 6). Therefore
by setting the frequency to the inverse

Re: [Qemu-devel] [PATCH v3 2/3] target-i386: calculate vcpu's TSC rate to be migrated

2015-11-04 Thread Haozhong Zhang

On 11/04/15 19:42, Eduardo Habkost wrote:
> On Mon, Nov 02, 2015 at 05:26:42PM +0800, Haozhong Zhang wrote:
> > The value of the migrated vcpu's TSC rate is determined as below.
> >  1. If a TSC rate is specified by the cpu option 'tsc-freq', then this
> > user-specified value will be used.
> >  2. If neither a user-specified TSC rate nor a migrated TSC rate is
> > present, we will use the TSC rate from KVM (returned by
> > KVM_GET_TSC_KHZ).
> >  3. Otherwise, we will use the migrated TSC rate.
> > 
> > Signed-off-by: Haozhong Zhang 
> [...]
> > diff --git a/target-i386/kvm.c b/target-i386/kvm.c
> > index 64046cb..aae5e58 100644
> > --- a/target-i386/kvm.c
> > +++ b/target-i386/kvm.c
> > @@ -3034,3 +3034,36 @@ int kvm_arch_msi_data_to_gsi(uint32_t data)
> >  {
> >  abort();
> >  }
> > +
> > +int kvm_arch_setup_tsc_khz(CPUState *cs)
> > +{
> > +X86CPU *cpu = X86_CPU(cs);
> > +CPUX86State *env = >env;
> > +int r;
> > +
> > +/*
> > + * Prepare vcpu's TSC rate to be migrated.
> > + *
> > + * - If the user specifies the TSC rate by cpu option 'tsc-freq',
> > + *   we will use the user-specified value.
> > + *
> > + * - If there is neither user-specified TSC rate nor migrated TSC
> > + *   rate, we will ask KVM for the TSC rate by calling
> > + *   KVM_GET_TSC_KHZ.
> > + *
> > + * - Otherwise, if there is a migrated TSC rate, we will use the
> > + *   migrated value.
> > + */
> > +if (env->tsc_khz) {
> > +env->tsc_khz_saved = env->tsc_khz;
> > +} else if (!env->tsc_khz_saved) {
> > +r = kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ);
> > +if (r < 0) {
> > +fprintf(stderr, "KVM_GET_TSC_KHZ failed\n");
> > +return r;
> > +}
> 
> The lack of KVM_CAP_GET_TSC_KHZ should make QEMU abort, unless the user
> is explicitly requesting a more strict mode where the TSC frequency will
> be guaranteed to never change.
>

I agree KVM_CAP_GET_TSC_KHZ should be checked before KVM_GET_TSC_KHZ,
but I don't think the lack of it should abort QEMU. This piece of code
on the source machine is just to get the TSC frequency to be
migrated. If it fails, it will leave env->tsc_khz_saved be 0. And
according to tsc_khz_needed() in patch 1, if env->tsc_khz_saved == 0,
no TSC frequency will be migrated. So the lack of KVM_CAP_GET_TSC_KHZ
only hurts the migration and does not need to abort QEMU on the source
machine.

> > +env->tsc_khz_saved = r;
> > +}
> 
> Why do you need a separate tsc_khz_saved field, and don't simply use
> tsc_khz? It would have the additional feature of letting QMP clients
> query the current TSC rate by asking for the tsc-freq property on CPU
> objects.
>

It's to avoid overriding env->tsc_khz on the destination in the
migration. I can change this line to
 env->tsc_khz = env->tsc_khz_saved = r;

For the additional QMP feature, will the value of tsc-freq property be
env->tsc_khz? If yes, I guess the above change would be fine?

Haozhong

> 
> > +
> > +return 0;
> > +}
> 
> -- 
> Eduardo
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCH v9 19/27] qapi: Check for qapi collisions of flat union branches

2015-11-04 Thread Eric Blake

On 11/04/2015 04:11 PM, Eric Blake wrote:
> On 11/04/2015 12:01 PM, Markus Armbruster wrote:
>> Eric Blake  writes:
>>
>>> Right now, our ad hoc parser ensures that we cannot have a
>>> flat union that introduces any qapi member names that would
>>> conflict with the non-variant qapi members already present
>>> from the union's base type (see flat-union-clash-member.json).
>>> We want QAPISchema*.check() to make the same check, so we can
>>> later reduce some of the ad hoc checks.
>>>
> 

>> Why can't we simply add the new code to QAPISchemaObjectType.check()?
> 
> We could, but we'd need a nested for-loop:
> 
> for v in variants.variants:
> v.type.check(schema)
> assert not v.type.variants
> vseen = dict(seen)
> for m in v.type.members:
> m.check_clash(seen)
> 
>> The rest of the clash checking is already there...
> 
> You do have a point there.  I guess it wouldn't be the end of the world
> to have the loop nesting be explicit rather than indirect through the
> intermediate Variants.check().
> 
> I'll play with it; and depending on what I do, that may mean I don't
> have to munge your other patches quite so heavily.

And of course, as soon as I hit send, I had another thought - your
patches already added Member.check_clash() called separately from the
.check() chain; maybe I am better off adding a Variants.check_clash()
separate from the .check() chain, rather than trying to cram the whole
nested loop directly in ObjectType.check().

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH v2 3/5] arm: xilinx_zynq: Add linux pre-boot

2015-11-04 Thread Alistair Francis

On Thu, Oct 29, 2015 at 10:34 PM, Peter Crosthwaite
 wrote:
> Add a Linux-specific pre-boot routine that matches the device-
> specific bootloaders behaviour. This is needed for modern Linux that
> expects the ARM PLL in SLCR to be a more even value (not 26).
>
> Cc: Alistair Francis 
> Signed-off-by: Peter Crosthwaite 

This looks fine to me. I haven't looked much at the other patches though.

I think a comment in the zynq_write_board_setup() function saying what
it is doing wouldn't hurt either. Basically just copying the commit
message.

Reviewed-by: Alistair Francis 

Thanks,

Alistair

> ---
> Changed since v1:
> Left main loader address unchanged
> Added comments for SLCR_WRITE macro (PMM review)
> Convert SLCR_WRITE impl. to use movw/movt (PMM review)
> Missing "-" for device-specific
> Changed since RFC:
> Use bootloader callback to load blob.
> Change "firmware" to "board-setup" for consistency.
>
>  hw/arm/xilinx_zynq.c | 42 ++
>  1 file changed, 42 insertions(+)
>
> diff --git a/hw/arm/xilinx_zynq.c b/hw/arm/xilinx_zynq.c
> index 9f89483..82a9db8 100644
> --- a/hw/arm/xilinx_zynq.c
> +++ b/hw/arm/xilinx_zynq.c
> @@ -43,6 +43,45 @@ static const int dma_irqs[8] = {
>  46, 47, 48, 49, 72, 73, 74, 75
>  };
>
> +#define BOARD_SETUP_ADDR0x100
> +
> +#define SLCR_LOCK_OFFSET0x004
> +#define SLCR_UNLOCK_OFFSET  0x008
> +#define SLCR_ARM_PLL_OFFSET 0x100
> +
> +#define SLCR_XILINX_UNLOCK_KEY  0xdf0d
> +#define SLCR_XILINX_LOCK_KEY0x767b
> +
> +#define ARMV7_IMM16(x) (extract32((x),  0, 12) | \
> +extract32((x), 12,  4) << 16)
> +
> +/* Write immediate val to address r0 + addr. r0 should contain base offset
> + * of the SLCR block. Clobbers r1.
> + */
> +
> +#define SLCR_WRITE(addr, val) \
> +0xe3001000 + ARMV7_IMM16(extract32((val),  0, 16)), /* movw r1 ... */ \
> +0xe3401000 + ARMV7_IMM16(extract32((val), 16, 16)), /* movt r1 ... */ \
> +0xe5801000 + (addr)
> +
> +static void zynq_write_board_setup(ARMCPU *cpu,
> +   const struct arm_boot_info *info)
> +{
> +int n;
> +uint32_t board_setup_blob[] = {
> +0xe3a004f8, /* mov r0, #0xf800 */
> +SLCR_WRITE(SLCR_UNLOCK_OFFSET, SLCR_XILINX_UNLOCK_KEY),
> +SLCR_WRITE(SLCR_ARM_PLL_OFFSET, 0x00014008),
> +SLCR_WRITE(SLCR_LOCK_OFFSET, SLCR_XILINX_LOCK_KEY),
> +0xe12fff1e, /* bx lr */
> +};
> +for (n = 0; n < ARRAY_SIZE(board_setup_blob); n++) {
> +board_setup_blob[n] = tswap32(board_setup_blob[n]);
> +}
> +rom_add_blob_fixed("board-setup", board_setup_blob,
> +   sizeof(board_setup_blob), BOARD_SETUP_ADDR);
> +}
> +
>  static struct arm_boot_info zynq_binfo = {};
>
>  static void gem_init(NICInfo *nd, uint32_t base, qemu_irq irq)
> @@ -252,6 +291,9 @@ static void zynq_init(MachineState *machine)
>  zynq_binfo.nb_cpus = 1;
>  zynq_binfo.board_id = 0xd32;
>  zynq_binfo.loader_start = 0;
> +zynq_binfo.board_setup_addr = BOARD_SETUP_ADDR;
> +zynq_binfo.write_board_setup = zynq_write_board_setup;
> +
>  arm_load_kernel(ARM_CPU(first_cpu), _binfo);
>  }
>
> --
> 1.9.1
>
>

Re: [Qemu-devel] Could not add PCI device with big memory to aarch64 VMs

2015-11-04 Thread Laszlo Ersek

On 11/04/15 23:22, liang yan wrote:
> Hello, Laszlo,
> 
> (1)I am trying to add ivshmem device(PCI device with big memory) to my
> aarch64 vm.
> So far, I could find device information from vm. But it seems vm did not
> create
> correct resource file for this device. Do you have any idea that this
> happens?
> 
> I used the upstream EDK2 to build my UEFI firmware.
> 
> There are three BARs for this device, and memory map is assigned too,
> but only one
> resource file is created.
> 
> My qemu supports ACPI 5.1 and the command line is :
> 
>   -device ivshmem,size=256M,chardev=ivshmem,msi=on,ioeventfd=on \
>   -chardev socket,path=/tmp/ivshmem_socket,id=ivshmem \
> 
> The lspci information:
> 
> 00:00.0 Host bridge: Red Hat, Inc. Device 0008
> Subsystem: Red Hat, Inc Device 1100
> Control: I/O- Mem- BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr-
> Stepping- SERR- FastB2B- DisINTx-
> Status: Cap- 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort-
> SERR-  
> 00:01.0 RAM memory: Red Hat, Inc Inter-VM shared memory
> Subsystem: Red Hat, Inc QEMU Virtual Machine
> Control: I/O- Mem- BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr-
> Stepping- SERR- FastB2B- DisINTx-
> Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort-
> SERR-  Interrupt: pin A routed to IRQ 255
> Region 0: Memory at 20001000 (32-bit, non-prefetchable) [disabled]
> [size=256]
> Region 1: Memory at 2000 (32-bit, non-prefetchable) [disabled]
> [size=4K]
> Region 2: Memory at 1000 (64-bit, prefetchable) [disabled]
> [size=256M]
> Capabilities: [40] MSI-X: Enable- Count=1 Masked-
> Vector table: BAR=1 offset=
> PBA: BAR=1 offset=0800
> 
> Boot information:
> 
> [2.380924] pci :00:01.0: BAR 2: assigned [mem
> 0x1000-0x1fff 64bit pref]
> [2.382836] pci :00:01.0: BAR 1: assigned [mem
> 0x2000-0x2fff]
> [2.383557] pci :00:01.0: BAR 0: assigned [mem
> 0x20001000-0x200010ff]
> 
> 
> Files under /sys/devices/pci:00/:00:01.0
> 
> broken_parity_status  devspec   local_cpus  resource
> class  dma_mask_bitsmodaliassubsystem
> config  driver_override  msi_bus subsystem_device
> consistent_dma_mask_bits  enable   power   subsystem_vendor
> d3cold_allowed  irq   remove  uevent
> device  local_cpulistrescan  vendor
> 
> Information for resource:
> 
> 0x20001000 0x200010ff 0x00040200
> 0x2000 0x2fff 0x00040200
> 0x1000 0x1fff 0x0014220c
> 0x 0x 0x
> 0x 0x 0x
> 0x 0x 0x
> 0x 0x 0x
> 
> 
> 
> 
> (2)It also has a problem that once I use a memory bigger than 256M for
> ivshmem, it could not get through UEFI,
> the error message is
> 
> PciBus: Discovered PCI @ [00|01|00]
>BAR[0]: Type =  Mem32; Alignment = 0xFFF;Length = 0x100; Offset =
> 0x10
>BAR[1]: Type =  Mem32; Alignment = 0xFFF;Length = 0x1000; Offset
> = 0x14
>BAR[2]: Type = PMem64; Alignment = 0x3FFF;Length =
> 0x4000;Offset = 0x18
> 
> PciBus: HostBridge->SubmitResources() - Success
> ASSERT
> /home/liang/studio/edk2/ArmVirtPkg/PciHostBridgeDxe/PciHostBridge.c(449): 
> ((BOOLEAN)(0==1))
> 
> 
> I am wandering if there are memory limitation for pcie devices under
> Qemu environment?
> 
> 
> Just thank you in advance and any information would be appreciated.

(CC'ing Ard.)

"Apparently", the firmware-side counterpart of QEMU commit 5125f9cd2532
has never been contributed to edk2.

Therefore the the ProcessPciHost() function in
"ArmVirtPkg/VirtFdtDxe/VirtFdtDxe.c" ignores the
DTB_PCI_HOST_RANGE_MMIO64 type range from the DTB. (Thus only
DTB_PCI_HOST_RANGE_MMIO32 is recognized as PCI MMIO aperture.)

However, even if said driver was extended to parse the new 64-bit
aperture into PCDs (which wouldn't be hard), the
ArmVirtPkg/PciHostBridgeDxe driver would still have to be taught to look
at that aperture (from the PCDs) and to serve MMIO BAR allocation
requests from it. That could be hard.

Please check edk2 commits e48f1f15b0e2^..e5ceb6c9d390, approximately,
for the background on the current code. See also chapter 13 "Protocols -
PCI Bus Support" in the UEFI spec.

Patches welcome. :)

(A separate note on ACPI vs. DT: the firmware forwards *both* from QEMU
to the runtime guest OS. However, the firmware parses only the DT for
its own purposes.)

Thanks
Laszlo

Re: [Qemu-devel] [PATCH v9 19/27] qapi: Check for qapi collisions of flat union branches

2015-11-04 Thread Eric Blake

On 11/04/2015 12:01 PM, Markus Armbruster wrote:
> Eric Blake  writes:
> 
>> Right now, our ad hoc parser ensures that we cannot have a
>> flat union that introduces any qapi member names that would
>> conflict with the non-variant qapi members already present
>> from the union's base type (see flat-union-clash-member.json).
>> We want QAPISchema*.check() to make the same check, so we can
>> later reduce some of the ad hoc checks.
>>

>> @@ -1068,6 +1069,14 @@ class 
>> QAPISchemaObjectTypeVariant(QAPISchemaObjectTypeMember):
>>  def check(self, schema, tag_type, seen):
>>  QAPISchemaObjectTypeMember.check(self, schema)
>>  assert self.name in tag_type.values
>> +if seen:
>> +# This variant is used within a union; ensure each qapi member
>> +# field does not collide with the union's non-variant members.
>> +assert isinstance(self.type, QAPISchemaObjectType)
>> +assert not self.type.variants   # not implemented
>> +self.type.check(schema)
>> +for m in self.type.members:
>> +m.check_clash(seen)
>>
>>  # This function exists to support ugly simple union special cases
>>  # TODO get rid of them, and drop the function
> 
> Two call chains:
> 
> * QAPISchemaObjectType.check() via QAPISchemaObjectTypeVariants.check()
> 
>   In this case, the new conditional executes.
> 
> * QAPISchemaAlternateType.check() via same
> 
>   In this case, it doesn't.
> 
> Why can't we simply add the new code to QAPISchemaObjectType.check()?

We could, but we'd need a nested for-loop:

for v in variants.variants:
v.type.check(schema)
assert not v.type.variants
vseen = dict(seen)
for m in v.type.members:
m.check_clash(seen)

> The rest of the clash checking is already there...

You do have a point there.  I guess it wouldn't be the end of the world
to have the loop nesting be explicit rather than indirect through the
intermediate Variants.check().

I'll play with it; and depending on what I do, that may mean I don't
have to munge your other patches quite so heavily.

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH 00/13] Mac OS 9 compatibility improvements (upstream rework)

2015-11-04 Thread Mark Cave-Ayland

On 04/11/15 03:44, David Gibson wrote:

> On Fri, Oct 30, 2015 at 04:48:12PM +, Mark Cave-Ayland wrote:
>> On 23/10/15 14:56, Mark Cave-Ayland wrote:
>>
>>> This is a rework of Cormac O'Brien's GSoC project to try and boot MacOS 9 
>>> under
>>> QEMU, the original version of which was posted to the qemu-devel list at the
>>> end of August 
>>> (https://lists.gnu.org/archive/html/qemu-devel/2015-08/msg02521.html).
>>>
>>> The patchset consisted of some simple patches from Alex and then a large 
>>> set of
>>> CUDA changes supplied as a single patch which were the result of Cormac 
>>> analysing
>>> MOL with Alex's help to try and further the boot process.
>>>
>>> In their previous form, the patches were unsuitable for applying upstream 
>>> since
>>> while they furthered MacOS 9 boot, they also caused a couple of major 
>>> regressions
>>> such as breaking the mouse and causing Darwin/OS X boot to panic on startup.
>>>
>>> This reworked patchset fixes these regressions, includes some other 
>>> clean-ups 
>>> and more importantly now passes all of my OpenBIOS image boot tests with an 
>>> OpenBIOS binary from SVN trunk (separate pull request to be sent shortly).
>>> Whilst OpenBIOS still needs one additional patch to run the MacOS 9 
>>> bootloader,
>>> I've uploaded a pre-compiled binary to 
>>> https://www.ilande.co.uk/tmp/openbios-ppc for people interested in testing 
>>> the 
>>> new MacOS 9 functionality.
>>>
>>> Apologies for the delay in sending this out on-list, however due to recent
>>> circumstances I've been without a reliable broadband connection for a couple
>>> of weeks. However given that this is mostly a rework of the previous 
>>> patchset 
>>> and looks good in testing here, I'd definitely like it to be considered for
>>> application during soft freeze.
>>>
>>> Signed-off-by: Mark Cave-Ayland 
>>>
>>> Alexander Graf (3):
>>>   PPC: Allow Rc bit to be set on mtspr
>>>   PPC: Fix lsxw bounds checks
>>>   PPC: mac99: Always add USB controller
>>>
>>> Mark Cave-Ayland (10):
>>>   cuda.c: fix CUDA ADB error packet format
>>>   cuda.c: fix CUDA_PACKET response packet format
>>>   cuda.c: implement simple CUDA_GET_6805_ADDR command
>>>   cuda.c: implement dummy IIC access commands
>>>   cuda.c: fix CUDA SR interrupt clearing
>>>   cuda.c: add defines for CUDA registers
>>>   cuda.c: refactor get_tb() so that the time can be passed in
>>>   cuda.c: rename get_counter() state variable from s to ti for
>>> consistency
>>>   cuda.c: fix T2 timer and enable its interrupt
>>>   cuda.c: add delay to setting of SR_INT bit
>>>
>>>  hw/misc/macio/cuda.c|  243 
>>> ++-
>>>  hw/ppc/mac.h|3 +
>>>  hw/ppc/mac_newworld.c   |3 +-
>>>  target-ppc/mem_helper.c |5 +-
>>>  target-ppc/translate.c  |2 +-
>>>  5 files changed, 163 insertions(+), 93 deletions(-)
>>
>> Ping? Can anyone review this in Alex's absence? In the meantime I've
>> added it to wiki at http://wiki.qemu.org/Planning/2.5 as it would be
>> good to get the GSoC work upstream for 2.5.
> 
> Sorry I've taken a while to get to this.  It looks pretty good, though
> I've sent a handful of comments on individual patches.
> 
> I gathered from one of your replies that you do intend to do a
> respin.  The current comments all look pretty trivial, so I expect
> I'll be ok to apply your respin to ppc-next (which I'm looking after
> in agraf's absence).  It would be nice to get a review from someone
> more familiar with, or better able to test MacOS stuff.

Great. I've sent a further few replies, so if you're happy with the
answers let me know and I'll send a v2 tomorrow.

In terms of testing, as I mentioned above it doesn't regress any of my
working OpenBIOS test-suite images which is a good sign. I'm not sure
who else would be able to review the MacOS side. Both Segher and Ben. H
have looked at parts of the original code changes during GSoC, although
that was from a debugging rather than a code review perspective.


ATB,

Mark.

Re: [Qemu-devel] [PATCH] target-sparc: fix 32-bit truncation in fpackfix

2015-11-04 Thread Mark Cave-Ayland

On 04/11/15 11:05, Richard Henderson wrote:

> On 11/04/2015 11:45 AM, Paolo Bonzini wrote:
int32_t src = rs2 >> (word * 32);
 -int64_t scaled = src << scale;
 +int64_t scaled = (int64_t)src << scale;
int64_t from_fixed = scaled >> 16;
> ...
>>>
>>> I do think we'd be better served by casting to uint64_t on that line.
>>> Note that fpackfix requires the same correction.  And it wouldn't hurt
>>> to cast to uint32_t in fpack16, lest we anger the self-same shifting
>>> gods.
>>
>> Hmmm.. say src = -0x8000, scale = 1;
>>
>> scaled = (uint64_t)-0x800 << 1 = 0x
>> from_fixed = 0x >> 16  = 0x
>>
>> Now from_fixed is positive and you get 32767 instead of -32768.  In
>> other words, we would have to cast to uint64_t on the scaled assignment,
>> and back to int64_t on the from_fixed assignment.  I must be
>> misunderstanding your suggestion.
> 
>   int64_t scaled = (uint64_t)src << scale;
> 
> I.e. one explicit conversion and one implicit conversion.

I suspect Richard knows more about this part of SPARC emulation than I
do, so I'd be fine with a solution similar to the above if everyone
agress. Let me know if you need me to send a SPARC pull request,
although it will probably be quicker coming from Paolo/Richard at the
moment.


ATB,

Mark.

[Qemu-devel] [Bug 1465935] Re: kvm_irqchip_commit_routes: Assertion `ret == 0' failed

2015-11-04 Thread Mathew Hodson

** Changed in: qemu (Ubuntu Utopic)
   Importance: Undecided => High

** Changed in: qemu (Ubuntu Precise)
   Importance: Undecided => High

** No longer affects: qemu (Ubuntu Precise)

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1465935

Title:
  kvm_irqchip_commit_routes: Assertion `ret == 0' failed

Status in QEMU:
  New
Status in qemu package in Ubuntu:
  Fix Released
Status in qemu source package in Trusty:
  Fix Committed
Status in qemu source package in Utopic:
  Won't Fix
Status in qemu source package in Vivid:
  Fix Committed

Bug description:
  Several my QEMU instances crashed, and in the  qemu log, I can see
  this assertion failure,

 qemu-system-x86_64: /build/buildd/qemu-2.0.0+dfsg/kvm-all.c:984:
  kvm_irqchip_commit_routes: Assertion `ret == 0' failed.

  The QEMU version is 2.0.0, HV OS is ubuntu 12.04, kernel 3.2.0-38.
  Guest OS is RHEL 6.3.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1465935/+subscriptions

[Qemu-devel] [v2 1/2] kvmclock: add a new function to update env->tsc.

2015-11-04 Thread Liang Li

The commit 317b0a6d8 fixed an issue which caused by the outdated
env->tsc value, but the fix lead to 'cpu_synchronize_all_states()'
called twice during live migration. The 'cpu_synchronize_all_states()'
takes about 130us for a VM which has 4 vcpus, it's a bit expensive.

Synchronize the whole CPU context just for updating env->tsc is too
wasting, this patch use a new function to update the env->tsc.
Comparing to 'cpu_synchronize_all_states()', it only takes about 20us.

Signed-off-by: Liang Li 
---
 hw/i386/kvm/clock.c| 18 ++
 target-i386/kvm.c  | 45 +
 target-i386/kvm_i386.h |  1 +
 3 files changed, 48 insertions(+), 16 deletions(-)

diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c
index efdf165..0593a3f 100644
--- a/hw/i386/kvm/clock.c
+++ b/hw/i386/kvm/clock.c
@@ -17,7 +17,7 @@
 #include "qemu/host-utils.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/kvm.h"
-#include "sysemu/cpus.h"
+#include "kvm_i386.h"
 #include "hw/sysbus.h"
 #include "hw/kvm/clock.h"
 
@@ -125,21 +125,7 @@ static void kvmclock_vm_state_change(void *opaque, int 
running,
 return;
 }
 
-cpu_synchronize_all_states();
-/* In theory, the cpu_synchronize_all_states() call above wouldn't
- * affect the rest of the code, as the VCPU state inside CPUState
- * is supposed to always match the VCPU state on the kernel side.
- *
- * In practice, calling cpu_synchronize_state() too soon will load the
- * kernel-side APIC state into X86CPU.apic_state too early, APIC state
- * won't be reloaded later because CPUState.vcpu_dirty==true, and
- * outdated APIC state may be migrated to another host.
- *
- * The real fix would be to make sure outdated APIC state is read
- * from the kernel again when necessary. While this is not fixed, we
- * need the cpu_clean_all_dirty() call below.
- */
-cpu_clean_all_dirty();
+kvm_synchronize_all_tsc();
 
 ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, );
 if (ret < 0) {
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 64046cb..2a9953b 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -111,6 +111,51 @@ bool kvm_allows_irq0_override(void)
 return !kvm_irqchip_in_kernel() || kvm_has_gsi_routing();
 }
 
+static int kvm_get_tsc(CPUState *cs)
+{
+X86CPU *cpu = X86_CPU(cs);
+CPUX86State *env = >env;
+struct {
+struct kvm_msrs info;
+struct kvm_msr_entry entries[1];
+} msr_data;
+int ret;
+
+if (env->tsc_valid) {
+return 0;
+}
+
+msr_data.info.nmsrs = 1;
+msr_data.entries[0].index = MSR_IA32_TSC;
+env->tsc_valid = !runstate_is_running();
+
+ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, _data);
+if (ret < 0) {
+return ret;
+}
+
+env->tsc = msr_data.entries[0].data;
+return 0;
+}
+
+static inline void do_kvm_synchronize_tsc(void *arg)
+{
+CPUState *cpu = arg;
+
+kvm_get_tsc(cpu);
+}
+
+void kvm_synchronize_all_tsc(void)
+{
+CPUState *cpu;
+
+if (kvm_enabled()) {
+CPU_FOREACH(cpu) {
+run_on_cpu(cpu, do_kvm_synchronize_tsc, cpu);
+}
+}
+}
+
 static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max)
 {
 struct kvm_cpuid2 *cpuid;
diff --git a/target-i386/kvm_i386.h b/target-i386/kvm_i386.h
index e557e94..c1b312b 100644
--- a/target-i386/kvm_i386.h
+++ b/target-i386/kvm_i386.h
@@ -15,6 +15,7 @@
 
 bool kvm_allows_irq0_override(void);
 bool kvm_has_smm(void);
+void kvm_synchronize_all_tsc(void);
 void kvm_arch_reset_vcpu(X86CPU *cs);
 void kvm_arch_do_init_vcpu(X86CPU *cs);
 
-- 
1.9.1

[Qemu-devel] [v2 2/2] Revert "Introduce cpu_clean_all_dirty"

2015-11-04 Thread Liang Li

This reverts commit de9d61e83d43be9069e6646fa9d57a3f47779d28.

Now 'cpu_clean_all_dirty' is useless, we can revert the related code.

Conflicts:
include/sysemu/kvm.h

Signed-off-by: Liang Li 
---
 cpus.c| 9 -
 include/sysemu/cpus.h | 1 -
 include/sysemu/kvm.h  | 8 
 kvm-all.c | 5 -
 4 files changed, 23 deletions(-)

diff --git a/cpus.c b/cpus.c
index d2e9e4f..c6a5d0e 100644
--- a/cpus.c
+++ b/cpus.c
@@ -694,15 +694,6 @@ void cpu_synchronize_all_post_init(void)
 }
 }
 
-void cpu_clean_all_dirty(void)
-{
-CPUState *cpu;
-
-CPU_FOREACH(cpu) {
-cpu_clean_state(cpu);
-}
-}
-
 static int do_vm_stop(RunState state)
 {
 int ret = 0;
diff --git a/include/sysemu/cpus.h b/include/sysemu/cpus.h
index 30ddd12..3d1e5ba 100644
--- a/include/sysemu/cpus.h
+++ b/include/sysemu/cpus.h
@@ -11,7 +11,6 @@ void cpu_stop_current(void);
 void cpu_synchronize_all_states(void);
 void cpu_synchronize_all_post_reset(void);
 void cpu_synchronize_all_post_init(void);
-void cpu_clean_all_dirty(void);
 
 void qtest_clock_warp(int64_t dest);
 
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 461ef65..4ac6176 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -417,7 +417,6 @@ int kvm_physical_memory_addr_from_host(KVMState *s, void 
*ram_addr,
 void kvm_cpu_synchronize_state(CPUState *cpu);
 void kvm_cpu_synchronize_post_reset(CPUState *cpu);
 void kvm_cpu_synchronize_post_init(CPUState *cpu);
-void kvm_cpu_clean_state(CPUState *cpu);
 
 /* generic hooks - to be moved/refactored once there are more users */
 
@@ -442,13 +441,6 @@ static inline void cpu_synchronize_post_init(CPUState *cpu)
 }
 }
 
-static inline void cpu_clean_state(CPUState *cpu)
-{
-if (kvm_enabled()) {
-kvm_cpu_clean_state(cpu);
-}
-}
-
 int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg, PCIDevice *dev);
 int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg,
  PCIDevice *dev);
diff --git a/kvm-all.c b/kvm-all.c
index c442838..1bc1273 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1766,11 +1766,6 @@ void kvm_cpu_synchronize_post_init(CPUState *cpu)
 run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, cpu);
 }
 
-void kvm_cpu_clean_state(CPUState *cpu)
-{
-cpu->kvm_vcpu_dirty = false;
-}
-
 int kvm_cpu_exec(CPUState *cpu)
 {
 struct kvm_run *run = cpu->kvm_run;
-- 
1.9.1

[Qemu-devel] [v2 0/2] Optimize the env->tsc update operation

2015-11-04 Thread Liang Li

This patch set aims for reducing the live migration downtime. It
updates the env->tsc when stopping kvmclock with a new function.
Rather than using 'cpu_synchronize_all_states()', which updates the
whole CPU context just for updating the env->tsc.

For a VM with 4 CPUs, this patch set can help to reduce the VM downtime
about 100us.

Changes in v2:
  * Simplify 'kvm_get_tsc()' function.
  * Introduce a new function 'kvm_synchronize_all_tsc()'

Liang Li (2):
  kvmclock: add a new function to update env->tsc.
  Revert "Introduce cpu_clean_all_dirty"

 cpus.c |  9 -
 hw/i386/kvm/clock.c| 18 ++
 include/sysemu/cpus.h  |  1 -
 include/sysemu/kvm.h   |  8 
 kvm-all.c  |  5 -
 target-i386/kvm.c  | 45 +
 target-i386/kvm_i386.h |  1 +
 7 files changed, 48 insertions(+), 39 deletions(-)

-- 
1.9.1

[Qemu-devel] [GIT PULL qemu v2] pseries: Update SLOF firmware image to qemu-slof-20151103

2015-11-04 Thread Alexey Kardashevskiy

Hi!

This is a SLOF update which is 700k and therefore too big to be posted
as a patch so here is a pull request.

Changes:
v2:
* rebuilt with gcc 4.8, v1 was build with 5.2.1 by mistake


The following changes since commit 90da0d5a703839c8db9f37355107955df043b654:

  ppc/spapr: Add "ibm,pa-features" property to the device-tree (2015-10-23 
12:22:40 +1100)

are available in the git repository at:

  g...@github.com:aik/qemu.git slof-20151103

for you to fetch changes up to d95c5ce2f99f8f2328b288a63fed4f006519be6d:

  pseries: Update SLOF firmware image to qemu-slof-20151103 (2015-11-05 
15:26:08 +1100)


Alexey Kardashevskiy (1):
  pseries: Update SLOF firmware image to qemu-slof-20151103

 pc-bios/README   |   2 +-
 pc-bios/slof.bin | Bin 915584 -> 914712 bytes
 roms/SLOF|   2 +-
 3 files changed, 2 insertions(+), 2 deletions(-)

Re: [Qemu-devel] [GIT PULL qemu] pseries: Update SLOF firmware image to qemu-slof-20151103

2015-11-04 Thread Alexey Kardashevskiy


On 11/05/2015 02:28 PM, Alexey Kardashevskiy wrote:

Hi!

This is a SLOF update which is 700k and therefore too big to be posted
as a patch so here is a pull request.


Ignore this please, this is built using gcc 5.2.1.






The following changes since commit 90da0d5a703839c8db9f37355107955df043b654:

   ppc/spapr: Add "ibm,pa-features" property to the device-tree (2015-10-23 
12:22:40 +1100)

are available in the git repository at:

   g...@github.com:aik/qemu.git slof-20151103

for you to fetch changes up to 644abf102409e5bca84d12920e6dde31b5082802:

   pseries: Update SLOF firmware image to qemu-slof-20151103 (2015-11-05 
12:48:40 +1100)


Alexey Kardashevskiy (1):
   pseries: Update SLOF firmware image to qemu-slof-20151103

  pc-bios/README   |   2 +-
  pc-bios/slof.bin | Bin 915584 -> 913256 bytes
  roms/SLOF|   2 +-
  3 files changed, 2 insertions(+), 2 deletions(-)




--
Alexey

[Qemu-devel] [PATCH v3] iscsi: Translate scsi sense into error code

2015-11-04 Thread Fam Zheng

Previously we return -EIO blindly when anything goes wrong. Add a helper
function to parse sense fields and try to make the return code more
meaningful.

This also fixes the default werror configuration (enospc) when we're
using qcow2 on an iscsi lun. The old -EIO not being treated as out of
space error failed to trigger vm stop.

Signed-off-by: Fam Zheng 

---
v3: Don't use err_code as return value when it's not set. [Peter]
v2: Drop the qcow2 patch with ERANGE -> ENOSPC.
Drop dead break after return.
Return EIO for NO_SENSE.
Translate error code for other I/O paths too.
---
 block/iscsi.c | 59 +--
 1 file changed, 53 insertions(+), 6 deletions(-)

diff --git a/block/iscsi.c b/block/iscsi.c
index 9a628b7..26bd452 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -84,6 +84,7 @@ typedef struct IscsiTask {
 IscsiLun *iscsilun;
 QEMUTimer retry_timer;
 bool force_next_flush;
+int err_code;
 } IscsiTask;
 
 typedef struct IscsiAIOCB {
@@ -182,6 +183,51 @@ static inline unsigned exp_random(double mean)
 #define QEMU_SCSI_STATUS_TIMEOUTSCSI_STATUS_TIMEOUT
 #endif
 
+static int iscsi_translate_sense(struct scsi_sense *sense)
+{
+int ret;
+
+switch (sense->key) {
+case SCSI_SENSE_NOT_READY:
+return -EBUSY;
+case SCSI_SENSE_DATA_PROTECTION:
+return -EACCES;
+case SCSI_SENSE_COMMAND_ABORTED:
+return -ECANCELED;
+case SCSI_SENSE_ILLEGAL_REQUEST:
+/* Parse ASCQ */
+break;
+default:
+return -EIO;
+}
+switch (sense->ascq) {
+case SCSI_SENSE_ASCQ_PARAMETER_LIST_LENGTH_ERROR:
+case SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE:
+case SCSI_SENSE_ASCQ_INVALID_FIELD_IN_CDB:
+case SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST:
+ret = -EINVAL;
+break;
+case SCSI_SENSE_ASCQ_LBA_OUT_OF_RANGE:
+ret = -ENOSPC;
+break;
+case SCSI_SENSE_ASCQ_LOGICAL_UNIT_NOT_SUPPORTED:
+ret = -ENOTSUP;
+break;
+case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT:
+case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT_TRAY_CLOSED:
+case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT_TRAY_OPEN:
+ret = -ENOMEDIUM;
+break;
+case SCSI_SENSE_ASCQ_WRITE_PROTECTED:
+ret = -EACCES;
+break;
+default:
+ret = -EIO;
+break;
+}
+return ret;
+}
+
 static void
 iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
 void *command_data, void *opaque)
@@ -226,6 +272,7 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
 return;
 }
 }
+iTask->err_code = iscsi_translate_sense(>sense);
 error_report("iSCSI Failure: %s", iscsi_get_error(iscsi));
 } else {
 iTask->iscsilun->force_next_flush |= iTask->force_next_flush;
@@ -455,7 +502,7 @@ retry:
 }
 
 if (iTask.status != SCSI_STATUS_GOOD) {
-return -EIO;
+return iTask.err_code;
 }
 
 iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
@@ -644,7 +691,7 @@ retry:
 }
 
 if (iTask.status != SCSI_STATUS_GOOD) {
-return -EIO;
+return iTask.err_code;
 }
 
 return 0;
@@ -683,7 +730,7 @@ retry:
 }
 
 if (iTask.status != SCSI_STATUS_GOOD) {
-return -EIO;
+return iTask.err_code;
 }
 
 return 0;
@@ -703,7 +750,7 @@ iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
 if (status < 0) {
 error_report("Failed to ioctl(SG_IO) to iSCSI lun. %s",
  iscsi_get_error(iscsi));
-acb->status = -EIO;
+acb->status = iscsi_translate_sense(>task->sense);
 }
 
 acb->ioh->driver_status = 0;
@@ -905,7 +952,7 @@ retry:
 }
 
 if (iTask.status != SCSI_STATUS_GOOD) {
-return -EIO;
+return iTask.err_code;
 }
 
 iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
@@ -999,7 +1046,7 @@ retry:
 }
 
 if (iTask.status != SCSI_STATUS_GOOD) {
-return -EIO;
+return iTask.err_code;
 }
 
 if (flags & BDRV_REQ_MAY_UNMAP) {
-- 
2.4.3

Re: [Qemu-devel] [PULL 00/14] Misc changes for QEMU 2.4-rc1

2015-11-04 Thread Fam Zheng

On Wed, 11/04 16:34, Peter Maydell wrote:
> On 4 November 2015 at 16:18, Paolo Bonzini  wrote:
> > The following changes since commit 7bc8e0c967a4ef77657174d28af775691e18b4ce:
> >
> >   Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging 
> > (2015-10-29 09:49:52 +)
> >
> > are available in the git repository at:
> >
> >
> >   git://github.com/bonzini/qemu.git tags/for-upstream
> >
> > for you to fetch changes up to 8670e274ef292086e109ada29e072ef27d68177b:
> >
> >   configure: disable FORTIFY_SOURCE under clang (2015-11-04 15:56:05 +0100)
> >
> > 
> > * Guest ABI fixes for PC machines (hw_version)
> > * Fixes for recent Perl
> > * John Snow's configure fixes
> > * file-backed RAM improvements (Igor, Pavel)
> > * -Werror=clobbered fixes (Stefan)
> > * Kill -d ioport
> > * Fix qemu-system-s390x
> 
> Is the 2.4 in the subject a typo, or are these two pulls
> targeting a stable branch rather than master?
> 

Please drop 13/14 'iscsi: Translate scsi sense into error code' when applying.
I need to send a new version.

Fam

Re: [Qemu-devel] [PATCH v4 2/7] e1000: Add support for migrating the entire MAC registers' array

2015-11-04 Thread Jason Wang



On 11/04/2015 10:48 PM, Leonid Bloch wrote:
>>> >>  },
>>> >>  .subsections = (const VMStateDescription*[]) {
>>> >>  _e1000_mit_state,
>>> >> +_e1000_full_mac_state,
>>> >> +_e1000_compat_mac_state,
>>> >>  NULL
>>> >>  }
>> >
>> > I'm afraid this will break migration to older qemu. Consider the case
>> > when full_mac_registers=off, we save compat mac registers as a
>> > subsection, this breaks the assumption on load who expect all compact
>> > registers just after phy_reg. (You can just test this with a simple
>> > 'savevm' with full_mac_register_off and the revert this patch and do a
>> > 'loadvm'), Looks like the safe way is to just add a
>> > vmstate_e1000_full_mac_state and keep other fields of vmstate_e1000
>> > unmodified.
> You are right! Thanks.
>> >
>>> >>  };
>>> >> @@ -1603,6 +1640,8 @@ static Property e1000_properties[] = {
>>> >>  compat_flags, E1000_FLAG_AUTONEG_BIT, true),
>>> >>  DEFINE_PROP_BIT("mitigation", E1000State,
>>> >>  compat_flags, E1000_FLAG_MIT_BIT, true),
>>> >> +DEFINE_PROP_BIT("full_mac_registers", E1000State,
>>> >> +compat_flags, E1000_FLAG_MAC_BIT, true),
>>> >>  DEFINE_PROP_END_OF_LIST(),
>>> >>  };
>>> >>
>> >
>> > Need also turn this off for pre 2.5 machines in HW_COMPAT_2_4.
> Thanks for reminding!
> BTW, HW_COMPAT is simply the new version of PC_COMPAT, or am I wrong?
>
> Leonid.
>

HW_COMPAT contains compatibility options that could be used by all
targets (e.g both ppc and x86). PC_COMPAT contains pc specific options.
So usually PC_COMPAT is a superset of HW_COMPAT.

Re: [Qemu-devel] [PATCH v4 7/7] e1000: Implementing various counters

2015-11-04 Thread Jason Wang



On 11/04/2015 11:44 PM, Leonid Bloch wrote:
> On Wed, Nov 4, 2015 at 4:46 AM, Jason Wang  wrote:
>>
>> On 11/03/2015 07:14 PM, Leonid Bloch wrote:
>>> This implements the following Statistic registers (various counters)
>>> according to Intel's specs:
>>>
>>> TSCTC  GOTCL  GOTCH  GORCL  GORCH  MPRC   BPRC   RUCROC
>>> BPTC   MPTC   PTC... PRC...
>>>
>>> Signed-off-by: Leonid Bloch 
>>> Signed-off-by: Dmitry Fleytman 
>>> ---
>>>  hw/net/e1000.c | 78 
>>> ++
>>>  1 file changed, 73 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/hw/net/e1000.c b/hw/net/e1000.c
>>> index af97e8a..fbda0d1 100644
>>> --- a/hw/net/e1000.c
>>> +++ b/hw/net/e1000.c
>>> @@ -37,6 +37,8 @@
>>>
>>>  #include "e1000_regs.h"
>>>
>> [...]
>>
>>> @@ -,6 +1164,7 @@ e1000_receive_iov(NetClientState *nc, const struct 
>>> iovec *iov, int iovcnt)
>>>  }
>>>  } while (desc_offset < total_size);
>>>
>>> +increase_size_stats(s, PRCregs, total_size);
>>>  inc_reg_if_not_full(s, TPR);
>>>  s->mac_reg[GPRC] = s->mac_reg[TPR];
>>>  /* TOR - Total Octets Received:
>>> @@ -1119,6 +1173,8 @@ e1000_receive_iov(NetClientState *nc, const struct 
>>> iovec *iov, int iovcnt)
>>>   * Always include FCS length (4) in size.
>>>   */
>>>  grow_8reg_if_not_full(s, TORL, size+4);
>>> +s->mac_reg[GORCL] = s->mac_reg[TORL];
>>> +s->mac_reg[GORCH] = s->mac_reg[TORH];
>>>
>>>  n = E1000_ICS_RXT0;
>>>  if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
>>> @@ -1307,11 +1363,23 @@ static uint32_t (*macreg_readops[])(E1000State *, 
>>> int) = {
>>>  getreg(TNCRS),getreg(SEC),  getreg(CEXTERR),  getreg(RLEC),
>>>  getreg(XONRXC),   getreg(XONTXC),   getreg(XOFFRXC),  getreg(XOFFTXC),
>>>  getreg(RFC),  getreg(RJC),  getreg(RNBC), getreg(TSCTFC),
>>> -getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),
>>> +getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),   getreg(GORCL),
>>> +getreg(GOTCL),
>>>
>>>  [TOTH]= mac_read_clr8,  [TORH]= mac_read_clr8,
>>> +[GOTCH]   = mac_read_clr8,  [GORCH]   = mac_read_clr8,
>>> +[PRC64]   = mac_read_clr4,  [PRC127]  = mac_read_clr4,
>>> +[PRC255]  = mac_read_clr4,  [PRC511]  = mac_read_clr4,
>>> +[PRC1023] = mac_read_clr4,  [PRC1522] = mac_read_clr4,
>>> +[PTC64]   = mac_read_clr4,  [PTC127]  = mac_read_clr4,
>>> +[PTC255]  = mac_read_clr4,  [PTC511]  = mac_read_clr4,
>>> +[PTC1023] = mac_read_clr4,  [PTC1522] = mac_read_clr4,
>>>  [GPRC]= mac_read_clr4,  [GPTC]= mac_read_clr4,
>>>  [TPT] = mac_read_clr4,  [TPR] = mac_read_clr4,
>>> +[RUC] = mac_read_clr4,  [ROC] = mac_read_clr4,
>>> +[BPRC]= mac_read_clr4,  [MPRC]= mac_read_clr4,
>>> +[TSCTC]   = mac_read_clr4,  [BPTC]= mac_read_clr4,
>>> +[MPTC]= mac_read_clr4,
>>>  [ICR] = mac_icr_read,   [EECD]= get_eecd,
>>>  [EERD]= flash_eerd_read,
>>>  [RDFH]= mac_low13_read_prt, [RDFT]= mac_low13_read_prt,
>> Same issue with patch 3. Need limit the function of those registers
>> works only for 2.5 and post 2.5.
> Contrary to the registers in patch 3, these registers do have a
> functionality in the device - they are counters. But they have a
> simple logic and they are distributed throughout the code.
> Contrary to that, for example, the registers that were implemented in
> the patch that added interrupt mitigation support (e9845f098), are
> concentrated in a single location, and a single "if" statement checks
> for a flag: if it is set, some non-trivial logic begins (which is
> skipped if these registers are not needed anyway).
> In the current case, some 10 "if"s are needed to enable a simple logic
> in many places. That may influence performance, and will certainly
> make the code more bulky.

I agree, but this is the price of compatibility. (And which can make our
user happy). For performance, I doubt a single condition will cause
noticeable difference.
> On the other hand, if these registers will function always, absolutely
> no harm will be done if migrating to an older version: these registers
> will simply be inaccessible, as they were so far.

Same as patch 3, reading to those registers will have a zero value.

For the issue of bulky, how about something like this?

- introduce another array mac_regcap[] (which is like phy_regcap).
- store the compat flags required for the function of the registers in
this array, zero means no requirement
- check the enabled compat flag again this in e1000_mmio_write() and
e1000_mmio_read()

And this method would be even useful for future extension for e1000.

Re: [Qemu-devel] [kvm-unit-test RFC] x86: Memory instructions test case

2015-11-04 Thread Xiao Guangrong




On 11/05/2015 05:21 AM, Eduardo Habkost wrote:

Quickly hacked test case for memory instructions (clflush, mfence,
sfence, lfence, clflushopt, clwb, pcommit), that simply checks for #UD
exceptions.

This was useful to test TCG handling of those instructions.

The "fake clwb" part will probably break once a new instruction use
those opcodes.


Tested it on the box on that these instruction are available, it worked.

Tested-by: Xiao Guangrong

Re: [Qemu-devel] [PATCH v2] iscsi: Translate scsi sense into error code

2015-11-04 Thread Fam Zheng

On Wed, 11/04 21:12, Peter Lieven wrote:
> Am 04.11.2015 um 02:36 schrieb Fam Zheng:
> > Previously we return -EIO blindly when anything goes wrong. Add a helper
> > function to parse sense fields and try to make the return code more
> > meaningful.
> >
> > This also fixes the default werror configuration (enospc) when we're
> > using qcow2 on an iscsi lun. The old -EIO not being treated as out of
> > space error failed to trigger vm stop.
> >
> > Signed-off-by: Fam Zheng 
> >
> > ---
> > v2: Drop the qcow2 patch with ERANGE -> ENOSPC.
> > Drop dead break after return.
> > Return EIO for NO_SENSE.
> > Translate error code for other I/O paths too.
> > ---
> >  block/iscsi.c | 63 
> > +++
> >  1 file changed, 55 insertions(+), 8 deletions(-)
> >
> > diff --git a/block/iscsi.c b/block/iscsi.c
> > index 9a628b7..b1bc0ef 100644
> > --- a/block/iscsi.c
> > +++ b/block/iscsi.c
> > @@ -84,6 +84,7 @@ typedef struct IscsiTask {
> >  IscsiLun *iscsilun;
> >  QEMUTimer retry_timer;
> >  bool force_next_flush;
> > +int err_code;
> >  } IscsiTask;
> >  
> >  typedef struct IscsiAIOCB {
> > @@ -182,6 +183,51 @@ static inline unsigned exp_random(double mean)
> >  #define QEMU_SCSI_STATUS_TIMEOUTSCSI_STATUS_TIMEOUT
> >  #endif
> >  
> > +static int iscsi_translate_sense(struct scsi_sense *sense)
> > +{
> > +int ret;
> > +
> > +switch (sense->key) {
> > +case SCSI_SENSE_NOT_READY:
> > +return -EBUSY;
> > +case SCSI_SENSE_DATA_PROTECTION:
> > +return -EACCES;
> > +case SCSI_SENSE_COMMAND_ABORTED:
> > +return -ECANCELED;
> > +case SCSI_SENSE_ILLEGAL_REQUEST:
> > +/* Parse ASCQ */
> > +break;
> > +default:
> > +return -EIO;
> > +}
> > +switch (sense->ascq) {
> > +case SCSI_SENSE_ASCQ_PARAMETER_LIST_LENGTH_ERROR:
> > +case SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE:
> > +case SCSI_SENSE_ASCQ_INVALID_FIELD_IN_CDB:
> > +case SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST:
> > +ret = -EINVAL;
> > +break;
> > +case SCSI_SENSE_ASCQ_LBA_OUT_OF_RANGE:
> > +ret = -ENOSPC;
> > +break;
> > +case SCSI_SENSE_ASCQ_LOGICAL_UNIT_NOT_SUPPORTED:
> > +ret = -ENOTSUP;
> > +break;
> > +case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT:
> > +case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT_TRAY_CLOSED:
> > +case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT_TRAY_OPEN:
> > +ret = -ENOMEDIUM;
> > +break;
> > +case SCSI_SENSE_ASCQ_WRITE_PROTECTED:
> > +ret = -EACCES;
> > +break;
> > +default:
> > +ret = -EIO;
> > +break;
> > +}
> > +return ret;
> > +}
> > +
> >  static void
> >  iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
> >  void *command_data, void *opaque)
> > @@ -226,6 +272,7 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int 
> > status,
> >  return;
> >  }
> >  }
> > +iTask->err_code = iscsi_translate_sense(>sense);
> >  error_report("iSCSI Failure: %s", iscsi_get_error(iscsi));
> >  } else {
> >  iTask->iscsilun->force_next_flush |= iTask->force_next_flush;
> > @@ -455,7 +502,7 @@ retry:
> >  }
> >  
> >  if (iTask.status != SCSI_STATUS_GOOD) {
> > -return -EIO;
> > +return iTask.err_code;
> >  }
> >  
> >  iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
> > @@ -536,14 +583,14 @@ retry:
> >  
> >  lbas = scsi_datain_unmarshall(iTask.task);
> >  if (lbas == NULL) {
> > -ret = -EIO;
> > +ret = iTask.err_code;
> >  goto out;
> >  }
> 
> An unmarschalling error will not set iTask.err_code. It is likely
> 0 here. Please leave it at -EIO.
> 
> >  
> >  lbasd = >descriptors[0];
> >  
> >  if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) {
> > -ret = -EIO;
> > +ret = iTask.err_code;
> >  goto out;
> >  }
> 
> This is an internal sanity check, iTask.err_code will also be 0 here.

You're right in both cases, will fix. Thanks!

Fam

Re: [Qemu-devel] [PATCH] object_new_with_type: remove redundant code

2015-11-04 Thread Cao jin


sorry for this, I am wrong at removing type_initialize.
please forget it

On 11/05/2015 01:19 PM, Cao jin wrote:

g_assert & type_initialize are called in object_initialize_with_type

Signed-off-by: Cao jin 
---
  qom/object.c | 3 ---
  1 file changed, 3 deletions(-)

diff --git a/qom/object.c b/qom/object.c
index fc6e161..43ac2dd 100644
--- a/qom/object.c
+++ b/qom/object.c
@@ -423,9 +423,6 @@ Object *object_new_with_type(Type type)
  {
  Object *obj;

-g_assert(type != NULL);
-type_initialize(type);
-
  obj = g_malloc(type->instance_size);
  object_initialize_with_type(obj, type->instance_size, type);
  obj->free = g_free;



--
Yours Sincerely,

Cao Jin

Re: [Qemu-devel] [PATCH 01/11] snapshot: create helper to test that block drivers supports snapshots

2015-11-04 Thread Greg Kurz

On Wed,  4 Nov 2015 20:19:32 +0300
"Denis V. Lunev"  wrote:

> The patch enforces proper locking for this operation.
> 
> Signed-off-by: Denis V. Lunev 
> CC: Juan Quintela 
> CC: Stefan Hajnoczi 
> CC: Kevin Wolf 
> ---
>  block/snapshot.c | 27 +++
>  include/block/snapshot.h |  9 +
>  migration/savevm.c   | 17 -
>  3 files changed, 40 insertions(+), 13 deletions(-)
> 
> diff --git a/block/snapshot.c b/block/snapshot.c
> index 89500f2..2ef4545 100644
> --- a/block/snapshot.c
> +++ b/block/snapshot.c
> @@ -356,3 +356,30 @@ int 
> bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs,
> 
>  return ret;
>  }
> +
> +
> +/* Group operations. All block drivers are involved.
> + * These functions will properly handle dataplace (take aio_context_acquire

s/dataplace/dataplane

> + * when appropriate for appropriate block drivers

Missing ) and anyway the "(take aio_context_acquire..." part more or less
quotes the code and is not needed.

> + *
> + * Returned block driver will be always locked.
> + */
> +
> +bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs)

Juan suggested bdrv_snapshot_is_possible() and FWIW you already emphasize
that it involves all block drivers in the above comment.

> +{
> +bool ok = true;
> +BlockDriverState *bs = NULL;
> +
> +while ((bs = bdrv_next(bs)) && ok) {
> +AioContext *ctx = bdrv_get_aio_context(bs);
> +
> +aio_context_acquire(ctx);
> +if (bdrv_is_inserted(bs) && !bdrv_is_read_only(bs)) {
> +ok = bdrv_can_snapshot(bs);
> +}
> +aio_context_release(ctx);
> +}
> +
> +*first_bad_bs = bs;
> +return ok;
> +}
> diff --git a/include/block/snapshot.h b/include/block/snapshot.h
> index 770d9bb..0a3cf0d 100644
> --- a/include/block/snapshot.h
> +++ b/include/block/snapshot.h
> @@ -75,4 +75,13 @@ int bdrv_snapshot_load_tmp(BlockDriverState *bs,
>  int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs,
>   const char *id_or_name,
>   Error **errp);
> +
> +
> +/* Group operations. All block drivers are involved.
> + * These functions will properly handle dataplace (take aio_context_acquire
> + * when appropriate for appropriate block drivers
> + *
> + */

This comment is not needed here, already in block/snapshot.c

> +bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs);
> +
>  #endif
> diff --git a/migration/savevm.c b/migration/savevm.c
> index dbcc39a..3ac3f07 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -1290,19 +1290,10 @@ void hmp_savevm(Monitor *mon, const QDict *qdict)
>  const char *name = qdict_get_try_str(qdict, "name");
>  Error *local_err = NULL;
> 
> -/* Verify if there is a device that doesn't support snapshots and is 
> writable */
> -bs = NULL;
> -while ((bs = bdrv_next(bs))) {
> -
> -if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
> -continue;
> -}
> -
> -if (!bdrv_can_snapshot(bs)) {
> -monitor_printf(mon, "Device '%s' is writable but does not 
> support snapshots.\n",
> -   bdrv_get_device_name(bs));
> -return;
> -}
> +if (!bdrv_all_can_snapshot()) {
> +monitor_printf(mon, "Device '%s' is writable but does not "
> +   "support snapshots.\n", bdrv_get_device_name(bs));
> +return;
>  }
> 
>  bs = find_vmstate_bs();

Reviewed-by: Greg Kurz

Re: [Qemu-devel] [PATCH v4 3/7] e1000: Trivial implementation of various MAC registers

2015-11-04 Thread Jason Wang



On 11/04/2015 11:21 PM, Leonid Bloch wrote:
> On Wed, Nov 4, 2015 at 4:44 AM, Jason Wang  wrote:
>>
>> On 11/03/2015 07:14 PM, Leonid Bloch wrote:
>>> These registers appear in Intel's specs, but were not implemented.
>>> These registers are now implemented trivially, i.e. they are initiated
>>> with zero values, and if they are RW, they can be written or read by the
>>> driver, or read only if they are R (essentially retaining their zero
>>> values). For these registers no other procedures are performed.
>>>
>>> For the trivially implemented Diagnostic registers, a debug warning is
>>> produced on read/write attempts.
>>>
>>> The registers implemented here are:
>>>
>>> Transmit:
>>> RW: AIT
>>>
>>> Management:
>>> RW: WUC WUS IPAVIP6AT*  IP4AT*  FFLT*   WUPM*   FFMT*   FFVT*
>>>
>>> Diagnostic:
>>> RW: RDFHRDFTRDFHS   RDFTS   RDFPC   PBM*TDFHTDFTTDFHS
>>> TDFTS   TDFPC
>>>
>>> Statistic:
>>> RW: FCRUC
>>> R:  RNBCTSCTFC  MGTPRC  MGTPDC  MGTPTC  RFC RJC SCC ECOL
>>> LATECOL MCC COLCDC  TNCRS   SEC CEXTERR RLECXONRXC
>>> XONTXC  XOFFRXC XOFFTXC
>>>
>>> Signed-off-by: Leonid Bloch 
>>> Signed-off-by: Dmitry Fleytman 
>>> ---
>>>  hw/net/e1000.c  | 95 
>>> +++--
>>>  hw/net/e1000_regs.h |  6 
>>>  2 files changed, 98 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/hw/net/e1000.c b/hw/net/e1000.c
>>> index 1190bbe..7db6614 100644
>>> --- a/hw/net/e1000.c
>>> +++ b/hw/net/e1000.c
>>> @@ -170,7 +170,17 @@ enum {
>>>  defreg(TPR), defreg(TPT), defreg(TXDCTL),  defreg(WUFC),
>>>  defreg(RA),  defreg(MTA), defreg(CRCERRS), defreg(VFTA),
>>>  defreg(VET), defreg(RDTR),defreg(RADV),defreg(TADV),
>>> -defreg(ITR),
>>> +defreg(ITR), defreg(FCRUC),   defreg(TDFH),defreg(TDFT),
>>> +defreg(TDFHS),   defreg(TDFTS),   defreg(TDFPC),   defreg(RDFH),
>>> +defreg(RDFT),defreg(RDFHS),   defreg(RDFTS),   defreg(RDFPC),
>>> +defreg(IPAV),defreg(WUC), defreg(WUS), defreg(AIT),
>>> +defreg(IP6AT),   defreg(IP4AT),   defreg(FFLT),defreg(FFMT),
>>> +defreg(FFVT),defreg(WUPM),defreg(PBM), defreg(SCC),
>>> +defreg(ECOL),defreg(MCC), defreg(LATECOL), defreg(COLC),
>>> +defreg(DC),  defreg(TNCRS),   defreg(SEC), defreg(CEXTERR),
>>> +defreg(RLEC),defreg(XONRXC),  defreg(XONTXC),  defreg(XOFFRXC),
>>> +defreg(XOFFTXC), defreg(RFC), defreg(RJC), defreg(RNBC),
>>> +defreg(TSCTFC),  defreg(MGTPRC),  defreg(MGTPDC),  defreg(MGTPTC)
>>>  };
>>>
>>>  static void
>>> @@ -1118,6 +1128,48 @@ mac_readreg(E1000State *s, int index)
>>>  }
>>>
>>>  static uint32_t
>>> +mac_readreg_prt(E1000State *s, int index)
>>> +{
>>> +DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
>>> +   "It is not fully implemented.\n", index<<2);
>>> +return s->mac_reg[index];
>>> +}
>>> +
>>> +static uint32_t
>>> +mac_low4_read(E1000State *s, int index)
>>> +{
>>> +return s->mac_reg[index] & 0xf;
>>> +}
>>> +
>>> +static uint32_t
>>> +mac_low11_read(E1000State *s, int index)
>>> +{
>>> +return s->mac_reg[index] & 0x7ff;
>>> +}
>>> +
>>> +static uint32_t
>>> +mac_low11_read_prt(E1000State *s, int index)
>>> +{
>>> +DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
>>> +   "It is not fully implemented.\n", index<<2);
>>> +return s->mac_reg[index] & 0x7ff;
>>> +}
>>> +
>>> +static uint32_t
>>> +mac_low13_read_prt(E1000State *s, int index)
>>> +{
>>> +DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
>>> +   "It is not fully implemented.\n", index<<2);
>>> +return s->mac_reg[index] & 0x1fff;
>>> +}
>>> +
>>> +static uint32_t
>>> +mac_low16_read(E1000State *s, int index)
>>> +{
>>> +return s->mac_reg[index] & 0x;
>>> +}
>>> +
>>> +static uint32_t
>>>  mac_icr_read(E1000State *s, int index)
>>>  {
>>>  uint32_t ret = s->mac_reg[ICR];
>>> @@ -1161,6 +1213,14 @@ mac_writereg(E1000State *s, int index, uint32_t val)
>>>  }
>>>
>>>  static void
>>> +mac_writereg_prt(E1000State *s, int index, uint32_t val)
>>> +{
>>> +DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
>>> +   "It is not fully implemented.\n", index<<2);
>>> +s->mac_reg[index] = val;
>>> +}
>>> +
>>> +static void
>>>  set_rdt(E1000State *s, int index, uint32_t val)
>>>  {
>>>  s->mac_reg[index] = val & 0x;
>>> @@ -1219,26 +1279,50 @@ static uint32_t (*macreg_readops[])(E1000State *, 
>>> int) = {
>>>  getreg(RDH),  getreg(RDT),  getreg(VET),  getreg(ICS),
>>>  getreg(TDBAL),getreg(TDBAH),getreg(RDBAH),getreg(RDBAL),
>>>  getreg(TDLEN),getreg(RDLEN),getreg(RDTR), getreg(RADV),
>>> -getreg(TADV), getreg(ITR),
>>> +getreg(TADV), getreg(ITR),  getreg(FCRUC),

[Qemu-devel] [GIT PULL qemu] pseries: Update SLOF firmware image to qemu-slof-20151103

2015-11-04 Thread Alexey Kardashevskiy

Hi!

This is a SLOF update which is 700k and therefore too big to be posted
as a patch so here is a pull request.


The following changes since commit 90da0d5a703839c8db9f37355107955df043b654:

  ppc/spapr: Add "ibm,pa-features" property to the device-tree (2015-10-23 
12:22:40 +1100)

are available in the git repository at:

  g...@github.com:aik/qemu.git slof-20151103

for you to fetch changes up to 644abf102409e5bca84d12920e6dde31b5082802:

  pseries: Update SLOF firmware image to qemu-slof-20151103 (2015-11-05 
12:48:40 +1100)


Alexey Kardashevskiy (1):
  pseries: Update SLOF firmware image to qemu-slof-20151103

 pc-bios/README   |   2 +-
 pc-bios/slof.bin | Bin 915584 -> 913256 bytes
 roms/SLOF|   2 +-
 3 files changed, 2 insertions(+), 2 deletions(-)

[Qemu-devel] [PATCH] object_new_with_type: remove redundant code

2015-11-04 Thread Cao jin

g_assert & type_initialize are called in object_initialize_with_type

Signed-off-by: Cao jin 
---
 qom/object.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/qom/object.c b/qom/object.c
index fc6e161..43ac2dd 100644
--- a/qom/object.c
+++ b/qom/object.c
@@ -423,9 +423,6 @@ Object *object_new_with_type(Type type)
 {
 Object *obj;
 
-g_assert(type != NULL);
-type_initialize(type);
-
 obj = g_malloc(type->instance_size);
 object_initialize_with_type(obj, type->instance_size, type);
 obj->free = g_free;
-- 
2.1.0

Re: [Qemu-devel] [PATCH v7 3/8] mirror: Do zero write on target if sectors not allocated

2015-11-04 Thread Fam Zheng

On Wed, 11/04 19:35, Kevin Wolf wrote:
> Am 08.06.2015 um 07:56 hat Fam Zheng geschrieben:
> > If guest discards a source cluster, mirroring with bdrv_aio_readv is 
> > overkill.
> > Some protocols do zero upon discard, where it's best to use
> > bdrv_aio_write_zeroes, otherwise, bdrv_aio_discard will be enough.
> > 
> > Signed-off-by: Fam Zheng 
> > ---
> >  block/mirror.c | 20 ++--
> >  1 file changed, 18 insertions(+), 2 deletions(-)
> > 
> > diff --git a/block/mirror.c b/block/mirror.c
> > index d2515c7..3c38695 100644
> > --- a/block/mirror.c
> > +++ b/block/mirror.c
> > @@ -164,6 +164,8 @@ static uint64_t coroutine_fn 
> > mirror_iteration(MirrorBlockJob *s)
> >  int64_t end, sector_num, next_chunk, next_sector, hbitmap_next_sector;
> >  uint64_t delay_ns = 0;
> >  MirrorOp *op;
> > +int pnum;
> > +int64_t ret;
> >  
> >  s->sector_num = hbitmap_iter_next(>hbi);
> >  if (s->sector_num < 0) {
> > @@ -290,8 +292,22 @@ static uint64_t coroutine_fn 
> > mirror_iteration(MirrorBlockJob *s)
> >  s->in_flight++;
> >  s->sectors_in_flight += nb_sectors;
> >  trace_mirror_one_iteration(s, sector_num, nb_sectors);
> > -bdrv_aio_readv(source, sector_num, >qiov, nb_sectors,
> > -   mirror_read_complete, op);
> > +
> > +ret = bdrv_get_block_status_above(source, NULL, sector_num,
> > +  nb_sectors, );
> > +if (ret < 0 || pnum < nb_sectors ||
> 
> Earlier today I told Richard Jones that qemu-img commit should really
> be using zero cluster support in the backing file since 2.4 because I
> remembered this commit. Turns out it doesn't actually use it but writes
> explicit zeros instead.
> 
> The reason is the condition 'pnum < nb_sectors' here, which makes mirror
> fall back to explicit writes if bdrv_get_block_status_above() doesn't
> return enough sectors (enough being relatively large here, I think in
> qemu-img commit it's always the full 10 MB buffer).
> 
> In other words, we are ignoring any zero areas smaller than 10 MB!
> 
> (What made this worse is that qcow2 had a bug that reports only a single
> zero cluster at a time, so it would never report more than 10 MB, even
> if the image was completely zeroed. I've sent a fix for that one.)
> 
> In order to fix this, we'll probably need to move the call to
> bdrv_get_block_status_above() before actually allocating memory and
> all that for the full nb_chunks. We should detect zeros on the usual
> block job granularity (64k by default, I think).
> 
> > +(ret & BDRV_BLOCK_DATA && !(ret & BDRV_BLOCK_ZERO))) {
> > +bdrv_aio_readv(source, sector_num, >qiov, nb_sectors,
> > +   mirror_read_complete, op);
> > +} else if (ret & BDRV_BLOCK_ZERO) {
> > +bdrv_aio_write_zeroes(s->target, sector_num, op->nb_sectors,
> > +  s->unmap ? BDRV_REQ_MAY_UNMAP : 0,
> > +  mirror_write_complete, op);
> > +} else {
> > +assert(!(ret & BDRV_BLOCK_DATA));
> > +bdrv_aio_discard(s->target, sector_num, op->nb_sectors,
> > + mirror_write_complete, op);
> > +}
> >  return delay_ns;
> >  }
> 
> Paolo also noticed that there's no reason at all to allocate buffers
> and a qiov for the write_zeroes and discard cases.

I'll write a patch to address these. Thanks!

Fam

Re: [Qemu-devel] [PATCH v9 04/27] qapi: Simplify error testing in test-qmp-*

2015-11-04 Thread Markus Armbruster

Eric Blake  writes:

> On 11/04/2015 01:40 AM, Markus Armbruster wrote:
>
>> 
>>> By moving err into data, we can let test teardown take care
>>> of cleaning up any collected error; it also gives us fewer
>>> lines of code between repeated tests where init runs teardown
>>> on our behalf.
>> 
>> This part isn't as obvious.
>> 
>> Having two parts of differing obviousness indicates patch splitting
>> could make sense.  Especially when the parts are large and mechanical,
>> because reviewing large mechanical changes is much easier when there's
>> just one kind of it.
>
> Will split.
>
>>> @@ -364,21 +341,17 @@ static void
>>> test_visitor_in_alternate_number(TestInputVisitorData *data,
>>>  /* Parsing an int */
>>>
>>>  v = visitor_input_test_init(data, "42");
>>> -visit_type_AltStrBool(v, , NULL, );
>>> -g_assert(err);
>>> -error_free(err);
>>> -err = NULL;
>>> +visit_type_AltStrBool(v, , NULL, >err);
>>> +g_assert(data->err);
>>>  qapi_free_AltStrBool(asb);
>> 
>> This leaves data->err non-null.
>> 
>>>
>>>  /* FIXME: Order of alternate should not affect semantics; asn should
>>>   * parse the same as ans */
>>>  v = visitor_input_test_init(data, "42");
>
> And this part wipes out data, so that data->err is once again NULL.
>
>>> -visit_type_AltStrNum(v, , NULL, );
>>> +visit_type_AltStrNum(v, , NULL, >err);
>> 
>> If visit_type_AltStrNum() errors out, its error will be thrown away by
>> its error_propagate(), because data->err is already non-null.
>
> No, you missed that this patch is relying on the magic in 3/27 that
> wipes out data on every new test.

You're right.

>> If it fails to error out, its error_propagate() does nothing, and we
>> won't detect the test failure.
>> 
>> Your patch makes forgetting to reset err an even easier mistake to make,
>> because it removes most of the error_free() that serve as reminders.
>> 
>> Is it a good idea anyway?  Let's discuss before I check the remainder of
>> this patch.
>
> The point was that by moving err _into_ data, then the resetting of err
> becomes as simple as resetting data, rather than having to be verbose at
> every use of data->err.  We just need a visitor_input_test_init() in
> between.

Reducing boilerplate is good.  However, I'm afraid hiding
error_free(err); err = NULL; like this sets an example that is easily
copied incorrectly.

Here's what error.h has to say about consuming an Error object:

 * Report an error to stderr:
 * error_report_err(err);
 * This frees the error object.
 *
 * Report an error somewhere else:
 * const char *msg = error_get_pretty(err);
 * do with msg what needs to be done...
 * error_free(err);
 *
 * Handle an error without reporting it (just for completeness):
 * error_free(err);

Perhaps we want something like

 * Expect an error, abort() if there is none:
 * error_free_or_abort();
 * This frees the error object and clears err.  Convenient for tests.

Then the patch quoted above becomes

@@ -364,21 +341,17 @@ static void 
test_visitor_in_alternate_number(TestInputVisitorData *data,
 /* Parsing an int */

 v = visitor_input_test_init(data, "42");
 visit_type_AltStrBool(v, , NULL, );
-g_assert(err);
-error_free(err);
-err = NULL;
+error_free_or_abort();
 qapi_free_AltStrBool(asb);

 /* FIXME: Order of alternate should not affect semantics; asn should
  * parse the same as ans */
 v = visitor_input_test_init(data, "42");
 visit_type_AltStrNum(v, , NULL, );
 /* FIXME g_assert_cmpint(asn->type, == ALT_STR_NUM_KIND_N); */
 /* FIXME g_assert_cmpfloat(asn->u.n, ==, 42); */
-g_assert(err);
-error_free(err);
-err = NULL;
+error_free_or_abort();
 qapi_free_AltStrNum(asn);

 v = visitor_input_test_init(data, "42");

Similar boilerplate reduction, but keeps the clearing of err more
visible.

Re: [Qemu-devel] [PATCH v8 10/17] qapi: Simplify visiting of alternate types

2015-11-04 Thread Markus Armbruster

Eric Blake  writes:

> On 11/04/2015 09:03 AM, Markus Armbruster wrote:
>
>> Conclusions:
>> 
>> * Having two different name manglers is a headache we could do without,
>>   especially since the second one camel_to_upper() is pretty magic.
>> 
>>   We have it only to get
>> 
>> typedef enum BlockDeviceIoStatus {
>> BLOCK_DEVICE_IO_STATUS_OK = 0,
>> BLOCK_DEVICE_IO_STATUS_FAILED = 1,
>> BLOCK_DEVICE_IO_STATUS_NOSPACE = 2,
>> BLOCK_DEVICE_IO_STATUS_MAX = 3,
>> } BlockDeviceIoStatus;
>> 
>>   instead of
>> 
>> typedef enum BlockDeviceIoStatus {
>> BlockDeviceIoStatus_ok = 0,
>> BlockDeviceIoStatus_failed = 1,
>> BlockDeviceIoStatus_nospace = 2,
>> BlockDeviceIoStatus_MAX = 3,
>> } BlockDeviceIoStatus;
>> 
>>   Bah!  CODING_STYLE doesn't even ask for shouting enumeration
>>   constants.  Can't see why we do.
>
> Interesting idea.  In fact, if we went one step further:
>
> BlockDeviceIoStatus_ok = 0,
> ...
> BlockDeviceIoStatusMAX = 3.
>
> (that is, typename + '_' + value for user values, and typename + 'MAX'
> for the sentinel), then the max value _cannot_ collide with any of the
> other values.

Another arbitrary rule killed.

>> * Keeping the complexity of the rules under control is good both for
>>   qapi.py and for the QAPI schema language.
>> 
>>   To that end, I think we should consider reserving the same set of
>>   names both for members and tag values.  It gets rid of complications
>>   like enumerations you can't use as flat union tags.
>> 
>>   Additionally, the question whether to keep the door open for
>>   generating an enum for the alternate cases becomes moot.
>> 
>> What do you think?
>
> I like the idea. Don't know if it's too late for 2.5, though.

The only risk with renaming obviously unique identifiers is producing
merge conflicts.  Annoying, but easy enough to resolve.

How much churn exactly?  I count 1423 occurences of generated
enumeration constants in 143 out of 4442 source files.  Top-scorer is of
course QKeyCode, with 471 occurences in 9 files, almost all in tables.
I've seen worse.  I wouldn't ask for such a pull during hard freeze, of
course.  During soft freeze, merge conflict risk might be lower than
usual.

[Qemu-devel] [PATCH] Trivial: update comment of struct Object

2015-11-04 Thread Cao jin

it don`t has "GSList *interfaces" anymore

Signed-off-by: Cao jin 
---
 include/qom/object.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/include/qom/object.h b/include/qom/object.h
index 0bb89d4..e46cc30 100644
--- a/include/qom/object.h
+++ b/include/qom/object.h
@@ -396,9 +396,6 @@ struct ObjectClass
  * As a result, #Object contains a reference to the objects type as its
  * first member.  This allows identification of the real type of the object at
  * run time.
- *
- * #Object also contains a list of #Interfaces that this object
- * implements.
  */
 struct Object
 {
-- 
2.1.0

Re: [Qemu-devel] [PATCH 02/11] snapshot: return error code from bdrv_snapshot_delete_by_id_or_name

2015-11-04 Thread Greg Kurz

On Wed,  4 Nov 2015 20:19:33 +0300
"Denis V. Lunev"  wrote:

> this will make code better in the next patch
> 

This changelog is not very useful. IMHO this calls for squashing this patch
into the next one: then we clearly see how the return value is used and why
the code is better.

> Signed-off-by: Denis V. Lunev 
> CC: Juan Quintela 
> CC: Stefan Hajnoczi 
> CC: Kevin Wolf 
> ---
>  block/snapshot.c | 7 ---
>  include/block/snapshot.h | 6 +++---
>  2 files changed, 7 insertions(+), 6 deletions(-)
> 
> diff --git a/block/snapshot.c b/block/snapshot.c
> index 2ef4545..d729c76 100644
> --- a/block/snapshot.c
> +++ b/block/snapshot.c
> @@ -253,9 +253,9 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
>  return -ENOTSUP;
>  }
> 
> -void bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs,
> -const char *id_or_name,
> -Error **errp)
> +int bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs,
> +   const char *id_or_name,
> +   Error **errp)
>  {
>  int ret;
>  Error *local_err = NULL;
> @@ -270,6 +270,7 @@ void bdrv_snapshot_delete_by_id_or_name(BlockDriverState 
> *bs,
>  if (ret < 0) {
>  error_propagate(errp, local_err);
>  }
> +return ret;
>  }
> 
>  int bdrv_snapshot_list(BlockDriverState *bs,
> diff --git a/include/block/snapshot.h b/include/block/snapshot.h
> index 0a3cf0d..76cf6a3 100644
> --- a/include/block/snapshot.h
> +++ b/include/block/snapshot.h
> @@ -63,9 +63,9 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
>   const char *snapshot_id,
>   const char *name,
>   Error **errp);
> -void bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs,
> -const char *id_or_name,
> -Error **errp);
> +int bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs,
> +   const char *id_or_name,
> +   Error **errp);
>  int bdrv_snapshot_list(BlockDriverState *bs,
> QEMUSnapshotInfo **psn_info);
>  int bdrv_snapshot_load_tmp(BlockDriverState *bs,

Re: [Qemu-devel] Should 'qemu-img commit' sparsify the backing file?

2015-11-04 Thread Fam Zheng

On Wed, 11/04 16:37, Richard W.M. Jones wrote:
> 
> This doesn't work (I tested both qemu 2.3 and qemu from git).  Should it?
> 
> (1) Create a non-sparse guest image:
> 
> $ virt-builder fedora-22
> $ mv fedora-22.img fedora-22.img.sparse
> $ cp --sparse=never fedora-22.img.sparse fedora-22.img
> $ du -sh fedora-22.img
> 6.1G fedora-22.img
> 
> (2) Add a snapshot on top:
> 
> $ qemu-img create -f qcow2 -o compat=1.1 -b fedora-22.img overlay.qcow2
> $ du -sh fedora-22.img overlay.qcow2 
> 6.1G fedora-22.img
> 196K overlay.qcow2
> 
> (3) Sparsify the overlay:
> 
> $ virt-sparsify --in-place overlay.qcow2 
> $ du -sh fedora-22.img overlay.qcow2 
> 6.1G fedora-22.img
> 3.2M overlay.qcow2
> 
> (4) Commit to the backing file:
> 
> $ qemu-img commit overlay.qcow2 
> Image committed.
> $ du -sh fedora-22.img overlay.qcow2 
> 6.1G fedora-22.img <--
> 260K overlay.qcow2
> 
> Notice that the backing file (fedora-22.img) doesn't get any smaller.
> 
> I'm expecting the backing file to shrink to around 800 MB, which is
> does if you run virt-sparsify directly on the backing file.
> 

I don't think this the purpose of "qemu-img commit". Committing "new" data in
overlay.qcow2 has little to do with discarding backing image's fragments where
there are only explict zeroes. However you can try

qemu-img convert fedora-22.img fedora-22.img.1

as the last step, which I think will yield a sparse image.

I don't think we have an in-place sparsifier now.

Fam

Re: [Qemu-devel] [PATCH COLO-Frame v10 35/38] netfilter: Introduce a API to automatically add filter-buffer for each netdev

2015-11-04 Thread zhanghailiang


Hi Jason,

On 2015/11/4 10:56, Jason Wang wrote:



On 11/03/2015 07:56 PM, zhanghailiang wrote:

Signed-off-by: zhanghailiang 
Cc: Jason Wang 


Commit log please.


---
v10: new patch
---
  include/net/filter.h |  1 +
  include/net/net.h|  3 ++
  net/filter-buffer.c  | 84 
  net/net.c| 20 +
  4 files changed, 108 insertions(+)

diff --git a/include/net/filter.h b/include/net/filter.h
index 4499d60..b0954ba 100644
--- a/include/net/filter.h
+++ b/include/net/filter.h
@@ -75,5 +75,6 @@ ssize_t qemu_netfilter_pass_to_next(NetClientState *sender,
  void *opaque);
  void filter_buffer_release_all(void);
  void  filter_buffer_del_all_timers(void);
+void qemu_auto_add_filter_buffer(NetFilterDirection direction, Error **errp);

  #endif /* QEMU_NET_FILTER_H */
diff --git a/include/net/net.h b/include/net/net.h
index 5c65c45..e32bd90 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -129,6 +129,9 @@ typedef void (*qemu_netfilter_foreach)(NetFilterState *nf, 
void *opaque,
 Error **errp);
  void qemu_foreach_netfilter(qemu_netfilter_foreach func, void *opaque,
  Error **errp);
+typedef void (*qemu_netdev_foreach)(NetClientState *nc, void *opaque,
+Error **errp);
+void qemu_foreach_netdev(qemu_netdev_foreach func, void *opaque, Error **errp);
  int qemu_can_send_packet(NetClientState *nc);
  ssize_t qemu_sendv_packet(NetClientState *nc, const struct iovec *iov,
int iovcnt);
diff --git a/net/filter-buffer.c b/net/filter-buffer.c
index 05313de..0dc1efb 100644
--- a/net/filter-buffer.c
+++ b/net/filter-buffer.c
@@ -15,6 +15,11 @@
  #include "qapi-visit.h"
  #include "qom/object.h"
  #include "net/net.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp-output-visitor.h"
+#include "qapi/qmp-input-visitor.h"
+#include "monitor/monitor.h"
+

  #define TYPE_FILTER_BUFFER "filter-buffer"

@@ -185,6 +190,85 @@ void filter_buffer_del_all_timers(void)
  qemu_foreach_netfilter(filter_buffer_del_timer, NULL, NULL);
  }

+static void netdev_add_filter_buffer(NetClientState *nc, void *opaque,
+ Error **errp)
+{
+NetFilterState *nf;
+bool found = false;
+
+QTAILQ_FOREACH(nf, >filters, next) {
+if (!strcmp(object_get_typename(OBJECT(nf)), TYPE_FILTER_BUFFER)) {
+found = true;
+break;
+}
+}
+
+if (!found) {
+QmpOutputVisitor *qov;
+QmpInputVisitor *qiv;
+Visitor *ov, *iv;
+QObject *obj = NULL;
+QDict *qdict;
+void *dummy = NULL;
+char *id = g_strdup_printf("%s-%s.0", nc->name, TYPE_FILTER_BUFFER);
+char *queue = (char *) opaque;
+bool auto_add = true;
+Error *err = NULL;
+
+qov = qmp_output_visitor_new();
+ov = qmp_output_get_visitor(qov);
+visit_start_struct(ov,  , NULL, NULL, 0, );
+if (err) {
+goto out;
+}
+visit_type_str(ov, >name, "netdev", );
+if (err) {
+goto out;
+}
+visit_type_str(ov, , "queue", );
+if (err) {
+goto out;
+}
+visit_type_bool(ov, _add, "auto", );
+if (err) {
+goto out;
+}
+visit_end_struct(ov, );
+if (err) {
+goto out;
+}
+obj = qmp_output_get_qobject(qov);
+g_assert(obj != NULL);
+qdict = qobject_to_qdict(obj);
+qmp_output_visitor_cleanup(qov);
+
+qiv = qmp_input_visitor_new(obj);
+iv = qmp_input_get_visitor(qiv);
+object_add(TYPE_FILTER_BUFFER, id, qdict, iv, );
+qmp_input_visitor_cleanup(qiv);
+qobject_decref(obj);
+out:
+g_free(id);
+if (err) {
+error_propagate(errp, err);
+}
+}
+}
+/*
+* This will be used by COLO or MC FT, for which they will need
+* to buffer all the packets of all VM's net devices, Here we check
+* and automatically add netfilter for netdev that doesn't attach any buffer
+* netfilter.
+*/
+void qemu_auto_add_filter_buffer(NetFilterDirection direction, Error **errp)
+{
+char *queue = g_strdup(NetFilterDirection_lookup[direction]);
+
+qemu_foreach_netdev(netdev_add_filter_buffer, queue,
+errp);
+g_free(queue);
+}
+


This make me think for following questions:

- What if a nic is hot added after this "automatically" filter add?
- Maybe a better way is to have a default filter? It could be specified
through qemu cli or other (And default filter could be 'nop' which means
no filter) ?



I have thought about this. I'd like to add this default buffer filter quietly,
not through qemu cli. In this way, we can still keep the buffer filter that 
configured by

Re: [Qemu-devel] [PATCH] target-i386: enable cflushopt/clwb/pcommit instructions

2015-11-04 Thread Richard Henderson


On 11/04/2015 08:35 PM, Eduardo Habkost wrote:

On Fri, Oct 30, 2015 at 01:54:33PM -0700, Richard Henderson wrote:

On 10/29/2015 12:31 AM, Xiao Guangrong wrote:

These instructions are used by NVDIMM drivers and the specification
locates at:
https://software.intel.com/sites/default/files/managed/0d/53/319433-022.pdf

There instructions are available on Skylake Server

Signed-off-by: Xiao Guangrong 
---
  target-i386/cpu.c | 8 +---
  target-i386/cpu.h | 3 +++
  2 files changed, 8 insertions(+), 3 deletions(-)


Reviewed-by: Richard Henderson 

Although it would be nice to update the comments in translate.c to include the
new insns, since they overlap mfence and sfence.  At present we only check for
SSE enabled when accepting these; I suppose it's easiest to consider it invalid
to specify +clwb,-sse?


I assume you want to add the extra SSE requirement to TCG code, not to
generic x86 code, then I have no objections.


I don't really want to add any requirement, just point and laugh at anyone who 
reports an bug for the above condition.



But in the case of clwb (/6 with a memory operand, modrm != 0xc0), we
are not just requiring SSE2: we are rejecting the instruction unless
modrm == 0xc0. That means TCG is rejecting the clwb instruction, so I
believe we shouldn't add CLWB to TCG_7_0_EBX_FEATURES yet.


Hmm, yes.

I've cleaned up some of this code on a branch, but it didn't get enough testing 
or review this cycle, so it's going to wait for the next.  I see you've posted 
a patch for this, which should be good enough until then.



r~

Re: [Qemu-devel] [PATCH v2] iscsi: Translate scsi sense into error code

2015-11-04 Thread Peter Lieven

Am 04.11.2015 um 02:36 schrieb Fam Zheng:
> Previously we return -EIO blindly when anything goes wrong. Add a helper
> function to parse sense fields and try to make the return code more
> meaningful.
>
> This also fixes the default werror configuration (enospc) when we're
> using qcow2 on an iscsi lun. The old -EIO not being treated as out of
> space error failed to trigger vm stop.
>
> Signed-off-by: Fam Zheng 
>
> ---
> v2: Drop the qcow2 patch with ERANGE -> ENOSPC.
> Drop dead break after return.
> Return EIO for NO_SENSE.
> Translate error code for other I/O paths too.
> ---
>  block/iscsi.c | 63 
> +++
>  1 file changed, 55 insertions(+), 8 deletions(-)
>
> diff --git a/block/iscsi.c b/block/iscsi.c
> index 9a628b7..b1bc0ef 100644
> --- a/block/iscsi.c
> +++ b/block/iscsi.c
> @@ -84,6 +84,7 @@ typedef struct IscsiTask {
>  IscsiLun *iscsilun;
>  QEMUTimer retry_timer;
>  bool force_next_flush;
> +int err_code;
>  } IscsiTask;
>  
>  typedef struct IscsiAIOCB {
> @@ -182,6 +183,51 @@ static inline unsigned exp_random(double mean)
>  #define QEMU_SCSI_STATUS_TIMEOUTSCSI_STATUS_TIMEOUT
>  #endif
>  
> +static int iscsi_translate_sense(struct scsi_sense *sense)
> +{
> +int ret;
> +
> +switch (sense->key) {
> +case SCSI_SENSE_NOT_READY:
> +return -EBUSY;
> +case SCSI_SENSE_DATA_PROTECTION:
> +return -EACCES;
> +case SCSI_SENSE_COMMAND_ABORTED:
> +return -ECANCELED;
> +case SCSI_SENSE_ILLEGAL_REQUEST:
> +/* Parse ASCQ */
> +break;
> +default:
> +return -EIO;
> +}
> +switch (sense->ascq) {
> +case SCSI_SENSE_ASCQ_PARAMETER_LIST_LENGTH_ERROR:
> +case SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE:
> +case SCSI_SENSE_ASCQ_INVALID_FIELD_IN_CDB:
> +case SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST:
> +ret = -EINVAL;
> +break;
> +case SCSI_SENSE_ASCQ_LBA_OUT_OF_RANGE:
> +ret = -ENOSPC;
> +break;
> +case SCSI_SENSE_ASCQ_LOGICAL_UNIT_NOT_SUPPORTED:
> +ret = -ENOTSUP;
> +break;
> +case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT:
> +case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT_TRAY_CLOSED:
> +case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT_TRAY_OPEN:
> +ret = -ENOMEDIUM;
> +break;
> +case SCSI_SENSE_ASCQ_WRITE_PROTECTED:
> +ret = -EACCES;
> +break;
> +default:
> +ret = -EIO;
> +break;
> +}
> +return ret;
> +}
> +
>  static void
>  iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
>  void *command_data, void *opaque)
> @@ -226,6 +272,7 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int 
> status,
>  return;
>  }
>  }
> +iTask->err_code = iscsi_translate_sense(>sense);
>  error_report("iSCSI Failure: %s", iscsi_get_error(iscsi));
>  } else {
>  iTask->iscsilun->force_next_flush |= iTask->force_next_flush;
> @@ -455,7 +502,7 @@ retry:
>  }
>  
>  if (iTask.status != SCSI_STATUS_GOOD) {
> -return -EIO;
> +return iTask.err_code;
>  }
>  
>  iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
> @@ -536,14 +583,14 @@ retry:
>  
>  lbas = scsi_datain_unmarshall(iTask.task);
>  if (lbas == NULL) {
> -ret = -EIO;
> +ret = iTask.err_code;
>  goto out;
>  }

An unmarschalling error will not set iTask.err_code. It is likely
0 here. Please leave it at -EIO.

>  
>  lbasd = >descriptors[0];
>  
>  if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) {
> -ret = -EIO;
> +ret = iTask.err_code;
>  goto out;
>  }

This is an internal sanity check, iTask.err_code will also be 0 here.

>  
> @@ -644,7 +691,7 @@ retry:
>  }
>  
>  if (iTask.status != SCSI_STATUS_GOOD) {
> -return -EIO;
> +return iTask.err_code;
>  }
>  
>  return 0;
> @@ -683,7 +730,7 @@ retry:
>  }
>  
>  if (iTask.status != SCSI_STATUS_GOOD) {
> -return -EIO;
> +return iTask.err_code;
>  }
>  
>  return 0;
> @@ -703,7 +750,7 @@ iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int 
> status,
>  if (status < 0) {
>  error_report("Failed to ioctl(SG_IO) to iSCSI lun. %s",
>   iscsi_get_error(iscsi));
> -acb->status = -EIO;
> +acb->status = iscsi_translate_sense(>task->sense);
>  }
>  
>  acb->ioh->driver_status = 0;
> @@ -905,7 +952,7 @@ retry:
>  }
>  
>  if (iTask.status != SCSI_STATUS_GOOD) {
> -return -EIO;
> +return iTask.err_code;
>  }
>  
>  iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
> @@ -999,7 +1046,7 @@ retry:
>  }
>  
>  if (iTask.status != SCSI_STATUS_GOOD) {
> -return -EIO;
> +return iTask.err_code;
>  }
>  
>  if (flags &

[Qemu-devel] [PULL 1/3] qga: drop hand-made guest_file_toggle_flags helper

2015-11-04 Thread Michael Roth

From: "Denis V. Lunev" 

We'd better use generic qemu_set_nonblock directly.

Signed-off-by: Denis V. Lunev 
Reviewed-by: Yuri Pudgorodskiy 
Reviewed-by: Eric Blake 
CC: Michael Roth 
Signed-off-by: Michael Roth 
---
 qga/commands-posix.c | 27 ++-
 1 file changed, 2 insertions(+), 25 deletions(-)

diff --git a/qga/commands-posix.c b/qga/commands-posix.c
index 67a173a..0ebd473 100644
--- a/qga/commands-posix.c
+++ b/qga/commands-posix.c
@@ -28,6 +28,7 @@
 #include "qapi/qmp/qerror.h"
 #include "qemu/queue.h"
 #include "qemu/host-utils.h"
+#include "qemu/sockets.h"
 
 #ifndef CONFIG_HAS_ENVIRON
 #ifdef __APPLE__
@@ -385,27 +386,6 @@ safe_open_or_create(const char *path, const char *mode, 
Error **errp)
 return NULL;
 }
 
-static int guest_file_toggle_flags(int fd, int flags, bool set, Error **err)
-{
-int ret, old_flags;
-
-old_flags = fcntl(fd, F_GETFL);
-if (old_flags == -1) {
-error_setg_errno(err, errno, QERR_QGA_COMMAND_FAILED,
- "failed to fetch filehandle flags");
-return -1;
-}
-
-ret = fcntl(fd, F_SETFL, set ? (old_flags | flags) : (old_flags & ~flags));
-if (ret == -1) {
-error_setg_errno(err, errno, QERR_QGA_COMMAND_FAILED,
- "failed to set filehandle flags");
-return -1;
-}
-
-return ret;
-}
-
 int64_t qmp_guest_file_open(const char *path, bool has_mode, const char *mode,
 Error **errp)
 {
@@ -426,10 +406,7 @@ int64_t qmp_guest_file_open(const char *path, bool 
has_mode, const char *mode,
 /* set fd non-blocking to avoid common use cases (like reading from a
  * named pipe) from hanging the agent
  */
-if (guest_file_toggle_flags(fileno(fh), O_NONBLOCK, true, errp) < 0) {
-fclose(fh);
-return -1;
-}
+qemu_set_nonblock(fileno(fh));
 
 handle = guest_file_handle_add(fh, errp);
 if (handle < 0) {
-- 
1.9.1

[Qemu-devel] [PULL 2/3] qga: fixed CloseHandle in qmp_guest_file_open

2015-11-04 Thread Michael Roth

From: Olga Krishtal 

CloseHandle use HANDLE as an argument, but not *HANDLE

Signed-off-by: Olga Krishtal 
Signed-off-by: Denis V. Lunev 
Reviewed-by: Stefan Weil 
CC: Michael Roth 
Signed-off-by: Michael Roth 
---
 qga/commands-win32.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/qga/commands-win32.c b/qga/commands-win32.c
index d9de23b..97f19d5 100644
--- a/qga/commands-win32.c
+++ b/qga/commands-win32.c
@@ -160,7 +160,7 @@ int64_t qmp_guest_file_open(const char *path, bool has_mode,
 
 fd = guest_file_handle_add(fh, errp);
 if (fd < 0) {
-CloseHandle();
+CloseHandle(fh);
 error_setg(errp, "failed to add handle to qmp handle table");
 return -1;
 }
-- 
1.9.1

[Qemu-devel] [PULL 3/3] qga: set file descriptor in qmp_guest_file_open non-blocking on Win32

2015-11-04 Thread Michael Roth

From: Olga Krishtal 

Set fd non-blocking to avoid common use cases (like reading from a
named pipe) from hanging the agent. This was missed in the original
code.

The patch introduces qemu_set_handle_nonoblocking, the local analog
of qemu_set_nonblock for HANDLES.
The usage of handles in qemu_set_non/block is impossible, because for
win32 there is a difference between file discriptors and file handles,
and all file ops are made via Win32 api.

Signed-off-by: Olga Krishtal 
Signed-off-by: Denis V. Lunev 
CC: Michael Roth 
CC: Stefan Weil 
Signed-off-by: Michael Roth 
---
 qga/commands-win32.c | 27 +++
 1 file changed, 27 insertions(+)

diff --git a/qga/commands-win32.c b/qga/commands-win32.c
index 97f19d5..a5306e7 100644
--- a/qga/commands-win32.c
+++ b/qga/commands-win32.c
@@ -128,6 +128,28 @@ static GuestFileHandle *guest_file_handle_find(int64_t id, 
Error **errp)
 return NULL;
 }
 
+static void handle_set_nonblocking(HANDLE fh)
+{
+DWORD file_type, pipe_state;
+file_type = GetFileType(fh);
+if (file_type != FILE_TYPE_PIPE) {
+return;
+}
+/* If file_type == FILE_TYPE_PIPE, according to MSDN
+ * the specified file is socket or named pipe */
+if (!GetNamedPipeHandleState(fh, _state, NULL,
+ NULL, NULL, NULL, 0)) {
+return;
+}
+/* The fd is named pipe fd */
+if (pipe_state & PIPE_NOWAIT) {
+return;
+}
+
+pipe_state |= PIPE_NOWAIT;
+SetNamedPipeHandleState(fh, _state, NULL, NULL);
+}
+
 int64_t qmp_guest_file_open(const char *path, bool has_mode,
 const char *mode, Error **errp)
 {
@@ -158,6 +180,11 @@ int64_t qmp_guest_file_open(const char *path, bool 
has_mode,
 return -1;
 }
 
+/* set fd non-blocking to avoid common use cases (like reading from a
+ * named pipe) from hanging the agent
+ */
+handle_set_nonblocking(fh);
+
 fd = guest_file_handle_add(fh, errp);
 if (fd < 0) {
 CloseHandle(fh);
-- 
1.9.1

[Qemu-devel] [PULL for-2.5 0/3] qemu-ga patch queue

2015-11-04 Thread Michael Roth

The following changes since commit 79cf9fad341e6e7bd6b55395b71d5c5727d7f5b0:

  Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20151103' 
into staging (2015-11-03 14:54:40 +)

are available in the git repository at:


  git://github.com/mdroth/qemu.git tags/qga-pull-2015-11-04-tag

for you to fetch changes up to fb68777312887000cd0367d72621fdd67cc4a0a0:

  qga: set file descriptor in qmp_guest_file_open non-blocking on Win32 
(2015-11-04 07:37:56 -0600)


qemu-ga patch queue

* fix file handle cleanup on w32
* use non-blocking mode for file handles on w32 to avoid
  hangs on guest-file-read/guest-file-write to pipes


Denis V. Lunev (1):
  qga: drop hand-made guest_file_toggle_flags helper

Olga Krishtal (2):
  qga: fixed CloseHandle in qmp_guest_file_open
  qga: set file descriptor in qmp_guest_file_open non-blocking on Win32

 qga/commands-posix.c | 27 ++-
 qga/commands-win32.c | 29 -
 2 files changed, 30 insertions(+), 26 deletions(-)

[Qemu-devel] [PATCH v6 01/15] blockdev: Add missing bdrv_unref() in drive-backup

2015-11-04 Thread Max Reitz

All error paths after a successful bdrv_open() of target_bs should
contain a bdrv_unref(target_bs). This one did not yet, so add it.

Signed-off-by: Max Reitz 
---
 blockdev.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/blockdev.c b/blockdev.c
index ac2a537..4eda49c 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2899,6 +2899,7 @@ void qmp_drive_backup(const char *device, const char 
*target,
 bmap = bdrv_find_dirty_bitmap(bs, bitmap);
 if (!bmap) {
 error_setg(errp, "Bitmap '%s' could not be found", bitmap);
+bdrv_unref(target_bs);
 goto out;
 }
 }
-- 
2.6.2

[Qemu-devel] [PATCH v6 07/15] block: Move BDS close notifiers into BB

2015-11-04 Thread Max Reitz

The only remaining user of the BDS close notifiers is NBD which uses
them to determine when a BDS tree is being ejected. This patch removes
the BDS-level close notifiers and adds a notifier list to the
BlockBackend structure that is invoked whenever a BDS is removed.

Symmetrically to that, another notifier list is added that is invoked
whenever a BDS is inserted. The dataplane implementations for virtio-blk
and virtio-scsi use both notifier types for setting up and removing op
blockers. This is not only important for setting up the op blockers on
insertion, but also for removing them on ejection since bdrv_delete()
asserts that there are no op blockers set up.

Signed-off-by: Max Reitz 
---
 block.c |  7 
 block/block-backend.c   | 19 +++---
 blockdev-nbd.c  | 37 +---
 hw/block/dataplane/virtio-blk.c | 77 +++--
 hw/scsi/virtio-scsi.c   | 59 +++
 include/block/block.h   |  1 -
 include/block/block_int.h   |  2 --
 include/hw/virtio/virtio-scsi.h | 10 ++
 include/sysemu/block-backend.h  |  3 +-
 nbd.c   | 13 +++
 10 files changed, 159 insertions(+), 69 deletions(-)

diff --git a/block.c b/block.c
index 23448ed..067bc39 100644
--- a/block.c
+++ b/block.c
@@ -258,7 +258,6 @@ BlockDriverState *bdrv_new(void)
 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
 QLIST_INIT(>op_blockers[i]);
 }
-notifier_list_init(>close_notifiers);
 notifier_with_return_list_init(>before_write_notifiers);
 qemu_co_queue_init(>throttled_reqs[0]);
 qemu_co_queue_init(>throttled_reqs[1]);
@@ -268,11 +267,6 @@ BlockDriverState *bdrv_new(void)
 return bs;
 }
 
-void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
-{
-notifier_list_add(>close_notifiers, notify);
-}
-
 BlockDriver *bdrv_find_format(const char *format_name)
 {
 BlockDriver *drv1;
@@ -1916,7 +1910,6 @@ void bdrv_close(BlockDriverState *bs)
 bdrv_drain(bs); /* complete I/O */
 bdrv_flush(bs);
 bdrv_drain(bs); /* in case flush left pending I/O */
-notifier_list_notify(>close_notifiers, bs);
 
 bdrv_release_all_dirty_bitmaps(bs);
 
diff --git a/block/block-backend.c b/block/block-backend.c
index 6f9309f..38580f7 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -48,6 +48,8 @@ struct BlockBackend {
 BlockdevOnError on_read_error, on_write_error;
 bool iostatus_enabled;
 BlockDeviceIoStatus iostatus;
+
+NotifierList remove_bs_notifiers, insert_bs_notifiers;
 };
 
 typedef struct BlockBackendAIOCB {
@@ -98,6 +100,8 @@ BlockBackend *blk_new(const char *name, Error **errp)
 blk = g_new0(BlockBackend, 1);
 blk->name = g_strdup(name);
 blk->refcnt = 1;
+notifier_list_init(>remove_bs_notifiers);
+notifier_list_init(>insert_bs_notifiers);
 QTAILQ_INSERT_TAIL(_backends, blk, link);
 return blk;
 }
@@ -343,6 +347,8 @@ void blk_hide_on_behalf_of_hmp_drive_del(BlockBackend *blk)
  */
 void blk_remove_bs(BlockBackend *blk)
 {
+notifier_list_notify(>remove_bs_notifiers, blk);
+
 blk_update_root_state(blk);
 
 blk->bs->blk = NULL;
@@ -359,6 +365,8 @@ void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs)
 bdrv_ref(bs);
 blk->bs = bs;
 bs->blk = blk;
+
+notifier_list_notify(>insert_bs_notifiers, blk);
 }
 
 /*
@@ -1105,11 +1113,14 @@ void blk_remove_aio_context_notifier(BlockBackend *blk,
 }
 }
 
-void blk_add_close_notifier(BlockBackend *blk, Notifier *notify)
+void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify)
 {
-if (blk->bs) {
-bdrv_add_close_notifier(blk->bs, notify);
-}
+notifier_list_add(>remove_bs_notifiers, notify);
+}
+
+void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify)
+{
+notifier_list_add(>insert_bs_notifiers, notify);
 }
 
 void blk_io_plug(BlockBackend *blk)
diff --git a/blockdev-nbd.c b/blockdev-nbd.c
index bcdd18b..b28a55b 100644
--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -46,37 +46,11 @@ void qmp_nbd_server_start(SocketAddress *addr, Error **errp)
 }
 }
 
-/*
- * Hook into the BlockBackend notifiers to close the export when the
- * backend is closed.
- */
-typedef struct NBDCloseNotifier {
-Notifier n;
-NBDExport *exp;
-QTAILQ_ENTRY(NBDCloseNotifier) next;
-} NBDCloseNotifier;
-
-static QTAILQ_HEAD(, NBDCloseNotifier) close_notifiers =
-QTAILQ_HEAD_INITIALIZER(close_notifiers);
-
-static void nbd_close_notifier(Notifier *n, void *data)
-{
-NBDCloseNotifier *cn = DO_UPCAST(NBDCloseNotifier, n, n);
-
-notifier_remove(>n);
-QTAILQ_REMOVE(_notifiers, cn, next);
-
-nbd_export_close(cn->exp);
-nbd_export_put(cn->exp);
-g_free(cn);
-}
-
 void qmp_nbd_server_add(const char *device, bool has_writable, bool writable,
 Error **errp)
 {
 BlockBackend *blk;
 NBDExport *exp;
-

[Qemu-devel] [PATCH v6 12/15] blockdev: Keep track of monitor-owned BDS

2015-11-04 Thread Max Reitz

Signed-off-by: Max Reitz 
---
 blockdev.c | 25 +
 include/block/block_int.h  |  4 
 stubs/Makefile.objs|  1 +
 stubs/blockdev-close-all-bdrv-states.c |  5 +
 4 files changed, 35 insertions(+)
 create mode 100644 stubs/blockdev-close-all-bdrv-states.c

diff --git a/blockdev.c b/blockdev.c
index a97a881..a5ce8c2 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -50,6 +50,9 @@
 #include "trace.h"
 #include "sysemu/arch_init.h"
 
+static QTAILQ_HEAD(, BlockDriverState) monitor_bdrv_states =
+QTAILQ_HEAD_INITIALIZER(monitor_bdrv_states);
+
 static const char *const if_name[IF_COUNT] = {
 [IF_NONE] = "none",
 [IF_IDE] = "ide",
@@ -662,6 +665,19 @@ fail:
 return NULL;
 }
 
+void blockdev_close_all_bdrv_states(void)
+{
+BlockDriverState *bs, *next_bs;
+
+QTAILQ_FOREACH_SAFE(bs, _bdrv_states, monitor_list, next_bs) {
+AioContext *ctx = bdrv_get_aio_context(bs);
+
+aio_context_acquire(ctx);
+bdrv_unref(bs);
+aio_context_release(ctx);
+}
+}
+
 static void qemu_opt_rename(QemuOpts *opts, const char *from, const char *to,
 Error **errp)
 {
@@ -3449,6 +3465,8 @@ void qmp_blockdev_add(BlockdevOptions *options, Error 
**errp)
 if (!bs) {
 goto fail;
 }
+
+QTAILQ_INSERT_TAIL(_bdrv_states, bs, monitor_list);
 }
 
 if (bs && bdrv_key_required(bs)) {
@@ -3519,11 +3537,18 @@ void qmp_x_blockdev_del(bool has_id, const char *id,
bdrv_get_device_or_node_name(bs));
 goto out;
 }
+
+if (!blk && !bs->monitor_list.tqe_prev) {
+error_setg(errp, "Node %s is not owned by the monitor",
+   bs->node_name);
+goto out;
+}
 }
 
 if (blk) {
 blk_unref(blk);
 } else {
+QTAILQ_REMOVE(_bdrv_states, bs, monitor_list);
 bdrv_unref(bs);
 }
 
diff --git a/include/block/block_int.h b/include/block/block_int.h
index bde93e3..5b7c47d 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -421,6 +421,8 @@ struct BlockDriverState {
 QTAILQ_ENTRY(BlockDriverState) device_list;
 /* element of the list of all BlockDriverStates (all_bdrv_states) */
 QTAILQ_ENTRY(BlockDriverState) bs_list;
+/* element of the list of monitor-owned BDS */
+QTAILQ_ENTRY(BlockDriverState) monitor_list;
 QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps;
 int refcnt;
 
@@ -677,4 +679,6 @@ void blk_dev_resize_cb(BlockBackend *blk);
 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
 bool bdrv_requests_pending(BlockDriverState *bs);
 
+void blockdev_close_all_bdrv_states(void);
+
 #endif /* BLOCK_INT_H */
diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs
index 251443b..5fb489e 100644
--- a/stubs/Makefile.objs
+++ b/stubs/Makefile.objs
@@ -1,5 +1,6 @@
 stub-obj-y += arch-query-cpu-def.o
 stub-obj-y += bdrv-commit-all.o
+stub-obj-y += blockdev-close-all-bdrv-states.o
 stub-obj-y += clock-warp.o
 stub-obj-y += cpu-get-clock.o
 stub-obj-y += cpu-get-icount.o
diff --git a/stubs/blockdev-close-all-bdrv-states.c 
b/stubs/blockdev-close-all-bdrv-states.c
new file mode 100644
index 000..12d2442
--- /dev/null
+++ b/stubs/blockdev-close-all-bdrv-states.c
@@ -0,0 +1,5 @@
+#include "block/block_int.h"
+
+void blockdev_close_all_bdrv_states(void)
+{
+}
-- 
2.6.2

[Qemu-devel] [PATCH v6 08/15] block: Use blk_remove_bs() in blk_delete()

2015-11-04 Thread Max Reitz

Signed-off-by: Max Reitz 
---
 block/block-backend.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index 38580f7..5702cc1 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -165,10 +165,7 @@ static void blk_delete(BlockBackend *blk)
 assert(!blk->refcnt);
 assert(!blk->dev);
 if (blk->bs) {
-assert(blk->bs->blk == blk);
-blk->bs->blk = NULL;
-bdrv_unref(blk->bs);
-blk->bs = NULL;
+blk_remove_bs(blk);
 }
 if (blk->root_state.throttle_state) {
 g_free(blk->root_state.throttle_group);
@@ -347,6 +344,8 @@ void blk_hide_on_behalf_of_hmp_drive_del(BlockBackend *blk)
  */
 void blk_remove_bs(BlockBackend *blk)
 {
+assert(blk->bs->blk == blk);
+
 notifier_list_notify(>remove_bs_notifiers, blk);
 
 blk_update_root_state(blk);
-- 
2.6.2

[Qemu-devel] [PATCH v6 14/15] block: Rewrite bdrv_close_all()

2015-11-04 Thread Max Reitz

This patch rewrites bdrv_close_all(): Until now, all root BDSs have been
force-closed. This is bad because it can lead to cached data not being
flushed to disk.

Instead, try to make all reference holders relinquish their reference
voluntarily:

1. All BlockBackend users are handled by making all BBs simply eject
   their BDS tree. Since a BDS can never be on top of a BB, this will
   not cause any of the issues as seen with the force-closing of BDSs.
   The references will be relinquished and any further access to the BB
   will fail gracefully.
2. All BDSs which are owned by the monitor itself (because they do not
   have a BB) are relinquished next.
3. Besides BBs and the monitor, block jobs and other BDSs are the only
   things left that can hold a reference to BDSs. After every remaining
   block job has been canceled, there should not be any BDSs left (and
   the loop added here will always terminate (as long as NDEBUG is not
   defined), because either all_bdrv_states will be empty or there will
   not be any block job left to cancel, failing the assertion).

Signed-off-by: Max Reitz 
---
 block.c | 45 +
 1 file changed, 37 insertions(+), 8 deletions(-)

diff --git a/block.c b/block.c
index cf5dadc..95a7f98 100644
--- a/block.c
+++ b/block.c
@@ -1905,9 +1905,7 @@ static void bdrv_close(BlockDriverState *bs)
 {
 BdrvAioNotifier *ban, *ban_next;
 
-if (bs->job) {
-block_job_cancel_sync(bs->job);
-}
+assert(!bs->job);
 
 /* Disable I/O limits and drain all pending throttled requests */
 if (bs->io_limits_enabled) {
@@ -1971,13 +1969,44 @@ static void bdrv_close(BlockDriverState *bs)
 void bdrv_close_all(void)
 {
 BlockDriverState *bs;
+AioContext *aio_context;
+int original_refcount = 0;
 
-QTAILQ_FOREACH(bs, _states, device_list) {
-AioContext *aio_context = bdrv_get_aio_context(bs);
+/* Drop references from requests still in flight, such as canceled block
+ * jobs whose AIO context has not been polled yet */
+bdrv_drain_all();
 
-aio_context_acquire(aio_context);
-bdrv_close(bs);
-aio_context_release(aio_context);
+blockdev_close_all_bdrv_states();
+blk_remove_all_bs();
+
+/* Cancel all block jobs */
+while (!QTAILQ_EMPTY(_bdrv_states)) {
+QTAILQ_FOREACH(bs, _bdrv_states, bs_list) {
+aio_context = bdrv_get_aio_context(bs);
+
+aio_context_acquire(aio_context);
+if (bs->job) {
+/* So we can safely query the current refcount */
+bdrv_ref(bs);
+original_refcount = bs->refcnt;
+
+block_job_cancel_sync(bs->job);
+aio_context_release(aio_context);
+break;
+}
+aio_context_release(aio_context);
+}
+
+/* All the remaining BlockDriverStates are referenced directly or
+ * indirectly from block jobs, so there needs to be at least one BDS
+ * directly used by a block job */
+assert(bs);
+
+/* Wait for the block job to release its reference */
+while (bs->refcnt >= original_refcount) {
+aio_poll(aio_context, true);
+}
+bdrv_unref(bs);
 }
 }
 
-- 
2.6.2

Re: [Qemu-devel] [PATCH v9 20/27] qapi: Simplify QAPISchemaObjectTypeVariants.check()

2015-11-04 Thread Markus Armbruster

Eric Blake  writes:

> From: Markus Armbruster 
>
> Avoid the 'if seen' conditional by doing just the essential work
> here, namely setting self.tag_member for flat unions.
> Move the rest to callers.
>
> Signed-off-by: Markus Armbruster 
> Message-Id: <1446559499-26984-7-git-send-email-arm...@redhat.com>
> [reword commit, rebase to earlier AlternateType.check() changes]
> Signed-off-by: Eric Blake 
>
> ---
> v9: new patch
> ---
>  scripts/qapi.py | 5 ++---
>  1 file changed, 2 insertions(+), 3 deletions(-)
>
> diff --git a/scripts/qapi.py b/scripts/qapi.py
> index 3054628..6653c70 100644
> --- a/scripts/qapi.py
> +++ b/scripts/qapi.py
> @@ -988,9 +988,10 @@ class QAPISchemaObjectType(QAPISchemaType):
>  for m in self.local_members:
>  m.check(schema)
>  m.check_clash(seen)
> +self.members = seen.values()
>  if self.variants:
>  self.variants.check(schema, seen)
> -self.members = seen.values()
> +assert self.variants.tag_member in self.members
>
>  def is_implicit(self):
>  # See QAPISchema._make_implicit_object_type()
> @@ -1053,8 +1054,6 @@ class QAPISchemaObjectTypeVariants(object):
>  # seen is non-empty for unions, empty for alternates

You added this comment in PATCH 10 to explain the conditional I'm
dropping here.  Let's drop the comment, too.

>  if self.tag_name:# flat union
>  self.tag_member = seen[self.tag_name]
> -if seen:
> -assert self.tag_member in seen.itervalues()
>  assert isinstance(self.tag_member.type, QAPISchemaEnumType)
>  for v in self.variants:
>  # Reset seen array for each variant, since qapi names from one

Re: [Qemu-devel] [PATCH] qcow2: Fix qcow2_get_cluster_offset() for zero clusters

2015-11-04 Thread Max Reitz

On 04.11.2015 18:16, Kevin Wolf wrote:
> When searching for contiguous zero clusters, we only need to check the
> cluster type. Before this patch, an increasing offset (L2E_OFFSET_MASK)
> was expected, so that the function never returned more than a single
> zero cluster in practice. This patch fixes it to actually return as many
> contiguous zero clusters as it can.
> 
> Cc: qemu-sta...@nongnu.org
> Signed-off-by: Kevin Wolf 
> ---
>  block/qcow2-cluster.c | 15 +--
>  1 file changed, 9 insertions(+), 6 deletions(-)


Thanks, applied to my block tree:

https://github.com/XanClic/qemu/commits/block



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH] target-i386: enable cflushopt/clwb/pcommit instructions

2015-11-04 Thread Eduardo Habkost

On Fri, Oct 30, 2015 at 01:54:33PM -0700, Richard Henderson wrote:
> On 10/29/2015 12:31 AM, Xiao Guangrong wrote:
> > These instructions are used by NVDIMM drivers and the specification
> > locates at:
> > https://software.intel.com/sites/default/files/managed/0d/53/319433-022.pdf
> > 
> > There instructions are available on Skylake Server
> > 
> > Signed-off-by: Xiao Guangrong 
> > ---
> >  target-i386/cpu.c | 8 +---
> >  target-i386/cpu.h | 3 +++
> >  2 files changed, 8 insertions(+), 3 deletions(-)
> 
> Reviewed-by: Richard Henderson 
> 
> Although it would be nice to update the comments in translate.c to include the
> new insns, since they overlap mfence and sfence.  At present we only check for
> SSE enabled when accepting these; I suppose it's easiest to consider it 
> invalid
> to specify +clwb,-sse?

I assume you want to add the extra SSE requirement to TCG code, not to
generic x86 code, then I have no objections. Your conclusion seems to be
right for pcommit and clflushopt, if I checked the opcodes and encoding
properly. In the case of pcommit (/7, modrm == 0xf8), we check for SSE;
in the case of clflushopt (/7 with a memory operand, modrm != 0xf8), we
check for CLFLUSH.

But in the case of clwb (/6 with a memory operand, modrm != 0xc0), we
are not just requiring SSE2: we are rejecting the instruction unless
modrm == 0xc0. That means TCG is rejecting the clwb instruction, so I
believe we shouldn't add CLWB to TCG_7_0_EBX_FEATURES yet.

-- 
Eduardo

[Qemu-devel] [PATCH v6 02/15] blockjob: Call bdrv_unref() on creation error

2015-11-04 Thread Max Reitz

If block_job_create() fails, it should release its reference to the
job's BDS. Normally, this is done in the callback provided by the
caller, but that callback will not be invoked if the block job failed to
be created.

Signed-off-by: Max Reitz 
---
 blockjob.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/blockjob.c b/blockjob.c
index c02fe59..0886a4a 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -70,6 +70,7 @@ void *block_job_create(const BlockJobDriver *driver, 
BlockDriverState *bs,
 if (local_err) {
 block_job_release(bs);
 error_propagate(errp, local_err);
+bdrv_unref(bs);
 return NULL;
 }
 }
-- 
2.6.2

[Qemu-devel] [PATCH v6 for-2.6 00/15] block: Rework bdrv_close_all()

2015-11-04 Thread Max Reitz

Currently, bdrv_close_all() force-closes all BDSs with a BlockBackend,
which can lead to data corruption (see the iotest added in the final
patch of this series) and is most certainly very ugly.

This series reworks bdrv_close_all() to instead eject the BDS trees from
all BlockBackends and then close the monitor-owned BDS trees, which are
the only BDSs without a BB. In effect, all BDSs are closed just by
getting closed automatically due to their reference count becoming 0.

Note that the approach taken here leaks all BlockBackends. This does not
really matter, however, since qemu is about to exit anyway.


v5 is here (yes, it has been a while):
http://lists.nongnu.org/archive/html/qemu-block/2015-03/msg00044.html


v6:
- Patch 1: Added (leakage of a BDS in a drive-backup error case, becomes
  apparent with this series)
- Patch 2: Added (leakage of a BDS when a block job failed to be
  created)
- Patch 3: Added (we hacked our way around this so far, but now we have
  to fix it, see the commit message)
- Patch 5: Rebase conflicts
- Patch 6: Renamed test from 096 to 140 (rebase conflict)
- Patch 7:
  - Contextual rebase conflicts
  - Rebase conflict in a comment being removed
  - Dropping the reference to exp->blk has been moved from
nbd_export_close() to nbd_export_put(), so the notifier_remove()
call must be moved there, too
- Patch 8: blk_remove_bs() no longer silently ignores blk->bs being
  NULL, therefore we have to keep it wrapped in if (blk->bs)
- Patch 9:
  - The bdrv_drain_all() and bdrv_flush() calls have been removed from
hmp_drive_del() already, so we don't need to do it anymore.
  - As in patch 8, blk_remove_bs() must stay in the if (blk->bs) block
- Patch 11: Do not move the list entry in all_bdrv_states in
  bdrv_move_feature_fields(); every BDS is part of that list, so there
  is absolutely no point in swapping their positions here
- Patch 12:
  - Do not move the list entry in the list of monitor-owned BDS in
bdrv_move_feature_fields(); if a BDS is owned by the monitor, that
will not change by putting another BDS on top of it or by putting it
on top of another BDS
  - Add test to qmp_x_blockdev_del() whether the BDS to be deleted (if a
node is to be deleted) is actually monitor-owned
  - If qmp_x_blockdev_del() unrefs a node, it must be dropped from the
list of monitor-owned BDS
- Patch 13: Enclose blk_remove_bs() in an if (blk->bs) block
  (just like in patches 8 and 9)
- Patch 14:
  - One semi-contextual conflict in which an empty line addition is
dropped, because now there already is an empty line
  - Changes to bdrv_close_all():
- Add a bdrv_drain_all() at the beginning; it may not do anything,
  but it looked like a reasonable thing to do to me
- Add a comment what the loop is for
- A block job will not immediately release its BDS reference once it
  is canceled (maybe it did back in March, but now it doesn't), so
  we need to call aio_poll() repeatedly until the reference is
  dropped


git-backport-diff against v5:

Key:
[] : patches are identical
[] : number of functional differences between upstream/downstream patch
[down] : patch is downstream-only
The flags [FC] indicate (F)unctional and (C)ontextual differences, respectively

001/15:[down] 'blockdev: Add missing bdrv_unref() in drive-backup'
002/15:[down] 'blockjob: Call bdrv_unref() on creation error'
003/15:[down] 'block: Release dirty bitmaps in bdrv_close()'
004/15:[] [--] 'iotests: Move _filter_nbd into common.filter'
005/15:[0009] [FC] 'iotests: Make redirecting qemu's stderr optional'
006/15:[0004] [FC] 'iotests: Add test for eject under NBD server'
007/15:[0007] [FC] 'block: Move BDS close notifiers into BB'
008/15:[0004] [FC] 'block: Use blk_remove_bs() in blk_delete()'
009/15:[0006] [FC] 'blockdev: Use blk_remove_bs() in do_drive_del()'
010/15:[] [-C] 'block: Make bdrv_close() static'
011/15:[0003] [FC] 'block: Add list of all BlockDriverStates'
012/15:[0009] [FC] 'blockdev: Keep track of monitor-owned BDS'
013/15:[0004] [FC] 'block: Add blk_remove_all_bs()'
014/15:[0025] [FC] 'block: Rewrite bdrv_close_all()'
015/15:[] [-C] 'iotests: Add test for multiple BB on BDS tree'


Max Reitz (15):
  blockdev: Add missing bdrv_unref() in drive-backup
  blockjob: Call bdrv_unref() on creation error
  block: Release dirty bitmaps in bdrv_close()
  iotests: Move _filter_nbd into common.filter
  iotests: Make redirecting qemu's stderr optional
  iotests: Add test for eject under NBD server
  block: Move BDS close notifiers into BB
  block: Use blk_remove_bs() in blk_delete()
  blockdev: Use blk_remove_bs() in do_drive_del()
  block: Make bdrv_close() static
  block: Add list of all BlockDriverStates
  blockdev: Keep track of monitor-owned BDS
  block: Add blk_remove_all_bs()
  block: Rewrite bdrv_close_all()
  iotests: Add test for multiple BB on BDS tree

 block.c| 83 ---

[Qemu-devel] [PATCH v6 05/15] iotests: Make redirecting qemu's stderr optional

2015-11-04 Thread Max Reitz

Redirecting qemu's stderr to stdout makes working with the stderr output
difficult due to the other file descriptor magic performed in
_launch_qemu ("ambiguous redirect").

Add an option which specifies whether stderr should be redirected to
stdout or not (allowing for other modes to be added in the future).

Signed-off-by: Max Reitz 
---
 tests/qemu-iotests/common.qemu | 15 +--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/tests/qemu-iotests/common.qemu b/tests/qemu-iotests/common.qemu
index 8bf3969..2548a87 100644
--- a/tests/qemu-iotests/common.qemu
+++ b/tests/qemu-iotests/common.qemu
@@ -129,6 +129,8 @@ function _send_qemu_cmd()
 # $qemu_comm_method: set this variable to 'monitor' (case insensitive)
 #to use the QEMU HMP monitor for communication.
 #Otherwise, the default of QMP is used.
+# $keep_stderr: Set this variable to 'y' to keep QEMU's stderr output on 
stderr.
+#   If this variable is empty, stderr will be redirected to stdout.
 # Returns:
 # $QEMU_HANDLE: set to a handle value to communicate with this QEMU instance.
 #
@@ -151,11 +153,20 @@ function _launch_qemu()
 mkfifo "${fifo_out}"
 mkfifo "${fifo_in}"
 
-QEMU_NEED_PID='y'\
-${QEMU} -nographic -serial none ${comm} -machine accel=qtest "${@}" \
+if [ -z "$keep_stderr" ]; then
+QEMU_NEED_PID='y'\
+${QEMU} -nographic -serial none ${comm} -machine accel=qtest "${@}" \
 >"${fifo_out}" 
\
 2>&1 \
 <"${fifo_in}" &
+elif [ "$keep_stderr" = "y" ]; then
+QEMU_NEED_PID='y'\
+${QEMU} -nographic -serial none ${comm} -machine accel=qtest "${@}" \
+>"${fifo_out}" 
\
+<"${fifo_in}" &
+else
+exit 1
+fi
 
 if [[ "${BASH_VERSINFO[0]}" -ge "5" ||
 ("${BASH_VERSINFO[0]}" -ge "4"  &&  "${BASH_VERSINFO[1]}" -ge "1") ]]
-- 
2.6.2

[Qemu-devel] [PATCH v6 04/15] iotests: Move _filter_nbd into common.filter

2015-11-04 Thread Max Reitz

_filter_nbd can be useful for other NBD tests, too, therefore it should
reside in common.filter, and it should support URLs of the "nbd://"
format and export names.

The NBD log lines ("/your/source/dir/nbd.c:function():line: error")
should not be converted to empty lines but removed altogether.

Signed-off-by: Max Reitz 
---
 tests/qemu-iotests/083   | 13 +
 tests/qemu-iotests/083.out   | 10 --
 tests/qemu-iotests/common.filter | 12 
 3 files changed, 13 insertions(+), 22 deletions(-)

diff --git a/tests/qemu-iotests/083 b/tests/qemu-iotests/083
index 1b2d3f1..aa99278 100755
--- a/tests/qemu-iotests/083
+++ b/tests/qemu-iotests/083
@@ -49,17 +49,6 @@ wait_for_tcp_port() {
done
 }
 
-filter_nbd() {
-   # nbd.c error messages contain function names and line numbers that are 
prone
-   # to change.  Message ordering depends on timing between send and 
receive
-   # callbacks sometimes, making them unreliable.
-   #
-   # Filter out the TCP port number since this changes between runs.
-   sed -e 's#^.*nbd\.c:.*##g' \
-   -e 's#nbd:127\.0\.0\.1:[^:]*:#nbd:127\.0\.0\.1:PORT:#g' \
--e 's#\(exportname=foo\|PORT\): Failed to .*$#\1#'
-}
-
 check_disconnect() {
event=$1
when=$2
@@ -84,7 +73,7 @@ EOF
 
$PYTHON nbd-fault-injector.py $extra_args "127.0.0.1:$port" 
"$TEST_DIR/nbd-fault-injector.conf" 2>&1 >/dev/null &
wait_for_tcp_port "127\\.0\\.0\\.1:$port"
-   $QEMU_IO -c "read 0 512" "$nbd_url" 2>&1 | _filter_qemu_io | filter_nbd
+   $QEMU_IO -c "read 0 512" "$nbd_url" 2>&1 | _filter_qemu_io | _filter_nbd
 
echo
 }
diff --git a/tests/qemu-iotests/083.out b/tests/qemu-iotests/083.out
index 8c1441b..5c9141b 100644
--- a/tests/qemu-iotests/083.out
+++ b/tests/qemu-iotests/083.out
@@ -51,7 +51,6 @@ no file open, try 'help open'
 
 === Check disconnect after neg2 ===
 
-
 read failed: Input/output error
 
 === Check disconnect 8 neg2 ===
@@ -66,42 +65,34 @@ no file open, try 'help open'
 
 === Check disconnect before request ===
 
-
 read failed: Input/output error
 
 === Check disconnect after request ===
 
-
 read failed: Input/output error
 
 === Check disconnect before reply ===
 
-
 read failed: Input/output error
 
 === Check disconnect after reply ===
 
-
 read failed: Input/output error
 
 === Check disconnect 4 reply ===
 
-
 read failed: Input/output error
 
 === Check disconnect 8 reply ===
 
-
 read failed: Input/output error
 
 === Check disconnect before data ===
 
-
 read failed: Input/output error
 
 === Check disconnect after data ===
 
-
 read 512/512 bytes at offset 0
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 
@@ -132,7 +123,6 @@ no file open, try 'help open'
 
 === Check disconnect after neg-classic ===
 
-
 read failed: Input/output error
 
 *** done
diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter
index cfdb633..0a270ca 100644
--- a/tests/qemu-iotests/common.filter
+++ b/tests/qemu-iotests/common.filter
@@ -230,5 +230,17 @@ _filter_qemu_img_map()
 -e 's/Mapped to *//' | _filter_testdir | _filter_imgfmt
 }
 
+_filter_nbd()
+{
+# nbd.c error messages contain function names and line numbers that are
+# prone to change.  Message ordering depends on timing between send and
+# receive callbacks sometimes, making them unreliable.
+#
+# Filter out the TCP port number since this changes between runs.
+sed -e '/nbd\.c:/d' \
+-e 's#nbd:\(//\)\?127\.0\.0\.1:[0-9]*#nbd:\1127.0.0.1:PORT#g' \
+-e 's#\(exportname=foo\|PORT\): Failed to .*$#\1#'
+}
+
 # make sure this script returns success
 true
-- 
2.6.2

[Qemu-devel] [PATCH v6 06/15] iotests: Add test for eject under NBD server

2015-11-04 Thread Max Reitz

This patch adds a test for ejecting the BlockBackend an NBD server is
connected to (the NBD server is supposed to stop).

Signed-off-by: Max Reitz 
---
 tests/qemu-iotests/140 | 90 ++
 tests/qemu-iotests/140.out | 16 +
 tests/qemu-iotests/group   |  1 +
 3 files changed, 107 insertions(+)
 create mode 100755 tests/qemu-iotests/140
 create mode 100644 tests/qemu-iotests/140.out

diff --git a/tests/qemu-iotests/140 b/tests/qemu-iotests/140
new file mode 100755
index 000..bcba0ec
--- /dev/null
+++ b/tests/qemu-iotests/140
@@ -0,0 +1,90 @@
+#!/bin/bash
+#
+# Test case for ejecting a BB with an NBD server attached to it
+#
+# Copyright (C) 2015 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see .
+#
+
+# creator
+owner=mre...@redhat.com
+
+seq="$(basename $0)"
+echo "QA output created by $seq"
+
+here="$PWD"
+tmp=/tmp/$$
+status=1   # failure is the default!
+
+_cleanup()
+{
+   _cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+. ./common.qemu
+
+_supported_fmt generic
+_supported_proto file
+_supported_os Linux
+
+_make_test_img 64k
+
+$QEMU_IO -c 'write -P 42 0 64k' "$TEST_IMG" | _filter_qemu_io
+
+keep_stderr=y \
+_launch_qemu -drive if=ide,media=cdrom,id=drv,file="$TEST_IMG",format=$IMGFMT \
+2> >(_filter_nbd)
+
+_send_qemu_cmd $QEMU_HANDLE \
+"{ 'execute': 'qmp_capabilities' }" \
+'return'
+
+_send_qemu_cmd $QEMU_HANDLE \
+"{ 'execute': 'nbd-server-start',
+   'arguments': { 'addr': { 'type': 'inet',
+'data': { 'host': '127.0.0.1',
+  'port': '10809' " \
+'return'
+
+_send_qemu_cmd $QEMU_HANDLE \
+"{ 'execute': 'nbd-server-add',
+   'arguments': { 'device': 'drv' }}" \
+'return'
+
+$QEMU_IO_PROG -f raw -c 'read -P 42 0 64k' 'nbd://127.0.0.1:10809/drv' 2>&1 \
+| _filter_qemu_io | _filter_nbd
+
+_send_qemu_cmd $QEMU_HANDLE \
+"{ 'execute': 'eject',
+   'arguments': { 'device': 'drv' }}" \
+'return'
+
+$QEMU_IO_PROG -f raw -c close 'nbd://127.0.0.1:10809/drv' 2>&1 \
+| _filter_qemu_io | _filter_nbd
+
+_send_qemu_cmd $QEMU_HANDLE \
+"{ 'execute': 'quit' }" \
+'return'
+
+wait=1 _cleanup_qemu
+
+# success, all done
+echo '*** done'
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/140.out b/tests/qemu-iotests/140.out
new file mode 100644
index 000..d98a660
--- /dev/null
+++ b/tests/qemu-iotests/140.out
@@ -0,0 +1,16 @@
+QA output created by 140
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=65536
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+{"return": {}}
+{"return": {}}
+{"return": {}}
+read 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": 
"DEVICE_TRAY_MOVED", "data": {"device": "drv", "tray-open": true}}
+{"return": {}}
+qemu-io: can't open device nbd://127.0.0.1:PORT/drv: Failed to read export 
length
+no file open, try 'help open'
+{"return": {}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": 
"SHUTDOWN"}
+*** done
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index c69265d..993711b 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -139,3 +139,4 @@
 137 rw auto
 138 rw auto quick
 139 rw auto quick
+140 rw auto quick
-- 
2.6.2

[Qemu-devel] [PATCH v6 03/15] block: Release dirty bitmaps in bdrv_close()

2015-11-04 Thread Max Reitz

bdrv_delete() is not very happy about deleting BlockDriverStates with
dirty bitmaps still attached to them. In the past, we got around that
very easily by relying on bdrv_close_all() bypassing bdrv_delete(), and
bdrv_close() simply ignoring that condition. We should fix that by
releasing all dirty bitmaps in bdrv_close() and drop the assertion in
bdrv_delete().

Signed-off-by: Max Reitz 
---
 block.c | 20 +++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/block.c b/block.c
index 3493501..23448ed 100644
--- a/block.c
+++ b/block.c
@@ -87,6 +87,8 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const 
char *filename,
  const BdrvChildRole *child_role, Error **errp);
 
 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
+static void bdrv_release_all_dirty_bitmaps(BlockDriverState *bs);
+
 /* If non-zero, use only whitelisted block drivers */
 static int use_bdrv_whitelist;
 
@@ -1916,6 +1918,8 @@ void bdrv_close(BlockDriverState *bs)
 bdrv_drain(bs); /* in case flush left pending I/O */
 notifier_list_notify(>close_notifiers, bs);
 
+bdrv_release_all_dirty_bitmaps(bs);
+
 if (bs->blk) {
 blk_dev_change_media_cb(bs->blk, false);
 }
@@ -2123,7 +2127,6 @@ static void bdrv_delete(BlockDriverState *bs)
 assert(!bs->job);
 assert(bdrv_op_blocker_is_empty(bs));
 assert(!bs->refcnt);
-assert(QLIST_EMPTY(>dirty_bitmaps));
 
 bdrv_close(bs);
 
@@ -3318,6 +3321,21 @@ void bdrv_release_dirty_bitmap(BlockDriverState *bs, 
BdrvDirtyBitmap *bitmap)
 }
 }
 
+/**
+ * Release all dirty bitmaps attached to a BDS, independently of whether they
+ * are frozen or not (for use in bdrv_close()).
+ */
+static void bdrv_release_all_dirty_bitmaps(BlockDriverState *bs)
+{
+BdrvDirtyBitmap *bm, *next;
+QLIST_FOREACH_SAFE(bm, >dirty_bitmaps, list, next) {
+QLIST_REMOVE(bm, list);
+hbitmap_free(bm->bitmap);
+g_free(bm->name);
+g_free(bm);
+}
+}
+
 void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
 {
 assert(!bdrv_dirty_bitmap_frozen(bitmap));
-- 
2.6.2

[Qemu-devel] [PATCH v6 10/15] block: Make bdrv_close() static

2015-11-04 Thread Max Reitz

There are no users of bdrv_close() left, except for one of bdrv_open()'s
failure paths, bdrv_close_all() and bdrv_delete(), and that is good.
Make bdrv_close() static so nobody makes the mistake of directly using
bdrv_close() again.

Signed-off-by: Max Reitz 
Reviewed-by: Eric Blake 
---
 block.c   | 4 +++-
 include/block/block.h | 1 -
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/block.c b/block.c
index 067bc39..57e4b15 100644
--- a/block.c
+++ b/block.c
@@ -92,6 +92,8 @@ static void bdrv_release_all_dirty_bitmaps(BlockDriverState 
*bs);
 /* If non-zero, use only whitelisted block drivers */
 static int use_bdrv_whitelist;
 
+static void bdrv_close(BlockDriverState *bs);
+
 #ifdef _WIN32
 static int is_windows_drive_prefix(const char *filename)
 {
@@ -1894,7 +1896,7 @@ void bdrv_reopen_abort(BDRVReopenState *reopen_state)
 }
 
 
-void bdrv_close(BlockDriverState *bs)
+static void bdrv_close(BlockDriverState *bs)
 {
 BdrvAioNotifier *ban, *ban_next;
 
diff --git a/include/block/block.h b/include/block/block.h
index 8d9a049..03551d3 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -222,7 +222,6 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
 BlockReopenQueue *queue, Error **errp);
 void bdrv_reopen_commit(BDRVReopenState *reopen_state);
 void bdrv_reopen_abort(BDRVReopenState *reopen_state);
-void bdrv_close(BlockDriverState *bs);
 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
   uint8_t *buf, int nb_sectors);
 int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
-- 
2.6.2

Re: [Qemu-devel] [PATCH v9 19/27] qapi: Check for qapi collisions of flat union branches

2015-11-04 Thread Markus Armbruster

Eric Blake  writes:

> Right now, our ad hoc parser ensures that we cannot have a
> flat union that introduces any qapi member names that would
> conflict with the non-variant qapi members already present
> from the union's base type (see flat-union-clash-member.json).
> We want QAPISchema*.check() to make the same check, so we can
> later reduce some of the ad hoc checks.
>
> We already ensure that all branches of a flat union are qapi
> structs with no variants, at which point those members appear
> in the same JSON object as all non-variant members.  And we
> already have a map 'seen' of all non-variant members passed
> in to QAPISchemaObjectTypeVariants.check(), which we clone for
> each particular variant (since the members of one variant do
> not clash with another).  So all that is additionally needed
> is to actually check the each member of the variant type do
> not add any collisions.
>
> In general, a type used as a branch of a flat union cannot
> also be the base type of the flat union, so even though we are
> adding a call to variant.type.check() in order to populate
> variant.type.members, this is merely a case of gaining
> topological sorting of how types are visited (and type.check()
> is already set up to allow multiple calls due to base types).
>
> For simple unions, the same code happens to work by design,
> because of our synthesized wrapper classes (however, the
> wrapper has a single member 'data' which will never collide
> with the one non-variant member 'type', so it doesn't really
> matter).
>
> But for alternates, we do NOT want to check the type members
> for adding collisions (an alternate has no parent JSON object
> that is merging in member names, the way a flat union does), so
> we branch on whether seen is empty to distinguish whether we
> are checking a union or an alternate.
>
> No change to generated code.
>
> Signed-off-by: Eric Blake 
>
> ---
> v9: new patch, split off from v8 7/17
> ---
>  scripts/qapi.py | 13 +++--
>  1 file changed, 11 insertions(+), 2 deletions(-)
>
> diff --git a/scripts/qapi.py b/scripts/qapi.py
> index a20abda..3054628 100644
> --- a/scripts/qapi.py
> +++ b/scripts/qapi.py
> @@ -1057,8 +1057,9 @@ class QAPISchemaObjectTypeVariants(object):
>  assert self.tag_member in seen.itervalues()
>  assert isinstance(self.tag_member.type, QAPISchemaEnumType)
>  for v in self.variants:
> -vseen = dict(seen)
> -v.check(schema, self.tag_member.type, vseen)
> +# Reset seen array for each variant, since qapi names from one
> +# branch do not affect another branch
> +v.check(schema, self.tag_member.type, dict(seen))
>
>
>  class QAPISchemaObjectTypeVariant(QAPISchemaObjectTypeMember):
> @@ -1068,6 +1069,14 @@ class 
> QAPISchemaObjectTypeVariant(QAPISchemaObjectTypeMember):
>  def check(self, schema, tag_type, seen):
>  QAPISchemaObjectTypeMember.check(self, schema)
>  assert self.name in tag_type.values
> +if seen:
> +# This variant is used within a union; ensure each qapi member
> +# field does not collide with the union's non-variant members.
> +assert isinstance(self.type, QAPISchemaObjectType)
> +assert not self.type.variants   # not implemented
> +self.type.check(schema)
> +for m in self.type.members:
> +m.check_clash(seen)
>
>  # This function exists to support ugly simple union special cases
>  # TODO get rid of them, and drop the function

Two call chains:

* QAPISchemaObjectType.check() via QAPISchemaObjectTypeVariants.check()

  In this case, the new conditional executes.

* QAPISchemaAlternateType.check() via same

  In this case, it doesn't.

Why can't we simply add the new code to QAPISchemaObjectType.check()?
The rest of the clash checking is already there...

[Qemu-devel] [PATCH v6 09/15] blockdev: Use blk_remove_bs() in do_drive_del()

2015-11-04 Thread Max Reitz

Signed-off-by: Max Reitz 
---
 blockdev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/blockdev.c b/blockdev.c
index 4eda49c..a97a881 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2494,7 +2494,7 @@ void hmp_drive_del(Monitor *mon, const QDict *qdict)
 return;
 }
 
-bdrv_close(bs);
+blk_remove_bs(blk);
 }
 
 /* if we have a device attached to this BlockDriverState
-- 
2.6.2

[Qemu-devel] [PATCH v6 13/15] block: Add blk_remove_all_bs()

2015-11-04 Thread Max Reitz

When bdrv_close_all() is called, instead of force-closing all root
BlockDriverStates, it is better to just drop the reference from all
BlockBackends and let them be closed automatically. This prevents BDS
from getting closed that are still referenced by other BDS, which may
result in loss of cached data.

This patch adds a function for doing that, but does not yet incorporate
it in bdrv_close_all().

Signed-off-by: Max Reitz 
---
 block/block-backend.c  | 15 +++
 include/sysemu/block-backend.h |  1 +
 2 files changed, 16 insertions(+)

diff --git a/block/block-backend.c b/block/block-backend.c
index 5702cc1..a63a83a 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -219,6 +219,21 @@ void blk_unref(BlockBackend *blk)
 }
 }
 
+void blk_remove_all_bs(void)
+{
+BlockBackend *blk;
+
+QTAILQ_FOREACH(blk, _backends, link) {
+AioContext *ctx = blk_get_aio_context(blk);
+
+aio_context_acquire(ctx);
+if (blk->bs) {
+blk_remove_bs(blk);
+}
+aio_context_release(ctx);
+}
+}
+
 /*
  * Return the BlockBackend after @blk.
  * If @blk is null, return the first one.
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index abb52be..9fb3e54 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -68,6 +68,7 @@ BlockBackend *blk_new_open(const char *name, const char 
*filename,
 int blk_get_refcnt(BlockBackend *blk);
 void blk_ref(BlockBackend *blk);
 void blk_unref(BlockBackend *blk);
+void blk_remove_all_bs(void);
 const char *blk_name(BlockBackend *blk);
 BlockBackend *blk_by_name(const char *name);
 BlockBackend *blk_next(BlockBackend *blk);
-- 
2.6.2

[Qemu-devel] [PATCH v6 15/15] iotests: Add test for multiple BB on BDS tree

2015-11-04 Thread Max Reitz

This adds a test for having multiple BlockBackends in one BDS tree. In
this case, there is one BB for the protocol BDS and one BB for the
format BDS in a simple two-BDS tree (with the protocol BDS and BB added
first).

When bdrv_close_all() is executed, no cached data from any BDS should be
lost; the protocol BDS may not be closed until the format BDS is closed.
Otherwise, metadata updates may be lost.

Signed-off-by: Max Reitz 
---
 tests/qemu-iotests/117 | 86 ++
 tests/qemu-iotests/117.out | 14 
 tests/qemu-iotests/group   |  1 +
 3 files changed, 101 insertions(+)
 create mode 100755 tests/qemu-iotests/117
 create mode 100644 tests/qemu-iotests/117.out

diff --git a/tests/qemu-iotests/117 b/tests/qemu-iotests/117
new file mode 100755
index 000..7881fc7
--- /dev/null
+++ b/tests/qemu-iotests/117
@@ -0,0 +1,86 @@
+#!/bin/bash
+#
+# Test case for shared BDS between backend trees
+#
+# Copyright (C) 2015 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see .
+#
+
+# creator
+owner=mre...@redhat.com
+
+seq="$(basename $0)"
+echo "QA output created by $seq"
+
+here="$PWD"
+tmp=/tmp/$$
+status=1   # failure is the default!
+
+_cleanup()
+{
+   _cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+. ./common.qemu
+
+_supported_fmt qcow2
+_supported_proto file
+_supported_os Linux
+
+_make_test_img 64k
+
+_launch_qemu
+
+_send_qemu_cmd $QEMU_HANDLE \
+"{ 'execute': 'qmp_capabilities' }" \
+'return'
+
+_send_qemu_cmd $QEMU_HANDLE \
+"{ 'execute': 'blockdev-add',
+   'arguments': { 'options': { 'id': 'protocol',
+   'driver': 'file',
+   'filename': '$TEST_IMG' } } }" \
+'return'
+
+_send_qemu_cmd $QEMU_HANDLE \
+"{ 'execute': 'blockdev-add',
+   'arguments': { 'options': { 'id': 'format',
+   'driver': '$IMGFMT',
+   'file': 'protocol' } } }" \
+'return'
+
+_send_qemu_cmd $QEMU_HANDLE \
+"{ 'execute': 'human-monitor-command',
+   'arguments': { 'command-line': 'qemu-io format \"write -P 42 0 64k\"' } 
}" \
+'return'
+
+_send_qemu_cmd $QEMU_HANDLE \
+"{ 'execute': 'quit' }" \
+'return'
+
+wait=1 _cleanup_qemu
+
+_check_test_img
+
+$QEMU_IO -c 'read -P 42 0 64k' "$TEST_IMG" | _filter_qemu_io
+
+# success, all done
+echo '*** done'
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/117.out b/tests/qemu-iotests/117.out
new file mode 100644
index 000..f52dc1a
--- /dev/null
+++ b/tests/qemu-iotests/117.out
@@ -0,0 +1,14 @@
+QA output created by 117
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=65536
+{"return": {}}
+{"return": {}}
+{"return": {}}
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+{"return": ""}
+{"return": {}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": 
"SHUTDOWN"}
+No errors were found on the image.
+read 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+*** done
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 993711b..d157c07 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -122,6 +122,7 @@
 114 rw auto quick
 115 rw auto
 116 rw auto quick
+117 rw auto
 118 rw auto
 119 rw auto quick
 120 rw auto quick
-- 
2.6.2

[Qemu-devel] [PATCH v6 11/15] block: Add list of all BlockDriverStates

2015-11-04 Thread Max Reitz

Signed-off-by: Max Reitz 
---
 block.c   | 7 +++
 include/block/block_int.h | 2 ++
 2 files changed, 9 insertions(+)

diff --git a/block.c b/block.c
index 57e4b15..cf5dadc 100644
--- a/block.c
+++ b/block.c
@@ -78,6 +78,9 @@ struct BdrvStates bdrv_states = 
QTAILQ_HEAD_INITIALIZER(bdrv_states);
 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
 
+static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states =
+QTAILQ_HEAD_INITIALIZER(all_bdrv_states);
+
 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
 QLIST_HEAD_INITIALIZER(bdrv_drivers);
 
@@ -266,6 +269,8 @@ BlockDriverState *bdrv_new(void)
 bs->refcnt = 1;
 bs->aio_context = qemu_get_aio_context();
 
+QTAILQ_INSERT_TAIL(_bdrv_states, bs, bs_list);
+
 return bs;
 }
 
@@ -2128,6 +2133,8 @@ static void bdrv_delete(BlockDriverState *bs)
 /* remove from list, if necessary */
 bdrv_make_anon(bs);
 
+QTAILQ_REMOVE(_bdrv_states, bs, bs_list);
+
 g_free(bs);
 }
 
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 4e0ed15..bde93e3 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -419,6 +419,8 @@ struct BlockDriverState {
 QTAILQ_ENTRY(BlockDriverState) node_list;
 /* element of the list of "drives" the guest sees */
 QTAILQ_ENTRY(BlockDriverState) device_list;
+/* element of the list of all BlockDriverStates (all_bdrv_states) */
+QTAILQ_ENTRY(BlockDriverState) bs_list;
 QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps;
 int refcnt;
 
-- 
2.6.2

[Qemu-devel] [PATCH RFC 2/2] snapshot: create bdrv_snapshot_all_del_snapshot helper

2015-11-04 Thread Denis V. Lunev

to delete snapshots from all loaded block drivers.

The patch also ensures proper locking.

Signed-off-by: Denis V. Lunev 
CC: Stefan Hajnoczi 
CC: Juan Quintela 
---
 block/snapshot.c | 27 
 include/block/snapshot.h |  2 ++
 migration/savevm.c   | 54 +---
 3 files changed, 39 insertions(+), 44 deletions(-)

diff --git a/block/snapshot.c b/block/snapshot.c
index 6b5ce4e..9d1aa9b 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -391,3 +391,30 @@ bool bdrv_snapshot_all_can_snapshot(BlockDriverState 
**first_bad_bs)
 *first_bad_bs = NULL;
 return true;
 }
+
+int bdrv_snapshot_all_del_snapshot(const char *name,
+BlockDriverState **first_bad_bs, Error **err)
+{
+BlockDriverState *bs;
+AioContext *ctx;
+QEMUSnapshotInfo sn1, *snapshot = 
+
+bs = NULL;
+while ((bs = bdrv_next(bs))) {
+ctx = bdrv_get_aio_context(bs);
+
+aio_context_acquire(ctx);
+if (bdrv_can_snapshot(bs) &&
+bdrv_snapshot_find(bs, snapshot, name) >= 0) {
+bdrv_snapshot_delete_by_id_or_name(bs, name, err);
+}
+aio_context_release(ctx);
+
+if (*err) {
+*first_bad_bs = bs;
+return -1;
+}
+}
+
+return 0;
+}
diff --git a/include/block/snapshot.h b/include/block/snapshot.h
index 61b4b5d..4b883e5 100644
--- a/include/block/snapshot.h
+++ b/include/block/snapshot.h
@@ -84,4 +84,6 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs,
  */
 bool bdrv_snapshot_all_can_snapshot(BlockDriverState **first_bad_bs);
 
+int bdrv_snapshot_all_del_snapshot(const char *name,
+BlockDriverState **first_bsd_bs, Error **err);
 #endif
diff --git a/migration/savevm.c b/migration/savevm.c
index 91ba0bf..4608811 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1248,35 +1248,6 @@ static BlockDriverState *find_vmstate_bs(void)
 return NULL;
 }
 
-/*
- * Deletes snapshots of a given name in all opened images.
- */
-static int del_existing_snapshots(Monitor *mon, const char *name)
-{
-BlockDriverState *bs;
-QEMUSnapshotInfo sn1, *snapshot = 
-Error *err = NULL;
-
-bs = NULL;
-while ((bs = bdrv_next(bs))) {
-if (bdrv_can_snapshot(bs) &&
-bdrv_snapshot_find(bs, snapshot, name) >= 0) {
-bdrv_snapshot_delete_by_id_or_name(bs, name, );
-if (err) {
-monitor_printf(mon,
-   "Error while deleting snapshot on device '%s':"
-   " %s\n",
-   bdrv_get_device_name(bs),
-   error_get_pretty(err));
-error_free(err);
-return -1;
-}
-}
-}
-
-return 0;
-}
-
 void hmp_savevm(Monitor *mon, const QDict *qdict)
 {
 BlockDriverState *bs, *bs1;
@@ -1334,7 +1305,11 @@ void hmp_savevm(Monitor *mon, const QDict *qdict)
 }
 
 /* Delete old snapshots of the same name */
-if (name && del_existing_snapshots(mon, name) < 0) {
+if (name && bdrv_snapshot_all_del_snapshot(name, , _err) < 0) {
+monitor_printf(mon,
+   "Error while deleting snapshot on device '%s': %s\n",
+   bdrv_get_device_name(bs1), error_get_pretty(local_err));
+error_free(local_err);
 goto the_end;
 }
 
@@ -1494,20 +1469,11 @@ void hmp_delvm(Monitor *mon, const QDict *qdict)
 return;
 }
 
-bs = NULL;
-while ((bs = bdrv_next(bs))) {
-if (bdrv_can_snapshot(bs)) {
-err = NULL;
-bdrv_snapshot_delete_by_id_or_name(bs, name, );
-if (err) {
-monitor_printf(mon,
-   "Error while deleting snapshot on device '%s':"
-   " %s\n",
-   bdrv_get_device_name(bs),
-   error_get_pretty(err));
-error_free(err);
-}
-}
+if (bdrv_snapshot_all_del_snapshot(name, , ) < 0) {
+monitor_printf(mon,
+   "Error while deleting snapshot on device '%s': %s\n",
+   bdrv_get_device_name(bs), error_get_pretty(err));
+error_free(err);
 }
 }
 
-- 
2.5.0

[Qemu-devel] [PATCH RFC 1/2] snapshot: create helper to test that block drivers supports snapshots

2015-11-04 Thread Denis V. Lunev

The patch enforces proper locking for this operation.

Signed-off-by: Denis V. Lunev 
CC: Stefan Hajnoczi 
CC: Juan Quintela 
---
Patches are compile-tested only. Sent to check the approach, naming and
functions placement. Functions are returning bad BlockDriver via
parameter to make clear distinction when I'll have to return BS to write.

 block/snapshot.c | 35 +++
 include/block/snapshot.h |  9 +
 migration/savevm.c   | 17 -
 3 files changed, 48 insertions(+), 13 deletions(-)

diff --git a/block/snapshot.c b/block/snapshot.c
index 89500f2..6b5ce4e 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -25,6 +25,7 @@
 #include "block/snapshot.h"
 #include "block/block_int.h"
 #include "qapi/qmp/qerror.h"
+#include "monitor/monitor.h"
 
 QemuOptsList internal_snapshot_opts = {
 .name = "snapshot",
@@ -356,3 +357,37 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState 
*bs,
 
 return ret;
 }
+
+
+/* Group operations. All block drivers are involved.
+ * These functions will properly handle dataplace (take aio_context_acquire
+ * when appropriate for appropriate block drivers
+ *
+ * Returned block driver will be always locked.
+ */
+
+bool bdrv_snapshot_all_can_snapshot(BlockDriverState **first_bad_bs)
+{
+BlockDriverState *bs;
+
+while ((bs = bdrv_next(bs))) {
+bool ok;
+AioContext *ctx = bdrv_get_aio_context(bs);
+
+if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
+continue;
+}
+
+aio_context_acquire(ctx);
+ok = bdrv_can_snapshot(bs);
+aio_context_release(ctx);
+
+if (!ok) {
+*first_bad_bs = bs;
+return false;
+}
+}
+
+*first_bad_bs = NULL;
+return true;
+}
diff --git a/include/block/snapshot.h b/include/block/snapshot.h
index 770d9bb..61b4b5d 100644
--- a/include/block/snapshot.h
+++ b/include/block/snapshot.h
@@ -75,4 +75,13 @@ int bdrv_snapshot_load_tmp(BlockDriverState *bs,
 int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs,
  const char *id_or_name,
  Error **errp);
+
+
+/* Group operations. All block drivers are involved.
+ * These functions will properly handle dataplace (take aio_context_acquire
+ * when appropriate for appropriate block drivers
+ *
+ */
+bool bdrv_snapshot_all_can_snapshot(BlockDriverState **first_bad_bs);
+
 #endif
diff --git a/migration/savevm.c b/migration/savevm.c
index dbcc39a..91ba0bf 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1290,19 +1290,10 @@ void hmp_savevm(Monitor *mon, const QDict *qdict)
 const char *name = qdict_get_try_str(qdict, "name");
 Error *local_err = NULL;
 
-/* Verify if there is a device that doesn't support snapshots and is 
writable */
-bs = NULL;
-while ((bs = bdrv_next(bs))) {
-
-if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
-continue;
-}
-
-if (!bdrv_can_snapshot(bs)) {
-monitor_printf(mon, "Device '%s' is writable but does not support 
snapshots.\n",
-   bdrv_get_device_name(bs));
-return;
-}
+if (bdrv_snapshot_all_can_snapshot()) {
+monitor_printf(mon, "Device '%s' is writable but does not "
+   "support snapshots.\n", bdrv_get_device_name(bs));
+return;
 }
 
 bs = find_vmstate_bs();
-- 
2.5.0

Re: [Qemu-devel] [PATCH RFC 1/2] snapshot: create helper to test that block drivers supports snapshots

2015-11-04 Thread Juan Quintela

"Denis V. Lunev"  wrote:
> The patch enforces proper locking for this operation.
>
> Signed-off-by: Denis V. Lunev 
> CC: Stefan Hajnoczi 
> CC: Juan Quintela 
> ---
> Patches are compile-tested only. Sent to check the approach, naming and
> functions placement. Functions are returning bad BlockDriver via
> parameter to make clear distinction when I'll have to return BS to write.
>
>  block/snapshot.c | 35 +++
>  include/block/snapshot.h |  9 +
>  migration/savevm.c   | 17 -
>  3 files changed, 48 insertions(+), 13 deletions(-)
>
> diff --git a/block/snapshot.c b/block/snapshot.c
> index 89500f2..6b5ce4e 100644
> --- a/block/snapshot.c
> +++ b/block/snapshot.c
> @@ -25,6 +25,7 @@
>  #include "block/snapshot.h"
>  #include "block/block_int.h"
>  #include "qapi/qmp/qerror.h"
> +#include "monitor/monitor.h"

You don't need the monitor here O:-)

>  
>  QemuOptsList internal_snapshot_opts = {
>  .name = "snapshot",
> @@ -356,3 +357,37 @@ int 
> bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs,
>  
>  return ret;
>  }
> +
> +
> +/* Group operations. All block drivers are involved.
> + * These functions will properly handle dataplace (take aio_context_acquire
> + * when appropriate for appropriate block drivers
> + *
> + * Returned block driver will be always locked.
> + */
> +
> +bool bdrv_snapshot_all_can_snapshot(BlockDriverState **first_bad_bs)

bdrv_snapshot_is_possible???

> +{
> +BlockDriverState *bs;
> +
> +while ((bs = bdrv_next(bs))) {
> +bool ok;
> +AioContext *ctx = bdrv_get_aio_context(bs);
> +
> +if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
> +continue;
> +}
> +
> +aio_context_acquire(ctx);

I think that you should get the lock before the bdrv_is_inserted, but
who am I to know for sure O:-)

> +ok = bdrv_can_snapshot(bs);
> +aio_context_release(ctx);
> +
> +if (!ok) {
> +*first_bad_bs = bs;
> +return false;
> +}
> +}
> +
> +*first_bad_bs = NULL;
> +return true;
> +}
> diff --git a/include/block/snapshot.h b/include/block/snapshot.h
> index 770d9bb..61b4b5d 100644
> --- a/include/block/snapshot.h
> +++ b/include/block/snapshot.h
> @@ -75,4 +75,13 @@ int bdrv_snapshot_load_tmp(BlockDriverState *bs,
>  int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs,
>   const char *id_or_name,
>   Error **errp);
> +
> +
> +/* Group operations. All block drivers are involved.
> + * These functions will properly handle dataplace (take aio_context_acquire
> + * when appropriate for appropriate block drivers
> + *
> + */
> +bool bdrv_snapshot_all_can_snapshot(BlockDriverState **first_bad_bs);
> +
>  #endif
> diff --git a/migration/savevm.c b/migration/savevm.c
> index dbcc39a..91ba0bf 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -1290,19 +1290,10 @@ void hmp_savevm(Monitor *mon, const QDict *qdict)
>  const char *name = qdict_get_try_str(qdict, "name");
>  Error *local_err = NULL;
>  
> -/* Verify if there is a device that doesn't support snapshots and is 
> writable */
> -bs = NULL;
> -while ((bs = bdrv_next(bs))) {
> -
> -if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
> -continue;
> -}
> -
> -if (!bdrv_can_snapshot(bs)) {
> -monitor_printf(mon, "Device '%s' is writable but does not 
> support snapshots.\n",
> -   bdrv_get_device_name(bs));
> -return;
> -}
> +if (bdrv_snapshot_all_can_snapshot()) {
> +monitor_printf(mon, "Device '%s' is writable but does not "
> +   "support snapshots.\n", bdrv_get_device_name(bs));
> +return;
>  }
>  
>  bs = find_vmstate_bs();

ok with the savevm.c changes.

Re: [Qemu-devel] [RFC PATCH 1/1] dataplane: alternative approach to locking

2015-11-04 Thread Denis V. Lunev


On 11/04/2015 03:03 PM, Juan Quintela wrote:

"Denis V. Lunev"  wrote:

On 11/04/2015 12:49 PM, Juan Quintela wrote:

void hmp_delvm(Monitor *mon, const QDict *qdict)
{
  const char *name = qdict_get_str(qdict, "name");

  if (!bdrv_find_snapshot_bs()) {
  monitor_printf(mon, "No block device supports snapshots\n");
  return;
  }

  del_existing_snapshots(mon, name);
}

Yes, we have changed the semantics "slightly".  Pervious version of
hmp_delvm() will try to remove all the snapshots from any device with
that name.  This one would remove them until it finds one error.  I
think that the code reuse and the consistence trumps the change in
semantics (really the change is only on error cases).

I think you are wrong here. You can not abort operation if one
disk does not have a snapshot assuming the following situation
- VM has one disk
- snapshot XXX is made
- 2nd disk is added
- remove XXX snapshot

I think that my *completely* untested suggestion handled that well.

char *name bdrv_remove_snapshots(const char *name, Error *err)
{
 BlockDriverState *bs;
 QEMUSnapshotInfo sn1, *snapshot = 

 bs = NULL;
 while ((bs = bdrv_next(bs))) {
 if (bdrv_can_snapshot(bs) &&
 bdrv_snapshot_find(bs, snapshot, name) >= 0) {
 bdrv_snapshot_delete_by_id_or_name(bs, name, );
 if (err) {
 return bdrv_get_device_name(bs);
 }
 }
 }
 return NULL;
}

It only stops without removing an snapshot if there is one error
deleting one snapshot.  Current code just tells that there is one error
and continues in the rest of the disks.

Notice that we are going to have problems on this operation, we have
found a disk with one snapshot with the name that we want to remove and
we have failed.



Your position is understood. I'll send yet another proof of concept
in an hour.

Thanks, Juan.

yes. we should follow this way in both branches.
I like this and done similar thing in my RFC :)

Den

Re: [Qemu-devel] [PATCH v7 08/35] exec: allow memory to be allocated from any kind of path

2015-11-04 Thread Eduardo Habkost

On Wed, Nov 04, 2015 at 11:12:41AM +0800, Xiao Guangrong wrote:
> On 11/04/2015 07:00 AM, Eduardo Habkost wrote:
> >On Mon, Nov 02, 2015 at 05:13:10PM +0800, Xiao Guangrong wrote:
> >>Currently file_ram_alloc() is designed for hugetlbfs, however, the memory
> >>of nvdimm can come from either raw pmem device eg, /dev/pmem, or the file
> >>locates at DAX enabled filesystem
> >>
> >>So this patch let it work on any kind of path
> >>
> >>Signed-off-by: Xiao Guangrong 
> >>---
> >>  exec.c | 24 
> >>  1 file changed, 12 insertions(+), 12 deletions(-)
> >>
> >>diff --git a/exec.c b/exec.c
> >>index 9de38be..9075f4d 100644
> >>--- a/exec.c
> >>+++ b/exec.c
> >>@@ -1184,25 +1184,25 @@ static void *file_ram_alloc(RAMBlock *block,
> >>  char *c;
> >>  void *area;
> >>  int fd;
> >>-uint64_t hpagesize;
> >>+uint64_t pagesize;
> >>  Error *local_err = NULL;
> >>
> >>-hpagesize = qemu_file_get_page_size(path, _err);
> >>+pagesize = qemu_file_get_page_size(path, _err);
> >>  if (local_err) {
> >>  error_propagate(errp, local_err);
> >>  goto error;
> >>  }
> >>
> >>-if (hpagesize == getpagesize()) {
> >>-fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
> >>+if (pagesize == getpagesize()) {
> >>+fprintf(stderr, "Memory is not allocated from HugeTlbfs.\n");
> >
> >If the point of this patch is to allow file_ram_alloc() to not be
> >specific to hugetlbfs anymore, this warning can simply go away.
> >
> >(And in case if you really want to keep the warning, I don't see the
> >point of the changes you made to it.)
> >
> 
> This is the history why we did it like this:
> https://lists.gnu.org/archive/html/qemu-devel/2015-10/msg02862.html

The rule I am trying to follow is simple: if there are valid use cases
(e.g. nvdimm, tmpfs) where the warning will be printed every single time
QEMU runs, the warning is not appropriate.

If you really want to keep a warning, please make it not be printed on
all other valid use cases (nvdimm and tmpfs). Personally, I don't think
adding those additional checks would be worth the trouble, that's why I
suggest removing the warning.

> 
> Q:
> | What this *actually* is trying to warn against is that
> | mapping a regular file (as opposed to hugetlbfs)
> | means transparent huge pages don't work.

I don't think the author of that warning even thought about transparent
huge pages (did THP even existed when it was written?). I believe they
just assumed that the only reason for using -mem-path would be hugetlbfs
and wanted to warn about it. That assumption isn't true anymore.

> 
> | So I don't think we should drop this warning completely.
> | Either let's add the nvdimm magic, or simply check the
> | page size.
> 
> A:
> | Check the page size sounds good, will check:
> | if (pagesize != getpagesize()) {
> |...print something...
> |}
> 
> | I agree with you that showing the info is needed, however,
> | 'Warning' might scare some users, how about drop this word or
> | just show　“Memory is not allocated from HugeTlbfs”?

With "warning:", I know it may be OK to do what I am doing and the
software is just trying to warn me. If there's no "warning:", I don't
even know if something is really broken in my config, or if it's just a
warning, and I would be very confused.

-- 
Eduardo

[Qemu-devel] [PATCH 0/2] Minor throttling updates

2015-11-04 Thread Alberto Garcia

Here's a couple of patches for the throttling code. I think the commit
messages are clear enough, but if you have any comments or questions
I'll be glad to hear them.

Berto

Alberto Garcia (2):
  throttle: Check for pending requests in throttle_group_unregister_bs()
  throttle: Use bs->throttle_state instead of bs->io_limits_enabled

 block.c   | 6 +++---
 block/qapi.c  | 2 +-
 block/throttle-groups.c   | 7 +++
 blockdev.c| 4 ++--
 include/block/block_int.h | 5 -
 5 files changed, 17 insertions(+), 7 deletions(-)

-- 
2.6.2

[Qemu-devel] [PATCH 2/2] throttle: Use bs->throttle_state instead of bs->io_limits_enabled

2015-11-04 Thread Alberto Garcia

There are two ways to check for I/O limits in a BlockDriverState:

- bs->throttle_state: if this pointer is not NULL, it means that this
  BDS is member of a throttling group, its ThrottleTimers structure
  has been initialized and its I/O limits are ready to be applied.

- bs->io_limits_enabled: if true it means that the throttle_state
  pointer is valid _and_ the limits are currently enabled.

The latter is used in several places to check whether a BDS has I/O
limits configured, but what it really checks is whether requests
are being throttled or not. For example, io_limits_enabled can be
temporarily set to false in cases like bdrv_read_unthrottled() without
otherwise touching the throtting configuration of that BDS.

This patch replaces bs->io_limits_enabled with bs->throttle_state in
all cases where what we really want to check is the existence of I/O
limits, not whether they are currently enabled or not.

Signed-off-by: Alberto Garcia 
---
 block.c   | 6 +++---
 block/qapi.c  | 2 +-
 blockdev.c| 4 ++--
 include/block/block_int.h | 5 -
 4 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/block.c b/block.c
index e9f40dc..2f433d6 100644
--- a/block.c
+++ b/block.c
@@ -1901,7 +1901,7 @@ void bdrv_close(BlockDriverState *bs)
 }
 
 /* Disable I/O limits and drain all pending throttled requests */
-if (bs->io_limits_enabled) {
+if (bs->throttle_state) {
 bdrv_io_limits_disable(bs);
 }
 
@@ -3706,7 +3706,7 @@ void bdrv_detach_aio_context(BlockDriverState *bs)
 baf->detach_aio_context(baf->opaque);
 }
 
-if (bs->io_limits_enabled) {
+if (bs->throttle_state) {
 throttle_timers_detach_aio_context(>throttle_timers);
 }
 if (bs->drv->bdrv_detach_aio_context) {
@@ -3742,7 +3742,7 @@ void bdrv_attach_aio_context(BlockDriverState *bs,
 if (bs->drv->bdrv_attach_aio_context) {
 bs->drv->bdrv_attach_aio_context(bs, new_context);
 }
-if (bs->io_limits_enabled) {
+if (bs->throttle_state) {
 throttle_timers_attach_aio_context(>throttle_timers, new_context);
 }
 
diff --git a/block/qapi.c b/block/qapi.c
index ec0f513..89d4274 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -64,7 +64,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, 
Error **errp)
 info->backing_file_depth = bdrv_get_backing_file_depth(bs);
 info->detect_zeroes = bs->detect_zeroes;
 
-if (bs->io_limits_enabled) {
+if (bs->throttle_state) {
 ThrottleConfig cfg;
 
 throttle_group_get_config(bs, );
diff --git a/blockdev.c b/blockdev.c
index 8b8bfa9..a418fbf 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2161,14 +2161,14 @@ void qmp_block_set_io_throttle(const char *device, 
int64_t bps, int64_t bps_rd,
 if (throttle_enabled()) {
 /* Enable I/O limits if they're not enabled yet, otherwise
  * just update the throttling group. */
-if (!bs->io_limits_enabled) {
+if (!bs->throttle_state) {
 bdrv_io_limits_enable(bs, has_group ? group : device);
 } else if (has_group) {
 bdrv_io_limits_update_group(bs, group);
 }
 /* Set the new throttling configuration */
 bdrv_set_io_limits(bs, );
-} else if (bs->io_limits_enabled) {
+} else if (bs->throttle_state) {
 /* If all throttling settings are set to 0, disable I/O limits */
 bdrv_io_limits_disable(bs);
 }
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 3ceeb5a..533500e 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -390,7 +390,10 @@ struct BlockDriverState {
 /* number of in-flight serialising requests */
 unsigned int serialising_in_flight;
 
-/* I/O throttling */
+/* I/O throttling.
+ * throttle_state tells us if this BDS has I/O limits configured.
+ * io_limits_enabled tells us if they are currently being
+ * enforced, but it can be temporarily set to false */
 CoQueue  throttled_reqs[2];
 bool io_limits_enabled;
 /* The following fields are protected by the ThrottleGroup lock.
-- 
2.6.2

Re: [Qemu-devel] [PATCH v9 13/27] qapi: Drop obsolete tag value collision assertions

2015-11-04 Thread Markus Armbruster

Eric Blake  writes:

> From: Markus Armbruster 
>
> Union tag values can't clash with member names in generated C anymore
> since commit e4ba22b, but QAPISchemaObjectTypeVariant.check() still
> asserts they don't.  Drop it.
>
> Signed-off-by: Markus Armbruster 
> Message-Id: <1446559499-26984-1-git-send-email-arm...@redhat.com>
> [A later patch will still need to pass vseen from Variants.check()
> to Variant.check(), so to avoid churn, change the cleanup to occur
> lower in Variant.check()]

Leaves QAPISchemaObjectTypeVariant.check() parameter seen temporarily
unused.  Okay, as long as we get in the patch that again uses it in the
same batch.

> Signed-off-by: Eric Blake 
>
> ---
> v9: new patch
> ---
>  scripts/qapi.py | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/scripts/qapi.py b/scripts/qapi.py
> index a814e20..145dbfe 100644
> --- a/scripts/qapi.py
> +++ b/scripts/qapi.py
> @@ -1067,7 +1067,7 @@ class 
> QAPISchemaObjectTypeVariant(QAPISchemaObjectTypeMember):
>  QAPISchemaObjectTypeMember.__init__(self, name, typ, False)
>
>  def check(self, schema, tag_type, seen):
> -QAPISchemaObjectTypeMember.check(self, schema, [], seen)
> +QAPISchemaObjectTypeMember.check(self, schema, [], {})
>  assert self.name in tag_type.values
>
>  # This function exists to support ugly simple union special cases

Re: [Qemu-devel] [PATCH v9 14/27] qapi: Fix up commit 7618b91's clash sanity checking change

2015-11-04 Thread Markus Armbruster

Eric Blake  writes:

> From: Markus Armbruster 
>
> This hunk
>
> @@ -964,6 +965,7 @@ class QAPISchemaObjectType(QAPISchemaType):
>  members = []
>  seen = {}
>  for m in members:
> +assert c_name(m.name) not in seen
>  seen[m.name] = m
>  for m in self.local_members:
>  m.check(schema, members, seen)
>
> is plainly broken.
>
> Asserting the members inherited from base don't clash is somewhat
> redundant, because self.base.check() just checked that.  But it
> doesn't hurt.
>
> The idea to use c_name(m.name) instead of m.name for collision
> checking is sound, because we need to catch clashes between the m.name
> and between the c_name(m.name), and when two m.name clash, then their
> c_name() also clash.
>
> However, using c_name(m.name) instead of m.name in one of several
> places doesn't work.  See the very next line.
>
> Keep the assertion, but drop the c_name() for now.  A future commit
> will bring it back.
>
> Signed-off-by: Markus Armbruster 
> Message-Id: <1446559499-26984-4-git-send-email-arm...@redhat.com>
> [change TAB to space]

In the commit message.  You had me wondering briefly whether my Emacs
configuration to avoid tabs in QEMU code failed :)

> Signed-off-by: Eric Blake 
>
> ---
> v9: new patch
> ---
>  scripts/qapi.py | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/scripts/qapi.py b/scripts/qapi.py
> index 145dbfe..c910715 100644
> --- a/scripts/qapi.py
> +++ b/scripts/qapi.py
> @@ -987,7 +987,7 @@ class QAPISchemaObjectType(QAPISchemaType):
>  members = []
>  seen = {}
>  for m in members:
> -assert c_name(m.name) not in seen
> +assert m.name not in seen
>  seen[m.name] = m
>  for m in self.local_members:
>  m.check(schema, members, seen)

Re: [Qemu-devel] [PATCH] target-sparc: fix 32-bit truncation in fpackfix

2015-11-04 Thread Markus Armbruster

Paolo Bonzini  writes:

> On 04/11/2015 12:05, Richard Henderson wrote:
>> On 11/04/2015 11:45 AM, Paolo Bonzini wrote:
>int32_t src = rs2 >> (word * 32);
> -int64_t scaled = src << scale;
> +int64_t scaled = (int64_t)src << scale;
>int64_t from_fixed = scaled >> 16;
>> ...

 I do think we'd be better served by casting to uint64_t on that line.
 Note that fpackfix requires the same correction.  And it wouldn't hurt
 to cast to uint32_t in fpack16, lest we anger the self-same shifting
 gods.
>>>
>>> Hmmm.. say src = -0x8000, scale = 1;
>>>
>>> scaled = (uint64_t)-0x800 << 1 = 0x
>>> from_fixed = 0x >> 16  = 0x
>>>
>>> Now from_fixed is positive and you get 32767 instead of -32768.  In
>>> other words, we would have to cast to uint64_t on the scaled assignment,
>>> and back to int64_t on the from_fixed assignment.  I must be
>>> misunderstanding your suggestion.
>> 
>>   int64_t scaled = (uint64_t)src << scale;
>> 
>> I.e. one explicit conversion and one implicit conversion.
>
> That does the job, but it also does look like a typo...

Make the implicit conversion explicit then.

Re: [Qemu-devel] [PATCH v2] qemu-sockets: do not test path with access() before unlinking

2015-11-04 Thread Edgar E. Iglesias

On Wed, Nov 04, 2015 at 02:48:47PM +0100, Paolo Bonzini wrote:
> Using access() is a time-of-check/time-of-use race condition.  It is
> okay to use them to provide better error messages, but that is pretty
> much it.
> 
> This is not one such case; on the other hand, access() *will* skip
> unlink() for a non-existent path, so ignore ENOENT return values from
> the unlink() system call.
> 
> Signed-off-by: Paolo Bonzini 

Reviewed-by: Edgar E. Iglesias 


> ---
>  util/qemu-sockets.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
> 
> diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c
> index 9142917..de9145a 100644
> --- a/util/qemu-sockets.c
> +++ b/util/qemu-sockets.c
> @@ -751,8 +751,7 @@ int unix_listen_opts(QemuOpts *opts, Error **errp)
>  qemu_opt_set(opts, "path", un.sun_path, _abort);
>  }
>  
> -if ((access(un.sun_path, F_OK) == 0) &&
> -unlink(un.sun_path) < 0) {
> +if (unlink(un.sun_path) < 0 && errno != ENOENT) {
>  error_setg_errno(errp, errno,
>   "Failed to unlink socket %s", un.sun_path);
>  goto err;
> -- 
> 2.5.0
> 
>

Re: [Qemu-devel] [PATCH v4 2/7] e1000: Add support for migrating the entire MAC registers' array

2015-11-04 Thread Leonid Bloch

On Wed, Nov 4, 2015 at 4:35 AM, Jason Wang  wrote:
>
>
> On 11/03/2015 07:14 PM, Leonid Bloch wrote:
>> This patch enables the migration of the entire array of MAC registers
>> during live migration. The entire array is just 128 KB long, so
>> practically no penalty should be felt when transmitting it, over the
>> individual registers. But the advantages are more compact code, and no
>> need to introduce new vmstate subsections in the future, when additional
>> MAC registers will be implemented.
>>
>> Backward compatibility is preserved by introducing the e1000-specific
>> boolean parameter "full_mac_registers", which is on by default. Setting
>> it to off will enable migration to older versions of QEMU.
>>
>> Additionally, this parameter can be used to control the implementation of
>> extra MAC registers in the future. I.e. new MAC registers may be set to
>> behave differently, or not to be active at all, if "full_mac_registers"
>> will be set to "off", e.g.:
>>
>> qemu-system-x86_64 -device e1000,full_mac_registers=off,... ...
>>
>> As mentioned above, the default value is "on".
>>
>> Signed-off-by: Leonid Bloch 
>> Signed-off-by: Dmitry Fleytman 
>> ---
>>  hw/net/e1000.c | 105 
>> +++--
>>  1 file changed, 72 insertions(+), 33 deletions(-)
>>
>> diff --git a/hw/net/e1000.c b/hw/net/e1000.c
>> index 7036842..1190bbe 100644
>> --- a/hw/net/e1000.c
>> +++ b/hw/net/e1000.c
>> @@ -135,8 +135,10 @@ typedef struct E1000State_st {
>>  /* Compatibility flags for migration to/from qemu 1.3.0 and older */
>>  #define E1000_FLAG_AUTONEG_BIT 0
>>  #define E1000_FLAG_MIT_BIT 1
>> +#define E1000_FLAG_MAC_BIT 2
>>  #define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
>>  #define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
>> +#define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
>>  uint32_t compat_flags;
>>  } E1000State;
>>
>> @@ -1377,6 +1379,18 @@ static bool e1000_mit_state_needed(void *opaque)
>>  return s->compat_flags & E1000_FLAG_MIT;
>>  }
>>
>> +static bool e1000_full_mac_needed(void *opaque)
>> +{
>> +E1000State *s = opaque;
>> +
>> +return s->compat_flags & E1000_FLAG_MAC;
>> +}
>> +
>> +static bool e1000_compat_mac_needed(void *opaque)
>> +{
>> +return !e1000_full_mac_needed(opaque);
>> +}
>> +
>>  static const VMStateDescription vmstate_e1000_mit_state = {
>>  .name = "e1000/mit_state",
>>  .version_id = 1,
>> @@ -1392,41 +1406,23 @@ static const VMStateDescription 
>> vmstate_e1000_mit_state = {
>>  }
>>  };
>>
>> -static const VMStateDescription vmstate_e1000 = {
>> -.name = "e1000",
>> -.version_id = 2,
>> +static const VMStateDescription vmstate_e1000_full_mac_state = {
>> +.name = "e1000/full_mac_state",
>> +.version_id = 1,
>>  .minimum_version_id = 1,
>> -.pre_save = e1000_pre_save,
>> -.post_load = e1000_post_load,
>> +.needed = e1000_full_mac_needed,
>> +.fields = (VMStateField[]) {
>> +VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
>> +VMSTATE_END_OF_LIST()
>> +}
>> +};
>> +
>> +static const VMStateDescription vmstate_e1000_compat_mac_state = {
>> +.name = "e1000/compat_mac_state",
>> +.version_id = 1,
>> +.minimum_version_id = 1,
>> +.needed = e1000_compat_mac_needed,
>>  .fields = (VMStateField[]) {
>> -VMSTATE_PCI_DEVICE(parent_obj, E1000State),
>> -VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
>> -VMSTATE_UNUSED(4), /* Was mmio_base.  */
>> -VMSTATE_UINT32(rxbuf_size, E1000State),
>> -VMSTATE_UINT32(rxbuf_min_shift, E1000State),
>> -VMSTATE_UINT32(eecd_state.val_in, E1000State),
>> -VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
>> -VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
>> -VMSTATE_UINT16(eecd_state.reading, E1000State),
>> -VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
>> -VMSTATE_UINT8(tx.ipcss, E1000State),
>> -VMSTATE_UINT8(tx.ipcso, E1000State),
>> -VMSTATE_UINT16(tx.ipcse, E1000State),
>> -VMSTATE_UINT8(tx.tucss, E1000State),
>> -VMSTATE_UINT8(tx.tucso, E1000State),
>> -VMSTATE_UINT16(tx.tucse, E1000State),
>> -VMSTATE_UINT32(tx.paylen, E1000State),
>> -VMSTATE_UINT8(tx.hdr_len, E1000State),
>> -VMSTATE_UINT16(tx.mss, E1000State),
>> -VMSTATE_UINT16(tx.size, E1000State),
>> -VMSTATE_UINT16(tx.tso_frames, E1000State),
>> -VMSTATE_UINT8(tx.sum_needed, E1000State),
>> -VMSTATE_INT8(tx.ip, E1000State),
>> -VMSTATE_INT8(tx.tcp, E1000State),
>> -VMSTATE_BUFFER(tx.header, E1000State),
>> -VMSTATE_BUFFER(tx.data, E1000State),
>> -VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
>> -VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
>>  VMSTATE_UINT32(mac_reg[CTRL], E1000State),
>>

Re: [Qemu-devel] [PATCH v7 11/35] util: introduce qemu_file_getlength()

2015-11-04 Thread Xiao Guangrong




On 11/04/2015 10:44 PM, Eduardo Habkost wrote:

On Wed, Nov 04, 2015 at 11:17:09AM +0800, Xiao Guangrong wrote:



On 11/04/2015 07:21 AM, Eduardo Habkost wrote:

On Mon, Nov 02, 2015 at 05:13:13PM +0800, Xiao Guangrong wrote:
[...]

+size_t qemu_file_getlength(const char *file, Error **errp)
+{
+int64_t size;

[...]

+return size;


Can you guarantee that SIZE_MAX >= INT64_MAX on all platforms supported
by QEMU?



Actually, this function is abstracted from the common function, raw_getlength(),
in raw-posix.c whose return value is int64_t.

And i think int64_t is large enough for block devices.


int64_t should be enough, but I don't know if size_t is large enough on
all platforms.

I believe it's going to be either one of those cases:

* If you are absolutely sure SIZE_MAX >= INT64_MAX on all platforms,
   please explain why (and maybe add a QEMU_BUILD_BUG_ON?). (I don't
   think this will be the case)
* If SIZE_MAX < INT64_MAX is possible but you believe
   size <= SIZE_MAX is always true here, please explain why (and maybe
   add an assert()).
* Otherwise, we need to set an appropriate error if size > SIZE_MAX
   or change the type of qemu_file_getlength(). What about making it
   uint64_t?



It sounds better, I will change the return value from size_t to uint64_t.

Thank you for pointing it out, Eduardo!

Re: [Qemu-devel] [PATCH v4 5/6] fw_cfg: add generic non-DMA read method

2015-11-04 Thread Laszlo Ersek

On 11/03/15 22:40, Gabriel L. Somlo wrote:
> Introduce fw_cfg_data_read(), a generic read method which works
> on all access widths (1 through 8 bytes, inclusive), and can be
> used during both IOPort and MMIO read accesses.
> 
> To maintain legibility, only fw_cfg_data_mem_read() (the MMIO
> data read method) is replaced by this patch. The new method
> essentially unwinds the fw_cfg_data_mem_read() + fw_cfg_read()
> combo, but without unnecessarily repeating all the validity
> checks performed by the latter on each byte being read.
> 
> This patch also modifies the trace_fw_cfg_read prototype to
> accept a 64-bit value argument, allowing it to work properly
> with the new read method, but also remain backward compatible
> with existing call sites.
> 
> Cc: Laszlo Ersek 
> Cc: Gerd Hoffmann 
> Cc: Marc Marí 
> Signed-off-by: Gabriel Somlo 
> ---
>  hw/nvram/fw_cfg.c | 44 ++--
>  trace-events  |  2 +-
>  2 files changed, 31 insertions(+), 15 deletions(-)
> 
> diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c
> index 046fa74..9e01b46 100644
> --- a/hw/nvram/fw_cfg.c
> +++ b/hw/nvram/fw_cfg.c
> @@ -274,6 +274,35 @@ static int fw_cfg_select(FWCfgState *s, uint16_t key)
>  return ret;
>  }
>  
> +static uint64_t fw_cfg_data_read(void *opaque, hwaddr addr, unsigned size)
> +{
> +FWCfgState *s = opaque;
> +int arch = !!(s->cur_entry & FW_CFG_ARCH_LOCAL);
> +FWCfgEntry *e = (s->cur_entry == FW_CFG_INVALID) ? NULL :
> +>entries[arch][s->cur_entry & FW_CFG_ENTRY_MASK];
> +uint64_t value = 0;
> +
> +assert(size <= sizeof(value));
> +if (s->cur_entry != FW_CFG_INVALID && e->data) {
> +/* The least significant 'size' bytes of the return value are
> + * expected to contain a string preserving portion of the item
> + * data, padded with zeros to the right in case we run out early.
> + */
> +while (size && s->cur_offset < e->len) {
> +value = (value << 8) | e->data[s->cur_offset++];
> +size--;
> +}
> +/* If size is still not zero, we *did* run out early, so continue
> + * left-shifting, to add the appropriate number of padding zeros
> + * on the right.
> + */
> +value <<= 8 * size;
> +}
> +
> +trace_fw_cfg_read(s, value);
> +return value;
> +}

With the wording you proposed in
,
this looks okay.

... Except my (2a) proposal wasn't entirely correct, and now you get to
fix it up for v5. :( Apologies. (It is a different experience when you
see the code in full.)

Namely, consider the case when this code is entered with:

  (size == 8 && s->cur_offset == e->len)

(Which is possible if the guest makes a qword read access just after
reading the full blob.)

In this case, the loop won't be entered at all (which is okay), but then
you'll have:

  uint64_t << 64

which is undefined behavior. ("If the value of the right operand is
negative or is greater than or equal to the width of the promoted left
operand, the behavior is undefined.")

So please protect the final shift with "if (size < 8)".

*Alternatively*, you could restrict the *outer* condition, i.e.,

  s->cur_entry != FW_CFG_INVALID && e->data

with (s->cur_offset < e->len).

... And then you can even replace the "while" with a "do" loop. (Because
both (size > 0) and (s->cur_offset < e->len) will be true if the loop is
reached at all.)

Just the code, without comments:

assert(size <= sizeof(value));
assert(size > 0);
if (s->cur_entry != FW_CFG_INVALID && e->data &&
s->cur_offset < e->len) {
/* ... */
do {
value = (value << 8) | e->data[s->cur_offset++];
size--;
} while (size && s->cur_offset < e->len);
/* ... */
value <<= 8 * size;
}

This makes it clear that "size" is strictly smaller than sizeof(value)
when the shift is reached.

I'll let you choose between the two alternatives. :)

Thanks, and I'm sorry.
Laszlo

> +
>  static uint8_t fw_cfg_read(FWCfgState *s)
>  {
>  int arch = !!(s->cur_entry & FW_CFG_ARCH_LOCAL);
> @@ -291,19 +320,6 @@ static uint8_t fw_cfg_read(FWCfgState *s)
>  return ret;
>  }
>  
> -static uint64_t fw_cfg_data_mem_read(void *opaque, hwaddr addr,
> - unsigned size)
> -{
> -FWCfgState *s = opaque;
> -uint64_t value = 0;
> -unsigned i;
> -
> -for (i = 0; i < size; ++i) {
> -value = (value << 8) | fw_cfg_read(s);
> -}
> -return value;
> -}
> -
>  static void fw_cfg_data_mem_write(void *opaque, hwaddr addr,
>uint64_t value, unsigned size)
>  {
> @@ -485,7 +501,7 @@ static const MemoryRegionOps fw_cfg_ctl_mem_ops = {
>  };
>  
>  static const MemoryRegionOps fw_cfg_data_mem_ops = {
> -.read =

Re: [Qemu-devel] [PATCH v7 08/35] exec: allow memory to be allocated from any kind of path

2015-11-04 Thread Xiao Guangrong




On 11/04/2015 08:40 PM, Eduardo Habkost wrote:

On Wed, Nov 04, 2015 at 11:12:41AM +0800, Xiao Guangrong wrote:

On 11/04/2015 07:00 AM, Eduardo Habkost wrote:

On Mon, Nov 02, 2015 at 05:13:10PM +0800, Xiao Guangrong wrote:

Currently file_ram_alloc() is designed for hugetlbfs, however, the memory
of nvdimm can come from either raw pmem device eg, /dev/pmem, or the file
locates at DAX enabled filesystem

So this patch let it work on any kind of path

Signed-off-by: Xiao Guangrong 
---
  exec.c | 24 
  1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/exec.c b/exec.c
index 9de38be..9075f4d 100644
--- a/exec.c
+++ b/exec.c
@@ -1184,25 +1184,25 @@ static void *file_ram_alloc(RAMBlock *block,
  char *c;
  void *area;
  int fd;
-uint64_t hpagesize;
+uint64_t pagesize;
  Error *local_err = NULL;

-hpagesize = qemu_file_get_page_size(path, _err);
+pagesize = qemu_file_get_page_size(path, _err);
  if (local_err) {
  error_propagate(errp, local_err);
  goto error;
  }

-if (hpagesize == getpagesize()) {
-fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
+if (pagesize == getpagesize()) {
+fprintf(stderr, "Memory is not allocated from HugeTlbfs.\n");


If the point of this patch is to allow file_ram_alloc() to not be
specific to hugetlbfs anymore, this warning can simply go away.

(And in case if you really want to keep the warning, I don't see the
point of the changes you made to it.)



This is the history why we did it like this:
https://lists.gnu.org/archive/html/qemu-devel/2015-10/msg02862.html


The rule I am trying to follow is simple: if there are valid use cases
(e.g. nvdimm, tmpfs) where the warning will be printed every single time
QEMU runs, the warning is not appropriate.

If you really want to keep a warning, please make it not be printed on
all other valid use cases (nvdimm and tmpfs). Personally, I don't think
adding those additional checks would be worth the trouble, that's why I
suggest removing the warning.



Q:
| What this *actually* is trying to warn against is that
| mapping a regular file (as opposed to hugetlbfs)
| means transparent huge pages don't work.


I don't think the author of that warning even thought about transparent
huge pages (did THP even existed when it was written?). I believe they
just assumed that the only reason for using -mem-path would be hugetlbfs
and wanted to warn about it. That assumption isn't true anymore.


Michael, your idea?

If Michael will not beat me, i will drop this. :)

Re: [Qemu-devel] [PATCH v4 4/6] fw_cfg: avoid calculating invalid current entry pointer

2015-11-04 Thread Laszlo Ersek

On 11/03/15 22:40, Gabriel L. Somlo wrote:
> When calculating a pointer to the currently selected fw_cfg item, the
> following is used:
> 
>   FWCfgEntry *e = >entries[arch][s->cur_entry & FW_CFG_ENTRY_MASK];
> 
> When s->cur_entry is FW_CFG_INVALID, we are calculating the address of
> a non-existent element in s->entries[arch][...], which is undefined.
> 
> This patch ensures the resulting entry pointer is se to NULL whenever

se[t] to NULL

> s->cur_entry is FW_CFG_INVALID.
> 
> Reported-by: Laszlo Ersek 
> Cc: Marc Marí 
> Signed-off-by: Gabriel Somlo 
> ---
>  hw/nvram/fw_cfg.c | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c
> index c2d3a0a..046fa74 100644
> --- a/hw/nvram/fw_cfg.c
> +++ b/hw/nvram/fw_cfg.c
> @@ -277,7 +277,8 @@ static int fw_cfg_select(FWCfgState *s, uint16_t key)
>  static uint8_t fw_cfg_read(FWCfgState *s)
>  {
>  int arch = !!(s->cur_entry & FW_CFG_ARCH_LOCAL);
> -FWCfgEntry *e = >entries[arch][s->cur_entry & FW_CFG_ENTRY_MASK];
> +FWCfgEntry *e = (s->cur_entry == FW_CFG_INVALID) ? NULL :
> +>entries[arch][s->cur_entry & FW_CFG_ENTRY_MASK];
>  uint8_t ret;
>  
>  if (s->cur_entry == FW_CFG_INVALID || !e->data || s->cur_offset >= 
> e->len)
> @@ -342,7 +343,8 @@ static void fw_cfg_dma_transfer(FWCfgState *s)
>  }
>  
>  arch = !!(s->cur_entry & FW_CFG_ARCH_LOCAL);
> -e = >entries[arch][s->cur_entry & FW_CFG_ENTRY_MASK];
> +e = (s->cur_entry == FW_CFG_INVALID) ? NULL :
> +>entries[arch][s->cur_entry & FW_CFG_ENTRY_MASK];
>  
>  if (dma.control & FW_CFG_DMA_CTL_READ) {
>  read = 1;
> 

Reviewed-by: Laszlo Ersek

Re: [Qemu-devel] [PATCH v2] qemu-sockets: do not test path with access() before unlinking

2015-11-04 Thread Markus Armbruster

Paolo Bonzini  writes:

> Using access() is a time-of-check/time-of-use race condition.  It is
> okay to use them to provide better error messages, but that is pretty
> much it.
>
> This is not one such case; on the other hand, access() *will* skip
> unlink() for a non-existent path, so ignore ENOENT return values from
> the unlink() system call.
>
> Signed-off-by: Paolo Bonzini 
> ---
>  util/qemu-sockets.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c
> index 9142917..de9145a 100644
> --- a/util/qemu-sockets.c
> +++ b/util/qemu-sockets.c
> @@ -751,8 +751,7 @@ int unix_listen_opts(QemuOpts *opts, Error **errp)
>  qemu_opt_set(opts, "path", un.sun_path, _abort);
>  }
>  
> -if ((access(un.sun_path, F_OK) == 0) &&
> -unlink(un.sun_path) < 0) {
> +if (unlink(un.sun_path) < 0 && errno != ENOENT) {
>  error_setg_errno(errp, errno,
>   "Failed to unlink socket %s", un.sun_path);
>  goto err;

Reviewed-by: Markus Armbruster

Re: [Qemu-devel] [PATCH v7 27/35] nvdimm acpi: build ACPI nvdimm devices

2015-11-04 Thread Xiao Guangrong




On 11/04/2015 04:56 PM, Igor Mammedov wrote:

On Tue, 3 Nov 2015 22:22:40 +0800
Xiao Guangrong  wrote:




On 11/03/2015 09:13 PM, Igor Mammedov wrote:

On Mon,  2 Nov 2015 17:13:29 +0800
Xiao Guangrong  wrote:


NVDIMM devices is defined in ACPI 6.0 9.20 NVDIMM Devices

There is a root device under \_SB and specified NVDIMM devices are under the
root device. Each NVDIMM device has _ADR which returns its handle used to
associate MEMDEV structure in NFIT

We reserve handle 0 for root device. In this patch, we save handle, handle,
arg1 and arg2 to dsm memory. Arg3 is conditionally saved in later patch

Signed-off-by: Xiao Guangrong 
---
   hw/acpi/nvdimm.c | 184 
+++
   1 file changed, 184 insertions(+)

diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
index dd84e5f..53ed675 100644
--- a/hw/acpi/nvdimm.c
+++ b/hw/acpi/nvdimm.c
@@ -368,6 +368,15 @@ static void nvdimm_build_nfit(GSList *device_list, GArray 
*table_offsets,
   g_array_free(structures, true);
   }

+struct NvdimmDsmIn {
+uint32_t handle;
+uint32_t revision;
+uint32_t function;
+   /* the remaining size in the page is used by arg3. */
+uint8_t arg3[0];
+} QEMU_PACKED;
+typedef struct NvdimmDsmIn NvdimmDsmIn;
+
   static uint64_t
   nvdimm_dsm_read(void *opaque, hwaddr addr, unsigned size)
   {
@@ -377,6 +386,7 @@ nvdimm_dsm_read(void *opaque, hwaddr addr, unsigned size)
   static void
   nvdimm_dsm_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
   {
+fprintf(stderr, "BUG: we never write DSM notification IO Port.\n");

it doesn't seem like this hunk belongs here


Er, we have changed the logic:
- others:
1) the buffer length is directly got from IO read rather than got
   from dsm memory
[ This has documented in v5's changelog. ]

So, the IO write is replaced by IO read, nvdimm_dsm_write() should not be
triggered.




   }

   static const MemoryRegionOps nvdimm_dsm_ops = {
@@ -402,6 +412,179 @@ void nvdimm_init_acpi_state(MemoryRegion *memory, 
MemoryRegion *io,
   memory_region_add_subregion(io, NVDIMM_ACPI_IO_BASE, >io_mr);
   }

+#define BUILD_STA_METHOD(_dev_, _method_)  \
+do {   \
+_method_ = aml_method("_STA", 0);  \
+aml_append(_method_, aml_return(aml_int(0x0f)));   \
+aml_append(_dev_, _method_);   \
+} while (0)

_STA doesn't have any logic here so drop macro and just
replace its call sites with:


Okay, I was just wanting to save some code lines. I will drop this macro.



aml_append(foo_dev, aml_name_decl("_STA", aml_int(0xf));


_STA is required as a method with zero argument but this statement just
define a object. It is okay?

Spec doesn't say that it must be method, it says that it will evaluate _STA 
object
and result must be a combination of defined flags.
AML wise calling a method with 0 arguments and referencing named variable
is the same thing, both end up being just a namestring.


I just tested it, it works.



Also note that _STA here return 0xF, and spec says that if _STA is missing
OSPM shall assume its implicit value being 0xF, so you can just drop _STA
object here altogether.


Actually, it will be needed for NVDIMM hotplug, but it is okay to me
to drop it at present. Let's introduce it when we implement hotplug.









+
+#define BUILD_DSM_METHOD(_dev_, _method_, _handle_, _uuid_)\
+do {   \
+Aml *ifctx, *uuid; \
+_method_ = aml_method("_DSM", 4);  \
+/* check UUID if it is we expect, return the errorcode if not.*/   \
+uuid = aml_touuid(_uuid_); \
+ifctx = aml_if(aml_lnot(aml_equal(aml_arg(0), uuid))); \
+aml_append(ifctx, aml_return(aml_int(1 /* Not Supported */))); \
+aml_append(method, ifctx); \
+aml_append(method, aml_return(aml_call4("NCAL", aml_int(_handle_), \
+   aml_arg(1), aml_arg(2), aml_arg(3;  \
+aml_append(_dev_, _method_);   \
+} while (0)
+
+#define BUILD_FIELD_UNIT_SIZE(_field_, _byte_, _name_) \
+aml_append(_field_, aml_named_field(_name_, (_byte_) * BITS_PER_BYTE))
+
+#define BUILD_FIELD_UNIT_STRUCT(_field_, _s_, _f_, _name_) \
+BUILD_FIELD_UNIT_SIZE(_field_, sizeof(typeof_field(_s_, _f_)), _name_)
+
+static void build_nvdimm_devices(GSList *device_list, Aml *root_dev)
+{
+for (; device_list; device_list = device_list->next) {
+

Re: [Qemu-devel] [PATCH v7 11/35] util: introduce qemu_file_getlength()

2015-11-04 Thread Eduardo Habkost

On Wed, Nov 04, 2015 at 11:17:09AM +0800, Xiao Guangrong wrote:
> 
> 
> On 11/04/2015 07:21 AM, Eduardo Habkost wrote:
> >On Mon, Nov 02, 2015 at 05:13:13PM +0800, Xiao Guangrong wrote:
> >[...]
> >>+size_t qemu_file_getlength(const char *file, Error **errp)
> >>+{
> >>+int64_t size;
> >[...]
> >>+return size;
> >
> >Can you guarantee that SIZE_MAX >= INT64_MAX on all platforms supported
> >by QEMU?
> >
> 
> Actually, this function is abstracted from the common function, 
> raw_getlength(),
> in raw-posix.c whose return value is int64_t.
> 
> And i think int64_t is large enough for block devices.

int64_t should be enough, but I don't know if size_t is large enough on
all platforms.

I believe it's going to be either one of those cases:

* If you are absolutely sure SIZE_MAX >= INT64_MAX on all platforms,
  please explain why (and maybe add a QEMU_BUILD_BUG_ON?). (I don't
  think this will be the case)
* If SIZE_MAX < INT64_MAX is possible but you believe
  size <= SIZE_MAX is always true here, please explain why (and maybe
  add an assert()).
* Otherwise, we need to set an appropriate error if size > SIZE_MAX
  or change the type of qemu_file_getlength(). What about making it
  uint64_t?

-- 
Eduardo

Re: [Qemu-devel] [PATCH v9 00/27] alternate layout (post-introspection cleanups, subset C)

2015-11-04 Thread Eric Blake

On 11/04/2015 03:22 AM, Markus Armbruster wrote:
> Eric Blake  writes:
> 
>> No pending prerequisites; based on qemu.git master
>>
>> Also available as a tag at this location:
>> git fetch git://repo.or.cz/qemu/ericb.git qapi-cleanupv9c
>>
>> and will soon be part of my branch with the rest of the v5 series, at:
>> http://repo.or.cz/qemu/ericb.git/shortlog/refs/heads/qapi
>>
>> v9 notes:
>> More patches added, and several reorganized.  Lots of new patches
>> from Markus, although not in the order originally proposed.
>>
>> The first 8 patches are fairly straightforward, and could probably
>> be taken as-is. Patch 9 is a rewrite of v8 4/17, but in the opposite
>> direction (document that no sorting is done, rather than attempting
>> to sort), so it may need further fine-tuning.  Patches 12-21
>> represents a fusion of Markus' and my attempts to rewrite v5 7/17
>> into a more-reviewable set of patches, and caused further churn
>> later in the series.
> 
> Hard freeze is next week.
> 
> PATCH 01-07+09 are simple cleanups, bug fixes tests and documentation,
> which makes them obvious candidates for 2.5.
> 
> PATCH 08 is a feature, but harmless enough.  I still don't like it much,
> but I said I'll take it.  Best before the hard freeze, though.
> 
> The remainder of the series doesn't feel like post hard freeze material.
> What do you think?

My patches _were_ posted prior to soft freeze, even if the initial
review did not happen then; so on that grounds, you can continue to take
as much as you want until hard freeze actually happens. But it gets
harder and harder to justify, and the process definitely changes when
hard freeze hits (no features, regardless of when they were first
posted, but only bug fixes).

So it sounds like we won't get all of my qapi queue in 2.5.  My goal had
originally been to get netdev_add to be fully introspectible, but that's
still more than 20 patches away; and the status quo of learning about
the netdev_add command being less than perfect isn't a regression per
se.  So you are right that the later patches in my series can probably
wait until 2.6.  I'm fine with your judgment on where you want to draw
the line of what will make soft freeze.

> 
> I don't have the complete picture of your queue.  Please double-check
> whether you got anything in it that affects introspection, because
> changing introspection will become super awkward as soon as 2.5 is out.

Patches 8 and 9 in this series have to make 2.5 (and we're in agreement
that while patch 9 is not quite baked in this v9 spin, we should still
have plenty of time to get it done before hard freeze).  The only other
pending patch I have previously posted from my queue that touches
qapi-introspect.py does not actually change introspection output:

http://repo.or.cz/qemu/ericb.git/commitdiff/5c25f6eb95, first posted at:
https://lists.gnu.org/archive/html/qemu-devel/2015-09/msg05441.html

so we should be fine on that front.  Introspection itself is fine, and
the bulk of my focus has been on cleanups to the internals and
extensions of the internals to allow netdev_add to be introspectible.

I can certainly browse through my pending queue to double-check if any
of the patches there qualify as bug fixes that are safe even during hard
freeze, and focus on hoisting them to the front of the review queue,
once we get 1-9 of this series ready for pull.  And obviously that
should mean user-triggerable bugs under existing .json files (patches
like 24/27 that fix design bugs in the generator, but which don't affect
any user besides the testsuite, aren't hard-freeze material - either it
makes soft freeze or it defers to 2.6).

> 
>> Patch 23 still uses tag_member.type == None; I ran out of time to
>> work on Markus' idea of providing an instance of QAPISchemaBuiltinType
>> to fill the role for 'qtype_code' without being exposed through .json
>> files and without breaking the invariant of a valid member.type after
>> check(), and wanted to get the rest of the series started under revew.
>> So I may need a followup patch or even a v10 of the later half of
>> this series after exploring that idea more.
> 
> I'll continue reviewing meanwhile.
> 

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org

signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH v4 3/7] e1000: Trivial implementation of various MAC registers

2015-11-04 Thread Leonid Bloch

On Wed, Nov 4, 2015 at 4:44 AM, Jason Wang  wrote:
>
>
> On 11/03/2015 07:14 PM, Leonid Bloch wrote:
>> These registers appear in Intel's specs, but were not implemented.
>> These registers are now implemented trivially, i.e. they are initiated
>> with zero values, and if they are RW, they can be written or read by the
>> driver, or read only if they are R (essentially retaining their zero
>> values). For these registers no other procedures are performed.
>>
>> For the trivially implemented Diagnostic registers, a debug warning is
>> produced on read/write attempts.
>>
>> The registers implemented here are:
>>
>> Transmit:
>> RW: AIT
>>
>> Management:
>> RW: WUC WUS IPAVIP6AT*  IP4AT*  FFLT*   WUPM*   FFMT*   FFVT*
>>
>> Diagnostic:
>> RW: RDFHRDFTRDFHS   RDFTS   RDFPC   PBM*TDFHTDFTTDFHS
>> TDFTS   TDFPC
>>
>> Statistic:
>> RW: FCRUC
>> R:  RNBCTSCTFC  MGTPRC  MGTPDC  MGTPTC  RFC RJC SCC ECOL
>> LATECOL MCC COLCDC  TNCRS   SEC CEXTERR RLECXONRXC
>> XONTXC  XOFFRXC XOFFTXC
>>
>> Signed-off-by: Leonid Bloch 
>> Signed-off-by: Dmitry Fleytman 
>> ---
>>  hw/net/e1000.c  | 95 
>> +++--
>>  hw/net/e1000_regs.h |  6 
>>  2 files changed, 98 insertions(+), 3 deletions(-)
>>
>> diff --git a/hw/net/e1000.c b/hw/net/e1000.c
>> index 1190bbe..7db6614 100644
>> --- a/hw/net/e1000.c
>> +++ b/hw/net/e1000.c
>> @@ -170,7 +170,17 @@ enum {
>>  defreg(TPR), defreg(TPT), defreg(TXDCTL),  defreg(WUFC),
>>  defreg(RA),  defreg(MTA), defreg(CRCERRS), defreg(VFTA),
>>  defreg(VET), defreg(RDTR),defreg(RADV),defreg(TADV),
>> -defreg(ITR),
>> +defreg(ITR), defreg(FCRUC),   defreg(TDFH),defreg(TDFT),
>> +defreg(TDFHS),   defreg(TDFTS),   defreg(TDFPC),   defreg(RDFH),
>> +defreg(RDFT),defreg(RDFHS),   defreg(RDFTS),   defreg(RDFPC),
>> +defreg(IPAV),defreg(WUC), defreg(WUS), defreg(AIT),
>> +defreg(IP6AT),   defreg(IP4AT),   defreg(FFLT),defreg(FFMT),
>> +defreg(FFVT),defreg(WUPM),defreg(PBM), defreg(SCC),
>> +defreg(ECOL),defreg(MCC), defreg(LATECOL), defreg(COLC),
>> +defreg(DC),  defreg(TNCRS),   defreg(SEC), defreg(CEXTERR),
>> +defreg(RLEC),defreg(XONRXC),  defreg(XONTXC),  defreg(XOFFRXC),
>> +defreg(XOFFTXC), defreg(RFC), defreg(RJC), defreg(RNBC),
>> +defreg(TSCTFC),  defreg(MGTPRC),  defreg(MGTPDC),  defreg(MGTPTC)
>>  };
>>
>>  static void
>> @@ -1118,6 +1128,48 @@ mac_readreg(E1000State *s, int index)
>>  }
>>
>>  static uint32_t
>> +mac_readreg_prt(E1000State *s, int index)
>> +{
>> +DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
>> +   "It is not fully implemented.\n", index<<2);
>> +return s->mac_reg[index];
>> +}
>> +
>> +static uint32_t
>> +mac_low4_read(E1000State *s, int index)
>> +{
>> +return s->mac_reg[index] & 0xf;
>> +}
>> +
>> +static uint32_t
>> +mac_low11_read(E1000State *s, int index)
>> +{
>> +return s->mac_reg[index] & 0x7ff;
>> +}
>> +
>> +static uint32_t
>> +mac_low11_read_prt(E1000State *s, int index)
>> +{
>> +DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
>> +   "It is not fully implemented.\n", index<<2);
>> +return s->mac_reg[index] & 0x7ff;
>> +}
>> +
>> +static uint32_t
>> +mac_low13_read_prt(E1000State *s, int index)
>> +{
>> +DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
>> +   "It is not fully implemented.\n", index<<2);
>> +return s->mac_reg[index] & 0x1fff;
>> +}
>> +
>> +static uint32_t
>> +mac_low16_read(E1000State *s, int index)
>> +{
>> +return s->mac_reg[index] & 0x;
>> +}
>> +
>> +static uint32_t
>>  mac_icr_read(E1000State *s, int index)
>>  {
>>  uint32_t ret = s->mac_reg[ICR];
>> @@ -1161,6 +1213,14 @@ mac_writereg(E1000State *s, int index, uint32_t val)
>>  }
>>
>>  static void
>> +mac_writereg_prt(E1000State *s, int index, uint32_t val)
>> +{
>> +DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
>> +   "It is not fully implemented.\n", index<<2);
>> +s->mac_reg[index] = val;
>> +}
>> +
>> +static void
>>  set_rdt(E1000State *s, int index, uint32_t val)
>>  {
>>  s->mac_reg[index] = val & 0x;
>> @@ -1219,26 +1279,50 @@ static uint32_t (*macreg_readops[])(E1000State *, 
>> int) = {
>>  getreg(RDH),  getreg(RDT),  getreg(VET),  getreg(ICS),
>>  getreg(TDBAL),getreg(TDBAH),getreg(RDBAH),getreg(RDBAL),
>>  getreg(TDLEN),getreg(RDLEN),getreg(RDTR), getreg(RADV),
>> -getreg(TADV), getreg(ITR),
>> +getreg(TADV), getreg(ITR),  getreg(FCRUC),getreg(IPAV),
>> +getreg(WUC),  getreg(WUS),  getreg(SCC),  getreg(ECOL),
>> +getreg(MCC),  getreg(LATECOL),  getreg(COLC), getreg(DC),
>> +

[Qemu-devel] [PULL 5/5] migration: fix analyze-migration.py script

2015-11-04 Thread Juan Quintela

From: Mark Cave-Ayland 

Commit 61964 "Add configuration section" broke the analyze-migration.py script
which terminates due to the unrecognised section. Fix the script by parsing
the contents of the configuration section directly into a new
ConfigurationSection object (although nothing is done with it yet).

Signed-off-by: Mark Cave-Ayland 
Reviewed-by: Juan Quintela al3
Signed-off-by: Juan Quintela al3
---
 scripts/analyze-migration.py | 13 +
 1 file changed, 13 insertions(+)

diff --git a/scripts/analyze-migration.py b/scripts/analyze-migration.py
index f6894be..1455387 100755
--- a/scripts/analyze-migration.py
+++ b/scripts/analyze-migration.py
@@ -252,6 +252,15 @@ class HTABSection(object):
 def getDict(self):
 return ""

+
+class ConfigurationSection(object):
+def __init__(self, file):
+self.file = file
+
+def read(self):
+name_len = self.file.read32()
+name = self.file.readstr(len = name_len)
+
 class VMSDFieldGeneric(object):
 def __init__(self, desc, file):
 self.file = file
@@ -474,6 +483,7 @@ class MigrationDump(object):
 QEMU_VM_SECTION_FULL  = 0x04
 QEMU_VM_SUBSECTION= 0x05
 QEMU_VM_VMDESCRIPTION = 0x06
+QEMU_VM_CONFIGURATION = 0x07
 QEMU_VM_SECTION_FOOTER= 0x7e

 def __init__(self, filename):
@@ -514,6 +524,9 @@ class MigrationDump(object):
 section_type = file.read8()
 if section_type == self.QEMU_VM_EOF:
 break
+elif section_type == self.QEMU_VM_CONFIGURATION:
+section = ConfigurationSection(file)
+section.read()
 elif section_type == self.QEMU_VM_SECTION_START or section_type == 
self.QEMU_VM_SECTION_FULL:
 section_id = file.read32()
 name = file.readstr()
-- 
2.4.3

[Qemu-devel] [PULL 1/5] migration: defer migration_end & blk_mig_cleanup

2015-11-04 Thread Juan Quintela

From: Liang Li 

Because of the patch 3ea3b7fa9af067982f34b of kvm, which introduces a
lazy collapsing of small sptes into large sptes mechanism, now
migration_end() is a time consuming operation because it calls
memroy_global_dirty_log_stop(), which will trigger the dropping of small
sptes operation and takes about dozens of milliseconds, so call
migration_end() before all the vmsate data has already been transferred
to the destination will prolong VM downtime. This operation should be
deferred after all the data has been transferred to the destination.

blk_mig_cleanup() can be deferred too.

For a VM with 8G RAM, this patch can reduce the VM downtime about 30 ms.

Signed-off-by: Liang Li 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Juan Quintela al3
Reviewed-by: Amit Shah al3
Signed-off-by: Juan Quintela al3
---
 migration/block.c |  1 -
 migration/migration.c | 13 ++---
 migration/ram.c   |  1 -
 3 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/migration/block.c b/migration/block.c
index f7bb1e0..8401597 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -750,7 +750,6 @@ static int block_save_complete(QEMUFile *f, void *opaque)

 qemu_put_be64(f, BLK_MIG_FLAG_EOS);

-blk_mig_cleanup();
 return 0;
 }

diff --git a/migration/migration.c b/migration/migration.c
index b092f38..d5a7304 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -613,12 +613,9 @@ static void migrate_fd_cleanup(void *opaque)

 assert(s->state != MIGRATION_STATUS_ACTIVE);

-if (s->state != MIGRATION_STATUS_COMPLETED) {
-qemu_savevm_state_cancel();
-if (s->state == MIGRATION_STATUS_CANCELLING) {
-migrate_set_state(s, MIGRATION_STATUS_CANCELLING,
-  MIGRATION_STATUS_CANCELLED);
-}
+if (s->state == MIGRATION_STATUS_CANCELLING) {
+migrate_set_state(s, MIGRATION_STATUS_CANCELLING,
+  MIGRATION_STATUS_CANCELLED);
 }

 notifier_list_notify(_state_notifiers, s);
@@ -1028,6 +1025,7 @@ static void *migration_thread(void *opaque)
 int64_t initial_bytes = 0;
 int64_t max_size = 0;
 int64_t start_time = initial_time;
+int64_t end_time;
 bool old_vm_running = false;

 rcu_register_thread();
@@ -1089,10 +1087,11 @@ static void *migration_thread(void *opaque)

 /* If we enabled cpu throttling for auto-converge, turn it off. */
 cpu_throttle_stop();
+end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

 qemu_mutex_lock_iothread();
+qemu_savevm_state_cancel();
 if (s->state == MIGRATION_STATUS_COMPLETED) {
-int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
 uint64_t transferred_bytes = qemu_ftell(s->file);
 s->total_time = end_time - s->total_time;
 s->downtime = end_time - start_time;
diff --git a/migration/ram.c b/migration/ram.c
index a25bcc7..25e9eeb 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1344,7 +1344,6 @@ static int ram_save_complete(QEMUFile *f, void *opaque)

 rcu_read_unlock();

-migration_end();
 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);

 return 0;
-- 
2.4.3

[Qemu-devel] [PATCH v2] qemu-sockets: do not test path with access() before unlinking

2015-11-04 Thread Paolo Bonzini

Using access() is a time-of-check/time-of-use race condition.  It is
okay to use them to provide better error messages, but that is pretty
much it.

This is not one such case; on the other hand, access() *will* skip
unlink() for a non-existent path, so ignore ENOENT return values from
the unlink() system call.

Signed-off-by: Paolo Bonzini 
---
 util/qemu-sockets.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c
index 9142917..de9145a 100644
--- a/util/qemu-sockets.c
+++ b/util/qemu-sockets.c
@@ -751,8 +751,7 @@ int unix_listen_opts(QemuOpts *opts, Error **errp)
 qemu_opt_set(opts, "path", un.sun_path, _abort);
 }
 
-if ((access(un.sun_path, F_OK) == 0) &&
-unlink(un.sun_path) < 0) {
+if (unlink(un.sun_path) < 0 && errno != ENOENT) {
 error_setg_errno(errp, errno,
  "Failed to unlink socket %s", un.sun_path);
 goto err;
-- 
2.5.0

1 2 3 >

1 - 100 of 212 matches

Mail list logo