date:20170530

Re: [Qemu-devel] [Qemu devel v5 PATCH 4/5] msf2: Add Smartfusion2 SoC.

2017-05-30 Thread Philippe Mathieu-Daudé


Hi Subbaraya,

So far so good!

On 05/16/2017 12:38 PM, Subbaraya Sundeep wrote:

Smartfusion2 SoC has hardened Microcontroller subsystem
and flash based FPGA fabric. This patch adds support for
Microcontroller subsystem in the SoC.

Signed-off-by: Subbaraya Sundeep 
---
 default-configs/arm-softmmu.mak |   1 +
 hw/arm/Makefile.objs|   1 +
 hw/arm/msf2-soc.c   | 201 
 include/hw/arm/msf2-soc.h   |  69 ++
 4 files changed, 272 insertions(+)
 create mode 100644 hw/arm/msf2-soc.c
 create mode 100644 include/hw/arm/msf2-soc.h

diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak
index 78d7af0..7062512 100644
--- a/default-configs/arm-softmmu.mak
+++ b/default-configs/arm-softmmu.mak
@@ -122,3 +122,4 @@ CONFIG_ACPI=y
 CONFIG_SMBIOS=y
 CONFIG_ASPEED_SOC=y
 CONFIG_GPIO_KEY=y
+CONFIG_MSF2=y
diff --git a/hw/arm/Makefile.objs b/hw/arm/Makefile.objs
index 4c5c4ee..c828061 100644
--- a/hw/arm/Makefile.objs
+++ b/hw/arm/Makefile.objs
@@ -18,3 +18,4 @@ obj-$(CONFIG_FSL_IMX25) += fsl-imx25.o imx25_pdk.o
 obj-$(CONFIG_FSL_IMX31) += fsl-imx31.o kzm.o
 obj-$(CONFIG_FSL_IMX6) += fsl-imx6.o sabrelite.o
 obj-$(CONFIG_ASPEED_SOC) += aspeed_soc.o aspeed.o
+obj-$(CONFIG_MSF2) += msf2-soc.o
diff --git a/hw/arm/msf2-soc.c b/hw/arm/msf2-soc.c
new file mode 100644
index 000..329e30c
--- /dev/null
+++ b/hw/arm/msf2-soc.c
@@ -0,0 +1,201 @@
+/*
+ * SmartFusion2 SoC emulation.
+ *
+ * Copyright (c) 2017 Subbaraya Sundeep 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "hw/arm/arm.h"
+#include "exec/address-spaces.h"
+#include "hw/char/serial.h"
+#include "hw/boards.h"
+#include "sysemu/block-backend.h"
+#include "hw/arm/msf2-soc.h"
+
+#define MSF2_TIMER_BASE 0x40004000
+#define MSF2_SYSREG_BASE0x40038000
+
+#define MSF2_TIMER_IRQ0 14
+#define MSF2_TIMER_IRQ1 15
+
+static const uint32_t spi_addr[MSF2_NUM_SPIS] = { 0x40001000 , 0x40011000 };
+static const uint32_t uart_addr[MSF2_NUM_UARTS] = { 0x4000 , 0x4001 };
+
+static const int spi_irq[MSF2_NUM_SPIS] = { 2, 3 };
+static const int uart_irq[MSF2_NUM_UARTS] = { 10, 11 };


Maybe you can declare timer_irq[] here instead of the defines, to keep a 
common style.



+
+static void m2sxxx_soc_initfn(Object *obj)
+{
+MSF2State *s = MSF2_SOC(obj);
+int i;
+
+object_initialize(>armv7m, sizeof(s->armv7m), TYPE_ARMV7M);
+qdev_set_parent_bus(DEVICE(>armv7m), sysbus_get_default());
+
+object_initialize(>sysreg, sizeof(s->sysreg), TYPE_MSF2_SYSREG);
+qdev_set_parent_bus(DEVICE(>sysreg), sysbus_get_default());
+
+object_initialize(>timer, sizeof(s->timer), TYPE_MSS_TIMER);
+qdev_set_parent_bus(DEVICE(>timer), sysbus_get_default());
+
+for (i = 0; i < MSF2_NUM_SPIS; i++) {
+object_initialize(>spi[i], sizeof(s->spi[i]),
+  TYPE_MSS_SPI);
+qdev_set_parent_bus(DEVICE(>spi[i]), sysbus_get_default());
+}
+}
+
+static void m2sxxx_soc_realize(DeviceState *dev_soc, Error **errp)
+{
+MSF2State *s = MSF2_SOC(dev_soc);
+DeviceState *dev, *armv7m;
+SysBusDevice *busdev;
+Error *err = NULL;
+int i;
+
+MemoryRegion *system_memory = get_system_memory();
+MemoryRegion *nvm = g_new(MemoryRegion, 1);
+MemoryRegion *nvm_alias = g_new(MemoryRegion, 1);
+MemoryRegion *sram = g_new(MemoryRegion, 1);
+
+memory_region_init_ram(nvm, NULL, "MSF2.eNVM", s->envm_size,
+   _fatal);
+
+/*
+ * On power-on, the eNVM region 0x6000 is automatically
+ * remapped to the Cortex-M3 processor executable region
+ * start address (0x0). We do not support remapping other eNVM,
+ * eSRAM and DDR regions by guest(via Sysreg) currently.
+ */
+

Re: [Qemu-devel] [Qemu devel v5 PATCH 0/5] Add support for Smartfusion2 SoC

2017-05-30 Thread Philippe Mathieu-Daudé

Hi Sundeep,

On 05/29/2017 02:28 AM, sundeep subbaraya wrote:

Hi Philippe,

Any update on this? I will wait for your comments too
and send next iteration fixing Alistair comments.

Sorry I'm supposed to be in holidays ;)

Thanks,
Sundeep

On Wed, May 17, 2017 at 3:09 PM, sundeep subbaraya
> wrote:

Hi Philippe,

On Wed, May 17, 2017 at 9:57 AM, Philippe Mathieu-Daudé
> wrote:

Hi Sundeep,

This patchset is way cleaner!
I had a fast look and I like it, I'll try to make some time soon
to review details and test it.

Thank you

Is your work interested on U-Boot or more focused in Linux kernel?

I am interested more in kernel. I had to look into u-boot for first
time for Qemu only.
I worked only on FPGAs(load kernel with debugger) till now so never
got a chance to look into u-boot.

If you compile QEMU with libfdt support you can use the -dtb
option to pass the blob to the kernel directly, bypassing the
bootloader.

Yeah for armv7m I could not find any thing like that in tree.

If you need a bootloader you may give a look at coreboot which
supports dts well, see how Vladimir Serbinenko used Linux's dt
to boot a QEMU Versatile Express board:

https://mail.coreboot.org/pipermail/coreboot-gerrit/2016-February/040899.html

Cool. I will look into it.

Thanks,
Sundeep

Regards,

Phil.

On 05/16/2017 12:38 PM, Subbaraya Sundeep wrote:

Hi Qemu-devel,

I am trying to add Smartfusion2 SoC.
SoC is from Microsemi and System on Module(SOM)
board is from Emcraft systems. Smartfusion2 has hardened
Microcontroller(Cortex-M3)based Sub System and FPGA fabric.
At the moment only system timer, sysreg and SPI
controller are modelled.

Testing:
./arm-softmmu/qemu-system-arm -M smartfusion2-som -serial
mon:stdio \
-kernel u-boot.bin -display none -drive
file=spi.bin,if=mtd,format=raw

I'm not sure the timer is working correctly, U-Boot loops with this pattern:

msf2_sysreg_read: addr: 0x0048 data: 0x0220
msf2_sysreg_write: addr: 0x0048 data: 0x0220
msf2_sysreg_read: addr: 0x0048 data: 0x0220
msf2_sysreg_write: addr: 0x0048 data: 0x0020
msf2_sysreg_read: addr: 0x0048 data: 0x0020
msf2_sysreg_write: addr: 0x0048 data: 0x
msf2_sysreg_read: addr: 0x0048 data: 0x
msf2_sysreg_write: addr: 0x0048 data: 0x0020
msf2_sysreg_read: addr: 0x0048 data: 0x0020
msf2_sysreg_write: addr: 0x0048 data: 0x0220

Binaries u-boot.bin and spi.bin are at:

you can compress spi.bin!

can you share u-boot.elf with debug symbols too?

Regards,

Phil.

https://github.com/Subbaraya-Sundeep/qemu-test-binaries.git

U-boot is from Emcraft with modified
- SPI driver not to use PDMA.
- ugly hack to pass dtb to kernel in r1.
@
https://github.com/Subbaraya-Sundeep/emcraft-uboot-sf2.git

Linux is 4.5 linux with Smartfusion2 SoC dts and clocksource
driver added by myself @
https://github.com/Subbaraya-Sundeep/linux.git

v5
As per Philippe comments:
Added abort in Sysreg if guest tries to remap memory
other than default mapping.
Use of CONFIG_MSF2 in Makefile for soc.c
Fixed incorrect logic in timer model.
Renamed msf2-timer.c -> mss-timer.c
msf2-spi.c -> mss-spi.c also type names
Renamed function msf2_init->emcraft_sf2_init in
msf2-som.c
Added part-name,eNVM-size,eSRAM-size,pclk0 and pclk1
properties to soc.
Pass soc part-name,memory size and clock rate
properties from som.
v4:
Fixed build failure by using PRIx macros.
v3:
Added SoC file and board file as per Alistair comments.
v2:
Added SPI controller so that u-boot loads kernel from
spi flash.
v1:
Initial patch set with timer and sysreg

Thanks,
Sundeep

Subbaraya Sundeep (5):
  msf2: Add Smartfusion2 System timer
  msf2: Microsemi Smartfusion2 System

Re: [Qemu-devel] [PATCH v2 5/6] cputlb: add trace events

2017-05-30 Thread Philippe Mathieu-Daudé


On 05/17/2017 11:52 AM, Alex Bennée wrote:

Given the range of costs for various SoftMMU TLB operations from
deferring work for the currently running vCPU to bring the whole
emulated machine to a stop for synchronised updates simple counters
are less useful. Instead we log events via the trace infrastructure
and we can then post-process the data in a range of ways.

  tlb_flush_self - the vCPU flushed its own TLB
  tlb_flush_async_schedule - work was scheduled and the vCPU kicked
  tlb_flush_synced_schedule - exclusive work was scheduled on a vCPU
  tlb_flush_work - scheduled work was done

We can use the difference between the work being scheduled and
tlb_flush_work to calculate the latency introduced.

Signed-off-by: Alex Bennée 
---
 cputlb.c | 34 +-
 trace-events |  7 +++
 2 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/cputlb.c b/cputlb.c
index d1859c3f37..b2490863e4 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -127,6 +127,7 @@ static void tlb_flush_nocheck(CPUState *cpu)

 static void tlb_flush_global_async_work(CPUState *cpu, run_on_cpu_data data)
 {
+trace_tlb_flush_work(__func__, cpu->cpu_index);
 tlb_flush_nocheck(cpu);
 }

@@ -135,17 +136,22 @@ void tlb_flush(CPUState *cpu)
 if (cpu->created && !qemu_cpu_is_self(cpu)) {
 if (atomic_mb_read(>pending_tlb_flush) != ALL_MMUIDX_BITS) {
 atomic_mb_set(>pending_tlb_flush, ALL_MMUIDX_BITS);
+trace_tlb_flush_async_schedule(__func__, current_cpu ?
+   current_cpu->cpu_index :
+   cpu->cpu_index, cpu->cpu_index);
 async_run_on_cpu(cpu, tlb_flush_global_async_work,
  RUN_ON_CPU_NULL);
 }
 } else {
+trace_tlb_flush_self(__func__, cpu->cpu_index);
-tlb_flush_nocheck(cpu);
+tlb_flush_global_async_work(cpu, RUN_ON_CPU_NULL);


Reviewed-by: Philippe Mathieu-Daudé 


 }
 }

 void tlb_flush_all_cpus(CPUState *src_cpu)
 {
 const run_on_cpu_func fn = tlb_flush_global_async_work;
+trace_tlb_flush_async_schedule(__func__, src_cpu->cpu_index, -1);
 flush_all_helper(src_cpu, fn, RUN_ON_CPU_NULL);
 fn(src_cpu, RUN_ON_CPU_NULL);
 }
@@ -153,6 +159,7 @@ void tlb_flush_all_cpus(CPUState *src_cpu)
 void tlb_flush_all_cpus_synced(CPUState *src_cpu)
 {
 const run_on_cpu_func fn = tlb_flush_global_async_work;
+trace_tlb_flush_synced_schedule(__func__, src_cpu->cpu_index, -1);
 flush_all_helper(src_cpu, fn, RUN_ON_CPU_NULL);
 async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_NULL);
 }
@@ -163,6 +170,8 @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, 
run_on_cpu_data data)
 unsigned long mmu_idx_bitmask = data.host_int;
 int mmu_idx;

+trace_tlb_flush_work(__func__, cpu->cpu_index);
+
 assert_cpu_is_self(cpu);

 tb_lock();
@@ -196,12 +205,16 @@ void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)

 if (pending_flushes) {
 tlb_debug("reduced mmu_idx: 0x%" PRIx16 "\n", pending_flushes);
+trace_tlb_flush_async_schedule(__func__,
+   current_cpu->cpu_index,
+   cpu->cpu_index);

 atomic_or(>pending_tlb_flush, pending_flushes);
 async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
  RUN_ON_CPU_HOST_INT(pending_flushes));
 }
 } else {
+trace_tlb_flush_self(__func__, cpu->cpu_index);
 tlb_flush_by_mmuidx_async_work(cpu,
RUN_ON_CPU_HOST_INT(idxmap));
 }
@@ -212,6 +225,7 @@ void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, 
uint16_t idxmap)
 const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;

 tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
+trace_tlb_flush_async_schedule(__func__, src_cpu->cpu_index, -1);

 flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
 fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap));
@@ -223,6 +237,7 @@ void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
 const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;

 tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
+trace_tlb_flush_synced_schedule(__func__, src_cpu->cpu_index, -1);

 flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
 async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
@@ -252,6 +267,7 @@ static void tlb_flush_page_async_work(CPUState *cpu, 
run_on_cpu_data data)
 assert_cpu_is_self(cpu);

 tlb_debug("page :" TARGET_FMT_lx "\n", addr);
+trace_tlb_flush_work(__func__, cpu->cpu_index);

 /* Check if we need to flush due to large pages.  */
 if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
@@ -285,9 +301,12 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr)
 tlb_debug("page :" TARGET_FMT_lx "\n", addr);

Re: [Qemu-devel] [PATCH risu] ppc64.risu: Fix broken constraints

2017-05-30 Thread Nikunj A Dadhania

Peter Maydell  writes:

> Commit c10b97092 changed some field names in rldicr and rldimi patterns
> but forgot to update the constraints to match the change. Since the
> field (previously 'rb' and now 'sh') is an immediate rather than a
> register number, the correct fix is to just delete the constraint
> since we don't need to avoid particular values.
>
> Signed-off-by: Peter Maydell 

Thanks for fixing it.


> ---
>  ppc64.risu | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/ppc64.risu b/ppc64.risu
> index dd304e2..e2fd4f6 100644
> --- a/ppc64.risu
> +++ b/ppc64.risu
> @@ -1473,17 +1473,17 @@ RLDICLd PPC64LE 00 rs:5 ra:5 sh:5 mb:6 000 sha:1 
> 1 \
>
>  # format:MD book:I page:105 PPC SR rldicr[.] Rotate Left Dword Immediate 
> then Clear Right
>  RLDICR PPC64LE 00 rs:5 ra:5 sh:5 me:6 001 sha:1 0 \
> -!constraints { $rs != 1 && $ra != 1 && $rb != 1 && $rs != 13 && $ra != 13 && 
> $rb != 13; }
> +!constraints { $rs != 1 && $ra != 1 && $rs != 13 && $ra != 13; }
>  # format:MD book:I page:105 PPC SR rldicr[.] Rotate Left Dword Immediate 
> then Clear Right
>  RLDICRd PPC64LE 00 rs:5 ra:5 sh:5 me:6 001 sha:1 1 \
> -!constraints { $rs != 1 && $ra != 1 && $rb != 1 && $rs != 13 && $ra != 13 && 
> $rb != 13; }
> +!constraints { $rs != 1 && $ra != 1 && $rs != 13 && $ra != 13; }
>
>  # format:MD book:I page:105 PPC SR rldimi[.] Rotate Left Dword Immediate 
> then Mask Insert
>  RLDIMI PPC64LE 00 rs:5 ra:5 sh:5 me:6 011 sha:1 0 \
> -!constraints { $rs != 1 && $ra != 1 && $rb != 1 && $rs != 13 && $ra != 13 && 
> $rb != 13; }
> +!constraints { $rs != 1 && $ra != 1 && $rs != 13 && $ra != 13; }
>  # format:MD book:I page:105 PPC SR rldimi[.] Rotate Left Dword Immediate 
> then Mask Insert
>  RLDIMId PPC64LE 00 rs:5 ra:5 sh:5 me:6 011 sha:1 1 \
> -!constraints { $rs != 1 && $ra != 1 && $rb != 1 && $rs != 13 && $ra != 13 && 
> $rb != 13; }
> +!constraints { $rs != 1 && $ra != 1 && $rs != 13 && $ra != 13; }
>
>  # format:M book:I page:102 v:P1 SR rlwimi[.] Rotate Left Word Immediate then 
> Mask Insert
>  RLWIMI PPC64LE 010100 rs:5 ra:5 sh:5 mb:5 me:5 0 \
> -- 
> 2.7.4

Re: [Qemu-devel] [PATCH risu v2] ppc64: Fix patterns for rotate doubleword instructions

2017-05-30 Thread Nikunj A Dadhania

Peter Maydell  writes:

> On 30 May 2017 at 16:39, Peter Maydell  wrote:
>> On 30 May 2017 at 16:26, Nikunj A Dadhania  wrote:
>>> Sandipan Das  writes:
>>>
 The patterns for the following instructions are fixed:
  * Rotate Left Doubleword then Clear Right (rldcr[.])
  * Rotate Left Doubleword Immediate then Clear Right (rldicr[.])
  * Rotate Left Doubleword Immediate then Mask Insert (rldimi[.])

 The first instruction has a typo. For the other two instructions,
 the extended opcodes are incorrect and the shift field 'sha' is
 absent. Also, the shift field 'sh' should be used in place of the
 register field 'rb'.

 Signed-off-by: Sandipan Das 
>>>
>>> Reviewed-by: Nikunj A Dadhania 
>>
>> Thanks; applied to risu master.
>
> ...but I foolishly didn't run build-all-archs first, which
> points out that there's a bug:

Ouch :(

> Syntax error detected evaluating RLDIMId PPC64LE constraints string: ]
> { $rs != 1 && $ra != 1 && $rb != 1 && $rs != 13 && $ra != 13 && $rb != 13; }
> Global symbol "$rb" requires explicit package name (did you forget to
> declare "my $rb"?) at (eval 429) line 1.
> Global symbol "$rb" requires explicit package name (did you forget to
> declare "my $rb"?) at (eval 429) line 1.
>
> You forgot to update the constraints when you changed the
> field names... I think that the constraints on $rb should
> be removed rather than just changed to use $sh because this
> field is an immediate, not a register number, so we don't need
> to make it avoid 1 and 13. This would bring them into line with
> the other rotate-immediates. I'll post a patch in a second.

Regards
Nikunj

Re: [Qemu-devel] [PATCH v2 1/6] scripts/replay-dump.py: replay log dumper

2017-05-30 Thread Philippe Mathieu-Daudé


On 05/17/2017 11:52 AM, Alex Bennée wrote:

This script is a debugging tool for looking through the contents of a
replay log file. It is incomplete but should fail gracefully at events
it doesn't understand.

It currently understands two different log formats as the audio
record/replay support was merged during since MTTCG. It was written to
help debug what has caused the BQL changes to break replay support.

Signed-off-by: Alex Bennée 


Reviewed-by: Philippe Mathieu-Daudé 


---
 scripts/replay-dump.py | 272 +
 1 file changed, 272 insertions(+)
 create mode 100755 scripts/replay-dump.py

diff --git a/scripts/replay-dump.py b/scripts/replay-dump.py
new file mode 100755
index 00..fdd178aba0
--- /dev/null
+++ b/scripts/replay-dump.py
@@ -0,0 +1,272 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Dump the contents of a recorded execution stream
+#
+#  Copyright (c) 2017 Alex Bennée 
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, see .
+
+import argparse
+import struct
+from collections import namedtuple
+
+# This mirrors some of the global replay state which some of the
+# stream loading refers to. Some decoders may read the next event so
+# we need handle that case. Calling reuse_event will ensure the next
+# event is read from the cache rather than advancing the file.
+
+class ReplayState(object):
+def __init__(self):
+self.event = -1
+self.event_count = 0
+self.already_read = False
+self.current_checkpoint = 0
+self.checkpoint = 0
+
+def set_event(self, ev):
+self.event = ev
+self.event_count += 1
+
+def get_event(self):
+self.already_read = False
+return self.event
+
+def reuse_event(self, ev):
+self.event = ev
+self.already_read = True
+
+def set_checkpoint(self):
+self.checkpoint = self.event - self.checkpoint_start
+
+def get_checkpoint(self):
+return self.checkpoint
+
+replay_state = ReplayState()
+
+# Simple read functions that mirror replay-internal.c
+# The file-stream is big-endian and manually written out a byte at a time.
+
+def read_byte(fin):
+"Read a single byte"
+return struct.unpack('>B', fin.read(1))[0]
+
+def read_event(fin):
+"Read a single byte event, but save some state"
+if replay_state.already_read:
+return replay_state.get_event()
+else:
+replay_state.set_event(read_byte(fin))
+return replay_state.event
+
+def read_word(fin):
+"Read a 16 bit word"
+return struct.unpack('>H', fin.read(2))[0]
+
+def read_dword(fin):
+"Read a 32 bit word"
+return struct.unpack('>I', fin.read(4))[0]
+
+def read_qword(fin):
+"Read a 64 bit word"
+return struct.unpack('>Q', fin.read(8))[0]
+
+# Generic decoder structure
+Decoder = namedtuple("Decoder", "eid name fn")
+
+def call_decode(table, index, dumpfile):
+"Search decode table for next step"
+decoder = next((d for d in table if d.eid == index), None)
+if not decoder:
+print "Could not decode index: %d" % (index)
+print "Entry is: %s" % (decoder)
+print "Decode Table is:\n%s" % (table)
+return False
+else:
+return decoder.fn(decoder.eid, decoder.name, dumpfile)
+
+# Print event
+def print_event(eid, name, string=None, event_count=None):
+"Print event with count"
+if not event_count:
+event_count = replay_state.event_count
+
+if string:
+print "%d:%s(%d) %s" % (event_count, name, eid, string)
+else:
+print "%d:%s(%d)" % (event_count, name, eid)
+
+
+# Decoders for each event type
+
+def decode_unimp(eid, name, _unused_dumpfile):
+"Unimplimented decoder, will trigger exit"
+print "%s not handled - will now stop" % (name)
+return False
+
+# Checkpoint decoder
+def swallow_async_qword(eid, name, dumpfile):
+"Swallow a qword of data without looking at it"
+step_id = read_qword(dumpfile)
+print "  %s(%d) @ %d" % (name, eid, step_id)
+return True
+
+async_decode_table = [ Decoder(0, "REPLAY_ASYNC_EVENT_BH", 
swallow_async_qword),
+   Decoder(1, "REPLAY_ASYNC_INPUT", decode_unimp),
+   Decoder(2, "REPLAY_ASYNC_INPUT_SYNC", decode_unimp),
+   Decoder(3,

Re: [Qemu-devel] [PATCH] spapr: manage hotplugged devices while the VM is not started

2017-05-30 Thread David Gibson

On Tue, May 30, 2017 at 06:04:45PM +0200, Laurent Vivier wrote:
> For QEMU, a hotlugged device is a device added using the HMP/QMP
> interface.
> For SPAPR, a hotplugged device is a device added while the
> machine is running. In this case QEMU doesn't update internal
> state but relies on the OS for this part
> 
> In the case of migration, when we (libvirt) hotplug a device
> on the source guest, we (libvirt) generally hotplug the same
> device on the destination guest. But in this case, the machine
> is stopped (RUN_STATE_INMIGRATE) and QEMU must not expect
> the OS will manage it as an hotplugged device as it will
> be "imported" by the migration.
> 
> This patch changes the meaning of "hotplugged" in spapr.c
> to manage a QEMU hotplugged device like a "coldplugged" one
> when the machine is awaiting an incoming migration.
> 
> Signed-off-by: Laurent Vivier 

So, I think this is a reasonable concept, at least in terms of
cleanliness and not doing unnecessary work.  However, if it's fixing
bugs, I suspect that means we still have problems elsewhere.

Specifically, what is it we're doing before the incoming migration
that's breaking things.  Even if it's unnecessary, anything done there
should be overwritten by the incoming stream.  That should certainly
be the case (now) for the DRC state variables.  Maybe not for the
queued hotplug events - but that means we should update the queue
migration to make sure we clear anything existing on the destination
before adding migrated events.

I'm also concerned by the fact that this makes changes for memory and
cpu hotplug, but not for PCI devices.  Why aren't they also affected
by this problem?

One nit in the implementation, see below:

> ---
>  hw/ppc/spapr.c | 20 ++--
>  1 file changed, 14 insertions(+), 6 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 0980d73..f1302d0 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -2511,6 +2511,12 @@ static void spapr_nmi(NMIState *n, int cpu_index, 
> Error **errp)
>  }
>  }
>  
> +static bool spapr_coldplugged(DeviceState *dev)
> +{
> +return runstate_check(RUN_STATE_INMIGRATE) ||
> +   !dev->hotplugged;
> +}
> +
>  static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t 
> size,
> uint32_t node, bool dedicated_hp_event_source,
> Error **errp)
> @@ -2521,6 +2527,7 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t 
> addr_start, uint64_t size,
>  int i, fdt_offset, fdt_size;
>  void *fdt;
>  uint64_t addr = addr_start;
> +bool coldplugged = spapr_coldplugged(dev);
>  
>  for (i = 0; i < nr_lmbs; i++) {
>  drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB,
> @@ -2532,9 +2539,9 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t 
> addr_start, uint64_t size,
>  SPAPR_MEMORY_BLOCK_SIZE);
>  
>  drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
> -drck->attach(drc, dev, fdt, fdt_offset, !dev->hotplugged, errp);
> +drck->attach(drc, dev, fdt, fdt_offset, coldplugged, errp);
>  addr += SPAPR_MEMORY_BLOCK_SIZE;
> -if (!dev->hotplugged) {
> +if (coldplugged) {
>  /* guests expect coldplugged LMBs to be pre-allocated */
>  drck->set_allocation_state(drc, 
> SPAPR_DR_ALLOCATION_STATE_USABLE);
>  drck->set_isolation_state(drc, 
> SPAPR_DR_ISOLATION_STATE_UNISOLATED);
> @@ -2543,7 +2550,7 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t 
> addr_start, uint64_t size,
>  /* send hotplug notification to the
>   * guest only in case of hotplugged memory
>   */
> -if (dev->hotplugged) {
> +if (!coldplugged) {
>  if (dedicated_hp_event_source) {
>  drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB,
>  addr_start / SPAPR_MEMORY_BLOCK_SIZE);
> @@ -2776,6 +2783,7 @@ static void spapr_core_plug(HotplugHandler 
> *hotplug_dev, DeviceState *dev,
>  int smt = kvmppc_smt_threads();
>  CPUArchId *core_slot;
>  int index;
> +bool coldplugged = spapr_coldplugged(dev);
>  
>  core_slot = spapr_find_cpu_slot(MACHINE(hotplug_dev), cc->core_id, 
> );
>  if (!core_slot) {
> @@ -2797,7 +2805,7 @@ static void spapr_core_plug(HotplugHandler 
> *hotplug_dev, DeviceState *dev,
>  
>  if (drc) {
>  sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
> -drck->attach(drc, dev, fdt, fdt_offset, !dev->hotplugged, 
> _err);
> +drck->attach(drc, dev, fdt, fdt_offset, coldplugged, _err);
>  if (local_err) {
>  g_free(fdt);
>  error_propagate(errp, local_err);
> @@ -2805,7 +2813,7 @@ static void spapr_core_plug(HotplugHandler 
> *hotplug_dev, DeviceState *dev,
>  }
>  }
>  
> -if (dev->hotplugged) {
> +if (!coldplugged) {
>  /*
>   *

Re: [Qemu-devel] [PATCHv4 0/5] Clean up compatibility mode handling

2017-05-30 Thread David Gibson

On Tue, May 30, 2017 at 10:01:36AM +0200, Greg Kurz wrote:
> On Tue, 30 May 2017 16:18:52 +1000
> David Gibson  wrote:
> 
> > On Tue, May 30, 2017 at 01:14:16AM +0200, Greg Kurz wrote:
> > > On Fri, 26 May 2017 15:23:14 +1000
> > > David Gibson  wrote:
> > > 
> > > [...]  
> > > > 
> > > > 
> > > > Changes since v3:
> > > >   * Backwards compatible -cpu handling now removes compat= option from
> > > > options passed on to the cpu, so it doesn't trigger further 
> > > > warnings  
> > > 
> > > This seems to also have another interesting effect.
> > > 
> > > getset_compat_deprecated() could be called either during CPU realization 
> > > from:
> > > 
> > > object_property_parse()
> > > {
> > > Visitor *v = string_input_visitor_new(string);
> > > object_property_set(obj, v, name, errp);
> > > ...
> > > }
> > > 
> > > or during a QOM set operation from:
> > > 
> > > void object_property_set_qobject(Object *obj, QObject *value,
> > >  const char *name, Error **errp)
> > > {
> > > Visitor *v;
> > > 
> > > v = qobject_input_visitor_new(value);
> > > object_property_set(obj, v, name, errp);
> > > ...
> > > }
> > > 
> > > or similarly during a QOM get operation with a QObject output visitor.
> > > 
> > > The realization path no longer exists with patch 2, so you don't need
> > > to implement a null string input visitor anymore.  
> > 
> > s/patch 2/patch 3/?
> > 
> 
> Yes indeed, sorry :)
> 
> > Is that true though?  It shouldn't get called through that path in
> > practice, because we strip the compat property from the cpu object
> > properties.  But it could get called if either a) the user explicitly
> > creates a cpu object with -device CPU,compat=whatever or b) if the
> 
> Unless I'm missing something, properties of the CPU objects aren't
> exposed by CPU devices:
> 
> qemu-system-ppc64 -machine pseries \
>   -device POWER8_v2.0-spapr-cpu-core,help
> POWER8_v2.0-spapr-cpu-core.core-id=int
> POWER8_v2.0-spapr-cpu-core.node-id=int32
> POWER8_v2.0-spapr-cpu-core.nr-threads=int
> 
> and creation of CPU objects with -object isn't possible either since
> they don't inherit from the TYPE_USER_CREATABLE class.

Ah, true, I hadn't considered that.

> > user uses the compat= property with a machine type other than pseries.
> > 
> 
> But yes, it is still possible to go through the object_property_parse()
> path with another machine type indeed.

> > Of course the user *shouldn't* do either of those things, but
> > providing a meaningful error if they do is pretty much the whole
> > purpose of this getter/setter method.
> > 
> 
> All old non-pseries machine types already complain when started with
> a POWER7 or newer CPU. Providing the extra error message looks weird:
> 
> qemu-system-ppc64 -machine ppce500 \
>   -cpu POWER7,compat=power6
> qemu-system-ppc64: CPU 'compat' property is deprecated and has no effect;
>  use max-cpu-compat machine property instead
> MMU model 983043 not supported by this machine.
> 
> but I guess it's better than crashing. :)

Well, sure POWER7 doesn't make sense for an e500 machine for other
reasons.  But POWER7 or POWER8 _would_ make sense for powernv, where
compat= doesn't.

> 
> > > 
> > > This means that patch 1 is no longer needed if I get things right but
> > > you probably want Markus to second that.
> > >   
> > > >   * Add a migration fix make cpu_synchronize_state() safe in post_load
> > > > handlers, which in turn fixes a bug in 5/5.
> > > >   * A number of bugfixes and other tweaks suggested by feedback on v2.
> > > > 
> > > > Changes since RFCv2:
> > > >   * Many patches dropped, since they're already merged
> > > >   * Rebased, fixed conflicts
> > > >   * Restored support for backwards migration (wasn't as complicated as
> > > > I thought)
> > > >   * Updated final patch's description to more accurately reflect the
> > > > logic
> > > > 
> > > > Changes since RFCv1:
> > > >   * Change CAS logic to prefer compatibility modes over raw mode
> > > >   * Simplified by giving up on half-hearted attempts to maintain
> > > > backwards migration
> > > >   * Folded migration stream changes into a single patch
> > > >   * Removed some preliminary patches which are already merged
> > > > 
> > > > David Gibson (4):
> > > >   migration: Mark CPU states dirty before incoming migration/loadvm
> > > >   pseries: Move CPU compatibility property to machine
> > > >   pseries: Reset CPU compatibility mode
> > > >   ppc: Rework CPU compatibility testing across migration
> > > > 
> > > > Greg Kurz (1):
> > > >   qapi: add explicit null to string input and output visitors
> > > > 
> > > >  cpus.c   |   9 
> > > >  hw/ppc/spapr.c   |   8 +++-
> > > >  hw/ppc/spapr_cpu_core.c  |  62 +-
> > > >  hw/ppc/spapr_hcall.c |   8 ++--
> > > >  include/hw/ppc/spapr.h   |

Re: [Qemu-devel] [PATCH v3 1/4] ACPI: Add APEI GHES Table Generation support

2017-05-30 Thread gengdongjiu

Laszlo,
   very sorry for that, it was my mistake that missing your name.
when I reply mail, I copy the "CC" list to the mail reply list, but forget to 
copy the "To" list.
I will check your comments in detailed later and reply you. thanks again.



On 2017/5/30 0:03, Laszlo Ersek wrote:
> Hi,
> 
> did you remove me from the To: / Cc: list intentionally, or was that an
> oversight? I caught your message in my list folders only by luck.
> 
> Some followup below:
> 
> On 05/29/17 17:27, gengdongjiu wrote:
> 
>>> (46) What is "physical_addr" good for? Below I can only see an 
>>> assignment to it, in ghes_update_guest(). Where is the field read?
> 
>> this "physical_addr" address is the physical error address in the
>> CPER. such as the physical address that happen hwpoison, this address
>> is delivered by the KVM and QEMU transfer this address to physical.
> I understand that in the ghes_update_guest() function, you accept a
> parameter called "physical_address", and you pass it on to
> ghes_generate_cper_record(). That makes sense, yes.
> 
> However, you also assign the same value to "ges.physical_addr". And that
> structure field is never read. So my point is that the
> "GhesErrorState.physical_addr" field is superfluous and should be removed.
> 
> I checked the other three patches in the series and they don't seem to
> read that structure member either. Correct me if I'm wrong.
> 
>>> (55) What happens if you run out of the preallocated memory?
> 
>> if it run out of the preallocated memory. it will overwrite other 
>> error source. every block's size is fixed. so it does not easy
>> dynamically extend the size if it is overflow. Anyway I will add a
>> error report if it happens overwrite.
> I understand (and agree) that dynamic allocation is not possible here.
> 
> But that doesn't justify overwriting the error status data block that
> belongs to a different data source. (Worse, if this happens with the
> last error status data block, for error source 10, you could overwrite
> memory that belongs to the OS.)
> 
> If an error status data block becomes full, then we should either wrap
> back to the start of the same data block, or else stop forwarding errors
> for that error source.
> 
> Does the ACPI spec say anything about this? I.e., about the case when
> the system runs out of the memory that was reserved for recording
> hardware errors?
> 
> +
> +mem_err = (struct cper_sec_mem_err *) (gdata + 1);
> +
> +/* In order to simplify simulation, hardcode the CPER section to 
> memory
> + * section.
> + */
> +mem_err->validation_bits |= CPER_MEM_VALID_ERROR_TYPE;
> +mem_err->error_type = 3;
>>>
>>> (58) Is this supposed to stand for "Multi-bit ECC" (from "N.2.5 Memory 
>>> Error Section" in UEFI 2.6)? Should we have a macro for that?
> 
>> Yes, it is. What do you mean a macro?
> 
> A #define for the integer value 3.
> 
>> For all the errors that happen in the guest OS, in order to simulate
>> easy, I abstract all the error section to memory section, even though
>> the error section is processor or other section.
> Why is that a valid thing to do? (I'm not doubting it is valid, I'm just
> asking.) Will that not confuse the ACPI subsystem of the guest OS?
> 
>> I do not know whether do you have some suggestion for that.
> Well I would have thought (without any expertise on the subject) that
> hardware errors from the host side should be mapped to the guest more or
> less "type correct". IOW, it looks strange that, say, a CPU error is
> reported as a memory error. But this is just an uneducated guess.
> 
> +mem_err->validation_bits |= CPER_MEM_VALID_CARD | 
> CPER_MEM_VALID_MODULE |
> +CPER_MEM_VALID_BANK | CPER_MEM_VALID_ROW |
> +CPER_MEM_VALID_COLUMN | CPER_MEM_VALID_BIT_POSITION;
> +mem_err->card = 1;
> +mem_err->module = 2;
> +mem_err->bank = 3;
> +mem_err->row = 1;
> +mem_err->column = 2;
> +mem_err->bit_pos = 5;
>>>
>>> (60) I have no idea where these values come from.
> 
>> For all the errors that happen in the guest OS, in order to simulate
>> easy, I abstract all the error section to memory section, and hard
>> code the memory section error value as above.
> Sure, but why is that safe? Will the guest OS not want to do something
> about these error details? If we are feeding the guest OS invalid error
> details, will that not lead to confusion?
> 
>>> (64) What does "reqr" stand for?
>> It stand for the request size.
> Can you please call it "req_size" or something similar? The English
> expression
> 
>   request size
> 
> contains only one "r" letter, so it's hard to understand where the
> second "r" in "reqr" comes from.
> 
> Thanks,
> Laszlo
> 
> .
>

Re: [Qemu-devel] [PATCH] pci-bridge/i82801b11: Convert to realize

2017-05-30 Thread Mao Zhongyi


Hi, Marcel & Markus

On 05/29/2017 07:37 PM, Markus Armbruster wrote:

Marcel Apfelbaum  writes:


On 27/05/2017 9:58, Mao Zhongyi wrote:



On 05/26/2017 10:08 PM, Marcel Apfelbaum wrote:



On 26/05/2017 15:15, Mao Zhongyi wrote:

The pci-birdge device i82801b11 still implements the old
PCIDeviceClass .init() through i82801b11_bridge_init()
instead of the new .realize(). All devices need to be
converted to .realize(). So convert it and rename it to
i82801b11_bridge_realize().

Signed-off-by: Mao Zhongyi 
---
  hw/pci-bridge/i82801b11.c | 9 -
  1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/hw/pci-bridge/i82801b11.c b/hw/pci-bridge/i82801b11.c
index 2404e7e..dca3162 100644
--- a/hw/pci-bridge/i82801b11.c
+++ b/hw/pci-bridge/i82801b11.c
@@ -44,6 +44,7 @@
  #include "qemu/osdep.h"
  #include "hw/pci/pci.h"
  #include "hw/i386/ich9.h"
+#include "qapi/error.h"
/*/
@@ -58,7 +59,7 @@ typedef struct I82801b11Bridge {
  /*< public >*/
  } I82801b11Bridge;
  -static int i82801b11_bridge_initfn(PCIDevice *d)
+static void i82801b11_bridge_realize(PCIDevice *d, Error **errp)
  {
  int rc;
  @@ -70,12 +71,10 @@ static int i82801b11_bridge_initfn(PCIDevice *d)
  goto err_bridge;
  }
  pci_config_set_prog_interface(d->config,
PCI_CLASS_BRIDGE_PCI_INF_SUB);
-return 0;
+return;
err_bridge:
  pci_bridge_exitfn(d);


Hi,

Since you move to realize, you may want to leverage the errp to add
info on errors.

Thanks,
Marcel



Hi, Marcel



Hi!

Thanks for your quick reply and advice. In fact, I have considered
adding an error
message to errp when an error occurs. But I found that
pci_bridge_ssvid_init() has
reported a specific info when it fails. If the error is reported
here again, it seems
a bit more superfluous, so it's not adopted.


I agree we don't want to see the error twice, but that means we may want
to pass the error pointer to pci_bridge_ssvid_init and so on.


Yes.

Callees that report errors differently are a common issue when
converting a function to Error.  The correct solution is to convert
these callees to Error, too.

Sometimes, this is just too much to be practical.  Then a "shallow"
conversion can make sense as a stop-gap, even though it'll result in
clearly inferior error reporting.

At a glance, this one doesn't look impractical.


One of the main things we achieve when moving to 'realize' is better
error handling, so if we don't do that maybe is not worth it.


We need to convert all devices to realize() so we can get rid of the old
qdev methods.  But you're right in that we better convert them
correctly.


You may need to do several changes you didn't intend to
in order to do the error propagation right...


Yes.  An Error conversion can look simple at first, then balloon into a
multi-part series.



Thanks,
Marcel


Of course, output a readable error info
to make it easier for users is also a good choice. So, are you sure
you want to do
that?

Look forward to your feedback and suggestion soon.

Thanks a lot.
Mao


Any help with converting the remaining devices to realize() is much
appreciated.



Thanks for your detailed explanation, I think I know what to do.

Thanks,
Mao

Re: [Qemu-devel] [PATCH v2 3/6] scripts/qemu-gdb/tcg: new helper to dump tcg state

2017-05-30 Thread Philippe Mathieu-Daudé


On 05/17/2017 11:52 AM, Alex Bennée wrote:

This adds a simple helper to dump lock state within TCG.

Signed-off-by: Alex Bennée 


Reviewed-by: Philippe Mathieu-Daudé 


---
 scripts/qemu-gdb.py|  3 ++-
 scripts/qemugdb/tcg.py | 46 ++
 2 files changed, 48 insertions(+), 1 deletion(-)
 create mode 100644 scripts/qemugdb/tcg.py

diff --git a/scripts/qemu-gdb.py b/scripts/qemu-gdb.py
index 3e7adb87dc..91e928f6af 100644
--- a/scripts/qemu-gdb.py
+++ b/scripts/qemu-gdb.py
@@ -26,7 +26,7 @@ import os, sys

 sys.path.append(os.path.dirname(__file__))

-from qemugdb import aio, mtree, coroutine, timers
+from qemugdb import aio, mtree, coroutine, timers, tcg

 class QemuCommand(gdb.Command):
 '''Prefix for QEMU debug support commands'''
@@ -39,6 +39,7 @@ coroutine.CoroutineCommand()
 mtree.MtreeCommand()
 aio.HandlersCommand()
 timers.TimersCommand()
+tcg.TCGLockStatusCommand()

 coroutine.CoroutineSPFunction()
 coroutine.CoroutinePCFunction()
diff --git a/scripts/qemugdb/tcg.py b/scripts/qemugdb/tcg.py
new file mode 100644
index 00..8c7f1d7454
--- /dev/null
+++ b/scripts/qemugdb/tcg.py
@@ -0,0 +1,46 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# GDB debugging support, TCG status
+#
+# Copyright 2016 Linaro Ltd
+#
+# Authors:
+#  Alex Bennée 
+#
+# This work is licensed under the terms of the GNU GPL, version 2.  See
+# the COPYING file in the top-level directory.
+#
+# Contributions after 2012-01-13 are licensed under the terms of the
+# GNU GPL, version 2 or (at your option) any later version.
+
+# 'qemu tcg-lock-status' -- display the TCG lock status across threads
+
+import gdb
+
+class TCGLockStatusCommand(gdb.Command):
+'''Display TCG Execution Status'''
+def __init__(self):
+gdb.Command.__init__(self, 'qemu tcg-lock-status', gdb.COMMAND_DATA,
+ gdb.COMPLETE_NONE)
+
+def invoke(self, arg, from_tty):
+gdb.write("Thread, BQL (iothread_mutex), Replay, Blocked?\n")
+for thread in gdb.inferiors()[0].threads():
+thread.switch()
+
+iothread = gdb.parse_and_eval("iothread_locked")
+replay = gdb.parse_and_eval("replay_locked")
+
+frame = gdb.selected_frame()
+if frame.name() == "__lll_lock_wait":
+frame.older().select()
+mutex = gdb.parse_and_eval("mutex")
+owner = gdb.parse_and_eval("mutex->__data.__owner")
+blocked = ("__lll_lock_wait waiting on %s from %d" %
+   (mutex, owner))
+else:
+blocked = "not blocked"
+
+gdb.write("%d/%d, %s, %s, %s\n" % (thread.num, thread.ptid[1],
+   iothread, replay, blocked))

Re: [Qemu-devel] [PATCH] pci: Set err to errp directly rather than through error_porpagate()

2017-05-30 Thread Mao Zhongyi


Hi, Markus

On 05/29/2017 03:51 PM, Markus Armbruster wrote:

Igor Mammedov  writes:


On Fri, 26 May 2017 16:29:25 +0800
Mao Zhongyi  wrote:


ioh3420_interrupts_init() and its callers, rp_realize() and
pci_qdev_realize() fill error message to local_err, then
propagate it to errp by error_porpagate(), which's not necessary.
So eliminate it and pass errp directly instead of local_err.
Of course, error_propagate() also will be removed.

Signed-off-by: Mao Zhongyi 
---

[...]



dropping local_error here looks wrong since it's used
to check error status inside these functions and to undo
side effects in case of failure.

consider if caller pass errp = NULL
then error handling path won't be executed.


Exactly.  The big comment in include/qapi/error.h explains this and
more.


Thanks for providing the specific path, I have seen detailed comments
that have really helped me.

Thanks,
Mao




So keep the rest of the patch but drop above hunks.

Re: [Qemu-devel] [PATCH 1/2] hw/arm/virt-acpi-build: build SLIT when needed

2017-05-30 Thread Shannon Zhao



On 2017/5/30 1:37, Andrew Jones wrote:
> Cc: Shannon Zhao 
> Signed-off-by: Andrew Jones 
Reviewed-by: Shannon Zhao 

> ---
>  hw/arm/virt-acpi-build.c | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
> index e5852067f5bd..2079828c22a4 100644
> --- a/hw/arm/virt-acpi-build.c
> +++ b/hw/arm/virt-acpi-build.c
> @@ -776,6 +776,10 @@ void virt_acpi_build(VirtMachineState *vms, 
> AcpiBuildTables *tables)
>  if (nb_numa_nodes > 0) {
>  acpi_add_table(table_offsets, tables_blob);
>  build_srat(tables_blob, tables->linker, vms);
> +if (have_numa_distance) {
> +acpi_add_table(table_offsets, tables_blob);
> +build_slit(tables_blob, tables->linker);
> +}
>  }
>  
>  if (its_class_name() && !vmc->no_its) {
> 

-- 
Shannon

Re: [Qemu-devel] [PATCH 2/2] hw/arm/virt: fdt: generate distance-map when needed

2017-05-30 Thread Shannon Zhao



On 2017/5/30 1:37, Andrew Jones wrote:
> This is based on patch Shannon Zhao originally posted.
> 
> Cc: Shannon Zhao 
> Signed-off-by: Andrew Jones 
Reviewed-by: Shannon Zhao 

> ---
>  hw/arm/virt.c | 21 +
>  1 file changed, 21 insertions(+)
> 
> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index c7c8159dfd59..4db2d4207cf2 100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c
> @@ -219,6 +219,27 @@ static void create_fdt(VirtMachineState *vms)
>  "clk24mhz");
>  qemu_fdt_setprop_cell(fdt, "/apb-pclk", "phandle", vms->clock_phandle);
>  
> +if (have_numa_distance) {
> +int size = nb_numa_nodes * nb_numa_nodes * 3 * sizeof(uint32_t);
> +uint32_t *matrix = g_malloc0(size);
> +int idx, i, j;
> +
> +for (i = 0; i < nb_numa_nodes; i++) {
> +for (j = 0; j < nb_numa_nodes; j++) {
> +idx = (i * nb_numa_nodes + j) * 3;
> +matrix[idx + 0] = cpu_to_be32(i);
> +matrix[idx + 1] = cpu_to_be32(j);
> +matrix[idx + 2] = cpu_to_be32(numa_info[i].distance[j]);
> +}
> +}
> +
> +qemu_fdt_add_subnode(fdt, "/distance-map");
> +qemu_fdt_setprop_string(fdt, "/distance-map", "compatible",
> +"numa-distance-map-v1");
> +qemu_fdt_setprop(fdt, "/distance-map", "distance-matrix",
> + matrix, size);
> +g_free(matrix);
> +}
>  }
>  
>  static void fdt_add_psci_node(const VirtMachineState *vms)
> 

-- 
Shannon

Re: [Qemu-devel] [PATCH v2 6/6] new script/analyse-tlb-flushes-simpletrace.py

2017-05-30 Thread Pranith Kumar

Hi Alex,

Please find some comments and questions below:

On Wed, May 17, 2017 at 10:52 AM, Alex Bennée  wrote:
> This is a simple helper script to extract TLB flush stats from the a
> simpletrace file and plot the results.
>
> Signed-off-by: Alex Bennée 
>
> ---
> v2
>   - re-factored for new trace events
>   - added time and latency graphs
> ---
>  scripts/analyse-tlb-flushes-simpletrace.py | 144 
> +
>  1 file changed, 144 insertions(+)
>  create mode 100755 scripts/analyse-tlb-flushes-simpletrace.py
>
> diff --git a/scripts/analyse-tlb-flushes-simpletrace.py 
> b/scripts/analyse-tlb-flushes-simpletrace.py
> new file mode 100755
> index 00..03fab8c86b
> --- /dev/null
> +++ b/scripts/analyse-tlb-flushes-simpletrace.py



> +
> +def get_args():
> +"Grab options"
> +parser = argparse.ArgumentParser()
> +parser.add_argument("--output", "-o", type=str, help="Render plot to 
> file")
> +parser.add_argument("--vcpus", type=int, help="Number of vCPUS")

It is not really clear what this argument is for. I guess you are
saying how many cpus the guest from which trace file was generated
had? What happens if we pass in less number of vcpus than used for
generation?

> +parser.add_argument("--graph", choices=['time', 'latency'], 
> default='time')

What does latency here indicate? I tried this argument on a sample
trace file I generated, and it had three empty boxes.

> +parser.add_argument("events", type=str, help='trace file read from')
> +parser.add_argument("tracefile", type=str, help='trace file read from')

The help text for 'events' file here should be something like 'the
trace events file'.

Thanks,
--
Pranith

Re: [Qemu-devel] [Qemu devel v5 PATCH 4/5] msf2: Add Smartfusion2 SoC.

2017-05-30 Thread Alistair Francis

On Sun, May 28, 2017 at 10:17 PM, sundeep subbaraya
 wrote:
> Hi Alistair,
>
> On Sat, May 27, 2017 at 5:18 AM, Alistair Francis 
> wrote:
>>
>> On Tue, May 16, 2017 at 8:38 AM, Subbaraya Sundeep
>>  wrote:
>> > Smartfusion2 SoC has hardened Microcontroller subsystem
>> > and flash based FPGA fabric. This patch adds support for
>> > Microcontroller subsystem in the SoC.
>> >
>> > Signed-off-by: Subbaraya Sundeep 
>> > ---
>> >  default-configs/arm-softmmu.mak |   1 +
>> >  hw/arm/Makefile.objs|   1 +
>> >  hw/arm/msf2-soc.c   | 201
>> > 
>> >  include/hw/arm/msf2-soc.h   |  69 ++
>> >  4 files changed, 272 insertions(+)
>> >  create mode 100644 hw/arm/msf2-soc.c
>> >  create mode 100644 include/hw/arm/msf2-soc.h
>> >
>> > diff --git a/default-configs/arm-softmmu.mak
>> > b/default-configs/arm-softmmu.mak
>> > index 78d7af0..7062512 100644
>> > --- a/default-configs/arm-softmmu.mak
>> > +++ b/default-configs/arm-softmmu.mak
>> > @@ -122,3 +122,4 @@ CONFIG_ACPI=y
>> >  CONFIG_SMBIOS=y
>> >  CONFIG_ASPEED_SOC=y
>> >  CONFIG_GPIO_KEY=y
>> > +CONFIG_MSF2=y
>> > diff --git a/hw/arm/Makefile.objs b/hw/arm/Makefile.objs
>> > index 4c5c4ee..c828061 100644
>> > --- a/hw/arm/Makefile.objs
>> > +++ b/hw/arm/Makefile.objs
>> > @@ -18,3 +18,4 @@ obj-$(CONFIG_FSL_IMX25) += fsl-imx25.o imx25_pdk.o
>> >  obj-$(CONFIG_FSL_IMX31) += fsl-imx31.o kzm.o
>> >  obj-$(CONFIG_FSL_IMX6) += fsl-imx6.o sabrelite.o
>> >  obj-$(CONFIG_ASPEED_SOC) += aspeed_soc.o aspeed.o
>> > +obj-$(CONFIG_MSF2) += msf2-soc.o
>> > diff --git a/hw/arm/msf2-soc.c b/hw/arm/msf2-soc.c
>> > new file mode 100644
>> > index 000..329e30c
>> > --- /dev/null
>> > +++ b/hw/arm/msf2-soc.c
>> > @@ -0,0 +1,201 @@
>> > +/*
>> > + * SmartFusion2 SoC emulation.
>> > + *
>> > + * Copyright (c) 2017 Subbaraya Sundeep 
>> > + *
>> > + * Permission is hereby granted, free of charge, to any person
>> > obtaining a copy
>> > + * of this software and associated documentation files (the
>> > "Software"), to deal
>> > + * in the Software without restriction, including without limitation
>> > the rights
>> > + * to use, copy, modify, merge, publish, distribute, sublicense, and/or
>> > sell
>> > + * copies of the Software, and to permit persons to whom the Software
>> > is
>> > + * furnished to do so, subject to the following conditions:
>> > + *
>> > + * The above copyright notice and this permission notice shall be
>> > included in
>> > + * all copies or substantial portions of the Software.
>> > + *
>> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
>> > EXPRESS OR
>> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>> > MERCHANTABILITY,
>> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
>> > SHALL
>> > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
>> > OTHER
>> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>> > ARISING FROM,
>> > + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
>> > DEALINGS IN
>> > + * THE SOFTWARE.
>> > + */
>> > +
>> > +#include "qemu/osdep.h"
>> > +#include "qapi/error.h"
>> > +#include "qemu-common.h"
>> > +#include "hw/arm/arm.h"
>> > +#include "exec/address-spaces.h"
>> > +#include "hw/char/serial.h"
>> > +#include "hw/boards.h"
>> > +#include "sysemu/block-backend.h"
>> > +#include "hw/arm/msf2-soc.h"
>> > +
>> > +#define MSF2_TIMER_BASE 0x40004000
>> > +#define MSF2_SYSREG_BASE0x40038000
>> > +
>> > +#define MSF2_TIMER_IRQ0 14
>> > +#define MSF2_TIMER_IRQ1 15
>> > +
>> > +static const uint32_t spi_addr[MSF2_NUM_SPIS] = { 0x40001000 ,
>> > 0x40011000 };
>> > +static const uint32_t uart_addr[MSF2_NUM_UARTS] = { 0x4000 ,
>> > 0x4001 };
>> > +
>> > +static const int spi_irq[MSF2_NUM_SPIS] = { 2, 3 };
>> > +static const int uart_irq[MSF2_NUM_UARTS] = { 10, 11 };
>> > +
>> > +static void m2sxxx_soc_initfn(Object *obj)
>> > +{
>> > +MSF2State *s = MSF2_SOC(obj);
>> > +int i;
>> > +
>> > +object_initialize(>armv7m, sizeof(s->armv7m), TYPE_ARMV7M);
>> > +qdev_set_parent_bus(DEVICE(>armv7m), sysbus_get_default());
>> > +
>> > +object_initialize(>sysreg, sizeof(s->sysreg), TYPE_MSF2_SYSREG);
>> > +qdev_set_parent_bus(DEVICE(>sysreg), sysbus_get_default());
>> > +
>> > +object_initialize(>timer, sizeof(s->timer), TYPE_MSS_TIMER);
>> > +qdev_set_parent_bus(DEVICE(>timer), sysbus_get_default());
>> > +
>> > +for (i = 0; i < MSF2_NUM_SPIS; i++) {
>> > +object_initialize(>spi[i], sizeof(s->spi[i]),
>> > +  TYPE_MSS_SPI);
>> > +qdev_set_parent_bus(DEVICE(>spi[i]), sysbus_get_default());
>> > +}
>> > +}
>> > +
>> > +static void m2sxxx_soc_realize(DeviceState *dev_soc, Error **errp)
>> > +{
>> > +MSF2State *s = MSF2_SOC(dev_soc);
>> > +

Re: [Qemu-devel] [Qemu devel v5 PATCH 5/5] msf2: Add Emcraft's Smartfusion2 SOM kit.

2017-05-30 Thread Alistair Francis

On Sun, May 28, 2017 at 10:26 PM, sundeep subbaraya
 wrote:
> Hi Alistair,
>
> On Sat, May 27, 2017 at 5:30 AM, Alistair Francis 
> wrote:
>>
>> On Tue, May 16, 2017 at 8:38 AM, Subbaraya Sundeep
>>  wrote:
>> > Emulated Emcraft's Smartfusion2 System On Module starter
>> > kit.
>> >
>> > Signed-off-by: Subbaraya Sundeep 
>> > ---
>> >  hw/arm/Makefile.objs |  1 +
>> >  hw/arm/msf2-som.c| 89
>> > 
>> >  2 files changed, 90 insertions(+)
>> >  create mode 100644 hw/arm/msf2-som.c
>> >
>> > diff --git a/hw/arm/Makefile.objs b/hw/arm/Makefile.objs
>> > index c828061..4b02093 100644
>> > --- a/hw/arm/Makefile.objs
>> > +++ b/hw/arm/Makefile.objs
>> > @@ -5,6 +5,7 @@ obj-y += omap_sx1.o palm.o realview.o spitz.o
>> > stellaris.o
>> >  obj-y += tosa.o versatilepb.o vexpress.o virt.o xilinx_zynq.o z2.o
>> >  obj-$(CONFIG_ACPI) += virt-acpi-build.o
>> >  obj-y += netduino2.o
>> > +obj-y += msf2-som.o
>>
>> This should be obj-$(CONFIG_MSF2).
>
>
> Ok will change it.
>>
>>
>> >  obj-y += sysbus-fdt.o
>> >
>> >  obj-y += armv7m.o exynos4210.o pxa2xx.o pxa2xx_gpio.o pxa2xx_pic.o
>> > diff --git a/hw/arm/msf2-som.c b/hw/arm/msf2-som.c
>> > new file mode 100644
>> > index 000..cd2b759
>> > --- /dev/null
>> > +++ b/hw/arm/msf2-som.c
>> > @@ -0,0 +1,89 @@
>> > +/*
>> > + * SmartFusion2 SOM starter kit(from Emcraft) emulation.
>> > + *
>> > + * Copyright (c) 2017 Subbaraya Sundeep 
>> > + *
>> > + * Permission is hereby granted, free of charge, to any person
>> > obtaining a copy
>> > + * of this software and associated documentation files (the
>> > "Software"), to deal
>> > + * in the Software without restriction, including without limitation
>> > the rights
>> > + * to use, copy, modify, merge, publish, distribute, sublicense, and/or
>> > sell
>> > + * copies of the Software, and to permit persons to whom the Software
>> > is
>> > + * furnished to do so, subject to the following conditions:
>> > + *
>> > + * The above copyright notice and this permission notice shall be
>> > included in
>> > + * all copies or substantial portions of the Software.
>> > + *
>> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
>> > EXPRESS OR
>> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>> > MERCHANTABILITY,
>> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
>> > SHALL
>> > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
>> > OTHER
>> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>> > ARISING FROM,
>> > + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
>> > DEALINGS IN
>> > + * THE SOFTWARE.
>> > + */
>> > +
>> > +#include "qemu/osdep.h"
>> > +#include "qapi/error.h"
>> > +#include "hw/boards.h"
>> > +#include "hw/arm/msf2-soc.h"
>> > +#include "hw/arm/arm.h"
>> > +#include "exec/address-spaces.h"
>> > +
>> > +#define DDR_BASE_ADDRESS  0xA000
>> > +#define DDR_SIZE  (64 * M_BYTE)
>> > +
>> > +static void emcraft_sf2_init(MachineState *machine)
>> > +{
>> > +DeviceState *dev;
>> > +DeviceState *spi_flash;
>> > +MSF2State *soc;
>> > +DriveInfo *dinfo = drive_get_next(IF_MTD);
>> > +qemu_irq cs_line;
>> > +SSIBus *spi_bus;
>> > +MemoryRegion *sysmem = get_system_memory();
>> > +MemoryRegion *ddr = g_new(MemoryRegion, 1);
>> > +
>> > +memory_region_init_ram(ddr, NULL, "ddr-ram", DDR_SIZE,
>> > +   _fatal);
>> > +vmstate_register_ram_global(ddr);
>> > +memory_region_add_subregion(sysmem, DDR_BASE_ADDRESS, ddr);
>>
>> The user can use -m to specify the amount of RAM to create in the
>> machine. Unless this board only ever includes 64MB of RAM you should
>> use that option (you will need to sanity check it though). If the
>> board only ever has 64MB it might be worth printing a warning to the
>> user if they specify an something. Although there might be a default
>> if they don't use -m, which makes it hard to print out a warning
>> message.
>
>
> This -m confuses me. Why is it necessary for an embedded board? RAM chip
> is fixed and not extendable. Whereas normal PC may have extra RAM slots.
> If another board has more RAM then we would instantiate another machine for
> it
> with that RAM size. Please explain. Maybe I am thinking in wrong direction.

I agree with you, it doesn't make sense for every board. For some
embedded boards it does make sense (ZynqMP can have a customisable
amount of memory) but for most it doesn't make too much sense.

In saying that it is a commonly used QEMU option, if you can find a
way to report a warning if the user tries to specify a custom amount
of memory I think that would be beneficial as QEMU will just ignore
their input. I have a feeling that the ram_size variable will be set
even if the user doesn't specify anything,

Re: [Qemu-devel] [PATCH 10/19] nbd/server: refactor nbd_trip

2017-05-30 Thread Eric Blake

On 05/30/2017 09:30 AM, Vladimir Sementsov-Ogievskiy wrote:
> - do not goto into switch block from outer block

This sentence didn't quite make sense.  Better might be:

- do not use 'goto error_reply' outside a switch to jump into the middle
of the switch's default case label

> - reduce code duplications

s/duplications/duplication/

> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> ---
>  nbd/server.c | 53 -
>  1 file changed, 20 insertions(+), 33 deletions(-)
> 

> @@ -1098,7 +1099,7 @@ static coroutine_fn void nbd_trip(void *opaque)
>  
>  if (ret < 0) {
>  reply.error = -ret;
> -goto error_reply;
> +goto reply;
>  }

But this was indeed a case of jumping from before the switch()...

>  default:
>  LOG("invalid request type (%" PRIu32 ") received", request.type);
>  reply.error = EINVAL;
> -error_reply:
> -/* We must disconnect after NBD_CMD_WRITE if we did not
> - * read the payload.
> - */

...into the middle, which, although valid C, is indeed unusual.

Reviewed-by: Eric Blake 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH 09/19] nbd/server: rename rc to ret

2017-05-30 Thread Eric Blake

On 05/30/2017 09:30 AM, Vladimir Sementsov-Ogievskiy wrote:
> For consistency use 'ret' name for saving return code everywhere
> in the file.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> ---
>  nbd/server.c | 38 +++---
>  1 file changed, 19 insertions(+), 19 deletions(-)
> 

Reviewed-by: Eric Blake 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH 08/19] nbd/server: get rid of fail: return rc

2017-05-30 Thread Eric Blake

On 05/30/2017 09:30 AM, Vladimir Sementsov-Ogievskiy wrote:
> "goto fail" error handling scheme is not needed for just returning
> error code. Better is return it immediately.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> ---
>  nbd/server.c | 28 
>  1 file changed, 12 insertions(+), 16 deletions(-)
> 

Reviewed-by: Eric Blake 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH 06/19] nbd/server: remove NBDClientNewData

2017-05-30 Thread Eric Blake

On 05/30/2017 09:30 AM, Vladimir Sementsov-Ogievskiy wrote:
> "co" field of NBDClientNewData is unused, so let's just use client
> pointer instead of extra structure.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> ---
>  nbd/server.c | 25 +++--
>  1 file changed, 7 insertions(+), 18 deletions(-)
> 

Reviewed-by: Eric Blake 

Might be worth mentioning in the commit message that it has never been
used, all the way back to its declaration in commit 1a6245a5.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH 05/19] nbd/server: refactor nbd_co_receive_request

2017-05-30 Thread Eric Blake

On 05/30/2017 09:30 AM, Vladimir Sementsov-Ogievskiy wrote:
> Move function tail, about receiving next request out of the function.
> Error path is simplified and nbd_co_receive_request becomes more
> corresponding to its name.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> ---
>  nbd/server.c | 41 +
>  1 file changed, 13 insertions(+), 28 deletions(-)

Quite the diffstat.

Reviewed-by: Eric Blake 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH v3 10/16] block/qcow2: Lock s->lock in preallocate()

2017-05-30 Thread Eric Blake

On 05/26/2017 11:55 AM, Max Reitz wrote:
> preallocate() is and will be called only from places that do not lock

Maybe: "that do not otherwise need to lock"

> s->lock: Currently that is qcow2_create2(), as of a future patch it will
> be called from qcow2_truncate(), too.
> 
> It therefore makes sense to move locking that mutex into preallocate()
> itself.
> 
> Signed-off-by: Max Reitz 
> ---
>  block/qcow2.c | 22 +++---
>  1 file changed, 15 insertions(+), 7 deletions(-)
> 

Reviewed-by: Eric Blake 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH v3 09/16] block/qcow2: Generalize preallocate()

2017-05-30 Thread Eric Blake

On 05/26/2017 11:55 AM, Max Reitz wrote:
> This patch adds two new parameters to the preallocate() function so we
> will be able to use it not just for preallocating a new image but also
> for preallocated image growth.
> 
> The offset parameter allows the caller to specify a virtual offset from
> which to start preallocating. For newly created images this is always 0,
> but for preallocating growth this will be the old image length.
> 
> The new_length parameter specifies the supposed new length of the image
> (basically the "end offset" for preallocation). During image truncation,
> bdrv_getlength() will return the old image length so we cannot rely on
> its return value then.

bdrv_getlength() is (currently) always sector-aligned (rounding up as
needed).

new_length is passed from qcow2_create2()'s total_size, which in turn
comes from qcow2_create()'s size, which can be user-supplied - but also
appears that we round up to ensure it is always sector-aligned.

Testing: 'qemu-img create -f qcow2 a.img 1' reports "size=1", but
'qemu-img info a.img' reports "virtual size: 512" - so we have a
secondary bug worth fixing later in that we are rounding AFTER what we
report to the user, but I'm not seeing a behavior change in this patch.

> 
> Signed-off-by: Max Reitz 
> ---
>  block/qcow2.c | 17 -
>  1 file changed, 12 insertions(+), 5 deletions(-)
> 

Reviewed-by: Eric Blake 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

signature.asc
Description: OpenPGP digital signature

[Qemu-devel] [RFC] q35/mch: implement extended TSEG sizes

2017-05-30 Thread Laszlo Ersek

The q35 machine type currently lets the guest firmware select a 1MB, 2MB
or 8MB TSEG (basically, SMRAM) size. In edk2/OVMF, we use 8MB, but even
that is not enough when a lot of VCPUs (more than approx. 224) are
configured -- SMRAM footprint scales largely proportionally with VCPU
count.

Introduce a new property for "mch" called "extended-tseg-mbytes", which
expresses (in megabytes) the user's choice of TSEG (SMRAM) size.

Invent a new, QEMU-specific register in the config space of the DRAM
Controller, at offset 0x50, in order to allow guest firmware to query the
TSEG (SMRAM) size.

According to Intel Document Number 316966-002, Table 5-1 "DRAM Controller
Register Address Map (D0:F0)":

Warning: Address locations that are not listed are considered Intel
 Reserved registers locations. Reads to Reserved registers may
 return non-zero values. Writes to reserved locations may
 cause system failures.

 All registers that are defined in the PCI 2.3 specification,
 but are not necessary or implemented in this component are
 simply not included in this document. The
 reserved/unimplemented space in the PCI configuration header
 space is not documented as such in this summary.

Offsets 0x50 and 0x51 are not listed in Table 5-1. They are also not part
of the standard PCI config space header. And they precede the capability
list as well, which starts at 0xe0 for this device.

When the guest writes value 0x to this register, the value that can be
read back is that of "mch.extended-tseg-mbytes" -- unless it remains
0x. The guest is required to write 0x first (as opposed to a
read-only register) because PCI config space is generally not cleared on
QEMU reset, and after S3 resume or reboot, new guest firmware running on
old QEMU could read a guest OS-injected value from this register.

After reading the available "extended" TSEG size, the guest firmware may
actually request that TSEG size by writing pattern 11b to the ESMRAMC
register's TSEG_SZ bit-field. (The Intel spec referenced above defines
only patterns 00b (1MB), 01b (2MB) and 10b (8MB); 11b is reserved.)

On the QEMU command line, the value can be set with

  -global mch.extended-tseg-mbytes=N

The default value for 2.10+ q35 machine types is 16. The value is limited
to 0xfff (4095) at the moment, purely so that the product (4095 MB) can be
stored to the uint32_t variable "tseg_size" in mch_update_smram(). Users
are responsible for choosing sensible TSEG sizes.

On 2.9 and earlier q35 machine types, the default value is 0. This lets
the 11b bit pattern in ESMRAMC.TSEG_SZ, and the register at offset 0x50,
keep their original behavior.

When "extended-tseg-mbytes" is nonzero, the new register at offset 0x50 is
set to that value on reset, for completeness.

PCI config space is migrated automatically, so no VMSD changes are
necessary.

Cc: "Michael S. Tsirkin" 
Cc: Gerd Hoffmann 
Cc: Paolo Bonzini 
Ref: https://bugzilla.redhat.com/show_bug.cgi?id=1447027
Ref: https://lists.01.org/pipermail/edk2-devel/2017-May/010456.html
Signed-off-by: Laszlo Ersek 
---

Notes:
I haven't yet written any OVMF code to interface with this; I figured
I'd ask for comments on the approach first. Thanks.

 include/hw/i386/pc.h  |  5 +
 include/hw/pci-host/q35.h |  6 ++
 hw/pci-host/q35.c | 41 ++---
 3 files changed, 49 insertions(+), 3 deletions(-)

diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index e447f5d8f4d1..78cd630f3133 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -384,6 +384,11 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *);
 
 #define PC_COMPAT_2_9 \
 HW_COMPAT_2_9 \
+{\
+.driver   = "mch",\
+.property = "extended-tseg-mbytes",\
+.value= stringify(0),\
+},\
 
 #define PC_COMPAT_2_8 \
 HW_COMPAT_2_8 \
diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h
index 53b6760c16c5..58983c00b32d 100644
--- a/include/hw/pci-host/q35.h
+++ b/include/hw/pci-host/q35.h
@@ -60,6 +60,7 @@ typedef struct MCHPCIState {
 uint64_t above_4g_mem_size;
 uint64_t pci_hole64_size;
 uint32_t short_root_bus;
+uint16_t ext_tseg_mbytes;
 } MCHPCIState;
 
 typedef struct Q35PCIHost {
@@ -91,6 +92,11 @@ typedef struct Q35PCIHost {
 /* D0:F0 configuration space */
 #define MCH_HOST_BRIDGE_REVISION_DEFAULT   0x0
 
+#define MCH_HOST_BRIDGE_EXT_TSEG_MBYTES0x50
+#define MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_SIZE   2
+#define MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY  0x
+#define MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_MAX0xfff
+
 #define MCH_HOST_BRIDGE_PCIEXBAR   0x60/* 64bit register */
 #define MCH_HOST_BRIDGE_PCIEXBAR_SIZE  8   /* 64bit register */
 #define MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT   0xb000
diff --git

Re: [Qemu-devel] [PATCH v2 5/6] vhost-user: add slave-req-fd support

2017-05-30 Thread Maxime Coquelin




On 05/30/2017 08:17 PM, Michael S. Tsirkin wrote:

On Fri, May 26, 2017 at 04:28:57PM +0200, Maxime Coquelin wrote:

From: Marc-André Lureau

Learn to give a socket to the slave to let him make requests to the
master.

Signed-off-by: Marc-André Lureau
Signed-off-by: Maxime Coquelin
---
  docs/specs/vhost-user.txt |  32 +++-
  hw/virtio/vhost-user.c| 127 ++
  2 files changed, 157 insertions(+), 2 deletions(-)

diff --git a/docs/specs/vhost-user.txt b/docs/specs/vhost-user.txt
index 036890f..5fa7016 100644
--- a/docs/specs/vhost-user.txt
+++ b/docs/specs/vhost-user.txt
@@ -139,6 +139,7 @@ in the ancillary data:
   * VHOST_USER_SET_VRING_KICK
   * VHOST_USER_SET_VRING_CALL
   * VHOST_USER_SET_VRING_ERR
+ * VHOST_USER_SET_SLAVE_REQ_FD
  
  If Master is unable to send the full message or receives a wrong reply it will

  close the connection. An optional reconnection mechanism can be implemented.
@@ -252,6 +253,18 @@ Once the source has finished migration, rings will be 
stopped by
  the source. No further update must be done before rings are
  restarted.
  
+Slave communication

+---
+
+An optional communication channel is provided if the slave declares
+VHOST_USER_PROTOCOL_F_SLAVE_REQ protocol feature, to allow the slave to make
+requests to the master.
+
+The fd is provided via VHOST_USER_SET_SLAVE_REQ_FD ancillary data.
+
+A slave may then send VHOST_USER_SLAVE_* messages to the master
+using this fd communication channel.
+
  Protocol features
  -
  
@@ -260,9 +273,10 @@ Protocol features

  #define VHOST_USER_PROTOCOL_F_RARP   2
  #define VHOST_USER_PROTOCOL_F_REPLY_ACK  3
  #define VHOST_USER_PROTOCOL_F_MTU4
+#define VHOST_USER_PROTOCOL_F_SLAVE_REQ  5
  
-Message types

--
+Master message types
+
  
   * VHOST_USER_GET_FEATURES
  


So down the road, we should make sure a device without
VHOST_USER_PROTOCOL_F_SLAVE_REQ does not advertise IOMMU
since you don't handle these messages otherwise.


Right, I will add a check to ensure this, and make this clearer
in the IOMMU spec part.

Thanks,
Maxime

Re: [Qemu-devel] [PATCH 07/19] nbd/server: nbd_negotiate: fix error path

2017-05-30 Thread Eric Blake

On 05/30/2017 09:30 AM, Vladimir Sementsov-Ogievskiy wrote:
> Current code will return 0 on this write_sync fail, as rc is 0
> after successful nbd_negotiate_options. Fix this.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> ---
>  nbd/server.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 

Reviewed-by: Eric Blake 

> diff --git a/nbd/server.c b/nbd/server.c
> index 3d4cd3d21c..984fad4bdb 100644
> --- a/nbd/server.c
> +++ b/nbd/server.c
> @@ -607,7 +607,8 @@ static coroutine_fn int nbd_negotiate(NBDClient *client)
>  stq_be_p(buf + 18, client->exp->size);
>  stw_be_p(buf + 26, client->exp->nbdflags | myflags);
>  len = client->no_zeroes ? 10 : sizeof(buf) - 18;
> -if (write_sync(client->ioc, buf + 18, len, NULL) < 0) {
> +rc = write_sync(client->ioc, buf + 18, len, NULL);
> +if (rc < 0) {
>  LOG("write failed");
>  goto fail;
>  }
> 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH v2 3/6] vhost: extend ring information update for IOTLB to all rings

2017-05-30 Thread Maxime Coquelin




On 05/30/2017 11:06 PM, Maxime Coquelin wrote:

Hi Michael,

On 05/30/2017 08:12 PM, Michael S. Tsirkin wrote:

On Fri, May 26, 2017 at 04:28:55PM +0200, Maxime Coquelin wrote:

Vhost-kernel backend need


needs


to receive IOTLB entry for used ring
information early, which is done by triggering a miss event on
its address.

This patch extends this behaviour to all rings information, to be
compatible with vhost-user backend design.


Why does vhost-user need it though?


For vhost-user, this simplifies the backend design because generally,
the backend sends IOTLB miss requests from processing threads through
the slave channel, and receives resulting IOTLB updates in vhost-user
protocol thread.

The only exception is for these rings info, where IOTLB miss requests
are sent from vhost-user protocol thread (in the SET_VRING_ENABLE
request handler), so the resulting IOTLB update is only handled by
the backend when the rings are enabled, which is too late.

It could be possible to overcome this issue, but I think it would
make the design more complex or less efficient. I see several options:

1. Change the IOTLB miss request so that the master sends the IOTLB
update as reply, instead of the reply-ack. It would mean that IOTLB
updates/invalidations would be sent either via the master channel or
the slave channel. On QEMU side, it means diverging from kernel backend
implementation. On backend side, it means having possibly multiple
threads writing to the IOTLB cache.

2. In vhost-user protocol thread, when handling SET_VRING_ENABLE, send
IOTLB miss request without setting the reply-ack flag, and poll the
vhost-user socket to read the resulting IOTLB update. The problem is
that other requests could be pending in the socket's buffer, and so it
would end-up nesting multiple requests handling.

3. Don't interpret rings info in the vhost-user protocol thread, but
only in the processing threads. The advantage is that it would address
the remark you made on patch 6 that invalidates are not affecting ring
info. The downside being the overhead induced by checking whether the
ring info are valid every time it is being used. I haven't prototyped
this solution, but I expected the performance regression to be a
blocker.

4. In SET_VRING_ENABLE, don't enable the ring if needed entries are not 
in IOTLB cache. Just send the IOTLB misses without reply-ack flag and 
postpone enable when handling IOTLB updates. It will be a little more 
complex solution than current one, but it may be the less impacting 
compared to the other 3 options.



Thinking again, maybe trying solution would be worth the effort, and 


s/solution/solution 4/


could be extended also to disable the rings when receiving invalidates
that affect rings info.

What do you think?





Signed-off-by: Maxime Coquelin 
---
v2:
  - Revert back to existing behaviour, i.e. only send IOTLB updates
at ring enablement time, not at ring address setting time (mst).
  - Extend IOTLB misses to all ring addresses, not only used ring.

  hw/virtio/vhost.c | 8 ++--
  1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 6eddb09..7867034 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -1552,11 +1552,15 @@ int vhost_dev_start(struct vhost_dev *hdev, 
VirtIODevice *vdev)

  if (vhost_dev_has_iommu(hdev)) {
  hdev->vhost_ops->vhost_set_iotlb_callback(hdev, true);
-/* Update used ring information for IOTLB to work correctly,
- * vhost-kernel code requires for this.*/
+/*
+ * Update rings information for IOTLB to work correctly,
+ * vhost-kernel and vhost-user codes require for this.


Better just say "Update ring info for vhost iotlb."

The rest isn't really informative.


Ok.






+ */
  for (i = 0; i < hdev->nvqs; ++i) {
  struct vhost_virtqueue *vq = hdev->vqs + i;
  vhost_device_iotlb_miss(hdev, vq->used_phys, true);
+vhost_device_iotlb_miss(hdev, vq->desc_phys, true);
+vhost_device_iotlb_miss(hdev, vq->avail_phys, true);


So I don't remember why does vhost in kernel want miss on used
at start time.

Jason, could you comment on this please?




  }
  }
  return 0;
--
2.9.4


Thanks,
Maxime

Re: [Qemu-devel] [PATCH v2 3/6] vhost: extend ring information update for IOTLB to all rings

2017-05-30 Thread Maxime Coquelin


Hi Michael,

On 05/30/2017 08:12 PM, Michael S. Tsirkin wrote:

On Fri, May 26, 2017 at 04:28:55PM +0200, Maxime Coquelin wrote:

Vhost-kernel backend need


needs


to receive IOTLB entry for used ring
information early, which is done by triggering a miss event on
its address.

This patch extends this behaviour to all rings information, to be
compatible with vhost-user backend design.


Why does vhost-user need it though?


For vhost-user, this simplifies the backend design because generally,
the backend sends IOTLB miss requests from processing threads through
the slave channel, and receives resulting IOTLB updates in vhost-user
protocol thread.

The only exception is for these rings info, where IOTLB miss requests
are sent from vhost-user protocol thread (in the SET_VRING_ENABLE
request handler), so the resulting IOTLB update is only handled by
the backend when the rings are enabled, which is too late.

It could be possible to overcome this issue, but I think it would
make the design more complex or less efficient. I see several options:

1. Change the IOTLB miss request so that the master sends the IOTLB
update as reply, instead of the reply-ack. It would mean that IOTLB
updates/invalidations would be sent either via the master channel or
the slave channel. On QEMU side, it means diverging from kernel backend
implementation. On backend side, it means having possibly multiple
threads writing to the IOTLB cache.

2. In vhost-user protocol thread, when handling SET_VRING_ENABLE, send
IOTLB miss request without setting the reply-ack flag, and poll the
vhost-user socket to read the resulting IOTLB update. The problem is
that other requests could be pending in the socket's buffer, and so it
would end-up nesting multiple requests handling.

3. Don't interpret rings info in the vhost-user protocol thread, but
only in the processing threads. The advantage is that it would address
the remark you made on patch 6 that invalidates are not affecting ring
info. The downside being the overhead induced by checking whether the
ring info are valid every time it is being used. I haven't prototyped
this solution, but I expected the performance regression to be a
blocker.

4. In SET_VRING_ENABLE, don't enable the ring if needed entries are not 
in IOTLB cache. Just send the IOTLB misses without reply-ack flag and 
postpone enable when handling IOTLB updates. It will be a little more 
complex solution than current one, but it may be the less impacting 
compared to the other 3 options.



Thinking again, maybe trying solution would be worth the effort, and 
could be extended also to disable the rings when receiving invalidates

that affect rings info.

What do you think?





Signed-off-by: Maxime Coquelin 
---
v2:
  - Revert back to existing behaviour, i.e. only send IOTLB updates
at ring enablement time, not at ring address setting time (mst).
  - Extend IOTLB misses to all ring addresses, not only used ring.

  hw/virtio/vhost.c | 8 ++--
  1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 6eddb09..7867034 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -1552,11 +1552,15 @@ int vhost_dev_start(struct vhost_dev *hdev, 
VirtIODevice *vdev)
  if (vhost_dev_has_iommu(hdev)) {
  hdev->vhost_ops->vhost_set_iotlb_callback(hdev, true);
  
-/* Update used ring information for IOTLB to work correctly,

- * vhost-kernel code requires for this.*/
+/*
+ * Update rings information for IOTLB to work correctly,
+ * vhost-kernel and vhost-user codes require for this.


Better just say "Update ring info for vhost iotlb."

The rest isn't really informative.


Ok.






+ */
  for (i = 0; i < hdev->nvqs; ++i) {
  struct vhost_virtqueue *vq = hdev->vqs + i;
  vhost_device_iotlb_miss(hdev, vq->used_phys, true);
+vhost_device_iotlb_miss(hdev, vq->desc_phys, true);
+vhost_device_iotlb_miss(hdev, vq->avail_phys, true);


So I don't remember why does vhost in kernel want miss on used
at start time.

Jason, could you comment on this please?




  }
  }
  return 0;
--
2.9.4


Thanks,
Maxime

Re: [Qemu-devel] [PATCH 04/19] nbd/server: get rid of EAGAIN dead code

2017-05-30 Thread Eric Blake

On 05/30/2017 09:30 AM, Vladimir Sementsov-Ogievskiy wrote:
> For now read_sync never returns EAGAIN. So, don't handle it.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> ---
>  nbd/server.c | 18 +++---
>  1 file changed, 7 insertions(+), 11 deletions(-)
> 

I had to go re-read your cover letter, to learn that EAGAIN went away in
your prerequisite series (I see Paolo has a pending PULL request with
your prerequisite in place, but it's not yet on master).

Reviewed-by: Eric Blake 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH v3 04/16] qemu-img: Expose PreallocMode for resizing

2017-05-30 Thread Eric Blake

On 05/26/2017 11:55 AM, Max Reitz wrote:
> Add a --preallocation command line option to qemu-img resize which can
> be used to set the PreallocMode parameter of blk_truncate().
> 
> Signed-off-by: Max Reitz 
> ---
>  qemu-img.c| 33 ++---
>  qemu-img.texi |  7 ++-
>  2 files changed, 36 insertions(+), 4 deletions(-)

> @@ -3553,8 +3566,16 @@ static int img_resize(int argc, char **argv)
>  goto out;
>  }
>  
> +current_size = blk_getlength(blk);
> +if (current_size < 0) {
> +error_report("Failed to inquire current image length: %s",
> + strerror(-current_size));
> +ret = -1;
> +goto out;
> +}
> +
>  if (relative) {
> -total_size = blk_getlength(blk) + n * relative;
> +total_size = current_size + n * relative;

You snuck in a bug fix here (reporting failure, rather than using a
bogus total_size, if querying the size fails).  Please mention that in
the commit message.

> @@ -541,6 +541,11 @@ After using this command to grow a disk image, you must 
> use file system and
>  partitioning tools inside the VM to actually begin using the new space on the
>  device.
>  
> +When growing an image, the @code{--preallocation} option may be used to 
> specify
> +how the additional image area should be allocated on the host.  See the 
> format
> +description in the @code{NOTES} section which values are allowed.  Using this
> +option may result in more data being allocated than necessary.

Should we tone it down a bit by saying 'slightly more data'?  (We'd
rather over-estimate than fall short, but our over-estimation will
probably be < 1% off, and not something drastic like an order of
magnitude off).

With the improved commit message,
Reviewed-by: Eric Blake 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH] live-block-ops.txt: Rewrite and improve it

2017-05-30 Thread Eric Blake

On 05/30/2017 03:24 PM, Kashyap Chamarthy wrote:
> First, thanks for the quick feedback!
> 
> On Tue, May 30, 2017 at 02:24:40PM -0500, Eric Blake wrote:
>> On 05/30/2017 10:38 AM, Kashyap Chamarthy wrote:
>>> This edition documents all four operations:
> 

>>> +
>>> +(1) `Disk image backing chain notation`_
>>> +(2) `Brief overview of live block QMP primitives`_
>>> +(3) `Interacting with a QEMU instance`_
>>> +(4) `Example disk image chain`_
>>> +(5) `A note on points-in-time vs file names`_
>>> +(6) `Live block streaming -- `block-stream``_
>>> +(7) `QMP invocation for `block-stream``_
>>> +(8) `Live block commit -- `block-commit``_
>>> +(9) `QMP invocation for `block-commit``_
>>> +(10) `Live disk synchronization -- `drive-mirror`&`blockdev-mirror``_
>>> +(11) `QMP invocation for `drive-mirror``_
>>> +(12) `Notes on `blockdev-mirror``_
>>> +(13) `Live disk backup -- `drive-backup`&`blockdev-backup``_
>>> +(14) `QMP invocation for `drive-backup``_
>>> +(15) `Notes on `blockdev-backup``_

> Eric, do you have an opinion:  Now that I _do_ have the notes
> available[*], do you think it is worth spelling out the invocation for
> QMP `blockdev-{backup, mirror}`?  Or wait for later?

I liked how you compared drive-mirror and blockdev-mirror, but it's
probably worth an invocation example in sections 12/15 in addition to
"this is how the block version differs from the drive version".

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH] live-block-ops.txt: Rewrite and improve it

2017-05-30 Thread Kashyap Chamarthy

First, thanks for the quick feedback!

On Tue, May 30, 2017 at 02:24:40PM -0500, Eric Blake wrote:
> On 05/30/2017 10:38 AM, Kashyap Chamarthy wrote:
> > This edition documents all four operations:

[...]

> s/occurance/occurrence/
> 
> > use raw JSON; for subsequent occurances, use 'qmp-shell', with an
> 
> s/occurances/occurrences/

Will address, both.  Sigh, I first ran it through GNU `aspell`, but
forgot to have to save the typo corrections.

> > Signed-off-by: Kashyap Chamarthy 
> > ---
> > * An HTML rendered version is here:
> >   https://kashyapc.fedorapeople.org/virt/qemu/live-block-operations.html
> 
> Helpful; thanks!
> 
> > diff --git a/docs/live-block-operations.rst b/docs/live-block-operations.rst
> > new file mode 100644
> > index 
> > ..19107776cd1edd20fac17ddda192d677199190d9
> > --- /dev/null
> > +++ b/docs/live-block-operations.rst
> > @@ -0,0 +1,757 @@
> > +
> > +Live Block Device Operations
> > +
> 
> Is it worth mentioning a copyright/license in this prologue?  It's not
> mandatory (you get the default of GPLv2+ based on the top-level if you
> don't do it), but being explicit never hurts.

Good point, I add the boilerplate copyright / license text.
 
> > +
> > +Table of Contents
> > +=
> > +
> > +(1) `Disk image backing chain notation`_
> > +(2) `Brief overview of live block QMP primitives`_
> > +(3) `Interacting with a QEMU instance`_
> > +(4) `Example disk image chain`_
> > +(5) `A note on points-in-time vs file names`_
> > +(6) `Live block streaming -- `block-stream``_
> > +(7) `QMP invocation for `block-stream``_
> > +(8) `Live block commit -- `block-commit``_
> > +(9) `QMP invocation for `block-commit``_
> > +(10) `Live disk synchronization -- `drive-mirror`&`blockdev-mirror``_
> > +(11) `QMP invocation for `drive-mirror``_
> > +(12) `Notes on `blockdev-mirror``_
> > +(13) `Live disk backup -- `drive-backup`&`blockdev-backup``_
> > +(14) `QMP invocation for `drive-backup``_
> > +(15) `Notes on `blockdev-backup``_
> > +
> 
> Does .rst have any mechanisms for ensuring the ToC doesn't get out of
> sync if we later change sections around?

Hmm, good question, I don't know such a mechanism exists yet, I think it
will get out of sync; will check.  But if we _do_ rearrange, it's
trivial to fix it.  But I see where you're coming from.

> > +
> > +.. _`Disk image backing chain notation`:
> > +
> 
> > +
> > +Arrow to be read as: Image [A] is the backing file of disk image [B].
> 
> Maybe: "The arrow can be read as:"

Yes, will fix.

> > +And live QEMU is currently writing to image [B], consequently, it is
> > +also referred to as the "active layer".
> > +
> > +There are two kinds of terminology that is common when referring to
> 
> s/that is common/that are common/

Likewise.

[...]

> > +- `block-stream`: Live copy data from overlay files into backing images.
> 
> Backwards.  This is a live copy of data from backing images into
> overlays (with the optional goal of removing the backing image from the
> chain).

Err, indeed.  Will fix.

> > +
> > +- `block-commit`: Live merge data from backing files into overlay
> > +  images.  Since QEMU 2.0, this includes "active `block-commit`" (i.e.
> > +  merge the current active layer into the base image).
> 
> Backwards.  This is live merge of data from overlay images into backing
> images (with the optional goal of removing the overlay image from the
> chain).

Yes, again, will fix.

[...]

> > +Interacting with a QEMU instance
> > +
> > +
> > +To show some example invocations of command-line, we shall use the
> 
> 'shall' is okay, but sounds rather formal; using 'will' sounds more typical.

Funny you mention, I actually _began_ with "will", but think my
subconscious self 'overwrote' it with "shall", as I was reading a book
("The Blind Watchmaker") which was using "shall" throughout.  Probably
that had an influence. :-)

> > +following invocation of QEMU, with a QMP server running over UNIX
> > +socket:
> > +
> > +.. code-block::
> > +
> > +$ ./x86_64-softmmu/qemu-system-x86_64 -display none -nodefconfig \
> > +-M q35 -nodefaults -m 512 \
> > +-blockdev 
> > node-name=node-A,driver=qcow2,file.driver=file,file.filename=./a.qcow2 \
> > +-device virtio-blk,drive=node-A,id=virtio0 \
> > +-monitor stdio -qmp unix:/tmp/qmp-sock,server,nowait
> 
> Nice use of -blockdev! 

I was conscious to use the latest upstream preferences, regardless
whether they're used by higher layers.

> Is it worth also giving file.node-name for a
> non-generated node name on the protocol layer?

Hmm, if I had to guess, you say the above for completeness' sake.
Because, IIRC, the 'node-name; at the protocol layer will come in handy
when doing things like IOPS throttling (for which

Do you have a preferred `-blockdev` invocation?

> > +
> > +The `-blockdev` command-line option,

Re: [Qemu-devel] [PATCH 03/19] nbd/server: refactor nbd_co_send_reply

2017-05-30 Thread Eric Blake

On 05/30/2017 09:30 AM, Vladimir Sementsov-Ogievskiy wrote:
> As write_sync never returns value > 0, we can get rid of extra ret.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> ---
>  nbd/server.c | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)

Reviewed-by: Eric Blake 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH 02/19] nbd/server: get rid of ssize_t

2017-05-30 Thread Eric Blake

On 05/30/2017 09:30 AM, Vladimir Sementsov-Ogievskiy wrote:
> Now read_sync and friends return int, so get rid of ssize_t.

In particular, it was your earlier commit "nbd: read_sync and friends:
return 0 on success" that changed the semantics of the return value to
just be 0 instead of a size; and this is just a followup cleanup (now
that we aren't returning a size, ssize_t is overkill).

> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> ---
>  nbd/server.c | 18 --
>  1 file changed, 8 insertions(+), 10 deletions(-)

Reviewed-by: Eric Blake 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH v2 29/29] target/s390x: update maximum TCG model to z800

2017-05-30 Thread Richard Henderson


On 05/29/2017 12:24 PM, Aurelien Jarno wrote:

At the same time fix the TCG version of get_max_cpu_model to return the
maximum model like on KVM. Remove the long-displacement facility from
the additional features as it is included in the z800.

Signed-off-by: Aurelien Jarno
---
  target/s390x/cpu_models.c | 12 ++--
  1 file changed, 6 insertions(+), 6 deletions(-)


Reviewed-by: Richard Henderson 


r~

Re: [Qemu-devel] [PATCH v2 24/29] target/s390x: implement PACK UNICODE

2017-05-30 Thread Richard Henderson


On 05/29/2017 12:24 PM, Aurelien Jarno wrote:

Use a common helper with PACK ASCII as the differences are limited to
the stride of the source operand.

Signed-off-by: Aurelien Jarno
---
  target/s390x/helper.h  |  1 +
  target/s390x/insn-data.def |  2 ++
  target/s390x/mem_helper.c  | 30 +-
  target/s390x/translate.c   | 16 
  4 files changed, 40 insertions(+), 9 deletions(-)


Reviewed-by: Richard Henderson 


r~

Re: [Qemu-devel] [PATCH v2 22/29] target/s390x: implement MOVE LONG UNICODE

2017-05-30 Thread Richard Henderson


On 05/29/2017 12:24 PM, Aurelien Jarno wrote:

Signed-off-by: Aurelien Jarno
---
  target/s390x/helper.h  |  1 +
  target/s390x/insn-data.def |  2 ++
  target/s390x/mem_helper.c  | 47 --
  target/s390x/translate.c   | 21 +
  4 files changed, 65 insertions(+), 6 deletions(-)


Reviewed-by: Richard Henderson 


r~

Re: [Qemu-devel] [PATCH v2 21/29] target/s390x: implement COMPARE LOGICAL LONG UNICODE

2017-05-30 Thread Richard Henderson


On 05/29/2017 12:24 PM, Aurelien Jarno wrote:

For that we need to make program_interrupt available to qemu-user.
Fortunately there is almost nothing to change as both kvm_enabled and
CONFIG_KVM evaluate to false in that case.

Signed-off-by: Aurelien Jarno
---
  target/s390x/helper.h  |  1 +
  target/s390x/insn-data.def |  2 ++
  target/s390x/mem_helper.c  | 76 ++
  target/s390x/misc_helper.c |  4 +--
  target/s390x/translate.c   | 22 ++
  5 files changed, 90 insertions(+), 15 deletions(-)


Reviewed-by: Richard Henderson 


r~

Re: [Qemu-devel] [PATCH v2 19/29] target/s390x: fix adj_len_to_page

2017-05-30 Thread Richard Henderson


On 05/29/2017 12:24 PM, Aurelien Jarno wrote:

adj_len_to_page doesn't return the correct result when the address
is already page aligned and the length is bigger than a page. Fix that.

Signed-off-by: Aurelien Jarno
---
  target/s390x/mem_helper.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)


Reviewed-by: Richard Henderson 


r~

Re: [Qemu-devel] [PATCH v2 20/29] target/s390x: improve MOVE LONG and MOVE LONG EXTENDED

2017-05-30 Thread Richard Henderson


On 05/29/2017 12:24 PM, Aurelien Jarno wrote:

As MVCL and MVCLE only differ by their operands, use a common
do_mvcl helper. Optimize it calling fast_memmove and fast_memset.
Correctly write back addresses. Check that r1 and r2/r3 registers
are even.

Signed-off-by: Aurelien Jarno
---
  target/s390x/mem_helper.c | 90 +--
  target/s390x/translate.c  | 40 +++--
  2 files changed, 70 insertions(+), 60 deletions(-)


Reviewed-by: Richard Henderson 


r~

Re: [Qemu-devel] [PATCH v2 18/29] target/s390x: implement COMPARE LOGICAL LONG

2017-05-30 Thread Richard Henderson


On 05/29/2017 12:24 PM, Aurelien Jarno wrote:

As CLCL and CLCLE mostly differ by their operands, use a common do_clcl
helper. Another difference is that CLCL is not interruptible.

Signed-off-by: Aurelien Jarno
---
  target/s390x/helper.h  |  1 +
  target/s390x/insn-data.def |  2 ++
  target/s390x/mem_helper.c  | 84 +-
  target/s390x/translate.c   | 21 
  4 files changed, 84 insertions(+), 24 deletions(-)


Reviewed-by: Richard Henderson 


r~

Re: [Qemu-devel] [PATCH v2 17/29] target/s390x: fix COMPARE LOGICAL LONG EXTENDED

2017-05-30 Thread Richard Henderson


On 05/29/2017 12:24 PM, Aurelien Jarno wrote:

There are multiple issues with the COMPARE LOGICAL LONG EXTENDED
instruction:
- The test between the two operands is inverted, leading to an inversion
   of the cc values 1 and 2.
- The address and length of an operand continue to be decreased after
   reaching the end of this operand. These values are then wrong write
   back to the registers.
- We should limit the amount of bytes to process, so that interrupts can
   be served correctly.

At the same time rename dest into src1 and src into src3 to match the
operand names and make the code less confusing.

Signed-off-by: Aurelien Jarno
---
  target/s390x/mem_helper.c | 54 ---
  target/s390x/translate.c  | 20 +-
  2 files changed, 52 insertions(+), 22 deletions(-)


Reviewed-by: Richard Henderson 


r~

Re: [Qemu-devel] [PATCH 01/19] nbd/server: get rid of nbd_negotiate_read and friends

2017-05-30 Thread Eric Blake

On 05/30/2017 09:30 AM, Vladimir Sementsov-Ogievskiy wrote:
> Functions nbd_negotiate_{read,write,drop_sync} were introduced in
> 1a6245a5b, when nbd_wr_syncv was working through qemu_co_sendv_recvv,

There is no qemu_co_sendv_recvv. Did you mean qemu_co_recv/qemu_co_send?

> which just yields, without setting any handlers. But now, nbd_wr_syncv
> works through qio_channel_yield() which sets handlers, so watchers
> are redundant in nbd_negotiate_{read,write,drop_sync}, then, let's just
> use {read,write,drop}_sync functions.

Makes sense to me.  Note that I have a pending patch that was also
related to 1a6245a5, where we introduced an assertion failure (which
later morphed into a segfault) if a client hangs up really early during
negotiation:

https://lists.gnu.org/archive/html/qemu-devel/2017-05/msg06240.html

> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> ---

> +++ b/nbd/common.c
> @@ -69,6 +69,32 @@ ssize_t nbd_wr_syncv(QIOChannel *ioc,
>  return done;
>  }
>  
> +/* Discard length bytes from channel.  Return -errno on failure and 0 on
> + * success*/

Space before */, if you don't mind (yes, I know this was just code
motion, and that the old spot was wrong).

> +int drop_sync(QIOChannel *ioc, size_t size, Error **errp)

As part of moving it into nbd/common.c, please rename this function to
an nbd_ prefix, since non-static functions are more likely to collide
with the rest of the code base if not properly named.  Better yet: do it
in multiple patches:
- rename the static functions and fallout to callers (all of
nbd_drop_sync, nbd_read_sync, and nbd_write_sync)
- code motion (promote nbd_drop_sync from static in client.c to exported
in common.c)
- drop nbd_negotiate_ functions in favor of common nbd_*_sync functions

The idea makes sense, but I think there's too much going on in this one
commit.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH v2 14/29] target/s390x: improve 24-bit and 31-bit addresses read

2017-05-30 Thread Richard Henderson


On 05/29/2017 12:24 PM, Aurelien Jarno wrote:

Improve fix_address to also handle the 24-bit mode. Rename fix_address
to wrap_address to better explain what is changed.

Replace the calls to get_address with x2 = 0 and b2 = 0 by
call to wrap_address, leading to the removal of this function. Rename
get_address_31fix into get_address.

Signed-off-by: Aurelien Jarno
---
  target/s390x/mem_helper.c | 71 +--
  1 file changed, 31 insertions(+), 40 deletions(-)


Reviewed-by: Richard Henderson 


r~

Re: [Qemu-devel] [PATCH v2 09/29] target/s390x: implement COMPARE AND SIGNAL

2017-05-30 Thread Richard Henderson


On 05/29/2017 12:24 PM, Aurelien Jarno wrote:

+DEF_HELPER_FLAGS_3(keb, TCG_CALL_NO_WG_SE, i32, env, i64, i64)
+DEF_HELPER_FLAGS_3(kdb, TCG_CALL_NO_WG_SE, i32, env, i64, i64)
+DEF_HELPER_FLAGS_5(kxb, TCG_CALL_NO_WG_SE, i32, env, i64, i64, i64, i64)


They do have side effects -- recording the exception.


r~

Re: [Qemu-devel] [PATCH v2 08/29] target/s390x: implement STORE PAIR TO QUADWORD

2017-05-30 Thread Richard Henderson


On 05/29/2017 12:24 PM, Aurelien Jarno wrote:

+TCGMemOpIdx oi = make_memop_idx(MO_TEQ, mem_idx);


Similarly, alignment.


r~

Re: [Qemu-devel] [PATCH v3 7/7] numa: cpu: calculate/set default node-ids after all -numa CLI options are parsed

2017-05-30 Thread Eduardo Habkost

On Tue, May 30, 2017 at 06:24:02PM +0200, Igor Mammedov wrote:
> Calculating default node-ids for CPUs in possible_cpu_arch_ids()
> is rather fragile since defaults calculation uses nb_numa_nodes but
> callback might be potentially called early before all -numa CLI
> options are parsed, which would lead to cpus assigned only upto
> nb_numa_nodes at the time possible_cpu_arch_ids() is called.
> 
> Issue was introduced by
> (7c88e65 numa: mirror cpu to node mapping in MachineState::possible_cpus)
> and for example CLI:
>   -smp 4 -numa node,cpus=0 -numa node
> would set props.node-id in possible_cpus array for every non
> explicitly mapped CPU to the first node.
> 
> Issue is not visible to guest nor to mgmt interface due to
>   1) implictly mapped cpus are forced to the first node in
>  case of partial mapping
>   2) in case of default mapping possible_cpu_arch_ids() is
>  called after all -numa options are parsed (resulting
>  in correct mapping).
> 
> However it's fragile to rely on late execution of
> possible_cpu_arch_ids(), therefore add machine specific
> callback that returns node-id for CPU and use it to calculate/
> set defaults at machine_numa_finish_init() time when all -numa
> options are parsed.
> 
> Reported-by: Eduardo Habkost 
> Signed-off-by: Igor Mammedov 
> ---
>  include/hw/boards.h   |  3 +++
>  include/sysemu/numa.h |  9 +
>  hw/arm/virt.c | 16 
>  hw/core/machine.c |  1 +
>  hw/i386/pc.c  | 21 -
>  hw/ppc/spapr.c| 16 +++-
>  6 files changed, 40 insertions(+), 26 deletions(-)
> 
> diff --git a/include/hw/boards.h b/include/hw/boards.h
> index 76ce021..063f329 100644
> --- a/include/hw/boards.h
> +++ b/include/hw/boards.h
> @@ -94,6 +94,8 @@ typedef struct {
>   *Returns an array of @CPUArchId architecture-dependent CPU IDs
>   *which includes CPU IDs for present and possible to hotplug CPUs.
>   *Caller is responsible for freeing returned list.
> + * @get_default_cpu_node_id:
> + *returns default board specific node_id value for CPU
>   * @has_hotpluggable_cpus:
>   *If true, board supports CPUs creation with -device/device_add.
>   * @minimum_page_bits:
> @@ -151,6 +153,7 @@ struct MachineClass {
>  CpuInstanceProperties (*cpu_index_to_instance_props)(MachineState 
> *machine,
>   unsigned cpu_index);
>  const CPUArchIdList *(*possible_cpu_arch_ids)(MachineState *machine);
> +int64_t (*get_default_cpu_node_id)(const MachineState *ms, int idx);

Aren't we trying to move away from cpu_index-based CPU
identification?  Shouldn't we make this get a CPUArchId argument?

>  };
>  
>  /**
> diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h
> index 610eece..ea123ef 100644
> --- a/include/sysemu/numa.h
> +++ b/include/sysemu/numa.h
> @@ -36,4 +36,13 @@ void numa_legacy_auto_assign_ram(MachineClass *mc, 
> NodeInfo *nodes,
>  void numa_default_auto_assign_ram(MachineClass *mc, NodeInfo *nodes,
>int nb_nodes, ram_addr_t size);
>  void numa_cpu_pre_plug(const CPUArchId *slot, DeviceState *dev, Error 
> **errp);
> +
> +static inline void assert_nb_numa_nodes_change(void)

Can you document the purpose of this function?

> +{
> +static int saved_nb_numa_nodes;
> +assert(nb_numa_nodes);
> +assert(saved_nb_numa_nodes == 0 || saved_nb_numa_nodes == nb_numa_nodes);
> +saved_nb_numa_nodes = nb_numa_nodes;
> +}
> +
>  #endif
> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index ce676df..74f1453 100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c
> @@ -1553,6 +1553,13 @@ virt_cpu_index_to_props(MachineState *ms, unsigned 
> cpu_index)
>  return possible_cpus->cpus[cpu_index].props;
>  }
>  
> +static int64_t virt_get_default_cpu_node_id(const MachineState *ms, int idx)
> +{
> +assert(nb_numa_nodes);
> +assert_nb_numa_nodes_change();

I would move this assert to common code instead of copying it to
all implementations.

A machine_get_default_cpu_node_id() wrapper that calls
assert_nb_numa_nodes_change() and mc->get_default_cpu_node_id()
would be enough, and it wouldn't even need to be declared in a
header as the only caller would be at hw/core/machine.c.

(Or, if you prefer to simply drop the assert(), I think that
would be OK too.)


> +return idx % nb_numa_nodes;
> +}
> +
>  static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
>  {
>  int n;
> @@ -1571,14 +1578,6 @@ static const CPUArchIdList 
> *virt_possible_cpu_arch_ids(MachineState *ms)
>  virt_cpu_mp_affinity(vms, n);
>  ms->possible_cpus->cpus[n].props.has_thread_id = true;
>  ms->possible_cpus->cpus[n].props.thread_id = n;
> -
> -/* default distribution of CPUs over NUMA nodes */
> -if (nb_numa_nodes) {
> -/* preset values but do not enable them i.e. 'has_node_id = 
>

Re: [Qemu-devel] [PATCH 08/12] announce_timer: Add ability to reset an existing

2017-05-30 Thread Vlad Yasevich

On 05/30/2017 03:35 PM, Dr. David Alan Gilbert wrote:
> * Vladislav Yasevich (vyase...@redhat.com) wrote:
>> It is now potentially possible to issue annouce-self command in a tight
>> loop.  Instead of doing nother, we can reset the timeout pararameters,
>> especially since each instance may provide it's own values.  This
>> allows the user to  extend or cut short currently runnig timer.
>>
>> Signed-off-by: Vladislav Yasevich 
> 
> ah ok, you can ignore my comment on the previous patch then!
> 
>> ---
>>  include/migration/vmstate.h |  1 +
>>  migration/savevm.c  | 41 +++--
>>  2 files changed, 32 insertions(+), 10 deletions(-)
>>
>> diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
>> index 689b685..6dfdac3 100644
>> --- a/include/migration/vmstate.h
>> +++ b/include/migration/vmstate.h
>> @@ -1057,6 +1057,7 @@ void vmstate_register_ram_global(struct MemoryRegion 
>> *memory);
>>  
>>  typedef struct AnnounceTimer {
>>  QEMUTimer *tm;
>> +QemuMutex active_lock;
>>  struct AnnounceTimer **entry;
>>  AnnounceParameters params;
>>  QEMUClockType type;
>> diff --git a/migration/savevm.c b/migration/savevm.c
>> index dcba8bd..e43658f 100644
>> --- a/migration/savevm.c
>> +++ b/migration/savevm.c
>> @@ -220,20 +220,29 @@ static void qemu_announce_self_iter(NICState *nic, 
>> void *opaque)
>>  
>>  AnnounceTimer *announce_timers[QEMU_ANNOUNCE__MAX];
>>  
>> +static void qemu_announce_timer_destroy(AnnounceTimer *timer)
>> +{
>> +timer_del(timer->tm);
>> +timer_free(timer->tm);
>> +qemu_mutex_destroy(>active_lock);
> 
> Can you explain what makes this safe; we're not
> holding the lock at this point are we? Are we guaranteed
> that no one else will try and take it?
> Either way it should be commented to say why it's safe.

Looking at this again, it doesn't look safe.
The problem is the lookup code.  There is needs to be a lock on the
on the global array that needs to be held during creation/modification.

Thanks
-vlad

> 
> Dave
> 
>> +g_free(timer);
>> +}
>> +
>>  static void qemu_announce_self_once(void *opaque)
>>  {
>>  AnnounceTimer *timer = (AnnounceTimer *)opaque;
>>  
>> +qemu_mutex_lock(>active_lock);
>>  qemu_foreach_nic(qemu_announce_self_iter, NULL);
>>  
>> -if (--timer->round) {
>> +if (--timer->round ) {
>>  timer_mod(timer->tm, qemu_clock_get_ms(timer->type) +
>>self_announce_delay(timer));
>> +qemu_mutex_unlock(>active_lock);
>>  } else {
>> -*(timer->entry) = NULL;
>> -timer_del(timer->tm);
>> -timer_free(timer->tm);
>> -g_free(timer);
>> +*(timer->entry) = NULL;
>> +qemu_mutex_unlock(>active_lock);
>> +qemu_announce_timer_destroy(timer);
>>  }
>>  }
>>  
>> @@ -242,6 +251,7 @@ AnnounceTimer 
>> *qemu_announce_timer_new(AnnounceParameters *params,
>>  {
>>  AnnounceTimer *timer = g_new(AnnounceTimer, 1);
>>  
>> +qemu_mutex_init(>active_lock);
>>  timer->params = *params;
>>  timer->round = params->rounds;
>>  timer->type = type;
>> @@ -259,6 +269,21 @@ AnnounceTimer 
>> *qemu_announce_timer_create(AnnounceParameters *params,
>>  return timer;
>>  }
>>  
>> +static void qemu_announce_timer_update(AnnounceTimer *timer,
>> +   AnnounceParameters *params)
>> +{
>> +qemu_mutex_lock(>active_lock);
>> +
>> +/* Update timer paramenter with any new values.
>> + * Reset the number of rounds to run, and stop the current timer.
>> + */
>> +timer->params = *params;
>> +timer->round = params->rounds;
>> +timer_del(timer->tm);
>> +
>> +qemu_mutex_unlock(>active_lock);
>> +}
>> +
>>  void qemu_announce_self(AnnounceParameters *params, AnnounceType type)
>>  {
>>  AnnounceTimer *timer;
>> @@ -270,11 +295,7 @@ void qemu_announce_self(AnnounceParameters *params, 
>> AnnounceType type)
>>  announce_timers[type] = timer;
>>  timer->entry = _timers[type];
>>  } else {
>> -/* For now, don't do anything.  If we want to reset the timer,
>> - * we'll need to add locking to each announce timer to prevent
>> - * races between timeout handling and a reset.
>> - */
>> -return;
>> +qemu_announce_timer_update(timer, params);
>>  }
>>  qemu_announce_self_once(timer);
>>  }
>> -- 
>> 2.7.4
>>
> --
> Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK
>

Re: [Qemu-devel] [PATCH v2 07/29] target/s390x: implement LOAD PAIR FROM QUADWORD

2017-05-30 Thread Richard Henderson


On 05/29/2017 12:24 PM, Aurelien Jarno wrote:

+TCGMemOpIdx oi = make_memop_idx(MO_TEQ, mem_idx);


MO_TEQ | MO_ALIGN_16

as it's a specification error for the address to be unaligned.

r~

Re: [Qemu-devel] [PATCH v2 04/29] target/s390x: implement TEST AND SET

2017-05-30 Thread Richard Henderson


On 05/29/2017 12:24 PM, Aurelien Jarno wrote:

Signed-off-by: Aurelien Jarno
---
  target/s390x/insn-data.def |  3 +++
  target/s390x/translate.c   | 10 ++
  2 files changed, 13 insertions(+)


Reviewed-by: Richard Henderson 


r~

Re: [Qemu-devel] [PATCH v2 05/29] target/s390x: implement TEST ADDRESSING MODE

2017-05-30 Thread Richard Henderson


On 05/29/2017 12:24 PM, Aurelien Jarno wrote:

Signed-off-by: Aurelien Jarno
---
  target/s390x/insn-data.def |  3 +++
  target/s390x/translate.c   | 10 ++
  2 files changed, 13 insertions(+)


Reviewed-by: Richard Henderson 


r~

Re: [Qemu-devel] [PATCH 25/26] target/s390x: implement TRANSLATE ONE/TWO TO ONE/TWO

2017-05-30 Thread Aurelien Jarno

On 2017-05-30 12:42, Richard Henderson wrote:
> On 05/30/2017 12:25 PM, Aurelien Jarno wrote:
> > On 2017-05-30 09:45, Richard Henderson wrote:
> > > On 05/29/2017 04:17 AM, Aurelien Jarno wrote:
> > > > On 2017-05-26 10:10, Richard Henderson wrote:
> > > > > On 05/25/2017 02:05 PM, Aurelien Jarno wrote:
> > > > > > +uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
> > > > > > +  uint32_t sizes)
> > > > > > +{
> > > > > > +uintptr_t ra = GETPC();
> > > > > > +int dsize = (sizes & 1) ? 1 : 2;
> > > > > > +int ssize = (sizes & 2) ? 1 : 2;
> > > > > > +uint16_t tst = env->regs[0] & ((1 << (8 * dsize)) - 1);
> > > > > 
> > > > > I think you should pass in tst as an argument.  That way you can pass 
> > > > > in an
> > > > > out-of-band value when we implement ETF2 and test field M3 bit 3.
> > > > 
> > > > I don't mind passing r0 as an argument. That said if we want to pass tst
> > > > or bundle the M3 field, it means we need to use TCG instructions to do
> > > > so. I am not sure it brings a lot compare to doing so in the helper
> > > > side.
> > > 
> > > Not at all -- the M3 bit test would be a translation-time check.
> > 
> > I still don't really see the point. On the TCG side it means we need
> > something like that:
> > 
> >  if (m3 & 1) {
> > tcg_gen_movi_tl(r0, -1);
> >  } else if (dsize == 1) {
> > tcg_gen_ext8u(r0, regs[0]);
> >  } else if (dsize == 2)
> > tcg_gen_ext16u(r0, regs[0]);
> >  }
> 
> Yes, exactly.
> 
> > On the helper side we then need to check if the value passed equals -1
> > or not to get m3 instead of directly accessing the value in register 0.
> 
> How's that?  Why would you need to check for -1 at all?  The existing helper
> test works fine, with -1 not matching any value loaded.

Ok, I get your point now, i'll implement that in the next version. 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net

Re: [Qemu-devel] [PATCH v2 03/29] target/s390x: implement local-TLB-clearing in IPTE

2017-05-30 Thread Richard Henderson


On 05/29/2017 12:24 PM, Aurelien Jarno wrote:

And at the same time make IPTE SMP aware.

Signed-off-by: Aurelien Jarno
---
  target/s390x/helper.h |  2 +-
  target/s390x/mem_helper.c | 19 ---
  target/s390x/translate.c  |  6 +-
  3 files changed, 18 insertions(+), 9 deletions(-)


Reviewed-by: Richard Henderson 


r~

Re: [Qemu-devel] [PATCH v3 0/7] numa: code consolidation and fixes

2017-05-30 Thread Eduardo Habkost

On Tue, May 30, 2017 at 06:23:55PM +0200, Igor Mammedov wrote:
> changelog since v2:
>   (Eduardo)
>  - keep original logic in when moving numa part into helper
> numa: consolidate cpu_preplug fixups/checks for pc/arm/spapr
>  - drop "numa: fallback to default NUMA node instead of node 0"
>  - split out monitor hunk into separate patch
>  - split out spapr_fixup_cpu_numa_dt refactoring into separate patch
>  - add extra patch to make default node-id calculation more robust
> changelog since v1:
>   (Eduardo)
>  - user error_abort in numa_cpu_pre_plug()
>  - make default_mapping boolean
>  - redo default mapping detection loop as a combo of for/if
>  - return back lost TODO comment
>  - new patch removing numa_node from generic CPUState
>   - drop silence test patch as it's already in pull req on list
>   - new patch [3/5] to fix missing _PXM/fdt nodes for implicitly mapped CPUs
>   - new patch dropping fallback to node 0
> 
> 
> git repo for testing:
>https://github.com/imammedo/qemu.git cphp_numa_cfg_follow_up_v3_cleanups_v3
> 
> CC: qemu-...@nongnu.org
> CC: qemu-...@nongnu.org
> CC: Eduardo Habkost 
> CC: David Gibson 
> CC: Andrew Jones 
> 
> 
> Igor Mammedov (7):
>   numa: consolidate cpu_preplug fixups/checks for pc/arm/spapr
>   numa: move default mapping init to machine
>   numa: make sure that all cpus have has_node_id set if numa is enabled
>   numa: make hmp 'info numa' fetch numa nodes from qmp_query_cpus()
> result
>   numa: move numa_node from CPUState into target specific classes
>   spapr: cleanup spapr_fixup_cpu_numa_dt() usage
>   numa: cpu: calculate/set default node-ids after all -numa CLI options
> are parsed

Patches 1-6 queued on numa-next.  Thanks.

-- 
Eduardo

Re: [Qemu-devel] [Qemu-ppc] [PATCH v3 6/7] spapr: cleanup spapr_fixup_cpu_numa_dt() usage

2017-05-30 Thread Eduardo Habkost

On Tue, May 30, 2017 at 06:40:22PM +0200, Greg Kurz wrote:
> On Tue, 30 May 2017 18:24:01 +0200
> Igor Mammedov  wrote:
> 
> > even though spapr_fixup_cpu_numa_dt() has no effect on FDT
> > if numa is disabled, don't call it uselessly. It makes it
> > obvious at call sites that function is need only when numa
> 
> s/need/needed

Fixed while applying the patch.  Thanks!

> 
> > is enabled.
> > 
> > Signed-off-by: Igor Mammedov 
> > ---
> 
> Reviewed-by: Greg Kurz 
> 

-- 
Eduardo

Re: [Qemu-devel] [PATCH v3 5/7] numa: move numa_node from CPUState into target specific classes

2017-05-30 Thread Eduardo Habkost

On Tue, May 30, 2017 at 11:30:27AM -0500, Eric Blake wrote:
> On 05/30/2017 11:24 AM, Igor Mammedov wrote:
> > Move vcpu's assocciated numa_node field out of generic CPUState
> 
> s/assocciated/associated/
> 
> > into inherited classes that actually care about cpu<->numa mapping,
> > i.e: ARMCPU, PowerPCCPU, X86CPU.
> > 
> > Signed-off-by: Igor Mammedov 
> > ---
> 
> > +++ b/target/arm/cpu.h
> > @@ -703,6 +703,8 @@ struct ARMCPU {
> >  
> >  ARMELChangeHook *el_change_hook;
> >  void *el_change_hook_opaque;
> > +
> > +int32_t node_id; /* NUMA node this CPU is belonging to */
> 
> s/is belonging/belongs/  (multiple places in the patch)

I have fixed those while applying the patch.  Thanks!

-- 
Eduardo

Re: [Qemu-devel] [PATCH 25/26] target/s390x: implement TRANSLATE ONE/TWO TO ONE/TWO

2017-05-30 Thread Richard Henderson


On 05/30/2017 12:25 PM, Aurelien Jarno wrote:

On 2017-05-30 09:45, Richard Henderson wrote:

On 05/29/2017 04:17 AM, Aurelien Jarno wrote:

On 2017-05-26 10:10, Richard Henderson wrote:

On 05/25/2017 02:05 PM, Aurelien Jarno wrote:

+uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
+  uint32_t sizes)
+{
+uintptr_t ra = GETPC();
+int dsize = (sizes & 1) ? 1 : 2;
+int ssize = (sizes & 2) ? 1 : 2;
+uint16_t tst = env->regs[0] & ((1 << (8 * dsize)) - 1);


I think you should pass in tst as an argument.  That way you can pass in an
out-of-band value when we implement ETF2 and test field M3 bit 3.


I don't mind passing r0 as an argument. That said if we want to pass tst
or bundle the M3 field, it means we need to use TCG instructions to do
so. I am not sure it brings a lot compare to doing so in the helper
side.


Not at all -- the M3 bit test would be a translation-time check.


I still don't really see the point. On the TCG side it means we need
something like that:

 if (m3 & 1) {
tcg_gen_movi_tl(r0, -1);
 } else if (dsize == 1) {
tcg_gen_ext8u(r0, regs[0]);
 } else if (dsize == 2)
tcg_gen_ext16u(r0, regs[0]);
 }


Yes, exactly.


On the helper side we then need to check if the value passed equals -1
or not to get m3 instead of directly accessing the value in register 0.


How's that?  Why would you need to check for -1 at all?  The existing helper 
test works fine, with -1 not matching any value loaded.



r~

Re: [Qemu-devel] [PATCH 08/12] announce_timer: Add ability to reset an existing

2017-05-30 Thread Dr. David Alan Gilbert

* Vladislav Yasevich (vyase...@redhat.com) wrote:
> It is now potentially possible to issue annouce-self command in a tight
> loop.  Instead of doing nother, we can reset the timeout pararameters,
> especially since each instance may provide it's own values.  This
> allows the user to  extend or cut short currently runnig timer.
> 
> Signed-off-by: Vladislav Yasevich 

ah ok, you can ignore my comment on the previous patch then!

> ---
>  include/migration/vmstate.h |  1 +
>  migration/savevm.c  | 41 +++--
>  2 files changed, 32 insertions(+), 10 deletions(-)
> 
> diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
> index 689b685..6dfdac3 100644
> --- a/include/migration/vmstate.h
> +++ b/include/migration/vmstate.h
> @@ -1057,6 +1057,7 @@ void vmstate_register_ram_global(struct MemoryRegion 
> *memory);
>  
>  typedef struct AnnounceTimer {
>  QEMUTimer *tm;
> +QemuMutex active_lock;
>  struct AnnounceTimer **entry;
>  AnnounceParameters params;
>  QEMUClockType type;
> diff --git a/migration/savevm.c b/migration/savevm.c
> index dcba8bd..e43658f 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -220,20 +220,29 @@ static void qemu_announce_self_iter(NICState *nic, void 
> *opaque)
>  
>  AnnounceTimer *announce_timers[QEMU_ANNOUNCE__MAX];
>  
> +static void qemu_announce_timer_destroy(AnnounceTimer *timer)
> +{
> +timer_del(timer->tm);
> +timer_free(timer->tm);
> +qemu_mutex_destroy(>active_lock);

Can you explain what makes this safe; we're not
holding the lock at this point are we? Are we guaranteed
that no one else will try and take it?
Either way it should be commented to say why it's safe.

Dave

> +g_free(timer);
> +}
> +
>  static void qemu_announce_self_once(void *opaque)
>  {
>  AnnounceTimer *timer = (AnnounceTimer *)opaque;
>  
> +qemu_mutex_lock(>active_lock);
>  qemu_foreach_nic(qemu_announce_self_iter, NULL);
>  
> -if (--timer->round) {
> +if (--timer->round ) {
>  timer_mod(timer->tm, qemu_clock_get_ms(timer->type) +
>self_announce_delay(timer));
> +qemu_mutex_unlock(>active_lock);
>  } else {
> -*(timer->entry) = NULL;
> -timer_del(timer->tm);
> -timer_free(timer->tm);
> -g_free(timer);
> +*(timer->entry) = NULL;
> +qemu_mutex_unlock(>active_lock);
> +qemu_announce_timer_destroy(timer);
>  }
>  }
>  
> @@ -242,6 +251,7 @@ AnnounceTimer *qemu_announce_timer_new(AnnounceParameters 
> *params,
>  {
>  AnnounceTimer *timer = g_new(AnnounceTimer, 1);
>  
> +qemu_mutex_init(>active_lock);
>  timer->params = *params;
>  timer->round = params->rounds;
>  timer->type = type;
> @@ -259,6 +269,21 @@ AnnounceTimer 
> *qemu_announce_timer_create(AnnounceParameters *params,
>  return timer;
>  }
>  
> +static void qemu_announce_timer_update(AnnounceTimer *timer,
> +   AnnounceParameters *params)
> +{
> +qemu_mutex_lock(>active_lock);
> +
> +/* Update timer paramenter with any new values.
> + * Reset the number of rounds to run, and stop the current timer.
> + */
> +timer->params = *params;
> +timer->round = params->rounds;
> +timer_del(timer->tm);
> +
> +qemu_mutex_unlock(>active_lock);
> +}
> +
>  void qemu_announce_self(AnnounceParameters *params, AnnounceType type)
>  {
>  AnnounceTimer *timer;
> @@ -270,11 +295,7 @@ void qemu_announce_self(AnnounceParameters *params, 
> AnnounceType type)
>  announce_timers[type] = timer;
>  timer->entry = _timers[type];
>  } else {
> -/* For now, don't do anything.  If we want to reset the timer,
> - * we'll need to add locking to each announce timer to prevent
> - * races between timeout handling and a reset.
> - */
> -return;
> +qemu_announce_timer_update(timer, params);
>  }
>  qemu_announce_self_once(timer);
>  }
> -- 
> 2.7.4
> 
--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

Re: [Qemu-devel] [PATCH 03/12] migration: Switch to using announcement timer

2017-05-30 Thread Vlad Yasevich

On 05/30/2017 03:19 PM, Dr. David Alan Gilbert wrote:
> * Vladislav Yasevich (vyase...@redhat.com) wrote:
>> Switch qemu_announce_self and virtio annoucements to use
>> the announcement timer framework.  This makes sure that both
>> timers use the same timeouts and number of annoucement attempts
>>
>> Based on work by Dr. David Alan Gilbert 
>>
>> Signed-off-by: Vladislav Yasevich 
>> ---
>>  hw/net/virtio-net.c| 28 
>>  include/hw/virtio/virtio-net.h |  3 +--
>>  include/migration/vmstate.h| 17 +++--
>>  include/sysemu/sysemu.h|  2 +-
>>  migration/migration.c  |  2 +-
>>  migration/savevm.c | 28 ++--
>>  6 files changed, 44 insertions(+), 36 deletions(-)
>>
>> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
>> index 7d091c9..1c65825 100644
>> --- a/hw/net/virtio-net.c
>> +++ b/hw/net/virtio-net.c
>> @@ -25,6 +25,7 @@
>>  #include "qapi/qmp/qjson.h"
>>  #include "qapi-event.h"
>>  #include "hw/virtio/virtio-access.h"
>> +#include "migration/migration.h"
>>  
>>  #define VIRTIO_NET_VM_VERSION11
>>  
>> @@ -115,7 +116,7 @@ static void virtio_net_announce_timer(void *opaque)
>>  VirtIONet *n = opaque;
>>  VirtIODevice *vdev = VIRTIO_DEVICE(n);
>>  
>> -n->announce_counter--;
>> +n->announce_timer->round--;
>>  n->status |= VIRTIO_NET_S_ANNOUNCE;
>>  virtio_notify_config(vdev);
>>  }
>> @@ -427,8 +428,8 @@ static void virtio_net_reset(VirtIODevice *vdev)
>>  n->nobcast = 0;
>>  /* multiqueue is disabled by default */
>>  n->curr_queues = 1;
>> -timer_del(n->announce_timer);
>> -n->announce_counter = 0;
>> +timer_del(n->announce_timer->tm);
>> +n->announce_timer->round = 0;
>>  n->status &= ~VIRTIO_NET_S_ANNOUNCE;
>>  
>>  /* Flush any MAC and VLAN filter table state */
>> @@ -872,10 +873,10 @@ static int virtio_net_handle_announce(VirtIONet *n, 
>> uint8_t cmd,
>>  if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
>>  n->status & VIRTIO_NET_S_ANNOUNCE) {
>>  n->status &= ~VIRTIO_NET_S_ANNOUNCE;
>> -if (n->announce_counter) {
>> -timer_mod(n->announce_timer,
>> +if (n->announce_timer->round) {
>> +timer_mod(n->announce_timer->tm,
>>qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
>> -  self_announce_delay(n->announce_counter));
>> +  self_announce_delay(n->announce_timer));
>>  }
>>  return VIRTIO_NET_OK;
>>  } else {
>> @@ -1615,8 +1616,8 @@ static int virtio_net_post_load_device(void *opaque, 
>> int version_id)
>>  
>>  if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
>>  virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
>> -n->announce_counter = SELF_ANNOUNCE_ROUNDS;
>> -timer_mod(n->announce_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL));
>> +n->announce_timer->round = n->announce_timer->params.rounds;
>> +timer_mod(n->announce_timer->tm, 
>> qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL));
> 
> Do you think it's worth having a 
>   qemu_announce_timer_init(AnnounceTimer *, QEMU_CLOCK_*)
> that would initialise the round and any other values and 
> do the initial timer_mod ?

I had that initially, but ended up with just 1 user (virtio).  So removed.

I think I might put it back.  It's really a announce_timer_start() type thing.
May be it'll convert both places to use that api to make it cleaner.

> 
>>  }
>>  
>>  return 0;
>> @@ -1938,8 +1939,10 @@ static void virtio_net_device_realize(DeviceState 
>> *dev, Error **errp)
>>  qemu_macaddr_default_if_unset(>nic_conf.macaddr);
>>  memcpy(>mac[0], >nic_conf.macaddr, sizeof(n->mac));
>>  n->status = VIRTIO_NET_S_LINK_UP;
>> -n->announce_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
>> - virtio_net_announce_timer, n);
>> +n->announce_timer = qemu_announce_timer_new(qemu_get_announce_params(),
>> +QEMU_CLOCK_VIRTUAL);
>> +n->announce_timer->tm = timer_new_ms(QEMU_CLOCK_VIRTUAL,
>> +  virtio_net_announce_timer, n);
>>  
>>  if (n->netclient_type) {
>>  /*
>> @@ -2001,8 +2004,9 @@ static void virtio_net_device_unrealize(DeviceState 
>> *dev, Error **errp)
>>  virtio_net_del_queue(n, i);
>>  }
>>  
>> -timer_del(n->announce_timer);
>> -timer_free(n->announce_timer);
>> +timer_del(n->announce_timer->tm);
>> +timer_free(n->announce_timer->tm);
>> +g_free(n->announce_timer);
> 
> Hmm, why is this all safe - I guess this is in the BQL?

Well, this is virito's explicit timer.  I didn't check, but it really no 
different then
destroying the virtio-specific QEMU timer created for the devices.

> 
>>  g_free(n->vqs);
>>  qemu_del_nic(n->nic);
>>  virtio_cleanup(vdev);
>>

Re: [Qemu-devel] [PATCH 07/12] migration: Allow for a limited number of announce timers

2017-05-30 Thread Dr. David Alan Gilbert

* Vladislav Yasevich (vyase...@redhat.com) wrote:
> We currently create a new announcement timer every time
> qemu_announce_self() is called.  Since this is now a qmp
> command, this can lead to abuse.   Limit the number of
> timers that are created.  Give QMP interface and migration
> process 1 timer each.  This way, QMP can't abuse the
> announce_self mechanism.
> 
> Signed-off-by: Vladislav Yasevich 
> ---
>  include/migration/vmstate.h |  1 +
>  include/sysemu/sysemu.h |  9 -
>  migration/migration.c   |  2 +-
>  migration/savevm.c  | 24 +++-
>  4 files changed, 29 insertions(+), 7 deletions(-)
> 
> diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
> index f8aed9b..689b685 100644
> --- a/include/migration/vmstate.h
> +++ b/include/migration/vmstate.h
> @@ -1057,6 +1057,7 @@ void vmstate_register_ram_global(struct MemoryRegion 
> *memory);
>  
>  typedef struct AnnounceTimer {
>  QEMUTimer *tm;
> +struct AnnounceTimer **entry;
>  AnnounceParameters params;
>  QEMUClockType type;
>  int round;
> diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
> index 2ef1687..85a2af1 100644
> --- a/include/sysemu/sysemu.h
> +++ b/include/sysemu/sysemu.h
> @@ -78,14 +78,21 @@ void qemu_remove_machine_init_done_notifier(Notifier 
> *notify);
>  int save_vmstate(const char *name, Error **errp);
>  int load_vmstate(const char *name, Error **errp);
>  
> +typedef enum AnnounceType {
> +QEMU_ANNOUNCE_MIGRATION,
> +QEMU_ANNOUNCE_USER,
> +QEMU_ANNOUNCE__MAX,
> +} AnnounceType;
> +
>  AnnounceParameters *qemu_get_announce_params(void);
>  void qemu_fill_announce_parameters(AnnounceParameters **to,
> AnnounceParameters *from);
> +
>  bool qemu_validate_announce_parameters(AnnounceParameters *params,
> Error **errp);
>  void qemu_set_announce_parameters(AnnounceParameters *announce_params,
>AnnounceParameters *params);
> -void qemu_announce_self(AnnounceParameters *params);
> +void qemu_announce_self(AnnounceParameters *params, AnnounceType type);
>  
>  /* Subcommands for QEMU_VM_COMMAND */
>  enum qemu_vm_cmd {
> diff --git a/migration/migration.c b/migration/migration.c
> index 987c1cf..724fc40 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -345,7 +345,7 @@ static void process_incoming_migration_bh(void *opaque)
>   * This must happen after all error conditions are dealt with and
>   * we're sure the VM is going to be running on this host.
>   */
> -qemu_announce_self(qemu_get_announce_params());
> +qemu_announce_self(qemu_get_announce_params(), QEMU_ANNOUNCE_MIGRATION);
>  
>  /* If global state section was not received or we are in running
> state, we need to obey autostart. Any other state is set with
> diff --git a/migration/savevm.c b/migration/savevm.c
> index b55ce6a..dcba8bd 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -218,6 +218,8 @@ static void qemu_announce_self_iter(NICState *nic, void 
> *opaque)
>  }
>  }
>  
> +AnnounceTimer *announce_timers[QEMU_ANNOUNCE__MAX];
> +
>  static void qemu_announce_self_once(void *opaque)
>  {
>  AnnounceTimer *timer = (AnnounceTimer *)opaque;
> @@ -228,6 +230,7 @@ static void qemu_announce_self_once(void *opaque)
>  timer_mod(timer->tm, qemu_clock_get_ms(timer->type) +
>self_announce_delay(timer));
>  } else {
> +*(timer->entry) = NULL;
>  timer_del(timer->tm);
>  timer_free(timer->tm);
>  g_free(timer);
> @@ -256,12 +259,23 @@ AnnounceTimer 
> *qemu_announce_timer_create(AnnounceParameters *params,
>  return timer;
>  }
>  
> -void qemu_announce_self(AnnounceParameters *params)
> +void qemu_announce_self(AnnounceParameters *params, AnnounceType type)
>  {
>  AnnounceTimer *timer;
>  
> -timer = qemu_announce_timer_create(params, QEMU_CLOCK_REALTIME,
> -   qemu_announce_self_once);
> +timer = announce_timers[type];
> +if (!timer) {
> +timer = qemu_announce_timer_create(params, QEMU_CLOCK_REALTIME,
> +qemu_announce_self_once);
> +announce_timers[type] = timer;
> +timer->entry = _timers[type];
> +} else {
> +/* For now, don't do anything.  If we want to reset the timer,
> + * we'll need to add locking to each announce timer to prevent
> + * races between timeout handling and a reset.
> + */

I worry that this is racy anyway; if you issue a command and it doesn't
start because it's still doing the last one and you don't get any
warning of that it's difficult (as in my comment on the 12th).

Is this really racy, isn't this in the big lock ? Hmm I guess the qmp
triggered one is, this probably isn't.

Dave

> +return;
>

Re: [Qemu-devel] How do you -sd squashfs.img without truncating?

2017-05-30 Thread Rob Landley

On 05/30/2017 07:17 AM, Peter Maydell wrote:
> On 8 May 2017 at 06:08, Rob Landley  wrote:
>> As far as I can tell "qemu-system-arm -M vexpress-a9" only implements sd
>> card, not any conventional hard drive, and it uses an sdcard block size
>> of 262144 bytes rounded down. This means when I create a squashfs image
>> and feed it in through the sd card, it truncates it.
>>
>> Wouldn't -sd rounding _up_ be more useful?
> 
> This vaguely rings a bell but I forget exactly. I think the
> intention is that you're supposed to pass it a file of
> exactly the right length, not one that's too short. If we
> rounded up, then it would create the awkward question of
> "what happens when the guest writes into the bit of the
> sd card image that doesn't actually exist in the image file".
> 
> I'm not strongly attached to the current behaviour (especially
> if the behaviour for sd card images doesn't match that for
> hd images and other kinds of disk), but I think that may have
> been the "this is more complicated than it appears on the surface"
> reason that nobody's got round to trying to change the behaviour.
> It's been years though, so I might just be completely misremembering.

I worked around it by switching to the -M virt machine. (The project is
https://github.com/landley/mkroot if you're curious.)

Thanks,

Rob

Re: [Qemu-devel] [PATCH] linux-user: Fix TARGET_MAP* and TARGET_F_??LCK for hppa arch

2017-05-30 Thread Helge Deller

On 19.05.2017 15:59, Riku Voipio wrote:
> On Sun, Mar 12, 2017 at 08:17:46AM +1000, Richard Henderson wrote:
>> On 03/12/2017 03:50 AM, Helge Deller wrote:
>>> TARGET_MAP_TYPE needs to be 0x03 instead of 0x0f on the hppa
>>> architecture, otherwise it conflicts with MAP_FIXED which is 0x04.
>>>
>>> Add missing TARGET_MAP_STACK and TARGET_MAP_HUGETLB values.
>>>
>>> Fix TARGET_F_RDLCK, TARGET_F_WRLCK and TARGET_F_UNLCK.
>>>
>>> Signed-off-by: Helge Deller 
>>
>> I applied the MAP_FIXED and TARGET_F_* parts separately in my tree.  I'd
>> like to see what others think about the other MAP_* defines before including
>> that.
> 
> What's the current state of these patches? Are these patches still waiting for
> opinions?

It would be nice to get them included (if it hasn't happened yet).

Helge

Re: [Qemu-devel] [PATCH] live-block-ops.txt: Rewrite and improve it

2017-05-30 Thread Eric Blake

On 05/30/2017 10:38 AM, Kashyap Chamarthy wrote:
> This edition documents all four operations:
> 
>   - `block-stream`
>   - `block-commit`
>   - `drive-mirror` (& `blockdev-mirror`)
>   - `drive-backup` (& `blockdev-backup`)
> 
> Things considered while writing this document:
> 
>   - Use reStructuredText format.  It is gentler on the eye, and can be
> trivially converted to different formats.  (Another reason: upstream
> QEMU is considering to switch to Sphinx, which uses reStructuredText
> as its markup language.)
> 
>   - Raw QMP JSON output vs. 'qmp-shell'.  I debated with myself whether
> to only show raw QMP JSON output (as that is the canonical
> representation), or use 'qmp-shell', which takes key-value pairs.  I
> settled on the approach of: for the first occurance of a command,

s/occurance/occurrence/

> use raw JSON; for subsequent occurances, use 'qmp-shell', with an

s/occurances/occurrences/

> occasional exception.
> 
>   - Using 'node-name' vs. file path to refer to disks.  While we have
> `blockdev-{mirror, backup}` as 'node-name'-alternatives for
> `drive-{mirror, backup}`, the `block-{stream, commit}` commands
> still operate on file names for parameters 'base' and 'top'.  So I
> added a caveat at the beginning to that effect.
> 
> Refer this related thread that I started:
> https://lists.nongnu.org/archive/html/qemu-devel/2017-05/msg06466.html
> "[RFC] Making 'block-stream', and 'block-commit' accept node-name"
> 
> All commands showed in this document were tested while documenting.
> 
> Thanks: Eric Blake for the section: "A note on points-in-time vs file
> names".  This useful bit was originally articulated by Eric in his
> KVMForum 2015 presentation, so I included that specific bit in this
> document.
> 
> Signed-off-by: Kashyap Chamarthy 
> ---
> * An HTML rendered version is here:
>   https://kashyapc.fedorapeople.org/virt/qemu/live-block-operations.html

Helpful; thanks!

> diff --git a/docs/live-block-operations.rst b/docs/live-block-operations.rst
> new file mode 100644
> index 
> ..19107776cd1edd20fac17ddda192d677199190d9
> --- /dev/null
> +++ b/docs/live-block-operations.rst
> @@ -0,0 +1,757 @@
> +
> +Live Block Device Operations
> +

Is it worth mentioning a copyright/license in this prologue?  It's not
mandatory (you get the default of GPLv2+ based on the top-level if you
don't do it), but being explicit never hurts.

> +
> +Table of Contents
> +=
> +
> +(1) `Disk image backing chain notation`_
> +(2) `Brief overview of live block QMP primitives`_
> +(3) `Interacting with a QEMU instance`_
> +(4) `Example disk image chain`_
> +(5) `A note on points-in-time vs file names`_
> +(6) `Live block streaming -- `block-stream``_
> +(7) `QMP invocation for `block-stream``_
> +(8) `Live block commit -- `block-commit``_
> +(9) `QMP invocation for `block-commit``_
> +(10) `Live disk synchronization -- `drive-mirror`&`blockdev-mirror``_
> +(11) `QMP invocation for `drive-mirror``_
> +(12) `Notes on `blockdev-mirror``_
> +(13) `Live disk backup -- `drive-backup`&`blockdev-backup``_
> +(14) `QMP invocation for `drive-backup``_
> +(15) `Notes on `blockdev-backup``_
> +

Does .rst have any mechanisms for ensuring the ToC doesn't get out of
sync if we later change sections around?

> +
> +.. _`Disk image backing chain notation`:
> +

> +
> +Arrow to be read as: Image [A] is the backing file of disk image [B].

Maybe: "The arrow can be read as:"

> +And live QEMU is currently writing to image [B], consequently, it is
> +also referred to as the "active layer".
> +
> +There are two kinds of terminology that is common when referring to

s/that is common/that are common/

> +files in a disk image backing chain:
> +

> +
> +.. _`Brief overview of live block QMP primitives`:
> +
> +Brief overview of live block QMP primitives
> +---
> +
> +The following are the four different kinds of live block operations that
> +QEMU block layer supports.
> +
> +- `block-stream`: Live copy data from overlay files into backing images.

Backwards.  This is a live copy of data from backing images into
overlays (with the optional goal of removing the backing image from the
chain).

> +
> +- `block-commit`: Live merge data from backing files into overlay
> +  images.  Since QEMU 2.0, this includes "active `block-commit`" (i.e.
> +  merge the current active layer into the base image).

Backwards.  This is live merge of data from overlay images into backing
images (with the optional goal of removing the overlay image from the
chain).

> +
> +- `drive-mirror` (and `blockdev-mirror`): Synchronize running disk to
> +  another image.
> +
> +- `drive-backup` (and `blockdev-backup`): Point-in-time (live) copy of a
> +  block device to a destination.

These are okay.

> +
> +
> +.. _`Interacting

Re: [Qemu-devel] [PATCH 25/26] target/s390x: implement TRANSLATE ONE/TWO TO ONE/TWO

2017-05-30 Thread Aurelien Jarno

On 2017-05-30 09:45, Richard Henderson wrote:
> On 05/29/2017 04:17 AM, Aurelien Jarno wrote:
> > On 2017-05-26 10:10, Richard Henderson wrote:
> > > On 05/25/2017 02:05 PM, Aurelien Jarno wrote:
> > > > +uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
> > > > +  uint32_t sizes)
> > > > +{
> > > > +uintptr_t ra = GETPC();
> > > > +int dsize = (sizes & 1) ? 1 : 2;
> > > > +int ssize = (sizes & 2) ? 1 : 2;
> > > > +uint16_t tst = env->regs[0] & ((1 << (8 * dsize)) - 1);
> > > 
> > > I think you should pass in tst as an argument.  That way you can pass in 
> > > an
> > > out-of-band value when we implement ETF2 and test field M3 bit 3.
> > 
> > I don't mind passing r0 as an argument. That said if we want to pass tst
> > or bundle the M3 field, it means we need to use TCG instructions to do
> > so. I am not sure it brings a lot compare to doing so in the helper
> > side.
> 
> Not at all -- the M3 bit test would be a translation-time check.

I still don't really see the point. On the TCG side it means we need
something like that:

if (m3 & 1) {
   tcg_gen_movi_tl(r0, -1);
} else if (dsize == 1) {
   tcg_gen_ext8u(r0, regs[0]);
} else if (dsize == 2) 
   tcg_gen_ext16u(r0, regs[0]);
}

On the helper side we then need to check if the value passed equals -1
or not to get m3 instead of directly accessing the value in register 0.
If we want to bundle m3 with something else, it might be better to bundle
it with the "sizes" argument.

If what worries you is the cast of tst to uint8_t / uint16_t as function
of dsize in the helper, we can rename trXX into an inline function
do_trXX and use one helper per instruction instead of a common helper
for the 3 instructions.

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net

[Qemu-devel] [PULL v2 3/5] target/sh4: introduce DELAY_SLOT_MASK

2017-05-30 Thread Aurelien Jarno

This will make easier the introduction of a new flag in the next
patches.

Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Richard Henderson 
Signed-off-by: Aurelien Jarno 
---
 target/sh4/cpu.h   |  3 ++-
 target/sh4/helper.c|  4 ++--
 target/sh4/translate.c | 17 -
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
index 6c07c6b24b..7969c9af98 100644
--- a/target/sh4/cpu.h
+++ b/target/sh4/cpu.h
@@ -91,6 +91,7 @@
 #define FPSCR_RM_NEAREST   (0 << 0)
 #define FPSCR_RM_ZERO  (1 << 0)
 
+#define DELAY_SLOT_MASK0x3
 #define DELAY_SLOT (1 << 0)
 #define DELAY_SLOT_CONDITIONAL (1 << 1)
 
@@ -380,7 +381,7 @@ static inline void cpu_get_tb_cpu_state(CPUSH4State *env, 
target_ulong *pc,
 {
 *pc = env->pc;
 *cs_base = 0;
-*flags = (env->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) /* Bits 0-1 
*/
+*flags = (env->flags & DELAY_SLOT_MASK)/* Bits  0- 1 */
 | (env->fpscr & (FPSCR_FR | FPSCR_SZ | FPSCR_PR))  /* Bits 19-21 */
 | (env->sr & ((1u << SR_MD) | (1u << SR_RB)))  /* Bits 29-30 */
 | (env->sr & (1u << SR_FD))/* Bit 15 */
diff --git a/target/sh4/helper.c b/target/sh4/helper.c
index 16fcd1bbf2..5785d6d22a 100644
--- a/target/sh4/helper.c
+++ b/target/sh4/helper.c
@@ -172,11 +172,11 @@ void superh_cpu_do_interrupt(CPUState *cs)
 env->sgr = env->gregs[15];
 env->sr |= (1u << SR_BL) | (1u << SR_MD) | (1u << SR_RB);
 
-if (env->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) {
+if (env->flags & DELAY_SLOT_MASK) {
 /* Branch instruction should be executed again before delay slot. */
env->spc -= 2;
/* Clear flags for exception/interrupt routine. */
-env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
+env->flags &= ~DELAY_SLOT_MASK;
 }
 
 if (do_exp) {
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index 0bc2f9ff19..aba316f593 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -217,8 +217,7 @@ static inline void gen_save_cpu_state(DisasContext *ctx, 
bool save_pc)
 if (ctx->delayed_pc != (uint32_t) -1) {
 tcg_gen_movi_i32(cpu_delayed_pc, ctx->delayed_pc);
 }
-if ((ctx->tbflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL))
-!= ctx->envflags) {
+if ((ctx->tbflags & DELAY_SLOT_MASK) != ctx->envflags) {
 tcg_gen_movi_i32(cpu_flags, ctx->envflags);
 }
 }
@@ -329,7 +328,7 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg)
 #define DREG(x) FREG(x) /* Assumes lsb of (x) is always 0 */
 
 #define CHECK_NOT_DELAY_SLOT \
-if (ctx->envflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { \
+if (ctx->envflags & DELAY_SLOT_MASK) {   \
 gen_save_cpu_state(ctx, true);   \
 gen_helper_raise_slot_illegal_instruction(cpu_env);  \
 ctx->bstate = BS_EXCP;   \
@@ -339,7 +338,7 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg)
 #define CHECK_PRIVILEGED \
 if (IS_USER(ctx)) {  \
 gen_save_cpu_state(ctx, true);   \
-if (ctx->envflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { \
+if (ctx->envflags & DELAY_SLOT_MASK) {   \
 gen_helper_raise_slot_illegal_instruction(cpu_env);  \
 } else { \
 gen_helper_raise_illegal_instruction(cpu_env);   \
@@ -351,7 +350,7 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg)
 #define CHECK_FPU_ENABLED\
 if (ctx->tbflags & (1u << SR_FD)) {  \
 gen_save_cpu_state(ctx, true);   \
-if (ctx->envflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { \
+if (ctx->envflags & DELAY_SLOT_MASK) {   \
 gen_helper_raise_slot_fpu_disable(cpu_env);  \
 } else { \
 gen_helper_raise_fpu_disable(cpu_env);   \
@@ -1784,7 +1783,7 @@ static void _decode_opc(DisasContext * ctx)
 fflush(stderr);
 #endif
 gen_save_cpu_state(ctx, true);
-if (ctx->envflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) {
+if (ctx->envflags & DELAY_SLOT_MASK) {
 gen_helper_raise_slot_illegal_instruction(cpu_env);
 } else {
 gen_helper_raise_illegal_instruction(cpu_env);
@@ -1798,9 +1797,9 @@ static void decode_opc(DisasContext * ctx)
 
 _decode_opc(ctx);
 
-if (old_flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) {
+if (old_flags & DELAY_SLOT_MASK) {
 /* go

Re: [Qemu-devel] [PATCH 03/12] migration: Switch to using announcement timer

2017-05-30 Thread Dr. David Alan Gilbert

* Vladislav Yasevich (vyase...@redhat.com) wrote:
> Switch qemu_announce_self and virtio annoucements to use
> the announcement timer framework.  This makes sure that both
> timers use the same timeouts and number of annoucement attempts
> 
> Based on work by Dr. David Alan Gilbert 
> 
> Signed-off-by: Vladislav Yasevich 
> ---
>  hw/net/virtio-net.c| 28 
>  include/hw/virtio/virtio-net.h |  3 +--
>  include/migration/vmstate.h| 17 +++--
>  include/sysemu/sysemu.h|  2 +-
>  migration/migration.c  |  2 +-
>  migration/savevm.c | 28 ++--
>  6 files changed, 44 insertions(+), 36 deletions(-)
> 
> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> index 7d091c9..1c65825 100644
> --- a/hw/net/virtio-net.c
> +++ b/hw/net/virtio-net.c
> @@ -25,6 +25,7 @@
>  #include "qapi/qmp/qjson.h"
>  #include "qapi-event.h"
>  #include "hw/virtio/virtio-access.h"
> +#include "migration/migration.h"
>  
>  #define VIRTIO_NET_VM_VERSION11
>  
> @@ -115,7 +116,7 @@ static void virtio_net_announce_timer(void *opaque)
>  VirtIONet *n = opaque;
>  VirtIODevice *vdev = VIRTIO_DEVICE(n);
>  
> -n->announce_counter--;
> +n->announce_timer->round--;
>  n->status |= VIRTIO_NET_S_ANNOUNCE;
>  virtio_notify_config(vdev);
>  }
> @@ -427,8 +428,8 @@ static void virtio_net_reset(VirtIODevice *vdev)
>  n->nobcast = 0;
>  /* multiqueue is disabled by default */
>  n->curr_queues = 1;
> -timer_del(n->announce_timer);
> -n->announce_counter = 0;
> +timer_del(n->announce_timer->tm);
> +n->announce_timer->round = 0;
>  n->status &= ~VIRTIO_NET_S_ANNOUNCE;
>  
>  /* Flush any MAC and VLAN filter table state */
> @@ -872,10 +873,10 @@ static int virtio_net_handle_announce(VirtIONet *n, 
> uint8_t cmd,
>  if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
>  n->status & VIRTIO_NET_S_ANNOUNCE) {
>  n->status &= ~VIRTIO_NET_S_ANNOUNCE;
> -if (n->announce_counter) {
> -timer_mod(n->announce_timer,
> +if (n->announce_timer->round) {
> +timer_mod(n->announce_timer->tm,
>qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
> -  self_announce_delay(n->announce_counter));
> +  self_announce_delay(n->announce_timer));
>  }
>  return VIRTIO_NET_OK;
>  } else {
> @@ -1615,8 +1616,8 @@ static int virtio_net_post_load_device(void *opaque, 
> int version_id)
>  
>  if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
>  virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
> -n->announce_counter = SELF_ANNOUNCE_ROUNDS;
> -timer_mod(n->announce_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL));
> +n->announce_timer->round = n->announce_timer->params.rounds;
> +timer_mod(n->announce_timer->tm, 
> qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL));

Do you think it's worth having a 
  qemu_announce_timer_init(AnnounceTimer *, QEMU_CLOCK_*)
that would initialise the round and any other values and 
do the initial timer_mod ?

>  }
>  
>  return 0;
> @@ -1938,8 +1939,10 @@ static void virtio_net_device_realize(DeviceState 
> *dev, Error **errp)
>  qemu_macaddr_default_if_unset(>nic_conf.macaddr);
>  memcpy(>mac[0], >nic_conf.macaddr, sizeof(n->mac));
>  n->status = VIRTIO_NET_S_LINK_UP;
> -n->announce_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
> - virtio_net_announce_timer, n);
> +n->announce_timer = qemu_announce_timer_new(qemu_get_announce_params(),
> +QEMU_CLOCK_VIRTUAL);
> +n->announce_timer->tm = timer_new_ms(QEMU_CLOCK_VIRTUAL,
> +  virtio_net_announce_timer, n);
>  
>  if (n->netclient_type) {
>  /*
> @@ -2001,8 +2004,9 @@ static void virtio_net_device_unrealize(DeviceState 
> *dev, Error **errp)
>  virtio_net_del_queue(n, i);
>  }
>  
> -timer_del(n->announce_timer);
> -timer_free(n->announce_timer);
> +timer_del(n->announce_timer->tm);
> +timer_free(n->announce_timer->tm);
> +g_free(n->announce_timer);

Hmm, why is this all safe - I guess this is in the BQL?

>  g_free(n->vqs);
>  qemu_del_nic(n->nic);
>  virtio_cleanup(vdev);
> diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
> index 1eec9a2..51dd4c3 100644
> --- a/include/hw/virtio/virtio-net.h
> +++ b/include/hw/virtio/virtio-net.h
> @@ -94,8 +94,7 @@ typedef struct VirtIONet {
>  char *netclient_name;
>  char *netclient_type;
>  uint64_t curr_guest_offloads;
> -QEMUTimer *announce_timer;
> -int announce_counter;
> +AnnounceTimer *announce_timer;
>  bool needs_vnet_hdr_swap;
>  } VirtIONet;
>  
> diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
>

[Qemu-devel] [PULL v2 5/5] target/sh4: fix RTE instruction delay slot

2017-05-30 Thread Aurelien Jarno

The ReTurn from Exception (RTE) instruction loads the system register
(SR) with the saved system register (SSR). It has a delay slot, and
behaves specially according to the SH4 manual:

  The SR value accessed by the instruction in the RTE delay slot is the
  value restored from SSR by the RTE instruction. The SR and MD values
  defined prior to RTE execution are used to fetch the instruction in
  the RTE delay slot.

The instruction in the delay slot being often a NOP, it doesn't cause
any issue most of the time except in some rare cases where the NOP is
being splitted in a different TB (for example when the TCG op buffer
is full). In that case the NOP is fetched with the user permissions
and causes an instruction TLB protection violation exception.

This patches fixes that by introducing a new delay slot flag for the
RTE instruction. Given it's a privileged instruction, the RTE delay
slot instruction is always fetched in privileged mode. It is therefore
enough to to check for this flag in cpu_mmu_index.

Reviewed-by: Richard Henderson 
Signed-off-by: Aurelien Jarno 
---
 target/sh4/cpu.h   | 13 ++---
 target/sh4/translate.c |  8 ++--
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
index 7969c9af98..ffb91687b8 100644
--- a/target/sh4/cpu.h
+++ b/target/sh4/cpu.h
@@ -91,9 +91,10 @@
 #define FPSCR_RM_NEAREST   (0 << 0)
 #define FPSCR_RM_ZERO  (1 << 0)
 
-#define DELAY_SLOT_MASK0x3
+#define DELAY_SLOT_MASK0x7
 #define DELAY_SLOT (1 << 0)
 #define DELAY_SLOT_CONDITIONAL (1 << 1)
+#define DELAY_SLOT_RTE (1 << 2)
 
 typedef struct tlb_t {
 uint32_t vpn;  /* virtual page number */
@@ -264,7 +265,13 @@ void cpu_load_tlb(CPUSH4State * env);
 #define MMU_USER_IDX 1
 static inline int cpu_mmu_index (CPUSH4State *env, bool ifetch)
 {
-return (env->sr & (1u << SR_MD)) == 0 ? 1 : 0;
+/* The instruction in a RTE delay slot is fetched in privileged
+   mode, but executed in user mode.  */
+if (ifetch && (env->flags & DELAY_SLOT_RTE)) {
+return 0;
+} else {
+return (env->sr & (1u << SR_MD)) == 0 ? 1 : 0;
+}
 }
 
 #include "exec/cpu-all.h"
@@ -381,7 +388,7 @@ static inline void cpu_get_tb_cpu_state(CPUSH4State *env, 
target_ulong *pc,
 {
 *pc = env->pc;
 *cs_base = 0;
-*flags = (env->flags & DELAY_SLOT_MASK)/* Bits  0- 1 */
+*flags = (env->flags & DELAY_SLOT_MASK)/* Bits  0- 2 */
 | (env->fpscr & (FPSCR_FR | FPSCR_SZ | FPSCR_PR))  /* Bits 19-21 */
 | (env->sr & ((1u << SR_MD) | (1u << SR_RB)))  /* Bits 29-30 */
 | (env->sr & (1u << SR_FD))/* Bit 15 */
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index aba316f593..8bc132b27b 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -185,6 +185,9 @@ void superh_cpu_dump_state(CPUState *cs, FILE *f,
 } else if (env->flags & DELAY_SLOT_CONDITIONAL) {
cpu_fprintf(f, "in conditional delay slot (delayed_pc=0x%08x)\n",
env->delayed_pc);
+} else if (env->flags & DELAY_SLOT_RTE) {
+cpu_fprintf(f, "in rte delay slot (delayed_pc=0x%08x)\n",
+env->delayed_pc);
 }
 }
 
@@ -427,8 +430,9 @@ static void _decode_opc(DisasContext * ctx)
CHECK_NOT_DELAY_SLOT
 gen_write_sr(cpu_ssr);
tcg_gen_mov_i32(cpu_delayed_pc, cpu_spc);
-ctx->envflags |= DELAY_SLOT;
+ctx->envflags |= DELAY_SLOT_RTE;
ctx->delayed_pc = (uint32_t) - 1;
+ctx->bstate = BS_STOP;
return;
 case 0x0058:   /* sets */
 tcg_gen_ori_i32(cpu_sr, cpu_sr, (1u << SR_S));
@@ -1804,7 +1808,7 @@ static void decode_opc(DisasContext * ctx)
 ctx->bstate = BS_BRANCH;
 if (old_flags & DELAY_SLOT_CONDITIONAL) {
gen_delayed_conditional_jump(ctx);
-} else if (old_flags & DELAY_SLOT) {
+} else {
 gen_jump(ctx);
}
 
-- 
2.11.0

[Qemu-devel] [PULL v2 0/5] Queued target/sh4 patches

2017-05-30 Thread Aurelien Jarno

The following changes since commit 0748b3526e8cb78b9cd64208426bfc3d54a72b04:

  Merge remote-tracking branch 'kwolf/tags/for-upstream' into staging 
(2017-05-30 14:15:15 +0100)

are available in the git repository at:

  git://git.aurel32.net/qemu.git tags/pull-target-sh4-20170530

for you to fetch changes up to be53081a619443dc4512039d89345475ef7d9a46:

  target/sh4: fix RTE instruction delay slot (2017-05-30 21:00:56 +0200)


Queued target/sh4 patches


Aurelien Jarno (5):
  target/sh4: log unauthorized accesses using qemu_log_mask
  target/sh4: fix reset when using a kernel and an initrd
  target/sh4: introduce DELAY_SLOT_MASK
  target/sh4: ignore interrupts in a delay slot
  target/sh4: fix RTE instruction delay slot

 target/sh4/cpu.h   | 12 ++--
 target/sh4/helper.c| 28 ++--
 target/sh4/translate.c | 25 ++---
 3 files changed, 46 insertions(+), 19 deletions(-)


Note that is based against master, but rather against
https://github.com/stefanha/qemu/commits/staging

-- 
2.11.0

[Qemu-devel] [PULL v2 4/5] target/sh4: ignore interrupts in a delay slot

2017-05-30 Thread Aurelien Jarno

Delay slots are indivisible, therefore avoid scheduling an interrupt in
the delay slot. However exceptions are possible.

Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Richard Henderson 
Signed-off-by: Aurelien Jarno 
---
 target/sh4/helper.c | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/target/sh4/helper.c b/target/sh4/helper.c
index 5785d6d22a..28d93c2543 100644
--- a/target/sh4/helper.c
+++ b/target/sh4/helper.c
@@ -871,8 +871,16 @@ int cpu_sh4_is_cached(CPUSH4State * env, target_ulong addr)
 bool superh_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
 {
 if (interrupt_request & CPU_INTERRUPT_HARD) {
-superh_cpu_do_interrupt(cs);
-return true;
+SuperHCPU *cpu = SUPERH_CPU(cs);
+CPUSH4State *env = >env;
+
+/* Delay slots are indivisible, ignore interrupts */
+if (env->flags & DELAY_SLOT_MASK) {
+return false;
+} else {
+superh_cpu_do_interrupt(cs);
+return true;
+}
 }
 return false;
 }
-- 
2.11.0

Re: [Qemu-devel] [RESEND PATCH 2/2] hostmem-file: add an attribute 'align' to set its alignment

2017-05-30 Thread Dan Williams

On Tue, May 30, 2017 at 5:17 AM, Paolo Bonzini  wrote:
>
>
> On 26/05/2017 16:24, Dan Williams wrote:
>>> For DAX device only, QEMU can figure out the proper alignment by
>>> itself. However, I'm not sure whether there are other non-DAX cases
>>> requiring non-default alignment, so I think it's better to just add an
>>> interface (i.e. align attribute) in QEMU and let other management
>>> tools (e.g. libvirt?) fill a proper value.
>> I can't imagine any cases where you would want to specify an
>> alignment. If it's regular file mmap any alignment is fine, and if
>> it's device-dax only the configured alignment of the device instance
>> is allowed. So, I don't think this should be a configurable option,
>> just read it from the device instance and you're done.
>
> A 2M or 1G alignment lets KVM use EPT hugepages if the host physical
> addresses are contiguous and 2M- or 1G-aligned.
>
> QEMU only does this for hugetlbfs currently, where the requirement on
> the host physical addresses is always satisfied.  Would the same apply
> to NVDIMM device DAX?
>

Yes, I believe it is similar. Device-dax guarantees and enforces (mmap
area must be aligned) a given fault granularity.

[Qemu-devel] [PULL v2 1/5] target/sh4: log unauthorized accesses using qemu_log_mask

2017-05-30 Thread Aurelien Jarno

qemu_log_mask() is preferred over fprintf() for logging errors.

Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Richard Henderson 
Signed-off-by: Aurelien Jarno 
---
 target/sh4/helper.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/sh4/helper.c b/target/sh4/helper.c
index 8f8ce81401..4c024f9529 100644
--- a/target/sh4/helper.c
+++ b/target/sh4/helper.c
@@ -420,7 +420,7 @@ static int get_physical_address(CPUSH4State * env, 
target_ulong * physical,
 if (!(env->sr & (1u << SR_MD))
&& (address < 0xe000 || address >= 0xe400)) {
/* Unauthorized access in user mode (only store queues are 
available) */
-   fprintf(stderr, "Unauthorized access\n");
+qemu_log_mask(LOG_GUEST_ERROR, "Unauthorized access\n");
if (rw == 0)
return MMU_DADDR_ERROR_READ;
else if (rw == 1)
-- 
2.11.0

[Qemu-devel] [PULL v2 2/5] target/sh4: fix reset when using a kernel and an initrd

2017-05-30 Thread Aurelien Jarno

When a masked exception happens, the SH4 CPU generates a non-masked
reset exception, which then jumps to the reset vector at address
0xA000. While this is emulated correctly in QEMU, this does not
work when using a kernel and initrd as this address then contain an
illegal instruction (and there is no guarantee the kernel and initrd
haven't been overwritten).

Therefore call qemu_system_reset_request to reload the kernel and initrd
and load the program counter to the kernel entry point.

Reviewed-by: Richard Henderson 
Signed-off-by: Aurelien Jarno 
---
 target/sh4/helper.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/target/sh4/helper.c b/target/sh4/helper.c
index 4c024f9529..16fcd1bbf2 100644
--- a/target/sh4/helper.c
+++ b/target/sh4/helper.c
@@ -21,6 +21,7 @@
 #include "cpu.h"
 #include "exec/exec-all.h"
 #include "exec/log.h"
+#include "sysemu/sysemu.h"
 
 #if !defined(CONFIG_USER_ONLY)
 #include "hw/sh4/sh_intc.h"
@@ -92,7 +93,14 @@ void superh_cpu_do_interrupt(CPUState *cs)
 
 if (env->sr & (1u << SR_BL)) {
 if (do_exp && cs->exception_index != 0x1e0) {
-cs->exception_index = 0x000; /* masked exception -> reset */
+/* In theory a masked exception generates a reset exception,
+   which in turn jumps to the reset vector. However this only
+   works when using a bootloader. When using a kernel and an
+   initrd, they need to be reloaded and the program counter
+   should be loaded with the kernel entry point.
+   qemu_system_reset_request takes care of that.  */
+qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
+return;
 }
 if (do_irq && !env->in_sleep) {
 return; /* masked */
-- 
2.11.0

[Qemu-devel] [PULL 2/2] numa: Fix format string for "Invalid node" message

2017-05-30 Thread Eduardo Habkost

Some compilers complain about the PRIu16 format string with the
MAX(src, dst) and MAX_NODES arguments.  Example output from Apple LLVM
version 7.3.0 (clang-703.0.31):

  numa.c:236:20: warning: format specifies type 'unsigned short' but the 
argument has type 'int' [-Wformat]
 MAX(src, dst), MAX_NODES);
  ~~~^~~~
  include/qapi/error.h:163:35: note: expanded from macro 'error_setg'
  (fmt), ## __VA_ARGS__)
^~~
  glib/2.52.2/include/glib-2.0/glib/gmacros.h:288:20: note: expanded from macro 
'MAX'
  #define MAX(a, b)  (((a) > (b)) ? (a) : (b))
 ^
  numa.c:236:35: warning: format specifies type 'unsigned short' but the 
argument has type 'int' [-Wformat]
 MAX(src, dst), MAX_NODES);
  ~~^
  include/qapi/error.h:163:35: note: expanded from macro 'error_setg'
  (fmt), ## __VA_ARGS__)
^~~
  include/sysemu/sysemu.h:165:19: note: expanded from macro 'MAX_NODES'
  #define MAX_NODES 128
^~~
MAX(src, dst) promotes the src and dst arguments to int, and MAX_NODES
is an int.  Use %d to silence those warnings.

Signed-off-by: Eduardo Habkost 
Message-Id: <20170530184013.31044-1-ehabk...@redhat.com>
Reviewed-by: Eric Blake 
Signed-off-by: Eduardo Habkost 
---
 numa.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/numa.c b/numa.c
index ca731455e9..be50c62aa9 100644
--- a/numa.c
+++ b/numa.c
@@ -231,8 +231,7 @@ static void parse_numa_distance(NumaDistOptions *dist, 
Error **errp)
 
 if (src >= MAX_NODES || dst >= MAX_NODES) {
 error_setg(errp,
-   "Invalid node %" PRIu16
-   ", max possible could be %" PRIu16,
+   "Invalid node %d, max possible could be %d",
MAX(src, dst), MAX_NODES);
 return;
 }
-- 
2.11.0.259.g40922b1

[Qemu-devel] [PULL 1/2] numa-test: fix query-cpus leaks

2017-05-30 Thread Eduardo Habkost

From: Marc-André Lureau 

Fix test leaks introduced in commit 2941020a476.

(and small extra space removed)

Spotted by ASAN.

Signed-off-by: Marc-André Lureau 
Message-Id: <20170526110456.32004-1-marcandre.lur...@redhat.com>
Reviewed-by: Eric Blake 
Signed-off-by: Eduardo Habkost 
---
 tests/numa-test.c | 14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/tests/numa-test.c b/tests/numa-test.c
index c3475d6d5e..3f636840b1 100644
--- a/tests/numa-test.c
+++ b/tests/numa-test.c
@@ -92,7 +92,7 @@ static QList *get_cpus(QDict **resp)
 *resp = qmp("{ 'execute': 'query-cpus' }");
 g_assert(*resp);
 g_assert(qdict_haskey(*resp, "return"));
-return  qdict_get_qlist(*resp, "return");
+return qdict_get_qlist(*resp, "return");
 }
 
 static void test_query_cpus(const void *data)
@@ -100,7 +100,7 @@ static void test_query_cpus(const void *data)
 char *cli;
 QDict *resp;
 QList *cpus;
-const QObject *e;
+QObject *e;
 
 cli = make_cli(data, "-smp 8 -numa node,cpus=0-3 -numa node,cpus=4-7");
 qtest_start(cli);
@@ -124,6 +124,7 @@ static void test_query_cpus(const void *data)
 } else {
 g_assert_cmpint(node, ==, 1);
 }
+qobject_decref(e);
 }
 
 QDECREF(resp);
@@ -136,7 +137,7 @@ static void pc_numa_cpu(const void *data)
 char *cli;
 QDict *resp;
 QList *cpus;
-const QObject *e;
+QObject *e;
 
 cli = make_cli(data, "-cpu pentium -smp 8,sockets=2,cores=2,threads=2 "
 "-numa node,nodeid=0 -numa node,nodeid=1 "
@@ -176,6 +177,7 @@ static void pc_numa_cpu(const void *data)
 } else {
 g_assert(false);
 }
+qobject_decref(e);
 }
 
 QDECREF(resp);
@@ -188,7 +190,7 @@ static void spapr_numa_cpu(const void *data)
 char *cli;
 QDict *resp;
 QList *cpus;
-const QObject *e;
+QObject *e;
 
 cli = make_cli(data, "-smp 4,cores=4 "
 "-numa node,nodeid=0 -numa node,nodeid=1 "
@@ -220,6 +222,7 @@ static void spapr_numa_cpu(const void *data)
 } else {
 g_assert(false);
 }
+qobject_decref(e);
 }
 
 QDECREF(resp);
@@ -232,7 +235,7 @@ static void aarch64_numa_cpu(const void *data)
 char *cli;
 QDict *resp;
 QList *cpus;
-const QObject *e;
+QObject *e;
 
 cli = make_cli(data, "-smp 2 "
 "-numa node,nodeid=0 -numa node,nodeid=1 "
@@ -262,6 +265,7 @@ static void aarch64_numa_cpu(const void *data)
 } else {
 g_assert(false);
 }
+qobject_decref(e);
 }
 
 QDECREF(resp);
-- 
2.11.0.259.g40922b1

[Qemu-devel] [PULL 0/2] NUMA fixes, 2017-05-30

2017-05-30 Thread Eduardo Habkost

The following changes since commit 0748b3526e8cb78b9cd64208426bfc3d54a72b04:

  Merge remote-tracking branch 'kwolf/tags/for-upstream' into staging 
(2017-05-30 14:15:15 +0100)

are available in the git repository at:

  git://github.com/ehabkost/qemu.git tags/numa-pull-request

for you to fetch changes up to f892291eee376505cfec8b6cade7ccf952a6d3e0:

  numa: Fix format string for "Invalid node" message (2017-05-30 16:09:58 -0300)


NUMA fixes, 2017-05-30



Eduardo Habkost (1):
  numa: Fix format string for "Invalid node" message

Marc-André Lureau (1):
  numa-test: fix query-cpus leaks

 numa.c|  3 +--
 tests/numa-test.c | 14 +-
 2 files changed, 10 insertions(+), 7 deletions(-)

-- 
2.11.0.259.g40922b1

Re: [Qemu-devel] [PATCH] numa: Fix format string for "Invalid node" message

2017-05-30 Thread Eric Blake

On 05/30/2017 01:40 PM, Eduardo Habkost wrote:
> Some compilers complain about the PRIu16 format string with the
> MAX(src, dst) and MAX_NODES arguments.  Example output from Apple LLVM
> version 7.3.0 (clang-703.0.31):

> ^~~
> MAX(src, dst) promotes the src and dst arguments to int, and MAX_NODES
> is an int.  Use %d to silence those warnings.
> 
> Signed-off-by: Eduardo Habkost 
> ---
>  numa.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)

Reviewed-by: Eric Blake 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH 01/12] migration: Introduce announce parameters

2017-05-30 Thread Dr. David Alan Gilbert

* Vladislav Yasevich (vyase...@redhat.com) wrote:
> Add parameters that control RARP/GARP announcement timeouts.
> The parameters structure is added to the QAPI and a qmp command
> is added to set/get the parameter data.
> 
> Based on work by "Dr. David Alan Gilbert" 
> 
> Signed-off-by: Vladislav Yasevich 

Thanks; this is pretty much duplicating the migrate-parameter code;
that's OK but I wonder if there's an easier/better way.
If you made a global gom object with these values as properties, then
they could be set with either -global or  with 'qom-set'

Would that work/be simpler?
You wouldn't need to add any new commands.

(Some typo spots below)

Dave

> ---
>  include/sysemu/sysemu.h |  7 
>  migration/savevm.c  | 98 
> +
>  qapi-schema.json| 84 ++
>  3 files changed, 189 insertions(+)
> 
> diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
> index 83c1ceb..7fd49c4 100644
> --- a/include/sysemu/sysemu.h
> +++ b/include/sysemu/sysemu.h
> @@ -78,6 +78,13 @@ void qemu_remove_machine_init_done_notifier(Notifier 
> *notify);
>  int save_vmstate(const char *name, Error **errp);
>  int load_vmstate(const char *name, Error **errp);
>  
> +AnnounceParameters *qemu_get_announce_params(void);
> +void qemu_fill_announce_parameters(AnnounceParameters **to,
> +   AnnounceParameters *from);
> +bool qemu_validate_announce_parameters(AnnounceParameters *params,
> +   Error **errp);
> +void qemu_set_announce_parameters(AnnounceParameters *announce_params,
> +  AnnounceParameters *params);
>  void qemu_announce_self(void);
>  
>  /* Subcommands for QEMU_VM_COMMAND */
> diff --git a/migration/savevm.c b/migration/savevm.c
> index f5e8194..cee2837 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -78,6 +78,104 @@ static struct mig_cmd_args {
>  [MIG_CMD_MAX]  = { .len = -1, .name = "MAX" },
>  };
>  
> +#define QEMU_ANNOUNCE_INITIAL50
> +#define QEMU_ANNOUNCE_MAX   550
> +#define QEMU_ANNOUNCE_ROUNDS  5
> +#define QEMU_ANNOUNCE_STEP  100
> +
> +AnnounceParameters *qemu_get_announce_params(void)
> +{
> +static AnnounceParameters announce = {
> +.initial = QEMU_ANNOUNCE_INITIAL,
> +.max = QEMU_ANNOUNCE_MAX,
> +.rounds = QEMU_ANNOUNCE_ROUNDS,
> +.step = QEMU_ANNOUNCE_STEP,
> +};
> +
> +return 
> +}
> +
> +void qemu_fill_announce_parameters(AnnounceParameters **to,
> +   AnnounceParameters *from)
> +{
> +AnnounceParameters *params;
> +
> +params = *to = g_malloc0(sizeof(*params));
> +params->has_initial = true;
> +params->initial = from->initial;
> +params->has_max = true;
> +params->max = from->max;
> +params->has_rounds = true;
> +params->rounds = from->rounds;
> +params->has_step = true;
> +params->step = from->step;
> +}
> +
> +bool qemu_validate_announce_parameters(AnnounceParameters *params, Error 
> **errp)
> +{
> +if (params->has_initial &&
> +(params->initial < 1 || params->initial > 10)) {
> +error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "initial",
> +   "is invalid, it should be in the range of 1 to 10 
> ms");
> +return false;
> +}
> +if (params->has_max &&
> +(params->max < 1 || params->max > 10)) {
> +error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "max",
> +   "is invalid, it should be in the range of 1 to 10 
> ms");
> +return false;
> +}
> +if (params->has_rounds &&
> +(params->rounds < 1 || params->rounds > 1000)) {
> +error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "rounds",
> +   "is invalid, it should be in the range of 1 to 1000");
> +return false;
> +}
> +if (params->has_step &&
> +(params->step < 0 || params->step > 1)) {
> +error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "step",
> +   "is invalid, it should be in the range of 1 to 1 ms");
> +return false;
> +}
> +
> +return true;
> +}
> +
> +void qemu_set_announce_parameters(AnnounceParameters *announce_params,
> +  AnnounceParameters *params)
> +{
> +if (params->has_initial) {
> +announce_params->initial = params->initial;
> +}
> +if (params->has_max) {
> +announce_params->max = params->max;
> +}
> +if (params->has_rounds) {
> +announce_params->rounds = params->rounds;
> +}
> +if (params->has_step) {
> +announce_params->step = params->step;
> +}
> +}
> +
> +AnnounceParameters *qmp_query_announce_parameters(Error **errp)
> +{
> +AnnounceParameters *params = NULL;
> +
> +qemu_fill_announce_parameters(,

Re: [Qemu-devel] Commit b2a575a1c652 broke i486 support.

2017-05-30 Thread Rob Landley

On 05/29/2017 05:14 AM, Richard W.M. Jones wrote:
> I see in the disassembly use of cmovne (new in Pentium Pro) and
> bswap (new in 486).
> [http://cse.unl.edu/~goddard/Courses/CSCE351/IntelArchitecture/InstructionSetSummary.pdf]
> 
> The cmovne instruction is generated by the compiler (GCC in my case),
> 
> The following patch removes the cmovne instruction, so it should work
> on 486 (although I didn't test it).  It's not possible to remove bswap
> without surgery on the inline assembler.

Is there any way to make it just _not_ load the option rom for -cpu 486?
It ran fine before that thing went in...

Rob

Re: [Qemu-devel] [PATCH Risu v3 4/4] build: Add support to PowerPC BE and remove ARCH

2017-05-30 Thread joserz

On Tue, May 30, 2017 at 03:27:33PM +0100, Peter Maydell wrote:
> On 25 May 2017 at 20:10, Jose Ricardo Ziviani  
> wrote:
> > Essentialy the code for PowerPC BE and LE are the same, so this patch
> > renames all *ppc64le.* files to *ppc64.* and reflects such in the
> > Makefile.
> >
> > Due to the fact that all supported archs are covered by guess_arch
> > function, this also drops the ARCH parameter from the Makefile.
> 
> This change isn't right -- the point is that guess_arch might
> (in theory) guess wrong, so the user can override it.
> 
> You forgot to delete the comment about "powerpc64-linux-gnu doesn't
> work" from the build-all-archs script.
> 
> The rest is fine though, so I've made those minor tweaks and
> applied the series to risu master.
> 
> PS: for patches like this there is a neat git option --find-renames
> which you can use when you're generating the patch email to
> create smaller and easier to read patches.
> 
> thanks
> -- PMM
> 

Peter, thanks for your review, I always learn from it. Just
added the "git config diff.renames true; git config diff.algorithm
patience" config here. Next time it'll be better indeed! :)

Thanks

Re: [Qemu-devel] [PATCH 01/12] migration: Introduce announce parameters

2017-05-30 Thread Dr. David Alan Gilbert

* Vlad Yasevich (vyase...@redhat.com) wrote:
> On 05/26/2017 12:03 AM, Jason Wang wrote:
> > 
> > On 2017年05月25日 02:05, Vladislav Yasevich wrote:
> >> Add parameters that control RARP/GARP announcement timeouts.
> >> The parameters structure is added to the QAPI and a qmp command
> >> is added to set/get the parameter data.
> >>
> >> Based on work by "Dr. David Alan Gilbert" 
> >>
> >> Signed-off-by: Vladislav Yasevich 
> > 
> > I think it's better to explain e.g under which condition do we need to 
> > tweak such parameters.
> > 
> > Thanks
> > 
> 
> OK.  I'll rip off what dgilbert wrote in his original series for the 
> description.
> 
> Dave, if you have any text to add as to why migration might need to tweak 
> this, I'd
> appreciate it.

Pretty much what I originally said;  that the existing values
are arbitrary and fixed, and for systems with complex/sluggish
network reconfiguration systems they can be too slow.

Dave

> Thanks
> -vlad
--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

Re: [Qemu-devel] [Qemu-block] [PATCH v2 4/4] qemu-iotests: Block migration test

2017-05-30 Thread Jeff Cody

On Tue, May 30, 2017 at 06:57:05PM +0200, Kevin Wolf wrote:
> Am 30.05.2017 um 17:52 hat Jeff Cody geschrieben:
> > On Tue, May 30, 2017 at 05:22:53PM +0200, Kevin Wolf wrote:
> > > Signed-off-by: Kevin Wolf 
> > > ---
> > >  tests/qemu-iotests/183 | 143 
> > > +
> > >  tests/qemu-iotests/183.out |  46 +++
> > >  tests/qemu-iotests/group   |   1 +
> > >  3 files changed, 190 insertions(+)
> > >  create mode 100755 tests/qemu-iotests/183
> > >  create mode 100644 tests/qemu-iotests/183.out
> > > 
> > > diff --git a/tests/qemu-iotests/183 b/tests/qemu-iotests/183
> > > new file mode 100755
> > > index 000..5eda0e9
> > > --- /dev/null
> > > +++ b/tests/qemu-iotests/183
> > > @@ -0,0 +1,143 @@
> > > +#!/bin/bash
> > > +#
> > > +# Test old-style block migration (migrate -b)
> > > +#
> > > +# Copyright (C) 2017 Red Hat, Inc.
> > > +#
> > > +# This program is free software; you can redistribute it and/or modify
> > > +# it under the terms of the GNU General Public License as published by
> > > +# the Free Software Foundation; either version 2 of the License, or
> > > +# (at your option) any later version.
> > > +#
> > > +# This program is distributed in the hope that it will be useful,
> > > +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > > +# GNU General Public License for more details.
> > > +#
> > > +# You should have received a copy of the GNU General Public License
> > > +# along with this program.  If not, see .
> > > +#
> > > +
> > > +# creator
> > > +owner=kw...@redhat.com
> > > +
> > > +seq=`basename $0`
> > > +echo "QA output created by $seq"
> > > +
> > > +here=`pwd`
> > > +status=1 # failure is the default!
> > > +
> > > +MIG_SOCKET="${TEST_DIR}/migrate"
> > > +
> > > +_cleanup()
> > > +{
> > > +rm -f "${MIG_SOCKET}"
> > > +rm -f "${TEST_IMG}.dest"
> > > + _cleanup_test_img
> > > +_cleanup_qemu
> > > +}
> > > +trap "_cleanup; exit \$status" 0 1 2 3 15
> > > +
> > > +# get standard environment, filters and checks
> > > +. ./common.rc
> > > +. ./common.filter
> > > +. ./common.qemu
> > > +
> > > +_supported_fmt generic
> > 
> > Not sure to what extent we are really keeping the _supported_fmt up to date,
> > but this test will only work on qcow2 and raw, right?
> > 
> > I'd recommend changing this to:
> > 
> > + _supported_fmt qcow2 raw
> > 
> > (that can probably be done when applying, however).
> 
> Seems you are right, but I fail to see why the other formats shouldn't
> work? Are these just bugs or do I make any assumption in the test script
> that is invalid for other image formats?
>

Well, the following formats have live migration blockers:

vmdk, vhdx, vdi, vpc, qcow, vvfat

So that leaves qed, dmg, quorum.

Of those, qed is the only one that fails.  And I would guess it is a bug?

While waiting for  "status: active" from the query-migrate (in the section
where I proposed a timeout), the migration is hanging, and appears to never
get started.  If I redirect that output to a debug file, I just see this
over and over (I prettified the json output here):


{
  "return": {
"expected-downtime": 300,
"status": "active",
"disk": {
  "total": 67108864,
  "postcopy-requests": 0,
  "dirty-sync-count": 0,
  "page-size": 0,
  "remaining": 63963136,
  "mbps": 0,
  "transferred": 3145728,
  "duplicate": 0,
  "dirty-pages-rate": 0,
  "skipped": 0,
  "normal-bytes": 0,
  "normal": 0
},
"setup-time": 1,
"total-time": 4956,
"ram": {
  "total": 134750208,
  "postcopy-requests": 0,
  "dirty-sync-count": 1,
  "page-size": 4096,
  "remaining": 134750208,
  "mbps": 0,
  "transferred": 0,
  "duplicate": 0,
  "dirty-pages-rate": 0,
  "skipped": 0,
  "normal-bytes": 0,
  "normal": 0
}
  }
}

The only thing that advances is "total-time".

So I would make _supported_fmt be 'qcow2 raw qed dmg quorum', and the
failure of qed is an actual failure.

[...]

> > > +echo === Do block migration to destination ===
> > > +echo
> > > +
> > > +reply="$(_send_qemu_cmd $src \
> > > +"{ 'execute': 'migrate',
> > > +   'arguments': { 'uri': 'unix:${MIG_SOCKET}', 'blk': true } }" \
> > > +'return\|error')"
> > > +echo "$reply"
> > > +if echo "$reply" | grep "compiled without old-style" > /dev/null; then
> > > +_notrun "migrate -b support not compiled in"
> > > +fi
> > > +
> > > +while _send_qemu_cmd $src "{ 'execute': 'query-migrate' }" "return"  |
> > > +  grep '"status": "active"' > /dev/null
> > > +do
> > > +sleep 0.1
> > 
> > Would it make sense here to add a timeout?  It would be nice to be able to
> > reply on scripts always failing for git-bisect (rather than potentially
> > hanging at a spot).
> 
> What would you think about squashing this in:
> 
> ---

[Qemu-devel] [PATCH] numa: Fix format string for "Invalid node" message

2017-05-30 Thread Eduardo Habkost

Some compilers complain about the PRIu16 format string with the
MAX(src, dst) and MAX_NODES arguments.  Example output from Apple LLVM
version 7.3.0 (clang-703.0.31):

  numa.c:236:20: warning: format specifies type 'unsigned short' but the 
argument has type 'int' [-Wformat]
 MAX(src, dst), MAX_NODES);
  ~~~^~~~
  include/qapi/error.h:163:35: note: expanded from macro 'error_setg'
  (fmt), ## __VA_ARGS__)
^~~
  glib/2.52.2/include/glib-2.0/glib/gmacros.h:288:20: note: expanded from macro 
'MAX'
  #define MAX(a, b)  (((a) > (b)) ? (a) : (b))
 ^
  numa.c:236:35: warning: format specifies type 'unsigned short' but the 
argument has type 'int' [-Wformat]
 MAX(src, dst), MAX_NODES);
  ~~^
  include/qapi/error.h:163:35: note: expanded from macro 'error_setg'
  (fmt), ## __VA_ARGS__)
^~~
  include/sysemu/sysemu.h:165:19: note: expanded from macro 'MAX_NODES'
  #define MAX_NODES 128
^~~
MAX(src, dst) promotes the src and dst arguments to int, and MAX_NODES
is an int.  Use %d to silence those warnings.

Signed-off-by: Eduardo Habkost 
---
 numa.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/numa.c b/numa.c
index ca731455e9..be50c62aa9 100644
--- a/numa.c
+++ b/numa.c
@@ -231,8 +231,7 @@ static void parse_numa_distance(NumaDistOptions *dist, 
Error **errp)
 
 if (src >= MAX_NODES || dst >= MAX_NODES) {
 error_setg(errp,
-   "Invalid node %" PRIu16
-   ", max possible could be %" PRIu16,
+   "Invalid node %d, max possible could be %d",
MAX(src, dst), MAX_NODES);
 return;
 }
-- 
2.11.0.259.g40922b1

Re: [Qemu-devel] [PATCH v2 1/3] migration: Use savevm_handlers instead of loadvm copy

2017-05-30 Thread Juan Quintela

"Dr. David Alan Gilbert"  wrote:
> * Juan Quintela (quint...@redhat.com) wrote:
>> There is no reason for having the loadvm_handlers at all.  There is
>> only one use, and we can use the savevm handlers.
>> 
>> We will remove the loadvm handlers on a following patch.
>
> 
>
>>  trace_qemu_loadvm_state_section_partend(section_id);
>> -QLIST_FOREACH(le, >loadvm_handlers, entry) {
>> -if (le->section_id == section_id) {
>> +QTAILQ_FOREACH(se, _state.handlers, entry) {
>> +if (se->section_id == section_id) {
>
> Isn't this the problem?  What guarantees that the se->section_id
> is the same as the source's section_id - I don't think anything.
> It's just dynamically allocated in register_savevm_live so the
> initialisation order on source/dest could be different and you'd
> get different ID.  You can't update se->section_id
> unless you guaranteed to updated all of them.

Ok.  It worked for me because I was using the same command line in both
sides.

Changed to add load_section_id and adjust all around.

Thanks, Juan.

Re: [Qemu-devel] [PULL 02/29] numa: Allow setting NUMA distance for different NUMA nodes

2017-05-30 Thread Eric Blake

On 05/30/2017 01:10 PM, Eduardo Habkost wrote:
> On Tue, May 30, 2017 at 10:28:21AM -0500, Eric Blake wrote:
>> On 05/30/2017 09:01 AM, Eduardo Habkost wrote:
>>
 The OSX compiler is pickier about format strings than gcc,
 and neither "MAX_NODES" nor "MAX(anything)" are uint16_t type.
>>>
>>> src and dst are both uint16_t, so MAX(src, dst) is also uint16_t,
>>> isn't it?  It looks like MAX_NODES is the problem.
>>
>> No. MAX() invokes arithmetic (both ?: and > operators), and arithmetic
>> involves default promotion (anything shorter than int becomes 'int').
> 
> Oh, I didn't know that.

And the fact that var-arg passing REQUIRES promotion means you CAN'T
tell the difference from a true uint16_t argument and an int argument
which resulted from arithmetic promotion.  Which is why Paolo has argued
that the MacOS compiler warning is bogus because it is overly picky.
But we obviously are in no position to get it changed.

> 
>>
>>> +++ b/numa.c
>>> @@ -232,7 +232,7 @@ static void parse_numa_distance(NumaDistOptions *dist, 
>>> Error **errp)
>>>  if (src >= MAX_NODES || dst >= MAX_NODES) {
>>>  error_setg(errp,
>>> "Invalid node %" PRIu16
>>> -   ", max possible could be %" PRIu16,
>>> +   ", max possible could be %d",
>>
>> Don't you want %u to match PRIu16, rather than %d?
> 
> Can't this (using %u with an int argument) make more pedantic
> compilers complain?

Yes, gcc's -Wformat-signedness would (probably) flag it.  But we don't
use that.  At any rate, ALL uint16_t values are formatted identically
for both %u and %d, so as long as we have a formula for keeping picky
compilers silent, I don't care beyond that point.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH v2 0/6] vhost-user: Specify and implement device IOTLB support

2017-05-30 Thread Michael S. Tsirkin

On Fri, May 26, 2017 at 04:28:52PM +0200, Maxime Coquelin wrote:
> This series aims at specifying ans implementing the protocol update
> required to support device IOTLB with user backends.
> 
> In this second non-RFC version, main changes are:
>  - spec fixes and clarification
>  - rings information update has been restored back to ring enablement time
>  - Work around GCC 4.4.7 limitation wrt assignment in unnamed union at
> declaration time.
> 
> The series can be tested with vhost_iotlb_proto_v2 branch on my gitlab
> account[0].
> 
> The slave requests channel part is re-used from Marc-André's series submitted
> last year[1], with main changes from original version being request/feature
> names renaming and addition of the REPLY_ACK feature support.
> 
> Regarding IOTLB protocol, one noticeable change is the IOTLB miss request
> reply made optionnal (i.e. only if slave requests it by setting the
> VHOST_USER_NEED_REPLY flag in the message header). This change provides
> more flexibility in the backend implementation of the feature.
> 
> The protocol is very close to kernel backends, except that a new
> communication channel is introduced to enable the slave to send
> requests to the master.
> 
> [0]: 
> https://gitlab.com/mcoquelin/dpdk-next-virtio/commits/vhost_iotlb_proto_v2
> [1]: https://lists.gnu.org/archive/html/qemu-devel/2016-04/msg00095.html

Overall, this looks good to me. I do think patch 3 isn't a good idea
though, if slave wants something let it request it.

Need to find out why does vhost in kernel want the used ring iotlb at
start time - especially considering we aren't even guaranteed one entry
covers the whole ring, and invalidates should affect all addresses at
least in theory.


> Marc-André Lureau (2):
>   vhost-user: add vhost_user to hold the chr
>   vhost-user: add slave-req-fd support
> 
> Maxime Coquelin (4):
>   vhost: propagate errors in vhost_device_iotlb_miss()
>   vhost: rework IOTLB messaging
>   vhost: extend ring information update for IOTLB to all rings
>   spec/vhost-user spec: Add IOMMU support
> 
>  docs/specs/vhost-user.txt | 118 -
>  hw/virtio/vhost-backend.c | 130 
>  hw/virtio/vhost-user.c| 177 
> +-
>  hw/virtio/vhost.c |  27 --
>  include/hw/virtio/vhost-backend.h |  23 +++--
>  include/hw/virtio/vhost.h |   2 +-
>  6 files changed, 397 insertions(+), 80 deletions(-)
> 
> -- 
> 2.9.4

Re: [Qemu-devel] [PATCH v2 5/6] vhost-user: add slave-req-fd support

2017-05-30 Thread Michael S. Tsirkin

On Fri, May 26, 2017 at 04:28:57PM +0200, Maxime Coquelin wrote:
> From: Marc-André Lureau 
> 
> Learn to give a socket to the slave to let him make requests to the
> master.
> 
> Signed-off-by: Marc-André Lureau 
> Signed-off-by: Maxime Coquelin 
> ---
>  docs/specs/vhost-user.txt |  32 +++-
>  hw/virtio/vhost-user.c| 127 
> ++
>  2 files changed, 157 insertions(+), 2 deletions(-)
> 
> diff --git a/docs/specs/vhost-user.txt b/docs/specs/vhost-user.txt
> index 036890f..5fa7016 100644
> --- a/docs/specs/vhost-user.txt
> +++ b/docs/specs/vhost-user.txt
> @@ -139,6 +139,7 @@ in the ancillary data:
>   * VHOST_USER_SET_VRING_KICK
>   * VHOST_USER_SET_VRING_CALL
>   * VHOST_USER_SET_VRING_ERR
> + * VHOST_USER_SET_SLAVE_REQ_FD
>  
>  If Master is unable to send the full message or receives a wrong reply it 
> will
>  close the connection. An optional reconnection mechanism can be implemented.
> @@ -252,6 +253,18 @@ Once the source has finished migration, rings will be 
> stopped by
>  the source. No further update must be done before rings are
>  restarted.
>  
> +Slave communication
> +---
> +
> +An optional communication channel is provided if the slave declares
> +VHOST_USER_PROTOCOL_F_SLAVE_REQ protocol feature, to allow the slave to make
> +requests to the master.
> +
> +The fd is provided via VHOST_USER_SET_SLAVE_REQ_FD ancillary data.
> +
> +A slave may then send VHOST_USER_SLAVE_* messages to the master
> +using this fd communication channel.
> +
>  Protocol features
>  -
>  
> @@ -260,9 +273,10 @@ Protocol features
>  #define VHOST_USER_PROTOCOL_F_RARP   2
>  #define VHOST_USER_PROTOCOL_F_REPLY_ACK  3
>  #define VHOST_USER_PROTOCOL_F_MTU4
> +#define VHOST_USER_PROTOCOL_F_SLAVE_REQ  5
>  
> -Message types
> --
> +Master message types
> +
>  
>   * VHOST_USER_GET_FEATURES
>  


So down the road, we should make sure a device without
VHOST_USER_PROTOCOL_F_SLAVE_REQ does not advertise IOMMU
since you don't handle these messages otherwise.


> @@ -486,6 +500,20 @@ Message types
>If VHOST_USER_PROTOCOL_F_REPLY_ACK is negotiated, slave must respond
>with zero in case the specified MTU is valid, or non-zero otherwise.
>  
> + * VHOST_USER_SET_SLAVE_REQ_FD
> +
> +  Id: 21
> +  Equivalent ioctl: N/A
> +  Master payload: N/A
> +
> +  Set the socket file descriptor for slave initiated requests. It is 
> passed
> +  in the ancillary data.
> +  This request should be sent only when VHOST_USER_F_PROTOCOL_FEATURES
> +  has been negotiated, and protocol feature bit 
> VHOST_USER_PROTOCOL_F_SLAVE_REQ
> +  bit is present in VHOST_USER_GET_PROTOCOL_FEATURES.
> +  If VHOST_USER_PROTOCOL_F_REPLY_ACK is negotiated, slave must respond
> +  with zero for success, non-zero otherwise.
> +
>  VHOST_USER_PROTOCOL_F_REPLY_ACK:
>  ---
>  The original vhost-user specification only demands replies for certain
> diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> index 8a602e0..ea988fe 100644
> --- a/hw/virtio/vhost-user.c
> +++ b/hw/virtio/vhost-user.c
> @@ -32,6 +32,7 @@ enum VhostUserProtocolFeature {
>  VHOST_USER_PROTOCOL_F_RARP = 2,
>  VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
>  VHOST_USER_PROTOCOL_F_NET_MTU = 4,
> +VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5,
>  
>  VHOST_USER_PROTOCOL_F_MAX
>  };
> @@ -60,9 +61,15 @@ typedef enum VhostUserRequest {
>  VHOST_USER_SET_VRING_ENABLE = 18,
>  VHOST_USER_SEND_RARP = 19,
>  VHOST_USER_NET_SET_MTU = 20,
> +VHOST_USER_SET_SLAVE_REQ_FD = 21,
>  VHOST_USER_MAX
>  } VhostUserRequest;
>  
> +typedef enum VhostUserSlaveRequest {
> +VHOST_USER_SLAVE_NONE = 0,
> +VHOST_USER_SLAVE_MAX
> +}  VhostUserSlaveRequest;
> +
>  typedef struct VhostUserMemoryRegion {
>  uint64_t guest_phys_addr;
>  uint64_t memory_size;
> @@ -112,6 +119,7 @@ static VhostUserMsg m __attribute__ ((unused));
>  
>  struct vhost_user {
>  CharBackend *chr;
> +int slave_fd;
>  };
>  
>  static bool ioeventfd_enabled(void)
> @@ -578,6 +586,115 @@ static int vhost_user_reset_device(struct vhost_dev 
> *dev)
>  return 0;
>  }
>  
> +static void slave_read(void *opaque)
> +{
> +struct vhost_dev *dev = opaque;
> +struct vhost_user *u = dev->opaque;
> +VhostUserMsg msg = { 0, };
> +int size, ret = 0;
> +
> +/* Read header */
> +size = read(u->slave_fd, , VHOST_USER_HDR_SIZE);
> +if (size != VHOST_USER_HDR_SIZE) {
> +error_report("Failed to read from slave.");
> +goto err;
> +}
> +
> +if (msg.size > VHOST_USER_PAYLOAD_SIZE) {
> +error_report("Failed to read msg header."
> +" Size %d exceeds the maximum %zu.", msg.size,
> +VHOST_USER_PAYLOAD_SIZE);
> +goto

Re: [Qemu-devel] [PATCH v2 3/6] vhost: extend ring information update for IOTLB to all rings

2017-05-30 Thread Michael S. Tsirkin

On Fri, May 26, 2017 at 04:28:55PM +0200, Maxime Coquelin wrote:
> Vhost-kernel backend need

needs

> to receive IOTLB entry for used ring
> information early, which is done by triggering a miss event on
> its address.
> 
> This patch extends this behaviour to all rings information, to be
> compatible with vhost-user backend design.

Why does vhost-user need it though?

> 
> Signed-off-by: Maxime Coquelin 
> ---
> v2:
>  - Revert back to existing behaviour, i.e. only send IOTLB updates
> at ring enablement time, not at ring address setting time (mst).
>  - Extend IOTLB misses to all ring addresses, not only used ring.
> 
>  hw/virtio/vhost.c | 8 ++--
>  1 file changed, 6 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> index 6eddb09..7867034 100644
> --- a/hw/virtio/vhost.c
> +++ b/hw/virtio/vhost.c
> @@ -1552,11 +1552,15 @@ int vhost_dev_start(struct vhost_dev *hdev, 
> VirtIODevice *vdev)
>  if (vhost_dev_has_iommu(hdev)) {
>  hdev->vhost_ops->vhost_set_iotlb_callback(hdev, true);
>  
> -/* Update used ring information for IOTLB to work correctly,
> - * vhost-kernel code requires for this.*/
> +/*
> + * Update rings information for IOTLB to work correctly,
> + * vhost-kernel and vhost-user codes require for this.

Better just say "Update ring info for vhost iotlb."

The rest isn't really informative.



> + */
>  for (i = 0; i < hdev->nvqs; ++i) {
>  struct vhost_virtqueue *vq = hdev->vqs + i;
>  vhost_device_iotlb_miss(hdev, vq->used_phys, true);
> +vhost_device_iotlb_miss(hdev, vq->desc_phys, true);
> +vhost_device_iotlb_miss(hdev, vq->avail_phys, true);

So I don't remember why does vhost in kernel want miss on used
at start time.

Jason, could you comment on this please?



>  }
>  }
>  return 0;
> -- 
> 2.9.4

Re: [Qemu-devel] [PULL 02/29] numa: Allow setting NUMA distance for different NUMA nodes

2017-05-30 Thread Eduardo Habkost

On Tue, May 30, 2017 at 10:28:21AM -0500, Eric Blake wrote:
> On 05/30/2017 09:01 AM, Eduardo Habkost wrote:
> 
> >> The OSX compiler is pickier about format strings than gcc,
> >> and neither "MAX_NODES" nor "MAX(anything)" are uint16_t type.
> > 
> > src and dst are both uint16_t, so MAX(src, dst) is also uint16_t,
> > isn't it?  It looks like MAX_NODES is the problem.
> 
> No. MAX() invokes arithmetic (both ?: and > operators), and arithmetic
> involves default promotion (anything shorter than int becomes 'int').

Oh, I didn't know that.

> 
> > +++ b/numa.c
> > @@ -232,7 +232,7 @@ static void parse_numa_distance(NumaDistOptions *dist, 
> > Error **errp)
> >  if (src >= MAX_NODES || dst >= MAX_NODES) {
> >  error_setg(errp,
> > "Invalid node %" PRIu16
> > -   ", max possible could be %" PRIu16,
> > +   ", max possible could be %d",
> 
> Don't you want %u to match PRIu16, rather than %d?

Can't this (using %u with an int argument) make more pedantic
compilers complain?

-- 
Eduardo

Re: [Qemu-devel] [PATCH v2 6/6] spec/vhost-user spec: Add IOMMU support

2017-05-30 Thread Michael S. Tsirkin

On Fri, May 26, 2017 at 04:28:58PM +0200, Maxime Coquelin wrote:
> This patch specifies and implements the master/slave communication
> to support device IOTLB in slave.
> 
> The vhost_iotlb_msg structure introduced for kernel backends is
> re-used, making the design close between the two backends.
> 
> An exception is the use of the secondary channel to enable the
> slave to send IOTLB miss requests to the master.
> 
> Signed-off-by: Maxime Coquelin 
> ---
> 
> v2:
>  - spec: fixed possible permission field values
>  - spec: rewrote "IOMMU support" section
>  docs/specs/vhost-user.txt | 86 
> +++
>  hw/virtio/vhost-user.c| 31 +
>  2 files changed, 117 insertions(+)
> 
> diff --git a/docs/specs/vhost-user.txt b/docs/specs/vhost-user.txt
> index 5fa7016..0799ef1 100644
> --- a/docs/specs/vhost-user.txt
> +++ b/docs/specs/vhost-user.txt
> @@ -97,6 +97,25 @@ Depending on the request type, payload can be:
> log offset: offset from start of supplied file descriptor
> where logging starts (i.e. where guest address 0 would be logged)
>  
> + * An IOTLB message
> +   -
> +   | iova | size | user address | permissions flags | type |
> +   -
> +
> +   IOVA: a 64-bit I/O virtual address programmed by the guest
> +   Size: a 64-bit size
> +   User address: a 64-bit user address
> +   Permissions: a 8-bit value:
> +- 0: No access
> +- 1: Read access
> +- 2: Write access
> +- 3: Read/Write access
> +   Type: a 8-bit IOTLB message type:
> +- 1: IOTLB miss
> +- 2: IOTLB update
> +- 3: IOTLB invalidate
> +- 4: IOTLB access fail
> +
>  In QEMU the vhost-user message is implemented with the following struct:
>  
>  typedef struct VhostUserMsg {
> @@ -109,6 +128,7 @@ typedef struct VhostUserMsg {
>  struct vhost_vring_addr addr;
>  VhostUserMemory memory;
>  VhostUserLog log;
> +struct vhost_iotlb_msg iotlb;
>  };
>  } QEMU_PACKED VhostUserMsg;
>  
> @@ -253,6 +273,40 @@ Once the source has finished migration, rings will be 
> stopped by
>  the source. No further update must be done before rings are
>  restarted.
>  
> +IOMMU support
> +-
> +
> +When the VIRTIO_F_IOMMU_PLATFORM feature has been negotiated, the master
> +sends IOTLB entries update & invalidation by sending VHOST_USER_IOTLB_MSG
> +requests to the slave with a struct vhost_iotlb_msg as payload. For update
> +events, the iotlb payload has to be filled with the update message type (2),
> +the I/O virtual address, the size, the user virtual address, and the
> +permissions flags. Addresses and size must be within vhost memory regions set
> +via the VHOST_USER_SET_MEM_TABLE request. For invalidation events, the iotlb
> +payload has to be filled with the invalidation message type (3), the I/O 
> virtual
> +address and the size. On success, the slave is expected to reply with a zero
> +payload, non-zero otherwise.
> +
> +The slave relies on the slave communcation channel (see "Slave communication"
> +section below) to send IOTLB miss and access failure events, by sending
> +VHOST_USER_SLAVE_IOTLB_MSG requests to the master with a struct 
> vhost_iotlb_msg
> +as payload. For miss events, the iotlb payload has to be filled with the miss
> +message type (1), the I/O virtual address and the permissions flags. For 
> access
> +failure event, the iotlb payload has to be filled with the access failure
> +message type (4), the I/O virtual address and the permissions flags.
> +For synchronization purpose, the slave may rely on the reply-ack feature,
> +so the master may send a reply when operation is completed if the reply-ack
> +feature is negotiated and slaves requests a reply. For miss events, completed
> +operation means either master sent an update message containing the IOTLB 
> entry
> +containing requested address and permission, or master sent nothing if the 
> IOTLB
> +miss message is invalid (invalid IOVA or permission).
> +
> +The master isn't generally expected to take the initiative to send IOTLB 
> update
> +messages, as the slave sends IOTLB miss messages for the guest virtual memory
> +areas it needs to access. The exception is the rings information addresses
> +shared with the VHOST_USER_SET_VRING_ADDR request, for which the master must
> +send corresponding IOTLB updates before the rings are enabled.
> +

Is the implication that invalidates do not affect ring translations
true? I find this problematic.

If not then can we replace above with "may send"?

>  Slave communication
>  ---
>  
> @@ -514,6 +568,38 @@ Master message types
>If VHOST_USER_PROTOCOL_F_REPLY_ACK is negotiated, slave must respond
>with zero for success, non-zero otherwise.
>  
> + * VHOST_USER_IOTLB_MSG
> +
> +  Id: 22
> +  Equivalent ioctl: N/A (equivalent to

Re: [Qemu-devel] [PATCH v3 0/7] numa: code consolidation and fixes

2017-05-30 Thread no-reply

Hi,

This series seems to have some coding style problems. See output below for
more information:

Message-id: 1496161442-96665-1-git-send-email-imamm...@redhat.com
Subject: [Qemu-devel] [PATCH v3 0/7] numa: code consolidation and fixes
Type: series

=== TEST SCRIPT BEGIN ===
#!/bin/bash

BASE=base
n=1
total=$(git log --oneline $BASE.. | wc -l)
failed=0

git config --local diff.renamelimit 0
git config --local diff.renames True

commits="$(git log --format=%H --reverse $BASE..)"
for c in $commits; do
echo "Checking PATCH $n/$total: $(git log -n 1 --format=%s $c)..."
if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; then
failed=1
echo
fi
n=$((n+1))
done

exit $failed
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
Switched to a new branch 'test'
9136a97 numa: cpu: calculate/set default node-ids after all -numa CLI options 
are parsed
4d75574 spapr: cleanup spapr_fixup_cpu_numa_dt() usage
b946845 numa: move numa_node from CPUState into target specific classes
609347e numa: make hmp 'info numa' fetch numa nodes from qmp_query_cpus() result
9d2b597 numa: make sure that all cpus have has_node_id set if numa is enabled
c0d1aad numa: move default mapping init to machine
6c6afbf numa: consolidate cpu_preplug fixups/checks for pc/arm/spapr

=== OUTPUT BEGIN ===
Checking PATCH 1/7: numa: consolidate cpu_preplug fixups/checks for 
pc/arm/spapr...
ERROR: suspect code indent for conditional statements (4, 9)
#144: FILE: numa.c:546:
+if (!slot->props.has_node_id) {
+ mapped_node_id = 0;

total: 1 errors, 0 warnings, 123 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

Checking PATCH 2/7: numa: move default mapping init to machine...
Checking PATCH 3/7: numa: make sure that all cpus have has_node_id set if numa 
is enabled...
Checking PATCH 4/7: numa: make hmp 'info numa' fetch numa nodes from 
qmp_query_cpus() result...
Checking PATCH 5/7: numa: move numa_node from CPUState into target specific 
classes...
Checking PATCH 6/7: spapr: cleanup spapr_fixup_cpu_numa_dt() usage...
Checking PATCH 7/7: numa: cpu: calculate/set default node-ids after all -numa 
CLI options are parsed...
=== OUTPUT END ===

Test command exited with code: 1


---
Email generated automatically by Patchew [http://patchew.org/].
Please send your feedback to patchew-de...@freelists.org

Re: [Qemu-devel] [PATCH v2 1/3] migration: Use savevm_handlers instead of loadvm copy

2017-05-30 Thread Dr. David Alan Gilbert

* Juan Quintela (quint...@redhat.com) wrote:
> There is no reason for having the loadvm_handlers at all.  There is
> only one use, and we can use the savevm handlers.
> 
> We will remove the loadvm handlers on a following patch.



>  trace_qemu_loadvm_state_section_partend(section_id);
> -QLIST_FOREACH(le, >loadvm_handlers, entry) {
> -if (le->section_id == section_id) {
> +QTAILQ_FOREACH(se, _state.handlers, entry) {
> +if (se->section_id == section_id) {

Isn't this the problem?  What guarantees that the se->section_id
is the same as the source's section_id - I don't think anything.
It's just dynamically allocated in register_savevm_live so the
initialisation order on source/dest could be different and you'd
get different ID.  You can't update se->section_id
unless you guaranteed to updated all of them.

Dave

>  break;
>  }
>  }
> -if (le == NULL) {
> +if (se == NULL) {
>  error_report("Unknown savevm section %d", section_id);
>  return -EINVAL;
>  }
>  
> -ret = vmstate_load(f, le->se, le->version_id);
> +ret = vmstate_load(f, se, se->load_version_id);
>  if (ret < 0) {
>  error_report("error while loading state section id %d(%s)",
> - section_id, le->se->idstr);
> + section_id, se->idstr);
>  return ret;
>  }
> -if (!check_section_footer(f, le)) {
> +if (!check_section_footer(f, se)) {
>  return -EINVAL;
>  }
>  
> -- 
> 2.9.4
> 
--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

Re: [Qemu-devel] [Qemu-ppc] [PATCH 3/4] spapr: disable hotplugging without OS

2017-05-30 Thread Michael Roth

Quoting David Gibson (2017-05-24 22:16:26)
> On Wed, May 24, 2017 at 12:40:37PM -0500, Michael Roth wrote:
> > Quoting Laurent Vivier (2017-05-24 11:02:30)
> > > On 24/05/2017 17:54, Greg Kurz wrote:
> > > > On Wed, 24 May 2017 12:14:02 +0200
> > > > Igor Mammedov  wrote:
> > > > 
> > > >> On Wed, 24 May 2017 11:28:57 +0200
> > > >> Greg Kurz  wrote:
> > > >>
> > > >>> On Wed, 24 May 2017 15:07:54 +1000
> > > >>> David Gibson  wrote:
> > > >>>   
> > >  On Tue, May 23, 2017 at 01:18:11PM +0200, Laurent Vivier wrote:
> > > > If the OS is not started, QEMU sends an event to the OS
> > > > that is lost and cannot be recovered. An unplug is not
> > > > able to restore QEMU in a coherent state.
> > > > So, while the OS is not started, disable CPU and memory hotplug.
> > > > We use option vector 6 to know if the OS is started
> > > >
> > > > Signed-off-by: Laurent Vivier   
> > > 
> > >  Urgh.. I'm not terribly confident that this is really correct.  As
> > >  discussed on the previous patch, you're essentially using OV6 as a
> > >  flag that CAS is complete.
> > > 
> > >  But while it undoubtedly makes the race window much smaller, I don't
> > >  see that there's any guarantee the guest OS will really be able to
> > >  handle hotplug events immediately after CAS.
> > > 
> > >  In particular if the CAS process completes partially but then needs 
> > >  to
> > >  trigger a reboot, I think that would end up setting the ov6 variable,
> > >  but the OS would definitely not be in a state to accept events.  
> > > >> wouldn't guest on reboot pick up updated fdt and online hotplugged
> > > >> before crash cpu along with initial cpus?
> > > >>
> > > > 
> > > > Yes and that's what actually happens with cpus.
> > > > 
> > > > But catching up with the background for this series, I have the
> > > > impression that the issue isn't the fact we loose an event if the OS
> > > > isn't started (which is not true), but more something wrong happening
> > > > when hotplugging+unplugging memory as described in this commit:
> > > > 
> > > > commit fe6824d12642b005c69123ecf8631f9b13553f8b
> > > > Author: Laurent Vivier 
> > > > Date:   Tue Mar 28 14:09:34 2017 +0200
> > > > 
> > > > spapr: fix memory hot-unplugging
> > > > 
> > > 
> > > Yes, this commit try to fix that, but it's not possible. Some objects
> > > remain in memory: you can see with "info cpus" or "info memory-devices"
> > > that they are not really removed, and this prevents to hotplug them
> > > again, and moreover in the case of the memory hot-unplug we can rerun
> > > the device_del and crash qemu (as before the fix).
> > > 
> > > Moreover all stuff normally cleared in detach() are not, and we can't do
> > > it later in set_allocation_state() because some are in use by the
> > > kernel, and this is the last call from the kernel.
> > 
> > Focusing on the hotplug/add case, it's a bit odd that the guest would be
> > using the memory even though the hotplug event is clearly still sitting
> > in the queue.
> > 
> > I think part of the issue is us not having a clear enough distinction in
> > the code between what constitutes the need for "boot-time" handling vs.
> > "hotplug" handling.
> > 
> > We have this hook in spapr_add_lmbs:
> > 
> > if (!dev->hotplugged) {
> > /* guests expect coldplugged LMBs to be pre-allocated */
> > drck->set_allocation_state(drc, SPAPR_DR_ALLOCATION_STATE_USABLE);
> > drck->set_isolation_state(drc, SPAPR_DR_ISOLATION_STATE_UNISOLATED);
> > }
> > 
> > Whereas the default allocation/isolation state for LMBs in spapr_drc.c is
> > UNUSABLE/ISOLATED, which is what covers the dev->hotplugged == true case.
> > 
> > I need to spend some time testing to confirm, but trying to walk through the
> > various scenarios looking at the code:
> > 
> > case 1)
> > 
> > If the hotplug occurs before reset (not sure how likely this is), the event
> > will get dropped by reset handler, and the DRC stuff will be left in
> > UNUSABLE/ISOLATED. I think it's more appropriate to treat this as 
> > "boot-time"
> > and set it to USABLE/UNISOLATED like the !dev->hotplugged case.
> 
> Right.  It looks like we might need to go through all DRCs and sanitize
> their state at reset time.  Essentially whatever their state before
> the reset, they should appear as cold-plugged after the reset, I
> think.
> 
> > case 2)
> > 
> > If the hotplug it occurs after reset, but before CAS,
> > spapr_populate_drconf_memory will be called to populate the DT with all 
> > active
> > LMBs. AFAICT, for hotplugged LMBs it marks everything where
> > memory_region_preset(get_system_memory(), addr) == true as
> > SPAPR_LMB_FLAGS_ASSIGNED. Since the region is mapped regardless of whether 
> > the
> > guest has acknowledged the hotplug, I think this would end up

Re: [Qemu-devel] [PULL 02/29] numa: Allow setting NUMA distance for different NUMA nodes

2017-05-30 Thread Daniel P. Berrange

On Tue, May 30, 2017 at 06:08:04PM +0100, Peter Maydell wrote:
> On 30 May 2017 at 15:01, Eduardo Habkost  wrote:
> > On Tue, May 30, 2017 at 11:45:55AM +0100, Peter Maydell wrote:
> >> Hi all; I'm afraid this patch breaks compilation on OSX:
> >
> > It looks like this wasn't detected by travis-ci because the build
> > is not using -Werror:
> > https://travis-ci.org/ehabkost/qemu/jobs/236485778#L3881
> 
> Hmm. It would be nice to fix that. configure doesn't enable -Werror
> by default on OSX because it doesn't handle some of the pragmas
> to disable warnings that we use, but on the typical OSX build
> the warnings that we use the pragmas to suppress don't get
> generated anyway, so -Werror works in practice. It needs
> configure '--extra-cflags=-Werror' to turn on.
> (It might also need --disable-vnc-sasl depending on OSX version
> since Apple helpfully added a pile of deprecation warnings to
> their version of the sasl headers without suggesting what to
> use instead.)

The same warnings hit libvirt on OS-X too and some pragmas were
sufficient to shut it up

_Pragma ("GCC diagnostic push")
_Pragma ("GCC diagnostic ignored \"-Wdeprecated-declarations\"")

 sasl code...

_Pragma ("GCC diagnostic pop")


Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

Re: [Qemu-devel] [PULL 02/29] numa: Allow setting NUMA distance for different NUMA nodes

2017-05-30 Thread Peter Maydell

On 30 May 2017 at 15:01, Eduardo Habkost  wrote:
> On Tue, May 30, 2017 at 11:45:55AM +0100, Peter Maydell wrote:
>> Hi all; I'm afraid this patch breaks compilation on OSX:
>
> It looks like this wasn't detected by travis-ci because the build
> is not using -Werror:
> https://travis-ci.org/ehabkost/qemu/jobs/236485778#L3881

Hmm. It would be nice to fix that. configure doesn't enable -Werror
by default on OSX because it doesn't handle some of the pragmas
to disable warnings that we use, but on the typical OSX build
the warnings that we use the pragmas to suppress don't get
generated anyway, so -Werror works in practice. It needs
configure '--extra-cflags=-Werror' to turn on.
(It might also need --disable-vnc-sasl depending on OSX version
since Apple helpfully added a pile of deprecation warnings to
their version of the sasl headers without suggesting what to
use instead.)

thanks
-- PMM

Re: [Qemu-devel] [Qemu-block] [PATCH v2 4/4] qemu-iotests: Block migration test

2017-05-30 Thread Kevin Wolf

Am 30.05.2017 um 17:52 hat Jeff Cody geschrieben:
> On Tue, May 30, 2017 at 05:22:53PM +0200, Kevin Wolf wrote:
> > Signed-off-by: Kevin Wolf 
> > ---
> >  tests/qemu-iotests/183 | 143 
> > +
> >  tests/qemu-iotests/183.out |  46 +++
> >  tests/qemu-iotests/group   |   1 +
> >  3 files changed, 190 insertions(+)
> >  create mode 100755 tests/qemu-iotests/183
> >  create mode 100644 tests/qemu-iotests/183.out
> > 
> > diff --git a/tests/qemu-iotests/183 b/tests/qemu-iotests/183
> > new file mode 100755
> > index 000..5eda0e9
> > --- /dev/null
> > +++ b/tests/qemu-iotests/183
> > @@ -0,0 +1,143 @@
> > +#!/bin/bash
> > +#
> > +# Test old-style block migration (migrate -b)
> > +#
> > +# Copyright (C) 2017 Red Hat, Inc.
> > +#
> > +# This program is free software; you can redistribute it and/or modify
> > +# it under the terms of the GNU General Public License as published by
> > +# the Free Software Foundation; either version 2 of the License, or
> > +# (at your option) any later version.
> > +#
> > +# This program is distributed in the hope that it will be useful,
> > +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > +# GNU General Public License for more details.
> > +#
> > +# You should have received a copy of the GNU General Public License
> > +# along with this program.  If not, see .
> > +#
> > +
> > +# creator
> > +owner=kw...@redhat.com
> > +
> > +seq=`basename $0`
> > +echo "QA output created by $seq"
> > +
> > +here=`pwd`
> > +status=1   # failure is the default!
> > +
> > +MIG_SOCKET="${TEST_DIR}/migrate"
> > +
> > +_cleanup()
> > +{
> > +rm -f "${MIG_SOCKET}"
> > +rm -f "${TEST_IMG}.dest"
> > +   _cleanup_test_img
> > +_cleanup_qemu
> > +}
> > +trap "_cleanup; exit \$status" 0 1 2 3 15
> > +
> > +# get standard environment, filters and checks
> > +. ./common.rc
> > +. ./common.filter
> > +. ./common.qemu
> > +
> > +_supported_fmt generic
> 
> Not sure to what extent we are really keeping the _supported_fmt up to date,
> but this test will only work on qcow2 and raw, right?
> 
> I'd recommend changing this to:
> 
> + _supported_fmt qcow2 raw
> 
> (that can probably be done when applying, however).

Seems you are right, but I fail to see why the other formats shouldn't
work? Are these just bugs or do I make any assumption in the test script
that is invalid for other image formats?

> > +_supported_proto file
> > +_supported_os Linux
> > +
> > +size=64M
> > +_make_test_img $size
> > +TEST_IMG="${TEST_IMG}.dest" _make_test_img $size
> > +
> > +echo
> > +echo === Starting VMs ===
> > +echo
> > +
> > +qemu_comm_method="qmp"
> > +
> > +_launch_qemu \
> > +-drive file="${TEST_IMG}",cache=${CACHEMODE},driver=$IMGFMT,id=disk
> > +src=$QEMU_HANDLE
> > +_send_qemu_cmd $src "{ 'execute': 'qmp_capabilities' }" 'return'
> > +
> > +_launch_qemu \
> > +-drive 
> > file="${TEST_IMG}.dest",cache=${CACHEMODE},driver=$IMGFMT,id=disk \
> > +-incoming "unix:${MIG_SOCKET}"
> > +dest=$QEMU_HANDLE
> > +_send_qemu_cmd $dest "{ 'execute': 'qmp_capabilities' }" 'return'
> > +
> > +echo
> > +echo === Write something on the source ===
> > +echo
> > +
> > +_send_qemu_cmd $src \
> > +"{ 'execute': 'human-monitor-command',
> > +   'arguments': { 'command-line':
> > +  'qemu-io disk \"write -P 0x55 0 64k\"' } }" \
> > +'return'
> > +_send_qemu_cmd $src \
> > +"{ 'execute': 'human-monitor-command',
> > +   'arguments': { 'command-line':
> > +  'qemu-io disk \"read -P 0x55 0 64k\"' } }" \
> > +'return'
> > +
> > +echo
> > +echo === Do block migration to destination ===
> > +echo
> > +
> > +reply="$(_send_qemu_cmd $src \
> > +"{ 'execute': 'migrate',
> > +   'arguments': { 'uri': 'unix:${MIG_SOCKET}', 'blk': true } }" \
> > +'return\|error')"
> > +echo "$reply"
> > +if echo "$reply" | grep "compiled without old-style" > /dev/null; then
> > +_notrun "migrate -b support not compiled in"
> > +fi
> > +
> > +while _send_qemu_cmd $src "{ 'execute': 'query-migrate' }" "return"  |
> > +  grep '"status": "active"' > /dev/null
> > +do
> > +sleep 0.1
> 
> Would it make sense here to add a timeout?  It would be nice to be able to
> reply on scripts always failing for git-bisect (rather than potentially
> hanging at a spot).

What would you think about squashing this in:

--- a/tests/qemu-iotests/183
+++ b/tests/qemu-iotests/183
@@ -96,11 +96,8 @@ if echo "$reply" | grep "compiled without old-style" > 
/dev/null; then
 _notrun "migrate -b support not compiled in"
 fi
 
-while _send_qemu_cmd $src "{ 'execute': 'query-migrate' }" "return"  |
-  grep '"status": "active"' > /dev/null
-do
-sleep 0.1
-done
+QEMU_COMM_TIMEOUT=0.1 qemu_cmd_repeat=50 silent=yes \
+_send_qemu_cmd $src "{ 'execute': 'query-migrate' }" '"status":

Re: [Qemu-devel] [PATCH] pc-bios/s390-ccw: use STRIP variable in Makefile

2017-05-30 Thread Greg Kurz

On Tue, 30 May 2017 18:34:46 +0200
Christian Borntraeger  wrote:

> On 05/30/2017 05:04 PM, Greg Kurz wrote:
> > The docker-run-test-build@debian-s390x-cross target fails with:
> > 
> > strip --strip-unneeded s390-ccw.elf -o s390-ccw.img
> > strip: Unable to recognise the format of the input file `s390-ccw.elf'
> > 
> > The configure script defines a STRIP makefile variable whose default
> > value is ${cross_prefix}strip. Let's use it.
> > 
> > Signed-off-by: Greg Kurz 
> > ---
> >  pc-bios/s390-ccw/Makefile |2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> > 
> > diff --git a/pc-bios/s390-ccw/Makefile b/pc-bios/s390-ccw/Makefile
> > index 79a46b67356f..7af57dad109c 100644
> > --- a/pc-bios/s390-ccw/Makefile
> > +++ b/pc-bios/s390-ccw/Makefile
> > @@ -22,7 +22,7 @@ s390-ccw.elf: $(OBJECTS)
> > $(call quiet-command,$(CC) $(LDFLAGS) -o $@ 
> > $(OBJECTS),"BUILD","$(TARGET_DIR)$@")
> > 
> >  s390-ccw.img: s390-ccw.elf
> > -   $(call quiet-command,strip --strip-unneeded $< -o 
> > $@,"STRIP","$(TARGET_DIR)$@")
> > +   $(call quiet-command,$(STRIP) --strip-unneeded $< -o 
> > $@,"STRIP","$(TARGET_DIR)$@")
> > 
> >  $(OBJECTS): Makefile
> > 
> >   
> 
> I was going to apply this, but it fails with --disable-strip like
> 
> /bin/sh: --strip-unneeded: command not found
> Makefile:25: recipe for target 's390-ccw.img' failed
> make[1]: *** [s390-ccw.img] Error 127
> Makefile:354: recipe for target 'romsubdir-s390-ccw' failed
> make: *** [romsubdir-s390-ccw] Error 2
> 
> Not yet sure whats going on.
> 

configure doesn't generate STRIP when --disable-strip or --enable-debug... so
I guess we shouldn't use it here, but rather generate a s390_cross_prefix like
in roms/Makefile ?


pgp3jQCERjbE0.pgp
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH 21/26] target/s390x: implement PACK UNICODE

2017-05-30 Thread Richard Henderson


On 05/29/2017 04:22 AM, Aurelien Jarno wrote:

On 2017-05-26 09:35, Richard Henderson wrote:

On 05/25/2017 02:05 PM, Aurelien Jarno wrote:

+} else if (srclen > ssize) {
   b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
-src--;
-srclen--;
+src -= ssize;
+srclen -= ssize;


Surely we need to use lduw in order to correctly read the big-endian 16-bit
value, or bias src by +1 to read the low byte of same.


The operands are read from right to left, therefore src is adjusted to
point to the last byte before the for loop:

+/* The operands are processed from right to left.  */
+src += srclen - 1;
+dest += destlen - 1;

Given s390 is big-endian, it means src initially always point to the
low byte independently to the size of the value.



Quite right.  I didn't consider the processing order.


r~

Re: [Qemu-devel] [PATCH 25/26] target/s390x: implement TRANSLATE ONE/TWO TO ONE/TWO

2017-05-30 Thread Richard Henderson


On 05/29/2017 04:17 AM, Aurelien Jarno wrote:

On 2017-05-26 10:10, Richard Henderson wrote:

On 05/25/2017 02:05 PM, Aurelien Jarno wrote:

+uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
+  uint32_t sizes)
+{
+uintptr_t ra = GETPC();
+int dsize = (sizes & 1) ? 1 : 2;
+int ssize = (sizes & 2) ? 1 : 2;
+uint16_t tst = env->regs[0] & ((1 << (8 * dsize)) - 1);


I think you should pass in tst as an argument.  That way you can pass in an
out-of-band value when we implement ETF2 and test field M3 bit 3.


I don't mind passing r0 as an argument. That said if we want to pass tst
or bundle the M3 field, it means we need to use TCG instructions to do
so. I am not sure it brings a lot compare to doing so in the helper
side.


Not at all -- the M3 bit test would be a translation-time check.


r~

Re: [Qemu-devel] [Qemu-ppc] [PATCH v3 6/7] spapr: cleanup spapr_fixup_cpu_numa_dt() usage

2017-05-30 Thread Greg Kurz

On Tue, 30 May 2017 18:24:01 +0200
Igor Mammedov  wrote:

> even though spapr_fixup_cpu_numa_dt() has no effect on FDT
> if numa is disabled, don't call it uselessly. It makes it
> obvious at call sites that function is need only when numa

s/need/needed

> is enabled.
> 
> Signed-off-by: Igor Mammedov 
> ---

Reviewed-by: Greg Kurz 

>  hw/ppc/spapr.c | 22 ++
>  1 file changed, 10 insertions(+), 12 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 34bb03d..96a2a74 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -178,10 +178,8 @@ static int spapr_fixup_cpu_smt_dt(void *fdt, int offset, 
> PowerPCCPU *cpu,
>  return ret;
>  }
>  
> -static int spapr_fixup_cpu_numa_dt(void *fdt, int offset, CPUState *cs)
> +static int spapr_fixup_cpu_numa_dt(void *fdt, int offset, PowerPCCPU *cpu)
>  {
> -int ret = 0;
> -PowerPCCPU *cpu = POWERPC_CPU(cs);
>  int index = ppc_get_vcpu_dt_id(cpu);
>  uint32_t associativity[] = {cpu_to_be32(0x5),
>  cpu_to_be32(0x0),
> @@ -191,12 +189,8 @@ static int spapr_fixup_cpu_numa_dt(void *fdt, int 
> offset, CPUState *cs)
>  cpu_to_be32(index)};
>  
>  /* Advertise NUMA via ibm,associativity */
> -if (nb_numa_nodes > 1) {
> -ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity,
> +return fdt_setprop(fdt, offset, "ibm,associativity", associativity,
>sizeof(associativity));
> -}
> -
> -return ret;
>  }
>  
>  /* Populate the "ibm,pa-features" property */
> @@ -321,9 +315,11 @@ static int spapr_fixup_cpu_dt(void *fdt, 
> sPAPRMachineState *spapr)
>  return ret;
>  }
>  
> -ret = spapr_fixup_cpu_numa_dt(fdt, offset, cs);
> -if (ret < 0) {
> -return ret;
> +if (nb_numa_nodes > 1) {
> +ret = spapr_fixup_cpu_numa_dt(fdt, offset, cpu);
> +if (ret < 0) {
> +return ret;
> +}
>  }
>  
>  ret = spapr_fixup_cpu_smt_dt(fdt, offset, cpu, compat_smt);
> @@ -538,7 +534,9 @@ static void spapr_populate_cpu_dt(CPUState *cs, void 
> *fdt, int offset,
>  _FDT((fdt_setprop(fdt, offset, "ibm,pft-size",
>pft_size_prop, sizeof(pft_size_prop;
>  
> -_FDT(spapr_fixup_cpu_numa_dt(fdt, offset, cs));
> +if (nb_numa_nodes > 1) {
> +_FDT(spapr_fixup_cpu_numa_dt(fdt, offset, cpu));
> +}
>  
>  _FDT(spapr_fixup_cpu_smt_dt(fdt, offset, cpu, compat_smt));
>  



pgpLJBBnVp5I_.pgp
Description: OpenPGP digital signature

1 2 3 4 5 >

1 - 100 of 419 matches

Mail list logo