date:20151120

Re: [Qemu-devel] [Qemu-ppc] [PATCH 23/77] ppc: Turn a bunch of booleans from int to bool

2015-11-20 Thread David Gibson

On Wed, Nov 11, 2015 at 11:27:36AM +1100, Benjamin Herrenschmidt wrote:
> Signed-off-by: Benjamin Herrenschmidt 

Reviewed-by: David Gibson 


> ---
>  target-ppc/translate.c | 39 +++
>  1 file changed, 19 insertions(+), 20 deletions(-)
> 
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index 4d01fd0..a5ab2eb 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -189,21 +189,20 @@ struct DisasContext {
>  uint32_t opcode;
>  uint32_t exception;
>  /* Routine used to access memory */
> -bool pr, hv, dr;
> +bool pr, hv, dr, le_mode;
>  int mem_idx;
>  int access_type;
>  /* Translation flags */
> -int le_mode;
>  TCGMemOp default_tcg_memop_mask;
>  #if defined(TARGET_PPC64)
> -int sf_mode;
> -int has_cfar;
> +bool sf_mode;
> +bool has_cfar;
>  #endif
> -int fpu_enabled;
> -int altivec_enabled;
> -int vsx_enabled;
> -int spe_enabled;
> -int tm_enabled;
> +bool fpu_enabled;
> +bool altivec_enabled;
> +bool vsx_enabled;
> +bool spe_enabled;
> +bool tm_enabled;
>  ppc_spr_t *spr_cb; /* Needed to check rights for mfspr/mtspr */
>  int singlestep_enabled;
>  uint64_t insns_flags;
> @@ -380,7 +379,7 @@ typedef struct opcode_t {
>  #if defined(CONFIG_USER_ONLY)
>  #define CHK_HV GEN_PRIV
>  #define CHK_SV GEN_PRIV
> -#define CHK_HVDR GEN_PRIV
> +#define CHK_HVRM GEN_PRIV
>  #else
>  #define CHK_HV do { if (unlikely(ctx->pr || !ctx->hv)) GEN_PRIV; } while(0)
>  #define CHK_SV do { if (unlikely(ctx->pr))  GEN_PRIV; }  while(0)
> @@ -11407,31 +11406,31 @@ void gen_intermediate_code(CPUPPCState *env, struct 
> TranslationBlock *tb)
>  ctx.insns_flags = env->insns_flags;
>  ctx.insns_flags2 = env->insns_flags2;
>  ctx.access_type = -1;
> -ctx.le_mode = env->hflags & (1 << MSR_LE) ? 1 : 0;
> +ctx.le_mode = !!(env->hflags & (1 << MSR_LE));
>  ctx.default_tcg_memop_mask = ctx.le_mode ? MO_LE : MO_BE;
>  #if defined(TARGET_PPC64)
>  ctx.sf_mode = msr_is_64bit(env, env->msr);
>  ctx.has_cfar = !!(env->flags & POWERPC_FLAG_CFAR);
>  #endif
> -ctx.fpu_enabled = msr_fp;
> +ctx.fpu_enabled = !!msr_fp;
>  if ((env->flags & POWERPC_FLAG_SPE) && msr_spe)
> -ctx.spe_enabled = msr_spe;
> +ctx.spe_enabled = !!msr_spe;
>  else
> -ctx.spe_enabled = 0;
> +ctx.spe_enabled = false;
>  if ((env->flags & POWERPC_FLAG_VRE) && msr_vr)
> -ctx.altivec_enabled = msr_vr;
> +ctx.altivec_enabled = !!msr_vr;
>  else
> -ctx.altivec_enabled = 0;
> +ctx.altivec_enabled = false;
>  if ((env->flags & POWERPC_FLAG_VSX) && msr_vsx) {
> -ctx.vsx_enabled = msr_vsx;
> +ctx.vsx_enabled = !!msr_vsx;
>  } else {
> -ctx.vsx_enabled = 0;
> +ctx.vsx_enabled = false;
>  }
>  #if defined(TARGET_PPC64)
>  if ((env->flags & POWERPC_FLAG_TM) && msr_tm) {
> -ctx.tm_enabled = msr_tm;
> +ctx.tm_enabled = !!msr_tm;
>  } else {
> -ctx.tm_enabled = 0;
> +ctx.tm_enabled = false;
>  }
>  #endif
>  if ((env->flags & POWERPC_FLAG_SE) && msr_se)

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH] memory: emulate ioeventfd

2015-11-20 Thread Michael S. Tsirkin

On Fri, Nov 20, 2015 at 12:37:16PM +0300, Pavel Fedin wrote:
> The ioeventfd mechanism is used by vhost, dataplane, and virtio-pci to
> turn guest MMIO/PIO writes into eventfd file descriptor events.  This
> allows arbitrary threads to be notified when the guest writes to a
> specific MMIO/PIO address.
> 
> qtest and TCG do not support ioeventfd because memory writes are not
> checked against registered ioeventfds in QEMU.  This patch implements
> this in memory_region_dispatch_write() so qtest can use ioeventfd.
> 
> Also this patch fixes vhost aborting on some misconfigured old kernels
> like 3.18.0 on ARM. It is possible to explicitly enable CONFIG_EVENTFD
> in expert settings, while MMIO binding support in KVM will still be
> missing.
> 
> Signed-off-by: Stefan Hajnoczi 
> Signed-off-by: Pavel Fedin 

Reviewed-by: Michael S. Tsirkin 

> ---
> RFC => PATCH:
> - Add !kvm_eventfds_enabled() conditions to bypass eventfd injection when not 
> needed
> - Renamed "ioeventfd" to "eventfd", just to make words shorter
> - Add a one-shot warning about missing MMIO bindings in KVM
> ---
>  kvm-all.c |  6 --
>  memory.c  | 42 ++
>  2 files changed, 46 insertions(+), 2 deletions(-)
> 
> diff --git a/kvm-all.c b/kvm-all.c
> index ddb007a..70f5cec 100644
> --- a/kvm-all.c
> +++ b/kvm-all.c
> @@ -1633,8 +1633,10 @@ static int kvm_init(MachineState *ms)
>  
>  kvm_state = s;
>  
> -s->memory_listener.listener.eventfd_add = kvm_mem_ioeventfd_add;
> -s->memory_listener.listener.eventfd_del = kvm_mem_ioeventfd_del;
> +if (kvm_eventfds_allowed) {
> +s->memory_listener.listener.eventfd_add = kvm_mem_ioeventfd_add;
> +s->memory_listener.listener.eventfd_del = kvm_mem_ioeventfd_del;
> +}
>  s->memory_listener.listener.coalesced_mmio_add = 
> kvm_coalesce_mmio_region;
>  s->memory_listener.listener.coalesced_mmio_del = 
> kvm_uncoalesce_mmio_region;
>  
> diff --git a/memory.c b/memory.c
> index e193658..4d138fb 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -18,12 +18,14 @@
>  #include "exec/ioport.h"
>  #include "qapi/visitor.h"
>  #include "qemu/bitops.h"
> +#include "qemu/error-report.h"
>  #include "qom/object.h"
>  #include "trace.h"
>  #include 
>  
>  #include "exec/memory-internal.h"
>  #include "exec/ram_addr.h"
> +#include "sysemu/kvm.h"
>  #include "sysemu/sysemu.h"
>  
>  //#define DEBUG_UNASSIGNED
> @@ -1141,6 +1143,32 @@ MemTxResult memory_region_dispatch_read(MemoryRegion 
> *mr,
>  return r;
>  }
>  
> +/* Return true if an eventfd was signalled */
> +static bool memory_region_dispatch_write_eventfds(MemoryRegion *mr,
> +hwaddr addr,
> +uint64_t data,
> +unsigned size,
> +MemTxAttrs attrs)
> +{
> +MemoryRegionIoeventfd ioeventfd = {
> +.addr = addrrange_make(int128_make64(addr), int128_make64(size)),
> +.data = data,
> +};
> +unsigned i;
> +
> +for (i = 0; i < mr->ioeventfd_nb; i++) {
> +ioeventfd.match_data = mr->ioeventfds[i].match_data;
> +ioeventfd.e = mr->ioeventfds[i].e;
> +
> +if (memory_region_ioeventfd_equal(ioeventfd, mr->ioeventfds[i])) {
> +event_notifier_set(ioeventfd.e);
> +return true;
> +}
> +}
> +
> +return false;
> +}
> +
>  MemTxResult memory_region_dispatch_write(MemoryRegion *mr,
>   hwaddr addr,
>   uint64_t data,
> @@ -1154,6 +1182,11 @@ MemTxResult memory_region_dispatch_write(MemoryRegion 
> *mr,
>  
>  adjust_endianness(mr, , size);
>  
> +if ((!kvm_eventfds_enabled()) &&
> +memory_region_dispatch_write_eventfds(mr, addr, data, size, attrs)) {
> +return MEMTX_OK;
> +}
> +
>  if (mr->ops->write) {
>  return access_with_adjusted_size(addr, , size,
>   mr->ops->impl.min_access_size,
> @@ -1672,6 +1705,8 @@ void memory_region_clear_global_locking(MemoryRegion 
> *mr)
>  mr->global_locking = false;
>  }
>  
> +static bool userspace_eventfd_warning;
> +
>  void memory_region_add_eventfd(MemoryRegion *mr,
> hwaddr addr,
> unsigned size,
> @@ -1688,6 +1723,13 @@ void memory_region_add_eventfd(MemoryRegion *mr,
>  };
>  unsigned i;
>  
> +if (kvm_enabled() && (!(kvm_eventfds_enabled() ||
> +userspace_eventfd_warning))) {
> +userspace_eventfd_warning = true;
> +error_report("Using eventfd without MMIO binding in KVM. "
> + "Suboptimal performance expected");
> +}
> +
>  if (size) {
>  adjust_endianness(mr, , size);
>  }
> -- 
>

Re: [Qemu-devel] [PATCH 1/2] mirror: Rewrite mirror_iteration

2015-11-20 Thread Fam Zheng

On Wed, 11/18 11:38, Paolo Bonzini wrote:
> 
> 
> On 17/11/2015 12:41, Fam Zheng wrote:
> > +/* Wait for I/O to this cluster (from a previous iteration) to be
> > + * done.*/
> > +while (test_bit(next_chunk, s->in_flight_bitmap)) {
> > +trace_mirror_yield_in_flight(s, next_sector, s->in_flight);
> > +s->waiting_for_io = true;
> > +qemu_coroutine_yield();
> > +s->waiting_for_io = false;
> > +}
> > +
> 
> I think this could just "break" if nb_chunks > 0, like
> 
>   if (test_bit(next_chunk, s->in_flight_bitmap)) {
>   if (nb_chunks > 0) {
>   break;
>   }
>   mirror_wait_for_io(s);
>   /* Now retry.  */
>   } else {
>   hbitmap_next = hbitmap_iter_next(>hbi);
>   assert(hbitmap_next == next_sector);
>   nb_chunks++;
>   }
> 
> but it can be done later (the usage of mirror_wait_for_io is a hint :)).
> 
> There's a typo though:
> 
> > +/* Clear dirty bits before querying the block status, because
> > + * calling bdrv_reset_dirty_bitmap could yield - if some blocks are 
> > marked
> 
> That's bdrv_get_block_status_above.
> 
> > + * dirty in this window, we need to know.
> > + */
> 

Thanks. I'll adopt both suggestions and resend.

Fam

[Qemu-devel] [PATCH for-2.5] iothread: include id in thread name

2015-11-20 Thread Paolo Bonzini

This makes it easier to find the desired thread

Signed-off-by: Paolo Bonzini 
---
 iothread.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/iothread.c b/iothread.c
index da6ce7b..8a1d6f8 100644
--- a/iothread.c
+++ b/iothread.c
@@ -72,6 +72,7 @@ static void iothread_complete(UserCreatable *obj, Error 
**errp)
 {
 Error *local_error = NULL;
 IOThread *iothread = IOTHREAD(obj);
+char *name, *thread_name;
 
 iothread->stopping = false;
 iothread->thread_id = -1;
@@ -87,8 +88,12 @@ static void iothread_complete(UserCreatable *obj, Error 
**errp)
 /* This assumes we are called from a thread with useful CPU affinity for us
  * to inherit.
  */
-qemu_thread_create(>thread, "iothread", iothread_run,
+name = object_get_canonical_path_component(OBJECT(obj));
+thread_name = g_strdup_printf("iothread %s", name);
+qemu_thread_create(>thread, thread_name, iothread_run,
iothread, QEMU_THREAD_JOINABLE);
+g_free(thread_name);
+g_free(name);
 
 /* Wait for initialization to complete */
 qemu_mutex_lock(>init_done_lock);
-- 
1.8.3.1

Re: [Qemu-devel] [PATCH] PCI: minor performance optimization

2015-11-20 Thread Michael S. Tsirkin

On Fri, Nov 20, 2015 at 07:58:01PM +0800, Cao jin wrote:
> 
> 
> On 11/20/2015 07:26 PM, Michael S. Tsirkin wrote:
> >On Fri, Nov 20, 2015 at 07:04:07PM +0800, Cao jin wrote:
> >>
> >>
> >>On 11/20/2015 06:45 PM, Michael S. Tsirkin wrote:
> >>>On Fri, Nov 20, 2015 at 06:45:01PM +0800, Cao jin wrote:
> >>>
> 2. As spec says, each capability must be DWORD aligned, so an 
> optimization can
> be done via Loop Unrolling.
> >>>
> >>>Why do we want to optimize it?
> >>>
> >>
> >>For tiny performance improvement via less loop. take pcie express
> >>capability(60 bytes at most) for example, it may loop 60 times, now we just
> >>need 15 times, a quarter of before.
> >
> >But who cares? This is not a data path operation.
> 
> It is tiny thing I found when browsing code. When found there are several
> places looks like this, I think maybe it does good to qemu to do this and
> CCed to you because it don`t look like a simple trivial patch.
> 
> So, hey Michael, if you don`t like this kind of optimization, that`t ok,
> forget it. But I think it make me little confused when determine which kind
> of patch should be CCed to you.

Optimization patches should normally include performance numbers
if they are to be merged.
Try to come up with a benchmark and you will realize that the speed of
this function has no effect under even half way realistic conditions.

> >
> 
> Signed-off-by: Cao jin 
> ---
>   hw/pci/pci.c | 12 
>   1 file changed, 8 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index 168b9cc..1e99603 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -1924,13 +1924,15 @@ PCIDevice *pci_create_simple(PCIBus *bus, int 
> devfn, const char *name)
>   static uint8_t pci_find_space(PCIDevice *pdev, uint8_t size)
>   {
>   int offset = PCI_CONFIG_HEADER_SIZE;
> -int i;
> -for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i) {
> +int i = PCI_CONFIG_HEADER_SIZE;;
> +
> +for (; i < PCI_CONFIG_SPACE_SIZE; i = i + 4) {
>   if (pdev->used[i])
> -offset = i + 1;
> -else if (i - offset + 1 == size)
> +offset = i + 4;
> +else if (i - offset >= size)
>   return offset;
>   }
> +
>   return 0;
>   }
> 
> @@ -2144,6 +2146,8 @@ int pci_add_capability2(PCIDevice *pdev, uint8_t 
> cap_id,
>   uint8_t *config;
>   int i, overlapping_cap;
> 
> +assert(size > 0);
> +
>   if (!offset) {
>   offset = pci_find_space(pdev, size);
>   if (!offset) {
> --
> 2.1.0
> >>>.
> >>>
> >>
> >>--
> >>Yours Sincerely,
> >>
> >>Cao Jin
> >.
> >
> 
> -- 
> Yours Sincerely,
> 
> Cao Jin

Re: [Qemu-devel] [PULL 00/14] Migration pull request

2015-11-20 Thread Kevin Wolf

Am 20.11.2015 um 15:00 hat Peter Lieven geschrieben:
> Am 20.11.2015 um 14:53 schrieb Kevin Wolf:
> > Am 20.11.2015 um 14:44 hat Peter Lieven geschrieben:
> >> Am 20.11.2015 um 12:33 schrieb Peter Maydell:
> >>> On 20 November 2015 at 09:38, Peter Lieven  wrote:
>  I wonder if there is a glitch in the PIO implementation of test-ide.c. 
>  As far as I understand the specs
>  it is not allowed to read data while the BSY flag is set. With the 
>  following change to the test-ide script
>  the test does not race:
> 
>  diff --git a/tests/ide-test.c b/tests/ide-test.c
>  index d1014bb..ab0489e 100644
>  --- a/tests/ide-test.c
>  +++ b/tests/ide-test.c
>  @@ -728,6 +728,7 @@ static void cdrom_pio_impl(int nblocks)
>   for (i = 0; i < DIV_ROUND_UP(rxsize, limit); i++) {
>   size_t offset = i * (limit / 2);
>   size_t rem = (rxsize / 2) - offset;
>  +ide_wait_clear(BSY);
>   for (j = 0; j < MIN((limit / 2), rem); j++) {
>   rx[offset + j] = le16_to_cpu(inw(IDE_BASE + reg_data));
>   }
> 
>  Note: in the old sync version of the ATAPI PIO implementation this could 
>  not happen.
> >>> This certainly fixes the stalls for me, though I don't know enough
> >>> IDE to say whether it is the correct fix.
> >> Thanks for testing.
> >>
> >> I hope that John or Kevin can verify this fix?
> > The fix looks correct to me.
> >
> > While you're improving the test, you could also add an assertion that
> > DRQ is set after BSY has been cleared.
> 
> I would actually move the check (which is already there) into the loop:
> 
> diff --git a/tests/ide-test.c b/tests/ide-test.c
> index d1014bb..d7ee376 100644
> --- a/tests/ide-test.c
> +++ b/tests/ide-test.c
> @@ -712,11 +712,6 @@ static void cdrom_pio_impl(int nblocks)
>  /* HPD3: INTRQ_Wait */
>  ide_wait_intr(IDE_PRIMARY_IRQ);
>  
> -/* HPD2: Check_Status_B */
> -data = ide_wait_clear(BSY);
> -assert_bit_set(data, DRQ | DRDY);
> -assert_bit_clear(data, ERR | DF | BSY);
> -
>  /* Read data back: occurs in bursts of 'BYTE_COUNT_LIMIT' bytes.
>   * If BYTE_COUNT_LIMIT is odd, we transfer BYTE_COUNT_LIMIT - 1 bytes.
>   * We allow an odd limit only when the remaining transfer size is
> @@ -728,6 +723,10 @@ static void cdrom_pio_impl(int nblocks)
>  for (i = 0; i < DIV_ROUND_UP(rxsize, limit); i++) {
>  size_t offset = i * (limit / 2);
>  size_t rem = (rxsize / 2) - offset;
> +/* HPD2: Check_Status_B */
> +data = ide_wait_clear(BSY);
> +assert_bit_set(data, DRQ | DRDY);
> +assert_bit_clear(data, ERR | DF | BSY);
>  for (j = 0; j < MIN((limit / 2), rem); j++) {
>  rx[offset + j] = le16_to_cpu(inw(IDE_BASE + reg_data));
>  }
> 
> Are you okay with that? @John, you also?

Oh, yes, I missed that the check was already there, just in the wrong
place. I agree that this is better.

I also see that we have the state names from the ATA spec in a comment,
so getting that part right might be nice, too. For a start, HPD* are the
wrong states (they are for DMA transfers), it should be HP* everywhere.
And for the part that your patch touches, the loop that actually
transfers data is part of "HP4: Transfer_Data", so we might add a
comment right before the (nested) for.

Kevin

[Qemu-devel] [PATCH for-2.5] target-arm: Don't mask out bits [47:40] in LPAE descriptors for v8

2015-11-20 Thread Peter Maydell

In an LPAE format descriptor in ARMv8 the address field extends
up to bit 47, not just bit 39. Correct the masking so we don't
give incorrect results if the output address size is greater
than 40 bits, as it can be for AArch64.

(Note that we don't yet support the new-in-v8 Address Size fault which
should be generated if any translation table entry or TTBR contains
an address with non-zero bits above the most significant bit of the
maximum output address size.)

Signed-off-by: Peter Maydell 
---
This is worth fixing for 2.5 I think. As the commit message notes,
we don't support the Addres Size faults we ought to take in some
cases, but that seems more 2.6-ish.
---
 target-arm/helper.c | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/target-arm/helper.c b/target-arm/helper.c
index 4ecae61..afc4163 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -6642,6 +6642,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, 
target_ulong address,
 int ap, ns, xn, pxn;
 uint32_t el = regime_el(env, mmu_idx);
 bool ttbr1_valid = true;
+uint64_t descaddrmask;
 
 /* TODO:
  * This code does not handle the different format TCR for VTCR_EL2.
@@ -6831,6 +6832,15 @@ static bool get_phys_addr_lpae(CPUARMState *env, 
target_ulong address,
 descaddr = extract64(ttbr, 0, 48);
 descaddr &= ~((1ULL << (inputsize - (stride * (4 - level - 1);
 
+/* The address field in the descriptor goes up to bit 39 for ARMv7
+ * but up to bit 47 for ARMv8.
+ */
+if (arm_feature(env, ARM_FEATURE_V8)) {
+descaddrmask = 0xf000ULL;
+} else {
+descaddrmask = 0xfff000ULL;
+}
+
 /* Secure accesses start with the page table in secure memory and
  * can be downgraded to non-secure at any step. Non-secure accesses
  * remain non-secure. We implement this by just ORing in the NSTable/NS
@@ -6854,7 +6864,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, 
target_ulong address,
 /* Invalid, or the Reserved level 3 encoding */
 goto do_fault;
 }
-descaddr = descriptor & 0xfff000ULL;
+descaddr = descriptor & descaddrmask;
 
 if ((descriptor & 2) && (level < 3)) {
 /* Table entry. The top five bits are attributes which  may
-- 
1.9.1

Re: [Qemu-devel] [PATCH 06/18] armv7m: new NVIC utility functions

2015-11-20 Thread Peter Maydell

On 9 November 2015 at 01:11, Michael Davidsaver  wrote:
> Internal functions for operations previously done
> by GIC internals.
>
> nvic_irq_update() recalculates highest pending/active
> exceptions.
>
> armv7m_nvic_set_pending() include exception escalation
> logic.
>
> armv7m_nvic_acknowledge_irq() and nvic_irq_update()
> update ARMCPU fields.

Hi; I have a lot of review comments on this patch set, but that's
really because v7M exception logic is pretty complicated and
our current code is a long way away from correct. You might
find it helpful to separate out "restructure the NVIC code
into its own device" into separate patches from "and now add
functionality like exception escalation", perhaps.

> Signed-off-by: Michael Davidsaver 
> ---
>  hw/intc/armv7m_nvic.c | 250 
> +++---
>  1 file changed, 235 insertions(+), 15 deletions(-)
>
> diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c
> index 487a09a..ebb4d4e 100644
> --- a/hw/intc/armv7m_nvic.c
> +++ b/hw/intc/armv7m_nvic.c
> @@ -140,36 +140,256 @@ static void systick_reset(nvic_state *s)
>  timer_del(s->systick.timer);
>  }
>
> -/* The external routines use the hardware vector numbering, ie. the first
> -   IRQ is #16.  The internal GIC routines use #32 as the first IRQ.  */
> +/* caller must call nvic_irq_update() after this */
> +static
> +void set_prio(nvic_state *s, unsigned irq, uint8_t prio)
> +{
> +unsigned submask = (1<<(s->prigroup+1))-1;
> +
> +assert(irq > 3); /* only use for configurable prios */
> +assert(irq < NVIC_MAX_VECTORS);
> +
> +s->vectors[irq].raw_prio = prio;
> +s->vectors[irq].prio_group = (prio>>(s->prigroup+1));
> +s->vectors[irq].prio_sub = irq + (prio)*NVIC_MAX_VECTORS;

Precalculating the group priority seems a bit odd, since it
will change later anyway if the guest writes to PRIGROUP.

> +
> +DPRINTF(0, "Set %u priority grp %d sub %u\n", irq,
> +s->vectors[irq].prio_group, s->vectors[irq].prio_sub);
> +}
> +
> +/* recompute highest pending */
> +static
> +void nvic_irq_update(nvic_state *s, int update_active)
> +{
> +unsigned i;
> +int lvl;
> +CPUARMState *env = >cpu->env;
> +int16_t act_group = 0x100, pend_group = 0x100;
> +uint16_t act_sub = 0, pend_sub = 0;
> +uint16_t act_irq = 0, pend_irq = 0;
> +
> +/* find highest priority */
> +for (i = 1; i < s->num_irq; i++) {
> +vec_info *I = >vectors[i];

Minor style issue: we prefer lower case for variable names,
and CamelCase for structure type names (see CODING_STYLE).
So this would be VecInfo *vec; or something similar.

> +
> +DPRINTF(2, " VECT %d %d:%u\n", i, I->prio_group, I->prio_sub);
> +
> +if (I->active && ((I->prio_group < act_group)
> +|| (I->prio_group == act_group && I->prio_sub < act_sub)))

Because the group priority and sub priority are just two contiguous
parts of the raw priority, you get the same effect here by just
doing a comparison on the raw value: "I->raw_prio <  act_raw_prio".

Note however that the running priority is actually only the
group priority part (see the pseudocode ExecutionPriority function)
and so you want something more like:

highestpri = 0x100;
for (each vector) {
if (vector->active && vector->raw_prio < highestpri) {
highestpri = vector->raw_prio & prigroup_mask;
act_sub = vector->raw_prio & ~prigroup_mask; // if you need it
}
}

(this is why it's not worth precalculating the group and
subpriorities -- it's cheap enough to do at this point.)

> +{
> +act_group = I->prio_group;
> +act_sub = I->prio_sub;
> +act_irq = i;
> +}
> +
> +if (I->enabled && I->pending && ((I->prio_group < pend_group)
> +|| (I->prio_group == pend_group && I->prio_sub < pend_sub)))
> +{
> +pend_group = I->prio_group;
> +pend_sub = I->prio_sub;
> +pend_irq = i;
> +}
> +}
> +
> +env->v7m.pending = pend_irq;
> +env->v7m.pending_prio = pend_group;
> +
> +if (update_active) {
> +env->v7m.exception = act_irq;
> +env->v7m.exception_prio = act_group;
> +}
> +
> +/* Raise NVIC output even if pend_group is masked.
> + * This is necessary as we get no notification
> + * when PRIMASK et al. are changed.
> + * As long as our output is high cpu_exec() will call
> + * into arm_v7m_cpu_exec_interrupt() frequently, which
> + * then tests to see if the pending exception
> + * is permitted.
> + */

This is bad -- we should instead arrange to update our idea
of the current running priority when PRIMASK  are updated.
(Also it's not clear why we need to have an outbound qemu_irq
just to tell the core there's a pending exception. I think
we should just be able to call cpu_interrupt().)

> +lvl = pend_irq > 0;
> +DPRINTF(1,

Re: [Qemu-devel] [PATCH v2 1/9] cpus: Reclaim vCPU objects

2015-11-20 Thread Matthew Rosato

On 11/19/2015 09:33 PM, Bharata B Rao wrote:
> On Thu, Nov 19, 2015 at 10:10:06AM -0500, Matthew Rosato wrote:
>> From: Gu Zheng 
>>
>> In order to deal well with the kvm vcpus (which can not be removed without 
>> any
>> protection), we do not close KVM vcpu fd, just record and mark it as stopped
>> into a list, so that we can reuse it for the appending cpu hot-add request if
>> possible. It is also the approach that kvm guys suggested:
>> https://www.mail-archive.com/kvm@vger.kernel.org/msg102839.html
>>
>> Signed-off-by: Chen Fan 
>> Signed-off-by: Gu Zheng 
>> Signed-off-by: Zhu Guihua 
>> Signed-off-by: Bharata B Rao 
>>[Explicit CPU_REMOVE() from qemu_kvm/tcg_destroy_vcpu()
>> isn't needed as it is done from cpu_exec_exit()]
> 
> I didn't look very closely but the patch that removes cpu from the list
> from cpu_exec_exit() isn't part of this series. The above change requires
> 
> https://lists.gnu.org/archive/html/qemu-devel/2015-08/msg00656.html
> 
> I have just cleaned that patch a bit and will be posting early next
> week with another patch that does CPU vmstate unregistration too from
> cpu_exec_exit(). I think since we do vmstate registration from cpu_exec_init()
> it makes sense to do unregistration from cpu_exec_exit() instead of
> archs doing it themselves. I had a version of this at
> 
> https://lists.gnu.org/archive/html/qemu-devel/2015-08/msg00649.html
> 
> With the above patch, you woudn't need 7/9 in this series.
> 

Hi Bharata -- Looking at the mailing list discussion from your patch
set, I got the impression that handling this in cpu_exec_exit() might
not be acceptable for all architectures.  So, my patch just tries to
handle the s390 case in patch 7/9, doing list removal and vmstate
unregistration.

FWIW, the 2 patches you referenced would be fine for s390, so if you can
get those approved I'd have no problem dropping 7/9 in favor of your
patches.

Matt

Re: [Qemu-devel] [PULL 00/14] Migration pull request

2015-11-20 Thread Peter Lieven

Am 20.11.2015 um 12:33 schrieb Peter Maydell:
> On 20 November 2015 at 09:38, Peter Lieven  wrote:
>> I wonder if there is a glitch in the PIO implementation of test-ide.c. As 
>> far as I understand the specs
>> it is not allowed to read data while the BSY flag is set. With the following 
>> change to the test-ide script
>> the test does not race:
>>
>> diff --git a/tests/ide-test.c b/tests/ide-test.c
>> index d1014bb..ab0489e 100644
>> --- a/tests/ide-test.c
>> +++ b/tests/ide-test.c
>> @@ -728,6 +728,7 @@ static void cdrom_pio_impl(int nblocks)
>>  for (i = 0; i < DIV_ROUND_UP(rxsize, limit); i++) {
>>  size_t offset = i * (limit / 2);
>>  size_t rem = (rxsize / 2) - offset;
>> +ide_wait_clear(BSY);
>>  for (j = 0; j < MIN((limit / 2), rem); j++) {
>>  rx[offset + j] = le16_to_cpu(inw(IDE_BASE + reg_data));
>>  }
>>
>> Note: in the old sync version of the ATAPI PIO implementation this could not 
>> happen.
> This certainly fixes the stalls for me, though I don't know enough
> IDE to say whether it is the correct fix.
Thanks for testing.

I hope that John or Kevin can verify this fix?

Peter

[Qemu-devel] [PATCH] target-mips: flush QEMU TLB when disabling 64-bit addressing

2015-11-20 Thread Leon Alrae

CP0.Status.KX/SX/UX bits are responsible for enabling access to 64-bit
Kernel/Supervisor/User Segments. If bit is cleared an access to
corresponding segment should generate Address Error Exception.

However, the guest may still be able to access some pages belonging to
the disabled 64-bit segment because we forget to flush QEMU TLB.

This patch fixes it.

Signed-off-by: Leon Alrae 
---
 target-mips/cpu.h   | 18 +-
 target-mips/op_helper.c | 13 -
 2 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/target-mips/cpu.h b/target-mips/cpu.h
index fa919c1..89c01f7 100644
--- a/target-mips/cpu.h
+++ b/target-mips/cpu.h
@@ -961,6 +961,15 @@ static inline void compute_hflags(CPUMIPSState *env)
 }
 
 #ifndef CONFIG_USER_ONLY
+static inline void cpu_mips_tlb_flush(CPUMIPSState *env, int flush_global)
+{
+MIPSCPU *cpu = mips_env_get_cpu(env);
+
+/* Flush qemu's TLB and discard all shadowed entries.  */
+tlb_flush(CPU(cpu), flush_global);
+env->tlb->tlb_in_use = env->tlb->nb_tlb;
+}
+
 /* Called for updates to CP0_Status.  */
 static inline void sync_c0_status(CPUMIPSState *env, CPUMIPSState *cpu, int tc)
 {
@@ -999,6 +1008,7 @@ static inline void sync_c0_status(CPUMIPSState *env, 
CPUMIPSState *cpu, int tc)
 static inline void cpu_mips_store_status(CPUMIPSState *env, target_ulong val)
 {
 uint32_t mask = env->CP0_Status_rw_bitmask;
+target_ulong old = env->CP0_Status;
 
 if (env->insn_flags & ISA_MIPS32R6) {
 bool has_supervisor = extract32(mask, CP0St_KSU, 2) == 0x3;
@@ -1014,7 +1024,13 @@ static inline void cpu_mips_store_status(CPUMIPSState 
*env, target_ulong val)
 mask &= ~(((1 << CP0St_SR) | (1 << CP0St_NMI)) & val);
 }
 
-env->CP0_Status = (env->CP0_Status & ~mask) | (val & mask);
+env->CP0_Status = (old & ~mask) | (val & mask);
+#if defined(TARGET_MIPS64)
+if ((env->CP0_Status ^ old) & (old & (7 << CP0St_UX))) {
+/* Access to at least one of the 64-bit segments has been disabled */
+cpu_mips_tlb_flush(env, 1);
+}
+#endif
 if (env->CP0_Config3 & (1 << CP0C3_MT)) {
 sync_c0_status(env, env, env->current_tc);
 } else {
diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c
index 056d53b..d2c98c9 100644
--- a/target-mips/op_helper.c
+++ b/target-mips/op_helper.c
@@ -23,10 +23,6 @@
 #include "exec/cpu_ldst.h"
 #include "sysemu/kvm.h"
 
-#ifndef CONFIG_USER_ONLY
-static inline void cpu_mips_tlb_flush (CPUMIPSState *env, int flush_global);
-#endif
-
 /*/
 /* Exceptions processing helpers */
 
@@ -1846,15 +1842,6 @@ target_ulong helper_yield(CPUMIPSState *env, 
target_ulong arg)
 
 #ifndef CONFIG_USER_ONLY
 /* TLB management */
-static void cpu_mips_tlb_flush (CPUMIPSState *env, int flush_global)
-{
-MIPSCPU *cpu = mips_env_get_cpu(env);
-
-/* Flush qemu's TLB and discard all shadowed entries.  */
-tlb_flush(CPU(cpu), flush_global);
-env->tlb->tlb_in_use = env->tlb->nb_tlb;
-}
-
 static void r4k_mips_tlb_flush_extra (CPUMIPSState *env, int first)
 {
 /* Discard entries from env->tlb[first] onwards.  */
-- 
2.1.0

[Qemu-devel] [PATCH v1 1/1] xlnx-ep108: Fix minimum RAM check

2015-11-20 Thread Alistair Francis

The minimum RAM check logic for the Xiilnx EP108 was off by one,
which caused a false positive. Correct the logic to only print
warnings when the RAM is below 0x800.

Signed-off-by: Alistair Francis 
---

 hw/arm/xlnx-ep108.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/arm/xlnx-ep108.c b/hw/arm/xlnx-ep108.c
index 2899698..85b978f 100644
--- a/hw/arm/xlnx-ep108.c
+++ b/hw/arm/xlnx-ep108.c
@@ -51,7 +51,7 @@ static void xlnx_ep108_init(MachineState *machine)
 machine->ram_size = EP108_MAX_RAM_SIZE;
 }
 
-if (machine->ram_size <= 0x0800) {
+if (machine->ram_size < 0x0800) {
 qemu_log("WARNING: RAM size " RAM_ADDR_FMT " is small for EP108",
  machine->ram_size);
 }
-- 
2.5.0

Re: [Qemu-devel] [PATCH 00/18] Fix exception handling and msr/mrs access

2015-11-20 Thread Peter Maydell

On 17 November 2015 at 17:07, Peter Maydell  wrote:
> On 9 November 2015 at 01:11, Michael Davidsaver  wrote:
>> This series grew from a previous incorrect patch attempting to fix
>> some incorrect behavior.  After spending some time going through the
>> arch. ref. manual for v7-M I think I understand better how this should
>> work and have made a number of changes which actually improve the situation.
>>
>> These changes have not yet been cross checked against real hardware, and
>> I therefore don't consider them mergeable.  It's gotten big enough though
>> that I'd like to get some feedback.

> I'll reply to the various patches individually with comments.

I think I've now done that at least for the earlier patches.
There are probably some other finer details that I'll get to
in a later round of patch review but hopefully you have enough
to do some of the fixes and restructuring of this patchset for v2.

I think the most important thing here is getting the structure
of the changes into patches right so they're easy to review.
The general principles here are:
 * each patch should aim to be self-contained and to do one thing,
   not several things (for instance, avoid making several bug fixes
   in one patch, avoid putting "restructure/refactor code" and
   "add new feature" in the same patch, and so on)
 * at each point in the patch series QEMU needs to still compile
   and run (this is particularly important to allow bisection
   of bugs later on where people will want to be able to narrow
   down which commit introduced a bug)
If the structure of the patchset is right it should be fairly
easy to review your improvements against the architecture manual.

thanks
-- PMM

Re: [Qemu-devel] [PATCH] numa: Clean up query-memdev error handling, plug leak

2015-11-20 Thread Eduardo Habkost

On Fri, Nov 20, 2015 at 02:00:40PM +0100, Markus Armbruster wrote:
> qmp_query_memdev() doesn't fail.  Instead, it returns an empty list.
> That's wrong.
> 
> Two error paths:
> 
> * When object_get_objects_root() returns null.  It never does, so
>   simply drop the useless error handling.
> 
> * When query_memdev() fails.  This can happen, and the error to return
>   is the one that query_memdev() currently leaks.  Passing the error
>   from query_memdev() to qmp_query_memdev() isn't so simple, because
>   object_child_foreach() is in the way.  Fixable, but I'd rather not
>   try it in hard freeze.  Plug the leak, make up an error, and add a
>   FIXME for the remaining work.
> 
> Screwed up in commit 76b5d85 "qmp: add query-memdev".
> 
> Signed-off-by: Markus Armbruster 

Reviewed-by: Eduardo Habkost 

Do you know how to trigger a query_memdev() error today, or is
just theoretical?

-- 
Eduardo

Re: [Qemu-devel] [PATCH] tests: fix cdrom_pio_impl in ide-test

2015-11-20 Thread Peter Maydell

On 20 November 2015 at 14:29, Peter Lieven  wrote:
> The check for the cleared BSY flag has to be performed
> before each data transfer and not just before the
> first one.
>
> Commit 5f81724d revealed this glitch as the BSY flag
> was not set in ATAPI PIO transfers before.
>
> While at it fix the desciptions and add a comment before
> the nested for loop that transfers the data.
>
> Signed-off-by: Peter Lieven 

If the IDE folks can review this I'd like to apply it
direct to master this afternoon so we can tag and roll rc1
today.

thanks
-- PMM

Re: [Qemu-devel] [PATCH v9 1/6] target-arm: kvm64 - introduce kvm_arm_init_debug()

2015-11-20 Thread Peter Maydell

On 20 November 2015 at 15:05, Peter Maydell  wrote:
> On 12 November 2015 at 16:20, Alex Bennée  wrote:
>> As we haven't always had guest debug support we need to probe for it.
>> Additionally we don't do this in the start-up capability code so we
>> don't fall over on old kernels.
>>
>> Signed-off-by: Alex Bennée 
>> ---
>>  target-arm/kvm64.c | 18 ++
>>  1 file changed, 18 insertions(+)
>>
>> diff --git a/target-arm/kvm64.c b/target-arm/kvm64.c
>> index ceebfeb..d087794 100644
>> --- a/target-arm/kvm64.c
>> +++ b/target-arm/kvm64.c
>> @@ -25,6 +25,22 @@
>>  #include "internals.h"
>>  #include "hw/arm/arm.h"
>>
>> +static bool have_guest_debug;
>> +
>> +/**
>> + * kvm_arm_init_debug()
>> + * @cs: CPUState
>> + *
>> + * Check for guest debug capabilities.
>> + *
>> + */
>> +static void kvm_arm_init_debug(CPUState *cs)
>> +{
>> +have_guest_debug = kvm_check_extension(cs->kvm_state,
>> +   KVM_CAP_SET_GUEST_DEBUG);
>> +return;
>> +}
>> +
>>  static inline void set_feature(uint64_t *features, int feature)
>>  {
>>  *features |= 1ULL << feature;
>> @@ -121,6 +137,8 @@ int kvm_arch_init_vcpu(CPUState *cs)
>>  }
>>  cpu->mp_affinity = mpidr & ARM64_AFFINITY_MASK;
>>
>> +kvm_arm_init_debug(cs);
>> +
>>  return kvm_arm_init_cpreg_list(cpu);
>>  }
>
> I assume in practice the kernel guarantees that either all
> CPUs have the SET_GUEST_DEBUG cap, or none do :-)
>
> Reviewed-by: Peter Maydell 

...except I've just noticed that nothing else in this patchset
ever reads the have_guest_debug bool we just set, so what is
the purpose of this patch?

thanks
-- PMM

Re: [Qemu-devel] [PATCH 11/18] armv7m: fix I and F flag handling

2015-11-20 Thread Peter Maydell

On 9 November 2015 at 01:11, Michael Davidsaver  wrote:
> Despite having the same notation, these bits
> have completely different meaning than -AR.
>
> Add armv7m_excp_unmasked()
> to calculate the currently runable exception priority
> taking into account masks and active handlers.
> Use this in conjunction with the pending exception
> priority to determine if the pending exception
> can interrupt execution.

This function is used by code added in earlier patches in
this series, so this patch needs to be moved earlier in the
series, or those patches won't compile.

> Signed-off-by: Michael Davidsaver 
> ---
>  target-arm/cpu.c | 26 +++---
>  target-arm/cpu.h | 27 ++-
>  2 files changed, 33 insertions(+), 20 deletions(-)
>
> diff --git a/target-arm/cpu.c b/target-arm/cpu.c
> index be026bc..5d03117 100644
> --- a/target-arm/cpu.c
> +++ b/target-arm/cpu.c
> @@ -173,6 +173,8 @@ static void arm_cpu_reset(CPUState *s)
>  uint32_t initial_pc; /* Loaded from 0x4 */
>  uint8_t *rom;
>
> +env->v7m.exception_prio = env->v7m.pending_prio = 0x100;
> +
>  env->daif &= ~PSTATE_I;
>  rom = rom_ptr(0);
>  if (rom) {
> @@ -301,29 +303,15 @@ static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, 
> int interrupt_request)
>  {
>  CPUClass *cc = CPU_GET_CLASS(cs);
>  ARMCPU *cpu = ARM_CPU(cs);
> -CPUARMState *env = >env;
>  bool ret = false;
>
> -
> -if (interrupt_request & CPU_INTERRUPT_FIQ
> -&& !(env->daif & PSTATE_F)) {
> -cs->exception_index = EXCP_FIQ;
> -cc->do_interrupt(cs);
> -ret = true;
> -}
> -/* ARMv7-M interrupt return works by loading a magic value
> - * into the PC.  On real hardware the load causes the
> - * return to occur.  The qemu implementation performs the
> - * jump normally, then does the exception return when the
> - * CPU tries to execute code at the magic address.
> - * This will cause the magic PC value to be pushed to
> - * the stack if an interrupt occurred at the wrong time.
> - * We avoid this by disabling interrupts when
> - * pc contains a magic address.

This (removing this comment and the checks for the magic address)
seem to be part of a separate change [probably the one in
"armv7m: Undo armv7m.hack"] and shouldn't be in this patch.

> +/* ARMv7-M interrupt masking works differently than -A or -R.
> + * There is no FIQ/IRQ distinction.
> + * Instead of masking interrupt sources, the I and F bits
> + * (along with basepri) mask certain exception priority levels.
>   */
>  if (interrupt_request & CPU_INTERRUPT_HARD
> -&& !(env->daif & PSTATE_I)
> -&& (env->regs[15] < 0xfff0)) {
> +&& (armv7m_excp_unmasked(cpu) >= cpu->env.v7m.pending_prio)) {
>  cs->exception_index = EXCP_IRQ;
>  cc->do_interrupt(cs);
>  ret = true;
> diff --git a/target-arm/cpu.h b/target-arm/cpu.h
> index c193fbb..29d89ce 100644
> --- a/target-arm/cpu.h
> +++ b/target-arm/cpu.h
> @@ -1033,9 +1033,34 @@ void arm_cpu_list(FILE *f, fprintf_function 
> cpu_fprintf);
>  uint32_t arm_phys_excp_target_el(CPUState *cs, uint32_t excp_idx,
>   uint32_t cur_el, bool secure);
>
> +/* @returns highest (numerically lowest) unmasked exception priority
> + */
> +static inline
> +int armv7m_excp_unmasked(ARMCPU *cpu)

What this is really calculating is the current execution
priority (running priority) of the CPU, so I think a better
name would be armv7m_current_exec_priority() or
armv7m_current_priority() or armv7m_running_priority() or similar.

> +{
> +CPUARMState *env = >env;
> +int runnable;
> +
> +/* find highest (numerically lowest) priority which could
> + * run based on masks
> + */
> +if (env->daif_F) { /* FAULTMASK */

Style issue -- operands should have spaces around them.

> +runnable = -2;

These all seem to be off by one: FAULTMASK sets the
running priority to -1, not -2, PRIMASK sets it to 0,
not -1, and so on.

> +} else if (env->daif_I) { /* PRIMASK */
> +runnable = -1;
> +} else if (env->v7m.basepri > 0) {
> +/* BASEPRI==1 -> runnable==-1 (same as PRIMASK==1) */

(applies to operands in comments too)

> +runnable = env->v7m.basepri-2;

Where is this - 2 from? Also, BASEPRI values honour the
PRIGROUP setting. (Compare the ExecutionPriority pseudocode).

> +} else {
> +runnable = 0x100; /* lower than any possible priority */
> +}
> +/* consider priority of active handler */
> +return MIN(runnable, env->v7m.exception_prio-1);

I don't think this -1 should be here.

> +}
> +
>  /* Interface between CPU and Interrupt controller.  */
>  void armv7m_nvic_set_pending(void *opaque, int irq);
> -int armv7m_nvic_acknowledge_irq(void *opaque);
> +void armv7m_nvic_acknowledge_irq(void *opaque);
>  void

Re: [Qemu-devel] [PULL 00/14] Migration pull request

2015-11-20 Thread Peter Lieven

Am 20.11.2015 um 14:53 schrieb Kevin Wolf:
> Am 20.11.2015 um 14:44 hat Peter Lieven geschrieben:
>> Am 20.11.2015 um 12:33 schrieb Peter Maydell:
>>> On 20 November 2015 at 09:38, Peter Lieven  wrote:
 I wonder if there is a glitch in the PIO implementation of test-ide.c. As 
 far as I understand the specs
 it is not allowed to read data while the BSY flag is set. With the 
 following change to the test-ide script
 the test does not race:

 diff --git a/tests/ide-test.c b/tests/ide-test.c
 index d1014bb..ab0489e 100644
 --- a/tests/ide-test.c
 +++ b/tests/ide-test.c
 @@ -728,6 +728,7 @@ static void cdrom_pio_impl(int nblocks)
  for (i = 0; i < DIV_ROUND_UP(rxsize, limit); i++) {
  size_t offset = i * (limit / 2);
  size_t rem = (rxsize / 2) - offset;
 +ide_wait_clear(BSY);
  for (j = 0; j < MIN((limit / 2), rem); j++) {
  rx[offset + j] = le16_to_cpu(inw(IDE_BASE + reg_data));
  }

 Note: in the old sync version of the ATAPI PIO implementation this could 
 not happen.
>>> This certainly fixes the stalls for me, though I don't know enough
>>> IDE to say whether it is the correct fix.
>> Thanks for testing.
>>
>> I hope that John or Kevin can verify this fix?
> The fix looks correct to me.
>
> While you're improving the test, you could also add an assertion that
> DRQ is set after BSY has been cleared.

I would actually move the check (which is already there) into the loop:

diff --git a/tests/ide-test.c b/tests/ide-test.c
index d1014bb..d7ee376 100644
--- a/tests/ide-test.c
+++ b/tests/ide-test.c
@@ -712,11 +712,6 @@ static void cdrom_pio_impl(int nblocks)
 /* HPD3: INTRQ_Wait */
 ide_wait_intr(IDE_PRIMARY_IRQ);
 
-/* HPD2: Check_Status_B */
-data = ide_wait_clear(BSY);
-assert_bit_set(data, DRQ | DRDY);
-assert_bit_clear(data, ERR | DF | BSY);
-
 /* Read data back: occurs in bursts of 'BYTE_COUNT_LIMIT' bytes.
  * If BYTE_COUNT_LIMIT is odd, we transfer BYTE_COUNT_LIMIT - 1 bytes.
  * We allow an odd limit only when the remaining transfer size is
@@ -728,6 +723,10 @@ static void cdrom_pio_impl(int nblocks)
 for (i = 0; i < DIV_ROUND_UP(rxsize, limit); i++) {
 size_t offset = i * (limit / 2);
 size_t rem = (rxsize / 2) - offset;
+/* HPD2: Check_Status_B */
+data = ide_wait_clear(BSY);
+assert_bit_set(data, DRQ | DRDY);
+assert_bit_clear(data, ERR | DF | BSY);
 for (j = 0; j < MIN((limit / 2), rem); j++) {
 rx[offset + j] = le16_to_cpu(inw(IDE_BASE + reg_data));
 }

Are you okay with that? @John, you also?

Peter

Re: [Qemu-devel] [PATCH v9 1/6] target-arm: kvm64 - introduce kvm_arm_init_debug()

2015-11-20 Thread Peter Maydell

On 12 November 2015 at 16:20, Alex Bennée  wrote:
> As we haven't always had guest debug support we need to probe for it.
> Additionally we don't do this in the start-up capability code so we
> don't fall over on old kernels.
>
> Signed-off-by: Alex Bennée 
> ---
>  target-arm/kvm64.c | 18 ++
>  1 file changed, 18 insertions(+)
>
> diff --git a/target-arm/kvm64.c b/target-arm/kvm64.c
> index ceebfeb..d087794 100644
> --- a/target-arm/kvm64.c
> +++ b/target-arm/kvm64.c
> @@ -25,6 +25,22 @@
>  #include "internals.h"
>  #include "hw/arm/arm.h"
>
> +static bool have_guest_debug;
> +
> +/**
> + * kvm_arm_init_debug()
> + * @cs: CPUState
> + *
> + * Check for guest debug capabilities.
> + *
> + */
> +static void kvm_arm_init_debug(CPUState *cs)
> +{
> +have_guest_debug = kvm_check_extension(cs->kvm_state,
> +   KVM_CAP_SET_GUEST_DEBUG);
> +return;
> +}
> +
>  static inline void set_feature(uint64_t *features, int feature)
>  {
>  *features |= 1ULL << feature;
> @@ -121,6 +137,8 @@ int kvm_arch_init_vcpu(CPUState *cs)
>  }
>  cpu->mp_affinity = mpidr & ARM64_AFFINITY_MASK;
>
> +kvm_arm_init_debug(cs);
> +
>  return kvm_arm_init_cpreg_list(cpu);
>  }

I assume in practice the kernel guarantees that either all
CPUs have the SET_GUEST_DEBUG cap, or none do :-)

Reviewed-by: Peter Maydell 

thanks
-- PMM

Re: [Qemu-devel] [Qemu-ppc] [OpenBIOS] CUDA has problems with Mac OS 10.4

2015-11-20 Thread BALATON Zoltan


On Thu, 19 Nov 2015, Segher Boessenkool wrote:

Some mac99/pmu99 hardware has an ADB keyboard, fwiw (tibook, for example).
The do have built-in USB; that, and being newworld, are not directly
related things.


Maybe, but the PowerMac3,1 we are trying to emulate here does not have ADB 
AFAIK. Although qemu's mac99 is not a real machine now we should move to 
being closer to some existing hardware if we want OSes written for that 
hardware to run.


Regards,
BALATON Zoltan

Re: [Qemu-devel] [PULL 00/14] Migration pull request

2015-11-20 Thread Kevin Wolf

Am 20.11.2015 um 14:44 hat Peter Lieven geschrieben:
> Am 20.11.2015 um 12:33 schrieb Peter Maydell:
> > On 20 November 2015 at 09:38, Peter Lieven  wrote:
> >> I wonder if there is a glitch in the PIO implementation of test-ide.c. As 
> >> far as I understand the specs
> >> it is not allowed to read data while the BSY flag is set. With the 
> >> following change to the test-ide script
> >> the test does not race:
> >>
> >> diff --git a/tests/ide-test.c b/tests/ide-test.c
> >> index d1014bb..ab0489e 100644
> >> --- a/tests/ide-test.c
> >> +++ b/tests/ide-test.c
> >> @@ -728,6 +728,7 @@ static void cdrom_pio_impl(int nblocks)
> >>  for (i = 0; i < DIV_ROUND_UP(rxsize, limit); i++) {
> >>  size_t offset = i * (limit / 2);
> >>  size_t rem = (rxsize / 2) - offset;
> >> +ide_wait_clear(BSY);
> >>  for (j = 0; j < MIN((limit / 2), rem); j++) {
> >>  rx[offset + j] = le16_to_cpu(inw(IDE_BASE + reg_data));
> >>  }
> >>
> >> Note: in the old sync version of the ATAPI PIO implementation this could 
> >> not happen.
> > This certainly fixes the stalls for me, though I don't know enough
> > IDE to say whether it is the correct fix.
> Thanks for testing.
> 
> I hope that John or Kevin can verify this fix?

The fix looks correct to me.

While you're improving the test, you could also add an assertion that
DRQ is set after BSY has been cleared.

Kevin

[Qemu-devel] [PATCH] tests: fix cdrom_pio_impl in ide-test

2015-11-20 Thread Peter Lieven

The check for the cleared BSY flag has to be performed
before each data transfer and not just before the
first one.

Commit 5f81724d revealed this glitch as the BSY flag
was not set in ATAPI PIO transfers before.

While at it fix the desciptions and add a comment before
the nested for loop that transfers the data.

Signed-off-by: Peter Lieven 
---
 tests/ide-test.c |   16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/tests/ide-test.c b/tests/ide-test.c
index d1014bb..fc1ce52 100644
--- a/tests/ide-test.c
+++ b/tests/ide-test.c
@@ -699,24 +699,19 @@ static void cdrom_pio_impl(int nblocks)
 outb(IDE_BASE + reg_lba_middle, BYTE_COUNT_LIMIT & 0xFF);
 outb(IDE_BASE + reg_lba_high, (BYTE_COUNT_LIMIT >> 8 & 0xFF));
 outb(IDE_BASE + reg_command, CMD_PACKET);
-/* HPD0: Check_Status_A State */
+/* HP0: Check_Status_A State */
 nsleep(400);
 data = ide_wait_clear(BSY);
-/* HPD1: Send_Packet State */
+/* HP1: Send_Packet State */
 assert_bit_set(data, DRQ | DRDY);
 assert_bit_clear(data, ERR | DF | BSY);
 
 /* SCSI CDB (READ10) -- read n*2048 bytes from block 0 */
 send_scsi_cdb_read10(0, nblocks);
 
-/* HPD3: INTRQ_Wait */
+/* HP3: INTRQ_Wait */
 ide_wait_intr(IDE_PRIMARY_IRQ);
 
-/* HPD2: Check_Status_B */
-data = ide_wait_clear(BSY);
-assert_bit_set(data, DRQ | DRDY);
-assert_bit_clear(data, ERR | DF | BSY);
-
 /* Read data back: occurs in bursts of 'BYTE_COUNT_LIMIT' bytes.
  * If BYTE_COUNT_LIMIT is odd, we transfer BYTE_COUNT_LIMIT - 1 bytes.
  * We allow an odd limit only when the remaining transfer size is
@@ -728,6 +723,11 @@ static void cdrom_pio_impl(int nblocks)
 for (i = 0; i < DIV_ROUND_UP(rxsize, limit); i++) {
 size_t offset = i * (limit / 2);
 size_t rem = (rxsize / 2) - offset;
+/* HP2: Check_Status_B */
+data = ide_wait_clear(BSY);
+assert_bit_set(data, DRQ | DRDY);
+assert_bit_clear(data, ERR | DF | BSY);
+/* HP4: Transfer_Data */
 for (j = 0; j < MIN((limit / 2), rem); j++) {
 rx[offset + j] = le16_to_cpu(inw(IDE_BASE + reg_data));
 }
-- 
1.7.9.5

Re: [Qemu-devel] [vfio-users] [PATCH 2/2] input: linux evdev support

2015-11-20 Thread Blank Field

2015-11-18 20:55 GMT+03:00 Gerd Hoffmann :
> The advantage is that it'll work without virtio-input drivers in the
> guest, the events are delivered to the usual ps/2 or usb input devices
> (depending on what the machine happens to have).  And for keyboards
> qemu is able to switch the keyboard between guest and host on hotkey.
> The hotkey is hard-coded for now (both control keys), initialy the
> guest owns the keyboard.

You might already know that there are
-display sdl[,frame=on|off][,alt_grab=on|off][,ctrl_grab=on|off]
-ctrl-grab  use Right-Ctrl to grab mouse (instead of Ctrl-Alt)
options in regular qemu. Maybe it'd be convenient to use this switch
to change the "ungrab" key combination? Or make something similiar?

Personally I use SDL, and i'm pretty much happy with it except random
SegFaults.(i see some connection to disk activity, however).
SDL creates a surface(which is getting decorated by the WM) which
grabs all the input, filters it for ctrl-alt-shift release
combination, and sends it to the VM.
The other minor problem is that SDL scales mouse movements relative to
surface size.
Before SDL i used qemu's default graphics with all the fancy buttons
it provided(it sent the power button event too!), but it doesn't
transfer audio(SDL does that) and sometimes the mouse freezes. But it
specially required a "graphics" device to be attached, even if it's
just -device qxl which isn't touched by the guest system at all.

Re: [Qemu-devel] [Intel-gfx] [Announcement] 2015-Q3 release of XenGT - a Mediated Graphics Passthrough Solution from Intel

2015-11-20 Thread Zhiyuan Lv

On Fri, Nov 20, 2015 at 04:36:15PM +0800, Tian, Kevin wrote:
> > From: Gerd Hoffmann [mailto:kra...@redhat.com]
> > Sent: Friday, November 20, 2015 4:26 PM
> > 
> >   Hi,
> > 
> > > > iGVT-g_Setup_Guide.txt mentions a "Indirect Display Mode", but doesn't
> > > > explain how the guest framebuffer can be accessed then.
> > >
> > > You can check "fb_decoder.h". One thing to clarify. Its format is
> > > actually based on drm definition, instead of OpenGL. Sorry for
> > > that.
> > 
> > drm is fine.  That header explains the format, but not how it can be
> > accessed.  Is the guest fb exported as dma-buf?
> 
> Currently not, but per our previous discussion we should move to use
> dma-buf. We have some demo code in user space. Not sure whether
> they're public now. Jike could you help do a check?

Our current implementation did not use dma-buf yet, still based on DRM_FLINK
interface. We will switch to dma-buf. Thanks!

Regards,
-Zhiyuan

> 
> > 
> > > > So, for non-opengl rendering qemu needs the guest framebuffer data so it
> > > > can feed it into the vnc server.  The vfio framebuffer region is meant
> > > > to support this use case.
> > >
> > > what's the format requirement on that framebuffer? If you are familiar
> > > with Intel Graphics, there's a so-called tiling feature applied on frame
> > > buffer so it can't be used as a raw input to vnc server. w/o opengl you
> > > need do some conversion on CPU first.
> > 
> > Yes, that conversion needs to happen, qemu can't deal with tiled
> > graphics.  Anything which pixman can handle will work.  Prefered would
> > be PIXMAN_x8r8g8b8 (aka DRM_FORMAT_XRGB on little endian host) which
> > is the format used by the vnc server (and other places in qemu)
> > internally.
> > 
> > qemu can also use the opengl texture for the guest fb, then fetch the
> > data with glReadPixels().  Which will probably do exactly the same
> > conversion.  But it'll add a opengl dependency to the non-opengl
> > rendering path in qemu, would be nice if we can avoid that.
> > 
> > While being at it:  When importing a dma-buf with a tiled framebuffer
> > into opengl (via eglCreateImageKHR + EGL_LINUX_DMA_BUF_EXT) I suspect we
> > have to pass in the tile size as attribute to make it work.  Is that
> > correct?
> > 
> 
> I'd guess so, but need double confirm later when reaching that level of 
> detail. 
> some homework on dma-buf is required first. :-)
> 
> Thanks
> Kevin

Re: [Qemu-devel] [PATCH] numa: Clean up query-memdev error handling, plug leak

2015-11-20 Thread Markus Armbruster

Eduardo Habkost  writes:

> On Fri, Nov 20, 2015 at 02:00:40PM +0100, Markus Armbruster wrote:
>> qmp_query_memdev() doesn't fail.  Instead, it returns an empty list.
>> That's wrong.
>> 
>> Two error paths:
>> 
>> * When object_get_objects_root() returns null.  It never does, so
>>   simply drop the useless error handling.
>> 
>> * When query_memdev() fails.  This can happen, and the error to return
>>   is the one that query_memdev() currently leaks.  Passing the error
>>   from query_memdev() to qmp_query_memdev() isn't so simple, because
>>   object_child_foreach() is in the way.  Fixable, but I'd rather not
>>   try it in hard freeze.  Plug the leak, make up an error, and add a
>>   FIXME for the remaining work.
>> 
>> Screwed up in commit 76b5d85 "qmp: add query-memdev".
>> 
>> Signed-off-by: Markus Armbruster 
>
> Reviewed-by: Eduardo Habkost 
>
> Do you know how to trigger a query_memdev() error today, or is
> just theoretical?

Theoretical; I tested by injecting an error with gdb.

query_memdev() fails exactly when some object_property_get_FOO() fails.
If we decide such a failure would always be a programming error, we can
pass _abort and simplify things.  Opinions?

Re: [Qemu-devel] [PATCH for-2.5] target-arm: Don't mask out bits [47:40] in LPAE descriptors for v8

2015-11-20 Thread Laurent Desnogues

Hello,

On Fri, Nov 20, 2015 at 3:32 PM, Peter Maydell  wrote:
> In an LPAE format descriptor in ARMv8 the address field extends
> up to bit 47, not just bit 39. Correct the masking so we don't
> give incorrect results if the output address size is greater
> than 40 bits, as it can be for AArch64.
>
> (Note that we don't yet support the new-in-v8 Address Size fault which
> should be generated if any translation table entry or TTBR contains
> an address with non-zero bits above the most significant bit of the
> maximum output address size.)
>
> Signed-off-by: Peter Maydell 
> ---
> This is worth fixing for 2.5 I think. As the commit message notes,
> we don't support the Addres Size faults we ought to take in some
> cases, but that seems more 2.6-ish.
> ---
>  target-arm/helper.c | 12 +++-
>  1 file changed, 11 insertions(+), 1 deletion(-)
>
> diff --git a/target-arm/helper.c b/target-arm/helper.c
> index 4ecae61..afc4163 100644
> --- a/target-arm/helper.c
> +++ b/target-arm/helper.c
> @@ -6642,6 +6642,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, 
> target_ulong address,
>  int ap, ns, xn, pxn;
>  uint32_t el = regime_el(env, mmu_idx);
>  bool ttbr1_valid = true;
> +uint64_t descaddrmask;
>
>  /* TODO:
>   * This code does not handle the different format TCR for VTCR_EL2.
> @@ -6831,6 +6832,15 @@ static bool get_phys_addr_lpae(CPUARMState *env, 
> target_ulong address,
>  descaddr = extract64(ttbr, 0, 48);
>  descaddr &= ~((1ULL << (inputsize - (stride * (4 - level - 1);
>
> +/* The address field in the descriptor goes up to bit 39 for ARMv7
> + * but up to bit 47 for ARMv8.
> + */
> +if (arm_feature(env, ARM_FEATURE_V8)) {
> +descaddrmask = 0xf000ULL;
> +} else {
> +descaddrmask = 0xfff000ULL;
> +}

My understanding is that 48 bits are used if you are running AArch64
code, and 40 bits are used for 32-bit code even on an ARMv8 CPU, so
checking for ARM_FEATURE_V8 is perhaps not enough.

Thanks,

Laurent

> +
>  /* Secure accesses start with the page table in secure memory and
>   * can be downgraded to non-secure at any step. Non-secure accesses
>   * remain non-secure. We implement this by just ORing in the NSTable/NS
> @@ -6854,7 +6864,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, 
> target_ulong address,
>  /* Invalid, or the Reserved level 3 encoding */
>  goto do_fault;
>  }
> -descaddr = descriptor & 0xfff000ULL;
> +descaddr = descriptor & descaddrmask;
>
>  if ((descriptor & 2) && (level < 3)) {
>  /* Table entry. The top five bits are attributes which  may
> --
> 1.9.1
>

Re: [Qemu-devel] [PULL 00/14] Migration pull request

2015-11-20 Thread Peter Lieven

Am 19.11.2015 um 16:16 schrieb Dr. David Alan Gilbert:
> * Peter Maydell (peter.mayd...@linaro.org) wrote:
>> On 19 November 2015 at 14:44, Peter Maydell  wrote:
>>> On 19 November 2015 at 13:21, Peter Maydell  
>>> wrote:
 On 19 November 2015 at 13:12, Peter Maydell  
 wrote:
> Hi. Unfortunately this failed in 'make check' (x86-64 Linux, debug build):
>
> GTESTER check-qtest-i386
> blkdebug: Suspended request 'A'
> blkdebug: Resuming request 'A'
> **
> ERROR:/home/petmay01/linaro/qemu-for-merges/tests/ide-test.c:656:ide_wait_clear:
> code should not be reached
> GTester: last random seed: R02Sf1d98a814dee34e3985479976b17883c
> main-loop: WARNING: I/O thread spun for 1000 iterations
> main-loop: WARNING: I/O thread spun for 1000 iterations
> main-loop: WARNING: I/O thread spun for 1000 iterations
> main-loop: WARNING: I/O thread spun for 1000 iterations
> [vmxnet3][WR][vmxnet3_peer_has_vnet_hdr]: Peer has no virtio
> extension. Task offloads will be emulated.
> make: *** [check-qtest-i386] Error 1
>
> It might be an intermittent test fail from an earlier IDE pull?
 Yep, intermittent. Second run of the tests passed...
>>> and repro'd on current master, so definitely not the fault of anything
>>> in this pull.
>> while QTEST_QEMU_BINARY=i386-softmmu/qemu-system-i386
>> QTEST_QEMU_IMG=qemu-img gtester -k --verbose -m=quick tests/ide-test ;
>> do true; done
>>
>> will eventually hang on /i386/ide/cdrom/pio_large (may take a fair
>> number of loops; when it works fine the test does not take an
>> appreciable amount of time). After a long time the timeout in the
>> test kicks in and the assert happens.
> cc'ing in Peter Lieven given his recent IDE buffering changes.

I wonder if there is a glitch in the PIO implementation of test-ide.c. As far 
as I understand the specs
it is not allowed to read data while the BSY flag is set. With the following 
change to the test-ide script
the test does not race:

diff --git a/tests/ide-test.c b/tests/ide-test.c
index d1014bb..ab0489e 100644
--- a/tests/ide-test.c
+++ b/tests/ide-test.c
@@ -728,6 +728,7 @@ static void cdrom_pio_impl(int nblocks)
 for (i = 0; i < DIV_ROUND_UP(rxsize, limit); i++) {
 size_t offset = i * (limit / 2);
 size_t rem = (rxsize / 2) - offset;
+ide_wait_clear(BSY);
 for (j = 0; j < MIN((limit / 2), rem); j++) {
 rx[offset + j] = le16_to_cpu(inw(IDE_BASE + reg_data));
 }

Note: in the old sync version of the ATAPI PIO implementation this could not 
happen.

Peter

Re: [Qemu-devel] [PATCH] PCI: minor performance optimization

2015-11-20 Thread Michael S. Tsirkin

On Fri, Nov 20, 2015 at 07:04:07PM +0800, Cao jin wrote:
> 
> 
> On 11/20/2015 06:45 PM, Michael S. Tsirkin wrote:
> >On Fri, Nov 20, 2015 at 06:45:01PM +0800, Cao jin wrote:
> >>1. Do param check in pci_add_capability2(), as it is a public API.
> >
> >Separate patch pls.
> 
> OK
> 
> >
> >>2. As spec says, each capability must be DWORD aligned, so an optimization 
> >>can
> >>be done via Loop Unrolling.
> >
> >Why do we want to optimize it?
> >
> 
> For tiny performance improvement via less loop. take pcie express
> capability(60 bytes at most) for example, it may loop 60 times, now we just
> need 15 times, a quarter of before.

But who cares? This is not a data path operation.

> >>
> >>Signed-off-by: Cao jin 
> >>---
> >>  hw/pci/pci.c | 12 
> >>  1 file changed, 8 insertions(+), 4 deletions(-)
> >>
> >>diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> >>index 168b9cc..1e99603 100644
> >>--- a/hw/pci/pci.c
> >>+++ b/hw/pci/pci.c
> >>@@ -1924,13 +1924,15 @@ PCIDevice *pci_create_simple(PCIBus *bus, int 
> >>devfn, const char *name)
> >>  static uint8_t pci_find_space(PCIDevice *pdev, uint8_t size)
> >>  {
> >>  int offset = PCI_CONFIG_HEADER_SIZE;
> >>-int i;
> >>-for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i) {
> >>+int i = PCI_CONFIG_HEADER_SIZE;;
> >>+
> >>+for (; i < PCI_CONFIG_SPACE_SIZE; i = i + 4) {
> >>  if (pdev->used[i])
> >>-offset = i + 1;
> >>-else if (i - offset + 1 == size)
> >>+offset = i + 4;
> >>+else if (i - offset >= size)
> >>  return offset;
> >>  }
> >>+
> >>  return 0;
> >>  }
> >>
> >>@@ -2144,6 +2146,8 @@ int pci_add_capability2(PCIDevice *pdev, uint8_t 
> >>cap_id,
> >>  uint8_t *config;
> >>  int i, overlapping_cap;
> >>
> >>+assert(size > 0);
> >>+
> >>  if (!offset) {
> >>  offset = pci_find_space(pdev, size);
> >>  if (!offset) {
> >>--
> >>2.1.0
> >.
> >
> 
> -- 
> Yours Sincerely,
> 
> Cao Jin

Re: [Qemu-devel] [PULL v3 00/15] Tracing patches

2015-11-20 Thread Alistair Francis

On Fri, Nov 13, 2015 at 10:33 PM, Edgar E. Iglesias
 wrote:
> On Fri, Nov 13, 2015 at 11:28:41AM +, Peter Maydell wrote:
>> On 13 November 2015 at 10:30, Peter Maydell  wrote:
>> > I also now get a handful of extra warnings in the 'make check' output:
>> >
>> >   /aarch64/qom/xilinx-zynq-a9: OK
>> >   /aarch64/qom/xlnx-ep108:
>> > WARNING: RAM size 800 is small for EP108OK
>> >   /aarch64/qom/vexpress-a9:OK
>> >
>> > (note the missing newline...)
>> >
>> > TEST: tests/qom-test... (pid=19738)
>> >   /microblaze/qom/none:OK
>> >   /microblaze/qom/petalogix-s3adsp1800:
>> > Invalid MicroBlaze version number: (null)
>> > OK
>> >   /microblaze/qom/petalogix-ml605:
>> > Invalid MicroBlaze version number: (null)
>> > OK
>>
>> Peter/Alistair/Edgar: this patchset of Stefan's has now been deferred
>> until 2.6, but you might want to look into the log warning issues
>> it exposed (the missing newline, attempt to print NULL, and maybe
>> whether the warnings should really be printed for a simple attempt
>> to start the board with no arguments).
>>
>
> Thanks,
>
> I've posted patches to fix the MicroBlaze warings on the list.
>
> I don't know what the inention was with the E1P08 warning.. IMO we
> can remove it...

The warning is there because the RAM size is user definable, if the
user tries to set a size that is too small QEMU will warn them.

I sent a patch that fixes up the size check logic.

Thanks,

Alistair

>
> Cheers,
> Edgar
>

Re: [Qemu-devel] [PATCH v2] vhost-user: start/stop all rings

2015-11-20 Thread Victor Kaplansky

On Mon, Nov 16, 2015 at 06:47:03PM +0200, Michael S. Tsirkin wrote:
> We are currently only sending VRING_ENABLE message for the first ring,
> that's wrong: we must start/stop them all.
> 
> Reported-by:  Victor Kaplansky 
> Signed-off-by: Michael S. Tsirkin 
> ---
> 
> Changes from v1:
> typo fix
> 
>  hw/virtio/vhost-user.c | 17 +++--
>  1 file changed, 11 insertions(+), 6 deletions(-)
> 
> diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> index 5bc6c45..71c3e16 100644
> --- a/hw/virtio/vhost-user.c
> +++ b/hw/virtio/vhost-user.c
> @@ -333,18 +333,23 @@ static int vhost_user_set_vring_base(struct vhost_dev 
> *dev,
>  
>  static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
>  {
> -struct vhost_vring_state state = {
> -.index = dev->vq_index,
> -.num   = enable,
> -};
> +int i;
>  
>  if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) {
>  return -1;
>  }
>  
> -return vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, );
> -}
> +for (i = 0; i < dev->nvqs; ++i) {
> +struct vhost_vring_state state = {
> +.index = dev->vq_index + i,
> +.num   = enable,
> +};
> +
> +vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, );

Just noted that the patch make vhost_user_set_vring_enable() to
ignore the return value of vhost_set_vring()

> +}
>  
> +return 0;
> +}
>  
>  static int vhost_user_get_vring_base(struct vhost_dev *dev,
>   struct vhost_vring_state *ring)
> -- 
> MST

Re: [Qemu-devel] [Qemu-ppc] [PATCH 25/77] ppc: Add P7/P8 Power Management instructions

2015-11-20 Thread David Gibson

On Wed, Nov 11, 2015 at 11:27:38AM +1100, Benjamin Herrenschmidt wrote:
> This adds the ISA 2.06 and later power management instructions
> (doze, nap, sleep and rvwinkle) and associated wakeup cause testing
> in LPCR
> 
> Signed-off-by: Benjamin Herrenschmidt 

Looks fine, though I haven't checked against the ISA in detail.

> ---
>  target-ppc/cpu.h| 26 -
>  target-ppc/excp_helper.c| 59 +
>  target-ppc/helper.h |  1 +
>  target-ppc/translate.c  | 66 
>  target-ppc/translate_init.c | 92 
> -
>  5 files changed, 241 insertions(+), 3 deletions(-)
> 
> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
> index 3d22a4f..a7236cf 100644
> --- a/target-ppc/cpu.h
> +++ b/target-ppc/cpu.h
> @@ -300,6 +300,15 @@ enum {
>  };
>  
>  
> /*/
> +/* PM instructions */
> +typedef enum {
> +PPC_PM_DOZE,
> +PPC_PM_NAP,
> +PPC_PM_SLEEP,
> +PPC_PM_RVWINKLE,
> +} powerpc_pm_insn_t;
> +
> +/*/
>  /* Input pins model  
> */
>  typedef enum powerpc_input_t powerpc_input_t;
>  enum powerpc_input_t {
> @@ -490,6 +499,14 @@ struct ppc_slb_t {
>  #define LPCR_LPES1(1ull << (63-61))
>  #define LPCR_AIL_SHIFT(63-40)  /* Alternate interrupt location */
>  #define LPCR_AIL  (3ull << LPCR_AIL_SHIFT)
> +#define LPCR_P7_PECE0 (1ull << (63-49))
> +#define LPCR_P7_PECE1 (1ull << (63-50))
> +#define LPCR_P7_PECE2 (1ull << (63-51))
> +#define LPCR_P8_PECE0 (1ull << (63-47))
> +#define LPCR_P8_PECE1 (1ull << (63-48))
> +#define LPCR_P8_PECE2 (1ull << (63-49))
> +#define LPCR_P8_PECE3 (1ull << (63-50))
> +#define LPCR_P8_PECE4 (1ull << (63-51))
>  
>  #define msr_sf   ((env->msr >> MSR_SF)   & 1)
>  #define msr_isf  ((env->msr >> MSR_ISF)  & 1)
> @@ -1126,6 +1143,11 @@ struct CPUPPCState {
>   * instructions and SPRs are diallowed if MSR:HV is 0
>   */
>  bool has_hv_mode;
> +/* On P7/P8, set when in PM state, we need to handle resume
> + * in a special way (such as routing some resume causes to
> + * 0x100), so flag this here.
> + */
> +bool in_pm_state;
>  #endif
>  
>  /* Those resources are used only during code translation */
> @@ -2069,6 +2091,8 @@ enum {
>  PPC2_FP_CVT_S64= 0x0001ULL,
>  /* Transactional Memory (ISA 2.07, Book II)  
> */
>  PPC2_TM= 0x0002ULL,
> +/* Server PM instructgions (ISA 2.06, Book III)  
> */
> +PPC2_PM_ISA206 = 0x0004ULL,
>  
>  #define PPC_TCG_INSNS2 (PPC2_BOOKE206 | PPC2_VSX | PPC2_PRCNTL | PPC2_DBRX | 
> \
>  PPC2_ISA205 | PPC2_VSX207 | PPC2_PERM_ISA206 | \
> @@ -2076,7 +2100,7 @@ enum {
>  PPC2_FP_CVT_ISA206 | PPC2_FP_TST_ISA206 | \
>  PPC2_BCTAR_ISA207 | PPC2_LSQ_ISA207 | \
>  PPC2_ALTIVEC_207 | PPC2_ISA207S | PPC2_DFP | \
> -PPC2_FP_CVT_S64 | PPC2_TM)
> +PPC2_FP_CVT_S64 | PPC2_TM | PPC2_PM_ISA206)
>  };
>  
>  
> /*/
> diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
> index 80a70f4..3f77df7 100644
> --- a/target-ppc/excp_helper.c
> +++ b/target-ppc/excp_helper.c
> @@ -100,6 +100,44 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int 
> excp_model, int excp)
>  asrr0 = -1;
>  asrr1 = -1;
>  
> +/* check for special resume at 0x100 from doze/nap/sleep/winkle on P7/P8 
> */
> +if (env->in_pm_state) {
> +env->in_pm_state = false;
> +
> +/* Pretend to be returning from doze always as we don't lose state */
> +msr |= (0x1ull << (63 - 47));
> +
> +/* Non-machine check are routed to 0x100 with a wakeup cause
> + * encoded in SRR1
> + */
> +if (excp != POWERPC_EXCP_MCHECK) {
> +switch(excp) {
> +case POWERPC_EXCP_RESET:
> +msr |= 0x4ull << (63-45);
> +break;
> +case POWERPC_EXCP_EXTERNAL:
> +msr |= 0x8ull << (63-45);
> +break;
> +case POWERPC_EXCP_DECR:
> +msr |= 0x6ull << (63-45);
> +break;
> +case POWERPC_EXCP_SDOOR:
> +msr |= 0x5ull << (63-45);
> +break;
> +case POWERPC_EXCP_SDOOR_HV:
> +msr |= 0x3ull << (63-45);
> +break;
> +case POWERPC_EXCP_HV_MAINT:
> +msr |= 0xaull << (63-45);
> +break;
> +default:
> +

[Qemu-devel] [PATCH] qemu-iotests: Add -nographic when starting QEMU in 120

2015-11-20 Thread Fam Zheng

Otherwise, a window flashes on my desktop (built with SDL). Other
iotest cases have that.

Signed-off-by: Fam Zheng 
---
 tests/qemu-iotests/120 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/qemu-iotests/120 b/tests/qemu-iotests/120
index 9f13078..d899a3f 100755
--- a/tests/qemu-iotests/120
+++ b/tests/qemu-iotests/120
@@ -49,7 +49,7 @@ echo "{'execute': 'qmp_capabilities'}
   {'execute': 'human-monitor-command',
'arguments': {'command-line': 'qemu-io drv \"write -P 42 0 64k\"'}}
   {'execute': 'quit'}" \
-| $QEMU -qmp stdio -nodefaults \
+| $QEMU -qmp stdio -nographic -nodefaults \
 -drive 
id=drv,if=none,file="$TEST_IMG",driver=raw,file.driver=$IMGFMT \
 | _filter_qmp | _filter_qemu_io
 $QEMU_IO -c 'read -P 42 0 64k' "$TEST_IMG" | _filter_qemu_io
-- 
2.4.3

[Qemu-devel] [PATCH for-2.5] call bdrv_drain_all() even if the vm is stopped

2015-11-20 Thread Wen Congyang

There are still I/O operations when the vm is stopped. For example, stop the 
vm, and do block
migration. In this case, we don't drain all I/O operation, and may meet the 
following problem:
qemu-system-x86_64: migration/block.c:731: block_save_complete: Assertion 
`block_mig_state.submitted == 0' failed.

Signed-off-by: Wen Congyang 
---
 cpus.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/cpus.c b/cpus.c
index 877bd70..43676fa 100644
--- a/cpus.c
+++ b/cpus.c
@@ -1415,6 +1415,8 @@ int vm_stop_force_state(RunState state)
 return vm_stop(state);
 } else {
 runstate_set(state);
+
+bdrv_drain_all();
 /* Make sure to return an error if the flush in a previous vm_stop()
  * failed. */
 return bdrv_flush_all();
-- 
2.5.0

Re: [Qemu-devel] [PULL 08/15] i440fx: print an error message if user tries to enable iommu

2015-11-20 Thread Michael S. Tsirkin

On Thu, Nov 19, 2015 at 10:00:38PM +0100, Markus Armbruster wrote:
> "Michael S. Tsirkin"  writes:
> 
> > On Thu, Nov 19, 2015 at 03:38:03PM -0500, Bandan Das wrote:
> >> "Michael S. Tsirkin"  writes:
> >> 
> >> > From: Bandan Das 
> >> >
> >> > There's no indication of any sort that i440fx doesn't support
> >> > "iommu=on"
> >> 
> >> Oh, Markus quite didn't like this approach because this is
> >> true for all other machines too. Anyway, I will keep in
> >> mind to take care of this when I post a generic patch. 
> >
> > Do you think I should revert this one then?
> 
> The patch isn't wrong, it merely addresses only one special case of a
> generic issue.  Probably the most important case in practice.  If I
> understood Bandan correctly, he intended to drop this patch and work on
> a general solution.  As far as I'm concerned, you can keep this patch if
> dropping it is inconvenient.

Bandan, I suggest you include the revert in your patchset
when it's ready then. Maybe post 2.5.

-- 
MST

Re: [Qemu-devel] [PATCH V5 7/8] introduce xlnx-dp

2015-11-20 Thread Alistair Francis

On Fri, Oct 16, 2015 at 7:11 PM,   wrote:
> From: KONRAD Frederic 
>
> This is the implementation of the DisplayPort.
> It has an aux-bus to access dpcd and edid.
>
> Graphic plane is connected to the channel 3.
> Video plane is connected to the channel 0.
> Audio stream are connected to the channels 4 and 5.

This patch doesn't pass checkpatch (I didn't test any others).
Can you run the series through checkpatch?

Also a super small nit pick, some of your line splitting doesn't line up,
can you make sure that the function arguments and if statement conditions
line up with the previous line.

>
> Signed-off-by: KONRAD Frederic 
> Tested-By: Hyun Kwon 
> ---
>  hw/display/Makefile.objs |1 +
>  hw/display/xlnx_dp.c | 1370 
> ++
>  include/hw/display/xlnx_dp.h |  110 
>  3 files changed, 1481 insertions(+)
>  create mode 100644 hw/display/xlnx_dp.c
>  create mode 100644 include/hw/display/xlnx_dp.h
>
> diff --git a/hw/display/Makefile.objs b/hw/display/Makefile.objs
> index 250a43f..3625ab2 100644
> --- a/hw/display/Makefile.objs
> +++ b/hw/display/Makefile.objs
> @@ -43,3 +43,4 @@ virtio-gpu.o-libs += $(VIRGL_LIBS)
>  virtio-gpu-3d.o-cflags := $(VIRGL_CFLAGS)
>  virtio-gpu-3d.o-libs += $(VIRGL_LIBS)
>  obj-$(CONFIG_DPCD) += dpcd.o
> +obj-$(CONFIG_XLNX_ZYNQMP) += xlnx_dp.o
> diff --git a/hw/display/xlnx_dp.c b/hw/display/xlnx_dp.c
> new file mode 100644
> index 000..e91221c
> --- /dev/null
> +++ b/hw/display/xlnx_dp.c
> @@ -0,0 +1,1370 @@
> +/*
> + * xlnx_dp.c
> + *
> + *  Copyright (C) 2015 : GreenSocs Ltd
> + *  http://www.greensocs.com/ , email: i...@greensocs.com
> + *
> + *  Developed by :
> + *  Frederic Konrad   
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation, either version 2 of the License, or
> + * (at your option)any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License along
> + * with this program; if not, see .
> + *
> + */
> +
> +#include "hw/display/xlnx_dp.h"
> +
> +#ifndef DEBUG_DP
> +#define DEBUG_DP 0
> +#endif
> +
> +#define DPRINTF(fmt, ...) do {   
>   \
> +if (DEBUG_DP) {  
>   \
> +qemu_log("xlnx_dp: " fmt , ## __VA_ARGS__);  
>   \
> +}
>   \
> +} while (0);
> +
> +/*
> + * Register offset for DP.
> + */
> +#define DP_LINK_BW_SET  (0x >> 2)
> +#define DP_LANE_COUNT_SET   (0x0004 >> 2)
> +#define DP_ENHANCED_FRAME_EN(0x0008 >> 2)
> +#define DP_TRAINING_PATTERN_SET (0x000C >> 2)
> +#define DP_LINK_QUAL_PATTERN_SET(0x0010 >> 2)
> +#define DP_SCRAMBLING_DISABLE   (0x0014 >> 2)
> +#define DP_DOWNSPREAD_CTRL  (0x0018 >> 2)
> +#define DP_SOFTWARE_RESET   (0x001C >> 2)
> +#define DP_TRANSMITTER_ENABLE   (0x0080 >> 2)
> +#define DP_MAIN_STREAM_ENABLE   (0x0084 >> 2)
> +#define DP_FORCE_SCRAMBLER_RESET(0x00C0 >> 2)
> +#define DP_VERSION_REGISTER (0x00F8 >> 2)
> +#define DP_CORE_ID  (0x00FC >> 2)
> +
> +#define DP_AUX_COMMAND_REGISTER (0x0100 >> 2)
> +#define AUX_ADDR_ONLY_MASK  (0x1000)
> +#define AUX_COMMAND_MASK(0x0F00)
> +#define AUX_COMMAND_SHIFT   (8)
> +#define AUX_COMMAND_NBYTES  (0x000F)
> +
> +#define DP_AUX_WRITE_FIFO   (0x0104 >> 2)
> +#define DP_AUX_ADDRESS  (0x0108 >> 2)
> +#define DP_AUX_CLOCK_DIVIDER(0x010C >> 2)
> +#define DP_TX_USER_FIFO_OVERFLOW(0x0110 >> 2)
> +#define DP_INTERRUPT_SIGNAL_STATE   (0x0130 >> 2)
> +#define DP_AUX_REPLY_DATA   (0x0134 >> 2)
> +#define DP_AUX_REPLY_CODE   (0x0138 >> 2)
> +#define DP_AUX_REPLY_COUNT  (0x013C >> 2)
> +#define DP_REPLY_DATA_COUNT (0x0148 >> 2)
> +#define DP_REPLY_STATUS (0x014C >> 2)
> +#define DP_HPD_DURATION (0x0150 >> 2)
> +#define DP_MAIN_STREAM_HTOTAL   (0x0180 >> 2)
> +#define DP_MAIN_STREAM_VTOTAL   (0x0184 >> 2)
> +#define DP_MAIN_STREAM_POLARITY (0x0188 >> 2)
> +#define

[Qemu-devel] [PATCH] memory: emulate ioeventfd

2015-11-20 Thread Pavel Fedin

The ioeventfd mechanism is used by vhost, dataplane, and virtio-pci to
turn guest MMIO/PIO writes into eventfd file descriptor events.  This
allows arbitrary threads to be notified when the guest writes to a
specific MMIO/PIO address.

qtest and TCG do not support ioeventfd because memory writes are not
checked against registered ioeventfds in QEMU.  This patch implements
this in memory_region_dispatch_write() so qtest can use ioeventfd.

Also this patch fixes vhost aborting on some misconfigured old kernels
like 3.18.0 on ARM. It is possible to explicitly enable CONFIG_EVENTFD
in expert settings, while MMIO binding support in KVM will still be
missing.

Signed-off-by: Stefan Hajnoczi 
Signed-off-by: Pavel Fedin 
---
RFC => PATCH:
- Add !kvm_eventfds_enabled() conditions to bypass eventfd injection when not 
needed
- Renamed "ioeventfd" to "eventfd", just to make words shorter
- Add a one-shot warning about missing MMIO bindings in KVM
---
 kvm-all.c |  6 --
 memory.c  | 42 ++
 2 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/kvm-all.c b/kvm-all.c
index ddb007a..70f5cec 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1633,8 +1633,10 @@ static int kvm_init(MachineState *ms)
 
 kvm_state = s;
 
-s->memory_listener.listener.eventfd_add = kvm_mem_ioeventfd_add;
-s->memory_listener.listener.eventfd_del = kvm_mem_ioeventfd_del;
+if (kvm_eventfds_allowed) {
+s->memory_listener.listener.eventfd_add = kvm_mem_ioeventfd_add;
+s->memory_listener.listener.eventfd_del = kvm_mem_ioeventfd_del;
+}
 s->memory_listener.listener.coalesced_mmio_add = kvm_coalesce_mmio_region;
 s->memory_listener.listener.coalesced_mmio_del = 
kvm_uncoalesce_mmio_region;
 
diff --git a/memory.c b/memory.c
index e193658..4d138fb 100644
--- a/memory.c
+++ b/memory.c
@@ -18,12 +18,14 @@
 #include "exec/ioport.h"
 #include "qapi/visitor.h"
 #include "qemu/bitops.h"
+#include "qemu/error-report.h"
 #include "qom/object.h"
 #include "trace.h"
 #include 
 
 #include "exec/memory-internal.h"
 #include "exec/ram_addr.h"
+#include "sysemu/kvm.h"
 #include "sysemu/sysemu.h"
 
 //#define DEBUG_UNASSIGNED
@@ -1141,6 +1143,32 @@ MemTxResult memory_region_dispatch_read(MemoryRegion *mr,
 return r;
 }
 
+/* Return true if an eventfd was signalled */
+static bool memory_region_dispatch_write_eventfds(MemoryRegion *mr,
+hwaddr addr,
+uint64_t data,
+unsigned size,
+MemTxAttrs attrs)
+{
+MemoryRegionIoeventfd ioeventfd = {
+.addr = addrrange_make(int128_make64(addr), int128_make64(size)),
+.data = data,
+};
+unsigned i;
+
+for (i = 0; i < mr->ioeventfd_nb; i++) {
+ioeventfd.match_data = mr->ioeventfds[i].match_data;
+ioeventfd.e = mr->ioeventfds[i].e;
+
+if (memory_region_ioeventfd_equal(ioeventfd, mr->ioeventfds[i])) {
+event_notifier_set(ioeventfd.e);
+return true;
+}
+}
+
+return false;
+}
+
 MemTxResult memory_region_dispatch_write(MemoryRegion *mr,
  hwaddr addr,
  uint64_t data,
@@ -1154,6 +1182,11 @@ MemTxResult memory_region_dispatch_write(MemoryRegion 
*mr,
 
 adjust_endianness(mr, , size);
 
+if ((!kvm_eventfds_enabled()) &&
+memory_region_dispatch_write_eventfds(mr, addr, data, size, attrs)) {
+return MEMTX_OK;
+}
+
 if (mr->ops->write) {
 return access_with_adjusted_size(addr, , size,
  mr->ops->impl.min_access_size,
@@ -1672,6 +1705,8 @@ void memory_region_clear_global_locking(MemoryRegion *mr)
 mr->global_locking = false;
 }
 
+static bool userspace_eventfd_warning;
+
 void memory_region_add_eventfd(MemoryRegion *mr,
hwaddr addr,
unsigned size,
@@ -1688,6 +1723,13 @@ void memory_region_add_eventfd(MemoryRegion *mr,
 };
 unsigned i;
 
+if (kvm_enabled() && (!(kvm_eventfds_enabled() ||
+userspace_eventfd_warning))) {
+userspace_eventfd_warning = true;
+error_report("Using eventfd without MMIO binding in KVM. "
+ "Suboptimal performance expected");
+}
+
 if (size) {
 adjust_endianness(mr, , size);
 }
-- 
1.9.5.msysgit.0

[Qemu-devel] [PATCH for-2.5] block-migration: limit the memory usage

2015-11-20 Thread Wen Congyang

If we set migration speed in a very large value, block-migration will try to 
read
all data to the memory. Because
(block_mig_state.submitted + block_mig_state.read_done) * BLOCK_SIZE
will be overflow, and it will be always less than rate limit.

There is no need to read too many data into memory when the rate limit is very 
large.
So limit the memory usage can fix the overflow problem.

Signed-off-by: Wen Congyang 
---
 migration/block.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/migration/block.c b/migration/block.c
index 310e2b3..656f38f 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -36,6 +36,8 @@
 
 #define MAX_IS_ALLOCATED_SEARCH 65536
 
+#define MAX_INFLIGHT_IO 512
+
 //#define DEBUG_BLK_MIGRATION
 
 #ifdef DEBUG_BLK_MIGRATION
@@ -665,7 +667,10 @@ static int block_save_iterate(QEMUFile *f, void *opaque)
 blk_mig_lock();
 while ((block_mig_state.submitted +
 block_mig_state.read_done) * BLOCK_SIZE <
-   qemu_file_get_rate_limit(f)) {
+   qemu_file_get_rate_limit(f) &&
+   (block_mig_state.submitted +
+block_mig_state.read_done) <
+   MAX_INFLIGHT_IO) {
 blk_mig_unlock();
 if (block_mig_state.bulk_completed == 0) {
 /* first finish the bulk phase */
-- 
2.5.0

Re: [Qemu-devel] [PATCH] PCI: minor performance optimization

2015-11-20 Thread Cao jin




On 11/20/2015 06:45 PM, Michael S. Tsirkin wrote:

On Fri, Nov 20, 2015 at 06:45:01PM +0800, Cao jin wrote:

1. Do param check in pci_add_capability2(), as it is a public API.


Separate patch pls.


OK




2. As spec says, each capability must be DWORD aligned, so an optimization can
be done via Loop Unrolling.


Why do we want to optimize it?



For tiny performance improvement via less loop. take pcie express 
capability(60 bytes at most) for example, it may loop 60 times, now we 
just need 15 times, a quarter of before.




Signed-off-by: Cao jin 
---
  hw/pci/pci.c | 12 
  1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 168b9cc..1e99603 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -1924,13 +1924,15 @@ PCIDevice *pci_create_simple(PCIBus *bus, int devfn, 
const char *name)
  static uint8_t pci_find_space(PCIDevice *pdev, uint8_t size)
  {
  int offset = PCI_CONFIG_HEADER_SIZE;
-int i;
-for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i) {
+int i = PCI_CONFIG_HEADER_SIZE;;
+
+for (; i < PCI_CONFIG_SPACE_SIZE; i = i + 4) {
  if (pdev->used[i])
-offset = i + 1;
-else if (i - offset + 1 == size)
+offset = i + 4;
+else if (i - offset >= size)
  return offset;
  }
+
  return 0;
  }

@@ -2144,6 +2146,8 @@ int pci_add_capability2(PCIDevice *pdev, uint8_t cap_id,
  uint8_t *config;
  int i, overlapping_cap;

+assert(size > 0);
+
  if (!offset) {
  offset = pci_find_space(pdev, size);
  if (!offset) {
--
2.1.0

.



--
Yours Sincerely,

Cao Jin

[Qemu-devel] [PATCH v5 05/10] cpu: Add a sync version of cpu_remove()

2015-11-20 Thread Bharata B Rao

This sync API will be used by the CPU hotplug code to wait for the CPU to
completely get removed before flagging the failure to the device_add
command.

Sync version of this call is needed to correctly recover from CPU
realization failures when ->plug() handler fails.

Signed-off-by: Bharata B Rao 
---
 cpus.c| 12 
 include/qom/cpu.h |  8 
 2 files changed, 20 insertions(+)

diff --git a/cpus.c b/cpus.c
index af2b274..c2444ba 100644
--- a/cpus.c
+++ b/cpus.c
@@ -1070,6 +1070,8 @@ static void *qemu_kvm_cpu_thread_fn(void *arg)
 qemu_kvm_wait_io_event(cpu);
 if (cpu->exit && !cpu_can_run(cpu)) {
 qemu_kvm_destroy_vcpu(cpu);
+cpu->created = false;
+qemu_cond_signal(_cpu_cond);
 qemu_mutex_unlock(_global_mutex);
 return NULL;
 }
@@ -1174,6 +1176,8 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
 }
 if (remove_cpu) {
 qemu_tcg_destroy_vcpu(remove_cpu);
+cpu->created = false;
+qemu_cond_signal(_cpu_cond);
 remove_cpu = NULL;
 }
 }
@@ -1339,6 +1343,14 @@ void cpu_remove(CPUState *cpu)
 qemu_cpu_kick(cpu);
 }
 
+void cpu_remove_sync(CPUState *cpu)
+{
+cpu_remove(cpu);
+while (cpu->created) {
+qemu_cond_wait(_cpu_cond, _global_mutex);
+}
+}
+
 /* For temporary buffers for forming a name */
 #define VCPU_THREAD_NAME_SIZE 16
 
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 67e05b0..7fc6696 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -705,6 +705,14 @@ void cpu_resume(CPUState *cpu);
  */
 void cpu_remove(CPUState *cpu);
 
+ /**
+ * cpu_remove_sync:
+ * @cpu: The CPU to remove.
+ *
+ * Requests the CPU to be removed and waits till it is removed.
+ */
+void cpu_remove_sync(CPUState *cpu);
+
 /**
  * qemu_init_vcpu:
  * @cpu: The vCPU to initialize.
-- 
2.1.0

[Qemu-devel] [PATCH v5 06/10] xics_kvm: Add cpu_destroy method to XICS

2015-11-20 Thread Bharata B Rao

XICS is setup for each CPU during initialization. Provide a routine
to undo the same when CPU is unplugged.

This allows reboot of a VM that has undergone CPU hotplug and unplug
to work correctly.

Signed-off-by: Bharata B Rao 
Reviewed-by: David Gibson 
---
 hw/intc/xics.c| 12 
 hw/intc/xics_kvm.c| 13 +++--
 include/hw/ppc/xics.h |  2 ++
 3 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index 9ff5796..e1161b2 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -44,6 +44,18 @@ static int get_cpu_index_by_dt_id(int cpu_dt_id)
 return -1;
 }
 
+void xics_cpu_destroy(XICSState *icp, PowerPCCPU *cpu)
+{
+CPUState *cs = CPU(cpu);
+XICSStateClass *info = XICS_COMMON_GET_CLASS(icp);
+
+assert(cs->cpu_index < icp->nr_servers);
+
+if (info->cpu_destroy) {
+info->cpu_destroy(icp, cpu);
+}
+}
+
 void xics_cpu_setup(XICSState *icp, PowerPCCPU *cpu)
 {
 CPUState *cs = CPU(cpu);
diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c
index d58729c..cb96f69 100644
--- a/hw/intc/xics_kvm.c
+++ b/hw/intc/xics_kvm.c
@@ -331,6 +331,8 @@ static void xics_kvm_cpu_setup(XICSState *icp, PowerPCCPU 
*cpu)
 abort();
 }
 
+ss->cs = cs;
+
 /*
  * If we are reusing a parked vCPU fd corresponding to the CPU
  * which was hot-removed earlier we don't have to renable
@@ -343,8 +345,6 @@ static void xics_kvm_cpu_setup(XICSState *icp, PowerPCCPU 
*cpu)
 if (icpkvm->kernel_xics_fd != -1) {
 int ret;
 
-ss->cs = cs;
-
 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_IRQ_XICS, 0,
   icpkvm->kernel_xics_fd, 
kvm_arch_vcpu_id(cs));
 if (ret < 0) {
@@ -356,6 +356,14 @@ static void xics_kvm_cpu_setup(XICSState *icp, PowerPCCPU 
*cpu)
 }
 }
 
+static void xics_kvm_cpu_destroy(XICSState *icp, PowerPCCPU *cpu)
+{
+CPUState *cs = CPU(cpu);
+ICPState *ss = >ss[cs->cpu_index];
+
+ss->cs = NULL;
+}
+
 static void xics_kvm_set_nr_irqs(XICSState *icp, uint32_t nr_irqs, Error 
**errp)
 {
 icp->nr_irqs = icp->ics->nr_irqs = nr_irqs;
@@ -486,6 +494,7 @@ static void xics_kvm_class_init(ObjectClass *oc, void *data)
 
 dc->realize = xics_kvm_realize;
 xsc->cpu_setup = xics_kvm_cpu_setup;
+xsc->cpu_destroy = xics_kvm_cpu_destroy;
 xsc->set_nr_irqs = xics_kvm_set_nr_irqs;
 xsc->set_nr_servers = xics_kvm_set_nr_servers;
 }
diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
index 355a966..2faad48 100644
--- a/include/hw/ppc/xics.h
+++ b/include/hw/ppc/xics.h
@@ -68,6 +68,7 @@ struct XICSStateClass {
 DeviceClass parent_class;
 
 void (*cpu_setup)(XICSState *icp, PowerPCCPU *cpu);
+void (*cpu_destroy)(XICSState *icp, PowerPCCPU *cpu);
 void (*set_nr_irqs)(XICSState *icp, uint32_t nr_irqs, Error **errp);
 void (*set_nr_servers)(XICSState *icp, uint32_t nr_servers, Error **errp);
 };
@@ -166,5 +167,6 @@ int xics_alloc_block(XICSState *icp, int src, int num, bool 
lsi, bool align);
 void xics_free(XICSState *icp, int irq, int num);
 
 void xics_cpu_setup(XICSState *icp, PowerPCCPU *cpu);
+void xics_cpu_destroy(XICSState *icp, PowerPCCPU *cpu);
 
 #endif /* __XICS_H__ */
-- 
2.1.0

[Qemu-devel] [PATCH v5 04/10] cpu: Reclaim vCPU objects

2015-11-20 Thread Bharata B Rao

From: Gu Zheng 

In order to deal well with the kvm vcpus (which can not be removed without any
protection), we do not close KVM vcpu fd, just record and mark it as stopped
into a list, so that we can reuse it for the appending cpu hot-add request if
possible. It is also the approach that kvm guys suggested:
https://www.mail-archive.com/kvm@vger.kernel.org/msg102839.html

Signed-off-by: Chen Fan 
Signed-off-by: Gu Zheng 
Signed-off-by: Zhu Guihua 
Signed-off-by: Bharata B Rao 
   [- Explicit CPU_REMOVE() from qemu_kvm/tcg_destroy_vcpu()
  isn't needed as it is done from cpu_exec_exit()]
Reviewed-by: David Gibson 
---
 cpus.c   | 41 +
 include/qom/cpu.h| 10 +
 include/sysemu/kvm.h |  1 +
 kvm-all.c| 57 +++-
 kvm-stub.c   |  5 +
 5 files changed, 113 insertions(+), 1 deletion(-)

diff --git a/cpus.c b/cpus.c
index 877bd70..af2b274 100644
--- a/cpus.c
+++ b/cpus.c
@@ -953,6 +953,21 @@ void async_run_on_cpu(CPUState *cpu, void (*func)(void 
*data), void *data)
 qemu_cpu_kick(cpu);
 }
 
+static void qemu_kvm_destroy_vcpu(CPUState *cpu)
+{
+if (kvm_destroy_vcpu(cpu) < 0) {
+error_report("kvm_destroy_vcpu failed.\n");
+exit(EXIT_FAILURE);
+}
+
+object_unparent(OBJECT(cpu));
+}
+
+static void qemu_tcg_destroy_vcpu(CPUState *cpu)
+{
+object_unparent(OBJECT(cpu));
+}
+
 static void flush_queued_work(CPUState *cpu)
 {
 struct qemu_work_item *wi;
@@ -1053,6 +1068,11 @@ static void *qemu_kvm_cpu_thread_fn(void *arg)
 }
 }
 qemu_kvm_wait_io_event(cpu);
+if (cpu->exit && !cpu_can_run(cpu)) {
+qemu_kvm_destroy_vcpu(cpu);
+qemu_mutex_unlock(_global_mutex);
+return NULL;
+}
 }
 
 return NULL;
@@ -1108,6 +1128,7 @@ static void tcg_exec_all(void);
 static void *qemu_tcg_cpu_thread_fn(void *arg)
 {
 CPUState *cpu = arg;
+CPUState *remove_cpu = NULL;
 
 rcu_register_thread();
 
@@ -1145,6 +1166,16 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
 }
 }
 qemu_tcg_wait_io_event(QTAILQ_FIRST());
+CPU_FOREACH(cpu) {
+if (cpu->exit && !cpu_can_run(cpu)) {
+remove_cpu = cpu;
+break;
+}
+}
+if (remove_cpu) {
+qemu_tcg_destroy_vcpu(remove_cpu);
+remove_cpu = NULL;
+}
 }
 
 return NULL;
@@ -1301,6 +1332,13 @@ void resume_all_vcpus(void)
 }
 }
 
+void cpu_remove(CPUState *cpu)
+{
+cpu->stop = true;
+cpu->exit = true;
+qemu_cpu_kick(cpu);
+}
+
 /* For temporary buffers for forming a name */
 #define VCPU_THREAD_NAME_SIZE 16
 
@@ -1506,6 +1544,9 @@ static void tcg_exec_all(void)
 break;
 }
 } else if (cpu->stop || cpu->stopped) {
+if (cpu->exit) {
+next_cpu = CPU_NEXT(cpu);
+}
 break;
 }
 }
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 51a1323..67e05b0 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -223,6 +223,7 @@ struct kvm_run;
  * @halted: Nonzero if the CPU is in suspended state.
  * @stop: Indicates a pending stop request.
  * @stopped: Indicates the CPU has been artificially stopped.
+ * @exit: Indicates the CPU has exited due to an unplug operation.
  * @crash_occurred: Indicates the OS reported a crash (panic) for this CPU
  * @tcg_exit_req: Set to force TCG to stop executing linked TBs for this
  *   CPU and return to its top level loop.
@@ -274,6 +275,7 @@ struct CPUState {
 bool created;
 bool stop;
 bool stopped;
+bool exit;
 bool crash_occurred;
 bool exit_request;
 uint32_t interrupt_request;
@@ -696,6 +698,14 @@ void cpu_exit(CPUState *cpu);
 void cpu_resume(CPUState *cpu);
 
 /**
+ * cpu_remove:
+ * @cpu: The CPU to remove.
+ *
+ * Requests the CPU to be removed.
+ */
+void cpu_remove(CPUState *cpu);
+
+/**
  * qemu_init_vcpu:
  * @cpu: The vCPU to initialize.
  *
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index b31f325..dd1b783 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -206,6 +206,7 @@ int kvm_has_intx_set_mask(void);
 
 int kvm_init_vcpu(CPUState *cpu);
 int kvm_cpu_exec(CPUState *cpu);
+int kvm_destroy_vcpu(CPUState *cpu);
 
 #ifdef NEED_CPU_H
 
diff --git a/kvm-all.c b/kvm-all.c
index c648b81..3befc59 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -60,6 +60,12 @@
 
 #define KVM_MSI_HASHTAB_SIZE256
 
+struct KVMParkedVcpu {
+unsigned long vcpu_id;
+int kvm_fd;
+QLIST_ENTRY(KVMParkedVcpu) node;
+};
+
 struct KVMState
 {
 AccelState parent_obj;
@@ -93,6 +99,7 @@ struct KVMState

Re: [Qemu-devel] [PATCH v2] vhost-user: start/stop all rings

2015-11-20 Thread Michael S. Tsirkin

On Fri, Nov 20, 2015 at 02:56:43PM +0200, Victor Kaplansky wrote:
> On Mon, Nov 16, 2015 at 06:47:03PM +0200, Michael S. Tsirkin wrote:
> > We are currently only sending VRING_ENABLE message for the first ring,
> > that's wrong: we must start/stop them all.
> > 
> > Reported-by:  Victor Kaplansky 
> > Signed-off-by: Michael S. Tsirkin 
> > ---
> > 
> > Changes from v1:
> > typo fix
> > 
> >  hw/virtio/vhost-user.c | 17 +++--
> >  1 file changed, 11 insertions(+), 6 deletions(-)
> > 
> > diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> > index 5bc6c45..71c3e16 100644
> > --- a/hw/virtio/vhost-user.c
> > +++ b/hw/virtio/vhost-user.c
> > @@ -333,18 +333,23 @@ static int vhost_user_set_vring_base(struct vhost_dev 
> > *dev,
> >  
> >  static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
> >  {
> > -struct vhost_vring_state state = {
> > -.index = dev->vq_index,
> > -.num   = enable,
> > -};
> > +int i;
> >  
> >  if (!virtio_has_feature(dev->features, 
> > VHOST_USER_F_PROTOCOL_FEATURES)) {
> >  return -1;
> >  }
> >  
> > -return vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, );
> > -}
> > +for (i = 0; i < dev->nvqs; ++i) {
> > +struct vhost_vring_state state = {
> > +.index = dev->vq_index + i,
> > +.num   = enable,
> > +};
> > +
> > +vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, );
> 
> Just noted that the patch make vhost_user_set_vring_enable() to
> ignore the return value of vhost_set_vring()


Right. But all callers ignore the return value of this function ATM
so I don't think it's a big deal.

> > +}
> >  
> > +return 0;
> > +}
> >  
> >  static int vhost_user_get_vring_base(struct vhost_dev *dev,
> >   struct vhost_vring_state *ring)
> > -- 
> > MST

Re: [Qemu-devel] [Qemu-ppc] [PATCH 24/77] ppc: Move exception generation code out of line

2015-11-20 Thread David Gibson

On Wed, Nov 11, 2015 at 11:27:37AM +1100, Benjamin Herrenschmidt wrote:
> There's no point inlining this, if you hit the exception case you exit
> anyway,

That doesn't quite seem relevant - IIUC this is affecting inlining in
the code generation path, rather than the code execution path.

> and not inlining saves about 100K of code size (and cache
> footprint).

That sounds like a win, though.

> 
> Signed-off-by: Benjamin Herrenschmidt 
> ---
>  target-ppc/translate.c | 9 ++---
>  1 file changed, 6 insertions(+), 3 deletions(-)
> 
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index a5ab2eb..ac62942 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -279,7 +279,8 @@ void gen_update_current_nip(void *opaque)
>  tcg_gen_movi_tl(cpu_nip, ctx->nip);
>  }
>  
> -static inline void gen_exception_err(DisasContext *ctx, uint32_t excp, 
> uint32_t error)
> +static void __attribute__((noinline))
> +gen_exception_err(DisasContext *ctx, uint32_t excp, uint32_t error)

I thought we generally avoided bare gcc attributes in qemu, but I
don't see a helper macro for it and I do see it used in a few other
places, so I guess its ok.

>  {
>  TCGv_i32 t0, t1;
>  if (ctx->exception == POWERPC_EXCP_NONE) {
> @@ -293,7 +294,8 @@ static inline void gen_exception_err(DisasContext *ctx, 
> uint32_t excp, uint32_t
>  ctx->exception = (excp);
>  }
>  
> -static inline void gen_exception(DisasContext *ctx, uint32_t excp)
> +static void __attribute__((noinline))
> +gen_exception(DisasContext *ctx, uint32_t excp)
>  {
>  TCGv_i32 t0;
>  if (ctx->exception == POWERPC_EXCP_NONE) {
> @@ -305,7 +307,8 @@ static inline void gen_exception(DisasContext *ctx, 
> uint32_t excp)
>  ctx->exception = (excp);
>  }
>  
> -static inline void gen_debug_exception(DisasContext *ctx)
> +static void __attribute__((noinline))
> +gen_debug_exception(DisasContext *ctx)
>  {
>  TCGv_i32 t0;
>  

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [Qemu-devel] [RFC v1] virtio-crypto specification

2015-11-20 Thread Gonglei (Arei)

Hi Michael,

> -Original Message-
> From: Michael S. Tsirkin [mailto:m...@redhat.com]
> Sent: Friday, November 20, 2015 5:40 PM
> 
> Thanks, looks good overall.
> You might want to join the TC if you maintain a device.

Yes, IIRC Jani is a member of the TC, right? Jani ?

> Generally, I think this needs a bit more formal conformance statements.
> Some examples below.
> 
> On Fri, Nov 20, 2015 at 03:27:51AM +, Gonglei (Arei) wrote:
> > Hi guys,
> >
> > After initial discussion at this year's KVM forum, I post the RFC
> > version of virtio-crypto device specification now.
> >
> > If you have any comments, please let me know, thanks.
> >
> > Regards,
> > -Gonglei
> >
> >
> > 1   Crypto Device
> >
> > The virtio crypto device is a virtual crypto device (ie. hardware crypto
> accelerator card). Encrypt and decrypt requests are placed in the data queue,
> and handled by the real hardware crypto accelerators finally. A second queue 
> is
> the controlling queue, which is used to create/destroy session or some other
> advanced filtering features.
> >
> > 1.1 Device ID
> >
> > 65535 (experimental)
> >
> > 1.2 Virtqueues
> >
> > 0
> > controlq
> > 1
> > dataq
> >
> > 1.3 Feature bits
> >
> > VIRTIO_CRYPTO_F_REQ_SIZE_MAX (0)
> > Maximum size of any single request is in “size_max”.
> > VIRTIO_CRYPTO_F_SYM (1)
> > Device supports the symmetric cryptography API.
> > VIRTIO_CRYPTO_F_DH (2)
> > Device supports the Diffie Hellman API.
> > VIRTIO_CRYPTO_F_DSA (3)
> > Device supports the DSA API.
> > VIRTIO_CRYPTO_F_RSA (4)
> > Device supports the RSA API.
> > VIRTIO_CRYPTO_F_EC (5)
> > Device supports the Elliptic Curve API.
> > VIRTIO_CRYPTO_F_ECDH (6)
> > Device supports the Elliptic Curve Diffie Hellman API.
> > VIRTIO_CRYPTO_F_ECDSA (7)
> > Device supports the Elliptic Curve DSA API.
> > VIRTIO_CRYPTO_F _KEY (8)
> > Device supports the Key Generation API.
> > VIRTIO_CRYPTO_F_LN (9)
> > Device supports the Large Number API.
> > VIRTIO_CRYPTO_F_PRIME (10)
> > Device supports the prime number testing API.
> > VIRTIO_CRYPTO_F_DRGB (11)
> > Device supports the DRGB API.
> > VIRTIO_CRYPTO_F_NRGB (12)
> > Device supports the NRGB API.
> > VIRTIO_CRYPTO_F_RAND (13)
> > Device supports the random bit/number generation API.
> >
> > 1.4 Device configuration layout
> >
> > struct virtio_crypto_config {
> > le32 size_max; /* Maximum size of any single request */ }
> >
> > 1.5 Device Initialization
> >
> > 1. The initialization routine should identify the data and control 
> > virtqueues.
> > 2. If the VIRTIO_CRYPTO_F_SYM feature bit is negotiated, identify the device
> supports the symmetric cryptography API, which as the same as other
> features.
> >
> > 1.6 Device Operation
> >
> > The controlq is used to control session operations, such as create or
> > destroy. Meanwhile, some other features or functions can also be
> > handled by controlq.
> 
> In future versions of the specification?
> 
Yeah, because I just use the controlq to control session operations at present. 

> > The control request is preceded by a header:
> > struct virtio_crypto_ctx_outhdr {
> > /* cipher algorithm type (ie. aes-cbc ) */
> > __virtio32 alg;
> > /* length of key */
> > __virtio32 keylen;
> > /* reserved */
> > __virtio32 flags;
> > /* control type  */
> > uint8_t type;
> > /* encrypt or decrypt */
> > uint8_t op;
> > /* mode of hash operation, including authenticated/plain/nested hash
> */
> > uint8_t hash_mode;
> > /* authenticate hash/cipher ordering  */
> > uint8_t alg_chain_order;
> > /* length of authenticated key */
> > __virtio32 auth_key_len;
> > /* hash algorithm type */
> > __virtio32 hash_alg;
> 
> You can make this all le too: I don't think we need to support legacy devices 
> of
> this type.
> Spec also does not use uint8_t.
> 

Okay, will fix them.

> > };
> > The encrypt/decrypt requests and the corresponding results are transmitted
> by placing them in dataq. The request itself is preceded by a header:
> > struct virtio_crypto_req_outhdr {
> > /* algorithm type (ie. aes-128-cbc ) */
> > __virtio32 mode;
> > /* length of iv */
> > __virtio32 ivlen;
> > /* length of source data */
> > __virtio32 len;
> > /* length of auth data */
> > __virtio32 auth_len;
> > /* the backend session id */
> > __virtio64 session_id;
> > /* reserved */
> > __virtio32 flags;
> > };
> >
> > Both ctx and data requests end by a status byte. The ﬁnal status byte is
> written by the device: either VIRTIO_CRYPTO_S_OK for success,
> VIRTIO_BLK_S_IOERR for device or driver error or VIRTIO_BLK_S_UNSUPP for a
> request unsupported by device, VIRTIO_CRYPTO_S_BADMSG for verification
> failed when decrypt AEAD algorithms:
> >
> > #define VIRTIO_CRYPTO_S_OK0
> > #define VIRTIO_CRYPTO_S_ERR1
> > #define VIRTIO_CRYPTO_S_UNSUPP2
> > #define VIRTIO_CRYPTO_S_BADMSG3
> >
> > For symmetric cryptography, three types

[Qemu-devel] [PATCH] PCI: minor performance optimization

2015-11-20 Thread Cao jin

1. Do param check in pci_add_capability2(), as it is a public API.
2. As spec says, each capability must be DWORD aligned, so an optimization can
   be done via Loop Unrolling.

Signed-off-by: Cao jin 
---
 hw/pci/pci.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 168b9cc..1e99603 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -1924,13 +1924,15 @@ PCIDevice *pci_create_simple(PCIBus *bus, int devfn, 
const char *name)
 static uint8_t pci_find_space(PCIDevice *pdev, uint8_t size)
 {
 int offset = PCI_CONFIG_HEADER_SIZE;
-int i;
-for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i) {
+int i = PCI_CONFIG_HEADER_SIZE;;
+
+for (; i < PCI_CONFIG_SPACE_SIZE; i = i + 4) {
 if (pdev->used[i])
-offset = i + 1;
-else if (i - offset + 1 == size)
+offset = i + 4;
+else if (i - offset >= size)
 return offset;
 }
+
 return 0;
 }
 
@@ -2144,6 +2146,8 @@ int pci_add_capability2(PCIDevice *pdev, uint8_t cap_id,
 uint8_t *config;
 int i, overlapping_cap;
 
+assert(size > 0);
+
 if (!offset) {
 offset = pci_find_space(pdev, size);
 if (!offset) {
-- 
2.1.0

Re: [Qemu-devel] [PATCH] PCI: minor performance optimization

2015-11-20 Thread Michael S. Tsirkin

On Fri, Nov 20, 2015 at 06:45:01PM +0800, Cao jin wrote:
> 1. Do param check in pci_add_capability2(), as it is a public API.

Separate patch pls.

> 2. As spec says, each capability must be DWORD aligned, so an optimization can
>be done via Loop Unrolling.

Why do we want to optimize it?

> 
> Signed-off-by: Cao jin 
> ---
>  hw/pci/pci.c | 12 
>  1 file changed, 8 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index 168b9cc..1e99603 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -1924,13 +1924,15 @@ PCIDevice *pci_create_simple(PCIBus *bus, int devfn, 
> const char *name)
>  static uint8_t pci_find_space(PCIDevice *pdev, uint8_t size)
>  {
>  int offset = PCI_CONFIG_HEADER_SIZE;
> -int i;
> -for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i) {
> +int i = PCI_CONFIG_HEADER_SIZE;;
> +
> +for (; i < PCI_CONFIG_SPACE_SIZE; i = i + 4) {
>  if (pdev->used[i])
> -offset = i + 1;
> -else if (i - offset + 1 == size)
> +offset = i + 4;
> +else if (i - offset >= size)
>  return offset;
>  }
> +
>  return 0;
>  }
>  
> @@ -2144,6 +2146,8 @@ int pci_add_capability2(PCIDevice *pdev, uint8_t cap_id,
>  uint8_t *config;
>  int i, overlapping_cap;
>  
> +assert(size > 0);
> +
>  if (!offset) {
>  offset = pci_find_space(pdev, size);
>  if (!offset) {
> -- 
> 2.1.0

Re: [Qemu-devel] [PATCH for-2.5] eepro100: Prevent two endless loops

2015-11-20 Thread P J P

+-- On Fri, 20 Nov 2015, Stefan Weil wrote --+
| include/hw/pci/pci.h:static inline uint##_bits##_t
| ld##_l##_pci_dma(PCIDevice *dev,  \

   I see.
 
| Is there an ideal count? If it is too low, it might break some use cases.
| If it is too high, it will take longer until the loop is finished.

  -> https://url.corp.redhat.com/8255x-manual-pdf

  I tried to look trough the 8255x manual above, it does not have a specific 
value for the count, as it's a linked list of command blocks.

 
| I don't think EEPRO100 emulation is used in critical production 
| applications. Therefore a lower value and a debug message when this value is 
| exceeded might be helpful to find out which lowest value is acceptable. If 
| you want to avoid this risk, the value should be set to 256, 1, 65536 or 
| any other higher value. Feel free to change this when you apply the patch.

  I guess Jason would be best to decide that.


Thank you.
--
 - P J P
47AF CE69 3A90 54AA 9045 1053 DD13 3D32 FE5B 041F

Re: [Qemu-devel] [PATCH] PCI: minor performance optimization

2015-11-20 Thread Cao jin




On 11/20/2015 07:26 PM, Michael S. Tsirkin wrote:

On Fri, Nov 20, 2015 at 07:04:07PM +0800, Cao jin wrote:



On 11/20/2015 06:45 PM, Michael S. Tsirkin wrote:

On Fri, Nov 20, 2015 at 06:45:01PM +0800, Cao jin wrote:


2. As spec says, each capability must be DWORD aligned, so an optimization can
be done via Loop Unrolling.


Why do we want to optimize it?



For tiny performance improvement via less loop. take pcie express
capability(60 bytes at most) for example, it may loop 60 times, now we just
need 15 times, a quarter of before.


But who cares? This is not a data path operation.


It is tiny thing I found when browsing code. When found there are 
several places looks like this, I think maybe it does good to qemu to do 
this and CCed to you because it don`t look like a simple trivial patch.


So, hey Michael, if you don`t like this kind of optimization, that`t ok, 
forget it. But I think it make me little confused when determine which 
kind of patch should be CCed to you.






Signed-off-by: Cao jin 
---
  hw/pci/pci.c | 12 
  1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 168b9cc..1e99603 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -1924,13 +1924,15 @@ PCIDevice *pci_create_simple(PCIBus *bus, int devfn, 
const char *name)
  static uint8_t pci_find_space(PCIDevice *pdev, uint8_t size)
  {
  int offset = PCI_CONFIG_HEADER_SIZE;
-int i;
-for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i) {
+int i = PCI_CONFIG_HEADER_SIZE;;
+
+for (; i < PCI_CONFIG_SPACE_SIZE; i = i + 4) {
  if (pdev->used[i])
-offset = i + 1;
-else if (i - offset + 1 == size)
+offset = i + 4;
+else if (i - offset >= size)
  return offset;
  }
+
  return 0;
  }

@@ -2144,6 +2146,8 @@ int pci_add_capability2(PCIDevice *pdev, uint8_t cap_id,
  uint8_t *config;
  int i, overlapping_cap;

+assert(size > 0);
+
  if (!offset) {
  offset = pci_find_space(pdev, size);
  if (!offset) {
--
2.1.0

.



--
Yours Sincerely,

Cao Jin

.



--
Yours Sincerely,

Cao Jin

[Qemu-devel] [PATCH v5 00/10] sPAPR CPU hotplug

2015-11-20 Thread Bharata B Rao

This patchset adds CPU hotplug support for sPAPR PowerPC guests using
device_add and device_del commands

(qemu) device_add POWER8-powerpc64-cpu,id=cpu0

The first 5 patches are generic changes. Out of these 4/10 is required
by x86 and s390 as well and has been posted in their CPU hotplug patchsets.
I believe 2/10 and 3/10 would be useful for other archs too.

Andreas - If and when found appropriate, would you be taking patches 1 to
5 via your tree ? Should I post them as separate pre-req patchset ?

Patches 6 to 10 are Power specific.

Changes in v5
-
- Get rid of a new element (cpu->queued) the previous version introduced
  and have the same logic to determine if cpu is already dequeued for
  both implementations of cpu_exec_exit(). (2/10)
- Call cpu_remove() from cpu_remove_sync() instead of code duplication. (5/10)
- s/smp_cores/spapr_smp_cores (8/10)
- Set correct tb offset for hotplugged CPU. (8/10)
- s/spapr_hotplug_req_add_event/spapr_hotplug_req_add_by_index (8/10)
- Removed support for incomplete cores and added a separate patch
  to prevent such topologies. (8/10 and 1/10)

v4: https://lists.gnu.org/archive/html/qemu-devel/2015-08/msg00650.html

This series applies on top of ppc-for-2.6 branch of David Gibson's tree.

Bharata B Rao (9):
  vl: Don't allow CPU toplogies with partially filled cores
  exec: Remove cpu from cpus list during cpu_exec_exit()
  exec: Do vmstate unregistration from cpu_exec_exit()
  cpu: Add a sync version of cpu_remove()
  xics_kvm: Add cpu_destroy method to XICS
  spapr: Enable CPU hotplug for pseries-2.5 and add CPU DRC DT entries
  spapr: CPU hotplug support
  spapr: CPU hot unplug support
  target-ppc: Enable CPU hotplug for POWER8 CPU family

Gu Zheng (1):
  cpu: Reclaim vCPU objects

 cpus.c  |  53 ++
 exec.c  |  30 ++
 hw/intc/xics.c  |  12 +++
 hw/intc/xics_kvm.c  |  13 ++-
 hw/ppc/spapr.c  | 250 +++-
 hw/ppc/spapr_events.c   |   3 +
 hw/ppc/spapr_rtas.c |  24 +
 include/hw/ppc/spapr.h  |   1 +
 include/hw/ppc/xics.h   |   2 +
 include/qom/cpu.h   |  18 
 include/sysemu/kvm.h|   1 +
 kvm-all.c   |  57 +-
 kvm-stub.c  |   5 +
 target-ppc/translate_init.c |  10 ++
 vl.c|   9 ++
 15 files changed, 483 insertions(+), 5 deletions(-)

-- 
2.1.0

Re: [Qemu-devel] [Qemu-ppc] [PATCH 22/77] ppc: Add real mode CI load/store instructions for P7 and P8

2015-11-20 Thread David Gibson

On Wed, Nov 11, 2015 at 11:27:35AM +1100, Benjamin Herrenschmidt wrote:
> Those instructions are only available in hypervisor real mode and
> allow cache inhibited garded access to devices in that mode.
> 
> Signed-off-by: Benjamin Herrenschmidt 
> ---
>  target-ppc/cpu.h|  4 +++-
>  target-ppc/translate.c  | 56 
> +++--
>  target-ppc/translate_init.c |  6 +++--
>  3 files changed, 51 insertions(+), 15 deletions(-)
> 
> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
> index 23479b1..3d22a4f 100644
> --- a/target-ppc/cpu.h
> +++ b/target-ppc/cpu.h
> @@ -1913,6 +1913,8 @@ enum {
>  PPC_POPCNTB= 0x1000ULL,
>  /*   string load / store 
> */
>  PPC_STRING = 0x2000ULL,
> +/*   real mode cache inhibited load / store  
> */
> +PPC_CILDST = 0x4000ULL,
>  
>  /* Floating-point unit extensions
> */
>  /*   Optional floating point instructions
> */
> @@ -2027,7 +2029,7 @@ enum {
>  | PPC_MFAPIDI | PPC_TLBIVA | PPC_TLBIVAX \
>  | PPC_4xx_COMMON | PPC_40x_ICBT | PPC_RFMCI \
>  | PPC_RFDI | PPC_DCR | PPC_DCRX | PPC_DCRUX \
> -| PPC_POPCNTWD)
> +| PPC_POPCNTWD | PPC_CILDST)
>  
>  /* extended type values */
>  
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index 3f657b1..4d01fd0 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -189,7 +189,7 @@ struct DisasContext {
>  uint32_t opcode;
>  uint32_t exception;
>  /* Routine used to access memory */
> -bool pr, hv;
> +bool pr, hv, dr;
>  int mem_idx;
>  int access_type;
>  /* Translation flags */
> @@ -380,9 +380,11 @@ typedef struct opcode_t {
>  #if defined(CONFIG_USER_ONLY)
>  #define CHK_HV GEN_PRIV
>  #define CHK_SV GEN_PRIV
> +#define CHK_HVDR GEN_PRIV

I'm guessing this is supposed to be CHK_HVRM as below.

>  #else
>  #define CHK_HV do { if (unlikely(ctx->pr || !ctx->hv)) GEN_PRIV; } while(0)
>  #define CHK_SV do { if (unlikely(ctx->pr))  GEN_PRIV; }  while(0)
> +#define CHK_HVRM do { if (unlikely(ctx->pr || !ctx->hv || ctx->dr)) 
> GEN_PRIV; } while(0)
>  #endif
>  
>  #define CHK_NONE
> @@ -2887,7 +2889,7 @@ static void glue(gen_, name##u)(DisasContext *ctx)
>  }
>  
>  #define GEN_LDUX(name, ldop, opc2, opc3, type)   
>  \
> -static void glue(gen_, name##ux)(DisasContext *ctx)  
>  \
> +static void glue(gen_, name##ux)(DisasContext *ctx)  
>  \

Extraneous change.

>  {
>  \
>  TCGv EA; 
>  \
>  if (unlikely(rA(ctx->opcode) == 0 || 
>  \
> @@ -2903,18 +2905,23 @@ static void glue(gen_, name##ux)(DisasContext *ctx)
>  tcg_temp_free(EA);   
>  \
>  }
>  
> -#define GEN_LDX_E(name, ldop, opc2, opc3, type, type2)   
>  \
> +#define GEN_LDX_E(name, ldop, opc2, opc3, type, type2, chk)  
>  \
>  static void glue(gen_, name##x)(DisasContext *ctx)   
>  \
>  {
>  \
>  TCGv EA; 
>  \
> +chk; 
>  \
>  gen_set_access_type(ctx, ACCESS_INT);
>  \
>  EA = tcg_temp_new(); 
>  \
>  gen_addr_reg_index(ctx, EA); 
>  \
>  gen_qemu_##ldop(ctx, cpu_gpr[rD(ctx->opcode)], EA);  
>  \
>  tcg_temp_free(EA);   
>  \
>  }
> +
>  #define GEN_LDX(name, ldop, opc2, opc3, type)
>  \
> -GEN_LDX_E(name, ldop, opc2, opc3, type, PPC_NONE)
> +GEN_LDX_E(name, ldop, opc2, opc3, type, PPC_NONE, CHK_NONE)
> +
> +#define GEN_LDX_HVRM(name, ldop, opc2, opc3, type)   
>  \
> +GEN_LDX_E(name, ldop, opc2, opc3, type, PPC_NONE, CHK_HVRM)
>  
>  #define GEN_LDS(name, ldop, op, type)
>  \
>  GEN_LD(name, ldop, op | 0x20, type); 
>  \
> @@ -2940,6 +2947,12 @@ GEN_LDUX(ld, ld64, 0x15, 0x01, PPC_64B);
>  /* ldx */
>  GEN_LDX(ld, ld64, 0x15, 0x00, PPC_64B);
>  
> +/* CI load/store variants */
> +GEN_LDX_HVRM(ldcix, ld64, 0x15, 0x1b, PPC_CILDST)
>

Re: [Qemu-devel] [Qemu-ppc] [PATCH 21/77] ppc: Rework generation of priv and inval interrupts

2015-11-20 Thread David Gibson

On Wed, Nov 11, 2015 at 11:27:34AM +1100, Benjamin Herrenschmidt wrote:
> Recent server processors use the Hypervisor Emulation Assistance
> interrupt for illegal instructions and *some* type of SPR accesses.
> 
> Also the code was always generating inval instructions even for priv
> violations due to setting the wrong flags
> 
> Finally, the checking for PR/HV was open coded everywhere.
> 
> This reworks it all, using little helper macros for checking, and
> adding the HV interrupt (which gets converted back to program check
> in the slow path of excp_helper.c on CPUs that don't want it).
> 
> Signed-off-by: Benjamin Herrenschmidt 

[snip]
>  static void spr_noaccess(DisasContext *ctx, int gprn, int sprn)
> @@ -4340,7 +4350,7 @@ static inline void gen_op_mfspr(DisasContext *ctx)
>  printf("Trying to read privileged spr %d (0x%03x) at "
> TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4);
>  }
> -gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
> +gen_priv_exception(ctx, POWERPC_EXCP_PRIV_REG);
>  }
>  } else {
>  /* Not defined */
> @@ -4348,7 +4358,25 @@ static inline void gen_op_mfspr(DisasContext *ctx)
>   TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4);
>  printf("Trying to read invalid spr %d (0x%03x) at "
> TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4);

So, I'm not 100% following the logic below, but it looks like the
existing code used SPR_NOACCESS to mark things which generated a
privilege exception compared to NULL for things which generated an
invalid instruction exception.  Using that encoding, can you simplify
the logic here?  Alternatively can you use the logic here to avoid the
SPR_NOACESS encoding?

> -gen_inval_exception(ctx, POWERPC_EXCP_INVAL_SPR);
> +
> +/* The behaviour depends on MSR:PR and SPR# bit 0x10,
> + * it can generate a priv, a hv emu or a no-op
> + */
> +if (sprn & 0x10) {
> +if (ctx->pr) {
> +gen_priv_exception(ctx, POWERPC_EXCP_INVAL_SPR);
> +}
> +} else {
> +if (ctx->pr || sprn == 0 || sprn == 4 || sprn == 5 || sprn == 6) 
> {
> +gen_hvpriv_exception(ctx, POWERPC_EXCP_INVAL_SPR);
> +}
> +}
> +#if !defined(CONFIG_USER_ONLY)
> +/* HV priv */
> +if (ctx->spr_cb[sprn].hea_read) {
> +gen_priv_exception(ctx, POWERPC_EXCP_INVAL_SPR);
> +}

If you're in PR mode, and it's an SPR with an hea_read function and
has the 0x10 bit set, won't this call gen_priv_exception twice?

I also see no path here which will call gen_inval_exception(), is that
right?  If you're in HV mode and it's a truly invalid SPRN, isn't that
what you'd want?

> +#endif
>  }
>  }



>  
> @@ -4395,13 +4423,9 @@ static void gen_mtcrf(DisasContext *ctx)
>  #if defined(TARGET_PPC64)
>  static void gen_mtmsrd(DisasContext *ctx)
>  {
> -#if defined(CONFIG_USER_ONLY)
> -gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
> -#else
> -if (unlikely(ctx->pr)) {
> -gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
> -return;
> -}
> +CHK_SV;
> +
> +#if !defined(CONFIG_USER_ONLY)
>  if (ctx->opcode & 0x0001) {
>  /* Special form that does not need any synchronisation */
>  TCGv t0 = tcg_temp_new();
> @@ -4420,20 +,16 @@ static void gen_mtmsrd(DisasContext *ctx)
>  /* Note that mtmsr is not always defined as context-synchronizing */
>  gen_stop_exception(ctx);
>  }
> -#endif
> +#endif /* !defined(CONFIG_USER_ONLY) */
>  }
> -#endif
> +#endif /* defined(TARGET_PPC64) */
>  
>  static void gen_mtmsr(DisasContext *ctx)
>  {
> -#if defined(CONFIG_USER_ONLY)
> -gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
> -#else
> -if (unlikely(ctx->pr)) {
> -gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
> -return;
> -}
> -if (ctx->opcode & 0x0001) {
> +CHK_SV;
> +
> +#if !defined(CONFIG_USER_ONLY)
> +   if (ctx->opcode & 0x0001) {
>  /* Special form that does not need any synchronisation */
>  TCGv t0 = tcg_temp_new();
>  tcg_gen_andi_tl(t0, cpu_gpr[rS(ctx->opcode)], (1 << MSR_RI) | (1 << 
> MSR_EE));
> @@ -4488,7 +4508,7 @@ static void gen_mtspr(DisasContext *ctx)
>   TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4);
>  printf("Trying to write privileged spr %d (0x%03x) at "
> TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4);
> -gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
> +gen_priv_exception(ctx, POWERPC_EXCP_PRIV_REG);
>  }
>  } else {
>  /* Not defined */
> @@ -4496,7 +4516,25 @@ static void gen_mtspr(DisasContext *ctx)
>   TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4);
>  printf("Trying to write invalid spr %d (0x%03x) at "
>

[Qemu-devel] [PATCH for 2.6 1/3] backup: Use Bitmap to replace "s->bitmap"

2015-11-20 Thread Fam Zheng

"s->bitmap" tracks done sectors, we only check bit states without using any
iterator which HBitmap is good for. Switch to "Bitmap" which is simpler and
more memory efficient.

Meanwhile, rename it to done_bitmap, to reflect the intention.

Signed-off-by: Fam Zheng 
---
 block/backup.c | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/block/backup.c b/block/backup.c
index 3b39119..d408f98 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -22,6 +22,7 @@
 #include "qapi/qmp/qerror.h"
 #include "qemu/ratelimit.h"
 #include "sysemu/block-backend.h"
+#include "qemu/bitmap.h"
 
 #define BACKUP_CLUSTER_BITS 16
 #define BACKUP_CLUSTER_SIZE (1 << BACKUP_CLUSTER_BITS)
@@ -47,7 +48,7 @@ typedef struct BackupBlockJob {
 BlockdevOnError on_target_error;
 CoRwlock flush_rwlock;
 uint64_t sectors_read;
-HBitmap *bitmap;
+unsigned long *done_bitmap;
 QLIST_HEAD(, CowRequest) inflight_reqs;
 } BackupBlockJob;
 
@@ -113,7 +114,7 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
 cow_request_begin(_request, job, start, end);
 
 for (; start < end; start++) {
-if (hbitmap_get(job->bitmap, start)) {
+if (test_bit(start, job->done_bitmap)) {
 trace_backup_do_cow_skip(job, start);
 continue; /* already copied */
 }
@@ -164,7 +165,7 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
 goto out;
 }
 
-hbitmap_set(job->bitmap, start, 1);
+bitmap_set(job->done_bitmap, start, 1);
 
 /* Publish progress, guest I/O counts as progress too.  Note that the
  * offset field is an opaque progress value, it is not a disk offset.
@@ -394,7 +395,7 @@ static void coroutine_fn backup_run(void *opaque)
 start = 0;
 end = DIV_ROUND_UP(job->common.len, BACKUP_CLUSTER_SIZE);
 
-job->bitmap = hbitmap_alloc(end, 0);
+job->done_bitmap = bitmap_new(end);
 
 bdrv_set_enable_write_cache(target, true);
 if (target->blk) {
@@ -475,7 +476,7 @@ static void coroutine_fn backup_run(void *opaque)
 /* wait until pending backup_do_cow() calls have completed */
 qemu_co_rwlock_wrlock(>flush_rwlock);
 qemu_co_rwlock_unlock(>flush_rwlock);
-hbitmap_free(job->bitmap);
+g_free(job->done_bitmap);
 
 if (target->blk) {
 blk_iostatus_disable(target->blk);
-- 
2.4.3

Re: [Qemu-devel] [Qemu-ppc] [PATCH 26/77] ppc/pnv: Add skeletton PowerNV platform

2015-11-20 Thread David Gibson

On Wed, Nov 11, 2015 at 11:27:39AM +1100, Benjamin Herrenschmidt wrote:
> No devices yet, not even an interrupt controller, just to get
> started.
> 
> Signed-off-by: Benjamin Herrenschmidt 
> ---
>  default-configs/ppc64-softmmu.mak |   1 +
>  hw/ppc/Makefile.objs  |   2 +
>  hw/ppc/pnv.c  | 600 
> ++
>  include/hw/ppc/pnv.h  |  36 +++
>  4 files changed, 639 insertions(+)
>  create mode 100644 hw/ppc/pnv.c
>  create mode 100644 include/hw/ppc/pnv.h

Many of my comments below may be made irrelevant by later patches in
the series.

> diff --git a/default-configs/ppc64-softmmu.mak 
> b/default-configs/ppc64-softmmu.mak
> index bb71b23..96574c8 100644
> --- a/default-configs/ppc64-softmmu.mak
> +++ b/default-configs/ppc64-softmmu.mak
> @@ -40,6 +40,7 @@ CONFIG_I8259=y
>  CONFIG_XILINX=y
>  CONFIG_XILINX_ETHLITE=y
>  CONFIG_PSERIES=y
> +CONFIG_POWERNV=y
>  CONFIG_PREP=y
>  CONFIG_MAC=y
>  CONFIG_E500=y
> diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
> index c1ffc77..cd74c96 100644
> --- a/hw/ppc/Makefile.objs
> +++ b/hw/ppc/Makefile.objs
> @@ -4,6 +4,8 @@ obj-y += ppc.o ppc_booke.o
>  obj-$(CONFIG_PSERIES) += spapr.o spapr_vio.o spapr_events.o
>  obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o
>  obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o
> +# IBM PowerNV
> +obj-$(CONFIG_POWERNV) += pnv.o
>  ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
>  obj-y += spapr_pci_vfio.o
>  endif
> diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
> new file mode 100644
> index 000..e68c9b1
> --- /dev/null
> +++ b/hw/ppc/pnv.c
> @@ -0,0 +1,600 @@
> +/*
> + * QEMU PowerPC PowerNV model
> + *
> + * Copyright (c) 2004-2007 Fabrice Bellard
> + * Copyright (c) 2007 Jocelyn Mayer
> + * Copyright (c) 2010 David Gibson, IBM Corporation.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a 
> copy
> + * of this software and associated documentation files (the "Software"), to 
> deal
> + * in the Software without restriction, including without limitation the 
> rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
> FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + *
> + */
> +#include "sysemu/sysemu.h"
> +#include "hw/hw.h"
> +#include "hw/fw-path-provider.h"
> +#include "elf.h"
> +#include "net/net.h"
> +#include "sysemu/block-backend.h"
> +#include "sysemu/cpus.h"
> +#include "sysemu/kvm.h"
> +#include "sysemu/numa.h"
> +#include "kvm_ppc.h"
> +#include "mmu-hash64.h"
> +#include "qom/cpu.h"
> +
> +#include "hw/boards.h"
> +#include "hw/ppc/ppc.h"
> +#include "hw/ppc/pnv.h"
> +#include "hw/loader.h"
> +
> +#include "exec/address-spaces.h"
> +#include "qemu/config-file.h"
> +#include "qemu/error-report.h"
> +#include "trace.h"
> +#include "hw/nmi.h"
> +
> +#include "hw/compat.h"
> +
> +#include 
> +
> +#define FDT_ADDR0x0100
> +#define FDT_MAX_SIZE0x0010
> +#define FW_MAX_SIZE 0x0040
> +#define FW_FILE_NAME"skiboot.lid"
> +#define KERNEL_FILE_NAME"skiroot.lid"
> +#define KERNEL_LOAD_ADDR0x2000
> +
> +#define TIMEBASE_FREQ   51200ULL
> +
> +#define MAX_CPUS255
> +
> +#define PHANDLE_XICP0x
> +
> +typedef struct sPowerNVMachineState sPowerNVMachineState;
> +
> +#define TYPE_POWERNV_MACHINE  "powernv-machine"
> +#define POWERNV_MACHINE(obj) \
> +OBJECT_CHECK(sPowerNVMachineState, (obj), TYPE_POWERNV_MACHINE)
> +
> +/**
> + * sPowerNVMachineState:
> + */
> +struct sPowerNVMachineState {
> +/*< private >*/
> +MachineState parent_obj;
> +PnvSystem sys;
> +};
> +
> +static size_t create_page_sizes_prop(CPUPPCState *env, uint32_t *prop,
> + size_t maxsize)
> +{
> +size_t maxcells = maxsize / sizeof(uint32_t);
> +int i, j, count;
> +uint32_t *p = prop;
> +
> +for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
> +struct ppc_one_seg_page_size *sps = >sps.sps[i];
> +
> +if (!sps->page_shift) {
> +break;
> +}
> +for (count = 0; count <

[Qemu-devel] [PATCH for 2.6 2/3] block: Hide HBitmap in block dirty bitmap interface

2015-11-20 Thread Fam Zheng

HBitmap is an implementation detail of block dirty bitmap that should be hidden
from users. Introduce a BdrvDirtyBitmapIter to encapsulate the underlying
HBitmapIter.

A small difference in the interface is, before, an HBitmapIter is initialized
in place, now the new BdrvDirtyBitmapIter must be dynamically allocated because
the structure definition is in block.c.

Two current users are converted too.

Signed-off-by: Fam Zheng 
---
 block.c   | 79 ++-
 block/backup.c| 14 +
 block/mirror.c| 14 +
 include/block/block.h |  9 --
 4 files changed, 82 insertions(+), 34 deletions(-)

diff --git a/block.c b/block.c
index 3a7324b..e225050 100644
--- a/block.c
+++ b/block.c
@@ -63,14 +63,22 @@
  * or enabled. A frozen bitmap can only abdicate() or reclaim().
  */
 struct BdrvDirtyBitmap {
+int gran_shift; /* Bits to right shift from sector number to
+   bit index. */
 HBitmap *bitmap;/* Dirty sector bitmap implementation */
 BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
 char *name; /* Optional non-empty unique ID */
 int64_t size;   /* Size of the bitmap (Number of sectors) */
 bool disabled;  /* Bitmap is read-only */
+int active_iterators;   /* How many iterators are active */
 QLIST_ENTRY(BdrvDirtyBitmap) list;
 };
 
+struct BdrvDirtyBitmapIter {
+HBitmapIter hbi;
+BdrvDirtyBitmap *bitmap;
+};
+
 #define NOT_DONE 0x7fff /* used while emulated sync operation in progress 
*/
 
 struct BdrvStates bdrv_states = QTAILQ_HEAD_INITIALIZER(bdrv_states);
@@ -3157,24 +3165,26 @@ BdrvDirtyBitmap 
*bdrv_create_dirty_bitmap(BlockDriverState *bs,
 {
 int64_t bitmap_size;
 BdrvDirtyBitmap *bitmap;
-uint32_t sector_granularity;
+int gran_shift;
 
 assert((granularity & (granularity - 1)) == 0);
+/* Caller should check that */
+assert(granularity >= BDRV_SECTOR_SIZE);
 
+gran_shift = ctz32(granularity) - BDRV_SECTOR_BITS;
 if (name && bdrv_find_dirty_bitmap(bs, name)) {
 error_setg(errp, "Bitmap already exists: %s", name);
 return NULL;
 }
-sector_granularity = granularity >> BDRV_SECTOR_BITS;
-assert(sector_granularity);
-bitmap_size = bdrv_nb_sectors(bs);
+bitmap_size = DIV_ROUND_UP(bdrv_getlength(bs), granularity);
 if (bitmap_size < 0) {
 error_setg_errno(errp, -bitmap_size, "could not get length of device");
 errno = -bitmap_size;
 return NULL;
 }
 bitmap = g_new0(BdrvDirtyBitmap, 1);
-bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
+bitmap->bitmap = hbitmap_alloc(bitmap_size, 0);
+bitmap->gran_shift = gran_shift;
 bitmap->size = bitmap_size;
 bitmap->name = g_strdup(name);
 bitmap->disabled = false;
@@ -3293,9 +3303,10 @@ BdrvDirtyBitmap 
*bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
 {
 BdrvDirtyBitmap *bitmap;
-uint64_t size = bdrv_nb_sectors(bs);
 
 QLIST_FOREACH(bitmap, >dirty_bitmaps, list) {
+int64_t size = bdrv_nb_sectors(bs) >> bitmap->gran_shift;
+/* TODO: what if size < 0? */
 assert(!bdrv_dirty_bitmap_frozen(bitmap));
 hbitmap_truncate(bitmap->bitmap, size);
 bitmap->size = size;
@@ -3307,6 +3318,7 @@ void bdrv_release_dirty_bitmap(BlockDriverState *bs, 
BdrvDirtyBitmap *bitmap)
 BdrvDirtyBitmap *bm, *next;
 QLIST_FOREACH_SAFE(bm, >dirty_bitmaps, list, next) {
 if (bm == bitmap) {
+assert(!bitmap->active_iterators);
 assert(!bdrv_dirty_bitmap_frozen(bm));
 QLIST_REMOVE(bitmap, list);
 hbitmap_free(bitmap->bitmap);
@@ -3354,7 +3366,7 @@ BlockDirtyInfoList 
*bdrv_query_dirty_bitmaps(BlockDriverState *bs)
 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t 
sector)
 {
 if (bitmap) {
-return hbitmap_get(bitmap->bitmap, sector);
+return hbitmap_get(bitmap->bitmap, sector >> bitmap->gran_shift);
 } else {
 return 0;
 }
@@ -3382,26 +3394,56 @@ uint32_t 
bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
 
 uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
 {
-return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
+return BDRV_SECTOR_SIZE << bitmap->gran_shift;
 }
 
-void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
+BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap,
+ uint64_t first_sector)
 {
-hbitmap_iter_init(hbi, bitmap->bitmap, 0);
+BdrvDirtyBitmapIter *iter = g_new(BdrvDirtyBitmapIter, 1);
+hbitmap_iter_init(>hbi, bitmap->bitmap,
+  first_sector >> bitmap->gran_shift);
+iter->bitmap = bitmap;
+

[Qemu-devel] [PATCH v6 1/2] mirror: Rewrite mirror_iteration

2015-11-20 Thread Fam Zheng

The "pnum < nb_sectors" condition in deciding whether to actually copy
data is unnecessarily strict, and the qiov initialization is
unnecessarily for bdrv_aio_write_zeroes and bdrv_aio_discard.

Rewrite mirror_iteration to fix both flaws.

Signed-off-by: Fam Zheng 

---
v5: Address Max's review comments:
- Fix parameter name of mirror_do_read().
- Simplify the buffer waiting loop in mirror_do_read.
- Don't skip next dirty chunk when collecting consective dirty
  chunks.
- Check sector range when collecting consective dirty chunks.
- Don't misuse a negative return value of
  bdrv_get_block_status_above.
---
 block/mirror.c | 307 +++--
 1 file changed, 187 insertions(+), 120 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index 52c9abf..ff8149d 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -45,7 +45,6 @@ typedef struct MirrorBlockJob {
 BlockdevOnError on_source_error, on_target_error;
 bool synced;
 bool should_complete;
-int64_t sector_num;
 int64_t granularity;
 size_t buf_size;
 int64_t bdev_length;
@@ -157,113 +156,76 @@ static void mirror_read_complete(void *opaque, int ret)
 mirror_write_complete, op);
 }
 
-static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
+/* Round sector_num and/or nb_sectors to target cluster if COW is needed, and
+ * return the offset of the adjusted ending sector against
+ * sector_num + nb_sectors. */
+static int mirror_cow_align(MirrorBlockJob *s,
+int64_t *sector_num,
+int *nb_sectors)
+{
+bool head_need_cow, tail_need_cow;
+int diff = 0;
+int sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
+
+head_need_cow = !test_bit(*sector_num / sectors_per_chunk, s->cow_bitmap);
+tail_need_cow = !test_bit((*sector_num + *nb_sectors) / sectors_per_chunk,
+ s->cow_bitmap);
+if (head_need_cow || tail_need_cow) {
+int64_t rounded_sector_num;
+int rounded_nb_sectors;
+bdrv_round_to_clusters(s->target, *sector_num, *nb_sectors,
+   _sector_num, _nb_sectors);
+assert(*sector_num >= rounded_sector_num);
+assert(rounded_nb_sectors >= *nb_sectors);
+if (tail_need_cow) {
+int diff = rounded_sector_num + rounded_nb_sectors
+- (*sector_num + *nb_sectors);
+*nb_sectors += diff;
+}
+if (head_need_cow) {
+int diff = *sector_num - rounded_sector_num;
+*sector_num = rounded_sector_num;
+*nb_sectors += diff;
+}
+}
+return diff;
+}
+
+/* Submit async read while handling COW.
+ * Returns: nb_sectors if no alignment is necessary, or
+ *  (new_end - sector_num) if tail is rounded up or down due to
+ *  alignment or buffer limit.
+ */
+static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num,
+  int nb_sectors)
 {
 BlockDriverState *source = s->common.bs;
-int nb_sectors, sectors_per_chunk, nb_chunks;
-int64_t end, sector_num, next_chunk, next_sector, hbitmap_next_sector;
-uint64_t delay_ns = 0;
+int sectors_per_chunk, nb_chunks;
+int ret = nb_sectors;
 MirrorOp *op;
-int pnum;
-int64_t ret;
 
-s->sector_num = hbitmap_iter_next(>hbi);
-if (s->sector_num < 0) {
-bdrv_dirty_iter_init(s->dirty_bitmap, >hbi);
-s->sector_num = hbitmap_iter_next(>hbi);
-trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
-assert(s->sector_num >= 0);
-}
-
-hbitmap_next_sector = s->sector_num;
-sector_num = s->sector_num;
 sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
-end = s->bdev_length / BDRV_SECTOR_SIZE;
 
-/* Extend the QEMUIOVector to include all adjacent blocks that will
- * be copied in this operation.
- *
- * We have to do this if we have no backing file yet in the destination,
- * and the cluster size is very large.  Then we need to do COW ourselves.
- * The first time a cluster is copied, copy it entirely.  Note that,
- * because both the granularity and the cluster size are powers of two,
- * the number of sectors to copy cannot exceed one cluster.
- *
- * We also want to extend the QEMUIOVector to include more adjacent
- * dirty blocks if possible, to limit the number of I/O operations and
- * run efficiently even with a small granularity.
- */
-nb_chunks = 0;
-nb_sectors = 0;
-next_sector = sector_num;
-next_chunk = sector_num / sectors_per_chunk;
+if (s->cow_bitmap) {
+ret += mirror_cow_align(s, _num, _sectors);
+}
+/* We can only handle as much as buf_size at a time. */
+nb_sectors = MIN(s->buf_size >> BDRV_SECTOR_BITS, nb_sectors);
+assert(nb_sectors);
+/* The sector range

[Qemu-devel] [PATCH v6 2/2] mirror: Add mirror_wait_for_io

2015-11-20 Thread Fam Zheng

The three lines are duplicated a number of times now, refactor a
function.

Signed-off-by: Fam Zheng 
---
 block/mirror.c | 24 
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index ff8149d..ea5a76a 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -191,6 +191,14 @@ static int mirror_cow_align(MirrorBlockJob *s,
 return diff;
 }
 
+static inline void mirror_wait_for_io(MirrorBlockJob *s)
+{
+assert(!s->waiting_for_io);
+s->waiting_for_io = true;
+qemu_coroutine_yield();
+s->waiting_for_io = false;
+}
+
 /* Submit async read while handling COW.
  * Returns: nb_sectors if no alignment is necessary, or
  *  (new_end - sector_num) if tail is rounded up or down due to
@@ -221,9 +229,7 @@ static int mirror_do_read(MirrorBlockJob *s, int64_t 
sector_num,
 
 while (s->buf_free_count < nb_chunks) {
 trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
-s->waiting_for_io = true;
-qemu_coroutine_yield();
-s->waiting_for_io = false;
+mirror_wait_for_io(s);
 }
 
 /* Allocate a MirrorOp that is used as an AIO callback.  */
@@ -314,9 +320,7 @@ static uint64_t coroutine_fn 
mirror_iteration(MirrorBlockJob *s)
 break;
 }
 trace_mirror_yield_in_flight(s, next_sector, s->in_flight);
-s->waiting_for_io = true;
-qemu_coroutine_yield();
-s->waiting_for_io = false;
+mirror_wait_for_io(s);
 /* Now retry.  */
 } else {
 hbitmap_next = hbitmap_iter_next(>hbi);
@@ -406,9 +410,7 @@ static void mirror_free_init(MirrorBlockJob *s)
 static void mirror_drain(MirrorBlockJob *s)
 {
 while (s->in_flight > 0) {
-s->waiting_for_io = true;
-qemu_coroutine_yield();
-s->waiting_for_io = false;
+mirror_wait_for_io(s);
 }
 }
 
@@ -583,9 +585,7 @@ static void coroutine_fn mirror_run(void *opaque)
 if (s->in_flight == MAX_IN_FLIGHT || s->buf_free_count == 0 ||
 (cnt == 0 && s->in_flight > 0)) {
 trace_mirror_yield(s, s->in_flight, s->buf_free_count, cnt);
-s->waiting_for_io = true;
-qemu_coroutine_yield();
-s->waiting_for_io = false;
+mirror_wait_for_io(s);
 continue;
 } else if (cnt != 0) {
 delay_ns = mirror_iteration(s);
-- 
2.4.3

[Qemu-devel] [PATCH v6 0/2] mirror: Improve zero write and discard

2015-11-20 Thread Fam Zheng

v6: Address Paolo's comments in mirror_iteration:
- break if we've already got a chunk to work on;
- fix a typo in comment.

Patch 1 rewrites mirror_iteration. Patch 2 is a small DRY cleaning up.


Fam Zheng (2):
  mirror: Rewrite mirror_iteration
  mirror: Add mirror_wait_for_io

 block/mirror.c | 325 ++---
 1 file changed, 196 insertions(+), 129 deletions(-)

-- 
2.4.3

Re: [Qemu-devel] [PATCH] Assume madvise for (no)hugepage works

2015-11-20 Thread Juan Quintela

"Dr. David Alan Gilbert (git)"  wrote:
> From: "Dr. David Alan Gilbert" 
>
> madvise() returns EINVAL in the case of many failures, but also
> returns it in cases where the host kernel doesn't have THP enabled.
> Postcopy only really cares that THP is off before it detects faults,
> and turns it back on afterwards; so we're going to have
> to assume that if the madvise fails then the host just doesn't do
> THP and we can carry on with the postcopy.
>
> Signed-off-by: Dr. David Alan Gilbert 
> Tested-by: Jason J. Herne 

Reviewed-by: Juan Quintela

[Qemu-devel] [PATCH v5 08/10] spapr: CPU hotplug support

2015-11-20 Thread Bharata B Rao

Support CPU hotplug via device-add command. Set up device tree
entries for the hotplugged CPU core and use the exising EPOW event
infrastructure to send CPU hotplug notification to the guest.

Create only cores explicitly from boot path as well as hotplug path
and let the ->plug() handler of the core create the threads of the core.

Also support cold plugged CPUs that are specified by -device option
on cmdline.

Signed-off-by: Bharata B Rao 
---
 hw/ppc/spapr.c  | 156 +++-
 hw/ppc/spapr_events.c   |   3 +
 hw/ppc/spapr_rtas.c |  24 +++
 target-ppc/translate_init.c |   8 +++
 4 files changed, 189 insertions(+), 2 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 814b0a6..4434d45 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -596,6 +596,18 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, 
int offset,
 size_t page_sizes_prop_size;
 uint32_t vcpus_per_socket = smp_threads * smp_cores;
 uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
+sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(qdev_get_machine());
+sPAPRDRConnector *drc;
+sPAPRDRConnectorClass *drck;
+int drc_index;
+
+if (smc->dr_cpu_enabled) {
+drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, index);
+g_assert(drc);
+drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
+drc_index = drck->get_index(drc);
+_FDT((fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index)));
+}
 
 /* Note: we keep CI large pages off for now because a 64K capable guest
  * provisioned with large pages might otherwise try to map a qemu
@@ -1739,6 +1751,7 @@ static void ppc_spapr_init(MachineState *machine)
 char *filename;
 int smt = kvmppc_smt_threads();
 int smp_max_cores = DIV_ROUND_UP(max_cpus, smp_threads);
+int spapr_smp_cores = DIV_ROUND_UP(smp_cpus, smp_threads);
 
 msi_supported = true;
 
@@ -1818,7 +1831,7 @@ static void ppc_spapr_init(MachineState *machine)
 if (machine->cpu_model == NULL) {
 machine->cpu_model = kvm_enabled() ? "host" : "POWER7";
 }
-for (i = 0; i < smp_cpus; i++) {
+for (i = 0; i < spapr_smp_cores; i++) {
 cpu = cpu_ppc_init(machine->cpu_model);
 if (cpu == NULL) {
 fprintf(stderr, "Unable to find PowerPC CPU definition\n");
@@ -2207,10 +2220,135 @@ out:
 error_propagate(errp, local_err);
 }
 
+static void *spapr_populate_hotplug_cpu_dt(DeviceState *dev, CPUState *cs,
+   int *fdt_offset,
+   sPAPRMachineState *spapr)
+{
+PowerPCCPU *cpu = POWERPC_CPU(cs);
+DeviceClass *dc = DEVICE_GET_CLASS(cs);
+int id = ppc_get_vcpu_dt_id(cpu);
+void *fdt;
+int offset, fdt_size;
+char *nodename;
+
+fdt = create_device_tree(_size);
+nodename = g_strdup_printf("%s@%x", dc->fw_name, id);
+offset = fdt_add_subnode(fdt, 0, nodename);
+
+spapr_populate_cpu_dt(cs, fdt, offset, spapr);
+g_free(nodename);
+
+*fdt_offset = offset;
+return fdt;
+}
+
+static void spapr_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
+Error **errp)
+{
+sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(qdev_get_machine());
+sPAPRMachineState *ms = SPAPR_MACHINE(hotplug_dev);
+CPUState *cs = CPU(dev);
+PowerPCCPU *cpu = POWERPC_CPU(cs);
+int id = ppc_get_vcpu_dt_id(cpu);
+sPAPRDRConnector *drc =
+spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, id);
+sPAPRDRConnectorClass *drck;
+int smt = kvmppc_smt_threads();
+Error *local_err = NULL;
+void *fdt = NULL;
+int i, fdt_offset = 0;
+
+/* Set NUMA node for the added CPUs  */
+for (i = 0; i < nb_numa_nodes; i++) {
+if (test_bit(cs->cpu_index, numa_info[i].node_cpu)) {
+cs->numa_node = i;
+break;
+}
+}
+
+/*
+ * Currently CPU core and threads of a core aren't really different
+ * from QEMU point of view since all of them are just CPU devices. Hence
+ * there is no separate realize routines for cores and threads.
+ * We use the id check below to do things differently for cores and 
threads.
+ *
+ * SMT threads return from here, only main thread (core) will
+ * continue, create threads and signal hotplug event to the guest.
+ */
+if ((id % smt) != 0) {
+return;
+}
+
+/* Create SMT threads of the core. */
+for (i = 1; i < smp_threads; i++) {
+cpu = cpu_ppc_init(current_machine->cpu_model);
+if (!cpu) {
+error_report("Unable to find PowerPC CPU definition: %s",
+  current_machine->cpu_model);
+exit(EXIT_FAILURE);
+}
+}
+
+if (!smc->dr_cpu_enabled) {
+/*
+ * This is a cold plugged CPU but the machine doesn't support
+

[Qemu-devel] [PATCH v5 02/10] exec: Remove cpu from cpus list during cpu_exec_exit()

2015-11-20 Thread Bharata B Rao

CPUState *cpu gets added to the cpus list during cpu_exec_init(). It
should be removed from cpu_exec_exit().

cpu_exec_init() is called from generic CPU::instance_finalize and some
archs like PowerPC call it from CPU unrealizefn. So ensure that we
dequeue the cpu only once.

Now -1 value for cpu->cpu_index indicates that we have already dequeued
the cpu for CONFIG_USER_ONLY case also.

Signed-off-by: Bharata B Rao 
---
 exec.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/exec.c b/exec.c
index b09f18b..fbac85b 100644
--- a/exec.c
+++ b/exec.c
@@ -593,6 +593,7 @@ void cpu_exec_exit(CPUState *cpu)
 return;
 }
 
+QTAILQ_REMOVE(, cpu, node);
 bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
 cpu->cpu_index = -1;
 }
@@ -611,6 +612,15 @@ static int cpu_get_free_index(Error **errp)
 
 void cpu_exec_exit(CPUState *cpu)
 {
+cpu_list_lock();
+if (cpu->cpu_index == -1) {
+cpu_list_unlock();
+return;
+}
+
+QTAILQ_REMOVE(, cpu, node);
+cpu->cpu_index = -1;
+cpu_list_unlock();
 }
 #endif
 
-- 
2.1.0

[Qemu-devel] [PATCH v5 07/10] spapr: Enable CPU hotplug for pseries-2.5 and add CPU DRC DT entries

2015-11-20 Thread Bharata B Rao

Start supporting CPU hotplug from pseries-2.5 onwards. Add CPU
DRC (Dynamic Resource Connector) device tree entries.

Signed-off-by: Bharata B Rao 
Reviewed-by: David Gibson 
---
 hw/ppc/spapr.c | 23 +++
 include/hw/ppc/spapr.h |  1 +
 2 files changed, 24 insertions(+)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 030ee35..814b0a6 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -973,6 +973,16 @@ static void spapr_finalize_fdt(sPAPRMachineState *spapr,
 _FDT(spapr_drc_populate_dt(fdt, 0, NULL, SPAPR_DR_CONNECTOR_TYPE_LMB));
 }
 
+if (smc->dr_cpu_enabled) {
+int offset = fdt_path_offset(fdt, "/cpus");
+ret = spapr_drc_populate_dt(fdt, offset, NULL,
+SPAPR_DR_CONNECTOR_TYPE_CPU);
+if (ret < 0) {
+fprintf(stderr, "Couldn't set up CPU DR device tree properties\n");
+exit(1);
+}
+}
+
 _FDT((fdt_pack(fdt)));
 
 if (fdt_totalsize(fdt) > FDT_MAX_SIZE) {
@@ -1727,6 +1737,8 @@ static void ppc_spapr_init(MachineState *machine)
 long load_limit, fw_size;
 bool kernel_le = false;
 char *filename;
+int smt = kvmppc_smt_threads();
+int smp_max_cores = DIV_ROUND_UP(max_cpus, smp_threads);
 
 msi_supported = true;
 
@@ -1793,6 +1805,15 @@ static void ppc_spapr_init(MachineState *machine)
 spapr_validate_node_memory(machine);
 }
 
+if (smc->dr_cpu_enabled) {
+for (i = 0; i < smp_max_cores; i++) {
+sPAPRDRConnector *drc =
+spapr_dr_connector_new(OBJECT(spapr),
+   SPAPR_DR_CONNECTOR_TYPE_CPU, i * smt);
+qemu_register_reset(spapr_drc_reset, drc);
+}
+}
+
 /* init CPUs */
 if (machine->cpu_model == NULL) {
 machine->cpu_model = kvm_enabled() ? "host" : "POWER7";
@@ -2277,6 +2298,7 @@ static void spapr_machine_class_init(ObjectClass *oc, 
void *data)
 mc->cpu_index_to_socket_id = spapr_cpu_index_to_socket_id;
 
 smc->dr_lmb_enabled = false;
+smc->dr_cpu_enabled = false;
 fwc->get_dev_path = spapr_get_fw_dev_path;
 nc->nmi_monitor_handler = spapr_nmi;
 }
@@ -2443,6 +2465,7 @@ static void spapr_machine_2_5_class_init(ObjectClass *oc, 
void *data)
 mc->alias = "pseries";
 mc->is_default = 1;
 smc->dr_lmb_enabled = true;
+smc->dr_cpu_enabled = true;
 }
 
 static const TypeInfo spapr_machine_2_5_info = {
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 5baa906..716d7ad 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -36,6 +36,7 @@ struct sPAPRMachineClass {
 
 /*< public >*/
 bool dr_lmb_enabled; /* enable dynamic-reconfig/hotplug of LMBs */
+bool dr_cpu_enabled; /* enable dynamic-reconfig/hotplug of CPUs */
 };
 
 /**
-- 
2.1.0

[Qemu-devel] [PATCH v5 03/10] exec: Do vmstate unregistration from cpu_exec_exit()

2015-11-20 Thread Bharata B Rao

cpu_exec_init() does vmstate_register and register_savevm for the CPU device.
These need to be undone from cpu_exec_exit(). These changes are needed to
support CPU hot removal and also to correctly fail hotplug attempts
beyond max_cpus.

Signed-off-by: Bharata B Rao 
---
 exec.c | 20 
 1 file changed, 20 insertions(+)

diff --git a/exec.c b/exec.c
index fbac85b..47f03c0 100644
--- a/exec.c
+++ b/exec.c
@@ -588,6 +588,8 @@ static int cpu_get_free_index(Error **errp)
 
 void cpu_exec_exit(CPUState *cpu)
 {
+CPUClass *cc = CPU_GET_CLASS(cpu);
+
 if (cpu->cpu_index == -1) {
 /* cpu_index was never allocated by this @cpu or was already freed. */
 return;
@@ -596,6 +598,15 @@ void cpu_exec_exit(CPUState *cpu)
 QTAILQ_REMOVE(, cpu, node);
 bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
 cpu->cpu_index = -1;
+if (cc->vmsd != NULL) {
+vmstate_unregister(NULL, cc->vmsd, cpu);
+}
+#if defined(CPU_SAVE_VERSION)
+unregister_savevm(NULL, "cpu", cpu->env_ptr);
+#endif
+if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
+vmstate_unregister(NULL, _cpu_common, cpu);
+}
 }
 #else
 
@@ -612,6 +623,8 @@ static int cpu_get_free_index(Error **errp)
 
 void cpu_exec_exit(CPUState *cpu)
 {
+CPUClass *cc = CPU_GET_CLASS(cpu);
+
 cpu_list_lock();
 if (cpu->cpu_index == -1) {
 cpu_list_unlock();
@@ -621,6 +634,13 @@ void cpu_exec_exit(CPUState *cpu)
 QTAILQ_REMOVE(, cpu, node);
 cpu->cpu_index = -1;
 cpu_list_unlock();
+
+if (cc->vmsd != NULL) {
+vmstate_unregister(NULL, cc->vmsd, cpu);
+}
+if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
+vmstate_unregister(NULL, _cpu_common, cpu);
+}
 }
 #endif
 
-- 
2.1.0

Re: [Qemu-devel] [PATCH for-2.5] iothread: include id in thread name

2015-11-20 Thread Dr. David Alan Gilbert

* Paolo Bonzini (pbonz...@redhat.com) wrote:
> This makes it easier to find the desired thread
> 
> Signed-off-by: Paolo Bonzini 
> ---
>  iothread.c | 7 ++-
>  1 file changed, 6 insertions(+), 1 deletion(-)
> 
> diff --git a/iothread.c b/iothread.c
> index da6ce7b..8a1d6f8 100644
> --- a/iothread.c
> +++ b/iothread.c
> @@ -72,6 +72,7 @@ static void iothread_complete(UserCreatable *obj, Error 
> **errp)
>  {
>  Error *local_error = NULL;
>  IOThread *iothread = IOTHREAD(obj);
> +char *name, *thread_name;
>  
>  iothread->stopping = false;
>  iothread->thread_id = -1;
> @@ -87,8 +88,12 @@ static void iothread_complete(UserCreatable *obj, Error 
> **errp)
>  /* This assumes we are called from a thread with useful CPU affinity for 
> us
>   * to inherit.
>   */
> -qemu_thread_create(>thread, "iothread", iothread_run,
> +name = object_get_canonical_path_component(OBJECT(obj));
> +thread_name = g_strdup_printf("iothread %s", name);

Yes, that's a good idea; Can you shorten that to just "IO %s"  please,
for three reasons:

 1) There is a ~14 character limit on the size of that string
 2) We use CPU ...   for the CPU threads.
 3) It's a threadname, it doesn't need to say thread

Dave

> +qemu_thread_create(>thread, thread_name, iothread_run,
> iothread, QEMU_THREAD_JOINABLE);
> +g_free(thread_name);
> +g_free(name);
>  
>  /* Wait for initialization to complete */
>  qemu_mutex_lock(>init_done_lock);
> -- 
> 1.8.3.1
> 
--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

Re: [Qemu-devel] [RFC v1] virtio-crypto specification

2015-11-20 Thread Michael S. Tsirkin

Thanks, looks good overall.
You might want to join the TC if you maintain a device.
Generally, I think this needs a bit more formal conformance
statements.
Some examples below.

On Fri, Nov 20, 2015 at 03:27:51AM +, Gonglei (Arei) wrote:
> Hi guys,
> 
> After initial discussion at this year's KVM forum, I post the RFC version of 
> virtio-crypto
> device specification now. 
> 
> If you have any comments, please let me know, thanks.
> 
> Regards,
> -Gonglei
> 
> 
> 1 Crypto Device
> 
> The virtio crypto device is a virtual crypto device (ie. hardware crypto 
> accelerator card). Encrypt and decrypt requests are placed in the data queue, 
> and handled by the real hardware crypto accelerators finally. A second queue 
> is the controlling queue, which is used to create/destroy session or some 
> other advanced filtering features.
> 
> 1.1   Device ID
> 
>   65535 (experimental)
> 
> 1.2   Virtqueues
> 
> 0 
>   controlq
> 1
>   dataq
> 
> 1.3   Feature bits
> 
> VIRTIO_CRYPTO_F_REQ_SIZE_MAX (0)  
> Maximum size of any single request is in “size_max”. 
> VIRTIO_CRYPTO_F_SYM (1)
> Device supports the symmetric cryptography API.
> VIRTIO_CRYPTO_F_DH (2)
> Device supports the Diffie Hellman API.
> VIRTIO_CRYPTO_F_DSA (3)
> Device supports the DSA API.
> VIRTIO_CRYPTO_F_RSA (4)
> Device supports the RSA API.
> VIRTIO_CRYPTO_F_EC (5)
> Device supports the Elliptic Curve API.
> VIRTIO_CRYPTO_F_ECDH (6)
> Device supports the Elliptic Curve Diffie Hellman API.
> VIRTIO_CRYPTO_F_ECDSA (7)
> Device supports the Elliptic Curve DSA API.
> VIRTIO_CRYPTO_F _KEY (8)
> Device supports the Key Generation API.
> VIRTIO_CRYPTO_F_LN (9)
> Device supports the Large Number API.
> VIRTIO_CRYPTO_F_PRIME (10)
> Device supports the prime number testing API.
> VIRTIO_CRYPTO_F_DRGB (11)
> Device supports the DRGB API.
> VIRTIO_CRYPTO_F_NRGB (12)
> Device supports the NRGB API.
> VIRTIO_CRYPTO_F_RAND (13)
> Device supports the random bit/number generation API.
> 
> 1.4   Device configuration layout
> 
> struct virtio_crypto_config {
>   le32 size_max; /* Maximum size of any single request */
> }
> 
> 1.5   Device Initialization
> 
> 1. The initialization routine should identify the data and control virtqueues.
> 2. If the VIRTIO_CRYPTO_F_SYM feature bit is negotiated, identify the device 
> supports the symmetric cryptography API, which as the same as other features.
> 
> 1.6   Device Operation
> 
> The controlq is used to control session operations, such as create or
> destroy. Meanwhile, some other features or functions can also be
> handled by controlq.

In future versions of the specification?

> The control request is preceded by a header:
> struct virtio_crypto_ctx_outhdr {
> /* cipher algorithm type (ie. aes-cbc ) */
> __virtio32 alg;
> /* length of key */
> __virtio32 keylen;
> /* reserved */
> __virtio32 flags;
> /* control type  */
> uint8_t type;
> /* encrypt or decrypt */
> uint8_t op;
> /* mode of hash operation, including authenticated/plain/nested hash */
> uint8_t hash_mode;
> /* authenticate hash/cipher ordering  */
> uint8_t alg_chain_order;
> /* length of authenticated key */
> __virtio32 auth_key_len;
> /* hash algorithm type */
> __virtio32 hash_alg;

You can make this all le too: I don't think we need to
support legacy devices of this type.
Spec also does not use uint8_t.

> };
> The encrypt/decrypt requests and the corresponding results are transmitted by 
> placing them in dataq. The request itself is preceded by a header:
> struct virtio_crypto_req_outhdr {
> /* algorithm type (ie. aes-128-cbc ) */
> __virtio32 mode;
> /* length of iv */
> __virtio32 ivlen;
> /* length of source data */
> __virtio32 len;
> /* length of auth data */
> __virtio32 auth_len;
> /* the backend session id */
> __virtio64 session_id;
> /* reserved */
> __virtio32 flags;
> };
> 
> Both ctx and data requests end by a status byte. The ﬁnal status byte is 
> written by the device: either VIRTIO_CRYPTO_S_OK for success, 
> VIRTIO_BLK_S_IOERR for device or driver error or VIRTIO_BLK_S_UNSUPP for a 
> request unsupported by device, VIRTIO_CRYPTO_S_BADMSG for verification failed 
> when decrypt AEAD algorithms:
> 
> #define VIRTIO_CRYPTO_S_OK0
> #define VIRTIO_CRYPTO_S_ERR1
> #define VIRTIO_CRYPTO_S_UNSUPP2
> #define VIRTIO_CRYPTO_S_BADMSG3
> 
> For symmetric cryptography, three types algorithms are supported:

What does this "are supported" mean?
Device SHOULD support 3 types of algorithms?
Or CAN? MUST?

> enum {
> VIRTIO_CRYPTO_ABLKCIPHER,
> VIRTIO_CRYPTO_AEAD,
> VIRTIO_CRYPTO_HASH,
> };

Specify values here too pls.

> VIRTIO_CRYPTO_ABLKCIPHER: Asynchronous Block Cipher.
> VIRTIO_CRYPTO_AEAD: Authenticated Encryption With Associated Data (AEAD) 
> Cipher.
> VIRTIO_CRYPTO_HASH: Hash and MAC (Message Authentication Code) cipher.
> 
> 1.6.1 Encryption Operation
> 
> Bothe

[Qemu-devel] [PATCH for 2.6 3/3] hbitmap: Drop "granularity"

2015-11-20 Thread Fam Zheng

Sometimes confused with the granularity with coarse levels in HBitmap, the
granularity in the hbitmap_alloc is not an essential concept of a bitmap.  Now
that all callers except the test code use zero, it's possible to drop the
parameter to make the interface cleaner and more intuitive.

Test code of hbitmap granularity is removed together.

Signed-off-by: Fam Zheng 
---
 block.c|   4 +-
 include/qemu/hbitmap.h |  20 +
 tests/test-hbitmap.c   | 206 -
 util/hbitmap.c |  64 +++
 4 files changed, 49 insertions(+), 245 deletions(-)

diff --git a/block.c b/block.c
index e225050..86b32a0 100644
--- a/block.c
+++ b/block.c
@@ -3183,7 +3183,7 @@ BdrvDirtyBitmap 
*bdrv_create_dirty_bitmap(BlockDriverState *bs,
 return NULL;
 }
 bitmap = g_new0(BdrvDirtyBitmap, 1);
-bitmap->bitmap = hbitmap_alloc(bitmap_size, 0);
+bitmap->bitmap = hbitmap_alloc(bitmap_size);
 bitmap->gran_shift = gran_shift;
 bitmap->size = bitmap_size;
 bitmap->name = g_strdup(name);
@@ -3453,7 +3453,7 @@ void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, 
HBitmap **out)
 hbitmap_reset_all(bitmap->bitmap);
 } else {
 HBitmap *backup = bitmap->bitmap;
-bitmap->bitmap = hbitmap_alloc(bitmap->size, 0);
+bitmap->bitmap = hbitmap_alloc(bitmap->size);
 *out = backup;
 }
 }
diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h
index bb94a00..0f81483 100644
--- a/include/qemu/hbitmap.h
+++ b/include/qemu/hbitmap.h
@@ -39,9 +39,6 @@ typedef struct HBitmapIter HBitmapIter;
 struct HBitmapIter {
 const HBitmap *hb;
 
-/* Copied from hb for access in the inline functions (hb is opaque).  */
-int granularity;
-
 /* Entry offset into the last-level array of longs.  */
 size_t pos;
 
@@ -54,15 +51,10 @@ struct HBitmapIter {
 /**
  * hbitmap_alloc:
  * @size: Number of bits in the bitmap.
- * @granularity: Granularity of the bitmap.  Aligned groups of 2^@granularity
- * bits will be represented by a single bit.  Each operation on a
- * range of bits first rounds the bits to determine which group they land
- * in, and then affect the entire set; iteration will only visit the first
- * bit of each group.
  *
  * Allocate a new HBitmap.
  */
-HBitmap *hbitmap_alloc(uint64_t size, int granularity);
+HBitmap *hbitmap_alloc(uint64_t size);
 
 /**
  * hbitmap_truncate:
@@ -96,14 +88,6 @@ bool hbitmap_merge(HBitmap *a, const HBitmap *b);
 bool hbitmap_empty(const HBitmap *hb);
 
 /**
- * hbitmap_granularity:
- * @hb: HBitmap to operate on.
- *
- * Return the granularity of the HBitmap.
- */
-int hbitmap_granularity(const HBitmap *hb);
-
-/**
  * hbitmap_count:
  * @hb: HBitmap to operate on.
  *
@@ -204,7 +188,7 @@ static inline int64_t hbitmap_iter_next(HBitmapIter *hbi)
 hbi->cur[HBITMAP_LEVELS - 1] = cur & (cur - 1);
 item = ((uint64_t)hbi->pos << BITS_PER_LEVEL) + ctzl(cur);
 
-return item << hbi->granularity;
+return item;
 }
 
 /**
diff --git a/tests/test-hbitmap.c b/tests/test-hbitmap.c
index abcea0c..dc8485a 100644
--- a/tests/test-hbitmap.c
+++ b/tests/test-hbitmap.c
@@ -26,7 +26,6 @@ typedef struct TestHBitmapData {
 unsigned long *bits;
 size_t size;
 size_t old_size;
-intgranularity;
 } TestHBitmapData;
 
 
@@ -70,17 +69,16 @@ static void hbitmap_test_check(TestHBitmapData *data,
 }
 
 if (first == 0) {
-g_assert_cmpint(count << data->granularity, ==, 
hbitmap_count(data->hb));
+g_assert_cmpint(count, ==, hbitmap_count(data->hb));
 }
 }
 
 /* This is provided instead of a test setup function so that the sizes
are kept in the test functions (and not in main()) */
-static void hbitmap_test_init(TestHBitmapData *data,
-  uint64_t size, int granularity)
+static void hbitmap_test_init(TestHBitmapData *data, uint64_t size)
 {
 size_t n;
-data->hb = hbitmap_alloc(size, granularity);
+data->hb = hbitmap_alloc(size);
 
 n = (size + BITS_PER_LONG - 1) / BITS_PER_LONG;
 if (n == 0) {
@@ -88,7 +86,6 @@ static void hbitmap_test_init(TestHBitmapData *data,
 }
 data->bits = g_new0(unsigned long, n);
 data->size = size;
-data->granularity = granularity;
 if (size) {
 hbitmap_test_check(data, 0);
 }
@@ -158,9 +155,7 @@ static void hbitmap_test_set(TestHBitmapData *data,
 data->bits[pos] |= 1UL << bit;
 }
 
-if (data->granularity == 0) {
-hbitmap_test_check(data, 0);
-}
+hbitmap_test_check(data, 0);
 }
 
 /* Reset a range in the HBitmap and in the shadow "simple" bitmap.
@@ -177,9 +172,7 @@ static void hbitmap_test_reset(TestHBitmapData *data,
 data->bits[pos] &= ~(1UL << bit);
 }
 
-if (data->granularity == 0) {
-hbitmap_test_check(data, 0);
-}
+hbitmap_test_check(data, 0);
 }
 
 static void hbitmap_test_reset_all(TestHBitmapData

[Qemu-devel] [PATCH for 2.6 0/3] Bitmap clean-up patches for 2.6

2015-11-20 Thread Fam Zheng

This makes a cleaner base for more dirty bitmap work. "granularity" appearing
with different representations have always been mind twisting, remove it from
HBitmap to make the interface and implementation simpler. Upon this, it is
a bit easier to add persistent dirty bitmap functionalities.

Block dirty bitmap is not unit-tested, so the removal of HBitmap test code
looks like a loss, but the overall test coverage is barely affected as we also
have various mirror, commit and backup iotest cases, and they do catch various
bugs when I wrote the patches.

Please review!

Fam

Fam Zheng (3):
  backup: Use Bitmap to replace "s->bitmap"
  block: Hide HBitmap in block dirty bitmap interface
  hbitmap: Drop "granularity"

 block.c|  79 ++-
 block/backup.c |  25 +++---
 block/mirror.c |  14 ++--
 include/block/block.h  |   9 ++-
 include/qemu/hbitmap.h |  20 +
 tests/test-hbitmap.c   | 206 -
 util/hbitmap.c |  64 +++
 7 files changed, 135 insertions(+), 282 deletions(-)

-- 
2.4.3

[Qemu-devel] [PATCH v5 10/10] target-ppc: Enable CPU hotplug for POWER8 CPU family

2015-11-20 Thread Bharata B Rao

Support CPU hotplug on POWER8 by enabling device_add semantics.

Signed-off-by: Bharata B Rao 
---
 target-ppc/translate_init.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 245d73a..45d1b7c 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -8207,6 +8207,8 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
 dc->fw_name = "PowerPC,POWER8";
 dc->desc = "POWER8";
 dc->props = powerpc_servercpu_properties;
+dc->cannot_instantiate_with_device_add_yet = false;
+
 pcc->pvr_match = ppc_pvr_match_power8;
 pcc->pcr_mask = PCR_COMPAT_2_05 | PCR_COMPAT_2_06;
 pcc->init_proc = init_proc_POWER8;
-- 
2.1.0

[Qemu-devel] [PATCH v5 09/10] spapr: CPU hot unplug support

2015-11-20 Thread Bharata B Rao

Support hot removal of CPU for sPAPR guests by sending the hot unplug
notification to the guest via EPOW interrupt. Release the vCPU object
after CPU hot unplug so that vCPU fd can be parked and reused.

Signed-off-by: Bharata B Rao 
---
 hw/ppc/spapr.c | 71 ++
 1 file changed, 71 insertions(+)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 4434d45..6dca553 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2401,11 +2401,82 @@ static void spapr_machine_device_plug(HotplugHandler 
*hotplug_dev,
 }
 }
 
+static void spapr_cpu_destroy(PowerPCCPU *cpu)
+{
+sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+
+xics_cpu_destroy(spapr->icp, cpu);
+qemu_unregister_reset(spapr_cpu_reset, cpu);
+}
+
+static void spapr_cpu_release(DeviceState *dev, void *opaque)
+{
+CPUState *cs;
+int i;
+int id = ppc_get_vcpu_dt_id(POWERPC_CPU(CPU(dev)));
+
+for (i = id; i < id + smp_threads; i++) {
+CPU_FOREACH(cs) {
+PowerPCCPU *cpu = POWERPC_CPU(cs);
+
+if (i == ppc_get_vcpu_dt_id(cpu)) {
+spapr_cpu_destroy(cpu);
+cpu_remove(cs);
+}
+}
+}
+}
+
+static int spapr_cpu_unplug(HotplugHandler *hotplug_dev, DeviceState *dev,
+ Error **errp)
+{
+CPUState *cs = CPU(dev);
+PowerPCCPU *cpu = POWERPC_CPU(cs);
+int id = ppc_get_vcpu_dt_id(cpu);
+int smt = kvmppc_smt_threads();
+sPAPRDRConnector *drc =
+spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, id);
+sPAPRDRConnectorClass *drck;
+Error *local_err = NULL;
+
+/*
+ * SMT threads return from here, only main thread (core) will
+ * continue and signal hot unplug event to the guest.
+ */
+if ((id % smt) != 0) {
+return 0;
+}
+g_assert(drc);
+
+drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
+drck->detach(drc, dev, spapr_cpu_release, NULL, _err);
+if (local_err) {
+error_propagate(errp, local_err);
+return -1;
+}
+
+/*
+ * In addition to hotplugged CPUs, send the hot-unplug notification
+ * interrupt to the guest for coldplugged CPUs started via -device
+ * option too.
+ */
+spapr_hotplug_req_remove_by_index(drc);
+return 0;
+}
+
 static void spapr_machine_device_unplug(HotplugHandler *hotplug_dev,
   DeviceState *dev, Error **errp)
 {
+sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(qdev_get_machine());
+
 if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
 error_setg(errp, "Memory hot unplug not supported by sPAPR");
+} else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
+if (!smc->dr_cpu_enabled) {
+error_setg(errp, "CPU hot unplug not supported on this machine");
+return;
+}
+spapr_cpu_unplug(hotplug_dev, dev, errp);
 }
 }
 
-- 
2.1.0

[Qemu-devel] [PATCH v5 01/10] vl: Don't allow CPU toplogies with partially filled cores

2015-11-20 Thread Bharata B Rao

Prevent guests from booting with CPU topologies that have partially
filled CPU cores or can result in partially filled CPU cores after
CPU hotplug like

-smp 15,sockets=1,cores=4,threads=4,maxcpus=16 or
-smp 15,sockets=1,cores=4,threads=4,maxcpus=17.

Signed-off-by: Bharata B Rao 
---
 vl.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/vl.c b/vl.c
index 7d993a5..23a1a1e 100644
--- a/vl.c
+++ b/vl.c
@@ -1248,6 +1248,15 @@ static void smp_parse(QemuOpts *opts)
 exit(1);
 }
 
+if (cpus % threads || max_cpus % threads) {
+error_report("cpu topology: "
+ "sockets (%u) cores (%u) threads (%u) with "
+ "smp_cpus (%u) maxcpus (%u) "
+ "will result in partially filled cores",
+ sockets, cores, threads, cpus, max_cpus);
+exit(1);
+}
+
 smp_cpus = cpus;
 smp_cores = cores > 0 ? cores : 1;
 smp_threads = threads > 0 ? threads : 1;
-- 
2.1.0

Re: [Qemu-devel] [Qemu-ppc] [PATCH 14/77] ppc: Change 'invalid' bit mask of tlbiel and tlbie

2015-11-20 Thread David Gibson

On Wed, Nov 11, 2015 at 11:27:27AM +1100, Benjamin Herrenschmidt wrote:
> Otherwise it will trip on the forms used in recent architecture.
> 
> Ideally, we should have different handlers for different architecture
> levels but our current implementation of TLB flushing is dumb enough
> that this will do for now.
> 
> Signed-off-by: Benjamin Herrenschmidt 

Reviewed-by: David Gibson 

> ---
>  target-ppc/translate.c | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index 014fe5e..bd5df40 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -9952,8 +9952,10 @@ GEN_HANDLER2(slbmfee, "slbmfee", 0x1F, 0x13, 0x1C, 
> 0x001F0001, PPC_SEGMENT_64B),
>  GEN_HANDLER2(slbmfev, "slbmfev", 0x1F, 0x13, 0x1A, 0x001F0001, 
> PPC_SEGMENT_64B),
>  #endif
>  GEN_HANDLER(tlbia, 0x1F, 0x12, 0x0B, 0x03FFFC01, PPC_MEM_TLBIA),
> -GEN_HANDLER(tlbiel, 0x1F, 0x12, 0x08, 0x03FF0001, PPC_MEM_TLBIE),
> -GEN_HANDLER(tlbie, 0x1F, 0x12, 0x09, 0x03FF0001, PPC_MEM_TLBIE),
> +/* XXX Those instructions will need to be handled differently for
> + * different ISA versions */
> +GEN_HANDLER(tlbiel, 0x1F, 0x12, 0x08, 0x001F0001, PPC_MEM_TLBIE),
> +GEN_HANDLER(tlbie, 0x1F, 0x12, 0x09, 0x001F0001, PPC_MEM_TLBIE),
>  GEN_HANDLER(tlbsync, 0x1F, 0x16, 0x11, 0x03FFF801, PPC_MEM_TLBSYNC),
>  #if defined(TARGET_PPC64)
>  GEN_HANDLER(slbia, 0x1F, 0x12, 0x0F, 0x03FFFC01, PPC_SLBI),

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PULL for-2.5 v2 00/10] QOM devices patch queue 2015-11-19

2015-11-20 Thread Peter Maydell

On 19 November 2015 at 14:35, Andreas Färber  wrote:
> Hello Peter,
>
> This is my late QOM (devices) patch queue. Please pull.
>
> v2: GLib version incompatibility addressed, Reviewed-bys added.
>
> Regards,
> Andreas
>
> Cc: Peter Maydell 
> Cc: Daniel P. Berrange 
> Cc: Pavel Fedin 
>
> The following changes since commit 74fcbd22d20a2fbc1a47a7b00cce5bf98fd7be5f:
>
>   hw/misc: Add support for ADC controller in Xilinx Zynq 7000 (2015-11-12 
> 21:30:42 +)
>
> are available in the git repository at:
>
>   git://github.com/afaerber/qemu-cpu.git tags/qom-devices-for-peter
>
> for you to fetch changes up to 9f4aa7cef2214137db192b252f1d4fc1799d05c7:
>
>   MAINTAINERS: Add check-qom-{interface,proplist} to QOM (2015-11-19 15:15:40 
> +0100)
>
> 
> QOM infrastructure fixes and device conversions
>
> * Fix for properties on objects > 4 GiB
> * Performance improvements for QOM property handling
> * Assertion cleanups
> * MAINTAINERS additions

Applied, thanks (since the test failures seem to be intermittent and
not the fault of this patchset).

-- PMM

Re: [Qemu-devel] [PULL 00/14] Migration pull request

2015-11-20 Thread Peter Maydell

On 20 November 2015 at 09:38, Peter Lieven  wrote:
> I wonder if there is a glitch in the PIO implementation of test-ide.c. As far 
> as I understand the specs
> it is not allowed to read data while the BSY flag is set. With the following 
> change to the test-ide script
> the test does not race:
>
> diff --git a/tests/ide-test.c b/tests/ide-test.c
> index d1014bb..ab0489e 100644
> --- a/tests/ide-test.c
> +++ b/tests/ide-test.c
> @@ -728,6 +728,7 @@ static void cdrom_pio_impl(int nblocks)
>  for (i = 0; i < DIV_ROUND_UP(rxsize, limit); i++) {
>  size_t offset = i * (limit / 2);
>  size_t rem = (rxsize / 2) - offset;
> +ide_wait_clear(BSY);
>  for (j = 0; j < MIN((limit / 2), rem); j++) {
>  rx[offset + j] = le16_to_cpu(inw(IDE_BASE + reg_data));
>  }
>
> Note: in the old sync version of the ATAPI PIO implementation this could not 
> happen.

This certainly fixes the stalls for me, though I don't know enough
IDE to say whether it is the correct fix.

thanks
-- PMM

Re: [Qemu-devel] [PATCH V5 8/8] arm: xlnx-zynqmp: Add xlnx-dp and xlnx-dpdma

2015-11-20 Thread Alistair Francis

On Fri, Oct 16, 2015 at 7:11 PM,   wrote:
> From: KONRAD Frederic 
>
> This adds the DP and the DPDMA to the Zynq MP platform.
>
> Signed-off-by: KONRAD Frederic 
> Reviewed-by: Peter Crosthwaite 
> Tested-By: Hyun Kwon 
> ---
>  hw/arm/xlnx-zynqmp.c | 20 
>  include/hw/arm/xlnx-zynqmp.h |  5 +
>  2 files changed, 25 insertions(+)
>
> diff --git a/hw/arm/xlnx-zynqmp.c b/hw/arm/xlnx-zynqmp.c
> index b36ca3d..dfed5cd 100644
> --- a/hw/arm/xlnx-zynqmp.c
> +++ b/hw/arm/xlnx-zynqmp.c
> @@ -32,6 +32,12 @@
>  #define SATA_ADDR   0xFD0C
>  #define SATA_NUM_PORTS  2
>
> +#define DP_ADDR 0xfd4a
> +#define DP_IRQ  113
> +
> +#define DPDMA_ADDR  0xfd4c
> +#define DPDMA_IRQ   116
> +
>  static const uint64_t gem_addr[XLNX_ZYNQMP_NUM_GEMS] = {
>  0xFF0B, 0xFF0C, 0xFF0D, 0xFF0E,
>  };
> @@ -97,6 +103,11 @@ static void xlnx_zynqmp_init(Object *obj)
>
>  object_initialize(>sata, sizeof(s->sata), TYPE_SYSBUS_AHCI);
>  qdev_set_parent_bus(DEVICE(>sata), sysbus_get_default());
> +
> +object_initialize(>dp, sizeof(s->dp), TYPE_XLNX_DP);
> +qdev_set_parent_bus(DEVICE(>dp), sysbus_get_default());

New line

> +object_initialize(>dpdma, sizeof(s->dpdma), TYPE_XLNX_DPDMA);
> +qdev_set_parent_bus(DEVICE(>dpdma), sysbus_get_default());
>  }
>
>  static void xlnx_zynqmp_realize(DeviceState *dev, Error **errp)
> @@ -258,6 +269,15 @@ static void xlnx_zynqmp_realize(DeviceState *dev, Error 
> **errp)
>
>  sysbus_mmio_map(SYS_BUS_DEVICE(>sata), 0, SATA_ADDR);
>  sysbus_connect_irq(SYS_BUS_DEVICE(>sata), 0, gic_spi[SATA_INTR]);
> +
> +sysbus_mmio_map(SYS_BUS_DEVICE(>dp), 0, DP_ADDR);
> +sysbus_connect_irq(SYS_BUS_DEVICE(>dp), 0, gic_spi[DP_IRQ]);

New line

> +sysbus_mmio_map(SYS_BUS_DEVICE(>dpdma), 0, DPDMA_ADDR);
> +sysbus_connect_irq(SYS_BUS_DEVICE(>dpdma), 0, gic_spi[DPDMA_IRQ]);
> +object_property_set_bool(OBJECT(>dp), true, "realized", );
> +object_property_set_bool(OBJECT(>dpdma), true, "realized", );

Can you add something to check these errors?

Thanks,

Alistair

> +object_property_set_link(OBJECT(>dp), OBJECT(>dpdma), "dpdma",
> + _abort);
>  }
>
>  static Property xlnx_zynqmp_props[] = {
> diff --git a/include/hw/arm/xlnx-zynqmp.h b/include/hw/arm/xlnx-zynqmp.h
> index 4005a99..5a4d6cc 100644
> --- a/include/hw/arm/xlnx-zynqmp.h
> +++ b/include/hw/arm/xlnx-zynqmp.h
> @@ -24,6 +24,8 @@
>  #include "hw/char/cadence_uart.h"
>  #include "hw/ide/pci.h"
>  #include "hw/ide/ahci.h"
> +#include "hw/dma/xlnx_dpdma.h"
> +#include "hw/display/xlnx_dp.h"
>
>  #define TYPE_XLNX_ZYNQMP "xlnx,zynqmp"
>  #define XLNX_ZYNQMP(obj) OBJECT_CHECK(XlnxZynqMPState, (obj), \
> @@ -66,6 +68,9 @@ typedef struct XlnxZynqMPState {
>
>  char *boot_cpu;
>  ARMCPU *boot_cpu_ptr;
> +
> +XlnxDPState dp;
> +XlnxDPDMAState dpdma;
>  }  XlnxZynqMPState;
>
>  #define XLNX_ZYNQMP_H
> --
> 1.9.0
>
>

[Qemu-devel] [PATCH] block/qapi: Plug memory leak on query-block error path

2015-11-20 Thread Markus Armbruster

Spotted by Coverity.

Signed-off-by: Markus Armbruster 
---
 block/qapi.c | 8 +++-
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/block/qapi.c b/block/qapi.c
index d20262d..267f147 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -436,7 +436,9 @@ BlockInfoList *qmp_query_block(Error **errp)
 bdrv_query_info(blk, >value, _err);
 if (local_err) {
 error_propagate(errp, local_err);
-goto err;
+g_free(info);
+qapi_free_BlockInfoList(head);
+return NULL;
 }
 
 *p_next = info;
@@ -444,10 +446,6 @@ BlockInfoList *qmp_query_block(Error **errp)
 }
 
 return head;
-
- err:
-qapi_free_BlockInfoList(head);
-return NULL;
 }
 
 BlockStatsList *qmp_query_blockstats(bool has_query_nodes,
-- 
2.4.3

[Qemu-devel] [PATCH] numa: Clean up query-memdev error handling, plug leak

2015-11-20 Thread Markus Armbruster

qmp_query_memdev() doesn't fail.  Instead, it returns an empty list.
That's wrong.

Two error paths:

* When object_get_objects_root() returns null.  It never does, so
  simply drop the useless error handling.

* When query_memdev() fails.  This can happen, and the error to return
  is the one that query_memdev() currently leaks.  Passing the error
  from query_memdev() to qmp_query_memdev() isn't so simple, because
  object_child_foreach() is in the way.  Fixable, but I'd rather not
  try it in hard freeze.  Plug the leak, make up an error, and add a
  FIXME for the remaining work.

Screwed up in commit 76b5d85 "qmp: add query-memdev".

Signed-off-by: Markus Armbruster 
---
 numa.c | 10 --
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/numa.c b/numa.c
index fdfe294..a584e8e 100644
--- a/numa.c
+++ b/numa.c
@@ -568,6 +568,7 @@ static int query_memdev(Object *obj, void *opaque)
 
 return 0;
 error:
+error_free(err);
 g_free(m->value);
 g_free(m);
 
@@ -576,15 +577,12 @@ error:
 
 MemdevList *qmp_query_memdev(Error **errp)
 {
-Object *obj;
+Object *obj = object_get_objects_root();
 MemdevList *list = NULL;
 
-obj = object_get_objects_root();
-if (obj == NULL) {
-return NULL;
-}
-
 if (object_child_foreach(obj, query_memdev, ) != 0) {
+/* FIXME propagate the error query_memdev() throws away */
+error_setg(errp, "Unknown error");
 goto error;
 }
 
-- 
2.4.3

Re: [Qemu-devel] [PATCH v9 2/6] target-arm: kvm - implement software breakpoints

2015-11-20 Thread Peter Maydell

On 12 November 2015 at 16:20, Alex Bennée  wrote:
> These don't involve messing around with debug registers, just setting
> the breakpoint instruction in memory. GDB will not use this mechanism if
> it can't access the memory to write the breakpoint.
>
> All the kernel has to do is ensure the hypervisor traps the breakpoint
> exceptions and returns to userspace.
>
> Signed-off-by: Alex Bennée 
>
> --
> v2
>   - handle debug exit with new hsr exception info
>   - add verbosity to UNIMP message
> v3
>   - sync with kvm_cpu_synchronize_state() before checking PC.
>   - use internals.h defines
>   - use env->pc
>   - use proper format types
> v9
>   - add include for error_report
>   - define a brk_insn constant
> ---
>  target-arm/kvm.c | 90 
> 
>  1 file changed, 78 insertions(+), 12 deletions(-)
>
> diff --git a/target-arm/kvm.c b/target-arm/kvm.c
> index 79ef4c6..50f70ef 100644
> --- a/target-arm/kvm.c
> +++ b/target-arm/kvm.c
> @@ -17,6 +17,7 @@
>
>  #include "qemu-common.h"
>  #include "qemu/timer.h"
> +#include "qemu/error-report.h"
>  #include "sysemu/sysemu.h"
>  #include "sysemu/kvm.h"
>  #include "kvm_arm.h"
> @@ -516,9 +517,60 @@ MemTxAttrs kvm_arch_post_run(CPUState *cs, struct 
> kvm_run *run)
>  return MEMTXATTRS_UNSPECIFIED;
>  }
>
> +/* See v8 ARM ARM D7.2.27 ESR_ELx, Exception Syndrome Register
> + *
> + * To minimise translating between kernel and user-space the kernel
> + * ABI just provides user-space with the full exception syndrome
> + * register value to be decoded in QEMU.
> + */
> +
> +static int kvm_handle_debug(CPUState *cs, struct kvm_run *run)
> +{
> +struct kvm_debug_exit_arch *arch_info = >debug.arch;
> +int hsr_ec = arch_info->hsr >> ARM_EL_EC_SHIFT;

This doesn't build for 32-bit ARM:

target-arm/kvm.c:530:27: error: ‘struct kvm_debug_exit_arch’ has no
member named ‘hsr’
 int hsr_ec = arch_info->hsr >> ARM_EL_EC_SHIFT;

> +ARMCPU *cpu = ARM_CPU(cs);
> +CPUARMState *env = >env;
> +
> +/* Ensure PC is synchronised */
> +kvm_cpu_synchronize_state(cs);
> +
> +switch (hsr_ec) {
> +case EC_AA64_BKPT:
> +if (kvm_find_sw_breakpoint(cs, env->pc)) {
> +return true;
> +}
> +break;
> +default:
> +error_report("%s: unhandled debug exit (%"PRIx32", %"PRIx64")\n",
> + __func__, arch_info->hsr, env->pc);
> +}
> +
> +/* If we don't handle this it could be it really is for the
> +   guest to handle */
> +qemu_log_mask(LOG_UNIMP,
> +  "%s: re-injecting exception not yet implemented"
> +  " (0x%"PRIx32", %"PRIx64")\n",
> +  __func__, hsr_ec, env->pc);
> +
> +return false;
> +}
> +
>  int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
>  {
> -return 0;
> +int ret = 0;
> +
> +switch (run->exit_reason) {
> +case KVM_EXIT_DEBUG:
> +if (kvm_handle_debug(cs, run)) {
> +ret = EXCP_DEBUG;
> +} /* otherwise return to guest */
> +break;
> +default:
> +qemu_log_mask(LOG_UNIMP, "%s: un-handled exit reason %d\n",
> +  __func__, run->exit_reason);
> +break;
> +}
> +return ret;
>  }
>
>  bool kvm_arch_stop_on_emulation_error(CPUState *cs)
> @@ -543,14 +595,34 @@ int kvm_arch_on_sigbus(int code, void *addr)
>
>  void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
>  {
> -qemu_log_mask(LOG_UNIMP, "%s: not implemented\n", __func__);
> +if (kvm_sw_breakpoints_active(cs)) {
> +dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;

This won't compile on 32-bit ARM either:

target-arm/kvm.c:634:38: error: ‘KVM_GUESTDBG_USE_SW_BP’ undeclared
(first use in this function)
 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;

> +}
>  }
>
> -int kvm_arch_insert_sw_breakpoint(CPUState *cs,
> -  struct kvm_sw_breakpoint *bp)
> +/* C6.6.29 BRK instruction */
> +static const uint32_t brk_insn = 0xd420;

This is the A64 breakpoint instruction, so why is it in the common-to-32-and-64
source file? How about the A32 and T16 breakpoint insns?

> +
> +int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
>  {
> -qemu_log_mask(LOG_UNIMP, "%s: not implemented\n", __func__);
> -return -EINVAL;
> +
> +if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)>saved_insn, 4, 0) ||
> +cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)_insn, 4, 1)) {
> +return -EINVAL;
> +}

Should we allow insertion of sw breakpoint insns if the kernel doesn't
implement KVM_EXIT_DEBUG and reporting the ESR to us?

> +return 0;
> +}
> +
> +int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
> +{
> +static uint32_t brk;
> +
> +if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *), 4, 0) ||
> +

Re: [Qemu-devel] [PATCH v9 3/6] target-arm: kvm - support for single step

2015-11-20 Thread Peter Maydell

On 12 November 2015 at 16:20, Alex Bennée  wrote:
> This adds support for single-step. There isn't much to do on the QEMU
> side as after we set-up the request for single step via the debug ioctl
> it is all handled within the kernel.
>
> Signed-off-by: Alex Bennée 
>
> ---
> v2
>   - convert to using HSR_EC
> v3
>   - use internals.h definitions
> ---
>  target-arm/kvm.c | 10 ++
>  1 file changed, 10 insertions(+)
>
> diff --git a/target-arm/kvm.c b/target-arm/kvm.c
> index 50f70ef..d505a7e 100644
> --- a/target-arm/kvm.c
> +++ b/target-arm/kvm.c
> @@ -535,6 +535,13 @@ static int kvm_handle_debug(CPUState *cs, struct kvm_run 
> *run)
>  kvm_cpu_synchronize_state(cs);
>
>  switch (hsr_ec) {
> +case EC_SOFTWARESTEP:
> +if (cs->singlestep_enabled) {
> +return true;
> +} else {
> +error_report("Came out of SINGLE STEP when not enabled");
> +}
> +break;
>  case EC_AA64_BKPT:
>  if (kvm_find_sw_breakpoint(cs, env->pc)) {
>  return true;
> @@ -595,6 +602,9 @@ int kvm_arch_on_sigbus(int code, void *addr)
>
>  void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
>  {
> +if (cs->singlestep_enabled) {
> +dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
> +}

Doesn't kvm_update_guest_debug() already set these bits, or am
I misreading it?

>  if (kvm_sw_breakpoints_active(cs)) {
>  dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
>  }
> --
> 2.6.3

thanks
-- PMM

Re: [Qemu-devel] [PATCH v9 4/6] target-arm: kvm - add support for HW assisted debug

2015-11-20 Thread Peter Maydell

On 12 November 2015 at 16:20, Alex Bennée  wrote:
> This adds basic support for HW assisted debug. The ioctl interface to
> KVM allows us to pass an implementation defined number of break and
> watch point registers. When KVM_GUESTDBG_USE_HW is specified these
> debug registers will be installed in place on the world switch into the
> guest.
>
> The hardware is actually capable of more advanced matching but it is
> unclear if this expressiveness is available via the gdbstub protocol.
>
> Signed-off-by: Alex Bennée 
>
> ---
> v2
>   - correct setting of PMC/BAS/MASK
>   - improved commentary
>   - added helper function to check watchpoint in range
>   - fix find/deletion of watchpoints
> v3
>   - use internals.h definitions
> v6
>   - KVM_GUESTDBG_USE_HW_BP->KVM_GUESTDBG_USE_HW
>   - renamed some helper functions to avoid confusion
> v9
>   - fix merge conflicts on re-base
>   - rm asm/ptrace.h include
>   - add additional commentry for hw breakpoints
>   - explain gdb's model for HW bkpts
>   - fix up spacing, formatting as per checkpatch
>   - better PAC values
>   - use is_power_of_2
>   - use _arm_ fn naming and add docs
>   - add a CPUWatchpoint structure for reporting
>   - replace manual array manipulation with g_array abstraction
> ---
>  target-arm/kvm.c |  38 +++---
>  target-arm/kvm64.c   | 352 
> ++-
>  target-arm/kvm_arm.h |  38 ++
>  3 files changed, 406 insertions(+), 22 deletions(-)
>
> diff --git a/target-arm/kvm.c b/target-arm/kvm.c
> index d505a7e..1f57e92 100644
> --- a/target-arm/kvm.c
> +++ b/target-arm/kvm.c
> @@ -547,6 +547,20 @@ static int kvm_handle_debug(CPUState *cs, struct kvm_run 
> *run)
>  return true;
>  }
>  break;
> +case EC_BREAKPOINT:
> +if (kvm_arm_find_hw_breakpoint(cs, env->pc)) {
> +return true;
> +}
> +break;
> +case EC_WATCHPOINT:
> +{
> +CPUWatchpoint *wp = kvm_arm_find_hw_watchpoint(cs, arch_info->far);

This won't compile for 32-bit ARM.

> +if (wp) {
> +cs->watchpoint_hit = wp;
> +return true;
> +}
> +break;
> +}
>  default:
>  error_report("%s: unhandled debug exit (%"PRIx32", %"PRIx64")\n",
>   __func__, arch_info->hsr, env->pc);
> @@ -608,6 +622,10 @@ void kvm_arch_update_guest_debug(CPUState *cs, struct 
> kvm_guest_debug *dbg)
>  if (kvm_sw_breakpoints_active(cs)) {
>  dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
>  }
> +if (kvm_arm_hw_debug_active(cs)) {
> +dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW;
> +kvm_arm_copy_hw_debug_data(>arch);
> +}
>  }
>
>  /* C6.6.29 BRK instruction */
> @@ -635,26 +653,6 @@ int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct 
> kvm_sw_breakpoint *bp)
>  return 0;
>  }
>
> -int kvm_arch_insert_hw_breakpoint(target_ulong addr,
> -  target_ulong len, int type)
> -{
> -qemu_log_mask(LOG_UNIMP, "%s: not implemented\n", __func__);
> -return -EINVAL;
> -}

You've moved these functions into kvm64.c but haven't provided
a stub version in kvm32.c...

> -
> -int kvm_arch_remove_hw_breakpoint(target_ulong addr,
> -  target_ulong len, int type)
> -{
> -qemu_log_mask(LOG_UNIMP, "%s: not implemented\n", __func__);
> -return -EINVAL;
> -}
> -
> -
> -void kvm_arch_remove_all_hw_breakpoints(void)
> -{
> -qemu_log_mask(LOG_UNIMP, "%s: not implemented\n", __func__);
> -}
> -
>  void kvm_arch_init_irq_routing(KVMState *s)
>  {
>  }
> diff --git a/target-arm/kvm64.c b/target-arm/kvm64.c
> index d087794..c468324 100644
> --- a/target-arm/kvm64.c
> +++ b/target-arm/kvm64.c
> @@ -2,6 +2,7 @@
>   * ARM implementation of KVM hooks, 64 bit specific code
>   *
>   * Copyright Mian-M. Hamayun 2013, Virtual Open Systems
> + * Copyright Alex Bennée 2014, Linaro
>   *
>   * This work is licensed under the terms of the GNU GPL, version 2 or later.
>   * See the COPYING file in the top-level directory.
> @@ -12,12 +13,17 @@
>  #include 
>  #include 
>  #include 
> +#include 
>
> +#include 
>  #include 
>
>  #include "config-host.h"
>  #include "qemu-common.h"
>  #include "qemu/timer.h"
> +#include "qemu/host-utils.h"
> +#include "qemu/error-report.h"
> +#include "exec/gdbstub.h"
>  #include "sysemu/sysemu.h"
>  #include "sysemu/kvm.h"
>  #include "kvm_arm.h"
> @@ -27,20 +33,362 @@
>
>  static bool have_guest_debug;
>
> +/*
> + * Although the ARM implementation of hardware assisted debugging
> + * allows for different breakpoints per-core the current GDB interface

Comma after "per-core".

> + * treats them as a global pool of registers (which seems to be the
> + * case for x86, ppc and s390). As a result we store one copy of
> + * registers which is used for all active cores.
> + *
> + * Write access is serialised by virtue of the

Re: [Qemu-devel] [PULL 08/15] i440fx: print an error message if user tries to enable iommu

2015-11-20 Thread Bandan Das

"Michael S. Tsirkin"  writes:

> On Thu, Nov 19, 2015 at 10:00:38PM +0100, Markus Armbruster wrote:
>> "Michael S. Tsirkin"  writes:
>> 
>> > On Thu, Nov 19, 2015 at 03:38:03PM -0500, Bandan Das wrote:
>> >> "Michael S. Tsirkin"  writes:
>> >> 
>> >> > From: Bandan Das 
>> >> >
>> >> > There's no indication of any sort that i440fx doesn't support
>> >> > "iommu=on"
>> >> 
>> >> Oh, Markus quite didn't like this approach because this is
>> >> true for all other machines too. Anyway, I will keep in
>> >> mind to take care of this when I post a generic patch. 
>> >
>> > Do you think I should revert this one then?
>> 
>> The patch isn't wrong, it merely addresses only one special case of a
>> generic issue.  Probably the most important case in practice.  If I
>> understood Bandan correctly, he intended to drop this patch and work on
>> a general solution.  As far as I'm concerned, you can keep this patch if
>> dropping it is inconvenient.
>
> Bandan, I suggest you include the revert in your patchset
> when it's ready then. Maybe post 2.5.

Yes, will do. Thanks.

Re: [Qemu-devel] [Intel-gfx] [Announcement] 2015-Q3 release of XenGT - a Mediated Graphics Passthrough Solution from Intel

2015-11-20 Thread Alex Williamson

On Fri, 2015-11-20 at 07:09 +, Tian, Kevin wrote:
> > From: Alex Williamson [mailto:alex.william...@redhat.com]
> > Sent: Friday, November 20, 2015 4:03 AM
> > 
> > > >
> > > > The proposal is therefore that GPU vendors can expose vGPUs to
> > > > userspace, and thus to QEMU, using the VFIO API.  For instance, vfio
> > > > supports modular bus drivers and IOMMU drivers.  An intel-vfio-gvt-d
> > > > module (or extension of i915) can register as a vfio bus driver, create
> > > > a struct device per vGPU, create an IOMMU group for that device, and
> > > > register that device with the vfio-core.  Since we don't rely on the
> > > > system IOMMU for GVT-d vGPU assignment, another vGPU vendor driver (or
> > > > extension of the same module) can register a "type1" compliant IOMMU
> > > > driver into vfio-core.  From the perspective of QEMU then, all of the
> > > > existing vfio-pci code is re-used, QEMU remains largely unaware of any
> > > > specifics of the vGPU being assigned, and the only necessary change so
> > > > far is how QEMU traverses sysfs to find the device and thus the IOMMU
> > > > group leading to the vfio group.
> > >
> > > GVT-g requires to pin guest memory and query GPA->HPA information,
> > > upon which shadow GTTs will be updated accordingly from (GMA->GPA)
> > > to (GMA->HPA). So yes, here a dummy or simple "type1" compliant IOMMU
> > > can be introduced just for this requirement.
> > >
> > > However there's one tricky point which I'm not sure whether overall
> > > VFIO concept will be violated. GVT-g doesn't require system IOMMU
> > > to function, however host system may enable system IOMMU just for
> > > hardening purpose. This means two-level translations existing (GMA->
> > > IOVA->HPA), so the dummy IOMMU driver has to request system IOMMU
> > > driver to allocate IOVA for VMs and then setup IOVA->HPA mapping
> > > in IOMMU page table. In this case, multiple VM's translations are
> > > multiplexed in one IOMMU page table.
> > >
> > > We might need create some group/sub-group or parent/child concepts
> > > among those IOMMUs for thorough permission control.
> > 
> > My thought here is that this is all abstracted through the vGPU IOMMU
> > and device vfio backends.  It's the GPU driver itself, or some vfio
> > extension of that driver, mediating access to the device and deciding
> > when to configure GPU MMU mappings.  That driver has access to the GPA
> > to HVA translations thanks to the type1 complaint IOMMU it implements
> > and can pin pages as needed to create GPA to HPA mappings.  That should
> > give it all the pieces it needs to fully setup mappings for the vGPU.
> > Whether or not there's a system IOMMU is simply an exercise for that
> > driver.  It needs to do a DMA mapping operation through the system IOMMU
> > the same for a vGPU as if it was doing it for itself, because they are
> > in fact one in the same.  The GMA to IOVA mapping seems like an internal
> > detail.  I assume the IOVA is some sort of GPA, and the GMA is managed
> > through mediation of the device.
> 
> Sorry I'm not familiar with VFIO internal. My original worry is that system 
> IOMMU for GPU may be already claimed by another vfio driver (e.g. host kernel
> wants to harden gfx driver from rest sub-systems, regardless of whether vGPU 
> is created or not). In that case vGPU IOMMU driver shouldn't manage system
> IOMMU directly.

There are different APIs for the IOMMU depending on how it's being use.
If the IOMMU is being used for inter-device isolation in the host, then
the DMA API (ex. dma_map_page) transparently makes use of the IOMMU.
When we're doing device assignment, we make use of the IOMMU API which
allows more explicit control (ex. iommu_domain_alloc,
iommu_attach_device, iommu_map, etc).  A vGPU is not an SR-IOV VF, it
doesn't have a unique requester ID that allows the IOMMU to
differentiate one vGPU from another, or vGPU from GPU.  All mappings for
vGPUs need to occur for the GPU.  It's therefore the responsibility of
the GPU driver, or this vfio extension of that driver, that needs to
perform the IOMMU mapping for the vGPU.

My expectation is therefore that once the GMA to IOVA mapping is
configured in the GPU MMU, the IOVA to HPA needs to be programmed, as if
the GPU driver was performing the setup itself, which it is.  Before the
device mediation that triggered the mapping setup is complete, the GPU
MMU and the system IOMMU (if preset) should be configured to enable that
DMA.  The GPU MMU provides the isolation of the vGPU, the system IOMMU
enable the DMA to occur.

> btw, curious today how VFIO coordinates with system IOMMU driver regarding
> to whether a IOMMU is used to control device assignment, or used for kernel 
> hardening. Somehow two are conflicting since different address spaces are
> concerned (GPA vs. IOVA)...

When devices unbind from native host drivers, any previous IOMMU
mappings and domains are removed.  These are typically created via the
DMA API above.  The

Re: [Qemu-devel] [PATCH v9 5/6] target-arm: kvm - re-inject guest debug exceptions

2015-11-20 Thread Peter Maydell

On 12 November 2015 at 16:20, Alex Bennée  wrote:
> From: Alex Bennée 
>
> If we can't find details for the debug exception in our debug state
> then we can assume the exception is due to debugging inside the guest.
> To inject the exception into the guest state we re-use the TCG exception
> code (do_interupt).

"do_interrupt".

>
> However while guest debugging is in effect we currently can't handle the
> guest using single step which is heavily used by GDB.

Can you expand this to be clearer about what the problem is here?
Is this a thing fixed by this commit or a remaining issue after it?

> Signed-off-by: Alex Bennée 
>
> ---
> v5:
>   - new for v5
> ---
>  target-arm/helper-a64.c | 12 ++--
>  target-arm/kvm.c| 27 +++
>  2 files changed, 29 insertions(+), 10 deletions(-)
>
> diff --git a/target-arm/helper-a64.c b/target-arm/helper-a64.c
> index deb8dbe..fc3ccdf 100644
> --- a/target-arm/helper-a64.c
> +++ b/target-arm/helper-a64.c
> @@ -25,6 +25,7 @@
>  #include "qemu/bitops.h"
>  #include "internals.h"
>  #include "qemu/crc32c.h"
> +#include "sysemu/kvm.h"
>  #include  /* For crc32 */
>
>  /* C2.4.7 Multiply and divide */
> @@ -469,7 +470,8 @@ void aarch64_cpu_do_interrupt(CPUState *cs)
>new_el);
>  if (qemu_loglevel_mask(CPU_LOG_INT)
>  && !excp_is_internal(cs->exception_index)) {
> -qemu_log_mask(CPU_LOG_INT, "...with ESR 0x%" PRIx32 "\n",
> +qemu_log_mask(CPU_LOG_INT, "...with ESR %x/0x%" PRIx32 "\n",
> +  env->exception.syndrome >> ARM_EL_EC_SHIFT,
>env->exception.syndrome);
>  }
>
> @@ -535,6 +537,12 @@ void aarch64_cpu_do_interrupt(CPUState *cs)
>  aarch64_restore_sp(env, new_el);
>
>  env->pc = addr;
> -cs->interrupt_request |= CPU_INTERRUPT_EXITTB;
> +
> +qemu_log_mask(CPU_LOG_INT, "...to EL%d PC 0x%" PRIx64 " PSTATE 0x%x\n",
> +  new_el, env->pc, pstate_read(env));
> +
> +if (!kvm_enabled()) {
> +cs->interrupt_request |= CPU_INTERRUPT_EXITTB;
> +}
>  }
>  #endif
> diff --git a/target-arm/kvm.c b/target-arm/kvm.c
> index 1f57e92..4ac177a 100644
> --- a/target-arm/kvm.c
> +++ b/target-arm/kvm.c
> @@ -529,9 +529,10 @@ static int kvm_handle_debug(CPUState *cs, struct kvm_run 
> *run)
>  struct kvm_debug_exit_arch *arch_info = >debug.arch;
>  int hsr_ec = arch_info->hsr >> ARM_EL_EC_SHIFT;
>  ARMCPU *cpu = ARM_CPU(cs);
> +CPUClass *cc = CPU_GET_CLASS(cs);
>  CPUARMState *env = >env;
>
> -/* Ensure PC is synchronised */
> +/* Ensure all state is synchronised */

You might as well have just written the comment like that to start with :-)

>  kvm_cpu_synchronize_state(cs);
>
>  switch (hsr_ec) {
> @@ -539,7 +540,14 @@ static int kvm_handle_debug(CPUState *cs, struct kvm_run 
> *run)
>  if (cs->singlestep_enabled) {
>  return true;
>  } else {
> -error_report("Came out of SINGLE STEP when not enabled");
> +/*
> + * The kernel should have supressed the guests ability to

"suppressed". "guest's".

> + * single step at this point so something has gone wrong.
> + */
> +error_report("%s: guest single-step while debugging unsupported"
> + " (%"PRIx64", %"PRIx32")\n",
> + __func__, env->pc, arch_info->hsr);
> +return false;

Why didn't we just write the error_report this way to start with?

>  }
>  break;
>  case EC_AA64_BKPT:
> @@ -564,14 +572,17 @@ static int kvm_handle_debug(CPUState *cs, struct 
> kvm_run *run)
>  default:
>  error_report("%s: unhandled debug exit (%"PRIx32", %"PRIx64")\n",
>   __func__, arch_info->hsr, env->pc);
> +return false;

Might as well put this "return false;" in the original code that
adds the default case, rather than changing the control flow in this
patch.

>  }
>
> -/* If we don't handle this it could be it really is for the
> -   guest to handle */
> -qemu_log_mask(LOG_UNIMP,
> -  "%s: re-injecting exception not yet implemented"
> -  " (0x%"PRIx32", %"PRIx64")\n",
> -  __func__, hsr_ec, env->pc);
> +/* If we are not handling the debug exception it must belong to
> + * the guest. Let's re-use the existing TCG interrupt code to set
> + * everything up properly

Missing trailing ".".

> + */
> +cs->exception_index = EXCP_BKPT;
> +env->exception.syndrome = arch_info->hsr;
> +env->exception.vaddress = arch_info->far;

You need to set env->exception.target_el to 1 as well.

> +cc->do_interrupt(cs);
>
>  return false;
>  }
> --
> 2.6.3
>

thanks
-- PMM

Re: [Qemu-devel] [PATCH for 2.6 3/3] hbitmap: Drop "granularity"

2015-11-20 Thread Vladimir Sementsov-Ogievskiy


On 20.11.2015 12:59, Fam Zheng wrote:

Sometimes confused with the granularity with coarse levels in HBitmap, the
granularity in the hbitmap_alloc is not an essential concept of a bitmap.  Now
that all callers except the test code use zero, it's possible to drop the
parameter to make the interface cleaner and more intuitive.

Test code of hbitmap granularity is removed together.

Signed-off-by: Fam Zheng 
---
  block.c|   4 +-
  include/qemu/hbitmap.h |  20 +
  tests/test-hbitmap.c   | 206 -
  util/hbitmap.c |  64 +++
  4 files changed, 49 insertions(+), 245 deletions(-)

diff --git a/block.c b/block.c
index e225050..86b32a0 100644
--- a/block.c
+++ b/block.c
@@ -3183,7 +3183,7 @@ BdrvDirtyBitmap 
*bdrv_create_dirty_bitmap(BlockDriverState *bs,
  return NULL;
  }
  bitmap = g_new0(BdrvDirtyBitmap, 1);
-bitmap->bitmap = hbitmap_alloc(bitmap_size, 0);
+bitmap->bitmap = hbitmap_alloc(bitmap_size);
  bitmap->gran_shift = gran_shift;
  bitmap->size = bitmap_size;
  bitmap->name = g_strdup(name);
@@ -3453,7 +3453,7 @@ void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, 
HBitmap **out)
  hbitmap_reset_all(bitmap->bitmap);
  } else {
  HBitmap *backup = bitmap->bitmap;
-bitmap->bitmap = hbitmap_alloc(bitmap->size, 0);
+bitmap->bitmap = hbitmap_alloc(bitmap->size);
  *out = backup;
  }
  }
diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h
index bb94a00..0f81483 100644
--- a/include/qemu/hbitmap.h
+++ b/include/qemu/hbitmap.h
@@ -39,9 +39,6 @@ typedef struct HBitmapIter HBitmapIter;
  struct HBitmapIter {
  const HBitmap *hb;
  
-/* Copied from hb for access in the inline functions (hb is opaque).  */

-int granularity;
-
  /* Entry offset into the last-level array of longs.  */
  size_t pos;
  
@@ -54,15 +51,10 @@ struct HBitmapIter {

  /**
   * hbitmap_alloc:
   * @size: Number of bits in the bitmap.
- * @granularity: Granularity of the bitmap.  Aligned groups of 2^@granularity
- * bits will be represented by a single bit.  Each operation on a
- * range of bits first rounds the bits to determine which group they land
- * in, and then affect the entire set; iteration will only visit the first
- * bit of each group.
   *
   * Allocate a new HBitmap.
   */
-HBitmap *hbitmap_alloc(uint64_t size, int granularity);
+HBitmap *hbitmap_alloc(uint64_t size);
  
  /**

   * hbitmap_truncate:
@@ -96,14 +88,6 @@ bool hbitmap_merge(HBitmap *a, const HBitmap *b);
  bool hbitmap_empty(const HBitmap *hb);
  
  /**

- * hbitmap_granularity:
- * @hb: HBitmap to operate on.
- *
- * Return the granularity of the HBitmap.
- */
-int hbitmap_granularity(const HBitmap *hb);
-
-/**
   * hbitmap_count:
   * @hb: HBitmap to operate on.
   *
@@ -204,7 +188,7 @@ static inline int64_t hbitmap_iter_next(HBitmapIter *hbi)
  hbi->cur[HBITMAP_LEVELS - 1] = cur & (cur - 1);
  item = ((uint64_t)hbi->pos << BITS_PER_LEVEL) + ctzl(cur);
  
-return item << hbi->granularity;

+return item;
  }
  
  /**

diff --git a/tests/test-hbitmap.c b/tests/test-hbitmap.c
index abcea0c..dc8485a 100644
--- a/tests/test-hbitmap.c
+++ b/tests/test-hbitmap.c
@@ -26,7 +26,6 @@ typedef struct TestHBitmapData {
  unsigned long *bits;
  size_t size;
  size_t old_size;
-intgranularity;
  } TestHBitmapData;
  
  
@@ -70,17 +69,16 @@ static void hbitmap_test_check(TestHBitmapData *data,

  }
  
  if (first == 0) {

-g_assert_cmpint(count << data->granularity, ==, 
hbitmap_count(data->hb));
+g_assert_cmpint(count, ==, hbitmap_count(data->hb));
  }
  }
  
  /* This is provided instead of a test setup function so that the sizes

 are kept in the test functions (and not in main()) */
-static void hbitmap_test_init(TestHBitmapData *data,
-  uint64_t size, int granularity)
+static void hbitmap_test_init(TestHBitmapData *data, uint64_t size)
  {
  size_t n;
-data->hb = hbitmap_alloc(size, granularity);
+data->hb = hbitmap_alloc(size);
  
  n = (size + BITS_PER_LONG - 1) / BITS_PER_LONG;

  if (n == 0) {
@@ -88,7 +86,6 @@ static void hbitmap_test_init(TestHBitmapData *data,
  }
  data->bits = g_new0(unsigned long, n);
  data->size = size;
-data->granularity = granularity;
  if (size) {
  hbitmap_test_check(data, 0);
  }
@@ -158,9 +155,7 @@ static void hbitmap_test_set(TestHBitmapData *data,
  data->bits[pos] |= 1UL << bit;
  }
  
-if (data->granularity == 0) {

-hbitmap_test_check(data, 0);
-}
+hbitmap_test_check(data, 0);
  }
  
  /* Reset a range in the HBitmap and in the shadow "simple" bitmap.

@@ -177,9 +172,7 @@ static void hbitmap_test_reset(TestHBitmapData *data,
  data->bits[pos] &= ~(1UL << bit);
  }
  
-if (data->granularity == 0) {

-

Re: [Qemu-devel] [PATCH] tests: fix cdrom_pio_impl in ide-test

2015-11-20 Thread John Snow



On 11/20/2015 09:37 AM, Peter Maydell wrote:
> On 20 November 2015 at 14:29, Peter Lieven  wrote:
>> The check for the cleared BSY flag has to be performed
>> before each data transfer and not just before the
>> first one.
>>
>> Commit 5f81724d revealed this glitch as the BSY flag
>> was not set in ATAPI PIO transfers before.
>>
>> While at it fix the desciptions and add a comment before
>> the nested for loop that transfers the data.
>>
>> Signed-off-by: Peter Lieven 
> 
> If the IDE folks can review this I'd like to apply it
> direct to master this afternoon so we can tag and roll rc1
> today.
> 
> thanks
> -- PMM
> 

Please do.

--js

Re: [Qemu-devel] [PATCH v9 1/6] target-arm: kvm64 - introduce kvm_arm_init_debug()

2015-11-20 Thread Alex Bennée


Peter Maydell  writes:

> On 20 November 2015 at 15:05, Peter Maydell  wrote:
>> On 12 November 2015 at 16:20, Alex Bennée  wrote:
>>> As we haven't always had guest debug support we need to probe for it.
>>> Additionally we don't do this in the start-up capability code so we
>>> don't fall over on old kernels.
>>>
>>> Signed-off-by: Alex Bennée 
>>> ---
>>>  target-arm/kvm64.c | 18 ++
>>>  1 file changed, 18 insertions(+)
>>>
>>> diff --git a/target-arm/kvm64.c b/target-arm/kvm64.c
>>> index ceebfeb..d087794 100644
>>> --- a/target-arm/kvm64.c
>>> +++ b/target-arm/kvm64.c
>>> @@ -25,6 +25,22 @@
>>>  #include "internals.h"
>>>  #include "hw/arm/arm.h"
>>>
>>> +static bool have_guest_debug;
>>> +
>>> +/**
>>> + * kvm_arm_init_debug()
>>> + * @cs: CPUState
>>> + *
>>> + * Check for guest debug capabilities.
>>> + *
>>> + */
>>> +static void kvm_arm_init_debug(CPUState *cs)
>>> +{
>>> +have_guest_debug = kvm_check_extension(cs->kvm_state,
>>> +   KVM_CAP_SET_GUEST_DEBUG);
>>> +return;
>>> +}
>>> +
>>>  static inline void set_feature(uint64_t *features, int feature)
>>>  {
>>>  *features |= 1ULL << feature;
>>> @@ -121,6 +137,8 @@ int kvm_arch_init_vcpu(CPUState *cs)
>>>  }
>>>  cpu->mp_affinity = mpidr & ARM64_AFFINITY_MASK;
>>>
>>> +kvm_arm_init_debug(cs);
>>> +
>>>  return kvm_arm_init_cpreg_list(cpu);
>>>  }
>>
>> I assume in practice the kernel guarantees that either all
>> CPUs have the SET_GUEST_DEBUG cap, or none do :-)
>>
>> Reviewed-by: Peter Maydell 
>
> ...except I've just noticed that nothing else in this patchset
> ever reads the have_guest_debug bool we just set, so what is
> the purpose of this patch?

Oops, maybe to point out my stupidity ;-)

But yes the SET_GUEST_DEBUG cap is kernel wide.

>
> thanks
> -- PMM


-- 
Alex Bennée

Re: [Qemu-devel] [PATCH v9 6/6] tests/guest-debug: introduce basic gdbstub tests

2015-11-20 Thread Peter Maydell

On 12 November 2015 at 16:20, Alex Bennée  wrote:
> From: Alex Bennée 
>
> The aim of these tests is to combine with an appropriate kernel
> image (with symbol-file vmlinux) and check it behaves as it should.
> Given a kernel it checks:
>
>   - single step
>   - software breakpoint
>   - hardware breakpoint
>   - access, read and write watchpoints
>
> On success it returns 0 to the calling process.
>
> I've not plumbed this into the "make check" logic though as we need a
> solution for providing non-host binaries to the tests. However the test
> is structured to work with pretty much any Linux kernel image as it
> uses the basic kernel_init code which is common across architectures.

Do these tests pass if you run them on the TCG QEMU, just out
of interest?

I'm not a great fan of tests that aren't in 'make check'
because IME they just bitrot, but as you say we have no
sensible approach for handling tests that need to run real
guest code :-(

thanks
-- PMM

Re: [Qemu-devel] ivshmem property size should be a size, not a string

2015-11-20 Thread Marc-André Lureau

Hi

- Original Message -
> Everybody's favourite device model has "size" property.  It's declared
> as *string*
> 
> DEFINE_PROP_STRING("size", IVShmemState, sizearg),
> 
> which gets converted to a size manually in the realize method:
> 
> } else if (s->sizearg == NULL) {
> s->ivshmem_size = 4 << 20; /* 4 MB default */
> } else {
> char *end;
> int64_t size = qemu_strtosz(s->sizearg, );
> if (size < 0 || *end != '\0' || !is_power_of_2(size)) {
> error_setg(errp, "Invalid size %s", s->sizearg);
> return;
> }
> s->ivshmem_size = size;
> }
> 
> This is *wrong*.  Impact, as far as I can tell:
> 
> * -device ivshmem,help shows the property as 'str' instead of 'size'.
> 
>   Unhelpful, but hardly show-stopper.
> 
> * On the command line and in HMP, ivshmem's size is parsed differently
>   than other size properties.  In particular, a number without a suffix
>   is normally interpreted as bytes, except for ivshmem, where it's
>   Mebibytes.
> 
>   Ugly inconsistency, but hardly the only one.
> 
> * In QMP, the size must be given as JSON string instead of JSON number,
>   and size suffixes are accepted.  Example: "size": "512k" instead of
>   "size": 524288.
> 
>   Right now, this violation of QMP rules is tolerable (barely), because
>   device_add breaks some of these rules already.  However, one goal of
>   the current work on QAPI is to support a QMP command to plug devices
>   that doesn't break QMP rules, and then this violation will stand out.
> 
>   Therefore, I want it fixed now, before ivshmem gets used in anger.
> 
>   A straight fix of size isn't fully backwards compatible: suffixes no
>   longer work in QMP, and you need to use an 'M' suffix to get Mebibytes
>   on command line and HMP.

I don't know the rules to break properties in qemu, but I would prefer to avoid 
it if possible.

>   If that's unacceptable, we'll have to provide a new, fixed property,
>   and deprecate size.

Sounds a better alternative to me.

Re: [Qemu-devel] [PATCH for 2.6 2/3] block: Hide HBitmap in block dirty bitmap interface

2015-11-20 Thread Vladimir Sementsov-Ogievskiy


On 20.11.2015 12:59, Fam Zheng wrote:

HBitmap is an implementation detail of block dirty bitmap that should be hidden
from users. Introduce a BdrvDirtyBitmapIter to encapsulate the underlying
HBitmapIter.

A small difference in the interface is, before, an HBitmapIter is initialized
in place, now the new BdrvDirtyBitmapIter must be dynamically allocated because
the structure definition is in block.c.

Two current users are converted too.

Signed-off-by: Fam Zheng 
---
  block.c   | 79 ++-
  block/backup.c| 14 +
  block/mirror.c| 14 +
  include/block/block.h |  9 --
  4 files changed, 82 insertions(+), 34 deletions(-)

diff --git a/block.c b/block.c
index 3a7324b..e225050 100644
--- a/block.c
+++ b/block.c
@@ -63,14 +63,22 @@
   * or enabled. A frozen bitmap can only abdicate() or reclaim().
   */
  struct BdrvDirtyBitmap {
+int gran_shift; /* Bits to right shift from sector number to
+   bit index. */
  HBitmap *bitmap;/* Dirty sector bitmap implementation */
  BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
  char *name; /* Optional non-empty unique ID */
  int64_t size;   /* Size of the bitmap (Number of sectors) */
  bool disabled;  /* Bitmap is read-only */
+int active_iterators;   /* How many iterators are active */
  QLIST_ENTRY(BdrvDirtyBitmap) list;
  };
  
+struct BdrvDirtyBitmapIter {

+HBitmapIter hbi;
+BdrvDirtyBitmap *bitmap;
+};
+
  #define NOT_DONE 0x7fff /* used while emulated sync operation in progress 
*/
  
  struct BdrvStates bdrv_states = QTAILQ_HEAD_INITIALIZER(bdrv_states);

@@ -3157,24 +3165,26 @@ BdrvDirtyBitmap 
*bdrv_create_dirty_bitmap(BlockDriverState *bs,
  {
  int64_t bitmap_size;
  BdrvDirtyBitmap *bitmap;
-uint32_t sector_granularity;
+int gran_shift;
  
  assert((granularity & (granularity - 1)) == 0);

+/* Caller should check that */
+assert(granularity >= BDRV_SECTOR_SIZE);
  
+gran_shift = ctz32(granularity) - BDRV_SECTOR_BITS;

  if (name && bdrv_find_dirty_bitmap(bs, name)) {
  error_setg(errp, "Bitmap already exists: %s", name);
  return NULL;
  }
-sector_granularity = granularity >> BDRV_SECTOR_BITS;
-assert(sector_granularity);
-bitmap_size = bdrv_nb_sectors(bs);
+bitmap_size = DIV_ROUND_UP(bdrv_getlength(bs), granularity);
  if (bitmap_size < 0) {
  error_setg_errno(errp, -bitmap_size, "could not get length of 
device");
  errno = -bitmap_size;
  return NULL;
  }
  bitmap = g_new0(BdrvDirtyBitmap, 1);
-bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
+bitmap->bitmap = hbitmap_alloc(bitmap_size, 0);
+bitmap->gran_shift = gran_shift;
  bitmap->size = bitmap_size;
  bitmap->name = g_strdup(name);
  bitmap->disabled = false;
@@ -3293,9 +3303,10 @@ BdrvDirtyBitmap 
*bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
  static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
  {
  BdrvDirtyBitmap *bitmap;
-uint64_t size = bdrv_nb_sectors(bs);
  
  QLIST_FOREACH(bitmap, >dirty_bitmaps, list) {

+int64_t size = bdrv_nb_sectors(bs) >> bitmap->gran_shift;
+/* TODO: what if size < 0? */
  assert(!bdrv_dirty_bitmap_frozen(bitmap));
  hbitmap_truncate(bitmap->bitmap, size);
  bitmap->size = size;
@@ -3307,6 +3318,7 @@ void bdrv_release_dirty_bitmap(BlockDriverState *bs, 
BdrvDirtyBitmap *bitmap)
  BdrvDirtyBitmap *bm, *next;
  QLIST_FOREACH_SAFE(bm, >dirty_bitmaps, list, next) {
  if (bm == bitmap) {
+assert(!bitmap->active_iterators);
  assert(!bdrv_dirty_bitmap_frozen(bm));
  QLIST_REMOVE(bitmap, list);
  hbitmap_free(bitmap->bitmap);
@@ -3354,7 +3366,7 @@ BlockDirtyInfoList 
*bdrv_query_dirty_bitmaps(BlockDriverState *bs)
  int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t 
sector)
  {
  if (bitmap) {
-return hbitmap_get(bitmap->bitmap, sector);
+return hbitmap_get(bitmap->bitmap, sector >> bitmap->gran_shift);
  } else {
  return 0;
  }
@@ -3382,26 +3394,56 @@ uint32_t 
bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
  
  uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)

  {
-return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
+return BDRV_SECTOR_SIZE << bitmap->gran_shift;
  }
  
-void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)

+BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap,
+ uint64_t first_sector)
  {
-hbitmap_iter_init(hbi, bitmap->bitmap, 0);
+BdrvDirtyBitmapIter *iter = g_new(BdrvDirtyBitmapIter, 1);
+

Re: [Qemu-devel] [PATCH for-2.5] target-arm: Don't mask out bits [47:40] in LPAE descriptors for v8

2015-11-20 Thread Peter Maydell

On 20 November 2015 at 15:18, Laurent Desnogues
 wrote:
> Hello,
>
> On Fri, Nov 20, 2015 at 3:32 PM, Peter Maydell  
> wrote:
>> In an LPAE format descriptor in ARMv8 the address field extends
>> up to bit 47, not just bit 39. Correct the masking so we don't
>> give incorrect results if the output address size is greater
>> than 40 bits, as it can be for AArch64.
>>
>> (Note that we don't yet support the new-in-v8 Address Size fault which
>> should be generated if any translation table entry or TTBR contains
>> an address with non-zero bits above the most significant bit of the
>> maximum output address size.)
>>
>> Signed-off-by: Peter Maydell 

>> +/* The address field in the descriptor goes up to bit 39 for ARMv7
>> + * but up to bit 47 for ARMv8.
>> + */
>> +if (arm_feature(env, ARM_FEATURE_V8)) {
>> +descaddrmask = 0xf000ULL;
>> +} else {
>> +descaddrmask = 0xfff000ULL;
>> +}
>
> My understanding is that 48 bits are used if you are running AArch64
> code, and 40 bits are used for 32-bit code even on an ARMv8 CPU, so
> checking for ARM_FEATURE_V8 is perhaps not enough.

For v8 32-bit code the usable address width is only 40 bits, but
setting a bit in [47:40] causes an AddressSize fault on v8 (but not
v7). So the mask should be 48 bits for v8 regardless of 32-vs-64,
and when we support AddressSize faults we'll then check the upper
bits of the masked-out address and raise a fault if needed.

thanks
-- PMM

Re: [Qemu-devel] [PATCH] numa: Clean up query-memdev error handling, plug leak

2015-11-20 Thread Eduardo Habkost

On Fri, Nov 20, 2015 at 03:57:17PM +0100, Markus Armbruster wrote:
> Eduardo Habkost  writes:
> 
> > On Fri, Nov 20, 2015 at 02:00:40PM +0100, Markus Armbruster wrote:
> >> qmp_query_memdev() doesn't fail.  Instead, it returns an empty list.
> >> That's wrong.
> >> 
> >> Two error paths:
> >> 
> >> * When object_get_objects_root() returns null.  It never does, so
> >>   simply drop the useless error handling.
> >> 
> >> * When query_memdev() fails.  This can happen, and the error to return
> >>   is the one that query_memdev() currently leaks.  Passing the error
> >>   from query_memdev() to qmp_query_memdev() isn't so simple, because
> >>   object_child_foreach() is in the way.  Fixable, but I'd rather not
> >>   try it in hard freeze.  Plug the leak, make up an error, and add a
> >>   FIXME for the remaining work.
> >> 
> >> Screwed up in commit 76b5d85 "qmp: add query-memdev".
> >> 
> >> Signed-off-by: Markus Armbruster 
> >
> > Reviewed-by: Eduardo Habkost 
> >
> > Do you know how to trigger a query_memdev() error today, or is
> > just theoretical?
> 
> Theoretical; I tested by injecting an error with gdb.
> 
> query_memdev() fails exactly when some object_property_get_FOO() fails.
> If we decide such a failure would always be a programming error, we can
> pass _abort and simplify things.  Opinions?

The hostmem-backend property getters should never fail. Using
_abort on query_memdev() would make everything simpler.

(I would even use the HostMemoryBackend struct fields directly,
instead of QOM properties. Is there a good reason to use QOM to
fetch the data that's readily available in the C struct?)

-- 
Eduardo

Re: [Qemu-devel] [PATCH for 2.6 1/3] backup: Use Bitmap to replace "s->bitmap"

2015-11-20 Thread Vladimir Sementsov-Ogievskiy


On 20.11.2015 12:59, Fam Zheng wrote:

"s->bitmap" tracks done sectors, we only check bit states without using any
iterator which HBitmap is good for. Switch to "Bitmap" which is simpler and
more memory efficient.

Meanwhile, rename it to done_bitmap, to reflect the intention.

Signed-off-by: Fam Zheng 
---
  block/backup.c | 11 ++-
  1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/block/backup.c b/block/backup.c
index 3b39119..d408f98 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -22,6 +22,7 @@
  #include "qapi/qmp/qerror.h"
  #include "qemu/ratelimit.h"
  #include "sysemu/block-backend.h"
+#include "qemu/bitmap.h"
  
  #define BACKUP_CLUSTER_BITS 16

  #define BACKUP_CLUSTER_SIZE (1 << BACKUP_CLUSTER_BITS)
@@ -47,7 +48,7 @@ typedef struct BackupBlockJob {
  BlockdevOnError on_target_error;
  CoRwlock flush_rwlock;
  uint64_t sectors_read;
-HBitmap *bitmap;
+unsigned long *done_bitmap;
  QLIST_HEAD(, CowRequest) inflight_reqs;
  } BackupBlockJob;
  
@@ -113,7 +114,7 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,

  cow_request_begin(_request, job, start, end);
  
  for (; start < end; start++) {

-if (hbitmap_get(job->bitmap, start)) {
+if (test_bit(start, job->done_bitmap)) {
  trace_backup_do_cow_skip(job, start);
  continue; /* already copied */
  }
@@ -164,7 +165,7 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
  goto out;
  }
  
-hbitmap_set(job->bitmap, start, 1);

+bitmap_set(job->done_bitmap, start, 1);
  
  /* Publish progress, guest I/O counts as progress too.  Note that the

   * offset field is an opaque progress value, it is not a disk offset.
@@ -394,7 +395,7 @@ static void coroutine_fn backup_run(void *opaque)
  start = 0;
  end = DIV_ROUND_UP(job->common.len, BACKUP_CLUSTER_SIZE);
  
-job->bitmap = hbitmap_alloc(end, 0);

+job->done_bitmap = bitmap_new(end);
  
  bdrv_set_enable_write_cache(target, true);

  if (target->blk) {
@@ -475,7 +476,7 @@ static void coroutine_fn backup_run(void *opaque)
  /* wait until pending backup_do_cow() calls have completed */
  qemu_co_rwlock_wrlock(>flush_rwlock);
  qemu_co_rwlock_unlock(>flush_rwlock);
-hbitmap_free(job->bitmap);
+g_free(job->done_bitmap);
  
  if (target->blk) {

  blk_iostatus_disable(target->blk);


Isn't it better to use hbitmap iterators here to speed up bitmap handling?

trace_backup_do_cow_skip actually do nothing, so

for (; start < end; start++) {
if (hbitmap_get(job->bitmap, start)) {
trace_backup_do_cow_skip(job, start);
continue; /* already copied */
}

may efficiently changed by something like:

   hbitmap_iter_init(hbi, start);
   while ((start = hbitmap_iter_next(hbi)) != -1) {


!! in this case bitmap usage should be reverted, as hbitmap_iter_next 
will skip 0 bits, not 1 ones.


--
Best regards,
Vladimir
* now, @virtuozzo.com instead of @parallels.com. Sorry for this inconvenience.

Re: [Qemu-devel] [PATCH] qemu-iotests: Add -nographic when starting QEMU in 120

2015-11-20 Thread Max Reitz

On 20.11.2015 10:35, Fam Zheng wrote:
> Otherwise, a window flashes on my desktop (built with SDL). Other
> iotest cases have that.
> 
> Signed-off-by: Fam Zheng 
> ---
>  tests/qemu-iotests/120 | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/tests/qemu-iotests/120 b/tests/qemu-iotests/120
> index 9f13078..d899a3f 100755
> --- a/tests/qemu-iotests/120
> +++ b/tests/qemu-iotests/120
> @@ -49,7 +49,7 @@ echo "{'execute': 'qmp_capabilities'}
>{'execute': 'human-monitor-command',
> 'arguments': {'command-line': 'qemu-io drv \"write -P 42 0 64k\"'}}
>{'execute': 'quit'}" \
> -| $QEMU -qmp stdio -nodefaults \
> +| $QEMU -qmp stdio -nographic -nodefaults \
>  -drive 
> id=drv,if=none,file="$TEST_IMG",driver=raw,file.driver=$IMGFMT \
>  | _filter_qmp | _filter_qemu_io
>  $QEMU_IO -c 'read -P 42 0 64k' "$TEST_IMG" | _filter_qemu_io
> 

This is the same patch as
http://lists.nongnu.org/archive/html/qemu-devel/2015-11/msg00623.html,
but while both are correct, both need to fix 119, too, I think.

(And while I would be fine with merging this and then taking a follow-up
patch, I don't think we need to hurry for 2.5. Releases and iotests
don't really care about each other, other than that we should pass all
the iotests before a release unless we know what's wrong and don't care.)

Max

signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH for 2.6 0/3] Bitmap clean-up patches for 2.6

2015-11-20 Thread Vladimir Sementsov-Ogievskiy


Hi Fam!

Thanks for it, I really like the idea about dropping granularity from 
hbitmap. I've waste lots of time understanding the code about bitmaps. 
Keeping in mind what units are in what granularity and what granularity 
is in what units is more than inconvenient)


On 20.11.2015 12:59, Fam Zheng wrote:

This makes a cleaner base for more dirty bitmap work. "granularity" appearing
with different representations have always been mind twisting, remove it from
HBitmap to make the interface and implementation simpler. Upon this, it is
a bit easier to add persistent dirty bitmap functionalities.

Block dirty bitmap is not unit-tested, so the removal of HBitmap test code
looks like a loss, but the overall test coverage is barely affected as we also
have various mirror, commit and backup iotest cases, and they do catch various
bugs when I wrote the patches.

Please review!

Fam

Fam Zheng (3):
   backup: Use Bitmap to replace "s->bitmap"
   block: Hide HBitmap in block dirty bitmap interface
   hbitmap: Drop "granularity"

  block.c|  79 ++-
  block/backup.c |  25 +++---
  block/mirror.c |  14 ++--
  include/block/block.h  |   9 ++-
  include/qemu/hbitmap.h |  20 +
  tests/test-hbitmap.c   | 206 -
  util/hbitmap.c |  64 +++
  7 files changed, 135 insertions(+), 282 deletions(-)




--
Best regards,
Vladimir
* now, @virtuozzo.com instead of @parallels.com. Sorry for this inconvenience.

Re: [Qemu-devel] [RESEND RFC 0/6] AMD XGBE KVM platform passthrough

2015-11-20 Thread Eric Auger

Hi Alex,
On 11/20/2015 12:44 AM, Alex Williamson wrote:
> On Thu, 2015-11-19 at 15:22 +, Eric Auger wrote:
>> I am resending this RFC from Oct 12, after kernel 4.4-rc1 and
>> QEMU 2.5-rc1, hoping things have calmed down a little bit.
>>
>> This RFC allows to set up AMD XGBE passthrough. This was tested on AMD
>> Seattle.
>>
>> The first upstreamed device supporting KVM platform passthrough was the
>> Calxeda Midway XGMAC. Compared to this latter, the XGBE XGMAC exposes a
>> much more complex device tree node. Generating the device tree node for
>> the guest is the challenging and controversary part of this series.
>>
>> - First There are 2 device tree node formats:
>> one where XGBE and PHY are described in separate nodes and another one
>> that combines both description in a single node (only supported by 4.2
>> onwards kernels). Only the combined description is supported for passthrough,
>> meaning the host must be >= 4.2 and must feature a device tree with a 
>> combined
>> description. The guest will also be exposed with a combined description,
>> meaning only >= 4.2 guest are supported. It is not planned to support
>> separate node representation since assignment of the PHY is less
>> straigtforward.
>>
>> - the XGMAC/PHY node depends on 2 clock nodes (DMA and PTP).
>> The code checks those clocks are fixed to make sure they cannot be
>> switched off at some point after the native driver gets unbound.
>>
>> - there are many property values to populate on guest side. Most of them
>> cannot be hardcoded. That series proposes a way to parse the host device
>> tree blob and retrieve host values to feed guest representation. Current
>> approach relies on dtc binary availability plus libfdt usage.
>> Other alternatives were discussed in:
>> http://www.spinics.net/lists/kvm-arm/msg16648.html.
>>
>> - Currently host booted with ACPI is not supported.
> 
> I won't pretend to know all the politics in the ARM space, but doesn't
> this last bullet sort of imply that this is dead-on-arrival code?  Maybe
> not in the embedded space, but certainly in the server space, I thought
> ACPI was declared the winner.  Thanks,

When the code was written, no ACPI description was available yet
including IOMMU. Now I think there is a specification that would enable
the description of the system (IORT/DSDT tables) and I will investigate
whether we have one ready for the HW I am using and mainlined.

Nethertheless we had a discussion end of Sept with Marc Zyngier,
Christoffer, Will Deacon and other people working in the ARM ecosystem
and we decided starting with FDT host description was a reasonable
choice at this moment. To be fully honest Peter also thought we should
consider the ACPI case from day 0. But I think nobody -AFAIK - has an
ideal solution about how to address ACPI/DT in a unified way and people
seem to be against introducing IOCTL API.

Among solutions I foresee the 1st one below was considered the simplest
and chosen:
1) rely on external applications to decode/parse dt/ACPI table
2) build a unified fs representation for dt/ACPI
3) create unified IOCTL API to retrieve dt/ACPI info (attempted by VOSYS
but at VFIO level)

The idea of this series is
- to rely on external dtc binary to build the blob from
/sys/firmware/devicetree/base
- introduce some helpers using libfdt that manipulate the host dt blob
- use those helpers in sysbus-fdt.c to build the clock and xgbe nodes
for the guest

Assuming we have an ACPI description I guess we would/could use a
similar approach to parse/decode the ACPI table from QEMU (relying on
acpidump, acpixtract, iasl, ../.. combination). So the current proposal
brings a solution for embedded world and can be easily reused for other
devices. Next step is to propose a similar approach for ACPI. Now I
would like to make sure the open approach is accepted (with external
dependency on dtc binary as we would have ext dependency on ACPI utilities).

Best Regards

Eric

> 
> Alex
>

Re: [Qemu-devel] [Qemu-ppc] [OpenBIOS] CUDA has problems with Mac OS 10.4

2015-11-20 Thread Programmingkid

On Nov 20, 2015, at 8:39 AM, BALATON Zoltan wrote:

> On Thu, 19 Nov 2015, Segher Boessenkool wrote:
>> Some mac99/pmu99 hardware has an ADB keyboard, fwiw (tibook, for example).
>> The do have built-in USB; that, and being newworld, are not directly
>> related things.
> 
> Maybe, but the PowerMac3,1 we are trying to emulate here does not have ADB 
> AFAIK. Although qemu's mac99 is not a real machine now we should move to 
> being closer to some existing hardware if we want OSes written for that 
> hardware to run.

I use to have the same belief until Mark set me straight. We are only making an 
emulator of a new world Mac, not a simulator of a PowerMac3,1. This means we 
might be able to get away with not exactly mirroring a real Mac. The fact that 
Mac OS 9 can boot up at all does give me hope we are on the right path.

Re: [Qemu-devel] [PATCH COLO-Frame v10 19/38] COLO failover: Introduce state to record failover process

2015-11-20 Thread Dr. David Alan Gilbert

* zhanghailiang (zhang.zhanghaili...@huawei.com) wrote:
> When handling failover, we do different things according to the different 
> stage
> of failover process, here we introduce a global atomic variable to record the
> status of failover.
> 
> We add four failover status to indicate the different stage of failover 
> process.
> You should use the helpers to get and set the value.
> 
> Signed-off-by: zhanghailiang 
> ---
>  include/migration/failover.h | 10 ++
>  migration/colo-failover.c| 37 +
>  migration/colo.c |  4 
>  trace-events |  1 +
>  4 files changed, 52 insertions(+)
> 
> diff --git a/include/migration/failover.h b/include/migration/failover.h
> index 1785b52..882c625 100644
> --- a/include/migration/failover.h
> +++ b/include/migration/failover.h
> @@ -15,6 +15,16 @@
>  
>  #include "qemu-common.h"
>  
> +typedef enum COLOFailoverStatus {
> +FAILOVER_STATUS_NONE = 0,
> +FAILOVER_STATUS_REQUEST = 1, /* Request but not handled */
> +FAILOVER_STATUS_HANDLING = 2, /* In the process of handling failover */
> +FAILOVER_STATUS_COMPLETED = 3, /* Finish the failover process */
> +} COLOFailoverStatus;

OK - there's a couple of typo's later, but other than those:

Reviewed-by: Dr. David Alan Gilbert 

> +
> +void failover_init_state(void);
> +int failover_set_state(int old_state, int new_state);
> +int failover_get_state(void);
>  void failover_request_active(Error **errp);
>  
>  #endif
> diff --git a/migration/colo-failover.c b/migration/colo-failover.c
> index e3897c6..ae06c16 100644
> --- a/migration/colo-failover.c
> +++ b/migration/colo-failover.c
> @@ -14,22 +14,59 @@
>  #include "migration/failover.h"
>  #include "qmp-commands.h"
>  #include "qapi/qmp/qerror.h"
> +#include "qemu/error-report.h"
> +#include "trace.h"
>  
>  static QEMUBH *failover_bh;
> +static COLOFailoverStatus failover_state;
>  
>  static void colo_failover_bh(void *opaque)
>  {
> +int old_state;
> +
>  qemu_bh_delete(failover_bh);
>  failover_bh = NULL;
> +old_state = failover_set_state(FAILOVER_STATUS_REQUEST,
> +   FAILOVER_STATUS_HANDLING);
> +if (old_state != FAILOVER_STATUS_REQUEST) {
> +error_report(" Unkown error for failover, old_state=%d", old_state);

Typo 'Unkown'

> +return;
> +}
>  /*TODO: Do failover work */
>  }
>  
>  void failover_request_active(Error **errp)
>  {
> +   if (failover_set_state(FAILOVER_STATUS_NONE, FAILOVER_STATUS_REQUEST)
> + != FAILOVER_STATUS_NONE) {
> +error_setg(errp, "COLO failover is already actived");
> +return;
> +}
>  failover_bh = qemu_bh_new(colo_failover_bh, NULL);
>  qemu_bh_schedule(failover_bh);
>  }
>  
> +void failover_init_state(void)
> +{
> +failover_state = FAILOVER_STATUS_NONE;
> +}
> +
> +int failover_set_state(int old_state, int new_state)
> +{
> +int old;
> +
> +old = atomic_cmpxchg(_state, old_state, new_state);;

Typo double ;;

> +if (old == old_state) {
> +trace_colo_failover_set_state(new_state);
> +}
> +return old;
> +}
> +
> +int failover_get_state(void)
> +{
> +return atomic_read(_state);
> +}
> +
>  void qmp_x_colo_lost_heartbeat(Error **errp)
>  {
>  if (get_colo_mode() == COLO_MODE_UNKNOWN) {
> diff --git a/migration/colo.c b/migration/colo.c
> index 64daee9..7732f60 100644
> --- a/migration/colo.c
> +++ b/migration/colo.c
> @@ -194,6 +194,8 @@ static void colo_process_checkpoint(MigrationState *s)
>  int64_t current_time, checkpoint_time = 
> qemu_clock_get_ms(QEMU_CLOCK_HOST);
>  int fd, ret = 0;
>  
> +failover_init_state();
> +
>  /* Dup the fd of to_dst_file */
>  fd = dup(qemu_get_fd(s->to_dst_file));
>  if (fd == -1) {
> @@ -310,6 +312,8 @@ void *colo_process_incoming_thread(void *opaque)
>  migrate_set_state(>state, MIGRATION_STATUS_ACTIVE,
>MIGRATION_STATUS_COLO);
>  
> +failover_init_state();
> +
>  fd = dup(qemu_get_fd(mis->from_src_file));
>  if (fd < 0) {
>  ret = -errno;
> diff --git a/trace-events b/trace-events
> index c98bc13..61e89c7 100644
> --- a/trace-events
> +++ b/trace-events
> @@ -1502,6 +1502,7 @@ 
> rdma_start_outgoing_migration_after_rdma_source_init(void) ""
>  colo_vm_state_change(const char *old, const char *new) "Change '%s' => '%s'"
>  colo_ctl_put(const char *msg, uint64_t value) "Send '%s' cmd, value: %" 
> PRIu64""
>  colo_ctl_get(const char *msg) "Receive '%s' cmd"
> +colo_failover_set_state(int new_state) "new state %d"
>  
>  # kvm-all.c
>  kvm_ioctl(int type, void *arg) "type 0x%x, arg %p"
> -- 
> 1.8.3.1
> 
> 
--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

[Qemu-devel] ivshmem property size should be a size, not a string

2015-11-20 Thread Markus Armbruster

Everybody's favourite device model has "size" property.  It's declared
as *string*

DEFINE_PROP_STRING("size", IVShmemState, sizearg),

which gets converted to a size manually in the realize method:

} else if (s->sizearg == NULL) {
s->ivshmem_size = 4 << 20; /* 4 MB default */
} else {
char *end;
int64_t size = qemu_strtosz(s->sizearg, );
if (size < 0 || *end != '\0' || !is_power_of_2(size)) {
error_setg(errp, "Invalid size %s", s->sizearg);
return;
}
s->ivshmem_size = size;
}

This is *wrong*.  Impact, as far as I can tell:

* -device ivshmem,help shows the property as 'str' instead of 'size'.

  Unhelpful, but hardly show-stopper.

* On the command line and in HMP, ivshmem's size is parsed differently
  than other size properties.  In particular, a number without a suffix
  is normally interpreted as bytes, except for ivshmem, where it's
  Mebibytes.

  Ugly inconsistency, but hardly the only one.

* In QMP, the size must be given as JSON string instead of JSON number,
  and size suffixes are accepted.  Example: "size": "512k" instead of
  "size": 524288.

  Right now, this violation of QMP rules is tolerable (barely), because
  device_add breaks some of these rules already.  However, one goal of
  the current work on QAPI is to support a QMP command to plug devices
  that doesn't break QMP rules, and then this violation will stand out.

  Therefore, I want it fixed now, before ivshmem gets used in anger.

  A straight fix of size isn't fully backwards compatible: suffixes no
  longer work in QMP, and you need to use an 'M' suffix to get Mebibytes
  on command line and HMP.

  If that's unacceptable, we'll have to provide a new, fixed property,
  and deprecate size.

  Opinions?

Re: [Qemu-devel] [PATCH] tests: fix cdrom_pio_impl in ide-test

2015-11-20 Thread Kevin Wolf

Am 20.11.2015 um 15:29 hat Peter Lieven geschrieben:
> The check for the cleared BSY flag has to be performed
> before each data transfer and not just before the
> first one.
> 
> Commit 5f81724d revealed this glitch as the BSY flag
> was not set in ATAPI PIO transfers before.
> 
> While at it fix the desciptions and add a comment before
> the nested for loop that transfers the data.
> 
> Signed-off-by: Peter Lieven 

Reviewed-by: Kevin Wolf

Re: [Qemu-devel] [PATCH for-2.5] target-arm: Don't mask out bits [47:40] in LPAE descriptors for v8

2015-11-20 Thread Laurent Desnogues

On Fri, Nov 20, 2015 at 4:20 PM, Peter Maydell  wrote:
> On 20 November 2015 at 15:18, Laurent Desnogues
>  wrote:
>> Hello,
>>
>> On Fri, Nov 20, 2015 at 3:32 PM, Peter Maydell  
>> wrote:
>>> In an LPAE format descriptor in ARMv8 the address field extends
>>> up to bit 47, not just bit 39. Correct the masking so we don't
>>> give incorrect results if the output address size is greater
>>> than 40 bits, as it can be for AArch64.
>>>
>>> (Note that we don't yet support the new-in-v8 Address Size fault which
>>> should be generated if any translation table entry or TTBR contains
>>> an address with non-zero bits above the most significant bit of the
>>> maximum output address size.)
>>>
>>> Signed-off-by: Peter Maydell 
>
>>> +/* The address field in the descriptor goes up to bit 39 for ARMv7
>>> + * but up to bit 47 for ARMv8.
>>> + */
>>> +if (arm_feature(env, ARM_FEATURE_V8)) {
>>> +descaddrmask = 0xf000ULL;
>>> +} else {
>>> +descaddrmask = 0xfff000ULL;
>>> +}
>>
>> My understanding is that 48 bits are used if you are running AArch64
>> code, and 40 bits are used for 32-bit code even on an ARMv8 CPU, so
>> checking for ARM_FEATURE_V8 is perhaps not enough.
>
> For v8 32-bit code the usable address width is only 40 bits, but
> setting a bit in [47:40] causes an AddressSize fault on v8 (but not
> v7). So the mask should be 48 bits for v8 regardless of 32-vs-64,
> and when we support AddressSize faults we'll then check the upper
> bits of the masked-out address and raise a fault if needed.

That makes sense.

So here we go:

Reviewed-by: 

Thanks,

Laurent

Re: [Qemu-devel] [PATCH] memory: emulate ioeventfd

2015-11-20 Thread Paolo Bonzini



On 20/11/2015 10:37, Pavel Fedin wrote:
> The ioeventfd mechanism is used by vhost, dataplane, and virtio-pci to
> turn guest MMIO/PIO writes into eventfd file descriptor events.  This
> allows arbitrary threads to be notified when the guest writes to a
> specific MMIO/PIO address.
> 
> qtest and TCG do not support ioeventfd because memory writes are not
> checked against registered ioeventfds in QEMU.  This patch implements
> this in memory_region_dispatch_write() so qtest can use ioeventfd.
> 
> Also this patch fixes vhost aborting on some misconfigured old kernels
> like 3.18.0 on ARM. It is possible to explicitly enable CONFIG_EVENTFD
> in expert settings, while MMIO binding support in KVM will still be
> missing.
> 
> Signed-off-by: Stefan Hajnoczi 
> Signed-off-by: Pavel Fedin 
> ---
> RFC => PATCH:
> - Add !kvm_eventfds_enabled() conditions to bypass eventfd injection when not 
> needed
> - Renamed "ioeventfd" to "eventfd", just to make words shorter
> - Add a one-shot warning about missing MMIO bindings in KVM
> ---
>  kvm-all.c |  6 --
>  memory.c  | 42 ++
>  2 files changed, 46 insertions(+), 2 deletions(-)
> 
> diff --git a/kvm-all.c b/kvm-all.c
> index ddb007a..70f5cec 100644
> --- a/kvm-all.c
> +++ b/kvm-all.c
> @@ -1633,8 +1633,10 @@ static int kvm_init(MachineState *ms)
>  
>  kvm_state = s;
>  
> -s->memory_listener.listener.eventfd_add = kvm_mem_ioeventfd_add;
> -s->memory_listener.listener.eventfd_del = kvm_mem_ioeventfd_del;
> +if (kvm_eventfds_allowed) {
> +s->memory_listener.listener.eventfd_add = kvm_mem_ioeventfd_add;
> +s->memory_listener.listener.eventfd_del = kvm_mem_ioeventfd_del;
> +}
>  s->memory_listener.listener.coalesced_mmio_add = 
> kvm_coalesce_mmio_region;
>  s->memory_listener.listener.coalesced_mmio_del = 
> kvm_uncoalesce_mmio_region;
>  
> diff --git a/memory.c b/memory.c
> index e193658..4d138fb 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -18,12 +18,14 @@
>  #include "exec/ioport.h"
>  #include "qapi/visitor.h"
>  #include "qemu/bitops.h"
> +#include "qemu/error-report.h"
>  #include "qom/object.h"
>  #include "trace.h"
>  #include 
>  
>  #include "exec/memory-internal.h"
>  #include "exec/ram_addr.h"
> +#include "sysemu/kvm.h"
>  #include "sysemu/sysemu.h"
>  
>  //#define DEBUG_UNASSIGNED
> @@ -1141,6 +1143,32 @@ MemTxResult memory_region_dispatch_read(MemoryRegion 
> *mr,
>  return r;
>  }
>  
> +/* Return true if an eventfd was signalled */
> +static bool memory_region_dispatch_write_eventfds(MemoryRegion *mr,
> +hwaddr addr,
> +uint64_t data,
> +unsigned size,
> +MemTxAttrs attrs)
> +{
> +MemoryRegionIoeventfd ioeventfd = {
> +.addr = addrrange_make(int128_make64(addr), int128_make64(size)),
> +.data = data,
> +};
> +unsigned i;
> +
> +for (i = 0; i < mr->ioeventfd_nb; i++) {
> +ioeventfd.match_data = mr->ioeventfds[i].match_data;
> +ioeventfd.e = mr->ioeventfds[i].e;
> +
> +if (memory_region_ioeventfd_equal(ioeventfd, mr->ioeventfds[i])) {
> +event_notifier_set(ioeventfd.e);
> +return true;
> +}
> +}
> +
> +return false;
> +}
> +
>  MemTxResult memory_region_dispatch_write(MemoryRegion *mr,
>   hwaddr addr,
>   uint64_t data,
> @@ -1154,6 +1182,11 @@ MemTxResult memory_region_dispatch_write(MemoryRegion 
> *mr,
>  
>  adjust_endianness(mr, , size);
>  
> +if ((!kvm_eventfds_enabled()) &&
> +memory_region_dispatch_write_eventfds(mr, addr, data, size, attrs)) {
> +return MEMTX_OK;
> +}
> +
>  if (mr->ops->write) {
>  return access_with_adjusted_size(addr, , size,
>   mr->ops->impl.min_access_size,
> @@ -1672,6 +1705,8 @@ void memory_region_clear_global_locking(MemoryRegion 
> *mr)
>  mr->global_locking = false;
>  }
>  
> +static bool userspace_eventfd_warning;
> +
>  void memory_region_add_eventfd(MemoryRegion *mr,
> hwaddr addr,
> unsigned size,
> @@ -1688,6 +1723,13 @@ void memory_region_add_eventfd(MemoryRegion *mr,
>  };
>  unsigned i;
>  
> +if (kvm_enabled() && (!(kvm_eventfds_enabled() ||
> +userspace_eventfd_warning))) {
> +userspace_eventfd_warning = true;
> +error_report("Using eventfd without MMIO binding in KVM. "
> + "Suboptimal performance expected");
> +}
> +
>  if (size) {
>  adjust_endianness(mr, , size);
>  }
> 

Queued for 2.6.

Paolo

Re: [Qemu-devel] [PATCH] nseries: Don't use qemu_hw_version()

2015-11-20 Thread Peter Maydell

On 12 November 2015 at 15:04, Eduardo Habkost  wrote:
> On Thu, Nov 12, 2015 at 01:39:24PM +0200, Michael S. Tsirkin wrote:
>> On Wed, Nov 11, 2015 at 07:42:47PM -0200, Eduardo Habkost wrote:
>> > nseries doesn't use qemu_set_hw_version() and doesn't need the
>> > compatibility magic of qemu_hw_version(). Use QEMU_VERSION
>> > directly.
>> >
>> > Signed-off-by: Eduardo Habkost 
>>
>> This looks very wrong.
>> We should be reducing the users of QEMU_VERSION,
>> not adding more.
>
> I would like to hear from the users and maintainers of the
> nseries machines, to judge this. I assume they don't need ABI
> compatibility betweeen QEMU versions, and maybe they want to know
> the QEMU version they are running.

I suspect you won't find many users or maintainers on this
list (or anywhere else). My guess is that this was useful
for when Nokia were actively shipping an SDK that used QEMU
(because then they could surface the QEMU and other component
version information to SDK end-users and get better bug reports
and so on as a result), but that anybody still using upstream
QEMU to emulate an n-series device is capable of reporting
what QEMU version they're using without having to have it
reported via a dialog box inside the guest :-)

thanks
-- PMM

1 2 3 >

1 - 100 of 218 matches

Mail list logo