Re: [PATCH v4 06/20] irqchip/gic-v4.1: Add initial SGI configuration

2020-02-17 Thread Zenghui Yu

Hi Marc,

On 2020/2/14 22:57, Marc Zyngier wrote:

The GICv4.1 ITS has yet another new command (VSGI) which allows
a VPE-targeted SGI to be configured (or have its pending state
cleared). Add support for this command and plumb it into the
activate irqdomain callback so that it is ready to be used.

Signed-off-by: Marc Zyngier 
---
  drivers/irqchip/irq-gic-v3-its.c   | 76 +-
  include/linux/irqchip/arm-gic-v3.h |  3 +-
  2 files changed, 77 insertions(+), 2 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 6121c8f2a8ce..229e4ae9c59b 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -354,6 +354,15 @@ struct its_cmd_desc {
struct {
struct its_vpe *vpe;
} its_invdb_cmd;
+
+   struct {
+   struct its_vpe *vpe;
+   u8 sgi;
+   u8 priority;
+   bool enable;
+   bool group;
+   bool clear;
+   } its_vsgi_cmd;
};
  };
  
@@ -502,6 +511,31 @@ static void its_encode_db(struct its_cmd_block *cmd, bool db)

	its_mask_encode(&cmd->raw_cmd[2], db, 63, 63);
  }
  
+static void its_encode_sgi_intid(struct its_cmd_block *cmd, u8 sgi)
+{
+   its_mask_encode(&cmd->raw_cmd[0], sgi, 35, 32);
+}
+
+static void its_encode_sgi_priority(struct its_cmd_block *cmd, u8 prio)
+{
+   its_mask_encode(&cmd->raw_cmd[0], prio >> 4, 23, 20);
+}
+
+static void its_encode_sgi_group(struct its_cmd_block *cmd, bool grp)
+{
+   its_mask_encode(&cmd->raw_cmd[0], grp, 10, 10);
+}
+
+static void its_encode_sgi_clear(struct its_cmd_block *cmd, bool clr)
+{
+   its_mask_encode(&cmd->raw_cmd[0], clr, 9, 9);
+}
+
+static void its_encode_sgi_enable(struct its_cmd_block *cmd, bool en)
+{
+   its_mask_encode(&cmd->raw_cmd[0], en, 8, 8);
+}
+
  static inline void its_fixup_cmd(struct its_cmd_block *cmd)
  {
/* Let's fixup BE commands */
@@ -867,6 +901,26 @@ static struct its_vpe *its_build_invdb_cmd(struct its_node *its,
return valid_vpe(its, desc->its_invdb_cmd.vpe);
  }
  
+static struct its_vpe *its_build_vsgi_cmd(struct its_node *its,
+ struct its_cmd_block *cmd,
+ struct its_cmd_desc *desc)
+{
+   if (WARN_ON(!is_v4_1(its)))
+   return NULL;
+
+   its_encode_cmd(cmd, GITS_CMD_VSGI);
+   its_encode_vpeid(cmd, desc->its_vsgi_cmd.vpe->vpe_id);
+   its_encode_sgi_intid(cmd, desc->its_vsgi_cmd.sgi);
+   its_encode_sgi_priority(cmd, desc->its_vsgi_cmd.priority);
+   its_encode_sgi_group(cmd, desc->its_vsgi_cmd.group);
+   its_encode_sgi_clear(cmd, desc->its_vsgi_cmd.clear);
+   its_encode_sgi_enable(cmd, desc->its_vsgi_cmd.enable);
+
+   its_fixup_cmd(cmd);
+
+   return valid_vpe(its, desc->its_vsgi_cmd.vpe);
+}
+
  static u64 its_cmd_ptr_to_offset(struct its_node *its,
 struct its_cmd_block *ptr)
  {
@@ -3823,6 +3877,21 @@ static struct irq_chip its_vpe_4_1_irq_chip = {
.irq_set_vcpu_affinity  = its_vpe_4_1_set_vcpu_affinity,
  };
  
+static void its_configure_sgi(struct irq_data *d, bool clear)
+{
+   struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
+   struct its_cmd_desc desc;
+
+   desc.its_vsgi_cmd.vpe = vpe;
+   desc.its_vsgi_cmd.sgi = d->hwirq;
+   desc.its_vsgi_cmd.priority = vpe->sgi_config[d->hwirq].priority;
+   desc.its_vsgi_cmd.enable = vpe->sgi_config[d->hwirq].enabled;
+   desc.its_vsgi_cmd.group = vpe->sgi_config[d->hwirq].group;
+   desc.its_vsgi_cmd.clear = clear;
+
+   its_send_single_vcommand(find_4_1_its(), its_build_vsgi_cmd, &desc);
+}
+
  static int its_sgi_set_affinity(struct irq_data *d,
const struct cpumask *mask_val,
bool force)
@@ -3868,13 +3937,18 @@ static void its_sgi_irq_domain_free(struct irq_domain *domain,
  static int its_sgi_irq_domain_activate(struct irq_domain *domain,
   struct irq_data *d, bool reserve)
  {
+   /* Write out the initial SGI configuration */
+   its_configure_sgi(d, false);
return 0;
  }
  
  static void its_sgi_irq_domain_deactivate(struct irq_domain *domain,
  struct irq_data *d)
  {
-   /* Nothing to do */
+   struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
+
+   vpe->sgi_config[d->hwirq].enabled = false;
+   its_configure_sgi(d, true);


The spec says, when C==1, VSGI clears the pending state of the vSGI,
leaving the configuration unchanged.  So should we first clear the
pending state and then disable vSGI (let E==0)?
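
For illustration, the reordering being suggested might look like the
sketch below (not a tested patch; it only reuses helpers introduced by
this patch):

static void its_sgi_irq_domain_deactivate(struct irq_domain *domain,
  struct irq_data *d)
{
	struct its_vpe *vpe = irq_data_get_irq_chip_data(d);

	/* First clear the pending state (C == 1, configuration unchanged) */
	its_configure_sgi(d, true);

	/* Then write out the new configuration with the vSGI disabled */
	vpe->sgi_config[d->hwirq].enabled = false;
	its_configure_sgi(d, false);
}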


Thanks,
Zenghui


  }
  
  static struct irq_domain_ops its_sgi_domain_ops = {

diff --git a/include/linux/irqchip/arm-gic-v3.h 

Re: [PATCH v4 08/20] irqchip/gic-v4.1: Plumb get/set_irqchip_state SGI callbacks

2020-02-17 Thread Zenghui Yu

Hi Marc,

On 2020/2/14 22:57, Marc Zyngier wrote:

To implement the get/set_irqchip_state callbacks (limited to the
PENDING state), we have to use a particular set of hacks:

- Reading the pending state is done by using a pair of new redistributor
   registers (GICR_VSGIR, GICR_VSGIPENDR), which allow the 16 interrupts
   state to be retrieved.
- Setting the pending state is done by generating it as we'd otherwise do
   for a guest (writing to GITS_SGIR)
- Clearing the pending state is done by emitting a VSGI command with the
   "clear" bit set.

Signed-off-by: Marc Zyngier 
---
  drivers/irqchip/irq-gic-v3-its.c   | 56 ++
  include/linux/irqchip/arm-gic-v3.h | 14 
  2 files changed, 70 insertions(+)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 1e448d9a16ea..a9753435c4ff 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -3915,11 +3915,67 @@ static int its_sgi_set_affinity(struct irq_data *d,
return -EINVAL;
  }
  
+static int its_sgi_set_irqchip_state(struct irq_data *d,
+enum irqchip_irq_state which,
+bool state)
+{
+   if (which != IRQCHIP_STATE_PENDING)
+   return -EINVAL;
+
+   if (state) {
+   struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
+   struct its_node *its = find_4_1_its();
+   u64 val;
+
+   val  = FIELD_PREP(GITS_SGIR_VPEID, vpe->vpe_id);
+   val |= FIELD_PREP(GITS_SGIR_VINTID, d->hwirq);
+   writeq_relaxed(val, its->sgir_base + GITS_SGIR - SZ_128K);
+   } else {
+   its_configure_sgi(d, true);
+   }
+
+   return 0;
+}
+
+static int its_sgi_get_irqchip_state(struct irq_data *d,
+enum irqchip_irq_state which, bool *val)
+{
+   struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
+   void __iomem *base = gic_data_rdist_cpu(vpe->col_idx)->rd_base + SZ_128K;


There might be a race on reading the 'vpe->col_idx' against a concurrent
vPE schedule (col_idx will be modified in its_vpe_set_affinity)? Will we
end up accessing the GICR_VSGI* registers of the old redistributor,
while the vPE is now resident on the new one? Or is it harmful?

The same question for direct_lpi_inv(), where 'vpe->col_idx' will be
used in irq_to_cpuid().
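
For illustration, one conceivable way to close such a race would be to
sample col_idx under a lock that its_vpe_set_affinity() also takes; the
per-vPE vpe_lock below is hypothetical and not part of this patch:

	unsigned long flags;
	void __iomem *base;

	/* Hypothetical lock serializing against vPE affinity changes */
	raw_spin_lock_irqsave(&vpe->vpe_lock, flags);
	base = gic_data_rdist_cpu(vpe->col_idx)->rd_base + SZ_128K;
	/* ... GICR_VSGIR / GICR_VSGIPENDR poll as in the patch ... */
	raw_spin_unlock_irqrestore(&vpe->vpe_lock, flags);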


+   u32 count = 1000000;	/* 1s! */
+   u32 status;
+
+   if (which != IRQCHIP_STATE_PENDING)
+   return -EINVAL;
+
+   writel_relaxed(vpe->vpe_id, base + GICR_VSGIR);
+   do {
+   status = readl_relaxed(base + GICR_VSGIPENDR);
+   if (!(status & GICR_VSGIPENDR_BUSY))
+   goto out;
+
+   count--;
+   if (!count) {
+   pr_err_ratelimited("Unable to get SGI status\n");
+   goto out;
+   }
+   cpu_relax();
+   udelay(1);
+   } while(count);
+
+out:
+   *val = !!(status & (1 << d->hwirq));
+
+   return 0;
+}
+
  static struct irq_chip its_sgi_irq_chip = {
.name   = "GICv4.1-sgi",
.irq_mask   = its_sgi_mask_irq,
.irq_unmask = its_sgi_unmask_irq,
.irq_set_affinity   = its_sgi_set_affinity,
+   .irq_set_irqchip_state  = its_sgi_set_irqchip_state,
+   .irq_get_irqchip_state  = its_sgi_get_irqchip_state,
  };
  
  static int its_sgi_irq_domain_alloc(struct irq_domain *domain,

diff --git a/include/linux/irqchip/arm-gic-v3.h 
b/include/linux/irqchip/arm-gic-v3.h
index a89578884263..64da945486ac 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -345,6 +345,15 @@
  #define GICR_VPENDBASER_4_1_VGRP1EN   (1ULL << 58)
  #define GICR_VPENDBASER_4_1_VPEID GENMASK_ULL(15, 0)
  
+#define GICR_VSGIR			0x0080
+
+#define GICR_VSGIR_VPEID   GENMASK(15, 0)
+
+#define GICR_VSGIPENDR 0x0088
+
+#define GICR_VSGIPENDR_BUSY(1U << 31)
+#define GICR_VSGIPENDR_PENDING GENMASK(15, 0)
+
  /*
   * ITS registers, offsets from ITS_base
   */
@@ -368,6 +377,11 @@
  
  #define GITS_TRANSLATER			0x10040
  
+#define GITS_SGIR			0x20020
+
+#define GITS_SGIR_VPEIDGENMASK_ULL(47, 32)
+#define GITS_SGIR_VINTID   GENMASK_ULL(7, 0)


The spec says vINTID field is [3:0] of the GITS_SGIR.


Thanks,
Zenghui
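
If that reading is right (and 16 SGIs indeed only need 4 bits of
vINTID), the fix would presumably be a one-line change to the new
define, along these lines:

+#define GITS_SGIR_VINTID	GENMASK_ULL(3, 0)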


+
  #define GITS_CTLR_ENABLE  (1U << 0)
  #define GITS_CTLR_ImDe(1U << 1)
  #define   GITS_CTLR_ITS_NUMBER_SHIFT  4



___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH 0/2] KVM: arm/arm64: Fixes for scheduling htimer of emulated timers

2020-02-17 Thread Tomasz Nowicki

On 17.02.2020 19:00, Marc Zyngier wrote:

On 2020-02-17 14:54, Tomasz Nowicki wrote:

This small series contains two fixes which were found while testing
Marc's ARM NV patch set, where we are going to have at most 4 timers
and the two are purely emulated.


What are these patches fixing? the NV series? or mainline?



Both, but found when testing NV.

Tomasz
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH 0/2] KVM: arm/arm64: Fixes for scheduling htimer of emulated timers

2020-02-17 Thread Marc Zyngier

On 2020-02-17 14:54, Tomasz Nowicki wrote:

This small series contains two fixes which were found while testing
Marc's ARM NV patch set, where we are going to have at most 4 timers
and the two are purely emulated.


What are these patches fixing? the NV series? or mainline?

Thanks,

M.
--
Jazz is not dead. It just smells funny...
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 12/94] KVM: arm64: nv: Add EL2->EL1 translation helpers

2020-02-17 Thread Marc Zyngier

Hi Mark,

Congratulations, you will now be CC'd on all the subsequent postings
of this series! Yes, I'm that nice! ;-)

On 2020-02-17 14:56, Mark Rutland wrote:

On Tue, Feb 11, 2020 at 05:48:16PM +, Marc Zyngier wrote:

Some EL2 system registers immediately affect the current execution
of the system, so we need to use their respective EL1 counterparts.
For this we need to define a mapping between the two.

These helpers will get used in subsequent patches.

Co-developed-by: Andre Przywara 
Signed-off-by: Andre Przywara 
Signed-off-by: Marc Zyngier 
---
 arch/arm64/include/asm/kvm_emulate.h |  6 ++++++
 arch/arm64/kvm/sys_regs.c            | 48 ++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+)

diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 282e9ddbe1bc..486978d0346b 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -58,6 +58,12 @@ void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu);

 int kvm_inject_nested_sync(struct kvm_vcpu *vcpu, u64 esr_el2);
 int kvm_inject_nested_irq(struct kvm_vcpu *vcpu);

+u64 translate_tcr(u64 tcr);
+u64 translate_cptr(u64 tcr);
+u64 translate_sctlr(u64 tcr);
+u64 translate_ttbr0(u64 tcr);
+u64 translate_cnthctl(u64 tcr);


Sorry to bikeshed, but could we please make the direction of translation
explicit in the name? e.g. tcr_el2_to_tcr_el1(), or tcr_el2_to_el1()?


Sure, that's an easy one!




+
 static inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
 {
return !(vcpu->arch.hcr_el2 & HCR_RW);
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 4b5310ea3bf8..634d3ee6799c 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -65,6 +65,54 @@ static bool write_to_read_only(struct kvm_vcpu *vcpu,

return false;
 }

+static u64 tcr_el2_ips_to_tcr_el1_ps(u64 tcr_el2)
+{
+   return ((tcr_el2 & TCR_EL2_PS_MASK) >> TCR_EL2_PS_SHIFT)
+   << TCR_IPS_SHIFT;
+}
+
+u64 translate_tcr(u64 tcr)
+{
+   return TCR_EPD1_MASK |  /* disable TTBR1_EL1 */
+  ((tcr & TCR_EL2_TBI) ? TCR_TBI0 : 0) |
+  tcr_el2_ips_to_tcr_el1_ps(tcr) |
+  (tcr & TCR_EL2_TG0_MASK) |
+  (tcr & TCR_EL2_ORGN0_MASK) |
+  (tcr & TCR_EL2_IRGN0_MASK) |
+  (tcr & TCR_EL2_T0SZ_MASK);
+}


I'm guessing this is only meant to cover a !VHE guest EL2 for the
moment, so only covers HCR_EL2.E2H=0? It might be worth mentioning in
the commit message.


Indeed, all the "translate_*" functions have a single purpose: converting
an !VHE EL2 layout into an EL1 layout.


It looks like this is missing some bits (e.g. TBID, HPD, HD, HA) that
could apply to the Guest-EL2 Stage-1. Maybe those are added by later
patches, but that's not obvious to me at this point in the series.


ARMv8.3-PAUTH isn't supported, and ARMv8.1-TTHM cannot be supported at
Stage-2, so we don't support it at Stage-1 either (even if we technically
could). Maybe worth suggesting to the powers that be...

ARMv8.1-HPD is an oversight though, and we should be able to support it.




+
+u64 translate_cptr(u64 cptr_el2)
+{
+   u64 cpacr_el1 = 0;
+
+   if (!(cptr_el2 & CPTR_EL2_TFP))
+   cpacr_el1 |= CPACR_EL1_FPEN;
+   if (cptr_el2 & CPTR_EL2_TTA)
+   cpacr_el1 |= CPACR_EL1_TTA;
+   if (!(cptr_el2 & CPTR_EL2_TZ))
+   cpacr_el1 |= CPACR_EL1_ZEN;
+
+   return cpacr_el1;
+}


Looking in ARM DDI 0487E.a I also see TCPAC and TAM; I guess we don't
need to map those to anything?


TCPAC allows us to trap CPACR_EL1, but we always have the physical
CPTR_EL2.TCPAC set in this case, so that bit doesn't need to translate
into anything.

TAM doesn't seem to translate into anything in CPACR_EL1, and I don't
plan to support the AMU any time soon with NV! ;-)




+
+u64 translate_sctlr(u64 sctlr)
+{
+   /* Bit 20 is RES1 in SCTLR_EL1, but RES0 in SCTLR_EL2 */
+   return sctlr | BIT(20);
+}


Looking in ARM DDI 0487E.a section D13.2.105, bit 20 is TSCXT, so this
might need to be reconsidered.


Huhuh, nice catch! We need to detect ARMv8.0-CSV2 here, and set the bit
accordingly.




+
+u64 translate_ttbr0(u64 ttbr0)
+{
+   /* Force ASID to 0 (ASID 0 or RES0) */
+   return ttbr0 & ~GENMASK_ULL(63, 48);
+}


Again, I assume this is only meant to provide a !VHE EL2 as this stands.


Indeed. I guess I need to write a better commit message to make this clear.





+
+u64 translate_cnthctl(u64 cnthctl)
+{
+   return ((cnthctl & 0x3) << 10) | (cnthctl & 0xfc);
+}


I assume this yields CNTKCTL_EL1, but I don't entirely follow. For
virtual-EL2 don't we have to force EL1P(C)TEN so that virtual-EL2
accesses don't trap?


A non-VHE guest will use the _EL2 instructions to access its own timer,
which will trap. At the same time, we use the EL1 timer to implement
the vEL2 timer, meaning we also need to trap the EL1 timer. Yes, !VHE
loses on all ...

Re: [PATCH kvmtool v2] Add emulation for CFI compatible flash memory

2020-02-17 Thread Alexandru Elisei
Hi,

I guess the device hasn't been tested with Linux. This is what I'm getting when
trying to boot a Linux guest using the command:

$ ./lkvm run -c4 -m4096 -k /path/to/kernel -d /path/to/disk -p root="/dev/vda2" -F flash.img

[    0.659167] physmap-flash 200.flash: physmap platform flash device: [mem
0x0200-0x029f]
[    0.660444] Number of erase regions: 1
[    0.661036] Primary Vendor Command Set: 0001 (Intel/Sharp Extended)
[    0.661688] Primary Algorithm Table at 0031
[    0.662168] Alternative Vendor Command Set:  (None)
[    0.662711] No Alternate Algorithm Table
[    0.663120] Vcc Minimum:  4.5 V
[    0.663450] Vcc Maximum:  5.5 V
[    0.663779] No Vpp line
[    0.664039] Typical byte/word write timeout: 2 µs
[    0.664590] Maximum byte/word write timeout: 2 µs
[    0.665240] Typical full buffer write timeout: 2 µs
[    0.665775] Maximum full buffer write timeout: 2 µs
[    0.666373] Typical block erase timeout: 2 ms
[    0.666828] Maximum block erase timeout: 2 ms
[    0.667282] Chip erase not supported
[    0.667659] Device size: 0x80 bytes (8 MiB)
[    0.668137] Flash Device Interface description: 0x0006
[    0.668697]   - Unknown
[    0.668963] Max. bytes in buffer write: 0x40
[    0.669407] Number of Erase Block Regions: 1
[    0.669865]   Erase Region #0: BlockSize 0x8000 bytes, 160 blocks
[    0.672299] 200.flash: Found 2 x16 devices at 0x0 in 32-bit bank.
Manufacturer ID 0x00 Chip ID 0x00
[    0.681328] NOR chip too large to fit in mapping. Attempting to cope...
[    0.682046] Intel/Sharp Extended Query Table at 0x0031
[    0.682645] Using buffer write method
[    0.683031] Sum of regions (a0) != total size of set of interleaved chips (100)
[    0.683854] gen_probe: No supported Vendor Command Set found
[    0.684441] physmap-flash 200.flash: map_probe failed

I also defined DEBUG_CFI in drivers/mtd/chips/cfi_probe.c.

The Flash Device Interface description that we provide is wrong; it should be 0x05.
More details below.

On 2/7/20 12:19 PM, Andre Przywara wrote:
> From: Raphael Gault 
>
> The EDK II UEFI firmware implementation requires some storage for the EFI
> variables, which is typically some flash storage.
> Since this is already supported on the EDK II side, we add a CFI flash
> emulation to kvmtool.
> This is backed by a file, specified via the --flash or -F command line
> option. Any flash writes done by the guest will immediately be reflected
> into this file (kvmtool mmap's the file).
>
> This implements a CFI flash using the "Intel/Sharp extended command
> set", as specified in:
> - JEDEC JESD68.01
> - JEDEC JEP137B
> - Intel Application Note 646
> Some gaps in those specs have been filled by looking at real devices and
> other implementations (QEMU, Linux kernel driver).
>
> At the moment this relies on DT to advertise the base address of the
> flash memory (mapped into the MMIO address space) and is only enabled
> for ARM/ARM64. The emulation itself is architecture agnostic, though.
>
> This is one missing piece toward a working UEFI boot with kvmtool on
> ARM guests, the other is to provide writable PCI BARs, which is WIP.
>
> Signed-off-by: Raphael Gault 
> [Andre: rewriting and fixing]
> Signed-off-by: Andre Przywara 
> ---
> Hi,
>
> an update addressing Will's comments. I added coarse grained locking
> to the MMIO handler, to prevent concurrent vCPU accesses from messing up
> the internal CFI flash state machine.
> I also folded the actual flash array read access into the MMIO handler
> and fixed the other small issues.
>
> Cheers,
> Andre
>
>  Makefile  |   6 +
>  arm/include/arm-common/kvm-arch.h |   3 +
>  builtin-run.c |   2 +
>  hw/cfi_flash.c| 546 ++
>  include/kvm/kvm-config.h  |   1 +
>  include/kvm/util.h|   5 +
>  6 files changed, 563 insertions(+)
>  create mode 100644 hw/cfi_flash.c
>
> diff --git a/Makefile b/Makefile
> index 3862112c..7ed6fb5e 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -170,6 +170,7 @@ ifeq ($(ARCH), arm)
>   CFLAGS  += -march=armv7-a
>  
>   ARCH_WANT_LIBFDT := y
> + ARCH_HAS_FLASH_MEM := y
>  endif
>  
>  # ARM64
> @@ -182,6 +183,7 @@ ifeq ($(ARCH), arm64)
>   ARCH_INCLUDE+= -Iarm/aarch64/include
>  
>   ARCH_WANT_LIBFDT := y
> + ARCH_HAS_FLASH_MEM := y
>  endif
>  
>  ifeq ($(ARCH),mips)
> @@ -261,6 +263,10 @@ ifeq (y,$(ARCH_HAS_FRAMEBUFFER))
>   endif
>  endif
>  
> +ifeq (y,$(ARCH_HAS_FLASH_MEM))
> + OBJS+= hw/cfi_flash.o
> +endif
> +
>  ifeq ($(call try-build,$(SOURCE_ZLIB),$(CFLAGS),$(LDFLAGS) -lz),y)
>   CFLAGS_DYNOPT   += -DCONFIG_HAS_ZLIB
>   LIBS_DYNOPT += -lz
> diff --git a/arm/include/arm-common/kvm-arch.h 
> b/arm/include/arm-common/kvm-arch.h
> index b9d486d5..2bb085f4 100644
> --- a/arm/include/arm-common/kvm-arch.h
> +++ b/arm/include/arm-common/kvm-arch.h
> @@ -21,6 +21,9 @@
>  #define ARM_GIC_DIST_SIZE

Re: [PATCH v5 15/19] KVM: Provide common implementation for generic dirty log functions

2020-02-17 Thread Vitaly Kuznetsov
Sean Christopherson  writes:

> On Fri, Feb 07, 2020 at 07:53:34PM -0500, Peter Xu wrote:
>> On Fri, Feb 07, 2020 at 04:42:33PM -0800, Sean Christopherson wrote:
>> > On Fri, Feb 07, 2020 at 07:18:32PM -0500, Peter Xu wrote:
>> > > On Fri, Feb 07, 2020 at 11:45:32AM -0800, Sean Christopherson wrote:
>> > > > +Vitaly for HyperV
>> > > > 
>> > > > On Thu, Feb 06, 2020 at 04:41:06PM -0500, Peter Xu wrote:
>> > > > > On Thu, Feb 06, 2020 at 01:21:20PM -0800, Sean Christopherson wrote:
>> > > > > > On Thu, Feb 06, 2020 at 03:02:00PM -0500, Peter Xu wrote:
>> > > > > > > But that matters to this patch because if MIPS can use
>> > > > > > > kvm_flush_remote_tlbs(), then we probably don't need this
>> > > > > > > arch-specific hook any more and we can directly call
>> > > > > > > kvm_flush_remote_tlbs() after sync dirty log when flush==true.
>> > > > > > 
>> > > > > > Ya, the asid_flush_mask in kvm_vz_flush_shadow_all() is the only 
>> > > > > > thing
>> > > > > > that prevents calling kvm_flush_remote_tlbs() directly, but I have 
>> > > > > > no
>> > > > > > clue as to the importance of that code.
>> > > > > 
>> > > > > As said above I think the x86 lockdep is really not necessary, then
>> > > > > considering MIPS could be the only one that will use the new hook
>> > > > > introduced in this patch...  Shall we figure that out first?
>> > > > 
>> > > > So I prepped a follow-up patch to make kvm_arch_dirty_log_tlb_flush() a
>> > > > MIPS-only hook and use kvm_flush_remote_tlbs() directly for arm and 
>> > > > x86,
>> > > > but then I realized x86 *has* a hook to do a precise remote TLB flush.
>> > > > There's even an existing kvm_flush_remote_tlbs_with_address() call on a
>> > > > memslot, i.e. this exact scenario.  So arguably, x86 should be using 
>> > > > the
>> > > > more precise flush and should keep kvm_arch_dirty_log_tlb_flush().
>> > > > 
>> > > > But, the hook is only used when KVM is running as an L1 on top of 
>> > > > HyperV,
>> > > > and I assume dirty logging isn't used much, if at all, for L1 KVM on
>> > > > HyperV?
>> > > > 
>> > > > I see three options:
>> > > > 
>> > > >   1. Make kvm_arch_dirty_log_tlb_flush() MIPS-only and call
>> > > >  kvm_flush_remote_tlbs() directly for arm and x86.  Add comments to
>> > > >  explain when an arch should implement 
>> > > > kvm_arch_dirty_log_tlb_flush().
>> > > > 
>> > > >   2. Change x86 to use kvm_flush_remote_tlbs_with_address() when 
>> > > > flushing
>> > > >  a memslot after the dirty log is grabbed by userspace.
>> > > > 
>> > > >   3. Keep the resulting code as is, but add a comment in x86's
>> > > >  kvm_arch_dirty_log_tlb_flush() to explain why it uses
>> > > >  kvm_flush_remote_tlbs() instead of the with_address() variant.
>> > > > 
>> > > > I strongly prefer to (2) or (3), but I'll defer to Vitaly as to which 
>> > > > of
>> > > > those is preferable.
>> > > > 
>> > > > I don't like (1) because (a) it requires more lines code (well 
>> > > > comments),
>> > > > to explain why kvm_flush_remote_tlbs() is the default, and (b) it would
>> > > > require even more comments, which would be x86-specific in generic KVM,
>> > > > to explain why x86 doesn't use its with_address() flush, or we'd lost 
>> > > > that
>> > > > info altogether.
>> > > > 
>> > > 
>> > > I proposed the 4th solution here:
>> > > 
>> > > https://lore.kernel.org/kvm/20200207223520.735523-1-pet...@redhat.com/
>> > > 
>> > > I'm not sure whether that's acceptable, but if it can, then we can
>> > > drop the kvm_arch_dirty_log_tlb_flush() hook, or even move on to
>> > > per-slot tlb flushing.
>> > 
>> > This effectively is per-slot TLB flushing, it just has a different name.
>> > I.e. s/kvm_arch_dirty_log_tlb_flush/kvm_arch_flush_remote_tlbs_memslot.
>> > I'm not opposed to that name change.  And on second and third glance, I
>> > probably prefer it.  That would more or less follow the naming of
>> > kvm_arch_flush_shadow_all() and kvm_arch_flush_shadow_memslot().
>> 
>> Note that the major point of the above patchset is not about doing tlb
>> flush per-memslot or globally.  It's more about whether we can provide
>> a common entrance for TLB flushing.  Say, after that series, we should
>> be able to flush TLB on all archs (majorly, including MIPS) as:
>> 
>>   kvm_flush_remote_tlbs(kvm);
>> 
>> And with the same idea we can also introduce the ranged version.
>> 
>> > 
>> > I don't want to go straight to kvm_arch_flush_remote_tlb_with_address()
>> > because that loses the important distinction (on x86) that slots_lock is
>> > expected to be held.
>> 
>> Sorry I'm still puzzled on why that lockdep is so important and
>> special for x86...  For example, what if we move that lockdep to the
>> callers of the kvm_arch_dirty_log_tlb_flush() calls so it protects
>> even more arch (where we do get/clear dirty log)?  IMHO the callers
>> must be with the slots_lock held anyways no matter for x86 or not.
>
>
> Following the breadcrumbs leads to the comment in
> 

Re: [PATCH v5 15/19] KVM: Provide common implementation for generic dirty log functions

2020-02-17 Thread Vitaly Kuznetsov
Sean Christopherson  writes:

> +Vitaly for HyperV
>
> On Thu, Feb 06, 2020 at 04:41:06PM -0500, Peter Xu wrote:
>> On Thu, Feb 06, 2020 at 01:21:20PM -0800, Sean Christopherson wrote:
>> > On Thu, Feb 06, 2020 at 03:02:00PM -0500, Peter Xu wrote:
>> > > But that matters to this patch because if MIPS can use
>> > > kvm_flush_remote_tlbs(), then we probably don't need this
>> > > arch-specific hook any more and we can directly call
>> > > kvm_flush_remote_tlbs() after sync dirty log when flush==true.
>> > 
>> > Ya, the asid_flush_mask in kvm_vz_flush_shadow_all() is the only thing
>> > that prevents calling kvm_flush_remote_tlbs() directly, but I have no
>> > clue as to the importance of that code.
>> 
>> As said above I think the x86 lockdep is really not necessary, then
>> considering MIPS could be the only one that will use the new hook
>> introduced in this patch...  Shall we figure that out first?
>
> So I prepped a follow-up patch to make kvm_arch_dirty_log_tlb_flush() a
> MIPS-only hook and use kvm_flush_remote_tlbs() directly for arm and x86,
> but then I realized x86 *has* a hook to do a precise remote TLB flush.
> There's even an existing kvm_flush_remote_tlbs_with_address() call on a
> memslot, i.e. this exact scenario.  So arguably, x86 should be using the
> more precise flush and should keep kvm_arch_dirty_log_tlb_flush().
>
> But, the hook is only used when KVM is running as an L1 on top of HyperV,
> and I assume dirty logging isn't used much, if at all, for L1 KVM on
> HyperV?

(Sorry for the delayed reply, was traveling last week)

When KVM runs as an L1 on top of Hyper-V it uses eVMCS by default and
eVMCSv1 doesn't support PML. I've also just checked Hyper-V 2019 and it
hides SECONDARY_EXEC_ENABLE_PML from guests (this was expected).

>
> I see three options:
>
>   1. Make kvm_arch_dirty_log_tlb_flush() MIPS-only and call
>  kvm_flush_remote_tlbs() directly for arm and x86.  Add comments to
>  explain when an arch should implement kvm_arch_dirty_log_tlb_flush().
>
>   2. Change x86 to use kvm_flush_remote_tlbs_with_address() when flushing
>  a memslot after the dirty log is grabbed by userspace.
>
>   3. Keep the resulting code as is, but add a comment in x86's
>  kvm_arch_dirty_log_tlb_flush() to explain why it uses
>  kvm_flush_remote_tlbs() instead of the with_address() variant.
>
> I strongly prefer to (2) or (3), but I'll defer to Vitaly as to which of
> those is preferable.

I'd vote for (2): while this will effectively be kvm_flush_remote_tlbs()
for now, we may think of something smarter in the future (e.g. PV
interface for KVM-on-KVM).
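
Concretely, option (2) would boil down to something like the sketch
below, reusing x86's existing kvm_flush_remote_tlbs_with_address();
the memslot-ranged wrapper name is only illustrative:

void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
					struct kvm_memory_slot *memslot)
{
	/* Callers are expected to hold slots_lock, per the discussion above */
	lockdep_assert_held(&kvm->slots_lock);

	kvm_flush_remote_tlbs_with_address(kvm, memslot->base_gfn,
					   memslot->npages);
}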

>
> I don't like (1) because (a) it requires more lines code (well comments),
> to explain why kvm_flush_remote_tlbs() is the default, and (b) it would
> require even more comments, which would be x86-specific in generic KVM,
> to explain why x86 doesn't use its with_address() flush, or we'd lost that
> info altogether.
>

-- 
Vitaly

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH 1/2] KVM: arm64: Add PMU event filtering infrastructure

2020-02-17 Thread Robin Murphy

On 15/02/2020 10:28 am, Marc Zyngier wrote:

On Fri, 14 Feb 2020 22:01:01 +,
Robin Murphy  wrote:

Hi Robin,



Hi Marc,

On 2020-02-14 6:36 pm, Marc Zyngier wrote:
[...]

@@ -585,6 +585,14 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
pmc->idx != ARMV8_PMU_CYCLE_IDX)
return;
+   /*
+    * If we have a filter in place and the event isn't allowed, do
+    * not install a perf event either.
+    */
+   if (vcpu->kvm->arch.pmu_filter &&
+       !test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
+       return;


If I'm reading the derivation of eventsel right, this will end up
treating cycle counter events (aliased to SW_INCR) differently from
CPU_CYCLES, which doesn't seem desirable.


Indeed, this doesn't look quite right.

Looking at the description of event 0x11, it doesn't seem to count
exactly like the cycle counter (there are a number of PMCR controls
affecting it). But none of these actually apply to our PMU emulation
(no secure mode, and the idea of dealing with virtual EL2 in the
context of the PMU is... not appealing).

Now, given that we implement the cycle counter with event 0x11 anyway,
I don't think there is any reason to deal with them separately.


Right, from the user's PoV they can only ask for event 0x11, and where 
it gets scheduled is more of a black-box implementation detail. Reading 
the Arm ARM doesn't leave me entirely convinced that cycles couldn't 
ever leak idle/not-idle information between closely-coupled PEs, so this 
might not be entirely academic.



Also, if the user did try to blacklist SW_INCR for ridiculous
reasons, we'd need to special-case kvm_pmu_software_increment() to
make it (not) work as expected, right?


I thought of that one, and couldn't see a reason to blacklist it
(after all, the guest could also increment a variable) and send itself
an interrupt. I'm tempted to simply document that event 0 is never
filtered.


I'd say you're on even stronger ground simply because KVM's 
implementation of SW_INCR doesn't go near the PMU hardware at all, thus 
is well beyond the purpose of the blacklist anyway. I believe it's 
important that how the code behaves matches expectations, but there's no 
harm in changing the latter as appropriate ;)
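
Putting the two points together, the filter check might end up looking
something like this sketch (constant names are from the kernel's ARMv8
PMU definitions, 'data' is the PMEVTYPER value already read by
kvm_pmu_create_perf_event(); this is not the final patch):

	u64 eventsel;

	/* Filter the cycle counter as if it were event 0x11 (CPU_CYCLES) */
	if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
		eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
	else
		eventsel = data & ARMV8_PMU_EVTYPE_EVENT;

	/* SW_INCR (event 0) is never filtered */
	if (eventsel != ARMV8_PMUV3_PERFCTR_SW_INCR &&
	    vcpu->kvm->arch.pmu_filter &&
	    !test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
		return;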


Cheers,
Robin.
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 32/94] KVM: arm64: nv: Filter out unsupported features from ID regs

2020-02-17 Thread Mark Rutland
On Tue, Feb 11, 2020 at 05:48:36PM +, Marc Zyngier wrote:
> As there are a number of features that we either can't support,
> or don't want to support right away with NV, let's add some
> basic filtering so that we don't advertise silly things to the
> EL2 guest.
> 
> Signed-off-by: Marc Zyngier 
> ---
>  arch/arm64/include/asm/kvm_nested.h |   6 ++
>  arch/arm64/include/asm/sysreg.h |  11 +++
>  arch/arm64/kvm/nested.c | 115 
>  arch/arm64/kvm/sys_regs.c   |   5 +-
>  4 files changed, 136 insertions(+), 1 deletion(-)

> +/*
> + * Our emulated CPU doesn't support all the possible features. For the
> + * sake of simplicity (and probably mental sanity), wipe out a number
> + * of feature bits we don't intend to support for the time being.
> + * This list should get updated as new features get added to the NV
> + * support, and new extension to the architecture.
> + *
> + * Revisit: Implement as a whitelist rather than a blacklist?

I certainly think we want this to be a whitelist; blacklists are awful
for both forwards-compatibility and backporting.

I realise that's pain up-front, but I think it saves us much much more
in the long run.

Thanks,
Mark.
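
As an illustration of the whitelist shape, one of the cases might
become something like the following (the particular set of retained
fields is hypothetical and would need auditing against what NV
actually supports):

	case SYS_ID_AA64MMFR1_EL1:
		/* Keep only the fields we positively support */
		val &= (FEATURE(ID_AA64MMFR1_VMIDBITS) |
			FEATURE(ID_AA64MMFR1_VHE) |
			FEATURE(ID_AA64MMFR1_PAN) |
			FEATURE(ID_AA64MMFR1_LOR));
		break;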

> + */
> +void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
> +   const struct sys_reg_desc *r)
> +{
> + u32 id = sys_reg((u32)r->Op0, (u32)r->Op1,
> +  (u32)r->CRn, (u32)r->CRm, (u32)r->Op2);
> + u64 val, tmp;
> +
> + if (!nested_virt_in_use(v))
> + return;
> +
> + val = p->regval;
> +
> + switch (id) {
> + case SYS_ID_AA64DFR0_EL1:
> + /* No SPE */
> + val &= ~FEATURE(ID_AA64DFR0_PMSVER);
> + /* Cap PMU to ARMv8.1 */
> + tmp = FIELD_GET(FEATURE(ID_AA64DFR0_PMUVER), val);
> + if (tmp > 0b0100) {
> +			val &= ~FEATURE(ID_AA64DFR0_PMUVER);
> + val |= FIELD_PREP(FEATURE(ID_AA64DFR0_PMUVER), 0b0100);
> + }
> + /* No trace */
> +		val &= ~FEATURE(ID_AA64DFR0_TRACEVER);
> + /* Cap Debug to ARMv8.1 */
> + tmp = FIELD_GET(FEATURE(ID_AA64DFR0_DEBUGVER), val);
> + if (tmp > 0b0111) {
> +			val &= ~FEATURE(ID_AA64DFR0_DEBUGVER);
> +			val |= FIELD_PREP(FEATURE(ID_AA64DFR0_DEBUGVER), 0b0111);
> + }
> + break;
> +
> + case SYS_ID_AA64ISAR1_EL1:
> + /* No PtrAuth */
> + val &= ~(FEATURE(ID_AA64ISAR1_APA) |
> +  FEATURE(ID_AA64ISAR1_API) |
> +  FEATURE(ID_AA64ISAR1_GPA) |
> +  FEATURE(ID_AA64ISAR1_GPI));
> + break;
> +
> + case SYS_ID_AA64MMFR0_EL1:
> + /* Cap PARange to 40bits */
> + tmp = FIELD_GET(FEATURE(ID_AA64MMFR0_PARANGE), val);
> + if (tmp > 0b0010) {
> + val &= ~FEATURE(ID_AA64MMFR0_PARANGE);
> +			val |= FIELD_PREP(FEATURE(ID_AA64MMFR0_PARANGE), 0b0010);
> + }
> + break;
> +
> + case SYS_ID_AA64MMFR1_EL1:
> + /* No XNX */
> + val &= ~FEATURE(ID_AA64MMFR1_XNX);
> + /* No RAS */
> + val &= ~FEATURE(ID_AA64MMFR1_SpecSEI);
> + /* No Hierarchical Permission Disable */
> + val &= ~FEATURE(ID_AA64MMFR1_HPD);
> +		/* No Hardware Access flags and Dirty Bit State update */
> + val &= ~FEATURE(ID_AA64MMFR1_HADBS);
> + break;
> +
> + case SYS_ID_AA64MMFR2_EL1:
> + /* No ARMv8.2-EVT */
> + val &= ~FEATURE(ID_AA64MMFR2_EVT);
> + /* No ARMv8.4-TTRem */
> + val &= ~FEATURE(ID_AA64MMFR2_BBM);
> + /* No ARMv8.4-TTST */
> + val &= ~FEATURE(ID_AA64MMFR2_ST);
> + /* No ARMv8.3-CCIDX */
> + val &= ~FEATURE(ID_AA64MMFR2_CCIDX);
> + /* No ARMv8.2-LVA */
> + val &= ~FEATURE(ID_AA64MMFR2_LVA);
> + break;
> +
> + case SYS_ID_AA64PFR0_EL1:
> + /* No AMU */
> + val &= ~FEATURE(ID_AA64PFR0_AMU);
> + /* No MPAM */
> + val &= ~FEATURE(ID_AA64PFR0_MPAM);
> + /* No Secure EL2 */
> + val &= ~FEATURE(ID_AA64PFR0_SEL2);
> + /* No RAS */
> + val &= ~FEATURE(ID_AA64PFR0_RAS);
> + /* No SVE */
> + val &= ~FEATURE(ID_AA64PFR0_SVE);
> + /* EL2 is AArch64 only */
> + val &= ~FEATURE(ID_AA64PFR0_EL2);
> + val |= FIELD_PREP(FEATURE(ID_AA64PFR0_EL2), 0b0001);
> + break;
> +
> + case SYS_ID_AA64PFR1_EL1:
> + /* No MTE */
> + val &= ~FEATURE(ID_AA64PFR1_MTE);
> + /* No BT */
> + val &= ~FEATURE(ID_AA64PFR1_BT);
> + break;
> + }
> +
> + p->regval = val;

Re: [PATCH v2 31/94] KVM: arm64: nv: Only toggle cache for virtual EL2 when SCTLR_EL2 changes

2020-02-17 Thread Mark Rutland
On Tue, Feb 11, 2020 at 05:48:35PM +, Marc Zyngier wrote:
> From: Christoffer Dall 
> 
> So far we were flushing almost the entire universe whenever a VM would
> load/unload the SCTLR_EL1 and the two versions of that register had
> different MMU enabled settings.  This turned out to be so slow that it
> prevented forward progress for a nested VM, because a scheduler timer
> tick interrupt would always be pending when we reached the nested VM.
> 
> To avoid this problem, we consider the SCTLR_EL2 when evaluating if
> caches are on or off when entering virtual EL2 (because this is the
> value that we end up shadowing onto the hardware EL1 register).
> 
> Signed-off-by: Christoffer Dall 
> Signed-off-by: Jintack Lim 
> Signed-off-by: Marc Zyngier 
> ---
>  arch/arm64/include/asm/kvm_mmu.h | 6 +-
>  1 file changed, 5 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_mmu.h 
> b/arch/arm64/include/asm/kvm_mmu.h
> index ee47f7637f28..ec4de0613e7c 100644
> --- a/arch/arm64/include/asm/kvm_mmu.h
> +++ b/arch/arm64/include/asm/kvm_mmu.h
> @@ -88,6 +88,7 @@ alternative_cb_end
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  void kvm_update_va_mask(struct alt_instr *alt,
>   __le32 *origptr, __le32 *updptr, int nr_inst);
> @@ -305,7 +306,10 @@ struct kvm;
>  
>  static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
>  {
> - return (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
> + if (vcpu_mode_el2(vcpu))
> + return (__vcpu_sys_reg(vcpu, SCTLR_EL2) & 0b101) == 0b101;
> + else
> + return (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
>  }

How about:

static bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
{
unsigned long cm = SCTLR_ELx_C | SCTLR_ELx_M;
unsigned long sctlr;

if (vcpu_mode_el2(vcpu))
sctlr = __vcpu_sys_reg(vcpu, SCTLR_EL2);
else
sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);

return (sctlr & cm) == cm;
}

... to avoid duplication and make it clearer which fields we're
accessing.

Thanks,
Mark.

>  
>  static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long 
> size)
> -- 
> 2.20.1
> 
> ___
> kvmarm mailing list
> kvmarm@lists.cs.columbia.edu
> https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 15/94] KVM: arm64: nv: Handle SPSR_EL2 specially

2020-02-17 Thread Mark Rutland
On Tue, Feb 11, 2020 at 05:48:19PM +, Marc Zyngier wrote:
> SPSR_EL2 needs special attention when running nested on ARMv8.3:
> 
> If taking an exception while running at vEL2 (actually EL1), the
> HW will update the SPSR_EL1 register with the EL1 mode. We need
> to track this in order to make sure that accesses to the virtual
> view of SPSR_EL2 is correct.
> 
> To do so, we place an illegal value in SPSR_EL1.M, and patch it
> accordingly if required when accessing it.
> 
> Signed-off-by: Marc Zyngier 
> ---
>  arch/arm64/include/asm/kvm_emulate.h | 45 
>  arch/arm64/kvm/sys_regs.c| 23 --
>  2 files changed, 66 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_emulate.h 
> b/arch/arm64/include/asm/kvm_emulate.h
> index 486978d0346b..26552c8571cb 100644
> --- a/arch/arm64/include/asm/kvm_emulate.h
> +++ b/arch/arm64/include/asm/kvm_emulate.h
> @@ -277,11 +277,51 @@ static inline bool is_hyp_ctxt(const struct kvm_vcpu 
> *vcpu)
>   return __is_hyp_ctxt(>arch.ctxt);
>  }
>  
> +static inline u64 __fixup_spsr_el2_write(struct kvm_cpu_context *ctxt, u64 val)
> +{
> + if (!__vcpu_el2_e2h_is_set(ctxt)) {
> + /*
> +  * Clear the .M field when writing SPSR to the CPU, so that we
> +  * can detect when the CPU clobbered our SPSR copy during a
> +  * local exception.
> +  */
> + val &= ~0xc;

Probably worth making that UL(0xc) so that we consistently treat the
SPSRs as 64-bits.

Perhaps:

#define SPSR_EL2_M_3_2  UL(0xc)

... so that we can use it consistently below?

> + }
> +
> + return val;
> +}
> +
> +static inline u64 __fixup_spsr_el2_read(const struct kvm_cpu_context *ctxt, u64 val)
> +{
> + if (__vcpu_el2_e2h_is_set(ctxt))
> + return val;
> +
> + /*
> +  * SPSR.M == 0 means the CPU has not touched the SPSR, so the
> +  * register has still the value we saved on the last write.
> +  */
> + if ((val & 0xc) == 0)
> + return ctxt->sys_regs[SPSR_EL2];
> +
> + /*
> +  * Otherwise there was a "local" exception on the CPU,

Perhaps "exit-less" rather than "local"?

Thanks,
Mark.

> +  * which from the guest's point of view was being taken from
> +  * EL2 to EL2, although it actually happened to be from
> +  * EL1 to EL1.
> +  * So we need to fix the .M field in SPSR, to make it look
> +  * like EL2, which is what the guest would expect.
> +  */
> + return (val & ~0x0c) | CurrentEL_EL2;
> +}
> +
>  static inline unsigned long vcpu_read_spsr(const struct kvm_vcpu *vcpu)
>  {
>   if (vcpu_mode_is_32bit(vcpu))
>   return vcpu_read_spsr32(vcpu);
>  
> + if (unlikely(vcpu_mode_el2(vcpu)))
> + return vcpu_read_sys_reg(vcpu, SPSR_EL2);
> +
>   if (vcpu->arch.sysregs_loaded_on_cpu)
>   return read_sysreg_el1(SYS_SPSR);
>   else
> @@ -295,6 +335,11 @@ static inline void vcpu_write_spsr(struct kvm_vcpu 
> *vcpu, unsigned long v)
>   return;
>   }
>  
> + if (unlikely(vcpu_mode_el2(vcpu))) {
> + vcpu_write_sys_reg(vcpu, v, SPSR_EL2);
> + return;
> + }
> +
>   if (vcpu->arch.sysregs_loaded_on_cpu)
>   write_sysreg_el1(v, SYS_SPSR);
>   else
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 64be9f452ad6..8c7d3d410689 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -258,11 +258,14 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
>   goto memory_read;
>  
>   /*
> -  * ELR_EL2 is special cased for now.
> +  * ELR_EL2 and SPSR_EL2 are special cased for now.
>*/
>   switch (reg) {
>   case ELR_EL2:
>   return read_sysreg_el1(SYS_ELR);
> + case SPSR_EL2:
> + val = read_sysreg_el1(SYS_SPSR);
> +		return __fixup_spsr_el2_read(&vcpu->arch.ctxt, val);
>   }
>  
>   /*
> @@ -319,6 +322,10 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
>   case ELR_EL2:
>   write_sysreg_el1(val, SYS_ELR);
>   return;
> + case SPSR_EL2:
> +		val = __fixup_spsr_el2_write(&vcpu->arch.ctxt, val);
> + write_sysreg_el1(val, SYS_SPSR);
> + return;
>   }
>  
> 	/* No EL1 counterpart? We're done here. */
> @@ -1589,6 +1596,18 @@ static bool access_sp_el1(struct kvm_vcpu *vcpu,
>   return true;
>  }
>  
> +static bool access_spsr_el2(struct kvm_vcpu *vcpu,
> + struct sys_reg_params *p,
> + const struct sys_reg_desc *r)
> +{
> + if (p->is_write)
> + vcpu_write_sys_reg(vcpu, p->regval, SPSR_EL2);
> + else
> +  

Re: [PATCH v2 12/94] KVM: arm64: nv: Add EL2->EL1 translation helpers

2020-02-17 Thread Mark Rutland
On Tue, Feb 11, 2020 at 05:48:16PM +, Marc Zyngier wrote:
> Some EL2 system registers immediately affect the current execution
> of the system, so we need to use their respective EL1 counterparts.
> For this we need to define a mapping between the two.
> 
> These helpers will get used in subsequent patches.
> 
> Co-developed-by: Andre Przywara 
> Signed-off-by: Andre Przywara 
> Signed-off-by: Marc Zyngier 
> ---
>  arch/arm64/include/asm/kvm_emulate.h |  6 ++++++
>  arch/arm64/kvm/sys_regs.c            | 48 ++++++++++++++++++++++++++++++++
>  2 files changed, 54 insertions(+)
> 
> diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
> index 282e9ddbe1bc..486978d0346b 100644
> --- a/arch/arm64/include/asm/kvm_emulate.h
> +++ b/arch/arm64/include/asm/kvm_emulate.h
> @@ -58,6 +58,12 @@ void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu);
>  int kvm_inject_nested_sync(struct kvm_vcpu *vcpu, u64 esr_el2);
>  int kvm_inject_nested_irq(struct kvm_vcpu *vcpu);
>  
> +u64 translate_tcr(u64 tcr);
> +u64 translate_cptr(u64 tcr);
> +u64 translate_sctlr(u64 tcr);
> +u64 translate_ttbr0(u64 tcr);
> +u64 translate_cnthctl(u64 tcr);

Sorry to bikeshed, but could we please make the direction of translation
explicit in the name? e.g. tcr_el2_to_tcr_el1(), or tcr_el2_to_el1()?

> +
>  static inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
>  {
>   return !(vcpu->arch.hcr_el2 & HCR_RW);
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 4b5310ea3bf8..634d3ee6799c 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -65,6 +65,54 @@ static bool write_to_read_only(struct kvm_vcpu *vcpu,
>   return false;
>  }
>  
> +static u64 tcr_el2_ips_to_tcr_el1_ps(u64 tcr_el2)
> +{
> + return ((tcr_el2 & TCR_EL2_PS_MASK) >> TCR_EL2_PS_SHIFT)
> + << TCR_IPS_SHIFT;
> +}
> +
> +u64 translate_tcr(u64 tcr)
> +{
> + return TCR_EPD1_MASK |  /* disable TTBR1_EL1 */
> +((tcr & TCR_EL2_TBI) ? TCR_TBI0 : 0) |
> +tcr_el2_ips_to_tcr_el1_ps(tcr) |
> +(tcr & TCR_EL2_TG0_MASK) |
> +(tcr & TCR_EL2_ORGN0_MASK) |
> +(tcr & TCR_EL2_IRGN0_MASK) |
> +(tcr & TCR_EL2_T0SZ_MASK);
> +}

I'm guessing this is only meant to cover a !VHE guest EL2 for the
moment, so only covers HCR_EL2.E2H=0? It might be worth mentioning in
the commit message.

It looks like this is missing some bits (e.g. TBID, HPD, HD, HA) that
could apply to the Guest-EL2 Stage-1. Maybe those are added by later
patches, but that's not obvious to me at this point in the series.

> +
> +u64 translate_cptr(u64 cptr_el2)
> +{
> + u64 cpacr_el1 = 0;
> +
> + if (!(cptr_el2 & CPTR_EL2_TFP))
> + cpacr_el1 |= CPACR_EL1_FPEN;
> + if (cptr_el2 & CPTR_EL2_TTA)
> + cpacr_el1 |= CPACR_EL1_TTA;
> + if (!(cptr_el2 & CPTR_EL2_TZ))
> + cpacr_el1 |= CPACR_EL1_ZEN;
> +
> + return cpacr_el1;
> +}

Looking in ARM DDI 0487E.a I also see TCPAC and TAM; I guess we don't
need to map those to anything?

> +
> +u64 translate_sctlr(u64 sctlr)
> +{
> + /* Bit 20 is RES1 in SCTLR_EL1, but RES0 in SCTLR_EL2 */
> + return sctlr | BIT(20);
> +}

Looking in ARM DDI 0487E.a section D13.2.105, bit 20 is TSCXT, so this
might need to be reconsidered.

> +
> +u64 translate_ttbr0(u64 ttbr0)
> +{
> + /* Force ASID to 0 (ASID 0 or RES0) */
> + return ttbr0 & ~GENMASK_ULL(63, 48);
> +}

Again, I assume this is only meant to provide a !VHE EL2 as this stands.

> +
> +u64 translate_cnthctl(u64 cnthctl)
> +{
> + return ((cnthctl & 0x3) << 10) | (cnthctl & 0xfc);
> +}

I assume this yields CNTKCTL_EL1, but I don't entirely follow. For
virtual-EL2 don't we have to force EL1P(C)TEN so that virtual-EL2
accesses don't trap?
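
For what it's worth, an annotated reading of the helper, assuming the
intent is to convert the E2H==0 CNTHCTL_EL2 layout (EL1PCTEN/EL1PCEN
in bits [1:0]) into the E2H==1 layout (the same controls in bits
[11:10]), with the event-stream controls in [7:2] untouched:

u64 translate_cnthctl(u64 cnthctl)
{
	u64 ret;

	/* EL1PCTEN (bit 0) and EL1PCEN (bit 1) move up to bits 10 and 11 */
	ret = (cnthctl & GENMASK_ULL(1, 0)) << 10;

	/* EVNTEN/EVNTDIR/EVNTI ([7:2]) keep their positions */
	ret |= cnthctl & GENMASK_ULL(7, 2);

	return ret;
}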

Thanks,
Mark.
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH 2/2] KVM: arm/arm64: Fix htimer setup for emulated timer when irq goes down

2020-02-17 Thread Tomasz Nowicki
We still need to schedule the software timer if the line goes down
(should_fire == 0 && should_fire != ctx->irq.level) and there is
still some time until it expires again.

Fixes: bee038a674875 ("KVM: arm/arm64: Rework the timer code to use a timer_map")
Signed-off-by: Tomasz Nowicki 
---
 virt/kvm/arm/arch_timer.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index f1814f733ef8..de14520c6cc2 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -316,10 +316,8 @@ static void timer_emulate(struct arch_timer_context *ctx)
 
trace_kvm_timer_emulate(ctx, should_fire);
 
-   if (should_fire != ctx->irq.level) {
+   if (should_fire != ctx->irq.level)
kvm_timer_update_irq(ctx->vcpu, should_fire, ctx);
-   return;
-   }
 
/*
 * If the timer can fire now, we don't need to have a soft timer
-- 
2.17.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH 0/2] KVM: arm/arm64: Fixes for scheduling htimer of emulated timers

2020-02-17 Thread Tomasz Nowicki
This small series contains two fixes which were found while testing
Marc's ARM NV patch set, where we are going to have at most 4 timers
and the two are purely emulated.

First patch cancels hrtimer when the timer should fire and there is no
change in irq line level which suppresses timer interrupt storm when
guest enables interrupts.

Second patch makes sure that hrtimer is scheduled when timer irq line
goes down and there is still some time to expire.

Tomasz Nowicki (2):
  KVM: arm/arm64: Fix spurious htimer setup for emulated timer
  KVM: arm/arm64: Fix htimer setup for emulated timer when irq goes down

 virt/kvm/arm/arch_timer.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

-- 
2.17.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH 1/2] KVM: arm/arm64: Fix spurious htimer setup for emulated timer

2020-02-17 Thread Tomasz Nowicki
The emulated timer needs no further software timer if the timer should fire
now and there is no change in irq line level: (should_fire == 1 &&
should_fire == ctx->irq.level). In that case htimer should be simply
canceled.

Fixes: bee038a674875 ("KVM: arm/arm64: Rework the timer code to use a timer_map")
Signed-off-by: Tomasz Nowicki 
---
 virt/kvm/arm/arch_timer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 0d9438e9de2a..f1814f733ef8 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -326,7 +326,7 @@ static void timer_emulate(struct arch_timer_context *ctx)
 * scheduled for the future.  If the timer cannot fire at all,
 * then we also don't need a soft timer.
 */
-   if (!kvm_timer_irq_can_fire(ctx)) {
+   if (should_fire || !kvm_timer_irq_can_fire(ctx)) {
 	soft_timer_cancel(&ctx->hrtimer);
return;
}
-- 
2.17.1
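
For reference, with both patches in this series applied, timer_emulate()
would read roughly as follows (a sketch against the v5.6-era
virt/kvm/arm/arch_timer.c):

static void timer_emulate(struct arch_timer_context *ctx)
{
	bool should_fire = kvm_timer_should_fire(ctx);

	trace_kvm_timer_emulate(ctx, should_fire);

	if (should_fire != ctx->irq.level)
		kvm_timer_update_irq(ctx->vcpu, should_fire, ctx);

	/*
	 * If the timer can fire now, we don't need to have a soft timer
	 * scheduled for the future.  If the timer cannot fire at all,
	 * then we also don't need a soft timer.
	 */
	if (should_fire || !kvm_timer_irq_can_fire(ctx)) {
		soft_timer_cancel(&ctx->hrtimer);
		return;
	}

	soft_timer_start(&ctx->hrtimer, kvm_timer_compute_delta(ctx));
}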

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH 0/2] arm64: add finalized cap helper

2020-02-17 Thread Marc Zyngier

On 2020-02-10 12:27, Mark Rutland wrote:

Across arm64 we use cpus_have_const_cap() to check for a capability
without a runtime check. Prior to capabilities being finalized
cpus_have_const_cap() falls back to a runtime check of the cpu_hwcaps
array. In some cases we know that code is never invoked prior to the
capabilities being finalized, and the fallback code is redundant (and
unsound if ever executed in hyp context).

So that we can avoid the redundant code and detect when the caps are
unexpectedly checked too early, this series adds a new
cpus_have_final_cap() helper, and migrates the KVM hyp code over to it.

I'm hoping to use this as part of the entry.S -> entry-common.c
conversion, and there are other places in arm64 that could potentially
use this today.

Thanks,
Mark.

Mark Rutland (2):
  arm64: cpufeature: add cpus_have_final_cap()
  arm64: kvm: hyp: use cpus_have_final_cap()

 arch/arm64/include/asm/cpufeature.h | 47 +++++++++++++++++++++++++++++-----
 arch/arm64/kvm/hyp/switch.c | 14 +--
 arch/arm64/kvm/hyp/sysreg-sr.c  |  8 +++
 arch/arm64/kvm/hyp/tlb.c|  8 +++
 4 files changed, 57 insertions(+), 20 deletions(-)
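
As context for readers, the new helper might plausibly look like the
sketch below (inferred from the cover letter; the BUG() on
pre-finalization use is the "detect when caps are checked too early"
part):

static __always_inline bool cpus_have_final_cap(int num)
{
	if (system_capabilities_finalized())
		return __cpus_have_const_cap(num);
	else
		BUG();
}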


Seems like a valuable optimization. For the series:

Reviewed-by: Marc Zyngier 

M.
--
Jazz is not dead. It just smells funny...
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH] kvm: arm/arm64: fold VHE entry/exit work into kvm_vcpu_run_vhe()

2020-02-17 Thread Marc Zyngier

Hi Mark,

On 2020-02-10 11:47, Mark Rutland wrote:

With VHE, running a vCPU always requires the sequence:

1. kvm_arm_vhe_guest_enter();
2. kvm_vcpu_run_vhe();
3. kvm_arm_vhe_guest_exit()

... and as we invoke this from the shared arm/arm64 KVM code, 32-bit arm
has to provide stubs for all three functions.

To simplify the common code, and make it easier to make further
modifications to the arm64-specific portions in the near future, let's
fold kvm_arm_vhe_guest_enter() and kvm_arm_vhe_guest_exit() into
kvm_vcpu_run_vhe().

The 32-bit stubs for kvm_arm_vhe_guest_enter() and
kvm_arm_vhe_guest_exit() are removed, as they are no longer used. The
32-bit stub for kvm_vcpu_run_vhe() is left as-is.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland 
Cc: Catalin Marinas 
Cc: James Morse 
Cc: Julien Thierry 
Cc: Marc Zyngier 
Cc: Suzuki K Poulose 
Cc: Will Deacon 
Cc: kvmarm@lists.cs.columbia.edu
---
 arch/arm/include/asm/kvm_host.h   |  3 ---
 arch/arm64/include/asm/kvm_host.h | 32 

 arch/arm64/kvm/hyp/switch.c   | 39 
+--

 virt/kvm/arm/arm.c|  2 --
 4 files changed, 37 insertions(+), 39 deletions(-)

Hi Marc,

This is a preparatory patch for the arm64 entry rework I'm doing: I need to
move the DAIF manipulation into the same function so that I can replace that
with a common DAIF+PMR save/restore sequence.

If it's possible to queue this patch for v5.6-rc*, it would help to minimize
conflict in the v5.7 merge window, and would be much appreciated.

I've tested this on a ThunderX2 machine atop v5.6-rc1 defconfig, booting the
host and running a test VM.

Thanks,
Mark.

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index c3314b286a61..a827b4d60d38 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -392,9 +392,6 @@ static inline void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) {}
 static inline void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu) {}
 static inline void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu) {}

-static inline void kvm_arm_vhe_guest_enter(void) {}
-static inline void kvm_arm_vhe_guest_exit(void) {}
-
 #define KVM_BP_HARDEN_UNKNOWN  -1
 #define KVM_BP_HARDEN_WA_NEEDED0
 #define KVM_BP_HARDEN_NOT_REQUIRED 1
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index d87aa609d2b6..57fd46acd058 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -626,38 +626,6 @@ static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {}
 static inline void kvm_clr_pmu_events(u32 clr) {}
 #endif

-static inline void kvm_arm_vhe_guest_enter(void)
-{
-   local_daif_mask();
-
-   /*
-* Having IRQs masked via PMR when entering the guest means the GIC
-* will not signal the CPU of interrupts of lower priority, and the
-* only way to get out will be via guest exceptions.
-* Naturally, we want to avoid this.
-*
-* local_daif_mask() already sets GIC_PRIO_PSR_I_SET, we just need a
-* dsb to ensure the redistributor forwards EL2 IRQs to the CPU.
-*/
-   pmr_sync();
-}
-
-static inline void kvm_arm_vhe_guest_exit(void)
-{
-   /*
-* local_daif_restore() takes care to properly restore PSTATE.DAIF
-* and the GIC PMR if the host is using IRQ priorities.
-*/
-   local_daif_restore(DAIF_PROCCTX_NOIRQ);
-
-   /*
-* When we exit from the guest we change a number of CPU configuration
-* parameters, such as traps.  Make sure these changes take effect
-* before running the host or additional guests.
-*/
-   isb();
-}
-
 #define KVM_BP_HARDEN_UNKNOWN  -1
 #define KVM_BP_HARDEN_WA_NEEDED0
 #define KVM_BP_HARDEN_NOT_REQUIRED 1
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index dfe8dd172512..925086b46136 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -625,7 +625,7 @@ static void __hyp_text __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt)
 }

 /* Switch to the guest for VHE systems running in EL2 */
-int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
+static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
 {
struct kvm_cpu_context *host_ctxt;
struct kvm_cpu_context *guest_ctxt;
@@ -678,7 +678,42 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)

return exit_code;
 }
-NOKPROBE_SYMBOL(kvm_vcpu_run_vhe);
+NOKPROBE_SYMBOL(__kvm_vcpu_run_vhe);
+
+int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
+{
+   int ret;
+
+   local_daif_mask();
+
+   /*
+* Having IRQs masked via PMR when entering the guest means the GIC
+* will not signal the CPU of interrupts of lower priority, and the
+* only way to get out will be via guest exceptions.
+* 

Re: [PATCH v2 09/94] KVM: arm64: nv: Support virtual EL2 exceptions

2020-02-17 Thread Marc Zyngier

On 2020-02-17 12:52, Mark Rutland wrote:

On Tue, Feb 11, 2020 at 05:48:13PM +, Marc Zyngier wrote:

From: Jintack Lim 

Support injecting exceptions and performing exception returns to and
from virtual EL2.  This must be done entirely in software except when
taking an exception from vEL0 to vEL2 when the virtual HCR_EL2.{E2H,TGE}
== {1,1}  (a VHE guest hypervisor).

Signed-off-by: Jintack Lim 
Signed-off-by: Christoffer Dall 
Signed-off-by: Marc Zyngier 
---
 arch/arm64/include/asm/kvm_arm.h |  17 +++
 arch/arm64/include/asm/kvm_emulate.h |  22 
 arch/arm64/kvm/Makefile  |   2 +
 arch/arm64/kvm/emulate-nested.c  | 183 +++++++++++++++++++++++++++++++++++
 arch/arm64/kvm/inject_fault.c|  12 --
 arch/arm64/kvm/trace.h   |  56 
 6 files changed, 280 insertions(+), 12 deletions(-)
 create mode 100644 arch/arm64/kvm/emulate-nested.c


[...]


+static void enter_el2_exception(struct kvm_vcpu *vcpu, u64 esr_el2,
+   enum exception_type type)
+{
+   trace_kvm_inject_nested_exception(vcpu, esr_el2, type);
+
+   vcpu_write_sys_reg(vcpu, *vcpu_cpsr(vcpu), SPSR_EL2);
+   vcpu_write_sys_reg(vcpu, *vcpu_pc(vcpu), ELR_EL2);
+   vcpu_write_sys_reg(vcpu, esr_el2, ESR_EL2);
+
+   *vcpu_pc(vcpu) = get_el2_except_vector(vcpu, type);
+   /* On an exception, PSTATE.SP becomes 1 */
+   *vcpu_cpsr(vcpu) = PSR_MODE_EL2h;
+   *vcpu_cpsr(vcpu) |= PSR_A_BIT | PSR_F_BIT | PSR_I_BIT | PSR_D_BIT;
+}


This needs to be fixed up to handle the rest of the PSTATE bits.

It should be possible to refactor get_except64_pstate() for that. I
*think* the only differences are bits affects by SCTLR controls, but
someone should audit that -- good thing we added references. :)


Absolutely. It is on my list of things to fix for the next drop. Also,
("arm64: KVM: nv: Honor SCTLR_EL2.SPAN on entering vEL2") should be
folded in there (or simply dropped if we can reuse the EL1 stuff).

Thanks,

M.
--
Jazz is not dead. It just smells funny...
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 09/94] KVM: arm64: nv: Support virtual EL2 exceptions

2020-02-17 Thread Mark Rutland
On Tue, Feb 11, 2020 at 05:48:13PM +, Marc Zyngier wrote:
> From: Jintack Lim 
> 
> Support injecting exceptions and performing exception returns to and
> from virtual EL2.  This must be done entirely in software except when
> taking an exception from vEL0 to vEL2 when the virtual HCR_EL2.{E2H,TGE}
> == {1,1}  (a VHE guest hypervisor).
> 
> Signed-off-by: Jintack Lim 
> Signed-off-by: Christoffer Dall 
> Signed-off-by: Marc Zyngier 
> ---
>  arch/arm64/include/asm/kvm_arm.h |  17 +++
>  arch/arm64/include/asm/kvm_emulate.h |  22 
>  arch/arm64/kvm/Makefile  |   2 +
>  arch/arm64/kvm/emulate-nested.c  | 183 +++
>  arch/arm64/kvm/inject_fault.c|  12 --
>  arch/arm64/kvm/trace.h   |  56 
>  6 files changed, 280 insertions(+), 12 deletions(-)
>  create mode 100644 arch/arm64/kvm/emulate-nested.c

[...]

> +static void enter_el2_exception(struct kvm_vcpu *vcpu, u64 esr_el2,
> + enum exception_type type)
> +{
> + trace_kvm_inject_nested_exception(vcpu, esr_el2, type);
> +
> + vcpu_write_sys_reg(vcpu, *vcpu_cpsr(vcpu), SPSR_EL2);
> + vcpu_write_sys_reg(vcpu, *vcpu_pc(vcpu), ELR_EL2);
> + vcpu_write_sys_reg(vcpu, esr_el2, ESR_EL2);
> +
> + *vcpu_pc(vcpu) = get_el2_except_vector(vcpu, type);
> + /* On an exception, PSTATE.SP becomes 1 */
> + *vcpu_cpsr(vcpu) = PSR_MODE_EL2h;
> + *vcpu_cpsr(vcpu) |= PSR_A_BIT | PSR_F_BIT | PSR_I_BIT | PSR_D_BIT;
> +}

This needs to be fixed up to handle the rest of the PSTATE bits.

It should be possible to refactor get_except64_pstate() for that. I
*think* the only differences are bits affected by SCTLR controls, but
someone should audit that -- good thing we added references. :)

Thanks,
Mark.
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v4 02/20] irqchip/gic-v3: Use SGIs without active state if offered

2020-02-17 Thread Zenghui Yu

Hi Marc,

On 2020/2/14 22:57, Marc Zyngier wrote:

To allow the direct injection of SGIs into a guest, the GICv4.1
architecture has to sacrifice the Active state so that SGIs look
a lot like LPIs (they are injected by the same mechanism).

In order not to break existing software, the architecture offers
guest OSs the choice: SGIs with or without an active
state. It is the hypervisor's duty to honor the guest's choice.

For this, the architecture offers a discovery bit indicating whether
the GIC supports GICv4.1 SGIs (GICD_TYPER2.nASSGIcap), and another
bit indicating whether the guest wants Active-less SGIs or not
(controlled by GICD_CTLR.nASSGIreq).

A hypervisor not supporting GICv4.1 SGIs would leave nASSGIcap
clear, and a guest not knowing about GICv4.1 SGIs (or definitely
wanting an Active state) would leave nASSGIreq clear (both being
thankfully backward compatible with oler revisions of the GIC).

older?



Since Linux is perfectly happy without an active state on SGIs,
inform the hypervisor that we'll use that if offered.

Signed-off-by: Marc Zyngier 


Per the description of these two bits in the commit message,

Reviewed-by: Zenghui Yu 


Thanks
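
In driver terms, honoring the offer comes down to a single conditional
when GICD_CTLR is programmed; roughly (a sketch using the field names
from the commit message):

	/* Opt in to Active-less SGIs if the distributor offers them */
	if (readl_relaxed(base + GICD_TYPER2) & GICD_TYPER2_nASSGIcap)
		val |= GICD_CTLR_nASSGIreq;

	writel_relaxed(val, base + GICD_CTLR);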

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v4 01/20] irqchip/gic-v4.1: Skip absent CPUs while iterating over redistributors

2020-02-17 Thread Zenghui Yu

Hi Marc,

On 2020/2/14 22:57, Marc Zyngier wrote:

In a system that is only sparsely populated with CPUs, we can end up with
redistributor structures that are not initialized. Let's make sure we
don't try and access those when iterating over them (in this case when
checking we have a L2 VPE table).

Fixes: 4e6437f12d6e ("irqchip/gic-v4.1: Ensure L2 vPE table is allocated at RD level")
Signed-off-by: Marc Zyngier 
---
  drivers/irqchip/irq-gic-v3-its.c | 4 
  1 file changed, 4 insertions(+)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 83b1186ffcad..da883a691028 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -2452,6 +2452,10 @@ static bool allocate_vpe_l2_table(int cpu, u32 id)
if (!gic_rdists->has_rvpeid)
return true;
  
+	/* Skip non-present CPUs */
+   if (!base)
+   return true;
+


Thanks for fixing this,

Reviewed-by: Zenghui Yu 


val  = gicr_read_vpropbaser(base + SZ_128K + GICR_VPROPBASER);
  
  	esz  = FIELD_GET(GICR_VPROPBASER_4_1_ENTRY_SIZE, val) + 1;




___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v4 03/20] irqchip/gic-v4.1: Advertise support v4.1 to KVM

2020-02-17 Thread Zenghui Yu

On 2020/2/14 22:57, Marc Zyngier wrote:

Tell KVM that we support v4.1. Nothing uses this information so far.

Signed-off-by: Marc Zyngier 


Reviewed-by: Zenghui Yu 

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm