RE: [PATCH 3/4] irqchip/sifive-plic: Separate irq_chip for multiple PLIC instances

2020-05-18 Thread Anup Patel



> -Original Message-
> From: Marc Zyngier 
> Sent: 18 May 2020 13:45
> To: Anup Patel 
> Cc: Palmer Dabbelt ; Paul Walmsley
> ; Thomas Gleixner ; Jason
> Cooper ; Atish Patra ; Alistair
> Francis ; Anup Patel ; linux-
> ri...@lists.infradead.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH 3/4] irqchip/sifive-plic: Separate irq_chip for multiple
> PLIC instances
> 
> On 2020-05-16 17:38, Anup Patel wrote:
> >> -Original Message-
> >> From: Marc Zyngier 
> 
> [...]
> 
> >> I *have* given you a way to implement that in a better way. But
> >> again, I'd rather you *don't* do it for the reason I have outlined
> >> above.
> >
> > I explored kernel/irq/proc.c and we can achieve what this patch does
> > by implementing irq_print_chip() callback of "struct irq_chip" so we
> > certainly don't need separate "struct irq_chip" for each PLIC instance.
> >
> > I will implement irq_print_chip() callback in v2 series.
> 
> You still haven't explained *why* you need to have this change.
> As it stands, I'm not prepared to take it.
> 

This is only for differentiating interrupts of multiple PLIC instances
in /proc/interrupts.

I will drop this patch since, as you mentioned, the contents of
/proc/interrupts are considered an ABI and this patch breaks it.

For now, we can infer the PLIC instance for interrupt X based on
the contents of /proc/irq/X/node (i.e. the interrupt's NUMA node id).
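
For reference, the irq_print_chip() approach would look roughly like the
sketch below (this is not the v2 code; the per-instance id field in
struct plic_priv is an assumption for illustration):

	/*
	 * Hedged sketch only: print a per-instance label in /proc/interrupts
	 * without allocating a separate struct irq_chip per PLIC instance.
	 * Assumes priv is installed as the irq chip data (as done in
	 * plic_irqdomain_map()) and carries an instance id.
	 */
	static void plic_irq_print_chip(struct irq_data *d, struct seq_file *p)
	{
		struct plic_priv *priv = irq_data_get_irq_chip_data(d);

		seq_printf(p, "PLIC%d", priv->instance);
	}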

Thanks,
Anup


RE: [PATCH 1/4] irqchip/sifive-plic: Setup cpuhp once after current handler is present

2020-05-17 Thread Anup Patel



> -Original Message-
> From: linux-kernel-ow...@vger.kernel.org On Behalf Of Anup Patel
> Sent: 16 May 2020 21:59
> To: Marc Zyngier 
> Cc: Palmer Dabbelt ; Paul Walmsley
> ; Thomas Gleixner ; Jason
> Cooper ; Atish Patra ; Alistair
> Francis ; Anup Patel ; linux-
> ri...@lists.infradead.org; linux-kernel@vger.kernel.org
> Subject: RE: [PATCH 1/4] irqchip/sifive-plic: Setup cpuhp once after current
> handler is present
> 
> 
> 
> > -Original Message-
> > From: Marc Zyngier 
> > Sent: 16 May 2020 19:01
> > To: Anup Patel 
> > Cc: Palmer Dabbelt ; Paul Walmsley
> > ; Thomas Gleixner ;
> > Jason Cooper ; Atish Patra
> > ; Alistair Francis ;
> > Anup Patel ; linux- ri...@lists.infradead.org;
> > linux-kernel@vger.kernel.org
> > Subject: Re: [PATCH 1/4] irqchip/sifive-plic: Setup cpuhp once after
> > current handler is present
> >
> > On 2020-05-16 13:52, Anup Patel wrote:
> > >> -Original Message-
> > >> From: Marc Zyngier 
> > >> Sent: 16 May 2020 17:42
> > >> To: Anup Patel 
> > >> Cc: Palmer Dabbelt ; Paul Walmsley
> > >> ; Thomas Gleixner ;
> > >> Jason Cooper ; Atish Patra
> > >> ; Alistair Francis ;
> > >> Anup Patel ;
> > >> linux-
> > >> ri...@lists.infradead.org; linux-kernel@vger.kernel.org
> > >> Subject: Re: [PATCH 1/4] irqchip/sifive-plic: Setup cpuhp once
> > >> after current handler is present
> > >>
> > >> Hi Anup,
> > >>
> > >> On 2020-05-16 07:38, Anup Patel wrote:
> > >> > For multiple PLIC instances, the plic_init() is called once for
> > >> > each PLIC instance. Due to this we have two issues:
> > >> > 1. cpuhp_setup_state() is called multiple times
> > >> > 2. plic_starting_cpu() can crash for the boot CPU if cpuhp_setup_state()
> > >> >    is called before the boot CPU PLIC handler is available.
> > >> >
> > >> > This patch fixes both of the above issues.
> > >> >
> > >> > Signed-off-by: Anup Patel 
> > >> > ---
> > >> >  drivers/irqchip/irq-sifive-plic.c | 14 --
> > >> >  1 file changed, 12 insertions(+), 2 deletions(-)
> > >> >
> > >> > diff --git a/drivers/irqchip/irq-sifive-plic.c
> > >> > b/drivers/irqchip/irq-sifive-plic.c
> > >> > index 822e074c0600..7dc23edb3267 100644
> > >> > --- a/drivers/irqchip/irq-sifive-plic.c
> > >> > +++ b/drivers/irqchip/irq-sifive-plic.c
> > >> > @@ -76,6 +76,7 @@ struct plic_handler {
> > >> >void __iomem*enable_base;
> > >> >struct plic_priv*priv;
> > >> >  };
> > >> > +static bool plic_cpuhp_setup_done;
> > >> >  static DEFINE_PER_CPU(struct plic_handler, plic_handlers);
> > >> >
> > >> >  static inline void plic_toggle(struct plic_handler *handler,
> > >> > @@ -282,6 +283,7 @@ static int __init plic_init(struct device_node *node,
> > >> >int error = 0, nr_contexts, nr_handlers = 0, i;
> > >> >u32 nr_irqs;
> > >> >struct plic_priv *priv;
> > >> > +  struct plic_handler *handler;
> > >> >
> > >> >priv = kzalloc(sizeof(*priv), GFP_KERNEL);
> > >> >if (!priv)
> > >> > @@ -310,7 +312,6 @@ static int __init plic_init(struct
> > >> > device_node *node,
> > >> >
> > >> >for (i = 0; i < nr_contexts; i++) {
> > >> >struct of_phandle_args parent;
> > >> > -  struct plic_handler *handler;
> > >> >irq_hw_number_t hwirq;
> > >> >int cpu, hartid;
> > >> >
> > >> > @@ -364,9 +365,18 @@ static int __init plic_init(struct
> > >> > device_node *node,
> > >> >nr_handlers++;
> > >> >}
> > >> >
> > >> > -  cpuhp_setup_state(CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING,
> > >> > +  /*
> > >> > +   * We can have multiple PLIC instances so setup cpuhp state only
> > >> > +   * when context handler for current/boot CPU is present.
> > >> > +   */
> > >> > +  handler = this_cpu_ptr(&plic_handlers);
> > >> > +  if (handler->present && !plic_cpuhp_setup_done) {

RE: [PATCH 3/4] irqchip/sifive-plic: Separate irq_chip for multiple PLIC instances

2020-05-16 Thread Anup Patel



> -Original Message-
> From: Marc Zyngier 
> Sent: 16 May 2020 18:46
> To: Anup Patel 
> Cc: Palmer Dabbelt ; Paul Walmsley
> ; Thomas Gleixner ; Jason
> Cooper ; Atish Patra ; Alistair
> Francis ; Anup Patel ; linux-
> ri...@lists.infradead.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH 3/4] irqchip/sifive-plic: Separate irq_chip for multiple
> PLIC instances
> 
> On 2020-05-16 14:01, Anup Patel wrote:
> >> -Original Message-
> >> From: Marc Zyngier 
> >> Sent: 16 May 2020 17:59
> >> To: Anup Patel 
> >> Cc: Palmer Dabbelt ; Paul Walmsley
> >> ; Thomas Gleixner ;
> >> Jason Cooper ; Atish Patra
> >> ; Alistair Francis ;
> >> Anup Patel ;
> >> linux-
> >> ri...@lists.infradead.org; linux-kernel@vger.kernel.org
> >> Subject: Re: [PATCH 3/4] irqchip/sifive-plic: Separate irq_chip for
> >> multiple PLIC instances
> >>
> >> On 2020-05-16 07:39, Anup Patel wrote:
> >> > To distinguish interrupts from multiple PLIC instances, we use a
> >> > per-PLIC irq_chip instance with a different name.
> >> >
> >> > Signed-off-by: Anup Patel 
> >> > ---
> >> >  drivers/irqchip/irq-sifive-plic.c | 28 +++-
> >> >  1 file changed, 15 insertions(+), 13 deletions(-)
> >> >
> >> > diff --git a/drivers/irqchip/irq-sifive-plic.c
> >> > b/drivers/irqchip/irq-sifive-plic.c
> >> > index 2d3db927a551..e42fc082ad18 100644
> >> > --- a/drivers/irqchip/irq-sifive-plic.c
> >> > +++ b/drivers/irqchip/irq-sifive-plic.c
> >> > @@ -60,6 +60,7 @@
> >> >  #define PLIC_ENABLE_THRESHOLD   0
> >> >
> >> >  struct plic_priv {
> >> > +struct irq_chip chip;
> >> >  struct cpumask lmask;
> >> >  struct irq_domain *irqdomain;
> >> >  void __iomem *regs;
> >> > @@ -76,6 +77,7 @@ struct plic_handler {
> >> >  void __iomem*enable_base;
> >> >  struct plic_priv*priv;
> >> >  };
> >> > +static unsigned int plic_count;
> >> >  static bool plic_cpuhp_setup_done;
> >> >  static DEFINE_PER_CPU(struct plic_handler, plic_handlers);
> >> >
> >> > @@ -164,20 +166,12 @@ static void plic_irq_eoi(struct irq_data *d)
> >> >  writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
> >> >  }
> >> >
> >> > -static struct irq_chip plic_chip = {
> >> > -.name   = "SiFive PLIC",
> >> > -.irq_mask   = plic_irq_mask,
> >> > -.irq_unmask = plic_irq_unmask,
> >> > -.irq_eoi= plic_irq_eoi,
> >> > -#ifdef CONFIG_SMP
> >> > -.irq_set_affinity = plic_set_affinity,
> >> > -#endif
> >> > -};
> >> > -
> >> >  static int plic_irqdomain_map(struct irq_domain *d, unsigned int irq,
> >> >irq_hw_number_t hwirq)
> >> >  {
> >> > -irq_domain_set_info(d, irq, hwirq, &plic_chip, d->host_data,
> >> > +struct plic_priv *priv = d->host_data;
> >> > +
> >> > +irq_domain_set_info(d, irq, hwirq, &priv->chip, d->host_data,
> >> >  handle_fasteoi_irq, NULL, NULL);
> >> >  irq_set_noprobe(irq);
> >> >  return 0;
> >> > @@ -294,6 +288,14 @@ static int __init plic_init(struct device_node
> >> > *node,
> >> >  if (!priv)
> >> >  return -ENOMEM;
> >> >
> >> > +priv->chip.name = kasprintf(GFP_KERNEL, "PLIC%d", plic_count++);
> >> > +priv->chip.irq_mask = plic_irq_mask,
> >> > +priv->chip.irq_unmask = plic_irq_unmask,
> >> > +priv->chip.irq_eoi = plic_irq_eoi,
> >> > +#ifdef CONFIG_SMP
> >> > +priv->chip.irq_set_affinity = plic_set_affinity,
> >> > +#endif
> >> > +
> >> >  priv->regs = of_iomap(node, 0);
> >> >  if (WARN_ON(!priv->regs)) {
> >> >  error = -EIO;
> >> > @@ -383,9 +385,9 @@ static int __init plic_init(struct device_node
> >> > *node,
> >> >  }
> >> >
> >> >  pr_info("interrupt-controller at 0x%llx "
> >> > -   "(interrupts=%d, contexts=%d, handlers=%d)\n",

RE: [PATCH 1/4] irqchip/sifive-plic: Setup cpuhp once after current handler is present

2020-05-16 Thread Anup Patel



> -Original Message-
> From: Marc Zyngier 
> Sent: 16 May 2020 19:01
> To: Anup Patel 
> Cc: Palmer Dabbelt ; Paul Walmsley
> ; Thomas Gleixner ; Jason
> Cooper ; Atish Patra ; Alistair
> Francis ; Anup Patel ; linux-
> ri...@lists.infradead.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH 1/4] irqchip/sifive-plic: Setup cpuhp once after current
> handler is present
> 
> On 2020-05-16 13:52, Anup Patel wrote:
> >> -Original Message-
> >> From: Marc Zyngier 
> >> Sent: 16 May 2020 17:42
> >> To: Anup Patel 
> >> Cc: Palmer Dabbelt ; Paul Walmsley
> >> ; Thomas Gleixner ;
> >> Jason Cooper ; Atish Patra
> >> ; Alistair Francis ;
> >> Anup Patel ;
> >> linux-
> >> ri...@lists.infradead.org; linux-kernel@vger.kernel.org
> >> Subject: Re: [PATCH 1/4] irqchip/sifive-plic: Setup cpuhp once after
> >> current handler is present
> >>
> >> Hi Anup,
> >>
> >> On 2020-05-16 07:38, Anup Patel wrote:
> >> > For multiple PLIC instances, the plic_init() is called once for
> >> > each PLIC instance. Due to this we have two issues:
> >> > 1. cpuhp_setup_state() is called multiple times
> >> > 2. plic_starting_cpu() can crash for the boot CPU if cpuhp_setup_state()
> >> >    is called before the boot CPU PLIC handler is available.
> >> >
> >> > This patch fixes both of the above issues.
> >> >
> >> > Signed-off-by: Anup Patel 
> >> > ---
> >> >  drivers/irqchip/irq-sifive-plic.c | 14 --
> >> >  1 file changed, 12 insertions(+), 2 deletions(-)
> >> >
> >> > diff --git a/drivers/irqchip/irq-sifive-plic.c
> >> > b/drivers/irqchip/irq-sifive-plic.c
> >> > index 822e074c0600..7dc23edb3267 100644
> >> > --- a/drivers/irqchip/irq-sifive-plic.c
> >> > +++ b/drivers/irqchip/irq-sifive-plic.c
> >> > @@ -76,6 +76,7 @@ struct plic_handler {
> >> >  void __iomem*enable_base;
> >> >  struct plic_priv*priv;
> >> >  };
> >> > +static bool plic_cpuhp_setup_done;
> >> >  static DEFINE_PER_CPU(struct plic_handler, plic_handlers);
> >> >
> >> >  static inline void plic_toggle(struct plic_handler *handler,
> >> > @@ -282,6 +283,7 @@ static int __init plic_init(struct device_node *node,
> >> >  int error = 0, nr_contexts, nr_handlers = 0, i;
> >> >  u32 nr_irqs;
> >> >  struct plic_priv *priv;
> >> > +struct plic_handler *handler;
> >> >
> >> >  priv = kzalloc(sizeof(*priv), GFP_KERNEL);
> >> >  if (!priv)
> >> > @@ -310,7 +312,6 @@ static int __init plic_init(struct device_node
> >> > *node,
> >> >
> >> >  for (i = 0; i < nr_contexts; i++) {
> >> >  struct of_phandle_args parent;
> >> > -struct plic_handler *handler;
> >> >  irq_hw_number_t hwirq;
> >> >  int cpu, hartid;
> >> >
> >> > @@ -364,9 +365,18 @@ static int __init plic_init(struct device_node
> >> > *node,
> >> >  nr_handlers++;
> >> >  }
> >> >
> >> > -cpuhp_setup_state(CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING,
> >> > +/*
> >> > + * We can have multiple PLIC instances so setup cpuhp state only
> >> > + * when context handler for current/boot CPU is present.
> >> > + */
> >> > +handler = this_cpu_ptr(&plic_handlers);
> >> > +if (handler->present && !plic_cpuhp_setup_done) {
> >>
> >> If there is no context handler for the boot CPU, the system is
> >> doomed, right? It isn't able to get any interrupt, and you don't
> >> register the hotplug notifier that could allow secondary CPUs to
> >> boot.
> >>
> >> So what is the point? It feels like you should just give up here.
> >>
> >> Also, the boot CPU is always CPU 0. So checking that you only
> >> register the hotplug notifier from CPU 0 should be enough.
> >
> > The boot CPU is not fixed in RISC-V, the logical id of the boot CPU
> > will always be zero but physical id (or HART id) can be something
> > totally different.
> 
> So on riscv, smp_processor_id() can return a non-zero value on the boot
> CPU? Interesting... :-/

RE: [PATCH 3/4] irqchip/sifive-plic: Separate irq_chip for multiple PLIC instances

2020-05-16 Thread Anup Patel



> -Original Message-
> From: Marc Zyngier 
> Sent: 16 May 2020 17:59
> To: Anup Patel 
> Cc: Palmer Dabbelt ; Paul Walmsley
> ; Thomas Gleixner ; Jason
> Cooper ; Atish Patra ; Alistair
> Francis ; Anup Patel ; linux-
> ri...@lists.infradead.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH 3/4] irqchip/sifive-plic: Separate irq_chip for multiple
> PLIC instances
> 
> On 2020-05-16 07:39, Anup Patel wrote:
> > To distinguish interrupts from multiple PLIC instances, we use a
> > per-PLIC irq_chip instance with a different name.
> >
> > Signed-off-by: Anup Patel 
> > ---
> >  drivers/irqchip/irq-sifive-plic.c | 28 +++-
> >  1 file changed, 15 insertions(+), 13 deletions(-)
> >
> > diff --git a/drivers/irqchip/irq-sifive-plic.c
> > b/drivers/irqchip/irq-sifive-plic.c
> > index 2d3db927a551..e42fc082ad18 100644
> > --- a/drivers/irqchip/irq-sifive-plic.c
> > +++ b/drivers/irqchip/irq-sifive-plic.c
> > @@ -60,6 +60,7 @@
> >  #define PLIC_ENABLE_THRESHOLD   0
> >
> >  struct plic_priv {
> > +   struct irq_chip chip;
> > struct cpumask lmask;
> > struct irq_domain *irqdomain;
> > void __iomem *regs;
> > @@ -76,6 +77,7 @@ struct plic_handler {
> > void __iomem*enable_base;
> > struct plic_priv*priv;
> >  };
> > +static unsigned int plic_count;
> >  static bool plic_cpuhp_setup_done;
> >  static DEFINE_PER_CPU(struct plic_handler, plic_handlers);
> >
> > @@ -164,20 +166,12 @@ static void plic_irq_eoi(struct irq_data *d)
> > writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);  }
> >
> > -static struct irq_chip plic_chip = {
> > -   .name   = "SiFive PLIC",
> > -   .irq_mask   = plic_irq_mask,
> > -   .irq_unmask = plic_irq_unmask,
> > -   .irq_eoi= plic_irq_eoi,
> > -#ifdef CONFIG_SMP
> > -   .irq_set_affinity = plic_set_affinity,
> > -#endif
> > -};
> > -
> >  static int plic_irqdomain_map(struct irq_domain *d, unsigned int irq,
> >   irq_hw_number_t hwirq)
> >  {
> > -   irq_domain_set_info(d, irq, hwirq, &plic_chip, d->host_data,
> > +   struct plic_priv *priv = d->host_data;
> > +
> > +   irq_domain_set_info(d, irq, hwirq, &priv->chip, d->host_data,
> > handle_fasteoi_irq, NULL, NULL);
> > irq_set_noprobe(irq);
> > return 0;
> > @@ -294,6 +288,14 @@ static int __init plic_init(struct device_node
> > *node,
> > if (!priv)
> > return -ENOMEM;
> >
> > +   priv->chip.name = kasprintf(GFP_KERNEL, "PLIC%d", plic_count++);
> > +   priv->chip.irq_mask = plic_irq_mask,
> > +   priv->chip.irq_unmask = plic_irq_unmask,
> > +   priv->chip.irq_eoi = plic_irq_eoi,
> > +#ifdef CONFIG_SMP
> > +   priv->chip.irq_set_affinity = plic_set_affinity,
> > +#endif
> > +
> > priv->regs = of_iomap(node, 0);
> > if (WARN_ON(!priv->regs)) {
> > error = -EIO;
> > @@ -383,9 +385,9 @@ static int __init plic_init(struct device_node
> > *node,
> > }
> >
> > pr_info("interrupt-controller at 0x%llx "
> > -   "(interrupts=%d, contexts=%d, handlers=%d)\n",
> > +   "(interrupts=%d, contexts=%d, handlers=%d) (%s)\n",
> > (unsigned long long)iores.start, nr_irqs,
> > -   nr_contexts, nr_handlers);
> > +   nr_contexts, nr_handlers, priv->chip.name);
> > set_handle_irq(plic_handle_irq);
> > return 0;
> 
> I really dislike this patch for multiple reasons:
> 
> - Allocating a new struc irq_chip just for a string seems over the top,
>specially as all the *useful* stuff stays the same.
> 
> - Even if I hate it, /proc is API. I'm sure something, somewhere is
>parsing this. Changing the string is likely to confuse it.

AFAIK, we don't have scripts in the RISC-V world that depend on the
/proc/interrupts contents. Maybe such scripts will show up in the future.

For a system with multiple PLICs, we see the same "SiFive PLIC"
string for all PLIC interrupts in "cat /proc/interrupts". I am trying to
assign a different string based on the PLIC instance. This is similar to
what the GICv2 driver does (e.g. GIC-0, GIC-1, ... in /proc/interrupts).

Is there a better way to do this?

> 
> - If you do this for debug purposes, then CONFIG_GENERIC_IRQ_DEBUGFS
>is the right way to look up the information.
> 
> - If, for reasons that are beyond me, you actually *need* this, then
>implementing irq_print_chip in your irq_chip structure is the way
>to go.
> 
> But frankly, I'd rather you drop this altogether.

I just want to differentiate which interrupt belongs to which PLIC
instance in /proc/interrupts. I can take a different approach if you
suggest one.

Regards,
Anup


RE: [PATCH 4/4] irqchip/sifive-plic: Set default irq affinity in plic_irqdomain_map()

2020-05-16 Thread Anup Patel



> -Original Message-
> From: Marc Zyngier 
> Sent: 16 May 2020 18:01
> To: Anup Patel 
> Cc: Palmer Dabbelt ; Paul Walmsley
> ; Thomas Gleixner ; Jason
> Cooper ; Atish Patra ; Alistair
> Francis ; Anup Patel ; linux-
> ri...@lists.infradead.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH 4/4] irqchip/sifive-plic: Set default irq affinity in
> plic_irqdomain_map()
> 
> On 2020-05-16 07:39, Anup Patel wrote:
> > For multiple PLIC instances, each PLIC can only target a subset of
> > CPUs which is represented by "lmask" in the "struct plic_priv".
> >
> > Currently, the default irq affinity for each PLIC interrupt is all
> > online CPUs, which is an illegal value for the default irq affinity when
> > we have multiple PLIC instances. To fix this, we now set "lmask" as the
> > default irq affinity for each interrupt in plic_irqdomain_map().
> >
> > Signed-off-by: Anup Patel 
> > ---
> >  drivers/irqchip/irq-sifive-plic.c | 1 +
> >  1 file changed, 1 insertion(+)
> >
> > diff --git a/drivers/irqchip/irq-sifive-plic.c
> > b/drivers/irqchip/irq-sifive-plic.c
> > index e42fc082ad18..9af5e2fd2574 100644
> > --- a/drivers/irqchip/irq-sifive-plic.c
> > +++ b/drivers/irqchip/irq-sifive-plic.c
> > @@ -174,6 +174,7 @@ static int plic_irqdomain_map(struct irq_domain
> > *d, unsigned int irq,
> > irq_domain_set_info(d, irq, hwirq, &priv->chip, d->host_data,
> > handle_fasteoi_irq, NULL, NULL);
> > irq_set_noprobe(irq);
> > +   irq_set_affinity(irq, &priv->lmask);
> > return 0;
> >  }
> 
> Isn't that a fix? If so, please add a Fixes: tag, as well as a CC to stable
> if you think it should be backported.

This is certainly a fix. I will add a Fixes: tag as you suggested.
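
For reference, the trailers would look like this (the commit id below is a
placeholder for the commit that introduced multi-PLIC support, not an
actual sha):

Fixes: 123456789abc ("irqchip/sifive-plic: <multi-PLIC support commit>")
Cc: stable@vger.kernel.org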

Regards,
Anup


RE: [PATCH 2/4] irqchip/sifive-plic: Improve boot prints for multiple PLIC instances

2020-05-16 Thread Anup Patel



> -Original Message-
> From: Marc Zyngier 
> Sent: 16 May 2020 17:50
> To: Anup Patel 
> Cc: Palmer Dabbelt ; Paul Walmsley
> ; Thomas Gleixner ; Jason
> Cooper ; Atish Patra ; Alistair
> Francis ; Anup Patel ; linux-
> ri...@lists.infradead.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH 2/4] irqchip/sifive-plic: Improve boot prints for multiple
> PLIC instances
> 
> On 2020-05-16 07:38, Anup Patel wrote:
> > We improve the PLIC banner to help distinguish multiple PLIC instances
> > in boot-time prints.
> >
> > Signed-off-by: Anup Patel 
> > ---
> >  drivers/irqchip/irq-sifive-plic.c | 11 +--
> >  1 file changed, 9 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/irqchip/irq-sifive-plic.c
> > b/drivers/irqchip/irq-sifive-plic.c
> > index 7dc23edb3267..2d3db927a551 100644
> > --- a/drivers/irqchip/irq-sifive-plic.c
> > +++ b/drivers/irqchip/irq-sifive-plic.c
> > @@ -284,6 +284,11 @@ static int __init plic_init(struct device_node
> > *node,
> > u32 nr_irqs;
> > struct plic_priv *priv;
> > struct plic_handler *handler;
> > +   struct resource iores;
> > +
> > +   error = of_address_to_resource(node, 0, &iores);
> > +   if (error)
> > +   return error;
> >
> > priv = kzalloc(sizeof(*priv), GFP_KERNEL);
> > if (!priv)
> > @@ -377,8 +382,10 @@ static int __init plic_init(struct device_node
> > *node,
> > plic_cpuhp_setup_done = true;
> > }
> >
> > -   pr_info("mapped %d interrupts with %d handlers for %d contexts.\n",
> > -   nr_irqs, nr_handlers, nr_contexts);
> > +   pr_info("interrupt-controller at 0x%llx "
> > +   "(interrupts=%d, contexts=%d, handlers=%d)\n",
> > +   (unsigned long long)iores.start, nr_irqs,
> > +   nr_contexts, nr_handlers);
> 
> Instead of displaying "interrupt controller at ...", why not use the
> existing printk format for OF nodes? Something along the lines of
> 
>  pr_info("%pOF : mapped %d interrupts with %d handlers for %d contexts\n",
>  node, nr_irqs, nr_handlers, nr_contexts);

Sure, I will go with your suggestion and use the printk format for OF nodes.

Regards,
Anup


RE: [PATCH 1/4] irqchip/sifive-plic: Setup cpuhp once after current handler is present

2020-05-16 Thread Anup Patel



> -Original Message-
> From: Marc Zyngier 
> Sent: 16 May 2020 17:42
> To: Anup Patel 
> Cc: Palmer Dabbelt ; Paul Walmsley
> ; Thomas Gleixner ; Jason
> Cooper ; Atish Patra ; Alistair
> Francis ; Anup Patel ; linux-
> ri...@lists.infradead.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH 1/4] irqchip/sifive-plic: Setup cpuhp once after current
> handler is present
> 
> Hi Anup,
> 
> On 2020-05-16 07:38, Anup Patel wrote:
> > For multiple PLIC instances, the plic_init() is called once for each
> > PLIC instance. Due to this we have two issues:
> > 1. cpuhp_setup_state() is called multiple times
> > 2. plic_starting_cpu() can crash for the boot CPU if cpuhp_setup_state()
> >    is called before the boot CPU PLIC handler is available.
> >
> > This patch fixes both of the above issues.
> >
> > Signed-off-by: Anup Patel 
> > ---
> >  drivers/irqchip/irq-sifive-plic.c | 14 --
> >  1 file changed, 12 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/irqchip/irq-sifive-plic.c
> > b/drivers/irqchip/irq-sifive-plic.c
> > index 822e074c0600..7dc23edb3267 100644
> > --- a/drivers/irqchip/irq-sifive-plic.c
> > +++ b/drivers/irqchip/irq-sifive-plic.c
> > @@ -76,6 +76,7 @@ struct plic_handler {
> > void __iomem*enable_base;
> > struct plic_priv*priv;
> >  };
> > +static bool plic_cpuhp_setup_done;
> >  static DEFINE_PER_CPU(struct plic_handler, plic_handlers);
> >
> >  static inline void plic_toggle(struct plic_handler *handler,
> > @@ -282,6 +283,7 @@ static int __init plic_init(struct device_node *node,
> > int error = 0, nr_contexts, nr_handlers = 0, i;
> > u32 nr_irqs;
> > struct plic_priv *priv;
> > +   struct plic_handler *handler;
> >
> > priv = kzalloc(sizeof(*priv), GFP_KERNEL);
> > if (!priv)
> > @@ -310,7 +312,6 @@ static int __init plic_init(struct device_node
> > *node,
> >
> > for (i = 0; i < nr_contexts; i++) {
> > struct of_phandle_args parent;
> > -   struct plic_handler *handler;
> > irq_hw_number_t hwirq;
> > int cpu, hartid;
> >
> > @@ -364,9 +365,18 @@ static int __init plic_init(struct device_node
> > *node,
> > nr_handlers++;
> > }
> >
> > -   cpuhp_setup_state(CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING,
> > +   /*
> > +* We can have multiple PLIC instances so setup cpuhp state only
> > +* when context handler for current/boot CPU is present.
> > +*/
> > +   handler = this_cpu_ptr(&plic_handlers);
> > +   if (handler->present && !plic_cpuhp_setup_done) {
> 
> If there is no context handler for the boot CPU, the system is doomed,
> right? It isn't able to get any interrupt, and you don't register the
> hotplug notifier that could allow secondary CPUs to boot.
> 
> So what is the point? It feels like you should just give up here.
> 
> Also, the boot CPU is always CPU 0. So checking that you only register the
> hotplug notifier from CPU 0 should be enough.

The boot CPU is not fixed in RISC-V; the logical id of the boot CPU will always
be zero but the physical id (or HART id) can be something totally different.

On a RISC-V NUMA system, we will have a separate PLIC in each NUMA node.

Let's say we have a system with 2 NUMA nodes, each NUMA node having
4 CPUs (or 4 HARTs). In this case, the DTB passed to Linux will have two PLIC
DT nodes where each PLIC device targets only 4 CPUs (or 4 HARTs), so
plic_init() will set up handlers for only 4 CPUs (or 4 HARTs) per instance. In
other words, plic_init() for "PLIC0" will set up handlers for HART ids 0 to 3
and plic_init() for "PLIC1" will set up handlers for HART ids 4 to 7. Now, any
CPU can be the boot CPU, so it is possible that the CPU with HART id 4 is the
boot CPU; when plic_init() is first called for "PLIC0", the handler for HART
id 4 is not set up yet because it will only be set up when plic_init() is
called for "PLIC1". This causes plic_starting_cpu() to crash when plic_init()
is called for "PLIC0".

I hope the above example helps in understanding the issue.

I encountered this issue randomly when booting Linux on QEMU RISC-V
with multiple NUMA nodes.

Regards,
Anup

> 
> > +   cpuhp_setup_state(CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING,
> >   "irqchip/sifive/plic:starting",
> >   plic_starting_cpu, plic_dying_cpu);
> > +   plic_cpuhp_setup_done = true;
> > +   }
> > +
> > pr_info("mapped %d interrupts with %d handlers for %d contexts.\n",
> > nr_irqs, nr_handlers, nr_contexts);
> > set_handle_irq(plic_handle_irq);
> 
> Thanks,
> 
>  M.
> --
> Jazz is not dead. It just smells funny...


[PATCH 2/4] irqchip/sifive-plic: Improve boot prints for multiple PLIC instances

2020-05-16 Thread Anup Patel
We improve the PLIC banner to help distinguish multiple PLIC instances
in boot-time prints.

Signed-off-by: Anup Patel 
---
 drivers/irqchip/irq-sifive-plic.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
index 7dc23edb3267..2d3db927a551 100644
--- a/drivers/irqchip/irq-sifive-plic.c
+++ b/drivers/irqchip/irq-sifive-plic.c
@@ -284,6 +284,11 @@ static int __init plic_init(struct device_node *node,
u32 nr_irqs;
struct plic_priv *priv;
struct plic_handler *handler;
+   struct resource iores;
+
+   error = of_address_to_resource(node, 0, &iores);
+   if (error)
+   return error;
 
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (!priv)
@@ -377,8 +382,10 @@ static int __init plic_init(struct device_node *node,
plic_cpuhp_setup_done = true;
}
 
-   pr_info("mapped %d interrupts with %d handlers for %d contexts.\n",
-   nr_irqs, nr_handlers, nr_contexts);
+   pr_info("interrupt-controller at 0x%llx "
+   "(interrupts=%d, contexts=%d, handlers=%d)\n",
+   (unsigned long long)iores.start, nr_irqs,
+   nr_contexts, nr_handlers);
set_handle_irq(plic_handle_irq);
return 0;
 
-- 
2.25.1



[PATCH 0/4] More improvements for multiple PLICs

2020-05-16 Thread Anup Patel
This series makes further improvements for supporting multiple PLIC
instances.

PATCH1 and PATCH4 are fixes whereas PATCH2 and PATCH3 help users
distinguish between multiple PLIC instances.

These patches are based on Linux-5.7-rc5 and can be found in the
plic_imp_v1 branch at: https://github.com/avpatel/linux.git

To try these patches, we will need:
1. OpenSBI multi-PLIC and multi-CLINT support which can be found in
   multi_plic_clint_v1 branch at:
   https://github.com/avpatel/opensbi.git
2. QEMU RISC-V multi-socket support which can be found in
   riscv_multi_socket_v1 branch at:
   https://github.com/avpatel/qemu.git

Anup Patel (4):
  irqchip/sifive-plic: Setup cpuhp once after current handler is present
  irqchip/sifive-plic: Improve boot prints for multiple PLIC instances
  irqchip/sifive-plic: Separate irq_chip for multiple PLIC instances
  irqchip/sifive-plic: Set default irq affinity in plic_irqdomain_map()

 drivers/irqchip/irq-sifive-plic.c | 50 +--
 1 file changed, 35 insertions(+), 15 deletions(-)

-- 
2.25.1



[PATCH 1/4] irqchip/sifive-plic: Setup cpuhp once after current handler is present

2020-05-16 Thread Anup Patel
For multiple PLIC instances, plic_init() is called once for each
PLIC instance. Due to this we have two issues:
1. cpuhp_setup_state() is called multiple times
2. plic_starting_cpu() can crash for the boot CPU if cpuhp_setup_state()
   is called before the boot CPU PLIC handler is available.

This patch fixes both of the above issues.

Signed-off-by: Anup Patel 
---
 drivers/irqchip/irq-sifive-plic.c | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
index 822e074c0600..7dc23edb3267 100644
--- a/drivers/irqchip/irq-sifive-plic.c
+++ b/drivers/irqchip/irq-sifive-plic.c
@@ -76,6 +76,7 @@ struct plic_handler {
void __iomem*enable_base;
struct plic_priv*priv;
 };
+static bool plic_cpuhp_setup_done;
 static DEFINE_PER_CPU(struct plic_handler, plic_handlers);
 
 static inline void plic_toggle(struct plic_handler *handler,
@@ -282,6 +283,7 @@ static int __init plic_init(struct device_node *node,
int error = 0, nr_contexts, nr_handlers = 0, i;
u32 nr_irqs;
struct plic_priv *priv;
+   struct plic_handler *handler;
 
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (!priv)
@@ -310,7 +312,6 @@ static int __init plic_init(struct device_node *node,
 
for (i = 0; i < nr_contexts; i++) {
struct of_phandle_args parent;
-   struct plic_handler *handler;
irq_hw_number_t hwirq;
int cpu, hartid;
 
@@ -364,9 +365,18 @@ static int __init plic_init(struct device_node *node,
nr_handlers++;
}
 
-   cpuhp_setup_state(CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING,
+   /*
+* We can have multiple PLIC instances so setup cpuhp state only
+* when context handler for current/boot CPU is present.
+*/
+   handler = this_cpu_ptr(&plic_handlers);
+   if (handler->present && !plic_cpuhp_setup_done) {
+   cpuhp_setup_state(CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING,
  "irqchip/sifive/plic:starting",
  plic_starting_cpu, plic_dying_cpu);
+   plic_cpuhp_setup_done = true;
+   }
+
pr_info("mapped %d interrupts with %d handlers for %d contexts.\n",
nr_irqs, nr_handlers, nr_contexts);
set_handle_irq(plic_handle_irq);
-- 
2.25.1



[PATCH 3/4] irqchip/sifive-plic: Separate irq_chip for multiple PLIC instances

2020-05-16 Thread Anup Patel
To distinguish interrupts from multiple PLIC instances, we use a
per-PLIC irq_chip instance with a different name.

Signed-off-by: Anup Patel 
---
 drivers/irqchip/irq-sifive-plic.c | 28 +++-
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
index 2d3db927a551..e42fc082ad18 100644
--- a/drivers/irqchip/irq-sifive-plic.c
+++ b/drivers/irqchip/irq-sifive-plic.c
@@ -60,6 +60,7 @@
 #define PLIC_ENABLE_THRESHOLD   0
 
 struct plic_priv {
+   struct irq_chip chip;
struct cpumask lmask;
struct irq_domain *irqdomain;
void __iomem *regs;
@@ -76,6 +77,7 @@ struct plic_handler {
void __iomem*enable_base;
struct plic_priv*priv;
 };
+static unsigned int plic_count;
 static bool plic_cpuhp_setup_done;
 static DEFINE_PER_CPU(struct plic_handler, plic_handlers);
 
@@ -164,20 +166,12 @@ static void plic_irq_eoi(struct irq_data *d)
writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
 }
 
-static struct irq_chip plic_chip = {
-   .name   = "SiFive PLIC",
-   .irq_mask   = plic_irq_mask,
-   .irq_unmask = plic_irq_unmask,
-   .irq_eoi= plic_irq_eoi,
-#ifdef CONFIG_SMP
-   .irq_set_affinity = plic_set_affinity,
-#endif
-};
-
 static int plic_irqdomain_map(struct irq_domain *d, unsigned int irq,
  irq_hw_number_t hwirq)
 {
-   irq_domain_set_info(d, irq, hwirq, &plic_chip, d->host_data,
+   struct plic_priv *priv = d->host_data;
+
+   irq_domain_set_info(d, irq, hwirq, &priv->chip, d->host_data,
handle_fasteoi_irq, NULL, NULL);
irq_set_noprobe(irq);
return 0;
@@ -294,6 +288,14 @@ static int __init plic_init(struct device_node *node,
if (!priv)
return -ENOMEM;
 
+   priv->chip.name = kasprintf(GFP_KERNEL, "PLIC%d", plic_count++);
+   priv->chip.irq_mask = plic_irq_mask,
+   priv->chip.irq_unmask = plic_irq_unmask,
+   priv->chip.irq_eoi = plic_irq_eoi,
+#ifdef CONFIG_SMP
+   priv->chip.irq_set_affinity = plic_set_affinity,
+#endif
+
priv->regs = of_iomap(node, 0);
if (WARN_ON(!priv->regs)) {
error = -EIO;
@@ -383,9 +385,9 @@ static int __init plic_init(struct device_node *node,
}
 
pr_info("interrupt-controller at 0x%llx "
-   "(interrupts=%d, contexts=%d, handlers=%d)\n",
+   "(interrupts=%d, contexts=%d, handlers=%d) (%s)\n",
(unsigned long long)iores.start, nr_irqs,
-   nr_contexts, nr_handlers);
+   nr_contexts, nr_handlers, priv->chip.name);
set_handle_irq(plic_handle_irq);
return 0;
 
-- 
2.25.1



[PATCH 4/4] irqchip/sifive-plic: Set default irq affinity in plic_irqdomain_map()

2020-05-16 Thread Anup Patel
For multiple PLIC instances, each PLIC can only target a subset of
CPUs which is represented by "lmask" in the "struct plic_priv".

Currently, the default irq affinity for each PLIC interrupt is all
online CPUs, which is an illegal value for the default irq affinity when we
have multiple PLIC instances. To fix this, we now set "lmask" as the
default irq affinity for each interrupt in plic_irqdomain_map().

Signed-off-by: Anup Patel 
---
 drivers/irqchip/irq-sifive-plic.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
index e42fc082ad18..9af5e2fd2574 100644
--- a/drivers/irqchip/irq-sifive-plic.c
+++ b/drivers/irqchip/irq-sifive-plic.c
@@ -174,6 +174,7 @@ static int plic_irqdomain_map(struct irq_domain *d, unsigned int irq,
	irq_domain_set_info(d, irq, hwirq, &priv->chip, d->host_data,
handle_fasteoi_irq, NULL, NULL);
irq_set_noprobe(irq);
+   irq_set_affinity(irq, &priv->lmask);
return 0;
 }
 
-- 
2.25.1



Re: [PATCH 1/2] riscv: defconfig: enable spi nor on Hifive Unleashed A00 board.

2020-05-05 Thread Anup Patel
On Wed, May 6, 2020 at 9:26 AM Sagar Kadam  wrote:
>
> Hi Palmer,
>
> > -Original Message-
> > From: Palmer Dabbelt 
> > Sent: Wednesday, May 6, 2020 4:54 AM
> > To: Sagar Kadam 
> > Cc: tudor.amba...@microchip.com; miquel.ray...@bootlin.com;
> > rich...@nod.at; vigne...@ti.com; Paul Walmsley
> > ; linux-ri...@lists.infradead.org; linux-
> > ker...@vger.kernel.org; linux-...@lists.infradead.org
> > Subject: RE: [PATCH 1/2] riscv: defconfig: enable spi nor on Hifive 
> > Unleashed
> > A00 board.
> >
> > [External Email] Do not click links or attachments unless you recognize the
> > sender and know the content is safe
> >
> > On Tue, 05 May 2020 00:18:45 PDT (-0700), sagar.ka...@sifive.com wrote:
> > > Hello Palmer,
> > >
> > >> -Original Message-
> > >> From: Palmer Dabbelt 
> > >> Sent: Tuesday, May 5, 2020 3:40 AM
> > >> To: Sagar Kadam 
> > >> Cc: tudor.amba...@microchip.com; miquel.ray...@bootlin.com;
> > >> rich...@nod.at; vigne...@ti.com; Paul Walmsley
> > >> ; linux-ri...@lists.infradead.org; linux-
> > >> ker...@vger.kernel.org; linux-...@lists.infradead.org; Sagar Kadam
> > >> 
> > >> Subject: Re: [PATCH 1/2] riscv: defconfig: enable spi nor on Hifive
> > Unleashed
> > >> A00 board.
> > >>
> > >> [External Email] Do not click links or attachments unless you recognize
> > the
> > >> sender and know the content is safe
> > >>
> > >> On Thu, 30 Apr 2020 02:58:51 PDT (-0700), sagar.ka...@sifive.com
> > wrote:
> > >> > Enable MTD based SPI-NOR framework in order to use spi flash
> > available
> > >> > on HiFive Unleashed A00 board.
> > >> >
> > >> > Signed-off-by: Sagar Shrikant Kadam 
> > >> > ---
> > >> >  arch/riscv/configs/defconfig | 2 ++
> > >> >  1 file changed, 2 insertions(+)
> > >> >
> > >> > diff --git a/arch/riscv/configs/defconfig
> > >> > b/arch/riscv/configs/defconfig index 4da4886..970580b 100644
> > >> > --- a/arch/riscv/configs/defconfig
> > >> > +++ b/arch/riscv/configs/defconfig
> > >> > @@ -80,6 +80,8 @@ CONFIG_USB_STORAGE=y  CONFIG_USB_UAS=y
> > >> CONFIG_MMC=y
> > >> > CONFIG_MMC_SPI=y
> > >> > +CONFIG_MTD=y
> > >> > +CONFIG_MTD_SPI_NOR=y
> > >> >  CONFIG_RTC_CLASS=y
> > >> >  CONFIG_VIRTIO_PCI=y
> > >> >  CONFIG_VIRTIO_BALLOON=y
> > >>
> > From the second patch's description I'm assuming that MTD still
> > functions correctly without that change?
> > >
> > > Yes Palmer, the second patch is to enable QUAD write to the NOR flash.
> > > MTD functions correctly without the second patch.
> > >
> > > Using the character interface (/dev/mtd0), the mtd_utils (mtd_debug:
> > > erase/read/write) work fine.
> > > We might require CONFIG_MTD_BLOCK and CONFIG_MTD_CMDLINE_PARTS
> > > in order to use MTD partitioning.
> > > IMHO it can be the user's choice whether to use flash partitions or not,
> > > so I have not enabled them. Please let me know if I should enable these
> > > features as well.
> >
> > Looks like arm64 has these:
> >
> > arch/arm64/configs/defconfig:CONFIG_MTD=y
> > arch/arm64/configs/defconfig:CONFIG_MTD_BLOCK=y
> > arch/arm64/configs/defconfig:CONFIG_MTD_CFI=y
> > arch/arm64/configs/defconfig:CONFIG_MTD_CFI_ADV_OPTIONS=y
> > arch/arm64/configs/defconfig:CONFIG_MTD_CFI_INTELEXT=y
> > arch/arm64/configs/defconfig:CONFIG_MTD_CFI_AMDSTD=y
> > arch/arm64/configs/defconfig:CONFIG_MTD_CFI_STAA=y
> > arch/arm64/configs/defconfig:CONFIG_MTD_PHYSMAP=y
> > arch/arm64/configs/defconfig:CONFIG_MTD_PHYSMAP_OF=y
> > arch/arm64/configs/defconfig:CONFIG_MTD_DATAFLASH=y
> > arch/arm64/configs/defconfig:CONFIG_MTD_SST25L=y
> > arch/arm64/configs/defconfig:CONFIG_MTD_RAW_NAND=y
> > arch/arm64/configs/defconfig:CONFIG_MTD_NAND_DENALI_DT=y
> > arch/arm64/configs/defconfig:CONFIG_MTD_NAND_MARVELL=y
> > arch/arm64/configs/defconfig:CONFIG_MTD_NAND_FSL_IFC=y
> > arch/arm64/configs/defconfig:CONFIG_MTD_NAND_QCOM=y
> > arch/arm64/configs/defconfig:CONFIG_MTD_SPI_NOR=y
> >
> > so I think we're good with just what you have here: MTD_BLOCK doesn't
> > seem that useful, and the rest are drivers.  That said, these (along with
> > SPI and SPI_SIFIVE) should really be in Kconfig.socs rather than defconfig.
> > Can you send a patch that does that?
> >
>
> Yes sure, I will send a V2 series, where SPI,  SPI_SIFIVE,  MTD,
> and MTD_SPI_NOR will be a part of Kconfig.socs.

We had build issues in the past caused by selecting major driver subsystems
in Kconfig.socs.

I suggest selecting SPI_SIFIVE from Kconfig.socs; the other
platform-independent options should go in defconfig and
rv32_defconfig.

The general rule of thumb is to force-select only the required
drivers from Kconfig.socs.
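
As a rough illustration (an untested Kconfig.socs sketch, not a patch):

	config SOC_SIFIVE
		bool "SiFive SoCs"
		select SPI_SIFIVE
		...

with platform-independent options such as CONFIG_SPI=y, CONFIG_MTD=y and
CONFIG_MTD_SPI_NOR=y staying in defconfig and rv32_defconfig.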

Regards,
Anup

>
> Thanks & BR,
> Sagar Kadam
>
> > >
> > > To demonstrate a bit more with linux 5.7-rc3
> > >
> > > Specify on U-boot prompt:
> > > # setenv bootargs "root=/dev/ram rw console=ttySIF0 mtdparts=spi0.0:1024k(loader1),4096K(loader2),26M(rootfs)"
> > >
> > > After booting linux will enumerate mtd partitions:
> > > # cat /proc/mtd
> > > dev:    size   erasesize  name
> > > mtd0: 0010 1000 "loader1"
> > > mtd1: 

Re: [PATCH v2] riscv: force __cpu_up_ variables to put in data section

2020-05-04 Thread Anup Patel
On Mon, May 4, 2020 at 9:24 AM Zong Li  wrote:
>
> Put __cpu_up_stack_pointer and __cpu_up_task_pointer in the data section.
> Currently, these two variables are put in the bss section; there is a
> potential risk that secondary harts get an uninitialized value before the
> main hart finishes the bss clearing. In this case, all secondary
> harts would pass the waiting loop and enable the MMU before the main hart
> sets up the page table.
>
> This issue happened on random booting of multiple harts, which means
> it will manifest for BBL and OpenSBI v0.6 (or older). In OpenSBI
> v0.7, the HSM extension is included and all the secondary harts are
> waiting in firmware, so it could work fine without this change.

Slightly improved text:

This issue happens on random booting of multiple harts, which means
it will manifest for BBL and OpenSBI v0.6 (or older). In OpenSBI
v0.7 (or higher), we have the HSM extension, so all the secondary harts
are brought up by the Linux kernel in an orderly fashion. This means we
don't need this change for OpenSBI v0.7 (or higher).
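
To illustrate the hazard being described (a hedged C rendition; the real
spinwait loop lives in assembly in head.S, and the indexing is simplified):

	/*
	 * Secondary hart, very early boot: spin until the boot hart
	 * publishes this hart's stack pointer. If the array lives in .bss,
	 * it may hold junk before the boot hart clears bss, so a secondary
	 * hart can see a non-zero value and leave this loop too early.
	 */
	while (!READ_ONCE(__cpu_up_stack_pointer[hartid]))
		cpu_relax();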

>
> Changes in v2:
>   - Add commit description about random booting.
>
> Signed-off-by: Zong Li 
> Reviewed-by: Greentime Hu 
> ---
>  arch/riscv/kernel/cpu_ops.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/arch/riscv/kernel/cpu_ops.c b/arch/riscv/kernel/cpu_ops.c
> index c4c33bf02369..0ec22354018c 100644
> --- a/arch/riscv/kernel/cpu_ops.c
> +++ b/arch/riscv/kernel/cpu_ops.c
> @@ -15,8 +15,8 @@
>
>  const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init;
>
> -void *__cpu_up_stack_pointer[NR_CPUS];
> -void *__cpu_up_task_pointer[NR_CPUS];
> +void *__cpu_up_stack_pointer[NR_CPUS] __section(.data);
> +void *__cpu_up_task_pointer[NR_CPUS] __section(.data);
>
>  extern const struct cpu_operations cpu_ops_sbi;
>  extern const struct cpu_operations cpu_ops_spinwait;
> --
> 2.26.1
>

Apart from above, looks good to me.

Reviewed-by: Anup Patel 

Regards,
Anup


Re: [PATCH] riscv: force __cpu_up_ variables to put in data section

2020-05-02 Thread Anup Patel
On Sat, May 2, 2020 at 11:30 AM Zong Li  wrote:
>
> On Fri, May 1, 2020 at 2:23 AM Atish Patra  wrote:
> >
> > On Thu, Apr 30, 2020 at 2:53 AM Zong Li  wrote:
> > >
> > > Put __cpu_up_stack_pointer and __cpu_up_task_pointer in the data section.
> > > Currently, these two variables are put in the bss section; there is a
> > > potential risk that secondary harts get an uninitialized value before the
> > > main hart finishes the bss clearing. In this case, all secondary
> > > harts would go through the waiting loop and enable the MMU before the
> > > main hart sets up the page table.
> > >
> >
> > That would only be true if you are using the random booting protocol with
> > an SBI v0.1 implementation.
> > With the HSM extension in place, all the secondary cores are waiting in
> > firmware. The booting core will bring them up one by one from the cpu_up
> > method.
> >
> > The HSM extension is already available in OpenSBI v0.7 and Linux
> > kernel 5.7-rc1 onwards.
>
> If I understand correctly, the newest kernel still works with OpenSBI
> older than v0.7, or with BBL. It seems to me that we need to consider
> those use cases if we don't limit support to OpenSBI v0.7 and newer.

I think the commit description should clearly state that the issue will
manifest only for random booting of multiple HARTs which means it
will manifest only for BBL and OpenSBI v0.6 (or older).

Regards,
Anup


[PATCH v12 17/17] RISC-V: KVM: Add MAINTAINERS entry

2020-04-28 Thread Anup Patel
Add myself as maintainer for KVM RISC-V and Atish as designated reviewer.

Signed-off-by: Atish Patra 
Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Alexander Graf 
---
 MAINTAINERS | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 26f281d9f32a..ad3f37a8bdb1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9324,6 +9324,17 @@ F:   arch/powerpc/include/uapi/asm/kvm*
 F: arch/powerpc/kernel/kvm*
 F: arch/powerpc/kvm/
 
+KERNEL VIRTUAL MACHINE FOR RISC-V (KVM/riscv)
+M: Anup Patel 
+R: Atish Patra 
+L: k...@vger.kernel.org
+L: kvm-ri...@lists.infradead.org
+T: git git://github.com/kvm-riscv/linux.git
+S: Maintained
+F: arch/riscv/include/uapi/asm/kvm*
+F: arch/riscv/include/asm/kvm*
+F: arch/riscv/kvm/
+
 KERNEL VIRTUAL MACHINE for s390 (KVM/s390)
 M: Christian Borntraeger 
 M: Janosch Frank 
-- 
2.25.1



[PATCH v12 16/17] RISC-V: KVM: Document RISC-V specific parts of KVM API

2020-04-28 Thread Anup Patel
Document RISC-V specific parts of the KVM API, such as:
 - The interrupt numbers passed to the KVM_INTERRUPT ioctl.
 - The states supported by the KVM_{GET,SET}_MP_STATE ioctls.
 - The registers supported by the KVM_{GET,SET}_ONE_REG interface
   and the encoding of those register ids.
 - The exit reason KVM_EXIT_RISCV_SBI for SBI calls forwarded to
   the userspace tool.

CC: Jonathan Corbet 
CC: linux-...@vger.kernel.org
Signed-off-by: Anup Patel 
---
 Documentation/virt/kvm/api.rst | 193 +++--
 1 file changed, 184 insertions(+), 9 deletions(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index efbbe570aa9b..c2f9a535993f 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -513,7 +513,7 @@ translation mode.
 --
 
 :Capability: basic
-:Architectures: x86, ppc, mips
+:Architectures: x86, ppc, mips, riscv
 :Type: vcpu ioctl
 :Parameters: struct kvm_interrupt (in)
 :Returns: 0 on success, negative on failure.
@@ -582,6 +582,23 @@ interrupt number dequeues the interrupt.
 
 This is an asynchronous vcpu ioctl and can be invoked from any thread.
 
+RISC-V:
+^^^^^^^
+
+Queues an external interrupt to be injected into the virtual CPU. This ioctl
+is overloaded with 2 different irq values:
+
+a) KVM_INTERRUPT_SET
+
+   This sets an external interrupt for a virtual CPU and it will receive
+   the interrupt once it is ready.
+
+b) KVM_INTERRUPT_UNSET
+
+   This clears the pending external interrupt for a virtual CPU.
+
+This is an asynchronous vcpu ioctl and can be invoked from any thread.
+
 
 4.17 KVM_DEBUG_GUEST
 
@@ -1360,7 +1377,7 @@ for vm-wide capabilities.
 -
 
 :Capability: KVM_CAP_MP_STATE
-:Architectures: x86, s390, arm, arm64
+:Architectures: x86, s390, arm, arm64, riscv
 :Type: vcpu ioctl
 :Parameters: struct kvm_mp_state (out)
 :Returns: 0 on success; -1 on error
@@ -1377,7 +1394,8 @@ uniprocessor guests).
 Possible values are:
 
   ==========================    ===============================================
-   KVM_MP_STATE_RUNNABLE         the vcpu is currently running [x86,arm/arm64]
+   KVM_MP_STATE_RUNNABLE         the vcpu is currently running
+                                 [x86,arm/arm64,riscv]
    KVM_MP_STATE_UNINITIALIZED    the vcpu is an application processor (AP)
                                  which has not yet received an INIT signal [x86]
    KVM_MP_STATE_INIT_RECEIVED    the vcpu has received an INIT signal, and is
@@ -1386,7 +1404,7 @@ Possible values are:
                                  is waiting for an interrupt [x86]
    KVM_MP_STATE_SIPI_RECEIVED    the vcpu has just received a SIPI (vector
                                  accessible via KVM_GET_VCPU_EVENTS) [x86]
-   KVM_MP_STATE_STOPPED          the vcpu is stopped [s390,arm/arm64]
+   KVM_MP_STATE_STOPPED          the vcpu is stopped [s390,arm/arm64,riscv]
    KVM_MP_STATE_CHECK_STOP       the vcpu is in a special error state [s390]
    KVM_MP_STATE_OPERATING        the vcpu is operating (running or halted)
                                  [s390]
@@ -1398,8 +1416,8 @@ On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
 in-kernel irqchip, the multiprocessing state must be maintained by userspace on
 these architectures.
 
-For arm/arm64:
-^^^^^^^^^^^^^^
+For arm/arm64/riscv:
+^^^^^^^^^^^^^^^^^^^^
 
 The only states that are valid are KVM_MP_STATE_STOPPED and
 KVM_MP_STATE_RUNNABLE which reflect if the vcpu is paused or not.
@@ -1408,7 +1426,7 @@ KVM_MP_STATE_RUNNABLE which reflect if the vcpu is paused or not.
 -
 
 :Capability: KVM_CAP_MP_STATE
-:Architectures: x86, s390, arm, arm64
+:Architectures: x86, s390, arm, arm64, riscv
 :Type: vcpu ioctl
 :Parameters: struct kvm_mp_state (in)
 :Returns: 0 on success; -1 on error
@@ -1420,8 +1438,8 @@ On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
 in-kernel irqchip, the multiprocessing state must be maintained by userspace on
 these architectures.
 
-For arm/arm64:
-^^^^^^^^^^^^^^
+For arm/arm64/riscv:
+^^^^^^^^^^^^^^^^^^^^
 
 The only states that are valid are KVM_MP_STATE_STOPPED and
 KVM_MP_STATE_RUNNABLE which reflect if the vcpu should be paused or not.
@@ -2532,6 +2550,144 @@ following id bit patterns::
 
   0x7020  0003 02 <0:3> 
 
+RISC-V registers are mapped using the lower 32 bits. The upper 8 bits of
+that is the register group type.
+
+RISC-V config registers are meant for configuring a Guest VCPU and it has
+the following id bit patterns::
+
+  0x8020  01  (32bit Host)
+  0x8030  01  (64bit Host)
+
+Following are the RISC-V config registers:
+
+======================  ========  =================================
+Encoding                Register  Description
+======================  ========  =================================
+  0x80x0  0100          isa       ISA feature bitmap of Guest VCPU
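
As a hypothetical userspace illustration of the KVM_INTERRUPT behaviour
documented above (vcpu_fd, headers and error handling elided; the
KVM_INTERRUPT_SET/UNSET values come from the series' RISC-V uapi header):

	struct kvm_interrupt irq = { .irq = KVM_INTERRUPT_SET };

	/* Queue the external interrupt; KVM_INTERRUPT_UNSET clears it */
	if (ioctl(vcpu_fd, KVM_INTERRUPT, &irq) < 0)
		perror("KVM_INTERRUPT");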

[PATCH v12 12/17] RISC-V: KVM: Add timer functionality

2020-04-28 Thread Anup Patel
From: Atish Patra 

The RISC-V hypervisor specification doesn't have any virtual timer
feature.

Due to this, the guest VCPU timer will be programmed via SBI calls.
The host will use a separate hrtimer event for each guest VCPU to
provide timer functionality. We inject a virtual timer interrupt to
the guest VCPU whenever the guest VCPU hrtimer event expires.

This patch adds guest VCPU timer implementation along with ONE_REG
interface to access VCPU timer state from user space.
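
The expiry path described above boils down to roughly the following
(a hedged sketch, not the patch code; the helper names follow this series
and the IRQ_VS_TIMER injection is an assumption for illustration):

	static enum hrtimer_restart kvm_riscv_vcpu_hrtimer_expired(struct hrtimer *h)
	{
		struct kvm_vcpu_timer *t = container_of(h, struct kvm_vcpu_timer, hrt);
		struct kvm_vcpu *vcpu = container_of(t, struct kvm_vcpu, arch.timer);

		t->next_set = false;
		/* Inject the virtual (VS-mode) timer interrupt into the guest */
		kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_VS_TIMER);

		return HRTIMER_NORESTART;
	}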

Signed-off-by: Atish Patra 
Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
Acked-by: Daniel Lezcano 
---
 arch/riscv/include/asm/kvm_host.h   |   7 +
 arch/riscv/include/asm/kvm_vcpu_timer.h |  44 +
 arch/riscv/include/uapi/asm/kvm.h   |  17 ++
 arch/riscv/kvm/Makefile |   2 +-
 arch/riscv/kvm/vcpu.c   |  14 ++
 arch/riscv/kvm/vcpu_timer.c | 225 
 arch/riscv/kvm/vm.c |   2 +-
 drivers/clocksource/timer-riscv.c   |   8 +
 include/clocksource/timer-riscv.h   |  16 ++
 9 files changed, 333 insertions(+), 2 deletions(-)
 create mode 100644 arch/riscv/include/asm/kvm_vcpu_timer.h
 create mode 100644 arch/riscv/kvm/vcpu_timer.c
 create mode 100644 include/clocksource/timer-riscv.h

diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
index 642fd817c29b..8bf3e7250ce9 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -12,6 +12,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #ifdef CONFIG_64BIT
 #define KVM_MAX_VCPUS  (1U << 16)
@@ -64,6 +65,9 @@ struct kvm_arch {
/* stage2 page table */
pgd_t *pgd;
phys_addr_t pgd_phys;
+
+   /* Guest Timer */
+   struct kvm_guest_timer timer;
 };
 
 struct kvm_mmio_decode {
@@ -176,6 +180,9 @@ struct kvm_vcpu_arch {
unsigned long irqs_pending;
unsigned long irqs_pending_mask;
 
+   /* VCPU Timer */
+   struct kvm_vcpu_timer timer;
+
/* MMIO instruction details */
struct kvm_mmio_decode mmio_decode;
 
diff --git a/arch/riscv/include/asm/kvm_vcpu_timer.h b/arch/riscv/include/asm/kvm_vcpu_timer.h
new file mode 100644
index ..375281eb49e0
--- /dev/null
+++ b/arch/riscv/include/asm/kvm_vcpu_timer.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Atish Patra 
+ */
+
+#ifndef __KVM_VCPU_RISCV_TIMER_H
+#define __KVM_VCPU_RISCV_TIMER_H
+
+#include 
+
+struct kvm_guest_timer {
+   /* Mult & Shift values to get nanoseconds from cycles */
+   u32 nsec_mult;
+   u32 nsec_shift;
+   /* Time delta value */
+   u64 time_delta;
+};
+
+struct kvm_vcpu_timer {
+   /* Flag for whether init is done */
+   bool init_done;
+   /* Flag for whether timer event is configured */
+   bool next_set;
+   /* Next timer event cycles */
+   u64 next_cycles;
+   /* Underlying hrtimer instance */
+   struct hrtimer hrt;
+};
+
+int kvm_riscv_vcpu_timer_next_event(struct kvm_vcpu *vcpu, u64 ncycles);
+int kvm_riscv_vcpu_get_reg_timer(struct kvm_vcpu *vcpu,
+const struct kvm_one_reg *reg);
+int kvm_riscv_vcpu_set_reg_timer(struct kvm_vcpu *vcpu,
+const struct kvm_one_reg *reg);
+int kvm_riscv_vcpu_timer_init(struct kvm_vcpu *vcpu);
+int kvm_riscv_vcpu_timer_deinit(struct kvm_vcpu *vcpu);
+int kvm_riscv_vcpu_timer_reset(struct kvm_vcpu *vcpu);
+void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu);
+int kvm_riscv_guest_timer_init(struct kvm *kvm);
+
+#endif
diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h
index 3a20327242f1..8f15eee35a1e 100644
--- a/arch/riscv/include/uapi/asm/kvm.h
+++ b/arch/riscv/include/uapi/asm/kvm.h
@@ -73,6 +73,18 @@ struct kvm_riscv_csr {
unsigned long satp;
 };
 
+/* TIMER registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
+struct kvm_riscv_timer {
+   u64 frequency;
+   u64 time;
+   u64 compare;
+   u64 state;
+};
+
+/* Possible states for kvm_riscv_timer */
+#define KVM_RISCV_TIMER_STATE_OFF  0
+#define KVM_RISCV_TIMER_STATE_ON   1
+
 #define KVM_REG_SIZE(id)   \
(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
 
@@ -95,6 +107,11 @@ struct kvm_riscv_csr {
 #define KVM_REG_RISCV_CSR_REG(name)\
(offsetof(struct kvm_riscv_csr, name) / sizeof(unsigned long))
 
+/* Timer registers are mapped as type 4 */
+#define KVM_REG_RISCV_TIMER(0x04 << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_TIMER_REG(name)  \
+   (offsetof(struct kvm_riscv_timer, name) / sizeof(u64))
+
 #endif
 
 #endif /* __LINUX_KVM_RISCV_H */
diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile
index c0f57f26c13d..3e0c7558

[PATCH v12 13/17] RISC-V: KVM: FP lazy save/restore

2020-04-28 Thread Anup Patel
From: Atish Patra 

This patch adds floating point (F and D extension) context save/restore
for guest VCPUs. The FP context is saved and restored lazily only when
the kernel enters/exits the in-kernel run loop and not during the KVM world
switch. This way FP save/restore has minimal impact on KVM performance.
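
The lazy policy reduces to something like this sketch around the run loop
(hedged; the SR_FS dirty-state handling is simplified for illustration):

	/*
	 * On entry to the in-kernel run loop: restore guest FP state only
	 * for the FP extension the guest ISA actually has.
	 */
	static void kvm_riscv_vcpu_guest_fp_restore(struct kvm_cpu_context *cntx,
						    unsigned long isa)
	{
		if ((cntx->sstatus & SR_FS) != SR_FS_OFF) {
			if (riscv_isa_extension_available(&isa, d))
				__kvm_riscv_fp_d_restore(cntx);
			else if (riscv_isa_extension_available(&isa, f))
				__kvm_riscv_fp_f_restore(cntx);
		}
	}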

Signed-off-by: Atish Patra 
Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Alexander Graf 
---
 arch/riscv/include/asm/kvm_host.h |   5 +
 arch/riscv/kernel/asm-offsets.c   |  72 +
 arch/riscv/kvm/vcpu.c |  81 ++
 arch/riscv/kvm/vcpu_switch.S  | 174 ++
 4 files changed, 332 insertions(+)

diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
index 8bf3e7250ce9..0677b5a70ac1 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -128,6 +128,7 @@ struct kvm_cpu_context {
unsigned long sepc;
unsigned long sstatus;
unsigned long hstatus;
+   union __riscv_fp_state fp;
 };
 
 struct kvm_vcpu_csr {
@@ -245,6 +246,10 @@ int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct 
kvm_run *run,
struct kvm_cpu_trap *trap);
 
 void __kvm_riscv_switch_to(struct kvm_vcpu_arch *vcpu_arch);
+void __kvm_riscv_fp_f_save(struct kvm_cpu_context *context);
+void __kvm_riscv_fp_f_restore(struct kvm_cpu_context *context);
+void __kvm_riscv_fp_d_save(struct kvm_cpu_context *context);
+void __kvm_riscv_fp_d_restore(struct kvm_cpu_context *context);
 
 int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
 int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
index f7e43fe55335..80673f7ef5cf 100644
--- a/arch/riscv/kernel/asm-offsets.c
+++ b/arch/riscv/kernel/asm-offsets.c
@@ -191,6 +191,78 @@ void asm_offsets(void)
OFFSET(KVM_ARCH_TRAP_HTVAL, kvm_cpu_trap, htval);
OFFSET(KVM_ARCH_TRAP_HTINST, kvm_cpu_trap, htinst);
 
+   /* F extension */
+
+   OFFSET(KVM_ARCH_FP_F_F0, kvm_cpu_context, fp.f.f[0]);
+   OFFSET(KVM_ARCH_FP_F_F1, kvm_cpu_context, fp.f.f[1]);
+   OFFSET(KVM_ARCH_FP_F_F2, kvm_cpu_context, fp.f.f[2]);
+   OFFSET(KVM_ARCH_FP_F_F3, kvm_cpu_context, fp.f.f[3]);
+   OFFSET(KVM_ARCH_FP_F_F4, kvm_cpu_context, fp.f.f[4]);
+   OFFSET(KVM_ARCH_FP_F_F5, kvm_cpu_context, fp.f.f[5]);
+   OFFSET(KVM_ARCH_FP_F_F6, kvm_cpu_context, fp.f.f[6]);
+   OFFSET(KVM_ARCH_FP_F_F7, kvm_cpu_context, fp.f.f[7]);
+   OFFSET(KVM_ARCH_FP_F_F8, kvm_cpu_context, fp.f.f[8]);
+   OFFSET(KVM_ARCH_FP_F_F9, kvm_cpu_context, fp.f.f[9]);
+   OFFSET(KVM_ARCH_FP_F_F10, kvm_cpu_context, fp.f.f[10]);
+   OFFSET(KVM_ARCH_FP_F_F11, kvm_cpu_context, fp.f.f[11]);
+   OFFSET(KVM_ARCH_FP_F_F12, kvm_cpu_context, fp.f.f[12]);
+   OFFSET(KVM_ARCH_FP_F_F13, kvm_cpu_context, fp.f.f[13]);
+   OFFSET(KVM_ARCH_FP_F_F14, kvm_cpu_context, fp.f.f[14]);
+   OFFSET(KVM_ARCH_FP_F_F15, kvm_cpu_context, fp.f.f[15]);
+   OFFSET(KVM_ARCH_FP_F_F16, kvm_cpu_context, fp.f.f[16]);
+   OFFSET(KVM_ARCH_FP_F_F17, kvm_cpu_context, fp.f.f[17]);
+   OFFSET(KVM_ARCH_FP_F_F18, kvm_cpu_context, fp.f.f[18]);
+   OFFSET(KVM_ARCH_FP_F_F19, kvm_cpu_context, fp.f.f[19]);
+   OFFSET(KVM_ARCH_FP_F_F20, kvm_cpu_context, fp.f.f[20]);
+   OFFSET(KVM_ARCH_FP_F_F21, kvm_cpu_context, fp.f.f[21]);
+   OFFSET(KVM_ARCH_FP_F_F22, kvm_cpu_context, fp.f.f[22]);
+   OFFSET(KVM_ARCH_FP_F_F23, kvm_cpu_context, fp.f.f[23]);
+   OFFSET(KVM_ARCH_FP_F_F24, kvm_cpu_context, fp.f.f[24]);
+   OFFSET(KVM_ARCH_FP_F_F25, kvm_cpu_context, fp.f.f[25]);
+   OFFSET(KVM_ARCH_FP_F_F26, kvm_cpu_context, fp.f.f[26]);
+   OFFSET(KVM_ARCH_FP_F_F27, kvm_cpu_context, fp.f.f[27]);
+   OFFSET(KVM_ARCH_FP_F_F28, kvm_cpu_context, fp.f.f[28]);
+   OFFSET(KVM_ARCH_FP_F_F29, kvm_cpu_context, fp.f.f[29]);
+   OFFSET(KVM_ARCH_FP_F_F30, kvm_cpu_context, fp.f.f[30]);
+   OFFSET(KVM_ARCH_FP_F_F31, kvm_cpu_context, fp.f.f[31]);
+   OFFSET(KVM_ARCH_FP_F_FCSR, kvm_cpu_context, fp.f.fcsr);
+
+   /* D extension */
+
+   OFFSET(KVM_ARCH_FP_D_F0, kvm_cpu_context, fp.d.f[0]);
+   OFFSET(KVM_ARCH_FP_D_F1, kvm_cpu_context, fp.d.f[1]);
+   OFFSET(KVM_ARCH_FP_D_F2, kvm_cpu_context, fp.d.f[2]);
+   OFFSET(KVM_ARCH_FP_D_F3, kvm_cpu_context, fp.d.f[3]);
+   OFFSET(KVM_ARCH_FP_D_F4, kvm_cpu_context, fp.d.f[4]);
+   OFFSET(KVM_ARCH_FP_D_F5, kvm_cpu_context, fp.d.f[5]);
+   OFFSET(KVM_ARCH_FP_D_F6, kvm_cpu_context, fp.d.f[6]);
+   OFFSET(KVM_ARCH_FP_D_F7, kvm_cpu_context, fp.d.f[7]);
+   OFFSET(KVM_ARCH_FP_D_F8, kvm_cpu_context, fp.d.f[8]);
+   OFFSET(KVM_ARCH_FP_D_F9, kvm_cpu_context, fp.d.f[9]);
+   OFFSET(KVM_ARCH_FP_D_F10, kvm_cpu_context, fp.d.f[10]);
+   OFFSET(KVM_ARCH_FP_D_F11, kvm_cpu_context, fp.d.f[11]);

[PATCH v12 14/17] RISC-V: KVM: Implement ONE REG interface for FP registers

2020-04-28 Thread Anup Patel
From: Atish Patra 

Add a KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctl interface for floating
point registers such as F0-F31 and FCSR. This support is added for
both 'F' and 'D' extensions.

Signed-off-by: Atish Patra 
Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Alexander Graf 
---
 arch/riscv/include/uapi/asm/kvm.h |  10 +++
 arch/riscv/kvm/vcpu.c | 104 ++
 2 files changed, 114 insertions(+)
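
For illustration, a minimal user-space sketch of reading guest FP
register f0 through this interface could look as follows (vcpu_fd from
KVM_CREATE_VCPU and the generic KVM_REG_RISCV arch identifier are
assumed; the reg-id composition follows the KVM_REG_RISCV_FP_F macros
added below):

	/* Hedged sketch: read guest f0 via KVM_GET_ONE_REG */
	static int get_guest_f0(int vcpu_fd, uint32_t *out)
	{
		struct kvm_one_reg reg;

		/* F-extension registers are 32-bit, mapped as type 5 */
		reg.id = KVM_REG_RISCV | KVM_REG_SIZE_U32 |
			 KVM_REG_RISCV_FP_F |
			 KVM_REG_RISCV_FP_F_REG(f[0]);
		reg.addr = (unsigned long)out;

		/* Fails with -EINVAL if the VCPU ISA lacks 'F' */
		return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
	}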

diff --git a/arch/riscv/include/uapi/asm/kvm.h 
b/arch/riscv/include/uapi/asm/kvm.h
index 8f15eee35a1e..f4274c2e5cdc 100644
--- a/arch/riscv/include/uapi/asm/kvm.h
+++ b/arch/riscv/include/uapi/asm/kvm.h
@@ -112,6 +112,16 @@ struct kvm_riscv_timer {
 #define KVM_REG_RISCV_TIMER_REG(name)  \
(offsetof(struct kvm_riscv_timer, name) / sizeof(u64))
 
+/* F extension registers are mapped as type 5 */
+#define KVM_REG_RISCV_FP_F (0x05 << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_FP_F_REG(name)   \
+   (offsetof(struct __riscv_f_ext_state, name) / sizeof(u32))
+
+/* D extension registers are mapped as type 6 */
+#define KVM_REG_RISCV_FP_D (0x06 << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_FP_D_REG(name)   \
+   (offsetof(struct __riscv_d_ext_state, name) / sizeof(u64))
+
 #endif
 
 #endif /* __LINUX_KVM_RISCV_H */
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index 0f4b6b7165aa..a52180404271 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -416,6 +416,98 @@ static int kvm_riscv_vcpu_set_reg_csr(struct kvm_vcpu 
*vcpu,
return 0;
 }
 
+static int kvm_riscv_vcpu_get_reg_fp(struct kvm_vcpu *vcpu,
+const struct kvm_one_reg *reg,
+unsigned long rtype)
+{
+   struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+   unsigned long isa = vcpu->arch.isa;
+   unsigned long __user *uaddr =
+   (unsigned long __user *)(unsigned long)reg->addr;
+   unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+   KVM_REG_SIZE_MASK |
+   rtype);
+   void *reg_val;
+
+   if ((rtype == KVM_REG_RISCV_FP_F) &&
+   riscv_isa_extension_available(&isa, f)) {
+   if (KVM_REG_SIZE(reg->id) != sizeof(u32))
+   return -EINVAL;
+   if (reg_num == KVM_REG_RISCV_FP_F_REG(fcsr))
+   reg_val = &cntx->fp.f.fcsr;
+   else if ((KVM_REG_RISCV_FP_F_REG(f[0]) <= reg_num) &&
+ reg_num <= KVM_REG_RISCV_FP_F_REG(f[31]))
+   reg_val = &cntx->fp.f.f[reg_num];
+   else
+   return -EINVAL;
+   } else if ((rtype == KVM_REG_RISCV_FP_D) &&
+  riscv_isa_extension_available(&isa, d)) {
+   if (reg_num == KVM_REG_RISCV_FP_D_REG(fcsr)) {
+   if (KVM_REG_SIZE(reg->id) != sizeof(u32))
+   return -EINVAL;
+   reg_val = &cntx->fp.d.fcsr;
+   } else if ((KVM_REG_RISCV_FP_D_REG(f[0]) <= reg_num) &&
+  reg_num <= KVM_REG_RISCV_FP_D_REG(f[31])) {
+   if (KVM_REG_SIZE(reg->id) != sizeof(u64))
+   return -EINVAL;
+   reg_val = &cntx->fp.d.f[reg_num];
+   } else
+   return -EINVAL;
+   } else
+   return -EINVAL;
+
+   if (copy_to_user(uaddr, reg_val, KVM_REG_SIZE(reg->id)))
+   return -EFAULT;
+
+   return 0;
+}
+
+static int kvm_riscv_vcpu_set_reg_fp(struct kvm_vcpu *vcpu,
+const struct kvm_one_reg *reg,
+unsigned long rtype)
+{
+   struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+   unsigned long isa = vcpu->arch.isa;
+   unsigned long __user *uaddr =
+   (unsigned long __user *)(unsigned long)reg->addr;
+   unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+   KVM_REG_SIZE_MASK |
+   rtype);
+   void *reg_val;
+
+   if ((rtype == KVM_REG_RISCV_FP_F) &&
+   riscv_isa_extension_available(&isa, f)) {
+   if (KVM_REG_SIZE(reg->id) != sizeof(u32))
+   return -EINVAL;
+   if (reg_num == KVM_REG_RISCV_FP_F_REG(fcsr))
+   reg_val = &cntx->fp.f.fcsr;
+   else if ((KVM_REG_RISCV_FP_F_REG(f[0]) <= reg_num) &&
+ reg_num <= KVM_REG_RISCV_FP_F_REG(f[31]))
+   reg_val = &cntx->fp.f.f[reg_num];
+   else
+   return -EINVAL;
+   } else if (

[PATCH v12 15/17] RISC-V: KVM: Add SBI v0.1 support

2020-04-28 Thread Anup Patel
From: Atish Patra 

The KVM host kernel runs in HS-mode, so we need to handle the SBI
calls coming from the guest kernel running in VS-mode.

This patch adds SBI v0.1 support in KVM RISC-V. Almost all SBI v0.1
calls are implemented in the KVM kernel module except the GETCHAR and
PUTCHAR calls, which are forwarded to user space because they cannot
be implemented in kernel space. In the future, when we implement SBI
v0.2 for the Guest, we will forward SBI v0.2 experimental and vendor
extension calls to user space.

Signed-off-by: Atish Patra 
Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
---
 arch/riscv/include/asm/kvm_host.h |  10 ++
 arch/riscv/kvm/Makefile   |   2 +-
 arch/riscv/kvm/vcpu.c |   9 ++
 arch/riscv/kvm/vcpu_exit.c|   4 +
 arch/riscv/kvm/vcpu_sbi.c | 172 ++
 include/uapi/linux/kvm.h  |   8 ++
 6 files changed, 204 insertions(+), 1 deletion(-)
 create mode 100644 arch/riscv/kvm/vcpu_sbi.c
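
A rough user-space sketch of servicing the forwarded console calls is
below; note that the kvm_run field names (riscv_sbi.extension_id,
args[], ret[]) are assumptions here, since the include/uapi/linux/kvm.h
hunk is not visible in this excerpt:

	/* Hedged sketch: handle SBI console calls forwarded to
	 * user-space via KVM_EXIT_RISCV_SBI (field names assumed). */
	case KVM_EXIT_RISCV_SBI:
		switch (run->riscv_sbi.extension_id) {
		case 0x1:	/* SBI v0.1 CONSOLE_PUTCHAR */
			putchar((int)run->riscv_sbi.args[0]);
			run->riscv_sbi.ret[0] = 0;
			break;
		case 0x2:	/* SBI v0.1 CONSOLE_GETCHAR */
			run->riscv_sbi.ret[0] = getchar();
			break;
		}
		break;	/* KVM_RUN resumes; the kernel applies ret[] */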

diff --git a/arch/riscv/include/asm/kvm_host.h 
b/arch/riscv/include/asm/kvm_host.h
index 0677b5a70ac1..f7520d4e0b43 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -77,6 +77,10 @@ struct kvm_mmio_decode {
int return_handled;
 };
 
+struct kvm_sbi_context {
+   int return_handled;
+};
+
 #define KVM_MMU_PAGE_CACHE_NR_OBJS 32
 
 struct kvm_mmu_page_cache {
@@ -187,6 +191,9 @@ struct kvm_vcpu_arch {
/* MMIO instruction details */
struct kvm_mmio_decode mmio_decode;
 
+   /* SBI context */
+   struct kvm_sbi_context sbi_context;
+
/* Cache pages needed to program page tables with spinlock held */
struct kvm_mmu_page_cache mmu_page_cache;
 
@@ -259,4 +266,7 @@ bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, 
unsigned long mask);
 void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu);
 void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu);
 
+int kvm_riscv_vcpu_sbi_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run);
+
 #endif /* __RISCV_KVM_HOST_H__ */
diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile
index 3e0c7558320d..b56dc1650d2c 100644
--- a/arch/riscv/kvm/Makefile
+++ b/arch/riscv/kvm/Makefile
@@ -9,6 +9,6 @@ ccflags-y := -Ivirt/kvm -Iarch/riscv/kvm
 kvm-objs := $(common-objs-y)
 
 kvm-objs += main.o vm.o vmid.o tlb.o mmu.o
-kvm-objs += vcpu.o vcpu_exit.o vcpu_switch.o vcpu_timer.o
+kvm-objs += vcpu.o vcpu_exit.o vcpu_switch.o vcpu_timer.o vcpu_sbi.o
 
 obj-$(CONFIG_KVM)  += kvm.o
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index a52180404271..567804268c39 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -866,6 +866,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct 
kvm_run *run)
}
}
 
+   /* Process SBI value returned from user-space */
+   if (run->exit_reason == KVM_EXIT_RISCV_SBI) {
+   ret = kvm_riscv_vcpu_sbi_return(vcpu, vcpu->run);
+   if (ret) {
+   srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
+   return ret;
+   }
+   }
+
if (run->immediate_exit) {
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
return -EINTR;
diff --git a/arch/riscv/kvm/vcpu_exit.c b/arch/riscv/kvm/vcpu_exit.c
index 2384b4bf4939..d826ce6f1e98 100644
--- a/arch/riscv/kvm/vcpu_exit.c
+++ b/arch/riscv/kvm/vcpu_exit.c
@@ -631,6 +631,10 @@ int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct 
kvm_run *run,
if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV)
ret = stage2_page_fault(vcpu, run, trap);
break;
+   case EXC_SUPERVISOR_SYSCALL:
+   if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV)
+   ret = kvm_riscv_vcpu_sbi_ecall(vcpu, run);
+   break;
default:
break;
};
diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c
new file mode 100644
index ..b04e2b175fbc
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_sbi.c
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0
+/**
+ * Copyright (c) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Atish Patra 
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define SBI_VERSION_MAJOR  0
+#define SBI_VERSION_MINOR  1
+
+static void kvm_sbi_system_shutdown(struct kvm_vcpu *vcpu,
+   struct kvm_run *run, u32 type)
+{
+   int i;
+   struct kvm_vcpu *tmp;
+
+   kvm_for_each_vcpu(i, tmp, vcpu->kvm)
+   tmp->arch.power_off = true;
+   kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP);
+
+   memset(&run->system_event, 0, sizeof(run->system_event));

[PATCH v12 10/17] RISC-V: KVM: Implement stage2 page table programming

2020-04-28 Thread Anup Patel
This patch implements all required functions for programming
the stage2 page table for each Guest/VM.

At a high level, the flow of the stage2-related functions is similar
to the KVM ARM/ARM64 implementation, but the stage2 page table format
is quite different for KVM RISC-V.

Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
---
 arch/riscv/include/asm/kvm_host.h |  10 +
 arch/riscv/include/asm/pgtable-bits.h |   1 +
 arch/riscv/kvm/mmu.c  | 574 +-
 3 files changed, 575 insertions(+), 10 deletions(-)
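
As a worked example of the page-table walk macros introduced below: on
a 64-bit host stage2 uses three levels with 9 index bits per level on
top of the 12-bit page offset, so stage2_pte_index() slices a guest
physical address like this (the GPA value is illustrative only):

	/* Sketch: decompose gpa = 0x40201000 with stage2_pte_index().
	 *   level 2 index = (gpa >> 30) & 0x1ff = 1
	 *   level 1 index = (gpa >> 21) & 0x1ff = 1
	 *   level 0 index = (gpa >> 12) & 0x1ff = 1
	 */
	unsigned long idx2 = stage2_pte_index(0x40201000ULL, 2);
	unsigned long idx1 = stage2_pte_index(0x40201000ULL, 1);
	unsigned long idx0 = stage2_pte_index(0x40201000ULL, 0);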

diff --git a/arch/riscv/include/asm/kvm_host.h 
b/arch/riscv/include/asm/kvm_host.h
index 296a5b13b3a6..a75778665546 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -73,6 +73,13 @@ struct kvm_mmio_decode {
int return_handled;
 };
 
+#define KVM_MMU_PAGE_CACHE_NR_OBJS 32
+
+struct kvm_mmu_page_cache {
+   int nobjs;
+   void *objects[KVM_MMU_PAGE_CACHE_NR_OBJS];
+};
+
 struct kvm_cpu_trap {
unsigned long sepc;
unsigned long scause;
@@ -172,6 +179,9 @@ struct kvm_vcpu_arch {
/* MMIO instruction details */
struct kvm_mmio_decode mmio_decode;
 
+   /* Cache pages needed to program page tables with spinlock held */
+   struct kvm_mmu_page_cache mmu_page_cache;
+
/* VCPU power-off state */
bool power_off;
 
diff --git a/arch/riscv/include/asm/pgtable-bits.h 
b/arch/riscv/include/asm/pgtable-bits.h
index bbaeb5d35842..be49d62fcc2b 100644
--- a/arch/riscv/include/asm/pgtable-bits.h
+++ b/arch/riscv/include/asm/pgtable-bits.h
@@ -26,6 +26,7 @@
 
 #define _PAGE_SPECIAL   _PAGE_SOFT
 #define _PAGE_TABLE _PAGE_PRESENT
+#define _PAGE_LEAF  (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)
 
 /*
  * _PAGE_PROT_NONE is set on not-present pages (and ignored by the hardware) to
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
index 8fb356e68cc5..9daeb4a051e7 100644
--- a/arch/riscv/kvm/mmu.c
+++ b/arch/riscv/kvm/mmu.c
@@ -17,6 +17,357 @@
 #include 
 #include 
 #include 
+#include 
+
+#ifdef CONFIG_64BIT
+#define stage2_have_pmd    true
+#define stage2_gpa_size    ((gpa_t)(1ULL << 39))
+#define stage2_pgd_levels  3
+#define stage2_index_bits  9
+#else
+#define stage2_have_pmd    false
+#define stage2_gpa_size    ((gpa_t)(1ULL << 32))
+#define stage2_pgd_levels  2
+#define stage2_index_bits  10
+#endif
+
+#define stage2_pte_index(addr, level) \
+(((addr) >> (PAGE_SHIFT + stage2_index_bits * (level))) & (PTRS_PER_PTE - 1))
+
+static inline unsigned long stage2_pte_page_vaddr(pte_t pte)
+{
+   return (unsigned long)pfn_to_virt(pte_val(pte) >> _PAGE_PFN_SHIFT);
+}
+
+static int stage2_page_size_to_level(unsigned long page_size, u32 *out_level)
+{
+   if (page_size == PAGE_SIZE)
+   *out_level = 0;
+   else if (page_size == PMD_SIZE)
+   *out_level = 1;
+   else if (page_size == PGDIR_SIZE)
+   *out_level = (stage2_have_pmd) ? 2 : 1;
+   else
+   return -EINVAL;
+
+   return 0;
+}
+
+static int stage2_level_to_page_size(u32 level, unsigned long *out_pgsize)
+{
+   switch (level) {
+   case 0:
+   *out_pgsize = PAGE_SIZE;
+   break;
+   case 1:
+   *out_pgsize = (stage2_have_pmd) ? PMD_SIZE : PGDIR_SIZE;
+   break;
+   case 2:
+   *out_pgsize = PGDIR_SIZE;
+   break;
+   default:
+   return -EINVAL;
+   }
+
+   return 0;
+}
+
+static int stage2_cache_topup(struct kvm_mmu_page_cache *pcache,
+ int min, int max)
+{
+   void *page;
+
+   BUG_ON(max > KVM_MMU_PAGE_CACHE_NR_OBJS);
+   if (pcache->nobjs >= min)
+   return 0;
+   while (pcache->nobjs < max) {
+   page = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+   if (!page)
+   return -ENOMEM;
+   pcache->objects[pcache->nobjs++] = page;
+   }
+
+   return 0;
+}
+
+static void stage2_cache_flush(struct kvm_mmu_page_cache *pcache)
+{
+   while (pcache && pcache->nobjs)
+   free_page((unsigned long)pcache->objects[--pcache->nobjs]);
+}
+
+static void *stage2_cache_alloc(struct kvm_mmu_page_cache *pcache)
+{
+   void *p;
+
+   if (!pcache)
+   return NULL;
+
+   BUG_ON(!pcache->nobjs);
+   p = pcache->objects[--pcache->nobjs];
+
+   return p;
+}
+
+static bool stage2_get_leaf_entry(struct kvm *kvm, gpa_t addr,
+ pte_t **ptepp, u32 *ptep_level)
+{
+   pte_t *ptep;
+   u32 current_level = stage2_pgd_levels - 1;
+
+   *ptep_level = current_level;
+   ptep = (pte_t *)kvm->arch.pgd;
+   ptep = &ptep[stage2_pte_index(addr, current_level)];
+   wh

[PATCH v12 11/17] RISC-V: KVM: Implement MMU notifiers

2020-04-28 Thread Anup Patel
This patch implements MMU notifiers for KVM RISC-V so that Guest
physical address space is in-sync with Host physical address space.

This will allow swapping, page migration, etc to work transparently
with KVM RISC-V.

Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Alexander Graf 
---
 arch/riscv/include/asm/kvm_host.h |   7 ++
 arch/riscv/kvm/Kconfig|   1 +
 arch/riscv/kvm/mmu.c  | 129 +-
 arch/riscv/kvm/vm.c   |   1 +
 4 files changed, 137 insertions(+), 1 deletion(-)
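
The heart of handle_hva_to_gpa() below is clamping the notifier range
to each memslot before translating it to guest physical space; a
standalone sketch of that arithmetic with illustrative values:

	/* Sketch: clamp an HVA range [start, end) against one memslot,
	 * as handle_hva_to_gpa() does below (values illustrative). */
	unsigned long start = 0x7f0000001000UL, end = 0x7f0000009000UL;
	unsigned long slot_base = 0x7f0000000000UL;   /* userspace_addr */
	unsigned long slot_size = 16UL << PAGE_SHIFT; /* npages pages */

	unsigned long hva_start = max(start, slot_base);
	unsigned long hva_end = min(end, slot_base + slot_size);

	if (hva_start < hva_end) {
		/* Overlap is [hva_start, hva_end): translate hva_start
		 * with hva_to_gfn_memslot() and call the handler. */
	}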

diff --git a/arch/riscv/include/asm/kvm_host.h 
b/arch/riscv/include/asm/kvm_host.h
index a75778665546..642fd817c29b 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -198,6 +198,13 @@ static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu 
*vcpu) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
 
+#define KVM_ARCH_WANT_MMU_NOTIFIER
+int kvm_unmap_hva_range(struct kvm *kvm,
+   unsigned long start, unsigned long end);
+int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
+
 void __kvm_riscv_hfence_gvma_vmid_gpa(unsigned long vmid,
  unsigned long gpa);
 void __kvm_riscv_hfence_gvma_vmid(unsigned long vmid);
diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig
index 88edd477b3a8..2356dc52ebb3 100644
--- a/arch/riscv/kvm/Kconfig
+++ b/arch/riscv/kvm/Kconfig
@@ -20,6 +20,7 @@ if VIRTUALIZATION
 config KVM
tristate "Kernel-based Virtual Machine (KVM) support (EXPERIMENTAL)"
depends on RISCV_SBI && MMU
+   select MMU_NOTIFIER
select PREEMPT_NOTIFIERS
select ANON_INODES
select KVM_MMIO
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
index 9daeb4a051e7..692141ce9bfc 100644
--- a/arch/riscv/kvm/mmu.c
+++ b/arch/riscv/kvm/mmu.c
@@ -369,6 +369,38 @@ int stage2_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t 
hpa,
 
 }
 
+static int handle_hva_to_gpa(struct kvm *kvm,
+unsigned long start,
+unsigned long end,
+int (*handler)(struct kvm *kvm,
+   gpa_t gpa, u64 size,
+   void *data),
+void *data)
+{
+   struct kvm_memslots *slots;
+   struct kvm_memory_slot *memslot;
+   int ret = 0;
+
+   slots = kvm_memslots(kvm);
+
+   /* we only care about the pages that the guest sees */
+   kvm_for_each_memslot(memslot, slots) {
+   unsigned long hva_start, hva_end;
+   gfn_t gpa;
+
+   hva_start = max(start, memslot->userspace_addr);
+   hva_end = min(end, memslot->userspace_addr +
+   (memslot->npages << PAGE_SHIFT));
+   if (hva_start >= hva_end)
+   continue;
+
+   gpa = hva_to_gfn_memslot(hva_start, memslot) << PAGE_SHIFT;
+   ret |= handler(kvm, gpa, (u64)(hva_end - hva_start), data);
+   }
+
+   return ret;
+}
+
 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
 {
 }
@@ -504,6 +536,95 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
return ret;
 }
 
+static int kvm_unmap_hva_handler(struct kvm *kvm,
+gpa_t gpa, u64 size, void *data)
+{
+   stage2_unmap_range(kvm, gpa, size);
+   return 0;
+}
+
+int kvm_unmap_hva_range(struct kvm *kvm,
+   unsigned long start, unsigned long end)
+{
+   if (!kvm->arch.pgd)
+   return 0;
+
+   handle_hva_to_gpa(kvm, start, end,
+ &kvm_unmap_hva_handler, NULL);
+   return 0;
+}
+
+static int kvm_set_spte_handler(struct kvm *kvm,
+   gpa_t gpa, u64 size, void *data)
+{
+   pte_t *pte = (pte_t *)data;
+
+   WARN_ON(size != PAGE_SIZE);
+   stage2_set_pte(kvm, 0, NULL, gpa, pte);
+
+   return 0;
+}
+
+int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+{
+   unsigned long end = hva + PAGE_SIZE;
+   kvm_pfn_t pfn = pte_pfn(pte);
+   pte_t stage2_pte;
+
+   if (!kvm->arch.pgd)
+   return 0;
+
+   stage2_pte = pfn_pte(pfn, PAGE_WRITE_EXEC);
+   handle_hva_to_gpa(kvm, hva, end,
+ &kvm_set_spte_handler, &stage2_pte);
+
+   return 0;
+}
+
+static int kvm_age_hva_handler(struct kvm *kvm,
+   gpa_t gpa, u64 size, void *data)
+{
+   pte_t *ptep;
+   u32 ptep_level = 0;
+
+   WARN_ON(size != PAG

[PATCH v12 06/17] RISC-V: KVM: Implement VCPU world-switch

2020-04-28 Thread Anup Patel
This patch implements the VCPU world-switch for KVM RISC-V.

The KVM RISC-V world-switch (i.e. __kvm_riscv_switch_to()) mostly
switches general purpose registers, SSTATUS, STVEC, SSCRATCH and
HSTATUS CSRs. Other CSRs are switched via vcpu_load() and vcpu_put()
interface in kvm_arch_vcpu_load() and kvm_arch_vcpu_put() functions
respectively.

Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Alexander Graf 
---
 arch/riscv/include/asm/kvm_host.h |   9 +-
 arch/riscv/kernel/asm-offsets.c   |  76 
 arch/riscv/kvm/Makefile   |   2 +-
 arch/riscv/kvm/vcpu.c |  30 -
 arch/riscv/kvm/vcpu_switch.S  | 194 ++
 5 files changed, 307 insertions(+), 4 deletions(-)
 create mode 100644 arch/riscv/kvm/vcpu_switch.S
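
The asm-offsets.c additions below exist so that vcpu_switch.S can
address struct kvm_vcpu_arch fields by numeric offset; each OFFSET()
entry effectively exports an offsetof() constant, roughly as sketched
here (the assembly line is illustrative, not quoted from the real
file):

	/* Sketch: OFFSET(KVM_ARCH_GUEST_RA, kvm_vcpu_arch,
	 *                guest_context.ra)
	 * gives assembly code a constant equivalent to:
	 *
	 *   #define KVM_ARCH_GUEST_RA \
	 *           offsetof(struct kvm_vcpu_arch, guest_context.ra)
	 *
	 * so with a0 pointing at kvm_vcpu_arch, vcpu_switch.S can do
	 * something like:
	 *
	 *   REG_S ra, KVM_ARCH_GUEST_RA(a0)
	 */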

diff --git a/arch/riscv/include/asm/kvm_host.h 
b/arch/riscv/include/asm/kvm_host.h
index 822b580d96a9..74d4ff6af0a5 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -118,6 +118,13 @@ struct kvm_vcpu_arch {
/* ISA feature bits (similar to MISA) */
unsigned long isa;
 
+   /* SSCRATCH and STVEC of Host */
+   unsigned long host_sscratch;
+   unsigned long host_stvec;
+
+   /* CPU context of Host */
+   struct kvm_cpu_context host_context;
+
/* CPU context of Guest VCPU */
struct kvm_cpu_context guest_context;
 
@@ -167,7 +174,7 @@ int kvm_riscv_vcpu_mmio_return(struct kvm_vcpu *vcpu, 
struct kvm_run *run);
 int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
struct kvm_cpu_trap *trap);
 
-static inline void __kvm_riscv_switch_to(struct kvm_vcpu_arch *vcpu_arch) {}
+void __kvm_riscv_switch_to(struct kvm_vcpu_arch *vcpu_arch);
 
 int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
 int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
index 07cb9c10de4e..3a230882b91e 100644
--- a/arch/riscv/kernel/asm-offsets.c
+++ b/arch/riscv/kernel/asm-offsets.c
@@ -7,7 +7,9 @@
 #define GENERATING_ASM_OFFSETS
 
 #include 
+#include 
 #include 
+#include 
 #include 
 #include 
 
@@ -109,6 +111,80 @@ void asm_offsets(void)
OFFSET(PT_BADADDR, pt_regs, badaddr);
OFFSET(PT_CAUSE, pt_regs, cause);
 
+   OFFSET(KVM_ARCH_GUEST_ZERO, kvm_vcpu_arch, guest_context.zero);
+   OFFSET(KVM_ARCH_GUEST_RA, kvm_vcpu_arch, guest_context.ra);
+   OFFSET(KVM_ARCH_GUEST_SP, kvm_vcpu_arch, guest_context.sp);
+   OFFSET(KVM_ARCH_GUEST_GP, kvm_vcpu_arch, guest_context.gp);
+   OFFSET(KVM_ARCH_GUEST_TP, kvm_vcpu_arch, guest_context.tp);
+   OFFSET(KVM_ARCH_GUEST_T0, kvm_vcpu_arch, guest_context.t0);
+   OFFSET(KVM_ARCH_GUEST_T1, kvm_vcpu_arch, guest_context.t1);
+   OFFSET(KVM_ARCH_GUEST_T2, kvm_vcpu_arch, guest_context.t2);
+   OFFSET(KVM_ARCH_GUEST_S0, kvm_vcpu_arch, guest_context.s0);
+   OFFSET(KVM_ARCH_GUEST_S1, kvm_vcpu_arch, guest_context.s1);
+   OFFSET(KVM_ARCH_GUEST_A0, kvm_vcpu_arch, guest_context.a0);
+   OFFSET(KVM_ARCH_GUEST_A1, kvm_vcpu_arch, guest_context.a1);
+   OFFSET(KVM_ARCH_GUEST_A2, kvm_vcpu_arch, guest_context.a2);
+   OFFSET(KVM_ARCH_GUEST_A3, kvm_vcpu_arch, guest_context.a3);
+   OFFSET(KVM_ARCH_GUEST_A4, kvm_vcpu_arch, guest_context.a4);
+   OFFSET(KVM_ARCH_GUEST_A5, kvm_vcpu_arch, guest_context.a5);
+   OFFSET(KVM_ARCH_GUEST_A6, kvm_vcpu_arch, guest_context.a6);
+   OFFSET(KVM_ARCH_GUEST_A7, kvm_vcpu_arch, guest_context.a7);
+   OFFSET(KVM_ARCH_GUEST_S2, kvm_vcpu_arch, guest_context.s2);
+   OFFSET(KVM_ARCH_GUEST_S3, kvm_vcpu_arch, guest_context.s3);
+   OFFSET(KVM_ARCH_GUEST_S4, kvm_vcpu_arch, guest_context.s4);
+   OFFSET(KVM_ARCH_GUEST_S5, kvm_vcpu_arch, guest_context.s5);
+   OFFSET(KVM_ARCH_GUEST_S6, kvm_vcpu_arch, guest_context.s6);
+   OFFSET(KVM_ARCH_GUEST_S7, kvm_vcpu_arch, guest_context.s7);
+   OFFSET(KVM_ARCH_GUEST_S8, kvm_vcpu_arch, guest_context.s8);
+   OFFSET(KVM_ARCH_GUEST_S9, kvm_vcpu_arch, guest_context.s9);
+   OFFSET(KVM_ARCH_GUEST_S10, kvm_vcpu_arch, guest_context.s10);
+   OFFSET(KVM_ARCH_GUEST_S11, kvm_vcpu_arch, guest_context.s11);
+   OFFSET(KVM_ARCH_GUEST_T3, kvm_vcpu_arch, guest_context.t3);
+   OFFSET(KVM_ARCH_GUEST_T4, kvm_vcpu_arch, guest_context.t4);
+   OFFSET(KVM_ARCH_GUEST_T5, kvm_vcpu_arch, guest_context.t5);
+   OFFSET(KVM_ARCH_GUEST_T6, kvm_vcpu_arch, guest_context.t6);
+   OFFSET(KVM_ARCH_GUEST_SEPC, kvm_vcpu_arch, guest_context.sepc);
+   OFFSET(KVM_ARCH_GUEST_SSTATUS, kvm_vcpu_arch, guest_context.sstatus);
+   OFFSET(KVM_ARCH_GUEST_HSTATUS, kvm_vcpu_arch, guest_context.hstatus);
+
+   OFFSET(KVM_ARCH_HOST_ZERO, kvm_vcpu_arch, host_context.zero);
+   OFFSET(KVM_ARCH_HOST_RA, kvm_vcpu_arch, host_context.ra);
+   OFFSET(KVM_ARCH_HOST_SP, kvm_vcpu_arch, host_context.sp);

[PATCH v12 07/17] RISC-V: KVM: Handle MMIO exits for VCPU

2020-04-28 Thread Anup Patel
We will get stage2 page faults whenever a Guest/VM accesses a
SW-emulated MMIO device or unmapped Guest RAM.

This patch implements MMIO read/write emulation by extracting MMIO
details from the trapped load/store instruction and forwarding the
MMIO read/write to user-space. The actual MMIO emulation will happen
in user-space and KVM kernel module will only take care of register
updates before resuming the trapped VCPU.

The handling of stage2 page faults for unmapped Guest RAM will be
implemented in a separate patch later.

Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Alexander Graf 
---
 arch/riscv/include/asm/kvm_host.h |  21 ++
 arch/riscv/kernel/asm-offsets.c   |   6 +
 arch/riscv/kvm/mmu.c  |   8 +
 arch/riscv/kvm/vcpu_exit.c| 545 +-
 arch/riscv/kvm/vcpu_switch.S  |  23 ++
 5 files changed, 600 insertions(+), 3 deletions(-)
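
On the user-space side this follows the standard KVM MMIO exit
protocol; a minimal run-loop sketch (handle_mmio_read/write are
hypothetical device-model hooks) could be:

	/* Hedged sketch: generic KVM_EXIT_MMIO handling in user-space */
	case KVM_EXIT_MMIO:
		if (run->mmio.is_write)
			handle_mmio_write(run->mmio.phys_addr,
					  run->mmio.data, run->mmio.len);
		else
			handle_mmio_read(run->mmio.phys_addr,
					 run->mmio.data, run->mmio.len);
		break;	/* next KVM_RUN lets the kernel update rd */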

diff --git a/arch/riscv/include/asm/kvm_host.h 
b/arch/riscv/include/asm/kvm_host.h
index 74d4ff6af0a5..05c84c745c61 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -53,6 +53,13 @@ struct kvm_arch {
phys_addr_t pgd_phys;
 };
 
+struct kvm_mmio_decode {
+   unsigned long insn;
+   int len;
+   int shift;
+   int return_handled;
+};
+
 struct kvm_cpu_trap {
unsigned long sepc;
unsigned long scause;
@@ -149,6 +156,9 @@ struct kvm_vcpu_arch {
unsigned long irqs_pending;
unsigned long irqs_pending_mask;
 
+   /* MMIO instruction details */
+   struct kvm_mmio_decode mmio_decode;
+
/* VCPU power-off state */
bool power_off;
 
@@ -165,11 +175,22 @@ static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu 
*vcpu) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
 
+int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu,
+struct kvm_memory_slot *memslot,
+gpa_t gpa, unsigned long hva, bool is_write);
 void kvm_riscv_stage2_flush_cache(struct kvm_vcpu *vcpu);
 int kvm_riscv_stage2_alloc_pgd(struct kvm *kvm);
 void kvm_riscv_stage2_free_pgd(struct kvm *kvm);
 void kvm_riscv_stage2_update_hgatp(struct kvm_vcpu *vcpu);
 
+void __kvm_riscv_unpriv_trap(void);
+
+unsigned long kvm_riscv_vcpu_unpriv_read(struct kvm_vcpu *vcpu,
+bool read_insn,
+unsigned long guest_addr,
+struct kvm_cpu_trap *trap);
+void kvm_riscv_vcpu_trap_redirect(struct kvm_vcpu *vcpu,
+ struct kvm_cpu_trap *trap);
 int kvm_riscv_vcpu_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
struct kvm_cpu_trap *trap);
diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
index 3a230882b91e..f7e43fe55335 100644
--- a/arch/riscv/kernel/asm-offsets.c
+++ b/arch/riscv/kernel/asm-offsets.c
@@ -185,6 +185,12 @@ void asm_offsets(void)
OFFSET(KVM_ARCH_HOST_SSCRATCH, kvm_vcpu_arch, host_sscratch);
OFFSET(KVM_ARCH_HOST_STVEC, kvm_vcpu_arch, host_stvec);
 
+   OFFSET(KVM_ARCH_TRAP_SEPC, kvm_cpu_trap, sepc);
+   OFFSET(KVM_ARCH_TRAP_SCAUSE, kvm_cpu_trap, scause);
+   OFFSET(KVM_ARCH_TRAP_STVAL, kvm_cpu_trap, stval);
+   OFFSET(KVM_ARCH_TRAP_HTVAL, kvm_cpu_trap, htval);
+   OFFSET(KVM_ARCH_TRAP_HTINST, kvm_cpu_trap, htinst);
+
/*
 * THREAD_{F,X}* might be larger than a S-type offset can handle, but
 * these are used in performance-sensitive assembly so we can't resort
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
index ec13507e8a18..8fb356e68cc5 100644
--- a/arch/riscv/kvm/mmu.c
+++ b/arch/riscv/kvm/mmu.c
@@ -64,6 +64,14 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
return 0;
 }
 
+int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu,
+struct kvm_memory_slot *memslot,
+gpa_t gpa, unsigned long hva, bool is_write)
+{
+   /* TODO: */
+   return 0;
+}
+
 void kvm_riscv_stage2_flush_cache(struct kvm_vcpu *vcpu)
 {
/* TODO: */
diff --git a/arch/riscv/kvm/vcpu_exit.c b/arch/riscv/kvm/vcpu_exit.c
index 4484e9200fe4..35101ba8814d 100644
--- a/arch/riscv/kvm/vcpu_exit.c
+++ b/arch/riscv/kvm/vcpu_exit.c
@@ -6,9 +6,471 @@
  * Anup Patel 
  */
 
+#include 
 #include 
 #include 
 #include 
+#include 
+
+#define INSN_MATCH_LB  0x3
+#define INSN_MASK_LB   0x707f
+#define INSN_MATCH_LH  0x1003
+#define INSN_MASK_LH   0x707f
+#define INSN_MATCH_LW  0x2003
+#define INSN_MASK_LW   0x707f
+#define INSN_MATCH_LD  0x3003
+#define INSN_MASK_LD   0x707f
+#define INSN_MATCH_LBU 0x4003
+#define INSN_MASK_LBU  0x707f
+#define

[PATCH v12 09/17] RISC-V: KVM: Implement VMID allocator

2020-04-28 Thread Anup Patel
We implement a simple VMID allocator for Guests/VMs which:
1. Detects number of VMID bits at boot-time
2. Uses an atomic number to track the VMID version and increments
   the VMID version whenever we run out of VMIDs
3. Flushes Guest TLBs on all host CPUs whenever we run out
   of VMIDs
4. Force updates HW Stage2 VMID for each Guest VCPU whenever
   VMID changes using VCPU request KVM_REQ_UPDATE_HGATP

Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Alexander Graf 
---
 arch/riscv/include/asm/kvm_host.h |  25 +++
 arch/riscv/kvm/Makefile   |   3 +-
 arch/riscv/kvm/main.c |   4 +
 arch/riscv/kvm/tlb.S  |  43 +++
 arch/riscv/kvm/vcpu.c |   9 +++
 arch/riscv/kvm/vm.c   |   6 ++
 arch/riscv/kvm/vmid.c | 120 ++
 7 files changed, 209 insertions(+), 1 deletion(-)
 create mode 100644 arch/riscv/kvm/tlb.S
 create mode 100644 arch/riscv/kvm/vmid.c
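
vmid.c itself is cut off in this excerpt, but given the description
above the update path presumably resembles the following sketch (the
lock and counter names here are assumptions; only kvm_vmid and
kvm_riscv_stage2_vmid_ver_changed() appear in the patch):

	/* Hedged sketch of the VMID update implied above */
	if (!kvm_riscv_stage2_vmid_ver_changed(&kvm->arch.vmid))
		return;			/* VMID still current, fast path */

	spin_lock(&vmid_lock);		/* assumed global lock */
	if (unlikely(vmid_next >= (1UL << vmid_bits))) {
		vmid_next = 1;		/* ran out: bump version ... */
		atomic_long_inc(&vmid_version);
		kvm_flush_remote_tlbs(kvm);  /* ... and flush guest TLBs */
	}
	kvm->arch.vmid.vmid = vmid_next++;
	kvm->arch.vmid.vmid_version = atomic_long_read(&vmid_version);
	spin_unlock(&vmid_lock);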

diff --git a/arch/riscv/include/asm/kvm_host.h 
b/arch/riscv/include/asm/kvm_host.h
index 05c84c745c61..296a5b13b3a6 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -27,6 +27,7 @@
 #define KVM_REQ_SLEEP \
KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(1)
+#define KVM_REQ_UPDATE_HGATP   KVM_ARCH_REQ(2)
 
 struct kvm_vm_stat {
ulong remote_tlb_flush;
@@ -47,7 +48,19 @@ struct kvm_vcpu_stat {
 struct kvm_arch_memory_slot {
 };
 
+struct kvm_vmid {
+   /*
+* Writes to vmid_version and vmid happen with vmid_lock held
+* whereas reads happen without any lock held.
+*/
+   unsigned long vmid_version;
+   unsigned long vmid;
+};
+
 struct kvm_arch {
+   /* stage2 vmid */
+   struct kvm_vmid vmid;
+
/* stage2 page table */
pgd_t *pgd;
phys_addr_t pgd_phys;
@@ -175,6 +188,12 @@ static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu 
*vcpu) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
 
+void __kvm_riscv_hfence_gvma_vmid_gpa(unsigned long vmid,
+ unsigned long gpa);
+void __kvm_riscv_hfence_gvma_vmid(unsigned long vmid);
+void __kvm_riscv_hfence_gvma_gpa(unsigned long gpa);
+void __kvm_riscv_hfence_gvma_all(void);
+
 int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu,
 struct kvm_memory_slot *memslot,
 gpa_t gpa, unsigned long hva, bool is_write);
@@ -183,6 +202,12 @@ int kvm_riscv_stage2_alloc_pgd(struct kvm *kvm);
 void kvm_riscv_stage2_free_pgd(struct kvm *kvm);
 void kvm_riscv_stage2_update_hgatp(struct kvm_vcpu *vcpu);
 
+void kvm_riscv_stage2_vmid_detect(void);
+unsigned long kvm_riscv_stage2_vmid_bits(void);
+int kvm_riscv_stage2_vmid_init(struct kvm *kvm);
+bool kvm_riscv_stage2_vmid_ver_changed(struct kvm_vmid *vmid);
+void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu);
+
 void __kvm_riscv_unpriv_trap(void);
 
 unsigned long kvm_riscv_vcpu_unpriv_read(struct kvm_vcpu *vcpu,
diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile
index 845579273727..c0f57f26c13d 100644
--- a/arch/riscv/kvm/Makefile
+++ b/arch/riscv/kvm/Makefile
@@ -8,6 +8,7 @@ ccflags-y := -Ivirt/kvm -Iarch/riscv/kvm
 
 kvm-objs := $(common-objs-y)
 
-kvm-objs += main.o vm.o mmu.o vcpu.o vcpu_exit.o vcpu_switch.o
+kvm-objs += main.o vm.o vmid.o tlb.o mmu.o
+kvm-objs += vcpu.o vcpu_exit.o vcpu_switch.o
 
 obj-$(CONFIG_KVM)  += kvm.o
diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c
index 4dee84f5c440..6915ac6888a7 100644
--- a/arch/riscv/kvm/main.c
+++ b/arch/riscv/kvm/main.c
@@ -77,8 +77,12 @@ int kvm_arch_init(void *opaque)
return -ENODEV;
}
 
+   kvm_riscv_stage2_vmid_detect();
+
kvm_info("hypervisor extension available\n");
 
+   kvm_info("host has %ld VMID bits\n", kvm_riscv_stage2_vmid_bits());
+
return 0;
 }
 
diff --git a/arch/riscv/kvm/tlb.S b/arch/riscv/kvm/tlb.S
new file mode 100644
index ..453fca8d7940
--- /dev/null
+++ b/arch/riscv/kvm/tlb.S
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Anup Patel 
+ */
+
+#include 
+#include 
+
+   .text
+   .altmacro
+   .option norelax
+
+   /*
+* Instruction encoding of hfence.gvma is:
+* 0110001 rs2(5) rs1(5) 000 00000 1110011
+*/
+
+ENTRY(__kvm_riscv_hfence_gvma_vmid_gpa)
+   /* hfence.gvma a1, a0 */
+   .word 0x62a60073
+   ret
+ENDPROC(__kvm_riscv_hfence_gvma_vmid_gpa)
+
+ENTRY(__kvm_riscv_hfence_gvma_vmid)
+   /* hfence.gvma zero, a0 */
+   .word 0x62a00073
+   ret
+ENDPROC(__kvm_riscv_hfence_gvma_vmid)
+
+ENTRY(__kvm_riscv_hfence_gvma_gpa)
+

[PATCH v12 03/17] RISC-V: KVM: Implement VCPU create, init and destroy functions

2020-04-28 Thread Anup Patel
This patch implements the VCPU create, init and destroy functions
required by the generic KVM module. We don't have many dynamic
resources in struct kvm_vcpu_arch, so these functions are quite
simple for KVM RISC-V.

Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Alexander Graf 
---
 arch/riscv/include/asm/kvm_host.h | 68 +++
 arch/riscv/kvm/vcpu.c | 54 
 2 files changed, 113 insertions(+), 9 deletions(-)

diff --git a/arch/riscv/include/asm/kvm_host.h 
b/arch/riscv/include/asm/kvm_host.h
index ad4a5e1a6cd3..51bdd6fe05c7 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -61,7 +61,75 @@ struct kvm_cpu_trap {
unsigned long htinst;
 };
 
+struct kvm_cpu_context {
+   unsigned long zero;
+   unsigned long ra;
+   unsigned long sp;
+   unsigned long gp;
+   unsigned long tp;
+   unsigned long t0;
+   unsigned long t1;
+   unsigned long t2;
+   unsigned long s0;
+   unsigned long s1;
+   unsigned long a0;
+   unsigned long a1;
+   unsigned long a2;
+   unsigned long a3;
+   unsigned long a4;
+   unsigned long a5;
+   unsigned long a6;
+   unsigned long a7;
+   unsigned long s2;
+   unsigned long s3;
+   unsigned long s4;
+   unsigned long s5;
+   unsigned long s6;
+   unsigned long s7;
+   unsigned long s8;
+   unsigned long s9;
+   unsigned long s10;
+   unsigned long s11;
+   unsigned long t3;
+   unsigned long t4;
+   unsigned long t5;
+   unsigned long t6;
+   unsigned long sepc;
+   unsigned long sstatus;
+   unsigned long hstatus;
+};
+
+struct kvm_vcpu_csr {
+   unsigned long vsstatus;
+   unsigned long hie;
+   unsigned long vstvec;
+   unsigned long vsscratch;
+   unsigned long vsepc;
+   unsigned long vscause;
+   unsigned long vstval;
+   unsigned long hvip;
+   unsigned long vsatp;
+};
+
 struct kvm_vcpu_arch {
+   /* VCPU ran at least once */
+   bool ran_atleast_once;
+
+   /* ISA feature bits (similar to MISA) */
+   unsigned long isa;
+
+   /* CPU context of Guest VCPU */
+   struct kvm_cpu_context guest_context;
+
+   /* CPU CSR context of Guest VCPU */
+   struct kvm_vcpu_csr guest_csr;
+
+   /* CPU context upon Guest VCPU reset */
+   struct kvm_cpu_context guest_reset_context;
+
+   /* CPU CSR context upon Guest VCPU reset */
+   struct kvm_vcpu_csr guest_reset_csr;
+
/* Don't run the VCPU (blocked) */
bool pause;
 
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index b238592db2ba..c37745bd3dbd 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -31,6 +31,27 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ NULL }
 };
 
+#define KVM_RISCV_ISA_ALLOWED  (riscv_isa_extension_mask(a) | \
+riscv_isa_extension_mask(c) | \
+riscv_isa_extension_mask(d) | \
+riscv_isa_extension_mask(f) | \
+riscv_isa_extension_mask(i) | \
+riscv_isa_extension_mask(m) | \
+riscv_isa_extension_mask(s) | \
+riscv_isa_extension_mask(u))
+
+static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
+{
+   struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
+   struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;
+   struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+   struct kvm_cpu_context *reset_cntx = &vcpu->arch.guest_reset_context;
+
+   memcpy(csr, reset_csr, sizeof(*csr));
+
+   memcpy(cntx, reset_cntx, sizeof(*cntx));
+}
+
 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
 {
return 0;
@@ -38,7 +59,24 @@ int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
 
 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 {
-   /* TODO: */
+   struct kvm_cpu_context *cntx;
+
+   /* Mark this VCPU never ran */
+   vcpu->arch.ran_atleast_once = false;
+
+   /* Setup ISA features available to VCPU */
+   vcpu->arch.isa = riscv_isa_extension_base(NULL) & KVM_RISCV_ISA_ALLOWED;
+
+   /* Setup reset state of shadow SSTATUS and HSTATUS CSRs */
+   cntx = &vcpu->arch.guest_reset_context;
+   cntx->sstatus = SR_SPP | SR_SPIE;
+   cntx->hstatus = 0;
+   cntx->hstatus |= HSTATUS_SPVP;
+   cntx->hstatus |= HSTATUS_SPV;
+
+   /* Reset VCPU */
+   kvm_riscv_reset_vcpu(vcpu);
+
return 0;
 }
 
@@ -51,15 +89,10 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
 {
 }
 
-int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
-{
-   /* TODO: */
-   return 0;
-}
-
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
-   /* TODO: */
+   /* Flush the page

[PATCH v12 08/17] RISC-V: KVM: Handle WFI exits for VCPU

2020-04-28 Thread Anup Patel
We get an illegal instruction trap whenever a Guest/VM executes the
WFI instruction.

This patch handles the WFI trap by blocking the trapped VCPU using
the kvm_vcpu_block() API. The blocked VCPU will be automatically
resumed whenever a VCPU interrupt is injected from user-space
or from in-kernel IRQCHIP emulation.

Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
---
 arch/riscv/kvm/vcpu_exit.c | 76 ++
 1 file changed, 76 insertions(+)
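
As a short worked example of the two-stage decode added below (the
canonical encoding of wfi in the RISC-V ISA is 0x10500073):

	/* Sketch: how the decode below classifies wfi.
	 * Stage 1: opcode dispatch in illegal_inst_fault():
	 *   (0x10500073 & INSN_OPCODE_MASK) >> INSN_OPCODE_SHIFT
	 *     = 0x70 >> 2 = 28 = INSN_OPCODE_SYSTEM
	 * Stage 2: mask-and-match in system_opcode_insn():
	 *   (0x10500073 & INSN_MASK_WFI) == INSN_MATCH_WFI
	 * after which the VCPU is blocked until it becomes runnable. */
	unsigned long insn = 0x10500073UL;
	BUG_ON(((insn & INSN_OPCODE_MASK) >> INSN_OPCODE_SHIFT) !=
	       INSN_OPCODE_SYSTEM);
	BUG_ON((insn & INSN_MASK_WFI) != INSN_MATCH_WFI);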

diff --git a/arch/riscv/kvm/vcpu_exit.c b/arch/riscv/kvm/vcpu_exit.c
index 35101ba8814d..2384b4bf4939 100644
--- a/arch/riscv/kvm/vcpu_exit.c
+++ b/arch/riscv/kvm/vcpu_exit.c
@@ -12,6 +12,13 @@
 #include 
 #include 
 
+#define INSN_OPCODE_MASK   0x007c
+#define INSN_OPCODE_SHIFT  2
+#define INSN_OPCODE_SYSTEM 28
+
+#define INSN_MASK_WFI  0xffffff00
+#define INSN_MATCH_WFI 0x10500000
+
 #define INSN_MATCH_LB  0x3
 #define INSN_MASK_LB   0x707f
 #define INSN_MATCH_LH  0x1003
@@ -116,6 +123,71 @@
 (s32)(((insn) >> 7) & 0x1f))
 #define MASK_FUNCT30x7000
 
+static int truly_illegal_insn(struct kvm_vcpu *vcpu,
+ struct kvm_run *run,
+ ulong insn)
+{
+   struct kvm_cpu_trap utrap = { 0 };
+
+   /* Redirect trap to Guest VCPU */
+   utrap.sepc = vcpu->arch.guest_context.sepc;
+   utrap.scause = EXC_INST_ILLEGAL;
+   utrap.stval = insn;
+   kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
+
+   return 1;
+}
+
+static int system_opcode_insn(struct kvm_vcpu *vcpu,
+ struct kvm_run *run,
+ ulong insn)
+{
+   if ((insn & INSN_MASK_WFI) == INSN_MATCH_WFI) {
+   vcpu->stat.wfi_exit_stat++;
+   if (!kvm_arch_vcpu_runnable(vcpu)) {
+   srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
+   kvm_vcpu_block(vcpu);
+   vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+   kvm_clear_request(KVM_REQ_UNHALT, vcpu);
+   }
+   vcpu->arch.guest_context.sepc += INSN_LEN(insn);
+   return 1;
+   }
+
+   return truly_illegal_insn(vcpu, run, insn);
+}
+
+static int illegal_inst_fault(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ struct kvm_cpu_trap *trap)
+{
+   unsigned long insn = trap->stval;
+   struct kvm_cpu_trap utrap = { 0 };
+   struct kvm_cpu_context *ct;
+
+   if (unlikely(INSN_IS_16BIT(insn))) {
+   if (insn == 0) {
+   ct = &vcpu->arch.guest_context;
+   insn = kvm_riscv_vcpu_unpriv_read(vcpu, true,
+ ct->sepc,
+ &utrap);
+   if (utrap.scause) {
+   utrap.sepc = ct->sepc;
+   kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
+   return 1;
+   }
+   }
+   if (INSN_IS_16BIT(insn))
+   return truly_illegal_insn(vcpu, run, insn);
+   }
+
+   switch ((insn & INSN_OPCODE_MASK) >> INSN_OPCODE_SHIFT) {
+   case INSN_OPCODE_SYSTEM:
+   return system_opcode_insn(vcpu, run, insn);
+   default:
+   return truly_illegal_insn(vcpu, run, insn);
+   }
+}
+
 static int emulate_load(struct kvm_vcpu *vcpu, struct kvm_run *run,
unsigned long fault_addr, unsigned long htinst)
 {
@@ -549,6 +621,10 @@ int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct 
kvm_run *run,
ret = -EFAULT;
run->exit_reason = KVM_EXIT_UNKNOWN;
switch (trap->scause) {
+   case EXC_INST_ILLEGAL:
+   if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV)
+   ret = illegal_inst_fault(vcpu, run, trap);
+   break;
case EXC_INST_GUEST_PAGE_FAULT:
case EXC_LOAD_GUEST_PAGE_FAULT:
case EXC_STORE_GUEST_PAGE_FAULT:
-- 
2.25.1



[PATCH v12 05/17] RISC-V: KVM: Implement KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls

2020-04-28 Thread Anup Patel
For KVM RISC-V, we use KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls to access
VCPU config and registers from user-space.

We have three types of VCPU registers:
1. CONFIG - these are VCPU config and capabilities
2. CORE   - these are VCPU general purpose registers
3. CSR- these are VCPU control and status registers

The CONFIG register available to user-space is ISA. The ISA register is
a read/write register, but user-space can only write the desired VCPU
ISA capabilities before running the VCPU.

The CORE registers available to user-space are PC, RA, SP, GP, TP, A0-A7,
T0-T6, S0-S11 and MODE. Most of these are RISC-V general registers except
PC and MODE. The PC register represents the program counter, whereas the
MODE register represents the VCPU privilege mode (i.e. S/U-mode).

The CSRs available to user-space are SSTATUS, SIE, STVEC, SSCRATCH, SEPC,
SCAUSE, STVAL, SIP, and SATP. All of these are read/write registers.

In future, more VCPU register types will be added (such as FP) for the
KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls.

Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
---
 arch/riscv/include/uapi/asm/kvm.h |  52 ++-
 arch/riscv/kvm/vcpu.c | 246 +-
 2 files changed, 294 insertions(+), 4 deletions(-)
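
For example, user-space can read the guest program counter by composing
a CORE-type register id from the macros added below (vcpu_fd from
KVM_CREATE_VCPU, the generic KVM_REG_RISCV identifier, and a 64-bit
host are assumed):

	/* Hedged sketch: read the guest PC via KVM_GET_ONE_REG */
	struct kvm_one_reg reg;
	unsigned long pc;

	reg.id = KVM_REG_RISCV | KVM_REG_SIZE_U64 |
		 KVM_REG_RISCV_CORE |
		 KVM_REG_RISCV_CORE_REG(regs.pc);
	reg.addr = (unsigned long)&pc;

	if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
		perror("KVM_GET_ONE_REG");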

diff --git a/arch/riscv/include/uapi/asm/kvm.h 
b/arch/riscv/include/uapi/asm/kvm.h
index 6dbc056d58ba..3a20327242f1 100644
--- a/arch/riscv/include/uapi/asm/kvm.h
+++ b/arch/riscv/include/uapi/asm/kvm.h
@@ -41,10 +41,60 @@ struct kvm_guest_debug_arch {
 struct kvm_sync_regs {
 };
 
-/* dummy definition */
+/* for KVM_GET_SREGS and KVM_SET_SREGS */
 struct kvm_sregs {
 };
 
+/* CONFIG registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
+struct kvm_riscv_config {
+   unsigned long isa;
+};
+
+/* CORE registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
+struct kvm_riscv_core {
+   struct user_regs_struct regs;
+   unsigned long mode;
+};
+
+/* Possible privilege modes for kvm_riscv_core */
+#define KVM_RISCV_MODE_S   1
+#define KVM_RISCV_MODE_U   0
+
+/* CSR registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
+struct kvm_riscv_csr {
+   unsigned long sstatus;
+   unsigned long sie;
+   unsigned long stvec;
+   unsigned long sscratch;
+   unsigned long sepc;
+   unsigned long scause;
+   unsigned long stval;
+   unsigned long sip;
+   unsigned long satp;
+};
+
+#define KVM_REG_SIZE(id)   \
+   (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
+
+/* If you need to interpret the index values, here is the key: */
+#define KVM_REG_RISCV_TYPE_MASK    0x00000000FF000000
+#define KVM_REG_RISCV_TYPE_SHIFT   24
+
+/* Config registers are mapped as type 1 */
+#define KVM_REG_RISCV_CONFIG   (0x01 << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_CONFIG_REG(name) \
+   (offsetof(struct kvm_riscv_config, name) / sizeof(unsigned long))
+
+/* Core registers are mapped as type 2 */
+#define KVM_REG_RISCV_CORE (0x02 << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_CORE_REG(name)   \
+   (offsetof(struct kvm_riscv_core, name) / sizeof(unsigned long))
+
+/* Control and status registers are mapped as type 3 */
+#define KVM_REG_RISCV_CSR  (0x03 << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_CSR_REG(name)\
+   (offsetof(struct kvm_riscv_csr, name) / sizeof(unsigned long))
+
 #endif
 
 #endif /* __LINUX_KVM_RISCV_H */
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index 97585aeec207..30806f36c996 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -18,7 +18,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 
 #define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU }
@@ -143,6 +142,225 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, 
struct vm_fault *vmf)
return VM_FAULT_SIGBUS;
 }
 
+static int kvm_riscv_vcpu_get_reg_config(struct kvm_vcpu *vcpu,
+const struct kvm_one_reg *reg)
+{
+   unsigned long __user *uaddr =
+   (unsigned long __user *)(unsigned long)reg->addr;
+   unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+   KVM_REG_SIZE_MASK |
+   KVM_REG_RISCV_CONFIG);
+   unsigned long reg_val;
+
+   if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+   return -EINVAL;
+
+   switch (reg_num) {
+   case KVM_REG_RISCV_CONFIG_REG(isa):
+   reg_val = vcpu->arch.isa;
+   break;
+   default:
+   return -EINVAL;
+   };
+
+   if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
+   return -EFAULT;
+
+   return 0;
+}
+
+static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu,
+const struct kvm_one_reg *reg)

[PATCH v12 04/17] RISC-V: KVM: Implement VCPU interrupts and requests handling

2020-04-28 Thread Anup Patel
This patch implements VCPU interrupts and requests which are both
asynchronous events.

The VCPU interrupts can be set/unset using KVM_INTERRUPT ioctl from
user-space. In the future, the in-kernel IRQCHIP emulation will use
kvm_riscv_vcpu_set_interrupt() and kvm_riscv_vcpu_unset_interrupt()
functions to set/unset VCPU interrupts.

Important VCPU requests implemented by this patch are:
KVM_REQ_SLEEP   - set whenever VCPU itself goes to sleep state
KVM_REQ_VCPU_RESET  - set whenever VCPU reset is requested

The WFI trap-n-emulate (added later) will use KVM_REQ_SLEEP request
and kvm_riscv_vcpu_has_interrupt() function.

The KVM_REQ_VCPU_RESET request will be used by SBI emulation (added
later) to power-up a VCPU in power-off state. The user-space can use
the GET_MPSTATE/SET_MPSTATE ioctls to get/set power state of a VCPU.

Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Alexander Graf 
---
 arch/riscv/include/asm/kvm_host.h |  23 
 arch/riscv/include/uapi/asm/kvm.h |   3 +
 arch/riscv/kvm/vcpu.c | 182 +++---
 3 files changed, 195 insertions(+), 13 deletions(-)
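
With this in place, user-space can assert and deassert the VCPU
interrupt line through the KVM_INTERRUPT ioctl using the SET/UNSET
values added below; a minimal sketch (vcpu_fd assumed from
KVM_CREATE_VCPU):

	/* Hedged sketch: user-space toggling the VCPU irq line */
	struct kvm_interrupt irq;

	irq.irq = KVM_INTERRUPT_SET;		/* assert */
	ioctl(vcpu_fd, KVM_INTERRUPT, &irq);

	irq.irq = KVM_INTERRUPT_UNSET;		/* deassert */
	ioctl(vcpu_fd, KVM_INTERRUPT, &irq);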

diff --git a/arch/riscv/include/asm/kvm_host.h 
b/arch/riscv/include/asm/kvm_host.h
index 51bdd6fe05c7..822b580d96a9 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -130,6 +130,21 @@ struct kvm_vcpu_arch {
/* CPU CSR context upon Guest VCPU reset */
struct kvm_vcpu_csr guest_reset_csr;
 
+   /*
+* VCPU interrupts
+*
+* We have a lockless approach for tracking pending VCPU interrupts
+* implemented using atomic bitops. The irqs_pending bitmap represent
+* pending interrupts whereas irqs_pending_mask represent bits changed
+* in irqs_pending. Our approach is modeled around multiple producer
+* and single consumer problem where the consumer is the VCPU itself.
+*/
+   unsigned long irqs_pending;
+   unsigned long irqs_pending_mask;
+
+   /* VCPU power-off state */
+   bool power_off;
+
/* Don't run the VCPU (blocked) */
bool pause;
 
@@ -154,4 +169,12 @@ int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct 
kvm_run *run,
 
 static inline void __kvm_riscv_switch_to(struct kvm_vcpu_arch *vcpu_arch) {}
 
+int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
+int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
+void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu);
+void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu);
+bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, unsigned long mask);
+void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu);
+void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu);
+
 #endif /* __RISCV_KVM_HOST_H__ */
diff --git a/arch/riscv/include/uapi/asm/kvm.h 
b/arch/riscv/include/uapi/asm/kvm.h
index d15875818b6e..6dbc056d58ba 100644
--- a/arch/riscv/include/uapi/asm/kvm.h
+++ b/arch/riscv/include/uapi/asm/kvm.h
@@ -18,6 +18,9 @@
 
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 
+#define KVM_INTERRUPT_SET  -1U
+#define KVM_INTERRUPT_UNSET-2U
+
 /* for KVM_GET_REGS and KVM_SET_REGS */
 struct kvm_regs {
 };
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index c37745bd3dbd..97585aeec207 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -11,6 +11,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -50,6 +51,9 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
memcpy(csr, reset_csr, sizeof(*csr));
 
memcpy(cntx, reset_cntx, sizeof(*cntx));
+
+   WRITE_ONCE(vcpu->arch.irqs_pending, 0);
+   WRITE_ONCE(vcpu->arch.irqs_pending_mask, 0);
 }
 
 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
@@ -97,8 +101,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 {
-   /* TODO: */
-   return 0;
+   return kvm_riscv_vcpu_has_interrupts(vcpu, 1UL << IRQ_VS_TIMER);
 }
 
 void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
@@ -111,20 +114,18 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
 
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 {
-   /* TODO: */
-   return 0;
+   return (kvm_riscv_vcpu_has_interrupts(vcpu, -1UL) &&
+   !vcpu->arch.power_off && !vcpu->arch.pause);
 }
 
 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
 {
-   /* TODO: */
-   return 0;
+   return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
 }
 
 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
 {
-   /* TODO: */
-   return false;
+   return (vcpu->arch.guest_context.sstatus & SR_SPP) ? true : false;
 }
 
 bool kvm_arch_has_vcpu_debugfs(void)
@@ -145,7 +146,21 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, 
struct vm_fault *vmf)
long kvm_arch_vcpu_async_ioctl(struct file *filp, unsigned int ioctl,
   unsigned long arg)

[PATCH v12 01/17] RISC-V: Add hypervisor extension related CSR defines

2020-04-28 Thread Anup Patel
This patch extends asm/csr.h by adding RISC-V hypervisor extension
related defines.

Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Alexander Graf 
---
 arch/riscv/include/asm/csr.h | 85 
 1 file changed, 85 insertions(+)
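
As a small example of how these defines compose: programming guest
address translation amounts to packing a VMID and the root-table PPN
into HGATP together with the mode bits, roughly as in this sketch
(vmid and pgd_phys are illustrative inputs):

	/* Sketch: compose an HGATP value from the defines below */
	unsigned long hgatp;

	hgatp  = (vmid << HGATP_VMID_SHIFT) & HGATP_VMID_MASK;
	hgatp |= (pgd_phys >> PAGE_SHIFT) & HGATP_PPN;
	hgatp |= HGATP_MODE;	/* Sv39x4 on 64-bit, Sv32x4 on 32-bit */

	csr_write(CSR_HGATP, hgatp);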

diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index cec462e198ce..ec6a1917bb0d 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -30,6 +30,8 @@
 #define SR_XS_CLEAN    _AC(0x00010000, UL)
 #define SR_XS_DIRTY    _AC(0x00018000, UL)
 
+#define SR_MXR _AC(0x00080000, UL)
+
 #ifndef CONFIG_64BIT
 #define SR_SD  _AC(0x80000000, UL) /* FS/XS dirty */
 #else
@@ -52,22 +54,31 @@
 
 /* Interrupt causes (minus the high bit) */
 #define IRQ_S_SOFT 1
+#define IRQ_VS_SOFT    2
 #define IRQ_M_SOFT 3
 #define IRQ_S_TIMER    5
+#define IRQ_VS_TIMER   6
 #define IRQ_M_TIMER    7
 #define IRQ_S_EXT  9
+#define IRQ_VS_EXT 10
 #define IRQ_M_EXT  11
 
 /* Exception causes */
 #define EXC_INST_MISALIGNED    0
 #define EXC_INST_ACCESS        1
+#define EXC_INST_ILLEGAL   2
 #define EXC_BREAKPOINT 3
 #define EXC_LOAD_ACCESS        5
 #define EXC_STORE_ACCESS   7
 #define EXC_SYSCALL        8
+#define EXC_HYPERVISOR_SYSCALL 9
+#define EXC_SUPERVISOR_SYSCALL 10
 #define EXC_INST_PAGE_FAULT    12
 #define EXC_LOAD_PAGE_FAULT    13
 #define EXC_STORE_PAGE_FAULT   15
+#define EXC_INST_GUEST_PAGE_FAULT  20
+#define EXC_LOAD_GUEST_PAGE_FAULT  21
+#define EXC_STORE_GUEST_PAGE_FAULT 23
 
 /* PMP configuration */
 #define PMP_R  0x01
@@ -79,6 +90,55 @@
 #define PMP_A_NAPOT0x18
 #define PMP_L  0x80
 
+/* HSTATUS flags */
+#ifdef CONFIG_64BIT
+#define HSTATUS_VSXL   _AC(0x300000000, UL)
+#define HSTATUS_VSXL_SHIFT 32
+#endif
+#define HSTATUS_VTSR   _AC(0x00400000, UL)
+#define HSTATUS_VTVM   _AC(0x00100000, UL)
+#define HSTATUS_VGEIN  _AC(0x0003f000, UL)
+#define HSTATUS_VGEIN_SHIFT12
+#define HSTATUS_HU _AC(0x0200, UL)
+#define HSTATUS_SPVP   _AC(0x0100, UL)
+#define HSTATUS_SPV_AC(0x0080, UL)
+#define HSTATUS_GVA_AC(0x0040, UL)
+#define HSTATUS_VSBE   _AC(0x0020, UL)
+
+/* HGATP flags */
+#define HGATP_MODE_OFF _AC(0, UL)
+#define HGATP_MODE_SV32X4  _AC(1, UL)
+#define HGATP_MODE_SV39X4  _AC(8, UL)
+#define HGATP_MODE_SV48X4  _AC(9, UL)
+
+#define HGATP32_MODE_SHIFT 31
+#define HGATP32_VMID_SHIFT 22
+#define HGATP32_VMID_MASK  _AC(0x1FC00000, UL)
+#define HGATP32_PPN        _AC(0x003FFFFF, UL)
+
+#define HGATP64_MODE_SHIFT 60
+#define HGATP64_VMID_SHIFT 44
+#define HGATP64_VMID_MASK  _AC(0x03FFF00000000000, UL)
+#define HGATP64_PPN        _AC(0x00000FFFFFFFFFFF, UL)
+
+#ifdef CONFIG_64BIT
+#define HGATP_PPN  HGATP64_PPN
+#define HGATP_VMID_SHIFT   HGATP64_VMID_SHIFT
+#define HGATP_VMID_MASKHGATP64_VMID_MASK
+#define HGATP_MODE (HGATP_MODE_SV39X4 << HGATP64_MODE_SHIFT)
+#else
+#define HGATP_PPN  HGATP32_PPN
+#define HGATP_VMID_SHIFT   HGATP32_VMID_SHIFT
+#define HGATP_VMID_MASKHGATP32_VMID_MASK
+#define HGATP_MODE (HGATP_MODE_SV32X4 << HGATP32_MODE_SHIFT)
+#endif
+
+/* VSIP & HVIP relation */
+#define VSIP_TO_HVIP_SHIFT (IRQ_VS_SOFT - IRQ_S_SOFT)
+#define VSIP_VALID_MASK((_AC(1, UL) << IRQ_S_SOFT) | \
+(_AC(1, UL) << IRQ_S_TIMER) | \
+(_AC(1, UL) << IRQ_S_EXT))
+
 /* symbolic CSR names: */
 #define CSR_CYCLE  0xc00
 #define CSR_TIME   0xc01
@@ -98,6 +158,31 @@
 #define CSR_SIP    0x144
 #define CSR_SATP   0x180
 
+#define CSR_VSSTATUS   0x200
+#define CSR_VSIE   0x204
+#define CSR_VSTVEC 0x205
+#define CSR_VSSCRATCH  0x240
+#define CSR_VSEPC  0x241
+#define CSR_VSCAUSE0x242
+#define CSR_VSTVAL 0x243
+#define CSR_VSIP   0x244
+#define CSR_VSATP  0x280
+
+#define CSR_HSTATUS    0x600
+#define CSR_HEDELEG    0x602
+#define CSR_HIDELEG    0x603
+#define CSR_HIE        0x604
+#define CSR_HTIMEDELTA 0x605
+#define CSR_HCOUNTEREN 0x606
+#define CSR_HGEIE      0x607
+#define CSR_HTIMEDELTAH 0x615
+#define CSR_HTVAL      0x643
+#define CSR_HIP        0x644
+#define CSR_HVIP   0x645
+#define CSR_HTINST 0x64a
+#define CSR_HGATP  0x680
+#define CSR_HGEIP  0xe12
+
 #define CSR_MSTATUS    0x300
 #define CSR_MISA       0x301

[PATCH v12 02/17] RISC-V: Add initial skeletal KVM support

2020-04-28 Thread Anup Patel
This patch adds initial skeletal KVM RISC-V support which has:
1. A simple implementation of arch specific VM functions
   except kvm_vm_ioctl_get_dirty_log(), which will be implemented
   in the future as part of stage2 page logging.
2. Stubs of required arch specific VCPU functions except
   kvm_arch_vcpu_ioctl_run() which is semi-complete and
   extended by subsequent patches.
3. Stubs for required arch specific stage2 MMU functions.

Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Alexander Graf 
---
 arch/riscv/Kconfig|   2 +
 arch/riscv/Makefile   |   2 +
 arch/riscv/include/asm/kvm_host.h |  89 +
 arch/riscv/include/uapi/asm/kvm.h |  47 +
 arch/riscv/kvm/Kconfig|  33 +++
 arch/riscv/kvm/Makefile   |  13 ++
 arch/riscv/kvm/main.c |  93 +
 arch/riscv/kvm/mmu.c  |  86 
 arch/riscv/kvm/vcpu.c | 321 ++
 arch/riscv/kvm/vcpu_exit.c|  35 
 arch/riscv/kvm/vm.c   |  79 
 11 files changed, 800 insertions(+)
 create mode 100644 arch/riscv/include/asm/kvm_host.h
 create mode 100644 arch/riscv/include/uapi/asm/kvm.h
 create mode 100644 arch/riscv/kvm/Kconfig
 create mode 100644 arch/riscv/kvm/Makefile
 create mode 100644 arch/riscv/kvm/main.c
 create mode 100644 arch/riscv/kvm/mmu.c
 create mode 100644 arch/riscv/kvm/vcpu.c
 create mode 100644 arch/riscv/kvm/vcpu_exit.c
 create mode 100644 arch/riscv/kvm/vm.c

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index e9dbfd7d7738..e4b33939d1c7 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -386,3 +386,5 @@ menu "Power management options"
 source "kernel/power/Kconfig"
 
 endmenu
+
+source "arch/riscv/kvm/Kconfig"
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index fb6e37db836d..fc189284b245 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -79,6 +79,8 @@ head-y := arch/riscv/kernel/head.o
 
 core-y += arch/riscv/
 
+core-$(CONFIG_KVM) += arch/riscv/kvm/
+
 libs-y += arch/riscv/lib/
 
 PHONY += vdso_install
diff --git a/arch/riscv/include/asm/kvm_host.h 
b/arch/riscv/include/asm/kvm_host.h
new file mode 100644
index ..ad4a5e1a6cd3
--- /dev/null
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Anup Patel 
+ */
+
+#ifndef __RISCV_KVM_HOST_H__
+#define __RISCV_KVM_HOST_H__
+
+#include 
+#include 
+#include 
+
+#ifdef CONFIG_64BIT
+#define KVM_MAX_VCPUS  (1U << 16)
+#else
+#define KVM_MAX_VCPUS  (1U << 9)
+#endif
+
+#define KVM_USER_MEM_SLOTS 512
+#define KVM_HALT_POLL_NS_DEFAULT   500000
+
+#define KVM_VCPU_MAX_FEATURES  0
+
+#define KVM_REQ_SLEEP \
+   KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(1)
+
+struct kvm_vm_stat {
+   ulong remote_tlb_flush;
+};
+
+struct kvm_vcpu_stat {
+   u64 halt_successful_poll;
+   u64 halt_attempted_poll;
+   u64 halt_poll_invalid;
+   u64 halt_wakeup;
+   u64 ecall_exit_stat;
+   u64 wfi_exit_stat;
+   u64 mmio_exit_user;
+   u64 mmio_exit_kernel;
+   u64 exits;
+};
+
+struct kvm_arch_memory_slot {
+};
+
+struct kvm_arch {
+   /* stage2 page table */
+   pgd_t *pgd;
+   phys_addr_t pgd_phys;
+};
+
+struct kvm_cpu_trap {
+   unsigned long sepc;
+   unsigned long scause;
+   unsigned long stval;
+   unsigned long htval;
+   unsigned long htinst;
+};
+
+struct kvm_vcpu_arch {
+   /* Don't run the VCPU (blocked) */
+   bool pause;
+
+   /* SRCU lock index for in-kernel run loop */
+   int srcu_idx;
+};
+
+static inline void kvm_arch_hardware_unsetup(void) {}
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
+
+void kvm_riscv_stage2_flush_cache(struct kvm_vcpu *vcpu);
+int kvm_riscv_stage2_alloc_pgd(struct kvm *kvm);
+void kvm_riscv_stage2_free_pgd(struct kvm *kvm);
+void kvm_riscv_stage2_update_hgatp(struct kvm_vcpu *vcpu);
+
+int kvm_riscv_vcpu_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
+   struct kvm_cpu_trap *trap);
+
+static inline void __kvm_riscv_switch_to(struct kvm_vcpu_arch *vcpu_arch) {}
+
+#endif /* __RISCV_KVM_HOST_H__ */
diff --git a/arch/riscv/include/uapi/asm/kvm.h 
b/arch/riscv/include/uapi/asm/kvm.h
new file mode 100644
index ..d15875818b6e
--- /dev/null
+++ b/arch/riscv/include/uapi/asm/kvm.h
@@ -0,0 +1,47 @@
+/* SPDX-License-I

[PATCH v12 00/17] KVM RISC-V Support

2020-04-28 Thread Anup Patel
 traps due to Guest stage1 page table
- Added separate patch to document RISC-V specific things in
  Documentation/virt/kvm/api.txt

Changes since v4:
- Rebased patches on Linux-5.3-rc5
- Added Paolo's Acked-by and Reviewed-by
- Updated mailing list in MAINTAINERS entry

Changes since v3:
- Moved patch for ISA bitmap from KVM prep series to this series
- Make vsip_shadow a run-time percpu variable instead of compile-time
- Flush Guest TLBs on all Host CPUs whenever we run-out of VMIDs

Changes since v2:
- Removed references of KVM_REQ_IRQ_PENDING from all patches
- Use kvm->srcu within in-kernel KVM run loop
- Added percpu vsip_shadow to track last value programmed in VSIP CSR
- Added comments about irqs_pending and irqs_pending_mask
- Used kvm_arch_vcpu_runnable() in place of kvm_riscv_vcpu_has_interrupt()
  in system_opcode_insn()
- Removed unwanted smp_wmb() in kvm_riscv_stage2_vmid_update()
- Use kvm_flush_remote_tlbs() in kvm_riscv_stage2_vmid_update()
- Use READ_ONCE() in kvm_riscv_stage2_update_hgatp() for vmid

Changes since v1:
- Fixed compile errors in building KVM RISC-V as module
- Removed unused kvm_riscv_halt_guest() and kvm_riscv_resume_guest()
- Set KVM_CAP_SYNC_MMU capability only after MMU notifiers are implemented
- Made vmid_version an unsigned long instead of atomic
- Renamed KVM_REQ_UPDATE_PGTBL to KVM_REQ_UPDATE_HGATP
- Renamed kvm_riscv_stage2_update_pgtbl() to kvm_riscv_stage2_update_hgatp()
- Configure HIDELEG and HEDELEG in kvm_arch_hardware_enable()
- Updated ONE_REG interface for CSR access to user-space
- Removed irqs_pending_lock and use atomic bitops instead
- Added separate patch for FP ONE_REG interface
- Added separate patch for updating MAINTAINERS file

Anup Patel (13):
  RISC-V: Add hypervisor extension related CSR defines
  RISC-V: Add initial skeletal KVM support
  RISC-V: KVM: Implement VCPU create, init and destroy functions
  RISC-V: KVM: Implement VCPU interrupts and requests handling
  RISC-V: KVM: Implement KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls
  RISC-V: KVM: Implement VCPU world-switch
  RISC-V: KVM: Handle MMIO exits for VCPU
  RISC-V: KVM: Handle WFI exits for VCPU
  RISC-V: KVM: Implement VMID allocator
  RISC-V: KVM: Implement stage2 page table programming
  RISC-V: KVM: Implement MMU notifiers
  RISC-V: KVM: Document RISC-V specific parts of KVM API
  RISC-V: KVM: Add MAINTAINERS entry

Atish Patra (4):
  RISC-V: KVM: Add timer functionality
  RISC-V: KVM: FP lazy save/restore
  RISC-V: KVM: Implement ONE REG interface for FP registers
  RISC-V: KVM: Add SBI v0.1 support

 Documentation/virt/kvm/api.rst  | 193 -
 MAINTAINERS |  11 +
 arch/riscv/Kconfig  |   2 +
 arch/riscv/Makefile |   2 +
 arch/riscv/include/asm/csr.h|  85 ++
 arch/riscv/include/asm/kvm_host.h   | 272 +++
 arch/riscv/include/asm/kvm_vcpu_timer.h |  44 ++
 arch/riscv/include/asm/pgtable-bits.h   |   1 +
 arch/riscv/include/uapi/asm/kvm.h   | 127 +++
 arch/riscv/kernel/asm-offsets.c | 154 
 arch/riscv/kvm/Kconfig  |  34 +
 arch/riscv/kvm/Makefile |  14 +
 arch/riscv/kvm/main.c   |  97 +++
 arch/riscv/kvm/mmu.c| 775 ++
 arch/riscv/kvm/tlb.S|  43 +
 arch/riscv/kvm/vcpu.c   | 996 
 arch/riscv/kvm/vcpu_exit.c  | 654 
 arch/riscv/kvm/vcpu_sbi.c   | 172 
 arch/riscv/kvm/vcpu_switch.S| 391 ++
 arch/riscv/kvm/vcpu_timer.c | 225 ++
 arch/riscv/kvm/vm.c |  86 ++
 arch/riscv/kvm/vmid.c   | 120 +++
 drivers/clocksource/timer-riscv.c   |   8 +
 include/clocksource/timer-riscv.h   |  16 +
 include/uapi/linux/kvm.h|   8 +
 25 files changed, 4521 insertions(+), 9 deletions(-)
 create mode 100644 arch/riscv/include/asm/kvm_host.h
 create mode 100644 arch/riscv/include/asm/kvm_vcpu_timer.h
 create mode 100644 arch/riscv/include/uapi/asm/kvm.h
 create mode 100644 arch/riscv/kvm/Kconfig
 create mode 100644 arch/riscv/kvm/Makefile
 create mode 100644 arch/riscv/kvm/main.c
 create mode 100644 arch/riscv/kvm/mmu.c
 create mode 100644 arch/riscv/kvm/tlb.S
 create mode 100644 arch/riscv/kvm/vcpu.c
 create mode 100644 arch/riscv/kvm/vcpu_exit.c
 create mode 100644 arch/riscv/kvm/vcpu_sbi.c
 create mode 100644 arch/riscv/kvm/vcpu_switch.S
 create mode 100644 arch/riscv/kvm/vcpu_timer.c
 create mode 100644 arch/riscv/kvm/vm.c
 create mode 100644 arch/riscv/kvm/vmid.c
 create mode 100644 include/clocksource/timer-riscv.h

-- 
2.25.1



Re: [PATCH v2 2/2] RISC-V: defconfig: Enable Goldfish RTC driver

2019-10-23 Thread Anup Patel
On Wed, Oct 23, 2019 at 11:30 AM Paul Walmsley  wrote:
>
> On Wed, 23 Oct 2019, Anup Patel wrote:
>
> > On Wed, Oct 23, 2019 at 6:37 AM Paul Walmsley  
> > wrote:
> >
> > > Incidentally, just looking at drivers/platform/goldfish, that driver seems
> > > to be some sort of Google-specific RPC driver.  Are you all really sure
> >
> > Nope, it's not an RPC driver.  In fact, all Goldfish virtual platform
> > devices are MMIO devices.
>
> Is drivers/platform/goldfish/goldfish_pipe.c required for the Goldfish RTC
> driver or not?

No, it's not required.

>
> If not, then the first patch that was sent isn't the right fix.  It would
> be better to remove the Kbuild dependency between the code in
> drivers/platform/goldfish and the Goldfish RTC.

The common GOLDFISH Kconfig option is there to capture the
expectations that all Goldfish drivers have of the underlying
Linux arch support.

Currently, all Goldfish drivers require HAS_IOMEM and
HAS_DMA support from the underlying arch.

If you still think the common GOLDFISH Kconfig option is not
required, then please go ahead and send a patch.

>
> If it is required, then surely there must be a simpler RTC implementation
> available.

Goldfish pipe is not required, so Goldfish RTC is certainly
a simple RTC implementation.

>
> > The problem is that the VirtIO spec does not define any RTC device, so
> > instead of inventing our own virtual RTC device we re-use the RTC device
> > defined in the Goldfish virtual platform for the QEMU virt machine. This
> > way we can re-use the Linux Goldfish RTC driver.
>
> With 160+ RTC drivers in the kernel tree already, we certainly agree that
> it doesn't make sense to invent a new RTC.
>
>
> - Paul

Regards,
Anup


Re: [PATCH v2 2/2] RISC-V: defconfig: Enable Goldfish RTC driver

2019-10-22 Thread Anup Patel
On Wed, Oct 23, 2019 at 6:37 AM Paul Walmsley  wrote:
>
> On Tue, 22 Oct 2019, Alistair Francis wrote:
>
> > I think it makes sense for this to go into Linux first.
> >
> > The QEMU patches are going to be accepted, just some nit picking to do
> > first :)
> >
> > After that we have to wait for a PR and then a QEMU release until most
> > people will see the change in QEMU. In that time Linux 5.4 will be
> > released, if this can make it into 5.4 then everyone using 5.4 will get
> > the new RTC as soon as they upgrade QEMU (QEMU provides the device
> > tree). If this has to wait until QEMU has support then it won't be
> > supported for users until even later.
> >
> > Users are generally slow to update kernels (buildroot is still using
> > 5.1 by default for example) so the sooner changes like this go in the
> > better.
>
> The defconfigs are really just for kernel developers.  We expect users to
> define their own Kconfigs for their own needs.
>
> If using the Goldfish code really is what we all want to do (see below),
> then the kernel patch that should go in right away -- which also has no
> dependence on what QEMU does -- would be the first patch of this series:
>
> https://lore.kernel.org/linux-riscv/20190925063706.56175-2-anup.pa...@wdc.com/
>
> And that should go in via whoever is maintaining the Goldfish driver, not
> the RISC-V tree.  (It looks like drivers/platform/goldfish is completely
> unmaintained - a red flag! - so probably someone needs to persuade Greg or
> Andrew to take it.)

GregKH has already queued this for Linux-5.5 and you can see this
commit present in linux-next tree:
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/log/drivers/platform/goldfish?h=next-20191022

>
> Incidentally, just looking at drivers/platform/goldfish, that driver seems
> to be some sort of Google-specific RPC driver.  Are you all really sure

Nope, it's not an RPC driver. In fact, all Goldfish virtual platform devices
are MMIO devices.

> you want to enable that just for an RTC?  Seems like overkill - there are
> much simpler RTCs out there.

No, it's not overkill. All Goldfish virtual platform devices are quite simple
MMIO devices with only the bare minimum registers required for the device
to function.
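
To illustrate: reading the wall-clock time from the Goldfish RTC is just
two 32-bit MMIO loads. The sketch below assumes the TIMER_TIME_LOW/HIGH
register offsets used by the upstream Goldfish RTC driver and an already
ioremap()ed device base; treat it as an illustration, not the driver
itself:

/*
 * Illustrative sketch: reading time from a Goldfish RTC.  "base" is
 * assumed to be an ioremap()ed pointer to the device.  Reading
 * TIME_LOW latches the full 64-bit nanosecond counter, so TIME_HIGH
 * returns the matching upper half.
 */
#define TIMER_TIME_LOW		0x00
#define TIMER_TIME_HIGH		0x04

static u64 goldfish_rtc_read_time_ns(void __iomem *base)
{
	u64 time_low, time_high;

	time_low = readl(base + TIMER_TIME_LOW);
	time_high = readl(base + TIMER_TIME_HIGH);

	return (time_high << 32) | time_low;
}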

The problem is that the VirtIO spec does not define any RTC device, so
instead of inventing our own virtual RTC device we re-use the RTC device
defined in the Goldfish virtual platform for the QEMU virt machine. This
way we can re-use the Linux Goldfish RTC driver.

BTW, I will send-out QEMU Goldfish RTC patches today or tomorrow
addressing nit comments from Alistair.

Regards,
Anup


Re: [PATCH v9 18/22] RISC-V: KVM: Simplify stage2 page table programming

2019-10-21 Thread Anup Patel
On Mon, Oct 21, 2019 at 11:01 PM Paolo Bonzini  wrote:
>
> On 16/10/19 18:11, Anup Patel wrote:
> > Instead of dealing with PGD, PMD, and PTE differently in stage2
> > page table programming, we can simply use iterative and recursive
> > helper functions to program stage2 page tables of any level.
> >
> > This patch re-implements stage2_get_leaf_entry(), stage2_set_pte(),
> > stage2_map_page(), stage2_op_pte(), stage2_unmap_range(), and
> > stage2_wp_range() helper functions as mentioned above.
> >
> > Signed-off-by: Anup Patel 
>
> I honestly haven't reviewed this in depth, but I definitely agree with
> the idea.  Please squash it with the original implementation though.

Sure, I will squash patch18 (this patch) onto patch11.

Regards,
Anup

>
> Paolo
>
> > ---
> >  arch/riscv/kvm/mmu.c | 469 +++
> >  1 file changed, 164 insertions(+), 305 deletions(-)
> >
> > diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
> > index 5aa5ea5ef8f6..fe86cae4cf42 100644
> > --- a/arch/riscv/kvm/mmu.c
> > +++ b/arch/riscv/kvm/mmu.c
> > @@ -21,15 +21,56 @@
> >  #ifdef CONFIG_64BIT
> >  #define stage2_have_pmd  true
> >  #define stage2_gpa_size  ((phys_addr_t)(1ULL << 39))
> > -#define stage2_cache_min_pages   2
> > +#define stage2_pgd_levels3
> > +#define stage2_index_bits9
> >  #else
> > -#define pmd_index(x) 0
> > -#define pfn_pmd(x, y)({ pmd_t __x = { 0 }; __x; })
> >  #define stage2_have_pmd  false
> >  #define stage2_gpa_size  ((phys_addr_t)(1ULL << 32))
> > -#define stage2_cache_min_pages   1
> > +#define stage2_pgd_levels2
> > +#define stage2_index_bits10
> >  #endif
> >
> > +#define stage2_pte_index(addr, level) \
> > +(((addr) >> (PAGE_SHIFT + stage2_index_bits * (level))) & (PTRS_PER_PTE - 1))
> > +
> > +static inline unsigned long stage2_pte_page_vaddr(pte_t pte)
> > +{
> > + return (unsigned long)pfn_to_virt(pte_val(pte) >> _PAGE_PFN_SHIFT);
> > +}
> > +
> > +static int stage2_page_size_to_level(unsigned long page_size, u32 *out_level)
> > +{
> > + if (page_size == PAGE_SIZE)
> > + *out_level = 0;
> > + else if (page_size == PMD_SIZE)
> > + *out_level = 1;
> > + else if (page_size == PGDIR_SIZE)
> > + *out_level = (stage2_have_pmd) ? 2 : 1;
> > + else
> > + return -EINVAL;
> > +
> > + return 0;
> > +}
> > +
> > +static int stage2_level_to_page_size(u32 level, unsigned long *out_pgsize)
> > +{
> > + switch (level) {
> > + case 0:
> > + *out_pgsize = PAGE_SIZE;
> > + break;
> > + case 1:
> > + *out_pgsize = (stage2_have_pmd) ? PMD_SIZE : PGDIR_SIZE;
> > + break;
> > + case 2:
> > + *out_pgsize = PGDIR_SIZE;
> > + break;
> > + default:
> > + return -EINVAL;
> > + }
> > +
> > + return 0;
> > +}
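
(Worked example, not part of the patch: with stage2_index_bits = 9 and
PAGE_SHIFT = 12, stage2_pte_index(addr, level) selects

	level 0: addr[20:12]  -> 4 KiB pages
	level 1: addr[29:21]  -> 2 MiB blocks
	level 2: addr[38:30]  -> 1 GiB blocks

so a single helper can walk any level of the Sv39 stage2 table instead
of separate PGD/PMD/PTE code paths.)
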
> > +
> >  static int stage2_cache_topup(struct kvm_mmu_page_cache *pcache,
> > int min, int max)
> >  {
> > @@ -67,61 +108,30 @@ static void *stage2_cache_alloc(struct kvm_mmu_page_cache *pcache)
> >   return p;
> >  }
> >
> > -static int stage2_pgdp_test_and_clear_young(pgd_t *pgd)
> > -{
> > - return ptep_test_and_clear_young(NULL, 0, (pte_t *)pgd);
> > -}
> > -
> > -static int stage2_pmdp_test_and_clear_young(pmd_t *pmd)
> > -{
> > - return ptep_test_and_clear_young(NULL, 0, (pte_t *)pmd);
> > -}
> > -
> > -static int stage2_ptep_test_and_clear_young(pte_t *pte)
> > -{
> > - return ptep_test_and_clear_young(NULL, 0, pte);
> > -}
> > -
> >  static bool stage2_get_leaf_entry(struct kvm *kvm, gpa_t addr,
> > -   pgd_t **pgdpp, pmd_t **pmdpp, pte_t **ptepp)
> > +   pte_t **ptepp, u32 *ptep_level)
> >  {
> > - pgd_t *pgdp;
> > - pmd_t *pmdp;
> >   pte_t *ptep;
> > -
> > - *pgdpp = NULL;
> > - *pmdpp = NULL;
> > - *ptepp = NULL;
> > -
> > - pgdp = &kvm->arch.pgd[pgd_index(addr)];
> > - if (!pgd_val(*pgdp))
> > - return false;
> > - if (pgd_val(*pgdp) & _PAGE_LEAF) {
> > - 

Re: [PATCH v9 19/22] RISC-V: KVM: Remove per-CPU vsip_shadow variable

2019-10-21 Thread Anup Patel
On Mon, Oct 21, 2019 at 10:58 PM Paolo Bonzini  wrote:
>
> On 16/10/19 18:12, Anup Patel wrote:
> > Currently, we track the last value written to the VSIP CSR using a
> > per-CPU vsip_shadow variable, but this easily goes out-of-sync because
> > the Guest can update the VSIP.SSIP bit directly.
> >
> > To simplify things, we remove the per-CPU vsip_shadow variable and
> > unconditionally write vcpu->arch.guest_csr.vsip to the VSIP CSR in the
> > run-loop.
> >
> > Signed-off-by: Anup Patel 
>
> Please squash this and patch 20 into the corresponding patches earlier
> in the series.

Sure, I will squash patch20 and patch19 onto patch5.

Regards,
Anup


>
> Paolo
>
> > ---
> >  arch/riscv/include/asm/kvm_host.h |  3 ---
> >  arch/riscv/kvm/main.c |  6 --
> >  arch/riscv/kvm/vcpu.c | 24 +---
> >  3 files changed, 1 insertion(+), 32 deletions(-)
> >
> > diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
> > index ec1ca4bc98f2..cd86acaed055 100644
> > --- a/arch/riscv/include/asm/kvm_host.h
> > +++ b/arch/riscv/include/asm/kvm_host.h
> > @@ -202,9 +202,6 @@ static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
> >  static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
> >  static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
> >
> > -int kvm_riscv_setup_vsip(void);
> > -void kvm_riscv_cleanup_vsip(void);
> > -
> >  #define KVM_ARCH_WANT_MMU_NOTIFIER
> >  int kvm_unmap_hva_range(struct kvm *kvm,
> >   unsigned long start, unsigned long end);
> > diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c
> > index 55df85184241..002301a27d29 100644
> > --- a/arch/riscv/kvm/main.c
> > +++ b/arch/riscv/kvm/main.c
> > @@ -61,17 +61,11 @@ void kvm_arch_hardware_disable(void)
> >
> >  int kvm_arch_init(void *opaque)
> >  {
> > - int ret;
> > -
> >   if (!riscv_isa_extension_available(NULL, h)) {
> >   kvm_info("hypervisor extension not available\n");
> >   return -ENODEV;
> >   }
> >
> > - ret = kvm_riscv_setup_vsip();
> > - if (ret)
> > - return ret;
> > -
> >   kvm_riscv_stage2_vmid_detect();
> >
> >   kvm_info("hypervisor extension available\n");
> > diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
> > index fd77cd39dd8c..f1a218d3a8cf 100644
> > --- a/arch/riscv/kvm/vcpu.c
> > +++ b/arch/riscv/kvm/vcpu.c
> > @@ -111,8 +111,6 @@ static void kvm_riscv_vcpu_host_fp_restore(struct kvm_cpu_context *cntx) {}
> >riscv_isa_extension_mask(s) | \
> >riscv_isa_extension_mask(u))
> >
> > -static unsigned long __percpu *vsip_shadow;
> > -
> >  static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
> >  {
> >   struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
> > @@ -765,7 +763,6 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
> >  void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
> >  {
> >   struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
> > - unsigned long *vsip = raw_cpu_ptr(vsip_shadow);
> >
> >   csr_write(CSR_VSSTATUS, csr->vsstatus);
> >   csr_write(CSR_VSIE, csr->vsie);
> > @@ -775,7 +772,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
> >   csr_write(CSR_VSCAUSE, csr->vscause);
> >   csr_write(CSR_VSTVAL, csr->vstval);
> >   csr_write(CSR_VSIP, csr->vsip);
> > - *vsip = csr->vsip;
> >   csr_write(CSR_VSATP, csr->vsatp);
> >
> >   kvm_riscv_stage2_update_hgatp(vcpu);
> > @@ -843,26 +839,8 @@ static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu)
> >  static void kvm_riscv_update_vsip(struct kvm_vcpu *vcpu)
> >  {
> >   struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
> > - unsigned long *vsip = raw_cpu_ptr(vsip_shadow);
> > -
> > - if (*vsip != csr->vsip) {
> > - csr_write(CSR_VSIP, csr->vsip);
> > - *vsip = csr->vsip;
> > - }
> > -}
> > -
> > -int kvm_riscv_setup_vsip(void)
> > -{
> > - vsip_shadow = alloc_percpu(unsigned long);
> > - if (!vsip_shadow)
> > - return -ENOMEM;
> >
> > - return 0;
> > -}
> > -
> > -void kvm_riscv_cleanup_vsip(void)
> > -{
> > - free_percpu(vsip_shadow);
> > + csr_write(CSR_VSIP, csr->vsip);
> >  }
> >
> >  int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
> >
>


Re: [PATCH v9 20/22] RISC-V: KVM: Fix race-condition in kvm_riscv_vcpu_sync_interrupts()

2019-10-21 Thread Anup Patel
On Mon, Oct 21, 2019 at 10:57 PM Paolo Bonzini  wrote:
>
> On 16/10/19 18:12, Anup Patel wrote:
> > + /* Read current VSIP and VSIE CSRs */
> > + vsip = csr_read(CSR_VSIP);
> > + csr->vsie = csr_read(CSR_VSIE);
> > +
> > + /* Sync-up VSIP.SSIP bit changes done by Guest */
> > + if ((csr->vsip ^ vsip) & (1UL << IRQ_S_SOFT)) {
> > + if (!test_and_set_bit(IRQ_S_SOFT, &v->irqs_pending_mask)) {
> > + if (vsip & (1UL << IRQ_S_SOFT))
> > + set_bit(IRQ_S_SOFT, &v->irqs_pending);
> > + else
> > + clear_bit(IRQ_S_SOFT, &v->irqs_pending);
> > + }
>
> Looks good, but I wonder if this could just be "csr->vsip =
> csr_read(CSR_VSIP)", which will be fixed up by flush_interrupts on the
> next entry.

It's not just "csr->vsip = csr_read(CSR_VSIP)" because the "irqs_pending"
bitmap has to stay in sync with Guest updates to VSIP: WFI
trap-n-emulate checks for pending interrupts, which in turn consults
the "irqs_pending" bitmap.

Regards,
Anup


Re: RISC-V nommu support v5

2019-10-18 Thread Anup Patel
Hi Paul/Palmer,

On Thu, Oct 17, 2019 at 11:07 PM Christoph Hellwig  wrote:
>
> Hi all,
>
> below is a series to support nommu mode on RISC-V.  For now this series
> just works under qemu with the qemu-virt platform, but Damien has also
> been able to get kernel based on this tree with additional driver hacks
> to work on the Kendryte KD210, but that will take a while to cleanup
> an upstream.
>
> A git tree is available here:
>
> git://git.infradead.org/users/hch/riscv.git riscv-nommu.5
>
> Gitweb:
>
> 
> http://git.infradead.org/users/hch/riscv.git/shortlog/refs/heads/riscv-nommu.5
>
> I've also pushed out a builtroot branch that can build a RISC-V nommu
> root filesystem here:
>
>git://git.infradead.org/users/hch/buildroot.git riscv-nommu.2
>
> Gitweb:
>
>
> http://git.infradead.org/users/hch/buildroot.git/shortlog/refs/heads/riscv-nommu.2

It will be really cool to have this series for Linux-5.4-rcX.

Best Regards,
Anup

>
>
> Changes since v4:
>  - rebased to 5.4-rc + latest riscv fixes
>  - clean up do_trap_break
>  - fix an SR_XPIE issue (Paul Walmsley)
>  - use the symbolic PAGE_OFFSET value in the flat loader
>(Aurabindo Jayamohanan)
>
> Changes since v3:
>  - improve a few commit message
>  - cleanup riscv_cpuid_to_hartid_mask
>  - cleanup the timer handling
>  - cleanup the IPI handling a little more
>  - renamed CONFIG_M_MODE to CONFIG_RISCV_M_MODE
>  - split out CONFIG_RISCV_SBI to make some of the ifdefs more obbious
>  - use IS_ENABLED wherever possible instead of if ifdefs to make the
>code more readable
>
> Changes since v2:
>  - rebased to 5.3-rc
>  - remove the EFI image header for nommu builds
>  - set ARCH_SLAB_MINALIGN to ensure stack alignment in the flat binary
>loader
>  - minor comment improvement
>  - use #defines for more CSRs
>
> Changes since v1:
>  - fixes so that a kernel with this series still work on builds with an
>IOMMU
>  - small clint cleanups
>  - the binfmt_flat base and buildroot now don't put arguments on the stack
>
> ___
> linux-riscv mailing list
> linux-ri...@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv


Re: [PATCH 07/15] riscv: implement remote sfence.i using IPIs

2019-10-17 Thread Anup Patel
On Thu, Oct 17, 2019 at 11:08 PM Christoph Hellwig  wrote:
>
> The RISC-V ISA only supports flushing the instruction cache for the
> local CPU core.  Currently we always offload the remote icache flushing to
> the SBI, which then issues an IPI under the hood.  But with M-mode
> we do not have an SBI, so we have to do it ourselves: IPI the
> other harts using the existing kernel helpers if we have
> native clint support and thus can IPI directly from the kernel.
>
> Signed-off-by: Christoph Hellwig 
> ---
>  arch/riscv/include/asm/sbi.h |  3 +++
>  arch/riscv/mm/cacheflush.c   | 24 ++--
>  2 files changed, 21 insertions(+), 6 deletions(-)
>
> diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
> index b167af3e7470..0cb74eccc73f 100644
> --- a/arch/riscv/include/asm/sbi.h
> +++ b/arch/riscv/include/asm/sbi.h
> @@ -94,5 +94,8 @@ static inline void sbi_remote_sfence_vma_asid(const unsigned long *hart_mask,
>  {
> SBI_CALL_4(SBI_REMOTE_SFENCE_VMA_ASID, hart_mask, start, size, asid);
>  }
> +#else /* CONFIG_RISCV_SBI */
> +/* stub for code that is only reachable under IS_ENABLED(CONFIG_RISCV_SBI): */
> +void sbi_remote_fence_i(const unsigned long *hart_mask);
>  #endif /* CONFIG_RISCV_SBI */
>  #endif /* _ASM_RISCV_SBI_H */
> diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
> index 3f15938dec89..794c9ab256eb 100644
> --- a/arch/riscv/mm/cacheflush.c
> +++ b/arch/riscv/mm/cacheflush.c
> @@ -10,9 +10,17 @@
>
>  #include 
>
> +static void ipi_remote_fence_i(void *info)
> +{
> +   return local_flush_icache_all();
> +}
> +
>  void flush_icache_all(void)
>  {
> -   sbi_remote_fence_i(NULL);
> +   if (IS_ENABLED(CONFIG_RISCV_SBI))
> +   sbi_remote_fence_i(NULL);
> +   else
> +   on_each_cpu(ipi_remote_fence_i, NULL, 1);
>  }
>
>  /*
> @@ -28,7 +36,7 @@ void flush_icache_all(void)
>  void flush_icache_mm(struct mm_struct *mm, bool local)
>  {
> unsigned int cpu;
> -   cpumask_t others, hmask, *mask;
> +   cpumask_t others, *mask;
>
> preempt_disable();
>
> @@ -46,10 +54,7 @@ void flush_icache_mm(struct mm_struct *mm, bool local)
>  */
> cpumask_andnot(&others, mm_cpumask(mm), cpumask_of(cpu));
> local |= cpumask_empty(&others);
> -   if (mm != current->active_mm || !local) {
> -   riscv_cpuid_to_hartid_mask(&others, &hmask);
> -   sbi_remote_fence_i(hmask.bits);
> -   } else {
> +   if (mm == current->active_mm && local) {
> /*
>  * It's assumed that at least one strongly ordered operation 
> is
>  * performed on this hart between setting a hart's cpumask bit
> @@ -59,6 +64,13 @@ void flush_icache_mm(struct mm_struct *mm, bool local)
>  * with flush_icache_deferred().
>  */
> smp_mb();
> +   } else if (IS_ENABLED(CONFIG_RISCV_SBI)) {
> +   cpumask_t hartid_mask;
> +
> +   riscv_cpuid_to_hartid_mask(&others, &hartid_mask);
> +   sbi_remote_fence_i(cpumask_bits(&hartid_mask));
> +   } else {
> +   on_each_cpu_mask(&others, ipi_remote_fence_i, NULL, 1);
> }
>
> preempt_enable();
> --
> 2.20.1
>
>
> ___
> linux-riscv mailing list
> linux-ri...@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv

LGTM.

Reviewed-by: Anup Patel 

Regards,
Anup


Re: [PATCH 08/15] riscv: add support for MMIO access to the timer registers

2019-10-17 Thread Anup Patel
On Thu, Oct 17, 2019 at 11:08 PM Christoph Hellwig  wrote:
>
> When running in M-mode we can't use the SBI to set the timer, and
> don't have access to the time CSR as that usually is emulated by
> M-mode.  Instead provide code that directly accesses the MMIO for
> the timer.
>
> Signed-off-by: Christoph Hellwig 
> ---
>  arch/riscv/include/asm/sbi.h  |  3 ++-
>  arch/riscv/include/asm/timex.h| 19 +--
>  drivers/clocksource/timer-riscv.c | 21 +
>  3 files changed, 36 insertions(+), 7 deletions(-)
>
> diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
> index 0cb74eccc73f..a4774bafe033 100644
> --- a/arch/riscv/include/asm/sbi.h
> +++ b/arch/riscv/include/asm/sbi.h
> @@ -95,7 +95,8 @@ static inline void sbi_remote_sfence_vma_asid(const unsigned long *hart_mask,
> SBI_CALL_4(SBI_REMOTE_SFENCE_VMA_ASID, hart_mask, start, size, asid);
>  }
>  #else /* CONFIG_RISCV_SBI */
> -/* stub for code that is only reachable under IS_ENABLED(CONFIG_RISCV_SBI): */
> +/* stubs for code that is only reachable under IS_ENABLED(CONFIG_RISCV_SBI): */
> +void sbi_set_timer(uint64_t stime_value);
>  void sbi_remote_fence_i(const unsigned long *hart_mask);
>  #endif /* CONFIG_RISCV_SBI */
>  #endif /* _ASM_RISCV_SBI_H */
> diff --git a/arch/riscv/include/asm/timex.h b/arch/riscv/include/asm/timex.h
> index c7ef131b9e4c..e17837d61667 100644
> --- a/arch/riscv/include/asm/timex.h
> +++ b/arch/riscv/include/asm/timex.h
> @@ -7,12 +7,25 @@
>  #define _ASM_RISCV_TIMEX_H
>
>  #include 
> +#include 
>
>  typedef unsigned long cycles_t;
>
> +extern u64 __iomem *riscv_time_val;
> +extern u64 __iomem *riscv_time_cmp;
> +
> +#ifdef CONFIG_64BIT
> +#define mmio_get_cycles()  readq_relaxed(riscv_time_val)
> +#else
> +#define mmio_get_cycles()  readl_relaxed(riscv_time_val)
> +#define mmio_get_cycles_hi()   readl_relaxed(((u32 *)riscv_time_val) + 1)
> +#endif
> +
>  static inline cycles_t get_cycles(void)
>  {
> -   return csr_read(CSR_TIME);
> +   if (IS_ENABLED(CONFIG_RISCV_SBI))
> +   return csr_read(CSR_TIME);
> +   return mmio_get_cycles();
>  }
>  #define get_cycles get_cycles
>
> @@ -24,7 +37,9 @@ static inline u64 get_cycles64(void)
>  #else /* CONFIG_64BIT */
>  static inline u32 get_cycles_hi(void)
>  {
> -   return csr_read(CSR_TIMEH);
> +   if (IS_ENABLED(CONFIG_RISCV_SBI))
> +   return csr_read(CSR_TIMEH);
> +   return mmio_get_cycles_hi();
>  }
>
>  static inline u64 get_cycles64(void)
> diff --git a/drivers/clocksource/timer-riscv.c b/drivers/clocksource/timer-riscv.c
> index 5d2fdc3e28a9..2b9fbc4ebe49 100644
> --- a/drivers/clocksource/timer-riscv.c
> +++ b/drivers/clocksource/timer-riscv.c
> @@ -3,9 +3,9 @@
>   * Copyright (C) 2012 Regents of the University of California
>   * Copyright (C) 2017 SiFive
>   *
> - * All RISC-V systems have a timer attached to every hart.  These timers can be
> - * read from the "time" and "timeh" CSRs, and can use the SBI to setup
> - * events.
> + * All RISC-V systems have a timer attached to every hart.  These timers can
> + * either be read from the "time" and "timeh" CSRs, and can use the SBI to
> + * setup events, or directly accessed using MMIO registers.
>   */
>  #include 
>  #include 
> @@ -13,14 +13,27 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>
> +u64 __iomem *riscv_time_cmp;
> +u64 __iomem *riscv_time_val;
> +
> +static inline void mmio_set_timer(u64 val)
> +{
> +   writeq_relaxed(val,
> +   riscv_time_cmp + cpuid_to_hartid_map(smp_processor_id()));
> +}
> +
>  static int riscv_clock_next_event(unsigned long delta,
> struct clock_event_device *ce)
>  {
> csr_set(CSR_XIE, XIE_XTIE);
> -   sbi_set_timer(get_cycles64() + delta);
> +   if (IS_ENABLED(CONFIG_RISCV_SBI))
> +   sbi_set_timer(get_cycles64() + delta);
> +   else
> +   mmio_set_timer(get_cycles64() + delta);
> return 0;
>  }
>
> --
> 2.20.1
>
>
> ___
> linux-riscv mailing list
> linux-ri...@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv

LGTM.

Reviewed-by: Anup Patel 

Regards,
Anup


Re: [PATCH 12/15] riscv: clear the instruction cache and all registers when booting

2019-10-17 Thread Anup Patel
On Thu, Oct 17, 2019 at 11:08 PM Christoph Hellwig  wrote:
>
> When we get booted we want a clear slate without any leaks from previous
> supervisors or the firmware.  Flush the instruction cache and then clear
> all registers to known good values.  This is really important for the
> upcoming nommu support that runs on M-mode, but can't really harm when
> running in S-mode either.  Vaguely based on the concepts from opensbi.
>
> Signed-off-by: Christoph Hellwig 
> ---
>  arch/riscv/include/asm/csr.h |  1 +
>  arch/riscv/kernel/head.S | 88 +++-
>  2 files changed, 88 insertions(+), 1 deletion(-)
>
> diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
> index d0b5113e1a54..ee0101278608 100644
> --- a/arch/riscv/include/asm/csr.h
> +++ b/arch/riscv/include/asm/csr.h
> @@ -83,6 +83,7 @@
>  /* symbolic CSR names: */
>  #define CSR_MHARTID    0xf14
>  #define CSR_MSTATUS    0x300
> +#define CSR_MISA   0x301
>  #define CSR_MIE    0x304
>  #define CSR_MTVEC  0x305
>  #define CSR_MSCRATCH   0x340
> diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
> index 583784cb3a32..25867b99cc95 100644
> --- a/arch/riscv/kernel/head.S
> +++ b/arch/riscv/kernel/head.S
> @@ -11,6 +11,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>
>  __INIT
> @@ -51,12 +52,18 @@ _start_kernel:
> csrw CSR_XIP, zero
>
>  #ifdef CONFIG_RISCV_M_MODE
> +   /* flush the instruction cache */
> +   fence.i
> +
> +   /* Reset all registers except ra, a0, a1 */
> +   call reset_regs
> +
> /*
>  * The hartid in a0 is expected later on, and we have no firmware
>  * to hand it to us.
>  */
> csrr a0, CSR_MHARTID
> -#endif
> +#endif /* CONFIG_RISCV_M_MODE */
>
> /* Load the global pointer */
>  .option push
> @@ -203,6 +210,85 @@ relocate:
> j .Lsecondary_park
>  END(_start)
>
> +#ifdef CONFIG_RISCV_M_MODE
> +ENTRY(reset_regs)
> +   li  sp, 0
> +   li  gp, 0
> +   li  tp, 0
> +   li  t0, 0
> +   li  t1, 0
> +   li  t2, 0
> +   li  s0, 0
> +   li  s1, 0
> +   li  a2, 0
> +   li  a3, 0
> +   li  a4, 0
> +   li  a5, 0
> +   li  a6, 0
> +   li  a7, 0
> +   li  s2, 0
> +   li  s3, 0
> +   li  s4, 0
> +   li  s5, 0
> +   li  s6, 0
> +   li  s7, 0
> +   li  s8, 0
> +   li  s9, 0
> +   li  s10, 0
> +   li  s11, 0
> +   li  t3, 0
> +   li  t4, 0
> +   li  t5, 0
> +   li  t6, 0
> +   csrw    sscratch, 0
> +
> +#ifdef CONFIG_FPU
> +   csrr    t0, CSR_MISA
> +   andi    t0, t0, (COMPAT_HWCAP_ISA_F | COMPAT_HWCAP_ISA_D)
> +   bnez    t0, .Lreset_regs_done
> +
> +   li  t1, SR_FS
> +   csrs    CSR_XSTATUS, t1
> +   fmv.s.x f0, zero
> +   fmv.s.x f1, zero
> +   fmv.s.x f2, zero
> +   fmv.s.x f3, zero
> +   fmv.s.x f4, zero
> +   fmv.s.x f5, zero
> +   fmv.s.x f6, zero
> +   fmv.s.x f7, zero
> +   fmv.s.x f8, zero
> +   fmv.s.x f9, zero
> +   fmv.s.x f10, zero
> +   fmv.s.x f11, zero
> +   fmv.s.x f12, zero
> +   fmv.s.x f13, zero
> +   fmv.s.x f14, zero
> +   fmv.s.x f15, zero
> +   fmv.s.x f16, zero
> +   fmv.s.x f17, zero
> +   fmv.s.x f18, zero
> +   fmv.s.x f19, zero
> +   fmv.s.x f20, zero
> +   fmv.s.x f21, zero
> +   fmv.s.x f22, zero
> +   fmv.s.x f23, zero
> +   fmv.s.x f24, zero
> +   fmv.s.x f25, zero
> +   fmv.s.x f26, zero
> +   fmv.s.x f27, zero
> +   fmv.s.x f28, zero
> +   fmv.s.x f29, zero
> +   fmv.s.x f30, zero
> +   fmv.s.x f31, zero
> +   csrw    fcsr, 0
> +   /* note that the caller must clear SR_FS */
> +#endif /* CONFIG_FPU */
> +.Lreset_regs_done:
> +   ret
> +END(reset_regs)
> +#endif /* CONFIG_RISCV_M_MODE */
> +
>  __PAGE_ALIGNED_BSS
> /* Empty zero page */
> .balign PAGE_SIZE
> --
> 2.20.1
>
>
> ___
> linux-riscv mailing list
> linux-ri...@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv

LGTM.

Reviewed-by: Anup Patel 

Regards,
Anup


Re: [PATCH 13/15] riscv: add nommu support

2019-10-17 Thread Anup Patel
> static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
> struct pt_regs *regs)
>  {
> @@ -189,8 +193,19 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
> return -EFAULT;
>
> /* Set up to return from userspace. */
> +#ifdef CONFIG_MMU
> regs->ra = (unsigned long)VDSO_SYMBOL(
> current->mm->context.vdso, rt_sigreturn);
> +#else
> +   /*
> +* For the nommu case we don't have a VDSO.  Instead we push two
> +* instructions to call the rt_sigreturn syscall onto the user stack.
> +*/
> +   if (copy_to_user(&frame->sigreturn_code, __user_rt_sigreturn,
> +   sizeof(frame->sigreturn_code)))
> +   return -EFAULT;
> +   regs->ra = (unsigned long)&frame->sigreturn_code;
> +#endif /* CONFIG_MMU */
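
(__user_rt_sigreturn is not defined in this hunk.  Conceptually it is
just two instructions; a hand-written sketch, with the encodings worked
out here purely for illustration:

static const u32 __user_rt_sigreturn[2] = {
	0x08b00893,	/* li a7, __NR_rt_sigreturn (139) */
	0x00000073,	/* ecall */
};

i.e. load the rt_sigreturn syscall number into a7 and trap back into
the kernel.)
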
>
> /*
>  * Set up registers for signal handler.
> diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
> index 267feaa10f6a..47e7a8204460 100644
> --- a/arch/riscv/lib/Makefile
> +++ b/arch/riscv/lib/Makefile
> @@ -1,7 +1,6 @@
>  # SPDX-License-Identifier: GPL-2.0-only
> -lib-y  += delay.o
> -lib-y  += memcpy.o
> -lib-y  += memset.o
> -lib-y  += uaccess.o
> -
> -lib-$(CONFIG_64BIT) += tishift.o
> +lib-y  += delay.o
> +lib-y  += memcpy.o
> +lib-y  += memset.o
> +lib-$(CONFIG_MMU)  += uaccess.o
> +lib-$(CONFIG_64BIT)+= tishift.o
> diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile
> index 9d9a17335686..44ab8f28c3fa 100644
> --- a/arch/riscv/mm/Makefile
> +++ b/arch/riscv/mm/Makefile
> @@ -6,9 +6,8 @@ CFLAGS_REMOVE_init.o = -pg
>  endif
>
>  obj-y += init.o
> -obj-y += fault.o
>  obj-y += extable.o
> -obj-y += ioremap.o
> +obj-$(CONFIG_MMU) += fault.o ioremap.o
>  obj-y += cacheflush.o
>  obj-y += context.o
>  obj-y += sifive_l2_cache.o
> diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
> index 794c9ab256eb..8f1900686640 100644
> --- a/arch/riscv/mm/cacheflush.c
> +++ b/arch/riscv/mm/cacheflush.c
> @@ -78,6 +78,7 @@ void flush_icache_mm(struct mm_struct *mm, bool local)
>
>  #endif /* CONFIG_SMP */
>
> +#ifdef CONFIG_MMU
>  void flush_icache_pte(pte_t pte)
>  {
> struct page *page = pte_page(pte);
> @@ -85,3 +86,4 @@ void flush_icache_pte(pte_t pte)
> if (!test_and_set_bit(PG_dcache_clean, >flags))
> flush_icache_all();
>  }
> +#endif /* CONFIG_MMU */
> diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c
> index beeb5d7f92ea..073ff12a838a 100644
> --- a/arch/riscv/mm/context.c
> +++ b/arch/riscv/mm/context.c
> @@ -57,8 +57,10 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
> cpumask_clear_cpu(cpu, mm_cpumask(prev));
> cpumask_set_cpu(cpu, mm_cpumask(next));
>
> +#ifdef CONFIG_MMU
> csr_write(CSR_SATP, virt_to_pfn(next->pgd) | SATP_MODE);
> local_flush_tlb_all();
> +#endif
>
> flush_icache_deferred(next);
>  }
> diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
> index 83f7d12042fb..0b063f6acaa1 100644
> --- a/arch/riscv/mm/init.c
> +++ b/arch/riscv/mm/init.c
> @@ -24,6 +24,7 @@ unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
>  EXPORT_SYMBOL(empty_zero_page);
>
>  extern char _start[];
> +void *dtb_early_va;
>
>  static void __init zone_sizes_init(void)
>  {
> @@ -140,12 +141,12 @@ void __init setup_bootmem(void)
> }
>  }
>
> +#ifdef CONFIG_MMU
>  unsigned long va_pa_offset;
>  EXPORT_SYMBOL(va_pa_offset);
>  unsigned long pfn_base;
>  EXPORT_SYMBOL(pfn_base);
>
> -void *dtb_early_va;
>  pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
>  pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
>  pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
> @@ -448,6 +449,16 @@ static void __init setup_vm_final(void)
> csr_write(CSR_SATP, PFN_DOWN(__pa(swapper_pg_dir)) | SATP_MODE);
> local_flush_tlb_all();
>  }
> +#else
> +asmlinkage void __init setup_vm(uintptr_t dtb_pa)
> +{
> +   dtb_early_va = (void *)dtb_pa;
> +}
> +
> +static inline void setup_vm_final(void)
> +{
> +}
> +#endif /* CONFIG_MMU */
>
>  void __init paging_init(void)
>  {
> --
> 2.20.1
>

LGTM.

Reviewed-by: Anup Patel 

Regards,
Anup


Re: [PATCH 02/15] riscv: cleanup do_trap_break

2019-10-17 Thread Anup Patel
On Thu, Oct 17, 2019 at 11:07 PM Christoph Hellwig  wrote:
>
> If we always compile the get_break_insn_length inline function we can
> remove the ifdefs and let dead code elimination take care of the warn
> branch that is now unreachable because the report_bug stub always
> returns BUG_TRAP_TYPE_BUG.
>
> Signed-off-by: Christoph Hellwig 
> ---
>  arch/riscv/kernel/traps.c | 26 ++
>  1 file changed, 6 insertions(+), 20 deletions(-)
>
> diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
> index 1ac75f7d0bff..10a17e545f43 100644
> --- a/arch/riscv/kernel/traps.c
> +++ b/arch/riscv/kernel/traps.c
> @@ -111,7 +111,6 @@ DO_ERROR_INFO(do_trap_ecall_s,
>  DO_ERROR_INFO(do_trap_ecall_m,
> SIGILL, ILL_ILLTRP, "environment call from M-mode");
>
> -#ifdef CONFIG_GENERIC_BUG
>  static inline unsigned long get_break_insn_length(unsigned long pc)
>  {
> bug_insn_t insn;
> @@ -120,28 +119,15 @@ static inline unsigned long get_break_insn_length(unsigned long pc)
> return 0;
> return (((insn & __INSN_LENGTH_MASK) == __INSN_LENGTH_32) ? 4UL : 
> 2UL);
>  }
> -#endif /* CONFIG_GENERIC_BUG */
>
>  asmlinkage void do_trap_break(struct pt_regs *regs)
>  {
> -   if (user_mode(regs)) {
> -   force_sig_fault(SIGTRAP, TRAP_BRKPT,
> -   (void __user *)(regs->sepc));
> -   return;
> -   }
> -#ifdef CONFIG_GENERIC_BUG
> -   {
> -   enum bug_trap_type type;
> -
> -   type = report_bug(regs->sepc, regs);
> -   if (type == BUG_TRAP_TYPE_WARN) {
> -   regs->sepc += get_break_insn_length(regs->sepc);
> -   return;
> -   }
> -   }
> -#endif /* CONFIG_GENERIC_BUG */
> -
> -   die(regs, "Kernel BUG");
> +   if (user_mode(regs))
> +   force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->sepc);
> +   else if (report_bug(regs->sepc, regs) == BUG_TRAP_TYPE_WARN)
> +   regs->sepc += get_break_insn_length(regs->sepc);
> +   else
> +   die(regs, "Kernel BUG");
>  }
>
>  #ifdef CONFIG_GENERIC_BUG
> --
> 2.20.1
>
>
> ___
> linux-riscv mailing list
> linux-ri...@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv

LGTM.

Reviewed-by: Anup Patel 

Regards,
Anup


Re: [PATCH 09/15] riscv: provide native clint access for M-mode

2019-10-17 Thread Anup Patel
nt0");
> +   if (!np) {
> +   panic("clint not found");
> +   return;
> +   }
> +
> +   base = of_iomap(np, 0);
> +   if (!base)
> +   panic("could not map CLINT");
> +
> +   clint_ipi_base = base + CLINT_IPI_OFF;
> +   riscv_time_cmp = base + CLINT_TIME_CMP_OFF;
> +   riscv_time_val = base + CLINT_TIME_VAL_OFF;
> +
> +   clint_clear_ipi(boot_cpu_hartid);
> +}
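
(The clint_* helpers used above come from an asm/clint.h that is not
quoted here.  A minimal sketch of such MMIO IPI helpers, assuming
clint_ipi_base points at the CLINT MSIP array with one u32 per hart:

static inline void clint_send_ipi_single(unsigned long hartid)
{
	writel(1, clint_ipi_base + hartid);
}

static inline void clint_clear_ipi(unsigned long hartid)
{
	writel(0, clint_ipi_base + hartid);
}

static inline void clint_send_ipi_mask(const struct cpumask *hartid_mask)
{
	int hartid;

	for_each_cpu(hartid, hartid_mask)
		clint_send_ipi_single(hartid);
}

Writing 1 to a hart's MSIP word raises its software interrupt; writing
0 clears it.)
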
> diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> index a990a6cb184f..f4ba71b66c73 100644
> --- a/arch/riscv/kernel/setup.c
> +++ b/arch/riscv/kernel/setup.c
> @@ -17,6 +17,7 @@
>  #include 
>  #include 
>
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -65,6 +66,7 @@ void __init setup_arch(char **cmdline_p)
> setup_bootmem();
> paging_init();
> unflatten_device_tree();
> +   clint_init_boot_cpu();
>
>  #ifdef CONFIG_SWIOTLB
> swiotlb_init(1);
> diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c
> index b18cd6c8e8fb..c46df9c2e927 100644
> --- a/arch/riscv/kernel/smp.c
> +++ b/arch/riscv/kernel/smp.c
> @@ -14,6 +14,7 @@
>  #include 
>  #include 
>
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -90,7 +91,10 @@ static void send_ipi_mask(const struct cpumask *mask, enum ipi_message_type op)
> smp_mb__after_atomic();
>
> riscv_cpuid_to_hartid_mask(mask, &hartid_mask);
> -   sbi_send_ipi(cpumask_bits(&hartid_mask));
> +   if (IS_ENABLED(CONFIG_RISCV_SBI))
> +   sbi_send_ipi(cpumask_bits(&hartid_mask));
> +   else
> +   clint_send_ipi_mask(&hartid_mask);
>  }
>
>  static void send_ipi_single(int cpu, enum ipi_message_type op)
> @@ -101,12 +105,18 @@ static void send_ipi_single(int cpu, enum ipi_message_type op)
> set_bit(op, _data[cpu].bits);
> smp_mb__after_atomic();
>
> -   sbi_send_ipi(cpumask_bits(cpumask_of(hartid)));
> +   if (IS_ENABLED(CONFIG_RISCV_SBI))
> +   sbi_send_ipi(cpumask_bits(cpumask_of(hartid)));
> +   else
> +   clint_send_ipi_single(hartid);
>  }
>
>  static inline void clear_ipi(void)
>  {
> -   csr_clear(CSR_SIP, SIE_SSIE);
> +   if (IS_ENABLED(CONFIG_RISCV_SBI))
> +   csr_clear(CSR_SIP, SIE_SSIE);
> +   else
> +   clint_clear_ipi(cpuid_to_hartid_map(smp_processor_id()));
>  }
>
>  void riscv_software_interrupt(void)
> diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
> index 18ae6da5115e..6300b09f1d1d 100644
> --- a/arch/riscv/kernel/smpboot.c
> +++ b/arch/riscv/kernel/smpboot.c
> @@ -24,6 +24,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -134,6 +135,9 @@ asmlinkage void __init smp_callin(void)
>  {
> struct mm_struct *mm = &init_mm;
>
> +   if (!IS_ENABLED(CONFIG_RISCV_SBI))
> +   clint_clear_ipi(cpuid_to_hartid_map(smp_processor_id()));
> +
> /* All kernel threads share the same mm context.  */
> mmgrab(mm);
> current->active_mm = mm;
> --
> 2.20.1
>
>
> ___
> linux-riscv mailing list
> linux-ri...@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv

LGTM.

Reviewed-by: Anup Patel 

Regards,
Anup


Re: [PATCH 11/15] riscv: use the correct interrupt levels for M-mode

2019-10-17 Thread Anup Patel
On Thu, Oct 17, 2019 at 11:08 PM Christoph Hellwig  wrote:
>
> The numerical levels for External/Timer/Software interrupts differ
> between S-mode and M-mode.
>
> Signed-off-by: Christoph Hellwig 
> ---
>  arch/riscv/kernel/irq.c | 12 +---
>  1 file changed, 9 insertions(+), 3 deletions(-)
>
> diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c
> index 804ff70bb853..dbd1fd7c22e4 100644
> --- a/arch/riscv/kernel/irq.c
> +++ b/arch/riscv/kernel/irq.c
> @@ -14,9 +14,15 @@
>  /*
>   * Possible interrupt causes:
>   */
> -#define INTERRUPT_CAUSE_SOFTWARE   IRQ_S_SOFT
> -#define INTERRUPT_CAUSE_TIMER  IRQ_S_TIMER
> -#define INTERRUPT_CAUSE_EXTERNAL   IRQ_S_EXT
> +#ifdef CONFIG_RISCV_M_MODE
> +# define INTERRUPT_CAUSE_SOFTWARE  IRQ_M_SOFT
> +# define INTERRUPT_CAUSE_TIMER IRQ_M_TIMER
> +# define INTERRUPT_CAUSE_EXTERNAL  IRQ_M_EXT
> +#else
> +# define INTERRUPT_CAUSE_SOFTWARE  IRQ_S_SOFT
> +# define INTERRUPT_CAUSE_TIMER IRQ_S_TIMER
> +# define INTERRUPT_CAUSE_EXTERNAL  IRQ_S_EXT
> +#endif /* CONFIG_RISCV_M_MODE */
>
>  int arch_show_interrupts(struct seq_file *p, int prec)
>  {
> --
> 2.20.1
>

LGTM.

Reviewed-by: Anup Patel 

Regards,
Anup


Re: [PATCH 10/15] riscv: read the hart ID from mhartid on boot

2019-10-17 Thread Anup Patel
On Thu, Oct 17, 2019 at 11:08 PM Christoph Hellwig  wrote:
>
> From: Damien Le Moal 
>
> When in M-Mode, we can use the mhartid CSR to get the ID of the running
> HART. Doing so, direct M-Mode boot without firmware is possible.
>
> Signed-off-by: Damien Le Moal 
> Signed-off-by: Christoph Hellwig 
> Reviewed-by: Atish Patra 
> ---
>  arch/riscv/include/asm/csr.h | 1 +
>  arch/riscv/kernel/head.S | 8 
>  2 files changed, 9 insertions(+)
>
> diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
> index 0dae5c361f29..d0b5113e1a54 100644
> --- a/arch/riscv/include/asm/csr.h
> +++ b/arch/riscv/include/asm/csr.h
> @@ -81,6 +81,7 @@
>  #define SIE_SEIE   (_AC(0x1, UL) << IRQ_S_EXT)
>
>  /* symbolic CSR names: */
> +#define CSR_MHARTID    0xf14
>  #define CSR_MSTATUS    0x300
>  #define CSR_MIE    0x304
>  #define CSR_MTVEC  0x305
> diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
> index 679e63d29edb..583784cb3a32 100644
> --- a/arch/riscv/kernel/head.S
> +++ b/arch/riscv/kernel/head.S
> @@ -50,6 +50,14 @@ _start_kernel:
> csrw CSR_XIE, zero
> csrw CSR_XIP, zero
>
> +#ifdef CONFIG_RISCV_M_MODE
> +   /*
> +* The hartid in a0 is expected later on, and we have no firmware
> +* to hand it to us.
> +*/
> +   csrr a0, CSR_MHARTID
> +#endif
> +
> /* Load the global pointer */
>  .option push
>  .option norelax
> --
> 2.20.1
>
>
> ___
> linux-riscv mailing list
> linux-ri...@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv

LGTM.

Reviewed-by: Anup Patel 

Regards,
Anup


Re: [PATCH 04/15] riscv: don't allow selecting SBI based drivers for M-mode

2019-10-17 Thread Anup Patel
On Thu, Oct 17, 2019 at 11:07 PM Christoph Hellwig  wrote:
>
> From: Damien Le Moal 
>
> When running in M-mode we can't use SBI based drivers.  Add a new
> CONFIG_RISCV_SBI that drivers that do SBI calls can depend on
> instead.
>
> Signed-off-by: Damien Le Moal 
> Signed-off-by: Christoph Hellwig 
> ---
>  arch/riscv/Kconfig | 6 ++
>  drivers/tty/hvc/Kconfig| 2 +-
>  drivers/tty/serial/Kconfig | 2 +-
>  3 files changed, 8 insertions(+), 2 deletions(-)
>
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index 86b7e8b0471c..b85492c42ccb 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -76,6 +76,12 @@ config ARCH_MMAP_RND_BITS_MAX
>  config RISCV_M_MODE
> bool
>
> +# set if we are running in S-mode and can use SBI calls
> +config RISCV_SBI
> +   bool
> +   depends on !RISCV_M_MODE
> +   default y
> +
>  config MMU
> def_bool y
>
> diff --git a/drivers/tty/hvc/Kconfig b/drivers/tty/hvc/Kconfig
> index 4d22b91f..4487a6b9acc8 100644
> --- a/drivers/tty/hvc/Kconfig
> +++ b/drivers/tty/hvc/Kconfig
> @@ -89,7 +89,7 @@ config HVC_DCC
>
>  config HVC_RISCV_SBI
> bool "RISC-V SBI console support"
> -   depends on RISCV
> +   depends on RISCV_SBI
> select HVC_DRIVER
> help
>   This enables support for console output via RISC-V SBI calls, which
> diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
> index 67a9eb3f94ce..540142c5b7b3 100644
> --- a/drivers/tty/serial/Kconfig
> +++ b/drivers/tty/serial/Kconfig
> @@ -88,7 +88,7 @@ config SERIAL_EARLYCON_ARM_SEMIHOST
>
>  config SERIAL_EARLYCON_RISCV_SBI
> bool "Early console using RISC-V SBI"
> -   depends on RISCV
> +   depends on RISCV_SBI
> select SERIAL_CORE
> select SERIAL_CORE_CONSOLE
> select SERIAL_EARLYCON
> --
> 2.20.1
>
>
> ___
> linux-riscv mailing list
> linux-ri...@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv

LGTM.

Reviewed-by: Anup Patel 

Regards,
Anup


Re: [PATCH 06/15] riscv: cleanup the default power off implementation

2019-10-17 Thread Anup Patel
On Thu, Oct 17, 2019 at 11:08 PM Christoph Hellwig  wrote:
>
> Move the sbi poweroff to a separate function and file that is only
> compiled if CONFIG_RISCV_SBI is set.  Provide a new default fallback
> power off that just sits in a wfi loop to save some power.
>
> Signed-off-by: Christoph Hellwig 
> Reviewed-by: Atish Patra 
> ---
>  arch/riscv/kernel/Makefile |  1 +
>  arch/riscv/kernel/reset.c  |  5 ++---
>  arch/riscv/kernel/sbi.c| 17 +
>  3 files changed, 20 insertions(+), 3 deletions(-)
>  create mode 100644 arch/riscv/kernel/sbi.c
>
> diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
> index 696020ff72db..d8c35fa93cc6 100644
> --- a/arch/riscv/kernel/Makefile
> +++ b/arch/riscv/kernel/Makefile
> @@ -41,5 +41,6 @@ obj-$(CONFIG_DYNAMIC_FTRACE)  += mcount-dyn.o
>  obj-$(CONFIG_PERF_EVENTS)  += perf_event.o
>  obj-$(CONFIG_PERF_EVENTS)  += perf_callchain.o
>  obj-$(CONFIG_HAVE_PERF_REGS)   += perf_regs.o
> +obj-$(CONFIG_RISCV_SBI)+= sbi.o
>
>  clean:
> diff --git a/arch/riscv/kernel/reset.c b/arch/riscv/kernel/reset.c
> index d0fe623bfb8f..5e4e69859af1 100644
> --- a/arch/riscv/kernel/reset.c
> +++ b/arch/riscv/kernel/reset.c
> @@ -4,12 +4,11 @@
>   */
>
>  #include 
> -#include 
>
>  static void default_power_off(void)
>  {
> -   sbi_shutdown();
> -   while (1);
> +   while (1)
> +   wait_for_interrupt();
>  }
>
>  void (*pm_power_off)(void) = default_power_off;
> diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c
> new file mode 100644
> index 000000000000..f6c7c3e82d28
> --- /dev/null
> +++ b/arch/riscv/kernel/sbi.c
> @@ -0,0 +1,17 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +
> +#include 
> +#include 
> +#include 
> +
> +static void sbi_power_off(void)
> +{
> +   sbi_shutdown();
> +}
> +
> +static int __init sbi_init(void)
> +{
> +   pm_power_off = sbi_power_off;
> +   return 0;
> +}
> +early_initcall(sbi_init);
> --
> 2.20.1
>
>
> ___
> linux-riscv mailing list
> linux-ri...@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv

LGTM.

Reviewed-by: Anup Patel 

Regards,
Anup


Re: [PATCH 01/15] riscv: cleanup

2019-10-17 Thread Anup Patel
On Thu, Oct 17, 2019 at 11:07 PM Christoph Hellwig  wrote:
>
> Remove various not required ifdefs and externs.
>
> Signed-off-by: Christoph Hellwig 
> ---
>  arch/riscv/include/asm/bug.h | 16 +++-
>  1 file changed, 3 insertions(+), 13 deletions(-)
>
> diff --git a/arch/riscv/include/asm/bug.h b/arch/riscv/include/asm/bug.h
> index 07ceee8b1747..75604fec1b1b 100644
> --- a/arch/riscv/include/asm/bug.h
> +++ b/arch/riscv/include/asm/bug.h
> @@ -12,7 +12,6 @@
>
>  #include 
>
> -#ifdef CONFIG_GENERIC_BUG
>  #define __INSN_LENGTH_MASK  _UL(0x3)
>  #define __INSN_LENGTH_32    _UL(0x3)
>  #define __COMPRESSED_INSN_MASK _UL(0xffff)
> @@ -20,7 +19,6 @@
>  #define __BUG_INSN_32  _UL(0x00100073) /* ebreak */
>  #define __BUG_INSN_16  _UL(0x9002) /* c.ebreak */
>
> -#ifndef __ASSEMBLY__
>  typedef u32 bug_insn_t;
>
>  #ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
> @@ -43,6 +41,7 @@ typedef u32 bug_insn_t;
> RISCV_SHORT " %2"
>  #endif
>
> +#ifdef CONFIG_GENERIC_BUG
>  #define __BUG_FLAGS(flags) \
>  do {   \
> __asm__ __volatile__ (  \
> @@ -58,14 +57,10 @@ do {  \
>   "i" (flags),  \
>   "i" (sizeof(struct bug_entry)));  \
>  } while (0)
> -
> -#endif /* !__ASSEMBLY__ */
>  #else /* CONFIG_GENERIC_BUG */
> -#ifndef __ASSEMBLY__
>  #define __BUG_FLAGS(flags) do {\
> __asm__ __volatile__ ("ebreak\n");  \
>  } while (0)
> -#endif /* !__ASSEMBLY__ */
>  #endif /* CONFIG_GENERIC_BUG */
>
>  #define BUG() do { \
> @@ -79,15 +74,10 @@ do {  \
>
>  #include 
>
> -#ifndef __ASSEMBLY__
> -
>  struct pt_regs;
>  struct task_struct;
>
> -extern void die(struct pt_regs *regs, const char *str);
> -extern void do_trap(struct pt_regs *regs, int signo, int code,
> -   unsigned long addr);
> -
> -#endif /* !__ASSEMBLY__ */
> +void die(struct pt_regs *regs, const char *str);
> +void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr);
>
>  #endif /* _ASM_RISCV_BUG_H */
> --
> 2.20.1
>
>
> ___
> linux-riscv mailing list
> linux-ri...@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv

LGTM.

Reviewed-by: Anup Patel 

Regards,
Anup


Re: [PATCH 05/15] riscv: poison SBI calls for M-mode

2019-10-17 Thread Anup Patel
On Thu, Oct 17, 2019 at 11:08 PM Christoph Hellwig  wrote:
>
> There is no SBI when we run in M-mode, so fail the compile for any code
> trying to use SBI calls.
>
> Signed-off-by: Christoph Hellwig 
> ---
>  arch/riscv/include/asm/sbi.h | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
> index 21134b3ef404..b167af3e7470 100644
> --- a/arch/riscv/include/asm/sbi.h
> +++ b/arch/riscv/include/asm/sbi.h
> @@ -8,6 +8,7 @@
>
>  #include 
>
> +#ifdef CONFIG_RISCV_SBI
>  #define SBI_SET_TIMER 0
>  #define SBI_CONSOLE_PUTCHAR 1
>  #define SBI_CONSOLE_GETCHAR 2
> @@ -93,5 +94,5 @@ static inline void sbi_remote_sfence_vma_asid(const unsigned long *hart_mask,
>  {
> SBI_CALL_4(SBI_REMOTE_SFENCE_VMA_ASID, hart_mask, start, size, asid);
>  }
> -
> -#endif
> +#endif /* CONFIG_RISCV_SBI */
> +#endif /* _ASM_RISCV_SBI_H */
> --
> 2.20.1
>
>
> ___
> linux-riscv mailing list
> linux-ri...@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv

LGTM.

Reviewed-by: Anup Patel 

Regards,
Anup


Re: [PATCH 03/15] riscv: abstract out CSR names for supervisor vs machine mode

2019-10-17 Thread Anup Patel
   case -ERESTARTNOINTR:
>  regs->a0 = regs->orig_a0;
> -   regs->sepc -= 0x4;
> +   regs->xepc -= 0x4;
> break;
> case -ERESTART_RESTARTBLOCK:
>  regs->a0 = regs->orig_a0;
> regs->a7 = __NR_restart_syscall;
> -   regs->sepc -= 0x4;
> +   regs->xepc -= 0x4;
> break;
> }
> }
> diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
> index 10a17e545f43..2cf1f0f3871e 100644
> --- a/arch/riscv/kernel/traps.c
> +++ b/arch/riscv/kernel/traps.c
> @@ -40,7 +40,7 @@ void die(struct pt_regs *regs, const char *str)
> print_modules();
> show_regs(regs);
>
> -   ret = notify_die(DIE_OOPS, str, regs, 0, regs->scause, SIGSEGV);
> +   ret = notify_die(DIE_OOPS, str, regs, 0, regs->xcause, SIGSEGV);
>
> bust_spinlocks(0);
> add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
> @@ -85,7 +85,7 @@ static void do_trap_error(struct pt_regs *regs, int signo, int code,
>  #define DO_ERROR_INFO(name, signo, code, str)  \
>  asmlinkage void name(struct pt_regs *regs) \
>  {  \
> -   do_trap_error(regs, signo, code, regs->sepc, "Oops - " str);\
> +   do_trap_error(regs, signo, code, regs->xepc, "Oops - " str);\
>  }
>
>  DO_ERROR_INFO(do_trap_unknown,
> @@ -123,9 +123,9 @@ static inline unsigned long get_break_insn_length(unsigned long pc)
>  asmlinkage void do_trap_break(struct pt_regs *regs)
>  {
> if (user_mode(regs))
> -   force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->sepc);
> -   else if (report_bug(regs->sepc, regs) == BUG_TRAP_TYPE_WARN)
> -   regs->sepc += get_break_insn_length(regs->sepc);
> +   force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->xepc);
> +   else if (report_bug(regs->xepc, regs) == BUG_TRAP_TYPE_WARN)
> +   regs->xepc += get_break_insn_length(regs->xepc);
> else
> die(regs, "Kernel BUG");
>  }
> @@ -152,9 +152,9 @@ void __init trap_init(void)
>  * Set sup0 scratch register to 0, indicating to exception vector
>  * that we are presently executing in the kernel
>  */
> -   csr_write(CSR_SSCRATCH, 0);
> +   csr_write(CSR_XSCRATCH, 0);
> /* Set the exception vector address */
> -   csr_write(CSR_STVEC, &handle_exception);
> +   csr_write(CSR_XTVEC, &handle_exception);
> /* Enable all interrupts */
> -   csr_write(CSR_SIE, -1);
> +   csr_write(CSR_XIE, -1);
>  }
> diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S
> index ed2696c0143d..f47a2ea4dc89 100644
> --- a/arch/riscv/lib/uaccess.S
> +++ b/arch/riscv/lib/uaccess.S
> @@ -18,7 +18,7 @@ ENTRY(__asm_copy_from_user)
>
> /* Enable access to user memory */
> li t6, SR_SUM
> -   csrs CSR_SSTATUS, t6
> +   csrs CSR_XSTATUS, t6
>
> add a3, a1, a2
> /* Use word-oriented copy only if low-order bits match */
> @@ -47,7 +47,7 @@ ENTRY(__asm_copy_from_user)
>
>  3:
> /* Disable access to user memory */
> -   csrc CSR_SSTATUS, t6
> +   csrc CSR_XSTATUS, t6
> li a0, 0
> ret
>  4: /* Edge case: unalignment */
> @@ -72,7 +72,7 @@ ENTRY(__clear_user)
>
> /* Enable access to user memory */
> li t6, SR_SUM
> -   csrs CSR_SSTATUS, t6
> +   csrs CSR_XSTATUS, t6
>
> add a3, a0, a1
> addi t0, a0, SZREG-1
> @@ -94,7 +94,7 @@ ENTRY(__clear_user)
>
>  3:
> /* Disable access to user memory */
> -   csrc CSR_SSTATUS, t6
> +   csrc CSR_XSTATUS, t6
> li a0, 0
> ret
>  4: /* Edge case: unalignment */
> @@ -114,11 +114,11 @@ ENDPROC(__clear_user)
> /* Fixup code for __copy_user(10) and __clear_user(11) */
>  10:
> /* Disable access to user memory */
> -   csrs CSR_SSTATUS, t6
> +   csrs CSR_XSTATUS, t6
> mv a0, a2
> ret
>  11:
> -   csrs CSR_SSTATUS, t6
> +   csrs CSR_XSTATUS, t6
> mv a0, a1
> ret
> .previous
> diff --git a/arch/riscv/mm/extable.c b/arch/riscv/mm/extable.c
> index 7aed9178d365..e0659deeb16b 100644
> --- a/arch/riscv/mm/extable.c
> +++ b/arch/riscv/mm/extable.c
> @@ -15,9 +15,9 @@ int fixup_exception(struct pt_regs *regs)
>  {
> const struct exception_table_entry *fixup;
>
> -   fixup = search_exception_tables(regs->sepc);
> +   fixup = search_exception_tables(regs->xepc);
> if (fixup) {
> -   regs->sepc = fixup->fixup;
> +   regs->xepc = fixup->fixup;
> return 1;
> }
> return 0;
> diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
> index 96add1427a75..10a8ce38ac7a 100644
> --- a/arch/riscv/mm/fault.c
> +++ b/arch/riscv/mm/fault.c
> @@ -32,8 +32,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
> int code = SEGV_MAPERR;
> vm_fault_t fault;
>
> -   cause = regs->scause;
> -   addr = regs->sbadaddr;
> +   cause = regs->xcause;
> +   addr = regs->xbadaddr;
>
> tsk = current;
> mm = tsk->mm;
> @@ -51,7 +51,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
> goto vmalloc_fault;
>
> /* Enable interrupts if they were enabled in the parent context. */
> -   if (likely(regs->sstatus & SR_SPIE))
> +   if (likely(regs->xstatus & SR_XPIE))
> local_irq_enable();
>
> /*
> diff --git a/drivers/clocksource/timer-riscv.c b/drivers/clocksource/timer-riscv.c
> index 470c7ef02ea4..5d2fdc3e28a9 100644
> --- a/drivers/clocksource/timer-riscv.c
> +++ b/drivers/clocksource/timer-riscv.c
> @@ -19,7 +19,7 @@
>  static int riscv_clock_next_event(unsigned long delta,
> struct clock_event_device *ce)
>  {
> -   csr_set(sie, SIE_STIE);
> +   csr_set(CSR_XIE, XIE_XTIE);
> sbi_set_timer(get_cycles64() + delta);
> return 0;
>  }
> @@ -61,13 +61,13 @@ static int riscv_timer_starting_cpu(unsigned int cpu)
> ce->cpumask = cpumask_of(cpu);
> clockevents_config_and_register(ce, riscv_timebase, 100, 0x7fff);
>
> -   csr_set(sie, SIE_STIE);
> +   csr_set(CSR_XIE, XIE_XTIE);
> return 0;
>  }
>
>  static int riscv_timer_dying_cpu(unsigned int cpu)
>  {
> -   csr_clear(sie, SIE_STIE);
> +   csr_clear(CSR_XIE, XIE_XTIE);
> return 0;
>  }
>
> @@ -76,7 +76,7 @@ void riscv_timer_interrupt(void)
>  {
> struct clock_event_device *evdev = this_cpu_ptr(&riscv_clock_event);
>
> -   csr_clear(sie, SIE_STIE);
> +   csr_clear(CSR_XIE, XIE_XTIE);
> evdev->event_handler(evdev);
>  }
>
> diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
> index c72c036aea76..4ee96ac90ea4 100644
> --- a/drivers/irqchip/irq-sifive-plic.c
> +++ b/drivers/irqchip/irq-sifive-plic.c
> @@ -179,7 +179,7 @@ static void plic_handle_irq(struct pt_regs *regs)
>
> WARN_ON_ONCE(!handler->present);
>
> -   csr_clear(sie, SIE_SEIE);
> +   csr_clear(CSR_XIE, XIE_XEIE);
> while ((hwirq = readl(claim))) {
> int irq = irq_find_mapping(plic_irqdomain, hwirq);
>
> @@ -190,7 +190,7 @@ static void plic_handle_irq(struct pt_regs *regs)
> generic_handle_irq(irq);
> writel(hwirq, claim);
> }
> -   csr_set(sie, SIE_SEIE);
> +   csr_set(CSR_XIE, XIE_XEIE);
>  }
>
>  /*
> --
> 2.20.1
>
>
> ___
> linux-riscv mailing list
> linux-ri...@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv

LGTM.

Reviewed-by: Anup Patel 

Regards,
Anup
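
For reference, the CSR_X* names used throughout the series above are
aliases that resolve to either the S-mode or the M-mode CSRs at build
time. A minimal sketch of the aliasing, assuming CONFIG_RISCV_M_MODE
selects the M-mode definitions (presumably in asm/csr.h):

    #ifdef CONFIG_RISCV_M_MODE
    # define CSR_XSTATUS  CSR_MSTATUS
    # define CSR_XIE      CSR_MIE
    # define CSR_XIP      CSR_MIP
    #else
    # define CSR_XSTATUS  CSR_SSTATUS
    # define CSR_XIE      CSR_SIE
    # define CSR_XIP      CSR_SIP
    #endif

This keeps code such as csr_set(CSR_XIE, XIE_XTIE) identical for S-mode
and M-mode kernels.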


Re: [PATCH 0/8] riscv: resolve most warnings from sparse

2019-10-17 Thread Anup Patel
On Fri, Oct 18, 2019 at 6:19 AM Paul Walmsley  wrote:
>
> Resolve most warnings from the 'sparse' static analysis tool for the
> arch/riscv codebase.  This makes life easier for us as maintainers,
> and makes it easier for developers to use static analysis tools on
> their own changes.
>
> This patch series incorporates some changes based on feedback from
> Christoph Hellwig .
>
> Applies on the current riscv fixes branch that is based on v5.4-rc3.

This series certainly conflicts with Christoph's NOMMU series, so
please rebase it on the NOMMU series.

Regards,
Anup

>
>
> - Paul
>
>
> Paul Walmsley (8):
>   riscv: add prototypes for assembly language functions from entry.S
>   riscv: add prototypes for assembly language functions from head.S
>   riscv: init: merge split string literals in preprocessor directive
>   riscv: ensure RISC-V C model definitions are passed to static
> analyzers
>   riscv: add missing prototypes
>   riscv: mark some code and data as file-static
>   riscv: add missing header file includes
>   riscv: fp: add missing __user pointer annotations
>
> Kernel object size difference:
>    text    data     bss     dec     hex filename
> 6664206 2136568  312608 9113382  8b0f26 vmlinux.orig
> 6664186 2136552  312608 9113346  8b0f02 vmlinux.patched
>
>  arch/riscv/Makefile |  2 ++
>  arch/riscv/include/asm/irq.h|  6 ++
>  arch/riscv/include/asm/pgtable.h|  2 ++
>  arch/riscv/include/asm/processor.h  |  4 
>  arch/riscv/include/asm/ptrace.h |  4 
>  arch/riscv/include/asm/smp.h|  2 ++
>  arch/riscv/include/asm/switch_to.h  |  1 +
>  arch/riscv/kernel/cpufeature.c  |  1 +
>  arch/riscv/kernel/entry.h   | 29 +
>  arch/riscv/kernel/head.h| 21 +
>  arch/riscv/kernel/module-sections.c |  1 +
>  arch/riscv/kernel/process.c |  2 ++
>  arch/riscv/kernel/reset.c   |  1 +
>  arch/riscv/kernel/setup.c   |  2 ++
>  arch/riscv/kernel/signal.c  |  6 --
>  arch/riscv/kernel/smp.c |  2 ++
>  arch/riscv/kernel/smpboot.c |  3 +++
>  arch/riscv/kernel/stacktrace.c  |  6 --
>  arch/riscv/kernel/syscall_table.c   |  1 +
>  arch/riscv/kernel/time.c|  1 +
>  arch/riscv/kernel/traps.c   |  2 ++
>  arch/riscv/kernel/vdso.c|  3 ++-
>  arch/riscv/mm/context.c |  1 +
>  arch/riscv/mm/fault.c   |  2 ++
>  arch/riscv/mm/init.c| 17 ++---
>  arch/riscv/mm/sifive_l2_cache.c |  2 +-
>  26 files changed, 111 insertions(+), 13 deletions(-)
>  create mode 100644 arch/riscv/kernel/entry.h
>  create mode 100644 arch/riscv/kernel/head.h
>
> --
> 2.23.0
>
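
Sparse is normally run through kbuild, e.g. "make C=1 ARCH=riscv" to
check files as they are recompiled (C=2 checks every file). As a rough
sketch of what the __user annotations in patch 8 buy us: sparse builds
with -D__CHECKER__, under which __user marks a separate address space,
so mixing plain and user pointers is reported. A minimal example
(function and parameter names here are illustrative, not from the
series):

    static long save_fp_regs(void __user *dst, const void *src, size_t n)
    {
    	/* ok: dst carries the __user annotation copy_to_user() expects */
    	return copy_to_user(dst, src, n) ? -EFAULT : 0;
    }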


Re: [PATCH 14/15] riscv: provide a flat image loader

2019-10-17 Thread Anup Patel
On Thu, Oct 17, 2019 at 11:08 PM Christoph Hellwig  wrote:
>
> This allows just loading the kernel at a pre-set address without
> qemu going bonkers trying to map the ELF file.
>
> Contains a contribution from Aurabindo Jayamohanan to reuse the
> PAGE_OFFSET definition.
>
> Signed-off-by: Christoph Hellwig 
> ---
>  arch/riscv/Makefile  | 13 +
>  arch/riscv/boot/Makefile |  7 ++-
>  arch/riscv/boot/loader.S |  8 
>  arch/riscv/boot/loader.lds.S | 16 
>  4 files changed, 39 insertions(+), 5 deletions(-)
>  create mode 100644 arch/riscv/boot/loader.S
>  create mode 100644 arch/riscv/boot/loader.lds.S
>
> diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
> index f5e914210245..b9009a2fbaf5 100644
> --- a/arch/riscv/Makefile
> +++ b/arch/riscv/Makefile
> @@ -83,13 +83,18 @@ PHONY += vdso_install
>  vdso_install:
> $(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso $@
>
> -all: Image.gz
> +ifeq ($(CONFIG_RISCV_M_MODE),y)
> +KBUILD_IMAGE := $(boot)/loader
> +else
> +KBUILD_IMAGE := $(boot)/Image.gz
> +endif
> +BOOT_TARGETS := Image Image.gz loader
>
> -Image: vmlinux
> -   $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
> +all:   $(notdir $(KBUILD_IMAGE))
>
> -Image.%: Image
> +$(BOOT_TARGETS): vmlinux
> $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
> +   @$(kecho) '  Kernel: $(boot)/$@ is ready'
>
>  zinstall install:
> $(Q)$(MAKE) $(build)=$(boot) $@
> diff --git a/arch/riscv/boot/Makefile b/arch/riscv/boot/Makefile
> index 0990a9fdbe5d..8639e0dd2cdf 100644
> --- a/arch/riscv/boot/Makefile
> +++ b/arch/riscv/boot/Makefile
> @@ -16,7 +16,7 @@
>
>  OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S
>
> -targets := Image
> +targets := Image loader
>
>  $(obj)/Image: vmlinux FORCE
> $(call if_changed,objcopy)
> @@ -24,6 +24,11 @@ $(obj)/Image: vmlinux FORCE
>  $(obj)/Image.gz: $(obj)/Image FORCE
> $(call if_changed,gzip)
>
> +loader.o: $(src)/loader.S $(obj)/Image
> +
> +$(obj)/loader: $(obj)/loader.o $(obj)/Image $(obj)/loader.lds FORCE
> +   $(Q)$(LD) -T $(src)/loader.lds -o $@ $(obj)/loader.o
> +
>  install:
> $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \
> $(obj)/Image System.map "$(INSTALL_PATH)"
> diff --git a/arch/riscv/boot/loader.S b/arch/riscv/boot/loader.S
> new file mode 100644
> index ..5586e2610dbb
> --- /dev/null
> +++ b/arch/riscv/boot/loader.S
> @@ -0,0 +1,8 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +   .align 4
> +   .section .payload, "ax", %progbits
> +   .globl _start
> +_start:
> +   .incbin "arch/riscv/boot/Image"
> +
> diff --git a/arch/riscv/boot/loader.lds.S b/arch/riscv/boot/loader.lds.S
> new file mode 100644
> index ..47a5003c2e28
> --- /dev/null
> +++ b/arch/riscv/boot/loader.lds.S
> @@ -0,0 +1,16 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#include 
> +
> +OUTPUT_ARCH(riscv)
> +ENTRY(_start)
> +
> +SECTIONS
> +{
> +   . = PAGE_OFFSET;
> +
> +   .payload : {
> +   *(.payload)
> +   . = ALIGN(8);
> +   }
> +}
> --
> 2.20.1
>

LGTM.

Reviewed-by: Anup Patel 

Regards,
Anup
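
For what it's worth, the resulting arch/riscv/boot/loader is what an
M-mode QEMU run would consume directly, presumably along the lines of:

    qemu-system-riscv64 -M virt -bios none -nographic -kernel arch/riscv/boot/loader

Since the linker script places the .payload section at PAGE_OFFSET,
QEMU can map the small wrapper ELF sanely instead of struggling with
the full vmlinux.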


Re: [PATCH 15/15] riscv: disable the EFI PECOFF header for M-mode

2019-10-17 Thread Anup Patel
On Thu, Oct 17, 2019 at 11:08 PM Christoph Hellwig  wrote:
>
> No point in bloating the kernel image with a bootloader header if
> we run bare metal.
>
> Signed-off-by: Christoph Hellwig 
> ---
>  arch/riscv/kernel/head.S | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
> index 71efbba25ed5..dc21e409cc49 100644
> --- a/arch/riscv/kernel/head.S
> +++ b/arch/riscv/kernel/head.S
> @@ -16,6 +16,7 @@
>
>  __INIT
>  ENTRY(_start)
> +#ifndef CONFIG_RISCV_M_MODE
> /*
>  * Image header expected by Linux boot-loaders. The image header data
>  * structure is described in asm/image.h.
> @@ -47,6 +48,7 @@ ENTRY(_start)
>
>  .global _start_kernel
>  _start_kernel:
> +#endif /* CONFIG_RISCV_M_MODE */
> /* Mask all interrupts */
> csrw CSR_XIE, zero
> csrw CSR_XIP, zero
> --
> 2.20.1
>

LGTM.

Reviewed-by: Anup Patel 

Regards,
Anup


Re: [PATCH v9 01/22] RISC-V: Add bitmap representing ISA features common across CPUs

2019-10-17 Thread Anup Patel
Hi Paul,

On Wed, Oct 16, 2019 at 9:38 PM Anup Patel  wrote:
>
> This patch adds a riscv_isa bitmap which represents the Host ISA features
> common across all Host CPUs. The riscv_isa bitmap is not the same as
> elf_hwcap because elf_hwcap will only have ISA features relevant for
> user-space apps, whereas riscv_isa will have ISA features relevant to
> both kernel and user-space apps.
>
> One of the use-case for riscv_isa bitmap is in KVM hypervisor where
> we will use it to do following operations:
>
> 1. Check whether hypervisor extension is available
> 2. Find ISA features that need to be virtualized (e.g. floating
>point support, vector extension, etc.)
>
> Signed-off-by: Anup Patel 
> Signed-off-by: Atish Patra 
> Reviewed-by: Alexander Graf 

Can you consider this patch for Linux-5.4-rcX?

Regards,
Anup

> ---
>  arch/riscv/include/asm/hwcap.h | 22 +
>  arch/riscv/kernel/cpufeature.c | 83 --
>  2 files changed, 102 insertions(+), 3 deletions(-)
>
> diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
> index 7ecb7c6a57b1..5989dd4426d1 100644
> --- a/arch/riscv/include/asm/hwcap.h
> +++ b/arch/riscv/include/asm/hwcap.h
> @@ -8,6 +8,7 @@
>  #ifndef __ASM_HWCAP_H
>  #define __ASM_HWCAP_H
>
> +#include 
>  #include 
>
>  #ifndef __ASSEMBLY__
> @@ -22,5 +23,26 @@ enum {
>  };
>
>  extern unsigned long elf_hwcap;
> +
> +#define RISCV_ISA_EXT_a		('a' - 'a')
> +#define RISCV_ISA_EXT_c		('c' - 'a')
> +#define RISCV_ISA_EXT_d		('d' - 'a')
> +#define RISCV_ISA_EXT_f		('f' - 'a')
> +#define RISCV_ISA_EXT_h		('h' - 'a')
> +#define RISCV_ISA_EXT_i		('i' - 'a')
> +#define RISCV_ISA_EXT_m		('m' - 'a')
> +#define RISCV_ISA_EXT_s		('s' - 'a')
> +#define RISCV_ISA_EXT_u		('u' - 'a')
> +
> +#define RISCV_ISA_EXT_MAX  256
> +
> +unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap);
> +
> +#define riscv_isa_extension_mask(ext) BIT_MASK(RISCV_ISA_EXT_##ext)
> +
> +bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int 
> bit);
> +#define riscv_isa_extension_available(isa_bitmap, ext) \
> +   __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext)
> +
>  #endif
>  #endif
> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> index eaad5aa07403..64068d36658d 100644
> --- a/arch/riscv/kernel/cpufeature.c
> +++ b/arch/riscv/kernel/cpufeature.c
> @@ -6,21 +6,64 @@
>   * Copyright (C) 2017 SiFive
>   */
>
> +#include 
>  #include 
>  #include 
>  #include 
>  #include 
>
>  unsigned long elf_hwcap __read_mostly;
> +
> +/* Host ISA bitmap */
> +static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly;
> +
>  #ifdef CONFIG_FPU
>  bool has_fpu __read_mostly;
>  #endif
>
> +/**
> + * riscv_isa_extension_base() - Get base extension word
> + *
> + * @isa_bitmap: ISA bitmap to use
> + * Return: base extension word as unsigned long value
> + *
> + * NOTE: If isa_bitmap is NULL then Host ISA bitmap will be used.
> + */
> +unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap)
> +{
> +   if (!isa_bitmap)
> +   return riscv_isa[0];
> +   return isa_bitmap[0];
> +}
> +EXPORT_SYMBOL_GPL(riscv_isa_extension_base);
> +
> +/**
> + * __riscv_isa_extension_available() - Check whether given extension
> + * is available or not
> + *
> + * @isa_bitmap: ISA bitmap to use
> + * @bit: bit position of the desired extension
> + * Return: true or false
> + *
> + * NOTE: If isa_bitmap is NULL then Host ISA bitmap will be used.
> + */
> +bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int 
> bit)
> +{
> +   const unsigned long *bmap = (isa_bitmap) ? isa_bitmap : riscv_isa;
> +
> +   if (bit >= RISCV_ISA_EXT_MAX)
> +   return false;
> +
> +   return test_bit(bit, bmap) ? true : false;
> +}
> +EXPORT_SYMBOL_GPL(__riscv_isa_extension_available);
> +
>  void riscv_fill_hwcap(void)
>  {
> struct device_node *node;
> const char *isa;
> -   size_t i;
> +   char print_str[BITS_PER_LONG+1];
> +   size_t i, j, isa_len;
> static unsigned long isa2hwcap[256] = {0};
>
> isa2hwcap['i'] = isa2hwcap['I'] = COMPAT_HWCAP_ISA_I;
> @@ -32,8 +75,11 @@ void riscv_fill_hwcap(void)
>
> elf_hwcap = 0;
>
> +   bitmap_zero(riscv_isa, RISCV_ISA_EXT_MAX);
> +
> for_each_of_cpu_node(node) {
> unsigned long this_hwcap = 0;
> +  
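
The intended call sites for these helpers look roughly like the sketch
below; passing a NULL bitmap selects the Host ISA bitmap, per the
riscv_isa_extension_available() wrapper above (the vcpu_isa variable is
illustrative only):

    /* host-wide query, e.g. KVM probing for the hypervisor extension */
    if (!riscv_isa_extension_available(NULL, h))
    	return -ENODEV;

    /* query against a caller-provided bitmap */
    if (riscv_isa_extension_available(&vcpu_isa, f))
    	/* guest has single-precision FP to virtualize */;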

[PATCH v9 22/22] RISC-V: KVM: Add MAINTAINERS entry

2019-10-16 Thread Anup Patel
Add myself as maintainer for KVM RISC-V and Atish as designated reviewer.

Signed-off-by: Atish Patra 
Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Alexander Graf 
---
 MAINTAINERS | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index a69e6db80c79..b73b9488a7c2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8979,6 +8979,16 @@ F:   arch/powerpc/include/asm/kvm*
 F: arch/powerpc/kvm/
 F: arch/powerpc/kernel/kvm*
 
+KERNEL VIRTUAL MACHINE FOR RISC-V (KVM/riscv)
+M: Anup Patel 
+R: Atish Patra 
+L: k...@vger.kernel.org
+T: git git://github.com/kvm-riscv/linux.git
+S: Maintained
+F: arch/riscv/include/uapi/asm/kvm*
+F: arch/riscv/include/asm/kvm*
+F: arch/riscv/kvm/
+
 KERNEL VIRTUAL MACHINE for s390 (KVM/s390)
 M: Christian Borntraeger 
 M: Janosch Frank 
-- 
2.17.1



[PATCH v9 20/22] RISC-V: KVM: Fix race-condition in kvm_riscv_vcpu_sync_interrupts()

2019-10-16 Thread Anup Patel
Currently, we sync up the Guest VSIP and VSIE CSRs with HW state upon
VM-exit. This helps us track the enable/disable state of interrupts
and VSIP.SSIP bit updates by the Guest.

Unfortunately, the implementation of kvm_riscv_vcpu_sync_interrupts()
is racy when running an SMP Guest on an SMP Host because it can happen
that IRQ_S_SOFT is already queued from another Host CPU and we might
accidentally clear a valid pending IRQ_S_SOFT.

To fix this, we use test_and_set_bit() to update irqs_pending_mask
in kvm_riscv_vcpu_sync_interrupts() instead of directly calling
kvm_riscv_vcpu_set/unset_interrupt() functions.

Signed-off-by: Anup Patel 
---
 arch/riscv/kvm/vcpu.c | 23 +++
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index f1a218d3a8cf..844542dd13e4 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -662,15 +662,22 @@ void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu 
*vcpu)
 
 void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu)
 {
-   vcpu->arch.guest_csr.vsip = csr_read(CSR_VSIP);
-   vcpu->arch.guest_csr.vsie = csr_read(CSR_VSIE);
+   unsigned long vsip;
+   struct kvm_vcpu_arch *v = &vcpu->arch;
+   struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
 
-   /* Guest can directly update VSIP software interrupt bits */
-   if (vcpu->arch.guest_csr.vsip ^ READ_ONCE(vcpu->arch.irqs_pending)) {
-   if (vcpu->arch.guest_csr.vsip & (1UL << IRQ_S_SOFT))
-   kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_S_SOFT);
-   else
-   kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_S_SOFT);
+   /* Read current VSIP and VSIE CSRs */
+   vsip = csr_read(CSR_VSIP);
+   csr->vsie = csr_read(CSR_VSIE);
+
+   /* Sync-up VSIP.SSIP bit changes done by Guest */
+   if ((csr->vsip ^ vsip) & (1UL << IRQ_S_SOFT)) {
+   if (!test_and_set_bit(IRQ_S_SOFT, &v->irqs_pending_mask)) {
+   if (vsip & (1UL << IRQ_S_SOFT))
+   set_bit(IRQ_S_SOFT, &v->irqs_pending);
+   else
+   clear_bit(IRQ_S_SOFT, &v->irqs_pending);
+   }
}
 }
 
-- 
2.17.1
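
To see why the old read-modify-write was racy, consider this
interleaving (a sketch, with CPU1 running kvm_riscv_vcpu_set_interrupt()
for the same VCPU):

    /*
     * CPU0 (sync_interrupts)           CPU1 (set_interrupt)
     * ----------------------           --------------------
     * reads VSIP, sees SSIP clear
     *                                  set_bit(IRQ_S_SOFT, irqs_pending)
     * unset_interrupt() clears SSIP    <- queued IPI is lost
     */

Publishing the change through irqs_pending_mask with test_and_set_bit()
makes the update atomic with respect to concurrent set/unset callers.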



[PATCH v9 21/22] RISC-V: KVM: Document RISC-V specific parts of KVM API.

2019-10-16 Thread Anup Patel
Document RISC-V specific parts of the KVM API, such as:
 - The interrupt numbers passed to the KVM_INTERRUPT ioctl.
 - The states supported by the KVM_{GET,SET}_MP_STATE ioctls.
 - The registers supported by the KVM_{GET,SET}_ONE_REG interface
   and the encoding of those register ids.
 - The exit reason KVM_EXIT_RISCV_SBI for SBI calls forwarded to
   userspace tool.

Signed-off-by: Anup Patel 
---
 Documentation/virt/kvm/api.txt | 158 +++--
 1 file changed, 151 insertions(+), 7 deletions(-)

diff --git a/Documentation/virt/kvm/api.txt b/Documentation/virt/kvm/api.txt
index 4833904d32a5..83d654f463b6 100644
--- a/Documentation/virt/kvm/api.txt
+++ b/Documentation/virt/kvm/api.txt
@@ -471,7 +471,7 @@ struct kvm_translation {
 4.16 KVM_INTERRUPT
 
 Capability: basic
-Architectures: x86, ppc, mips
+Architectures: x86, ppc, mips, riscv
 Type: vcpu ioctl
 Parameters: struct kvm_interrupt (in)
 Returns: 0 on success, negative on failure.
@@ -531,6 +531,22 @@ interrupt number dequeues the interrupt.
 
 This is an asynchronous vcpu ioctl and can be invoked from any thread.
 
+RISC-V:
+
+Queues an external interrupt to be injected into the virtual CPU. This ioctl
+is overloaded with 2 different irq values:
+
+a) KVM_INTERRUPT_SET
+
+  This sets an external interrupt for a virtual CPU; it will be delivered
+  once the VCPU is ready to receive it.
+
+b) KVM_INTERRUPT_UNSET
+
+  This clears the pending external interrupt for a virtual CPU.
+
+This is an asynchronous vcpu ioctl and can be invoked from any thread.
+
 
 4.17 KVM_DEBUG_GUEST
 
@@ -1219,7 +1235,7 @@ for vm-wide capabilities.
 4.38 KVM_GET_MP_STATE
 
 Capability: KVM_CAP_MP_STATE
-Architectures: x86, s390, arm, arm64
+Architectures: x86, s390, arm, arm64, riscv
 Type: vcpu ioctl
 Parameters: struct kvm_mp_state (out)
 Returns: 0 on success; -1 on error
@@ -1233,7 +1249,8 @@ uniprocessor guests).
 
 Possible values are:
 
- - KVM_MP_STATE_RUNNABLE:the vcpu is currently running [x86,arm/arm64]
+ - KVM_MP_STATE_RUNNABLE:the vcpu is currently running
+ [x86,arm/arm64,riscv]
  - KVM_MP_STATE_UNINITIALIZED:   the vcpu is an application processor (AP)
  which has not yet received an INIT signal 
[x86]
  - KVM_MP_STATE_INIT_RECEIVED:   the vcpu has received an INIT signal, and is
@@ -1242,7 +1259,7 @@ Possible values are:
  is waiting for an interrupt [x86]
  - KVM_MP_STATE_SIPI_RECEIVED:   the vcpu has just received a SIPI (vector
  accessible via KVM_GET_VCPU_EVENTS) [x86]
- - KVM_MP_STATE_STOPPED: the vcpu is stopped [s390,arm/arm64]
+ - KVM_MP_STATE_STOPPED: the vcpu is stopped [s390,arm/arm64,riscv]
  - KVM_MP_STATE_CHECK_STOP:  the vcpu is in a special error state [s390]
  - KVM_MP_STATE_OPERATING:   the vcpu is operating (running or halted)
  [s390]
@@ -1253,7 +1270,7 @@ On x86, this ioctl is only useful after 
KVM_CREATE_IRQCHIP. Without an
 in-kernel irqchip, the multiprocessing state must be maintained by userspace on
 these architectures.
 
-For arm/arm64:
+For arm/arm64/riscv:
 
 The only states that are valid are KVM_MP_STATE_STOPPED and
 KVM_MP_STATE_RUNNABLE which reflect if the vcpu is paused or not.
@@ -1261,7 +1278,7 @@ KVM_MP_STATE_RUNNABLE which reflect if the vcpu is paused 
or not.
 4.39 KVM_SET_MP_STATE
 
 Capability: KVM_CAP_MP_STATE
-Architectures: x86, s390, arm, arm64
+Architectures: x86, s390, arm, arm64, riscv
 Type: vcpu ioctl
 Parameters: struct kvm_mp_state (in)
 Returns: 0 on success; -1 on error
@@ -1273,7 +1290,7 @@ On x86, this ioctl is only useful after 
KVM_CREATE_IRQCHIP. Without an
 in-kernel irqchip, the multiprocessing state must be maintained by userspace on
 these architectures.
 
-For arm/arm64:
+For arm/arm64/riscv:
 
 The only states that are valid are KVM_MP_STATE_STOPPED and
 KVM_MP_STATE_RUNNABLE which reflect if the vcpu should be paused or not.
@@ -2282,6 +2299,116 @@ following id bit patterns:
   0x7020  0003 02 <0:3> 
 
 
+RISC-V registers are mapped using the lower 32 bits. The upper 8 bits of
+that are the register group type.
+
+RISC-V config registers are meant for configuring a Guest VCPU and they
+have the following id bit patterns:
+  0x8020 0000 01 <reg:24> (32bit Host)
+  0x8030 0000 01 <reg:24> (64bit Host)
+
+Following are the RISC-V config registers:
+
+  Encoding               Register  Description
+  ------------------------------------------------------------------
+  0x80x0 0000 0100 0000  isa       ISA feature bitmap of Guest VCPU
+  0x80x0 0000 0100 0001  tbfreq    Time base frequency
+
+The isa config register can be read anytime but can only be written before
+a Guest VCPU runs. By default it will have the ISA feature bits matching
+the underlying host. The tbfreq config register is a read-only register
+and it will return the host timebase frequency.
+
+RISC-V core registers represent the general execution state of a
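
As a userspace-side illustration, reading the isa config register on a
64-bit host could look like the sketch below; the id 0x8030 0000 0100 0000
is KVM_REG_RISCV | KVM_REG_SIZE_U64 | the config type | register index 0,
and the helper name is an assumption:

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static int read_guest_isa(int vcpu_fd, uint64_t *isa)
    {
    	struct kvm_one_reg reg = {
    		.id   = 0x8030000001000000ULL,	/* assumed: isa config reg */
    		.addr = (uintptr_t)isa,
    	};
    	return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
    }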

[PATCH v9 18/22] RISC-V: KVM: Simplify stage2 page table programming

2019-10-16 Thread Anup Patel
Instead of dealing with PGD, PMD, and PTE differently in stage2
page table programming, we can simply use iterative and recursive
helper functions to program stage2 page tables of any level.

This patch re-implements stage2_get_leaf_entry(), stage2_set_pte(),
stage2_map_page(), stage2_op_pte(), stage2_unmap_range(), and
stage2_wp_range() helper functions as mentioned above.

Signed-off-by: Anup Patel 
---
 arch/riscv/kvm/mmu.c | 469 +++
 1 file changed, 164 insertions(+), 305 deletions(-)

diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
index 5aa5ea5ef8f6..fe86cae4cf42 100644
--- a/arch/riscv/kvm/mmu.c
+++ b/arch/riscv/kvm/mmu.c
@@ -21,15 +21,56 @@
 #ifdef CONFIG_64BIT
 #define stage2_have_pmd		true
 #define stage2_gpa_size		((phys_addr_t)(1ULL << 39))
-#define stage2_cache_min_pages 2
+#define stage2_pgd_levels  3
+#define stage2_index_bits  9
 #else
-#define pmd_index(x)   0
-#define pfn_pmd(x, y)  ({ pmd_t __x = { 0 }; __x; })
 #define stage2_have_pmd		false
 #define stage2_gpa_size		((phys_addr_t)(1ULL << 32))
-#define stage2_cache_min_pages 1
+#define stage2_pgd_levels  2
+#define stage2_index_bits  10
 #endif
 
+#define stage2_pte_index(addr, level) \
+(((addr) >> (PAGE_SHIFT + stage2_index_bits * (level))) & (PTRS_PER_PTE - 1))
+
+static inline unsigned long stage2_pte_page_vaddr(pte_t pte)
+{
+   return (unsigned long)pfn_to_virt(pte_val(pte) >> _PAGE_PFN_SHIFT);
+}
+
+static int stage2_page_size_to_level(unsigned long page_size, u32 *out_level)
+{
+   if (page_size == PAGE_SIZE)
+   *out_level = 0;
+   else if (page_size == PMD_SIZE)
+   *out_level = 1;
+   else if (page_size == PGDIR_SIZE)
+   *out_level = (stage2_have_pmd) ? 2 : 1;
+   else
+   return -EINVAL;
+
+   return 0;
+}
+
+static int stage2_level_to_page_size(u32 level, unsigned long *out_pgsize)
+{
+   switch (level) {
+   case 0:
+   *out_pgsize = PAGE_SIZE;
+   break;
+   case 1:
+   *out_pgsize = (stage2_have_pmd) ? PMD_SIZE : PGDIR_SIZE;
+   break;
+   case 2:
+   *out_pgsize = PGDIR_SIZE;
+   break;
+   default:
+   return -EINVAL;
+   }
+
+   return 0;
+}
+
 static int stage2_cache_topup(struct kvm_mmu_page_cache *pcache,
  int min, int max)
 {
@@ -67,61 +108,30 @@ static void *stage2_cache_alloc(struct kvm_mmu_page_cache 
*pcache)
return p;
 }
 
-static int stage2_pgdp_test_and_clear_young(pgd_t *pgd)
-{
-   return ptep_test_and_clear_young(NULL, 0, (pte_t *)pgd);
-}
-
-static int stage2_pmdp_test_and_clear_young(pmd_t *pmd)
-{
-   return ptep_test_and_clear_young(NULL, 0, (pte_t *)pmd);
-}
-
-static int stage2_ptep_test_and_clear_young(pte_t *pte)
-{
-   return ptep_test_and_clear_young(NULL, 0, pte);
-}
-
 static bool stage2_get_leaf_entry(struct kvm *kvm, gpa_t addr,
- pgd_t **pgdpp, pmd_t **pmdpp, pte_t **ptepp)
+ pte_t **ptepp, u32 *ptep_level)
 {
-   pgd_t *pgdp;
-   pmd_t *pmdp;
pte_t *ptep;
-
-   *pgdpp = NULL;
-   *pmdpp = NULL;
-   *ptepp = NULL;
-
-   pgdp = &kvm->arch.pgd[pgd_index(addr)];
-   if (!pgd_val(*pgdp))
-   return false;
-   if (pgd_val(*pgdp) & _PAGE_LEAF) {
-   *pgdpp = pgdp;
-   return true;
-   }
-
-   if (stage2_have_pmd) {
-   pmdp = (void *)pgd_page_vaddr(*pgdp);
-   pmdp = &pmdp[pmd_index(addr)];
-   if (!pmd_present(*pmdp))
-   return false;
-   if (pmd_val(*pmdp) & _PAGE_LEAF) {
-   *pmdpp = pmdp;
+   u32 current_level = stage2_pgd_levels - 1;
+
+   *ptep_level = current_level;
+   ptep = (pte_t *)kvm->arch.pgd;
+   ptep = &ptep[stage2_pte_index(addr, current_level)];
+   while (ptep && pte_val(*ptep)) {
+   if (pte_val(*ptep) & _PAGE_LEAF) {
+   *ptep_level = current_level;
+   *ptepp = ptep;
return true;
}
 
-   ptep = (void *)pmd_page_vaddr(*pmdp);
-   } else {
-   ptep = (void *)pgd_page_vaddr(*pgdp);
-   }
-
-   ptep = &ptep[pte_index(addr)];
-   if (!pte_present(*ptep))
-   return false;
-   if (pte_val(*ptep) & _PAGE_LEAF) {
-   *ptepp = ptep;
-   return true;
+   if (current_level) {
+   current_level--;
+   *ptep_level = current_level;
+   ptep = (pte_t *)stage2_pte_page_vaddr(*ptep);
+   ptep = &ptep[stage2_pte_index(addr, current_level)];
+ 
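
The walk is now driven purely by (level, index) arithmetic. A toy model
of stage2_pte_index() for the 64-bit Sv39 (three-level) case, with the
constants inlined for illustration:

    /* PAGE_SHIFT = 12, stage2_index_bits = 9, PTRS_PER_PTE = 512 */
    static unsigned long toy_stage2_pte_index(unsigned long addr,
    					      unsigned int level)
    {
    	return (addr >> (12 + 9 * level)) & (512 - 1);
    }
    /* level 2 -> GPA bits 38:30, level 1 -> 29:21, level 0 -> 20:12 */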

[PATCH v9 19/22] RISC-V: KVM: Remove per-CPU vsip_shadow variable

2019-10-16 Thread Anup Patel
Currently, we track the last value written to the VSIP CSR using a per-CPU
vsip_shadow variable, but this easily goes out of sync because the
Guest can update the VSIP.SSIP bit directly.

To simplify things, we remove per-CPU vsip_shadow variable and
unconditionally write vcpu->arch.guest_csr.vsip to VSIP CSR in
run-loop.

Signed-off-by: Anup Patel 
---
 arch/riscv/include/asm/kvm_host.h |  3 ---
 arch/riscv/kvm/main.c |  6 --
 arch/riscv/kvm/vcpu.c | 24 +---
 3 files changed, 1 insertion(+), 32 deletions(-)

diff --git a/arch/riscv/include/asm/kvm_host.h 
b/arch/riscv/include/asm/kvm_host.h
index ec1ca4bc98f2..cd86acaed055 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -202,9 +202,6 @@ static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu 
*vcpu) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
 
-int kvm_riscv_setup_vsip(void);
-void kvm_riscv_cleanup_vsip(void);
-
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 int kvm_unmap_hva_range(struct kvm *kvm,
unsigned long start, unsigned long end);
diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c
index 55df85184241..002301a27d29 100644
--- a/arch/riscv/kvm/main.c
+++ b/arch/riscv/kvm/main.c
@@ -61,17 +61,11 @@ void kvm_arch_hardware_disable(void)
 
 int kvm_arch_init(void *opaque)
 {
-   int ret;
-
if (!riscv_isa_extension_available(NULL, h)) {
kvm_info("hypervisor extension not available\n");
return -ENODEV;
}
 
-   ret = kvm_riscv_setup_vsip();
-   if (ret)
-   return ret;
-
kvm_riscv_stage2_vmid_detect();
 
kvm_info("hypervisor extension available\n");
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index fd77cd39dd8c..f1a218d3a8cf 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -111,8 +111,6 @@ static void kvm_riscv_vcpu_host_fp_restore(struct 
kvm_cpu_context *cntx) {}
 riscv_isa_extension_mask(s) | \
 riscv_isa_extension_mask(u))
 
-static unsigned long __percpu *vsip_shadow;
-
 static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
 {
struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
@@ -765,7 +763,6 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu 
*vcpu,
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
-   unsigned long *vsip = raw_cpu_ptr(vsip_shadow);
 
csr_write(CSR_VSSTATUS, csr->vsstatus);
csr_write(CSR_VSIE, csr->vsie);
@@ -775,7 +772,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
csr_write(CSR_VSCAUSE, csr->vscause);
csr_write(CSR_VSTVAL, csr->vstval);
csr_write(CSR_VSIP, csr->vsip);
-   *vsip = csr->vsip;
csr_write(CSR_VSATP, csr->vsatp);
 
kvm_riscv_stage2_update_hgatp(vcpu);
@@ -843,26 +839,8 @@ static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu 
*vcpu)
 static void kvm_riscv_update_vsip(struct kvm_vcpu *vcpu)
 {
struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
-   unsigned long *vsip = raw_cpu_ptr(vsip_shadow);
-
-   if (*vsip != csr->vsip) {
-   csr_write(CSR_VSIP, csr->vsip);
-   *vsip = csr->vsip;
-   }
-}
-
-int kvm_riscv_setup_vsip(void)
-{
-   vsip_shadow = alloc_percpu(unsigned long);
-   if (!vsip_shadow)
-   return -ENOMEM;
 
-   return 0;
-}
-
-void kvm_riscv_cleanup_vsip(void)
-{
-   free_percpu(vsip_shadow);
+   csr_write(CSR_VSIP, csr->vsip);
 }
 
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
-- 
2.17.1



[PATCH v9 17/22] RISC-V: KVM: Forward unhandled SBI calls to userspace

2019-10-16 Thread Anup Patel
Instead of returning an error to the Guest for unhandled SBI calls, we
should forward such SBI calls to the KVM user-space tool (QEMU/KVMTOOL).

This way, the KVM userspace tool can do something about unhandled SBI calls:
1. Print the unhandled SBI call details and kill the Guest
2. Emulate the unhandled SBI call and resume the Guest

To achieve this, we end up with a RISC-V specific SBI exit reason
and a riscv_sbi member under "struct kvm_run". The riscv_sbi member of
"struct kvm_run" added by this patch is compatible with both the SBI v0.1
and SBI v0.2 specs.

Currently, we implement SBI v0.1 for the Guest, where the CONSOLE_GETCHAR
and CONSOLE_PUTCHAR SBI calls are unhandled in the KVM RISC-V kernel
module, so we forward these calls to userspace. In the future, when we
implement SBI v0.2 for the Guest, we will forward SBI v0.2 experimental
and vendor extension calls to userspace.

Signed-off-by: Anup Patel 
---
 arch/riscv/include/asm/kvm_host.h |  8 +
 arch/riscv/kvm/vcpu.c |  9 +
 arch/riscv/kvm/vcpu_sbi.c | 56 +++
 include/uapi/linux/kvm.h  |  8 +
 4 files changed, 75 insertions(+), 6 deletions(-)

diff --git a/arch/riscv/include/asm/kvm_host.h 
b/arch/riscv/include/asm/kvm_host.h
index 58cb6789f502..ec1ca4bc98f2 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -74,6 +74,10 @@ struct kvm_mmio_decode {
int return_handled;
 };
 
+struct kvm_sbi_context {
+   int return_handled;
+};
+
 #define KVM_MMU_PAGE_CACHE_NR_OBJS 32
 
 struct kvm_mmu_page_cache {
@@ -176,6 +180,9 @@ struct kvm_vcpu_arch {
/* MMIO instruction details */
struct kvm_mmio_decode mmio_decode;
 
+   /* SBI context */
+   struct kvm_sbi_context sbi_context;
+
/* Cache pages needed to program page tables with spinlock held */
struct kvm_mmu_page_cache mmu_page_cache;
 
@@ -250,6 +257,7 @@ bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, 
unsigned long mask);
 void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu);
 void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu);
 
+int kvm_riscv_vcpu_sbi_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run);
 
 #endif /* __RISCV_KVM_HOST_H__ */
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index 092b209644de..fd77cd39dd8c 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -884,6 +884,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct 
kvm_run *run)
}
}
 
+   /* Process SBI value returned from user-space */
+   if (run->exit_reason == KVM_EXIT_RISCV_SBI) {
+   ret = kvm_riscv_vcpu_sbi_return(vcpu, vcpu->run);
+   if (ret) {
+   srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
+   return ret;
+   }
+   }
+
if (run->immediate_exit) {
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
return -EINTR;
diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c
index 3d0c33c94daf..901ac73d23cf 100644
--- a/arch/riscv/kvm/vcpu_sbi.c
+++ b/arch/riscv/kvm/vcpu_sbi.c
@@ -31,6 +31,44 @@ static void kvm_sbi_system_shutdown(struct kvm_vcpu *vcpu,
run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
 }
 
+static void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu,
+  struct kvm_run *run)
+{
+   struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+
+   vcpu->arch.sbi_context.return_handled = 0;
+   run->exit_reason = KVM_EXIT_RISCV_SBI;
+   run->riscv_sbi.extension_id = cp->a7;
+   run->riscv_sbi.function_id = cp->a6;
+   run->riscv_sbi.args[0] = cp->a0;
+   run->riscv_sbi.args[1] = cp->a1;
+   run->riscv_sbi.args[2] = cp->a2;
+   run->riscv_sbi.args[3] = cp->a3;
+   run->riscv_sbi.args[4] = cp->a4;
+   run->riscv_sbi.args[5] = cp->a5;
+   run->riscv_sbi.ret[0] = cp->a0;
+   run->riscv_sbi.ret[1] = cp->a1;
+}
+
+int kvm_riscv_vcpu_sbi_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+   struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+
+   /* Handle SBI return only once */
+   if (vcpu->arch.sbi_context.return_handled)
+   return 0;
+   vcpu->arch.sbi_context.return_handled = 1;
+
+   /* Update return values */
+   cp->a0 = run->riscv_sbi.ret[0];
+   cp->a1 = run->riscv_sbi.ret[1];
+
+   /* Move to next instruction */
+   vcpu->arch.guest_context.sepc += 4;
+
+   return 0;
+}
+
 int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
int i, ret = 1;
@@ -44,6 +82,16 @@ int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct 
kvm_run *run)
return -EINVAL;
 
switch (cp->a7) {
+   cas
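
On the userspace side, servicing a forwarded call would then look
something like this sketch (the 0x1 extension id for the SBI v0.1
CONSOLE_PUTCHAR call is taken from the legacy SBI spec, not from the
kernel headers):

    #include <stdio.h>
    #include <linux/kvm.h>

    static void handle_exit(struct kvm_run *run)
    {
    	if (run->exit_reason == KVM_EXIT_RISCV_SBI &&
    	    run->riscv_sbi.extension_id == 0x1 /* CONSOLE_PUTCHAR */) {
    		putchar((int)run->riscv_sbi.args[0]);
    		run->riscv_sbi.ret[0] = 0;	/* success */
    	}
    }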

[PATCH v9 14/22] RISC-V: KVM: FP lazy save/restore

2019-10-16 Thread Anup Patel
From: Atish Patra 

This patch adds floating point (F and D extension) context save/restore
for guest VCPUs. The FP context is saved and restored lazily, only when
the kernel enters/exits the in-kernel run loop and not during the KVM world
switch. This way FP save/restore has minimal impact on KVM performance.

Signed-off-by: Atish Patra 
Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Alexander Graf 
---
 arch/riscv/include/asm/kvm_host.h |   5 +
 arch/riscv/kernel/asm-offsets.c   |  72 +
 arch/riscv/kvm/vcpu.c |  81 ++
 arch/riscv/kvm/vcpu_switch.S  | 174 ++
 4 files changed, 332 insertions(+)

diff --git a/arch/riscv/include/asm/kvm_host.h 
b/arch/riscv/include/asm/kvm_host.h
index 0ef0aac8143e..8c7f947b31b6 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -117,6 +117,7 @@ struct kvm_cpu_context {
unsigned long sepc;
unsigned long sstatus;
unsigned long hstatus;
+   union __riscv_fp_state fp;
 };
 
 struct kvm_vcpu_csr {
@@ -236,6 +237,10 @@ int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct 
kvm_run *run,
unsigned long scause, unsigned long stval);
 
 void __kvm_riscv_switch_to(struct kvm_vcpu_arch *vcpu_arch);
+void __kvm_riscv_fp_f_save(struct kvm_cpu_context *context);
+void __kvm_riscv_fp_f_restore(struct kvm_cpu_context *context);
+void __kvm_riscv_fp_d_save(struct kvm_cpu_context *context);
+void __kvm_riscv_fp_d_restore(struct kvm_cpu_context *context);
 
 int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
 int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
index 711656710190..9980069a1acf 100644
--- a/arch/riscv/kernel/asm-offsets.c
+++ b/arch/riscv/kernel/asm-offsets.c
@@ -185,6 +185,78 @@ void asm_offsets(void)
OFFSET(KVM_ARCH_HOST_SSCRATCH, kvm_vcpu_arch, host_sscratch);
OFFSET(KVM_ARCH_HOST_STVEC, kvm_vcpu_arch, host_stvec);
 
+   /* F extension */
+
+   OFFSET(KVM_ARCH_FP_F_F0, kvm_cpu_context, fp.f.f[0]);
+   OFFSET(KVM_ARCH_FP_F_F1, kvm_cpu_context, fp.f.f[1]);
+   OFFSET(KVM_ARCH_FP_F_F2, kvm_cpu_context, fp.f.f[2]);
+   OFFSET(KVM_ARCH_FP_F_F3, kvm_cpu_context, fp.f.f[3]);
+   OFFSET(KVM_ARCH_FP_F_F4, kvm_cpu_context, fp.f.f[4]);
+   OFFSET(KVM_ARCH_FP_F_F5, kvm_cpu_context, fp.f.f[5]);
+   OFFSET(KVM_ARCH_FP_F_F6, kvm_cpu_context, fp.f.f[6]);
+   OFFSET(KVM_ARCH_FP_F_F7, kvm_cpu_context, fp.f.f[7]);
+   OFFSET(KVM_ARCH_FP_F_F8, kvm_cpu_context, fp.f.f[8]);
+   OFFSET(KVM_ARCH_FP_F_F9, kvm_cpu_context, fp.f.f[9]);
+   OFFSET(KVM_ARCH_FP_F_F10, kvm_cpu_context, fp.f.f[10]);
+   OFFSET(KVM_ARCH_FP_F_F11, kvm_cpu_context, fp.f.f[11]);
+   OFFSET(KVM_ARCH_FP_F_F12, kvm_cpu_context, fp.f.f[12]);
+   OFFSET(KVM_ARCH_FP_F_F13, kvm_cpu_context, fp.f.f[13]);
+   OFFSET(KVM_ARCH_FP_F_F14, kvm_cpu_context, fp.f.f[14]);
+   OFFSET(KVM_ARCH_FP_F_F15, kvm_cpu_context, fp.f.f[15]);
+   OFFSET(KVM_ARCH_FP_F_F16, kvm_cpu_context, fp.f.f[16]);
+   OFFSET(KVM_ARCH_FP_F_F17, kvm_cpu_context, fp.f.f[17]);
+   OFFSET(KVM_ARCH_FP_F_F18, kvm_cpu_context, fp.f.f[18]);
+   OFFSET(KVM_ARCH_FP_F_F19, kvm_cpu_context, fp.f.f[19]);
+   OFFSET(KVM_ARCH_FP_F_F20, kvm_cpu_context, fp.f.f[20]);
+   OFFSET(KVM_ARCH_FP_F_F21, kvm_cpu_context, fp.f.f[21]);
+   OFFSET(KVM_ARCH_FP_F_F22, kvm_cpu_context, fp.f.f[22]);
+   OFFSET(KVM_ARCH_FP_F_F23, kvm_cpu_context, fp.f.f[23]);
+   OFFSET(KVM_ARCH_FP_F_F24, kvm_cpu_context, fp.f.f[24]);
+   OFFSET(KVM_ARCH_FP_F_F25, kvm_cpu_context, fp.f.f[25]);
+   OFFSET(KVM_ARCH_FP_F_F26, kvm_cpu_context, fp.f.f[26]);
+   OFFSET(KVM_ARCH_FP_F_F27, kvm_cpu_context, fp.f.f[27]);
+   OFFSET(KVM_ARCH_FP_F_F28, kvm_cpu_context, fp.f.f[28]);
+   OFFSET(KVM_ARCH_FP_F_F29, kvm_cpu_context, fp.f.f[29]);
+   OFFSET(KVM_ARCH_FP_F_F30, kvm_cpu_context, fp.f.f[30]);
+   OFFSET(KVM_ARCH_FP_F_F31, kvm_cpu_context, fp.f.f[31]);
+   OFFSET(KVM_ARCH_FP_F_FCSR, kvm_cpu_context, fp.f.fcsr);
+
+   /* D extension */
+
+   OFFSET(KVM_ARCH_FP_D_F0, kvm_cpu_context, fp.d.f[0]);
+   OFFSET(KVM_ARCH_FP_D_F1, kvm_cpu_context, fp.d.f[1]);
+   OFFSET(KVM_ARCH_FP_D_F2, kvm_cpu_context, fp.d.f[2]);
+   OFFSET(KVM_ARCH_FP_D_F3, kvm_cpu_context, fp.d.f[3]);
+   OFFSET(KVM_ARCH_FP_D_F4, kvm_cpu_context, fp.d.f[4]);
+   OFFSET(KVM_ARCH_FP_D_F5, kvm_cpu_context, fp.d.f[5]);
+   OFFSET(KVM_ARCH_FP_D_F6, kvm_cpu_context, fp.d.f[6]);
+   OFFSET(KVM_ARCH_FP_D_F7, kvm_cpu_context, fp.d.f[7]);
+   OFFSET(KVM_ARCH_FP_D_F8, kvm_cpu_context, fp.d.f[8]);
+   OFFSET(KVM_ARCH_FP_D_F9, kvm_cpu_context, fp.d.f[9]);
+   OFFSET(KVM_ARCH_FP_D_F10, kvm_cpu_context, fp.d.f[10]);
+   OFFSET(KVM_ARCH_FP_D_F11
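
The overall shape of the lazy scheme is (a sketch; the actual guards,
which also honour the guest's sstatus.FS state, live in vcpu.c):

    /*
     * kvm_arch_vcpu_load():  save host FP regs, restore guest FP regs
     * kvm_arch_vcpu_put():   save guest FP regs, restore host FP regs
     *
     * __kvm_riscv_switch_to() itself never touches FP state, so the
     * FP save/restore cost is paid per run-loop entry/exit, not per
     * world switch.
     */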

[PATCH v9 13/22] RISC-V: KVM: Add timer functionality

2019-10-16 Thread Anup Patel
From: Atish Patra 

The RISC-V hypervisor specification doesn't have any virtual timer
feature.

Due to this, the guest VCPU timer will be programmed via SBI calls.
The host will use a separate hrtimer event for each guest VCPU to
provide timer functionality. We inject a virtual timer interrupt to
the guest VCPU whenever the guest VCPU hrtimer event expires.

The following features are not supported yet and will be added in
the future:
1. A time offset to adjust guest time from host time
2. A saved next event in guest vcpu for vm migration

Signed-off-by: Atish Patra 
Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
---
 arch/riscv/include/asm/kvm_host.h   |   4 +
 arch/riscv/include/asm/kvm_vcpu_timer.h |  30 +++
 arch/riscv/kvm/Makefile |   2 +-
 arch/riscv/kvm/vcpu.c   |   6 ++
 arch/riscv/kvm/vcpu_timer.c | 110 
 drivers/clocksource/timer-riscv.c   |   8 ++
 include/clocksource/timer-riscv.h   |  16 
 7 files changed, 175 insertions(+), 1 deletion(-)
 create mode 100644 arch/riscv/include/asm/kvm_vcpu_timer.h
 create mode 100644 arch/riscv/kvm/vcpu_timer.c
 create mode 100644 include/clocksource/timer-riscv.h

diff --git a/arch/riscv/include/asm/kvm_host.h 
b/arch/riscv/include/asm/kvm_host.h
index b1493c6cbe83..0ef0aac8143e 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -12,6 +12,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #ifdef CONFIG_64BIT
 #define KVM_MAX_VCPUS  (1U << 16)
@@ -168,6 +169,9 @@ struct kvm_vcpu_arch {
unsigned long irqs_pending;
unsigned long irqs_pending_mask;
 
+   /* VCPU Timer */
+   struct kvm_vcpu_timer timer;
+
/* MMIO instruction details */
struct kvm_mmio_decode mmio_decode;
 
diff --git a/arch/riscv/include/asm/kvm_vcpu_timer.h 
b/arch/riscv/include/asm/kvm_vcpu_timer.h
new file mode 100644
index ..6f904d49e27e
--- /dev/null
+++ b/arch/riscv/include/asm/kvm_vcpu_timer.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Atish Patra 
+ */
+
+#ifndef __KVM_VCPU_RISCV_TIMER_H
+#define __KVM_VCPU_RISCV_TIMER_H
+
+#include 
+
+struct kvm_vcpu_timer {
+   bool init_done;
+   /* Check if the timer is programmed */
+   bool next_set;
+   u64 next_cycles;
+   struct hrtimer hrt;
+   /* Mult & Shift values to get nanosec from cycles */
+   u32 mult;
+   u32 shift;
+};
+
+int kvm_riscv_vcpu_timer_init(struct kvm_vcpu *vcpu);
+int kvm_riscv_vcpu_timer_deinit(struct kvm_vcpu *vcpu);
+int kvm_riscv_vcpu_timer_reset(struct kvm_vcpu *vcpu);
+int kvm_riscv_vcpu_timer_next_event(struct kvm_vcpu *vcpu, u64 ncycles);
+
+#endif
diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile
index c0f57f26c13d..3e0c7558320d 100644
--- a/arch/riscv/kvm/Makefile
+++ b/arch/riscv/kvm/Makefile
@@ -9,6 +9,6 @@ ccflags-y := -Ivirt/kvm -Iarch/riscv/kvm
 kvm-objs := $(common-objs-y)
 
 kvm-objs += main.o vm.o vmid.o tlb.o mmu.o
-kvm-objs += vcpu.o vcpu_exit.o vcpu_switch.o
+kvm-objs += vcpu.o vcpu_exit.o vcpu_switch.o vcpu_timer.o
 
 obj-$(CONFIG_KVM)  += kvm.o
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index 36957802fed4..f9c5ef98becf 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -54,6 +54,8 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
 
memcpy(cntx, reset_cntx, sizeof(*cntx));
 
+   kvm_riscv_vcpu_timer_reset(vcpu);
+
WRITE_ONCE(vcpu->arch.irqs_pending, 0);
WRITE_ONCE(vcpu->arch.irqs_pending_mask, 0);
 }
@@ -108,6 +110,9 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
cntx->hstatus |= HSTATUS_SP2P;
cntx->hstatus |= HSTATUS_SPV;
 
+   /* Setup VCPU timer */
+   kvm_riscv_vcpu_timer_init(vcpu);
+
/* Reset VCPU */
kvm_riscv_reset_vcpu(vcpu);
 
@@ -116,6 +121,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
+   kvm_riscv_vcpu_timer_deinit(vcpu);
kvm_riscv_stage2_flush_cache(vcpu);
kmem_cache_free(kvm_vcpu_cache, vcpu);
 }
diff --git a/arch/riscv/kvm/vcpu_timer.c b/arch/riscv/kvm/vcpu_timer.c
new file mode 100644
index ..4ff5d666f60d
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_timer.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Atish Patra 
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+static u64 kvm_riscv_delta_cycles2ns(u64 cycles, struct kvm_vcpu_timer *t)
+{
+   unsigned long flags;
+   u64 cycles_now, cycles_delta, delta_ns;
+
+   local_irq_save(flags);
+   cycles_now = get_cycles64();
+   if (cycles_now < cycles)
+   cycles_de
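
The helper above converts a cycle delta into nanoseconds using the
precomputed mult/shift pair, after which arming the timer reduces to
something like this sketch (field names per kvm_vcpu_timer above):

    delta_ns = kvm_riscv_delta_cycles2ns(ncycles, t);
    t->next_cycles = ncycles;
    hrtimer_start(&t->hrt, ktime_set(0, delta_ns), HRTIMER_MODE_REL);
    t->next_set = true;

and the hrtimer expiry handler injects IRQ_S_TIMER into the VCPU.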

[PATCH v9 16/22] RISC-V: KVM: Add SBI v0.1 support

2019-10-16 Thread Anup Patel
From: Atish Patra 

The KVM host kernel running in HS-mode needs to handle SBI calls coming
from guest kernel running in VS-mode.

This patch adds SBI v0.1 support in KVM RISC-V. All the SBI calls are
implemented correctly except remote TLB flushes. For remote TLB flushes,
we currently do a full TLB flush; this will be optimized in the future.

Signed-off-by: Atish Patra 
Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
---
 arch/riscv/include/asm/kvm_host.h |   2 +
 arch/riscv/kvm/Makefile   |   2 +-
 arch/riscv/kvm/vcpu_exit.c|   4 ++
 arch/riscv/kvm/vcpu_sbi.c | 107 ++
 4 files changed, 114 insertions(+), 1 deletion(-)
 create mode 100644 arch/riscv/kvm/vcpu_sbi.c

diff --git a/arch/riscv/include/asm/kvm_host.h 
b/arch/riscv/include/asm/kvm_host.h
index 8c7f947b31b6..58cb6789f502 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -250,4 +250,6 @@ bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, 
unsigned long mask);
 void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu);
 void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu);
 
+int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run);
+
 #endif /* __RISCV_KVM_HOST_H__ */
diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile
index 3e0c7558320d..b56dc1650d2c 100644
--- a/arch/riscv/kvm/Makefile
+++ b/arch/riscv/kvm/Makefile
@@ -9,6 +9,6 @@ ccflags-y := -Ivirt/kvm -Iarch/riscv/kvm
 kvm-objs := $(common-objs-y)
 
 kvm-objs += main.o vm.o vmid.o tlb.o mmu.o
-kvm-objs += vcpu.o vcpu_exit.o vcpu_switch.o vcpu_timer.o
+kvm-objs += vcpu.o vcpu_exit.o vcpu_switch.o vcpu_timer.o vcpu_sbi.o
 
 obj-$(CONFIG_KVM)  += kvm.o
diff --git a/arch/riscv/kvm/vcpu_exit.c b/arch/riscv/kvm/vcpu_exit.c
index 7507b859246b..0e9b0ffa169d 100644
--- a/arch/riscv/kvm/vcpu_exit.c
+++ b/arch/riscv/kvm/vcpu_exit.c
@@ -587,6 +587,10 @@ int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct 
kvm_run *run,
(vcpu->arch.guest_context.hstatus & HSTATUS_STL))
ret = stage2_page_fault(vcpu, run, scause, stval);
break;
+   case EXC_SUPERVISOR_SYSCALL:
+   if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV)
+   ret = kvm_riscv_vcpu_sbi_ecall(vcpu, run);
+   break;
default:
break;
};
diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c
new file mode 100644
index ..3d0c33c94daf
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_sbi.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0
+/**
+ * Copyright (c) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Atish Patra 
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define SBI_VERSION_MAJOR  0
+#define SBI_VERSION_MINOR  1
+
+static void kvm_sbi_system_shutdown(struct kvm_vcpu *vcpu,
+   struct kvm_run *run, u32 type)
+{
+   int i;
+   struct kvm_vcpu *tmp;
+
+   kvm_for_each_vcpu(i, tmp, vcpu->kvm)
+   tmp->arch.power_off = true;
+   kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP);
+
+   memset(&run->system_event, 0, sizeof(run->system_event));
+   run->system_event.type = type;
+   run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
+}
+
+int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+   int i, ret = 1;
+   u64 next_cycle;
+   struct kvm_vcpu *rvcpu;
+   bool next_sepc = true;
+   ulong hmask, ut_scause = 0;
+   struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+
+   if (!cp)
+   return -EINVAL;
+
+   switch (cp->a7) {
+   case SBI_EXT_0_1_SET_TIMER:
+#if __riscv_xlen == 32
+   next_cycle = ((u64)cp->a1 << 32) | (u64)cp->a0;
+#else
+   next_cycle = (u64)cp->a0;
+#endif
+   kvm_riscv_vcpu_timer_next_event(vcpu, next_cycle);
+   break;
+   case SBI_EXT_0_1_CLEAR_IPI:
+   kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_S_SOFT);
+   break;
+   case SBI_EXT_0_1_SEND_IPI:
+   hmask = kvm_riscv_vcpu_unpriv_read(vcpu, false, cp->a0,
+  &ut_scause);
+   if (ut_scause) {
+   kvm_riscv_vcpu_trap_redirect(vcpu, ut_scause,
+cp->a0);
+   next_sepc = false;
+   } else {
+   for_each_set_bit(i, &hmask, BITS_PER_LONG) {
+   rvcpu = kvm_get_vcpu_by_id(vcpu->kvm, i);
+   kvm_riscv_vcpu_set_interrupt(rvcpu, IRQ_S_SOFT);
+   }
+   }
+   break;
+   case SBI_EXT_0_1_SHUTDOWN:
+   kvm_sbi_system_shutdown

[PATCH v9 15/22] RISC-V: KVM: Implement ONE REG interface for FP registers

2019-10-16 Thread Anup Patel
From: Atish Patra 

Add a KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctl interface for floating
point registers such as F0-F31 and FCSR. This support is added for
both 'F' and 'D' extensions.

Signed-off-by: Atish Patra 
Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Alexander Graf 
---
 arch/riscv/include/uapi/asm/kvm.h |  10 +++
 arch/riscv/kvm/vcpu.c | 104 ++
 2 files changed, 114 insertions(+)

diff --git a/arch/riscv/include/uapi/asm/kvm.h 
b/arch/riscv/include/uapi/asm/kvm.h
index 997b85f6fded..19811823ab70 100644
--- a/arch/riscv/include/uapi/asm/kvm.h
+++ b/arch/riscv/include/uapi/asm/kvm.h
@@ -96,6 +96,16 @@ struct kvm_riscv_csr {
 #define KVM_REG_RISCV_CSR_REG(name)\
(offsetof(struct kvm_riscv_csr, name) / sizeof(unsigned long))
 
+/* F extension registers are mapped as type4 */
+#define KVM_REG_RISCV_FP_F (0x04 << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_FP_F_REG(name)   \
+   (offsetof(struct __riscv_f_ext_state, name) / sizeof(u32))
+
+/* D extension registers are mapped as type 5 */
+#define KVM_REG_RISCV_FP_D (0x05 << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_FP_D_REG(name)   \
+   (offsetof(struct __riscv_d_ext_state, name) / sizeof(u64))
+
 #endif
 
 #endif /* __LINUX_KVM_RISCV_H */
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index 7ed556979b89..092b209644de 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -430,6 +430,98 @@ static int kvm_riscv_vcpu_set_reg_csr(struct kvm_vcpu 
*vcpu,
return 0;
 }
 
+static int kvm_riscv_vcpu_get_reg_fp(struct kvm_vcpu *vcpu,
+const struct kvm_one_reg *reg,
+unsigned long rtype)
+{
+   struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+   unsigned long isa = vcpu->arch.isa;
+   unsigned long __user *uaddr =
+   (unsigned long __user *)(unsigned long)reg->addr;
+   unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+   KVM_REG_SIZE_MASK |
+   rtype);
+   void *reg_val;
+
+   if ((rtype == KVM_REG_RISCV_FP_F) &&
+   riscv_isa_extension_available(&isa, f)) {
+   if (KVM_REG_SIZE(reg->id) != sizeof(u32))
+   return -EINVAL;
+   if (reg_num == KVM_REG_RISCV_FP_F_REG(fcsr))
+   reg_val = &cntx->fp.f.fcsr;
+   else if ((KVM_REG_RISCV_FP_F_REG(f[0]) <= reg_num) &&
+ reg_num <= KVM_REG_RISCV_FP_F_REG(f[31]))
+   reg_val = &cntx->fp.f.f[reg_num];
+   else
+   return -EINVAL;
+   } else if ((rtype == KVM_REG_RISCV_FP_D) &&
+  riscv_isa_extension_available(&isa, d)) {
+   if (reg_num == KVM_REG_RISCV_FP_D_REG(fcsr)) {
+   if (KVM_REG_SIZE(reg->id) != sizeof(u32))
+   return -EINVAL;
+   reg_val = &cntx->fp.d.fcsr;
+   } else if ((KVM_REG_RISCV_FP_D_REG(f[0]) <= reg_num) &&
+  reg_num <= KVM_REG_RISCV_FP_D_REG(f[31])) {
+   if (KVM_REG_SIZE(reg->id) != sizeof(u64))
+   return -EINVAL;
+   reg_val = &cntx->fp.d.f[reg_num];
+   } else
+   return -EINVAL;
+   } else
+   return -EINVAL;
+
+   if (copy_to_user(uaddr, reg_val, KVM_REG_SIZE(reg->id)))
+   return -EFAULT;
+
+   return 0;
+}
+
+static int kvm_riscv_vcpu_set_reg_fp(struct kvm_vcpu *vcpu,
+const struct kvm_one_reg *reg,
+unsigned long rtype)
+{
+   struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+   unsigned long isa = vcpu->arch.isa;
+   unsigned long __user *uaddr =
+   (unsigned long __user *)(unsigned long)reg->addr;
+   unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+   KVM_REG_SIZE_MASK |
+   rtype);
+   void *reg_val;
+
+   if ((rtype == KVM_REG_RISCV_FP_F) &&
+   riscv_isa_extension_available(&isa, f)) {
+   if (KVM_REG_SIZE(reg->id) != sizeof(u32))
+   return -EINVAL;
+   if (reg_num == KVM_REG_RISCV_FP_F_REG(fcsr))
+   reg_val = &cntx->fp.f.fcsr;
+   else if ((KVM_REG_RISCV_FP_F_REG(f[0]) <= reg_num) &&
+ reg_num <= KVM_REG_RISCV_FP_F_REG(f[31]))
+   reg_val = &cntx->fp.f.f[reg_num];
+   else
+   return -E

[PATCH v9 09/22] RISC-V: KVM: Handle WFI exits for VCPU

2019-10-16 Thread Anup Patel
We get an illegal instruction trap whenever the Guest/VM executes the
WFI instruction.

This patch handles the WFI trap by blocking the trapped VCPU using the
kvm_vcpu_block() API. The blocked VCPU will be automatically resumed
whenever a VCPU interrupt is injected from user-space or from in-kernel
IRQCHIP emulation.

Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
---
 arch/riscv/kvm/vcpu_exit.c | 72 ++
 1 file changed, 72 insertions(+)

diff --git a/arch/riscv/kvm/vcpu_exit.c b/arch/riscv/kvm/vcpu_exit.c
index f1378c0a447f..7507b859246b 100644
--- a/arch/riscv/kvm/vcpu_exit.c
+++ b/arch/riscv/kvm/vcpu_exit.c
@@ -12,6 +12,13 @@
 #include 
 #include 
 
+#define INSN_OPCODE_MASK   0x007c
+#define INSN_OPCODE_SHIFT  2
+#define INSN_OPCODE_SYSTEM 28
+
#define INSN_MASK_WFI  0xffffff00
#define INSN_MATCH_WFI 0x10500000
+
 #define INSN_MATCH_LB  0x3
 #define INSN_MASK_LB   0x707f
 #define INSN_MATCH_LH  0x1003
@@ -116,6 +123,67 @@
 (s32)(((insn) >> 7) & 0x1f))
 #define MASK_FUNCT30x7000
 
+static int truly_illegal_insn(struct kvm_vcpu *vcpu,
+ struct kvm_run *run,
+ ulong insn)
+{
+   /* Redirect trap to Guest VCPU */
+   kvm_riscv_vcpu_trap_redirect(vcpu, EXC_INST_ILLEGAL, insn);
+
+   return 1;
+}
+
+static int system_opcode_insn(struct kvm_vcpu *vcpu,
+ struct kvm_run *run,
+ ulong insn)
+{
+   if ((insn & INSN_MASK_WFI) == INSN_MATCH_WFI) {
+   vcpu->stat.wfi_exit_stat++;
+   if (!kvm_arch_vcpu_runnable(vcpu)) {
+   srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
+   kvm_vcpu_block(vcpu);
+   vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+   kvm_clear_request(KVM_REQ_UNHALT, vcpu);
+   }
+   vcpu->arch.guest_context.sepc += INSN_LEN(insn);
+   return 1;
+   }
+
+   return truly_illegal_insn(vcpu, run, insn);
+}
+
+static int illegal_inst_fault(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ unsigned long insn)
+{
+   unsigned long ut_scause = 0;
+   struct kvm_cpu_context *ct;
+
+   if (unlikely(INSN_IS_16BIT(insn))) {
+   if (insn == 0) {
+   ct = &vcpu->arch.guest_context;
+   insn = kvm_riscv_vcpu_unpriv_read(vcpu, true,
+ ct->sepc,
+ &ut_scause);
+   if (ut_scause) {
+   if (ut_scause == EXC_LOAD_PAGE_FAULT)
+   ut_scause = EXC_INST_PAGE_FAULT;
+   kvm_riscv_vcpu_trap_redirect(vcpu, ut_scause,
+ct->sepc);
+   return 1;
+   }
+   }
+   if (INSN_IS_16BIT(insn))
+   return truly_illegal_insn(vcpu, run, insn);
+   }
+
+   switch ((insn & INSN_OPCODE_MASK) >> INSN_OPCODE_SHIFT) {
+   case INSN_OPCODE_SYSTEM:
+   return system_opcode_insn(vcpu, run, insn);
+   default:
+   return truly_illegal_insn(vcpu, run, insn);
+   }
+}
+
 static int emulate_load(struct kvm_vcpu *vcpu, struct kvm_run *run,
unsigned long fault_addr)
 {
@@ -508,6 +576,10 @@ int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct 
kvm_run *run,
ret = -EFAULT;
run->exit_reason = KVM_EXIT_UNKNOWN;
switch (scause) {
+   case EXC_INST_ILLEGAL:
+   if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV)
+   ret = illegal_inst_fault(vcpu, run, stval);
+   break;
case EXC_INST_PAGE_FAULT:
case EXC_LOAD_PAGE_FAULT:
case EXC_STORE_PAGE_FAULT:
-- 
2.17.1



[PATCH v9 11/22] RISC-V: KVM: Implement stage2 page table programming

2019-10-16 Thread Anup Patel
This patch implements all required functions for programming
the stage2 page table for each Guest/VM.

At a high level, the flow of stage2 related functions is similar
to the KVM ARM/ARM64 implementation, but the stage2 page table
format is quite different for KVM RISC-V.

Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
---
 arch/riscv/include/asm/kvm_host.h |  10 +
 arch/riscv/include/asm/pgtable-bits.h |   1 +
 arch/riscv/kvm/mmu.c  | 646 +-
 3 files changed, 647 insertions(+), 10 deletions(-)

diff --git a/arch/riscv/include/asm/kvm_host.h 
b/arch/riscv/include/asm/kvm_host.h
index 9410468678ae..aba3d80d4bea 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -73,6 +73,13 @@ struct kvm_mmio_decode {
int return_handled;
 };
 
+#define KVM_MMU_PAGE_CACHE_NR_OBJS 32
+
+struct kvm_mmu_page_cache {
+   int nobjs;
+   void *objects[KVM_MMU_PAGE_CACHE_NR_OBJS];
+};
+
 struct kvm_cpu_context {
unsigned long zero;
unsigned long ra;
@@ -164,6 +171,9 @@ struct kvm_vcpu_arch {
/* MMIO instruction details */
struct kvm_mmio_decode mmio_decode;
 
+   /* Cache pages needed to program page tables with spinlock held */
+   struct kvm_mmu_page_cache mmu_page_cache;
+
/* VCPU power-off state */
bool power_off;
 
diff --git a/arch/riscv/include/asm/pgtable-bits.h 
b/arch/riscv/include/asm/pgtable-bits.h
index bbaeb5d35842..be49d62fcc2b 100644
--- a/arch/riscv/include/asm/pgtable-bits.h
+++ b/arch/riscv/include/asm/pgtable-bits.h
@@ -26,6 +26,7 @@
 
 #define _PAGE_SPECIAL   _PAGE_SOFT
 #define _PAGE_TABLE _PAGE_PRESENT
+#define _PAGE_LEAF  (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)
 
 /*
  * _PAGE_PROT_NONE is set on not-present pages (and ignored by the hardware) to
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
index 2b965f9aac07..61a98d63d103 100644
--- a/arch/riscv/kvm/mmu.c
+++ b/arch/riscv/kvm/mmu.c
@@ -18,6 +18,438 @@
 #include 
 #include 
 
+#ifdef CONFIG_64BIT
+#define stage2_have_pmd		true
+#define stage2_gpa_size		((phys_addr_t)(1ULL << 39))
+#define stage2_cache_min_pages 2
+#else
+#define pmd_index(x)   0
+#define pfn_pmd(x, y)  ({ pmd_t __x = { 0 }; __x; })
+#define stage2_have_pmd		false
+#define stage2_gpa_size		((phys_addr_t)(1ULL << 32))
+#define stage2_cache_min_pages 1
+#endif
+
+static int stage2_cache_topup(struct kvm_mmu_page_cache *pcache,
+ int min, int max)
+{
+   void *page;
+
+   BUG_ON(max > KVM_MMU_PAGE_CACHE_NR_OBJS);
+   if (pcache->nobjs >= min)
+   return 0;
+   while (pcache->nobjs < max) {
+   page = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+   if (!page)
+   return -ENOMEM;
+   pcache->objects[pcache->nobjs++] = page;
+   }
+
+   return 0;
+}
+
+static void stage2_cache_flush(struct kvm_mmu_page_cache *pcache)
+{
+   while (pcache && pcache->nobjs)
+   free_page((unsigned long)pcache->objects[--pcache->nobjs]);
+}
+
+static void *stage2_cache_alloc(struct kvm_mmu_page_cache *pcache)
+{
+   void *p;
+
+   if (!pcache)
+   return NULL;
+
+   BUG_ON(!pcache->nobjs);
+   p = pcache->objects[--pcache->nobjs];
+
+   return p;
+}
+
+struct local_guest_tlb_info {
+   struct kvm_vmid *vmid;
+   gpa_t addr;
+};
+
+static void local_guest_tlb_flush_vmid_gpa(void *info)
+{
+   struct local_guest_tlb_info *infop = info;
+
+   __kvm_riscv_hfence_gvma_vmid_gpa(READ_ONCE(infop->vmid->vmid_version),
+infop->addr);
+}
+
+static void stage2_remote_tlb_flush(struct kvm *kvm, gpa_t addr)
+{
+   struct local_guest_tlb_info info;
+   struct kvm_vmid *vmid = &kvm->arch.vmid;
+
+   /*
+* Ideally, we should have a SBI call OR some remote TLB instruction
+* but we don't have it so we explicitly flush TLBs using IPIs.
+*
+* TODO: Instead of cpu_online_mask, we should only target CPUs
+* where the Guest/VM is running.
+*/
+   info.vmid = vmid;
+   info.addr = addr;
+   preempt_disable();
+   smp_call_function_many(cpu_online_mask,
+  local_guest_tlb_flush_vmid_gpa, &info, true);
+   preempt_enable();
+}
+
+static int stage2_set_pgd(struct kvm *kvm, gpa_t addr, const pgd_t *new_pgd)
+{
+   pgd_t *pgdp = &kvm->arch.pgd[pgd_index(addr)];
+
+   *pgdp = *new_pgd;
+   if (pgd_val(*pgdp) & _PAGE_LEAF)
+   stage2_remote_tlb_flush(kvm, addr);
+
+   return 0;
+}
+
+static int stage2_set_pmd(struct kvm *kvm, struct kvm_mmu_page_cache *pcache,
+ gpa_t addr, const pmd_t *new_pmd)
+{
+   int rc;
+   pmd_

[PATCH v9 12/22] RISC-V: KVM: Implement MMU notifiers

2019-10-16 Thread Anup Patel
This patch implements MMU notifiers for KVM RISC-V so that the Guest
physical address space stays in sync with the Host physical address space.

This will allow swapping, page migration, etc. to work transparently
with KVM RISC-V.

Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Alexander Graf 
---
 arch/riscv/include/asm/kvm_host.h |   7 ++
 arch/riscv/kvm/Kconfig|   1 +
 arch/riscv/kvm/mmu.c  | 200 +-
 arch/riscv/kvm/vm.c   |   1 +
 4 files changed, 208 insertions(+), 1 deletion(-)

diff --git a/arch/riscv/include/asm/kvm_host.h 
b/arch/riscv/include/asm/kvm_host.h
index aba3d80d4bea..b1493c6cbe83 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -193,6 +193,13 @@ static inline void kvm_arch_vcpu_block_finish(struct 
kvm_vcpu *vcpu) {}
 int kvm_riscv_setup_vsip(void);
 void kvm_riscv_cleanup_vsip(void);
 
+#define KVM_ARCH_WANT_MMU_NOTIFIER
+int kvm_unmap_hva_range(struct kvm *kvm,
+   unsigned long start, unsigned long end);
+int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
+
 void __kvm_riscv_hfence_gvma_vmid_gpa(unsigned long vmid,
  unsigned long gpa);
 void __kvm_riscv_hfence_gvma_vmid(unsigned long vmid);
diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig
index 9cca98c4673b..d8fa13b0da18 100644
--- a/arch/riscv/kvm/Kconfig
+++ b/arch/riscv/kvm/Kconfig
@@ -20,6 +20,7 @@ if VIRTUALIZATION
 config KVM
tristate "Kernel-based Virtual Machine (KVM) support (EXPERIMENTAL)"
depends on OF
+   select MMU_NOTIFIER
select PREEMPT_NOTIFIERS
select ANON_INODES
select KVM_MMIO
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
index 61a98d63d103..5aa5ea5ef8f6 100644
--- a/arch/riscv/kvm/mmu.c
+++ b/arch/riscv/kvm/mmu.c
@@ -67,6 +67,66 @@ static void *stage2_cache_alloc(struct kvm_mmu_page_cache 
*pcache)
return p;
 }
 
+static int stage2_pgdp_test_and_clear_young(pgd_t *pgd)
+{
+   return ptep_test_and_clear_young(NULL, 0, (pte_t *)pgd);
+}
+
+static int stage2_pmdp_test_and_clear_young(pmd_t *pmd)
+{
+   return ptep_test_and_clear_young(NULL, 0, (pte_t *)pmd);
+}
+
+static int stage2_ptep_test_and_clear_young(pte_t *pte)
+{
+   return ptep_test_and_clear_young(NULL, 0, pte);
+}
+
+static bool stage2_get_leaf_entry(struct kvm *kvm, gpa_t addr,
+ pgd_t **pgdpp, pmd_t **pmdpp, pte_t **ptepp)
+{
+   pgd_t *pgdp;
+   pmd_t *pmdp;
+   pte_t *ptep;
+
+   *pgdpp = NULL;
+   *pmdpp = NULL;
+   *ptepp = NULL;
+
+   pgdp = &kvm->arch.pgd[pgd_index(addr)];
+   if (!pgd_val(*pgdp))
+   return false;
+   if (pgd_val(*pgdp) & _PAGE_LEAF) {
+   *pgdpp = pgdp;
+   return true;
+   }
+
+   if (stage2_have_pmd) {
+   pmdp = (void *)pgd_page_vaddr(*pgdp);
+   pmdp = &pmdp[pmd_index(addr)];
+   if (!pmd_present(*pmdp))
+   return false;
+   if (pmd_val(*pmdp) & _PAGE_LEAF) {
+   *pmdpp = pmdp;
+   return true;
+   }
+
+   ptep = (void *)pmd_page_vaddr(*pmdp);
+   } else {
+   ptep = (void *)pgd_page_vaddr(*pgdp);
+   }
+
+   ptep = &ptep[pte_index(addr)];
+   if (!pte_present(*ptep))
+   return false;
+   if (pte_val(*ptep) & _PAGE_LEAF) {
+   *ptepp = ptep;
+   return true;
+   }
+
+   return false;
+}
+
 struct local_guest_tlb_info {
struct kvm_vmid *vmid;
gpa_t addr;
@@ -450,6 +510,38 @@ int stage2_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t 
hpa,
 
 }
 
+static int handle_hva_to_gpa(struct kvm *kvm,
+unsigned long start,
+unsigned long end,
+int (*handler)(struct kvm *kvm,
+   gpa_t gpa, u64 size,
+   void *data),
+void *data)
+{
+   struct kvm_memslots *slots;
+   struct kvm_memory_slot *memslot;
+   int ret = 0;
+
+   slots = kvm_memslots(kvm);
+
+   /* we only care about the pages that the guest sees */
+   kvm_for_each_memslot(memslot, slots) {
+   unsigned long hva_start, hva_end;
+   gfn_t gpa;
+
+   hva_start = max(start, memslot->userspace_addr);
+   hva_end = min(end, memslot->userspace_addr +
+   (memslot->npages << PAGE_SHIFT));
+   if (hva_start >= hva_end)
+   continue;
+
+   gpa = hva_to_gf
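
The handle_hva_to_gpa() iterator above is the common core of all four
notifier hooks: each hook supplies a small handler that operates on the
guest-physical range backing the faulting host range. A minimal sketch of
the unmap hook (stage2_unmap_range() is a hypothetical helper name; the
patch body is truncated here):

	static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa,
					 u64 size, void *data)
	{
		/* tear down stage2 mappings covering [gpa, gpa + size) */
		stage2_unmap_range(kvm, gpa, size);
		return 0;
	}

	int kvm_unmap_hva_range(struct kvm *kvm,
				unsigned long start, unsigned long end)
	{
		if (!kvm->arch.pgd)
			return 0;

		handle_hva_to_gpa(kvm, start, end,
				  kvm_unmap_hva_handler, NULL);
		return 0;
	}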

[PATCH v9 10/22] RISC-V: KVM: Implement VMID allocator

2019-10-16 Thread Anup Patel
We implement a simple VMID allocator for Guests/VMs which:
1. Detects number of VMID bits at boot-time
2. Uses an atomic number to track VMID version and increments
   VMID version whenever we run out of VMIDs
3. Flushes Guest TLBs on all host CPUs whenever we run out
   of VMIDs
4. Force-updates HW Stage2 VMID for each Guest VCPU whenever
   VMID changes using VCPU request KVM_REQ_UPDATE_HGATP

Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Alexander Graf 
---
 arch/riscv/include/asm/kvm_host.h |  25 ++
 arch/riscv/kvm/Makefile   |   3 +-
 arch/riscv/kvm/main.c |   4 +
 arch/riscv/kvm/tlb.S  |  43 +++
 arch/riscv/kvm/vcpu.c |   9 +++
 arch/riscv/kvm/vm.c   |   6 ++
 arch/riscv/kvm/vmid.c | 123 ++
 7 files changed, 212 insertions(+), 1 deletion(-)
 create mode 100644 arch/riscv/kvm/tlb.S
 create mode 100644 arch/riscv/kvm/vmid.c

diff --git a/arch/riscv/include/asm/kvm_host.h 
b/arch/riscv/include/asm/kvm_host.h
index 7041d2a9304a..9410468678ae 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -27,6 +27,7 @@
 #define KVM_REQ_SLEEP \
KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(1)
+#define KVM_REQ_UPDATE_HGATP   KVM_ARCH_REQ(2)
 
 struct kvm_vm_stat {
ulong remote_tlb_flush;
@@ -47,7 +48,19 @@ struct kvm_vcpu_stat {
 struct kvm_arch_memory_slot {
 };
 
+struct kvm_vmid {
+   /*
+* Writes to vmid_version and vmid happen with vmid_lock held
+* whereas reads happen without any lock held.
+*/
+   unsigned long vmid_version;
+   unsigned long vmid;
+};
+
 struct kvm_arch {
+   /* stage2 vmid */
+   struct kvm_vmid vmid;
+
/* stage2 page table */
pgd_t *pgd;
phys_addr_t pgd_phys;
@@ -170,6 +183,12 @@ static inline void kvm_arch_vcpu_block_finish(struct 
kvm_vcpu *vcpu) {}
 int kvm_riscv_setup_vsip(void);
 void kvm_riscv_cleanup_vsip(void);
 
+void __kvm_riscv_hfence_gvma_vmid_gpa(unsigned long vmid,
+ unsigned long gpa);
+void __kvm_riscv_hfence_gvma_vmid(unsigned long vmid);
+void __kvm_riscv_hfence_gvma_gpa(unsigned long gpa);
+void __kvm_riscv_hfence_gvma_all(void);
+
 int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned long hva,
 bool is_write);
 void kvm_riscv_stage2_flush_cache(struct kvm_vcpu *vcpu);
@@ -177,6 +196,12 @@ int kvm_riscv_stage2_alloc_pgd(struct kvm *kvm);
 void kvm_riscv_stage2_free_pgd(struct kvm *kvm);
 void kvm_riscv_stage2_update_hgatp(struct kvm_vcpu *vcpu);
 
+void kvm_riscv_stage2_vmid_detect(void);
+unsigned long kvm_riscv_stage2_vmid_bits(void);
+int kvm_riscv_stage2_vmid_init(struct kvm *kvm);
+bool kvm_riscv_stage2_vmid_ver_changed(struct kvm_vmid *vmid);
+void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu);
+
 void __kvm_riscv_unpriv_trap(void);
 
 unsigned long kvm_riscv_vcpu_unpriv_read(struct kvm_vcpu *vcpu,
diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile
index 845579273727..c0f57f26c13d 100644
--- a/arch/riscv/kvm/Makefile
+++ b/arch/riscv/kvm/Makefile
@@ -8,6 +8,7 @@ ccflags-y := -Ivirt/kvm -Iarch/riscv/kvm
 
 kvm-objs := $(common-objs-y)
 
-kvm-objs += main.o vm.o mmu.o vcpu.o vcpu_exit.o vcpu_switch.o
+kvm-objs += main.o vm.o vmid.o tlb.o mmu.o
+kvm-objs += vcpu.o vcpu_exit.o vcpu_switch.o
 
 obj-$(CONFIG_KVM)  += kvm.o
diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c
index d088247843c5..55df85184241 100644
--- a/arch/riscv/kvm/main.c
+++ b/arch/riscv/kvm/main.c
@@ -72,8 +72,12 @@ int kvm_arch_init(void *opaque)
if (ret)
return ret;
 
+   kvm_riscv_stage2_vmid_detect();
+
kvm_info("hypervisor extension available\n");
 
+   kvm_info("host has %ld VMID bits\n", kvm_riscv_stage2_vmid_bits());
+
return 0;
 }
 
diff --git a/arch/riscv/kvm/tlb.S b/arch/riscv/kvm/tlb.S
new file mode 100644
index ..453fca8d7940
--- /dev/null
+++ b/arch/riscv/kvm/tlb.S
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Anup Patel 
+ */
+
+#include 
+#include 
+
+   .text
+   .altmacro
+   .option norelax
+
+   /*
+* Instruction encoding of hfence.gvma is:
+* 0110001 rs2(5) rs1(5) 000 0 1110011
+*/
+
+ENTRY(__kvm_riscv_hfence_gvma_vmid_gpa)
+   /* hfence.gvma a1, a0 */
+   .word 0x62a60073
+   ret
+ENDPROC(__kvm_riscv_hfence_gvma_vmid_gpa)
+
+ENTRY(__kvm_riscv_hfence_gvma_vmid)
+   /* hfence.gvma zero, a0 */
+   .word 0x62a00073
+   ret
+ENDPROC(__kvm_riscv_hfence_gvma_vmid)
+
+ENTRY(__kvm_riscv_hfence_gvma_gpa)
+   /* hfence.gvma a0 */
+   .word 0x62050073
+   ret
+ENDPROC(__kvm_r
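
Points 2 and 3 of the changelog amount to a generation-counter scheme. In
sketch form (vmid_lock, vmid_next and vmid_version are illustrative names,
not necessarily those used in vmid.c):

	static void flush_guest_tlbs(void *unused)
	{
		__kvm_riscv_hfence_gvma_all();
	}

	void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu)
	{
		struct kvm_vmid *vmid = &vcpu->kvm->arch.vmid;

		if (!kvm_riscv_stage2_vmid_ver_changed(vmid))
			return;		/* our generation is still current */

		spin_lock(&vmid_lock);
		/* recheck: another VCPU may have refreshed it already */
		if (kvm_riscv_stage2_vmid_ver_changed(vmid)) {
			if (unlikely(vmid_next == 0)) {
				/* ran out: new generation, flush all TLBs */
				vmid_next = 1;
				atomic_long_inc(&vmid_version);
				on_each_cpu(flush_guest_tlbs, NULL, 1);
			}
			vmid->vmid = vmid_next++;
			WRITE_ONCE(vmid->vmid_version,
				   atomic_long_read(&vmid_version));
		}
		spin_unlock(&vmid_lock);
	}

The KVM_REQ_UPDATE_HGATP request (point 4) is then raised so every VCPU of
the VM reloads HGATP with the new VMID before re-entering the guest.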

[PATCH v9 07/22] RISC-V: KVM: Implement VCPU world-switch

2019-10-16 Thread Anup Patel
This patch implements the VCPU world-switch for KVM RISC-V.

The KVM RISC-V world-switch (i.e. __kvm_riscv_switch_to()) mostly
switches general purpose registers, SSTATUS, STVEC, SSCRATCH and
HSTATUS CSRs. Other CSRs are switched via vcpu_load() and vcpu_put()
interface in kvm_arch_vcpu_load() and kvm_arch_vcpu_put() functions
respectively.

Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Alexander Graf 
---
 arch/riscv/include/asm/kvm_host.h |   9 +-
 arch/riscv/kernel/asm-offsets.c   |  76 
 arch/riscv/kvm/Makefile   |   2 +-
 arch/riscv/kvm/vcpu.c |  32 -
 arch/riscv/kvm/vcpu_switch.S  | 194 ++
 5 files changed, 309 insertions(+), 4 deletions(-)
 create mode 100644 arch/riscv/kvm/vcpu_switch.S

diff --git a/arch/riscv/include/asm/kvm_host.h 
b/arch/riscv/include/asm/kvm_host.h
index c0d7d4fc7d58..d0322721484b 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -110,6 +110,13 @@ struct kvm_vcpu_arch {
/* ISA feature bits (similar to MISA) */
unsigned long isa;
 
+   /* SSCRATCH and STVEC of Host */
+   unsigned long host_sscratch;
+   unsigned long host_stvec;
+
+   /* CPU context of Host */
+   struct kvm_cpu_context host_context;
+
/* CPU context of Guest VCPU */
struct kvm_cpu_context guest_context;
 
@@ -162,7 +169,7 @@ int kvm_riscv_vcpu_mmio_return(struct kvm_vcpu *vcpu, 
struct kvm_run *run);
 int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
unsigned long scause, unsigned long stval);
 
-static inline void __kvm_riscv_switch_to(struct kvm_vcpu_arch *vcpu_arch) {}
+void __kvm_riscv_switch_to(struct kvm_vcpu_arch *vcpu_arch);
 
 int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
 int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
index 9f5628c38ac9..711656710190 100644
--- a/arch/riscv/kernel/asm-offsets.c
+++ b/arch/riscv/kernel/asm-offsets.c
@@ -7,7 +7,9 @@
 #define GENERATING_ASM_OFFSETS
 
 #include 
+#include 
 #include 
+#include 
 #include 
 #include 
 
@@ -109,6 +111,80 @@ void asm_offsets(void)
OFFSET(PT_SBADADDR, pt_regs, sbadaddr);
OFFSET(PT_SCAUSE, pt_regs, scause);
 
+   OFFSET(KVM_ARCH_GUEST_ZERO, kvm_vcpu_arch, guest_context.zero);
+   OFFSET(KVM_ARCH_GUEST_RA, kvm_vcpu_arch, guest_context.ra);
+   OFFSET(KVM_ARCH_GUEST_SP, kvm_vcpu_arch, guest_context.sp);
+   OFFSET(KVM_ARCH_GUEST_GP, kvm_vcpu_arch, guest_context.gp);
+   OFFSET(KVM_ARCH_GUEST_TP, kvm_vcpu_arch, guest_context.tp);
+   OFFSET(KVM_ARCH_GUEST_T0, kvm_vcpu_arch, guest_context.t0);
+   OFFSET(KVM_ARCH_GUEST_T1, kvm_vcpu_arch, guest_context.t1);
+   OFFSET(KVM_ARCH_GUEST_T2, kvm_vcpu_arch, guest_context.t2);
+   OFFSET(KVM_ARCH_GUEST_S0, kvm_vcpu_arch, guest_context.s0);
+   OFFSET(KVM_ARCH_GUEST_S1, kvm_vcpu_arch, guest_context.s1);
+   OFFSET(KVM_ARCH_GUEST_A0, kvm_vcpu_arch, guest_context.a0);
+   OFFSET(KVM_ARCH_GUEST_A1, kvm_vcpu_arch, guest_context.a1);
+   OFFSET(KVM_ARCH_GUEST_A2, kvm_vcpu_arch, guest_context.a2);
+   OFFSET(KVM_ARCH_GUEST_A3, kvm_vcpu_arch, guest_context.a3);
+   OFFSET(KVM_ARCH_GUEST_A4, kvm_vcpu_arch, guest_context.a4);
+   OFFSET(KVM_ARCH_GUEST_A5, kvm_vcpu_arch, guest_context.a5);
+   OFFSET(KVM_ARCH_GUEST_A6, kvm_vcpu_arch, guest_context.a6);
+   OFFSET(KVM_ARCH_GUEST_A7, kvm_vcpu_arch, guest_context.a7);
+   OFFSET(KVM_ARCH_GUEST_S2, kvm_vcpu_arch, guest_context.s2);
+   OFFSET(KVM_ARCH_GUEST_S3, kvm_vcpu_arch, guest_context.s3);
+   OFFSET(KVM_ARCH_GUEST_S4, kvm_vcpu_arch, guest_context.s4);
+   OFFSET(KVM_ARCH_GUEST_S5, kvm_vcpu_arch, guest_context.s5);
+   OFFSET(KVM_ARCH_GUEST_S6, kvm_vcpu_arch, guest_context.s6);
+   OFFSET(KVM_ARCH_GUEST_S7, kvm_vcpu_arch, guest_context.s7);
+   OFFSET(KVM_ARCH_GUEST_S8, kvm_vcpu_arch, guest_context.s8);
+   OFFSET(KVM_ARCH_GUEST_S9, kvm_vcpu_arch, guest_context.s9);
+   OFFSET(KVM_ARCH_GUEST_S10, kvm_vcpu_arch, guest_context.s10);
+   OFFSET(KVM_ARCH_GUEST_S11, kvm_vcpu_arch, guest_context.s11);
+   OFFSET(KVM_ARCH_GUEST_T3, kvm_vcpu_arch, guest_context.t3);
+   OFFSET(KVM_ARCH_GUEST_T4, kvm_vcpu_arch, guest_context.t4);
+   OFFSET(KVM_ARCH_GUEST_T5, kvm_vcpu_arch, guest_context.t5);
+   OFFSET(KVM_ARCH_GUEST_T6, kvm_vcpu_arch, guest_context.t6);
+   OFFSET(KVM_ARCH_GUEST_SEPC, kvm_vcpu_arch, guest_context.sepc);
+   OFFSET(KVM_ARCH_GUEST_SSTATUS, kvm_vcpu_arch, guest_context.sstatus);
+   OFFSET(KVM_ARCH_GUEST_HSTATUS, kvm_vcpu_arch, guest_context.hstatus);
+
+   OFFSET(KVM_ARCH_HOST_ZERO, kvm_vcpu_arch, host_context.zero);
+   OFFSET(KVM_ARCH_HOST_RA, kvm_vcpu_arch, host_context.ra);
+   OFFSET(KVM_ARCH_HOST_SP
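
The OFFSET() entries above are what let vcpu_switch.S address fields of
struct kvm_vcpu_arch without hard-coding structure layouts. Conceptually
(simplified from include/linux/kbuild.h), the kbuild machinery is:

	#define DEFINE(sym, val) \
		asm volatile("\n->" #sym " %0 " #val : : "i" (val))

	#define OFFSET(sym, str, mem) \
		DEFINE(sym, offsetof(struct str, mem))

asm-offsets.c is compiled to assembly, the "->SYM value" markers are
scraped into include/generated/asm-offsets.h, and the world-switch code can
then write, e.g., "REG_S ra, KVM_ARCH_GUEST_RA(a0)" with the offset
resolved at build time.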

[PATCH v9 08/22] RISC-V: KVM: Handle MMIO exits for VCPU

2019-10-16 Thread Anup Patel
We will get stage2 page faults whenever a Guest/VM accesses a SW-emulated
MMIO device or unmapped Guest RAM.

This patch implements MMIO read/write emulation by extracting MMIO
details from the trapped load/store instruction and forwarding the
MMIO read/write to user-space. The actual MMIO emulation will happen
in user-space and the KVM kernel module will only take care of register
updates before resuming the trapped VCPU.

The handling of stage2 page faults for unmapped Guest RAM will be
implemented by a separate patch later.

Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Alexander Graf 
---
 arch/riscv/include/asm/kvm_host.h |  20 ++
 arch/riscv/kvm/mmu.c  |   7 +
 arch/riscv/kvm/vcpu_exit.c| 505 +-
 arch/riscv/kvm/vcpu_switch.S  |  14 +
 4 files changed, 543 insertions(+), 3 deletions(-)

diff --git a/arch/riscv/include/asm/kvm_host.h 
b/arch/riscv/include/asm/kvm_host.h
index d0322721484b..7041d2a9304a 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -53,6 +53,13 @@ struct kvm_arch {
phys_addr_t pgd_phys;
 };
 
+struct kvm_mmio_decode {
+   unsigned long insn;
+   int len;
+   int shift;
+   int return_handled;
+};
+
 struct kvm_cpu_context {
unsigned long zero;
unsigned long ra;
@@ -141,6 +148,9 @@ struct kvm_vcpu_arch {
unsigned long irqs_pending;
unsigned long irqs_pending_mask;
 
+   /* MMIO instruction details */
+   struct kvm_mmio_decode mmio_decode;
+
/* VCPU power-off state */
bool power_off;
 
@@ -160,11 +170,21 @@ static inline void kvm_arch_vcpu_block_finish(struct 
kvm_vcpu *vcpu) {}
 int kvm_riscv_setup_vsip(void);
 void kvm_riscv_cleanup_vsip(void);
 
+int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned long hva,
+bool is_write);
 void kvm_riscv_stage2_flush_cache(struct kvm_vcpu *vcpu);
 int kvm_riscv_stage2_alloc_pgd(struct kvm *kvm);
 void kvm_riscv_stage2_free_pgd(struct kvm *kvm);
 void kvm_riscv_stage2_update_hgatp(struct kvm_vcpu *vcpu);
 
+void __kvm_riscv_unpriv_trap(void);
+
+unsigned long kvm_riscv_vcpu_unpriv_read(struct kvm_vcpu *vcpu,
+bool read_insn,
+unsigned long guest_addr,
+unsigned long *trap_scause);
+void kvm_riscv_vcpu_trap_redirect(struct kvm_vcpu *vcpu,
+ unsigned long scause, unsigned long stval);
 int kvm_riscv_vcpu_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
unsigned long scause, unsigned long stval);
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
index 04dd089b86ff..2b965f9aac07 100644
--- a/arch/riscv/kvm/mmu.c
+++ b/arch/riscv/kvm/mmu.c
@@ -61,6 +61,13 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
return 0;
 }
 
+int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned long hva,
+bool is_write)
+{
+   /* TODO: */
+   return 0;
+}
+
 void kvm_riscv_stage2_flush_cache(struct kvm_vcpu *vcpu)
 {
/* TODO: */
diff --git a/arch/riscv/kvm/vcpu_exit.c b/arch/riscv/kvm/vcpu_exit.c
index e4d7c8f0807a..f1378c0a447f 100644
--- a/arch/riscv/kvm/vcpu_exit.c
+++ b/arch/riscv/kvm/vcpu_exit.c
@@ -6,9 +6,430 @@
  * Anup Patel 
  */
 
+#include 
 #include 
 #include 
 #include 
+#include 
+
+#define INSN_MATCH_LB  0x3
+#define INSN_MASK_LB   0x707f
+#define INSN_MATCH_LH  0x1003
+#define INSN_MASK_LH   0x707f
+#define INSN_MATCH_LW  0x2003
+#define INSN_MASK_LW   0x707f
+#define INSN_MATCH_LD  0x3003
+#define INSN_MASK_LD   0x707f
+#define INSN_MATCH_LBU 0x4003
+#define INSN_MASK_LBU  0x707f
+#define INSN_MATCH_LHU 0x5003
+#define INSN_MASK_LHU  0x707f
+#define INSN_MATCH_LWU 0x6003
+#define INSN_MASK_LWU  0x707f
+#define INSN_MATCH_SB  0x23
+#define INSN_MASK_SB   0x707f
+#define INSN_MATCH_SH  0x1023
+#define INSN_MASK_SH   0x707f
+#define INSN_MATCH_SW  0x2023
+#define INSN_MASK_SW   0x707f
+#define INSN_MATCH_SD  0x3023
+#define INSN_MASK_SD   0x707f
+
+#define INSN_MATCH_C_LD0x6000
+#define INSN_MASK_C_LD 0xe003
+#define INSN_MATCH_C_SD0xe000
+#define INSN_MASK_C_SD 0xe003
+#define INSN_MATCH_C_LW0x4000
+#define INSN_MASK_C_LW 0xe003
+#define INSN_MATCH_C_SW0xc000
+#define INSN_MASK_C_SW 0xe003
+#define INSN_MATCH_C_LDSP  0x6002
+#define INSN_MASK_C_LDSP   0xe003
+#define INSN_MATCH_C_SDSP  0xe002
+#define INSN_MASK_C_SDSP   0xe003
+#define INSN_MATCH_C_LWSP  0x4002
+#define INSN_MASK_C_LWSP   0xe003
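
Decoding with these tables is a mask-and-compare per instruction class. A
minimal sketch of the load path (variable handling is illustrative; the
full decoder in vcpu_exit.c is truncated above):

	ulong insn = vcpu->arch.mmio_decode.insn;
	int len, shift = 0;

	if ((insn & INSN_MASK_LW) == INSN_MATCH_LW) {
		len = 4;
		/* shift is later used to sign-extend the MMIO result */
		shift = 8 * (sizeof(ulong) - len);
	} else if ((insn & INSN_MASK_LBU) == INSN_MATCH_LBU) {
		len = 1;	/* zero-extended, so no shift needed */
	} else {
		return -ENOTSUPP;
	}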

[PATCH v9 03/22] RISC-V: Add initial skeletal KVM support

2019-10-16 Thread Anup Patel
This patch adds initial skeletal KVM RISC-V support which has:
1. A simple implementation of arch specific VM functions
   except kvm_vm_ioctl_get_dirty_log() which will be implemented
   in future as part of stage2 page logging.
2. Stubs of required arch specific VCPU functions except
   kvm_arch_vcpu_ioctl_run() which is semi-complete and
   extended by subsequent patches.
3. Stubs for required arch specific stage2 MMU functions.

Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Alexander Graf 
---
 arch/riscv/Kconfig|   2 +
 arch/riscv/Makefile   |   2 +
 arch/riscv/include/asm/kvm_host.h |  81 
 arch/riscv/include/uapi/asm/kvm.h |  47 +
 arch/riscv/kvm/Kconfig|  33 
 arch/riscv/kvm/Makefile   |  13 ++
 arch/riscv/kvm/main.c |  80 
 arch/riscv/kvm/mmu.c  |  83 
 arch/riscv/kvm/vcpu.c | 312 ++
 arch/riscv/kvm/vcpu_exit.c|  35 
 arch/riscv/kvm/vm.c   |  79 
 11 files changed, 767 insertions(+)
 create mode 100644 arch/riscv/include/asm/kvm_host.h
 create mode 100644 arch/riscv/include/uapi/asm/kvm.h
 create mode 100644 arch/riscv/kvm/Kconfig
 create mode 100644 arch/riscv/kvm/Makefile
 create mode 100644 arch/riscv/kvm/main.c
 create mode 100644 arch/riscv/kvm/mmu.c
 create mode 100644 arch/riscv/kvm/vcpu.c
 create mode 100644 arch/riscv/kvm/vcpu_exit.c
 create mode 100644 arch/riscv/kvm/vm.c

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 3815808f95fa..2744b50eaeea 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -327,3 +327,5 @@ menu "Power management options"
 source "kernel/power/Kconfig"
 
 endmenu
+
+source "arch/riscv/kvm/Kconfig"
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index f5e914210245..a2067cdae2cd 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -77,6 +77,8 @@ head-y := arch/riscv/kernel/head.o
 
 core-y += arch/riscv/
 
+core-$(CONFIG_KVM) += arch/riscv/kvm/
+
 libs-y += arch/riscv/lib/
 
 PHONY += vdso_install
diff --git a/arch/riscv/include/asm/kvm_host.h 
b/arch/riscv/include/asm/kvm_host.h
new file mode 100644
index ..9459709656be
--- /dev/null
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Anup Patel 
+ */
+
+#ifndef __RISCV_KVM_HOST_H__
+#define __RISCV_KVM_HOST_H__
+
+#include 
+#include 
+#include 
+
+#ifdef CONFIG_64BIT
+#define KVM_MAX_VCPUS  (1U << 16)
+#else
+#define KVM_MAX_VCPUS  (1U << 9)
+#endif
+
+#define KVM_USER_MEM_SLOTS 512
+#define KVM_HALT_POLL_NS_DEFAULT	500000
+
+#define KVM_VCPU_MAX_FEATURES  0
+
+#define KVM_REQ_SLEEP \
+   KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(1)
+
+struct kvm_vm_stat {
+   ulong remote_tlb_flush;
+};
+
+struct kvm_vcpu_stat {
+   u64 halt_successful_poll;
+   u64 halt_attempted_poll;
+   u64 halt_poll_invalid;
+   u64 halt_wakeup;
+   u64 ecall_exit_stat;
+   u64 wfi_exit_stat;
+   u64 mmio_exit_user;
+   u64 mmio_exit_kernel;
+   u64 exits;
+};
+
+struct kvm_arch_memory_slot {
+};
+
+struct kvm_arch {
+   /* stage2 page table */
+   pgd_t *pgd;
+   phys_addr_t pgd_phys;
+};
+
+struct kvm_vcpu_arch {
+   /* Don't run the VCPU (blocked) */
+   bool pause;
+
+   /* SRCU lock index for in-kernel run loop */
+   int srcu_idx;
+};
+
+static inline void kvm_arch_hardware_unsetup(void) {}
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
+
+void kvm_riscv_stage2_flush_cache(struct kvm_vcpu *vcpu);
+int kvm_riscv_stage2_alloc_pgd(struct kvm *kvm);
+void kvm_riscv_stage2_free_pgd(struct kvm *kvm);
+void kvm_riscv_stage2_update_hgatp(struct kvm_vcpu *vcpu);
+
+int kvm_riscv_vcpu_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
+   unsigned long scause, unsigned long stval);
+
+static inline void __kvm_riscv_switch_to(struct kvm_vcpu_arch *vcpu_arch) {}
+
+#endif /* __RISCV_KVM_HOST_H__ */
diff --git a/arch/riscv/include/uapi/asm/kvm.h 
b/arch/riscv/include/uapi/asm/kvm.h
new file mode 100644
index ..d15875818b6e
--- /dev/null
+++ b/arch/riscv/include/uapi/asm/kvm.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Anup Patel 
+ */
+
+#ifndef __LINUX_KVM_RISCV

[PATCH v9 06/22] RISC-V: KVM: Implement KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls

2019-10-16 Thread Anup Patel
For KVM RISC-V, we use KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls to access
VCPU config and registers from user-space.

We have three types of VCPU registers:
1. CONFIG - these are VCPU config and capabilities
2. CORE   - these are VCPU general purpose registers
3. CSR- these are VCPU control and status registers

The CONFIG registers available to user-space are ISA and TIMEBASE. Out
of these, TIMEBASE is a read-only register which informs user-space about
the VCPU timer base frequency. The ISA register is a read-write register
where user-space can only write the desired VCPU ISA capabilities before
running the VCPU.

The CORE registers available to user-space are PC, RA, SP, GP, TP, A0-A7,
T0-T6, S0-S11 and MODE. Most of these are RISC-V general registers except
PC and MODE. The PC register represents the program counter whereas the MODE
register represents the VCPU privilege mode (i.e. S/U-mode).

The CSRs available to user-space are SSTATUS, SIE, STVEC, SSCRATCH, SEPC,
SCAUSE, STVAL, SIP, and SATP. All of these are read/write registers.

In future, more VCPU register types will be added (such as FP) for the
KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls.

Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
---
 arch/riscv/include/uapi/asm/kvm.h |  53 ++-
 arch/riscv/kvm/vcpu.c | 239 +-
 2 files changed, 289 insertions(+), 3 deletions(-)

diff --git a/arch/riscv/include/uapi/asm/kvm.h 
b/arch/riscv/include/uapi/asm/kvm.h
index 6dbc056d58ba..997b85f6fded 100644
--- a/arch/riscv/include/uapi/asm/kvm.h
+++ b/arch/riscv/include/uapi/asm/kvm.h
@@ -41,10 +41,61 @@ struct kvm_guest_debug_arch {
 struct kvm_sync_regs {
 };
 
-/* dummy definition */
+/* for KVM_GET_SREGS and KVM_SET_SREGS */
 struct kvm_sregs {
 };
 
+/* CONFIG registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
+struct kvm_riscv_config {
+   unsigned long isa;
+   unsigned long tbfreq;
+};
+
+/* CORE registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
+struct kvm_riscv_core {
+   struct user_regs_struct regs;
+   unsigned long mode;
+};
+
+/* Possible privilege modes for kvm_riscv_core */
+#define KVM_RISCV_MODE_S   1
+#define KVM_RISCV_MODE_U   0
+
+/* CSR registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
+struct kvm_riscv_csr {
+   unsigned long sstatus;
+   unsigned long sie;
+   unsigned long stvec;
+   unsigned long sscratch;
+   unsigned long sepc;
+   unsigned long scause;
+   unsigned long stval;
+   unsigned long sip;
+   unsigned long satp;
+};
+
+#define KVM_REG_SIZE(id)   \
+   (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
+
+/* If you need to interpret the index values, here is the key: */
+#define KVM_REG_RISCV_TYPE_MASK	0xFF000000
+#define KVM_REG_RISCV_TYPE_SHIFT   24
+
+/* Config registers are mapped as type 1 */
+#define KVM_REG_RISCV_CONFIG   (0x01 << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_CONFIG_REG(name) \
+   (offsetof(struct kvm_riscv_config, name) / sizeof(unsigned long))
+
+/* Core registers are mapped as type 2 */
+#define KVM_REG_RISCV_CORE (0x02 << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_CORE_REG(name)   \
+   (offsetof(struct kvm_riscv_core, name) / sizeof(unsigned long))
+
+/* Control and status registers are mapped as type 3 */
+#define KVM_REG_RISCV_CSR  (0x03 << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_CSR_REG(name)\
+   (offsetof(struct kvm_riscv_csr, name) / sizeof(unsigned long))
+
 #endif
 
 #endif /* __LINUX_KVM_RISCV_H */
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index 9107469279a7..f76b165c9203 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -164,6 +164,219 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, 
struct vm_fault *vmf)
return VM_FAULT_SIGBUS;
 }
 
+static int kvm_riscv_vcpu_get_reg_config(struct kvm_vcpu *vcpu,
+const struct kvm_one_reg *reg)
+{
+   unsigned long __user *uaddr =
+   (unsigned long __user *)(unsigned long)reg->addr;
+   unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+   KVM_REG_SIZE_MASK |
+   KVM_REG_RISCV_CONFIG);
+   unsigned long reg_val;
+
+   if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+   return -EINVAL;
+
+   switch (reg_num) {
+   case KVM_REG_RISCV_CONFIG_REG(isa):
+   reg_val = vcpu->arch.isa;
+   break;
+   case KVM_REG_RISCV_CONFIG_REG(tbfreq):
+   reg_val = riscv_timebase;
+   break;
+   default:
+   return -EINVAL;
+   };
+
+   if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
+   return -EFAULT;
+
+   return 0;
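
From the user-space side the register IDs compose as arch | size | type |
index. A hypothetical snippet reading the ISA config register on an RV64
host (assuming the KVM_REG_RISCV arch identifier from
include/uapi/linux/kvm.h and a VCPU fd opened elsewhere) could look like:

	struct kvm_one_reg reg;
	unsigned long isa;

	reg.id = KVM_REG_RISCV | KVM_REG_SIZE_U64 |
		 KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(isa);
	reg.addr = (unsigned long)&isa;

	if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
		err(1, "KVM_GET_ONE_REG");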

[PATCH v9 04/22] RISC-V: KVM: Implement VCPU create, init and destroy functions

2019-10-16 Thread Anup Patel
This patch implements VCPU create, init and destroy functions
required by the generic KVM module. We don't have many dynamic
resources in struct kvm_vcpu_arch so these functions are quite
simple for KVM RISC-V.

Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Alexander Graf 
---
 arch/riscv/include/asm/kvm_host.h | 68 +++
 arch/riscv/kvm/vcpu.c | 68 +--
 2 files changed, 132 insertions(+), 4 deletions(-)

diff --git a/arch/riscv/include/asm/kvm_host.h 
b/arch/riscv/include/asm/kvm_host.h
index 9459709656be..dab32c9c3470 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -53,7 +53,75 @@ struct kvm_arch {
phys_addr_t pgd_phys;
 };
 
+struct kvm_cpu_context {
+   unsigned long zero;
+   unsigned long ra;
+   unsigned long sp;
+   unsigned long gp;
+   unsigned long tp;
+   unsigned long t0;
+   unsigned long t1;
+   unsigned long t2;
+   unsigned long s0;
+   unsigned long s1;
+   unsigned long a0;
+   unsigned long a1;
+   unsigned long a2;
+   unsigned long a3;
+   unsigned long a4;
+   unsigned long a5;
+   unsigned long a6;
+   unsigned long a7;
+   unsigned long s2;
+   unsigned long s3;
+   unsigned long s4;
+   unsigned long s5;
+   unsigned long s6;
+   unsigned long s7;
+   unsigned long s8;
+   unsigned long s9;
+   unsigned long s10;
+   unsigned long s11;
+   unsigned long t3;
+   unsigned long t4;
+   unsigned long t5;
+   unsigned long t6;
+   unsigned long sepc;
+   unsigned long sstatus;
+   unsigned long hstatus;
+};
+
+struct kvm_vcpu_csr {
+   unsigned long vsstatus;
+   unsigned long vsie;
+   unsigned long vstvec;
+   unsigned long vsscratch;
+   unsigned long vsepc;
+   unsigned long vscause;
+   unsigned long vstval;
+   unsigned long vsip;
+   unsigned long vsatp;
+};
+
 struct kvm_vcpu_arch {
+   /* VCPU ran at least once */
+   bool ran_atleast_once;
+
+   /* ISA feature bits (similar to MISA) */
+   unsigned long isa;
+
+   /* CPU context of Guest VCPU */
+   struct kvm_cpu_context guest_context;
+
+   /* CPU CSR context of Guest VCPU */
+   struct kvm_vcpu_csr guest_csr;
+
+   /* CPU context upon Guest VCPU reset */
+   struct kvm_cpu_context guest_reset_context;
+
+   /* CPU CSR context upon Guest VCPU reset */
+   struct kvm_vcpu_csr guest_reset_csr;
+
/* Don't run the VCPU (blocked) */
bool pause;
 
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index 48536cb0c8e7..8272b05d6ce4 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -31,10 +31,48 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ NULL }
 };
 
+#define KVM_RISCV_ISA_ALLOWED  (riscv_isa_extension_mask(a) | \
+riscv_isa_extension_mask(c) | \
+riscv_isa_extension_mask(d) | \
+riscv_isa_extension_mask(f) | \
+riscv_isa_extension_mask(i) | \
+riscv_isa_extension_mask(m) | \
+riscv_isa_extension_mask(s) | \
+riscv_isa_extension_mask(u))
+
+static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
+{
+   struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
+   struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;
+   struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+   struct kvm_cpu_context *reset_cntx = &vcpu->arch.guest_reset_context;
+
+   memcpy(csr, reset_csr, sizeof(*csr));
+
+   memcpy(cntx, reset_cntx, sizeof(*cntx));
+}
+
 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 {
-   /* TODO: */
-   return NULL;
+   int err;
+   struct kvm_vcpu *vcpu;
+
+   vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+   if (!vcpu) {
+   err = -ENOMEM;
+   goto out;
+   }
+
+   err = kvm_vcpu_init(vcpu, kvm, id);
+   if (err)
+   goto free_vcpu;
+
+   return vcpu;
+
+free_vcpu:
+   kmem_cache_free(kvm_vcpu_cache, vcpu);
+out:
+   return ERR_PTR(err);
 }
 
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
@@ -48,13 +86,32 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
 
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 {
-   /* TODO: */
+   struct kvm_cpu_context *cntx;
+
+   /* Mark this VCPU never ran */
+   vcpu->arch.ran_atleast_once = false;
+
+   /* Setup ISA features available to VCPU */
+   vcpu->arch.isa = riscv_isa_extension_base(NULL) & KVM_RISCV_ISA_ALLOWED;
+
+   /* Setup reset state of shadow SSTATUS and HSTATUS CSRs */
+   cntx = &vcpu->arch.guest_reset_context;
+   cntx->sstatus = SR

[PATCH v9 05/22] RISC-V: KVM: Implement VCPU interrupts and requests handling

2019-10-16 Thread Anup Patel
This patch implements VCPU interrupts and requests which are both
asynchronous events.

The VCPU interrupts can be set/unset using KVM_INTERRUPT ioctl from
user-space. In future, the in-kernel IRQCHIP emulation will use
kvm_riscv_vcpu_set_interrupt() and kvm_riscv_vcpu_unset_interrupt()
functions to set/unset VCPU interrupts.

Important VCPU requests implemented by this patch are:
KVM_REQ_SLEEP   - set whenever VCPU itself goes to sleep state
KVM_REQ_VCPU_RESET  - set whenever VCPU reset is requested

The WFI trap-n-emulate (added later) will use KVM_REQ_SLEEP request
and kvm_riscv_vcpu_has_interrupt() function.

The KVM_REQ_VCPU_RESET request will be used by SBI emulation (added
later) to power-up a VCPU in power-off state. The user-space can use
the GET_MPSTATE/SET_MPSTATE ioctls to get/set power state of a VCPU.

Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Alexander Graf 
---
 arch/riscv/include/asm/kvm_host.h |  26 
 arch/riscv/include/uapi/asm/kvm.h |   3 +
 arch/riscv/kvm/main.c |   8 ++
 arch/riscv/kvm/vcpu.c | 192 --
 4 files changed, 216 insertions(+), 13 deletions(-)

diff --git a/arch/riscv/include/asm/kvm_host.h 
b/arch/riscv/include/asm/kvm_host.h
index dab32c9c3470..c0d7d4fc7d58 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -122,6 +122,21 @@ struct kvm_vcpu_arch {
/* CPU CSR context upon Guest VCPU reset */
struct kvm_vcpu_csr guest_reset_csr;
 
+   /*
+* VCPU interrupts
+*
+* We have a lockless approach for tracking pending VCPU interrupts
+* implemented using atomic bitops. The irqs_pending bitmap represents
+* pending interrupts whereas irqs_pending_mask represents bits changed
+* in irqs_pending. Our approach is modeled around the multiple producer
+* and single consumer problem where the consumer is the VCPU itself.
+*/
+   unsigned long irqs_pending;
+   unsigned long irqs_pending_mask;
+
+   /* VCPU power-off state */
+   bool power_off;
+
/* Don't run the VCPU (blocked) */
bool pause;
 
@@ -135,6 +150,9 @@ static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu 
*vcpu) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
 
+int kvm_riscv_setup_vsip(void);
+void kvm_riscv_cleanup_vsip(void);
+
 void kvm_riscv_stage2_flush_cache(struct kvm_vcpu *vcpu);
 int kvm_riscv_stage2_alloc_pgd(struct kvm *kvm);
 void kvm_riscv_stage2_free_pgd(struct kvm *kvm);
@@ -146,4 +164,12 @@ int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct 
kvm_run *run,
 
 static inline void __kvm_riscv_switch_to(struct kvm_vcpu_arch *vcpu_arch) {}
 
+int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
+int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
+void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu);
+void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu);
+bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, unsigned long mask);
+void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu);
+void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu);
+
 #endif /* __RISCV_KVM_HOST_H__ */
diff --git a/arch/riscv/include/uapi/asm/kvm.h 
b/arch/riscv/include/uapi/asm/kvm.h
index d15875818b6e..6dbc056d58ba 100644
--- a/arch/riscv/include/uapi/asm/kvm.h
+++ b/arch/riscv/include/uapi/asm/kvm.h
@@ -18,6 +18,9 @@
 
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 
+#define KVM_INTERRUPT_SET  -1U
+#define KVM_INTERRUPT_UNSET-2U
+
 /* for KVM_GET_REGS and KVM_SET_REGS */
 struct kvm_regs {
 };
diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c
index e1ffe6d42f39..d088247843c5 100644
--- a/arch/riscv/kvm/main.c
+++ b/arch/riscv/kvm/main.c
@@ -48,6 +48,8 @@ int kvm_arch_hardware_enable(void)
hideleg |= SIE_SEIE;
csr_write(CSR_HIDELEG, hideleg);
 
+   csr_write(CSR_VSIP, 0);
+
return 0;
 }
 
@@ -59,11 +61,17 @@ void kvm_arch_hardware_disable(void)
 
 int kvm_arch_init(void *opaque)
 {
+   int ret;
+
if (!riscv_isa_extension_available(NULL, h)) {
kvm_info("hypervisor extension not available\n");
return -ENODEV;
}
 
+   ret = kvm_riscv_setup_vsip();
+   if (ret)
+   return ret;
+
kvm_info("hypervisor extension available\n");
 
return 0;
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index 8272b05d6ce4..9107469279a7 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -11,6 +11,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -40,6 +41,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 riscv_isa_extension_mask(s) | \
 riscv_isa_extension_mask(u))
 
+
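
The lockless scheme described in the comment above works out to two ordered
bitops on the producer side and a masked transfer on the consumer (VCPU)
side. A sketch mirroring the changelog's description (illustrative, not the
exact patch code):

	/* producer: any CPU, any context */
	set_bit(irq, &vcpu->arch.irqs_pending);
	smp_mb__before_atomic();
	set_bit(irq, &vcpu->arch.irqs_pending_mask);
	kvm_vcpu_kick(vcpu);

	/* consumer: the VCPU itself, before entering the guest */
	if (READ_ONCE(vcpu->arch.irqs_pending_mask)) {
		mask = xchg_acquire(&vcpu->arch.irqs_pending_mask, 0);
		val = READ_ONCE(vcpu->arch.irqs_pending) & mask;
		/* fold (mask, val) into the shadow VSIP CSR value */
	}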

[PATCH v9 02/22] RISC-V: Add hypervisor extension related CSR defines

2019-10-16 Thread Anup Patel
This patch extends asm/csr.h by adding RISC-V hypervisor extension
related defines.

Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Alexander Graf 
---
 arch/riscv/include/asm/csr.h | 58 
 1 file changed, 58 insertions(+)

diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index a18923fa23c8..059c5cb22aaf 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -27,6 +27,8 @@
#define SR_XS_CLEAN	_AC(0x00010000, UL)
#define SR_XS_DIRTY	_AC(0x00018000, UL)

+#define SR_MXR	_AC(0x00080000, UL)
+
#ifndef CONFIG_64BIT
#define SR_SD	_AC(0x80000000, UL) /* FS/XS dirty */
 #else
@@ -59,10 +61,13 @@
 
#define EXC_INST_MISALIGNED	0
#define EXC_INST_ACCESS	1
+#define EXC_INST_ILLEGAL	2
#define EXC_BREAKPOINT	3
#define EXC_LOAD_ACCESS	5
#define EXC_STORE_ACCESS	7
#define EXC_SYSCALL	8
+#define EXC_HYPERVISOR_SYSCALL	9
+#define EXC_SUPERVISOR_SYSCALL	10
#define EXC_INST_PAGE_FAULT	12
#define EXC_LOAD_PAGE_FAULT	13
 #define EXC_STORE_PAGE_FAULT   15
@@ -72,6 +77,43 @@
 #define SIE_STIE   (_AC(0x1, UL) << IRQ_S_TIMER)
 #define SIE_SEIE   (_AC(0x1, UL) << IRQ_S_EXT)
 
+/* HSTATUS flags */
+#define HSTATUS_VTSR	_AC(0x00400000, UL)
+#define HSTATUS_VTVM	_AC(0x00100000, UL)
+#define HSTATUS_SP2V   _AC(0x0200, UL)
+#define HSTATUS_SP2P   _AC(0x0100, UL)
+#define HSTATUS_SPV_AC(0x0080, UL)
+#define HSTATUS_STL_AC(0x0040, UL)
+#define HSTATUS_SPRV   _AC(0x0001, UL)
+
+/* HGATP flags */
+#define HGATP_MODE_OFF _AC(0, UL)
+#define HGATP_MODE_SV32X4  _AC(1, UL)
+#define HGATP_MODE_SV39X4  _AC(8, UL)
+#define HGATP_MODE_SV48X4  _AC(9, UL)
+
+#define HGATP32_MODE_SHIFT 31
+#define HGATP32_VMID_SHIFT 22
+#define HGATP32_VMID_MASK	_AC(0x1FC00000, UL)
+#define HGATP32_PPN	_AC(0x003FFFFF, UL)
+
+#define HGATP64_MODE_SHIFT 60
+#define HGATP64_VMID_SHIFT 44
+#define HGATP64_VMID_MASK	_AC(0x03FFF00000000000, UL)
+#define HGATP64_PPN	_AC(0x00000FFFFFFFFFFF, UL)
+
+#ifdef CONFIG_64BIT
+#define HGATP_PPN  HGATP64_PPN
+#define HGATP_VMID_SHIFT   HGATP64_VMID_SHIFT
+#define HGATP_VMID_MASKHGATP64_VMID_MASK
+#define HGATP_MODE (HGATP_MODE_SV39X4 << HGATP64_MODE_SHIFT)
+#else
+#define HGATP_PPN  HGATP32_PPN
+#define HGATP_VMID_SHIFT   HGATP32_VMID_SHIFT
+#define HGATP_VMID_MASKHGATP32_VMID_MASK
+#define HGATP_MODE (HGATP_MODE_SV32X4 << HGATP32_MODE_SHIFT)
+#endif
+
 #define CSR_CYCLE  0xc00
 #define CSR_TIME   0xc01
 #define CSR_INSTRET0xc02
@@ -85,6 +127,22 @@
 #define CSR_STVAL  0x143
 #define CSR_SIP0x144
 #define CSR_SATP   0x180
+
+#define CSR_VSSTATUS   0x200
+#define CSR_VSIE   0x204
+#define CSR_VSTVEC 0x205
+#define CSR_VSSCRATCH  0x240
+#define CSR_VSEPC  0x241
+#define CSR_VSCAUSE0x242
+#define CSR_VSTVAL 0x243
+#define CSR_VSIP   0x244
+#define CSR_VSATP  0x280
+
+#define CSR_HSTATUS	0x600
+#define CSR_HEDELEG	0x602
+#define CSR_HIDELEG	0x603
+#define CSR_HGATP  0x680
+
 #define CSR_CYCLEH 0xc80
 #define CSR_TIMEH  0xc81
 #define CSR_INSTRETH   0xc82
-- 
2.17.1
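
The HGATP defines combine in the usual satp-like way. A sketch of how the
stage2 root table would be programmed with them (local names illustrative):

	unsigned long hgatp;

	hgatp  = HGATP_MODE;	/* Sv39x4 on 64-bit, Sv32x4 on 32-bit */
	hgatp |= (vmid << HGATP_VMID_SHIFT) & HGATP_VMID_MASK;
	hgatp |= (kvm->arch.pgd_phys >> PAGE_SHIFT) & HGATP_PPN;

	csr_write(CSR_HGATP, hgatp);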



[PATCH v9 01/22] RISC-V: Add bitmap representing ISA features common across CPUs

2019-10-16 Thread Anup Patel
This patch adds a riscv_isa bitmap which represents Host ISA features
common across all Host CPUs. The riscv_isa bitmap is not the same as elf_hwcap
because elf_hwcap will only have ISA features relevant for user-space
apps whereas riscv_isa will have ISA features relevant to both kernel
and user-space apps.

One of the use-cases for the riscv_isa bitmap is in the KVM hypervisor,
where we will use it to do the following operations:

1. Check whether hypervisor extension is available
2. Find ISA features that need to be virtualized (e.g. floating
   point support, vector extension, etc.)

Signed-off-by: Anup Patel 
Signed-off-by: Atish Patra 
Reviewed-by: Alexander Graf 
---
 arch/riscv/include/asm/hwcap.h | 22 +
 arch/riscv/kernel/cpufeature.c | 83 --
 2 files changed, 102 insertions(+), 3 deletions(-)

diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index 7ecb7c6a57b1..5989dd4426d1 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -8,6 +8,7 @@
 #ifndef __ASM_HWCAP_H
 #define __ASM_HWCAP_H
 
+#include 
 #include 
 
 #ifndef __ASSEMBLY__
@@ -22,5 +23,26 @@ enum {
 };
 
 extern unsigned long elf_hwcap;
+
+#define RISCV_ISA_EXT_a	('a' - 'a')
+#define RISCV_ISA_EXT_c	('c' - 'a')
+#define RISCV_ISA_EXT_d	('d' - 'a')
+#define RISCV_ISA_EXT_f	('f' - 'a')
+#define RISCV_ISA_EXT_h	('h' - 'a')
+#define RISCV_ISA_EXT_i	('i' - 'a')
+#define RISCV_ISA_EXT_m	('m' - 'a')
+#define RISCV_ISA_EXT_s	('s' - 'a')
+#define RISCV_ISA_EXT_u	('u' - 'a')
+
+#define RISCV_ISA_EXT_MAX  256
+
+unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap);
+
+#define riscv_isa_extension_mask(ext) BIT_MASK(RISCV_ISA_EXT_##ext)
+
+bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit);
+#define riscv_isa_extension_available(isa_bitmap, ext) \
+   __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext)
+
 #endif
 #endif
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index eaad5aa07403..64068d36658d 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -6,21 +6,64 @@
  * Copyright (C) 2017 SiFive
  */
 
+#include 
 #include 
 #include 
 #include 
 #include 
 
 unsigned long elf_hwcap __read_mostly;
+
+/* Host ISA bitmap */
+static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly;
+
 #ifdef CONFIG_FPU
 bool has_fpu __read_mostly;
 #endif
 
+/**
+ * riscv_isa_extension_base() - Get base extension word
+ *
+ * @isa_bitmap: ISA bitmap to use
+ * Return: base extension word as unsigned long value
+ *
+ * NOTE: If isa_bitmap is NULL then Host ISA bitmap will be used.
+ */
+unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap)
+{
+   if (!isa_bitmap)
+   return riscv_isa[0];
+   return isa_bitmap[0];
+}
+EXPORT_SYMBOL_GPL(riscv_isa_extension_base);
+
+/**
+ * __riscv_isa_extension_available() - Check whether given extension
+ * is available or not
+ *
+ * @isa_bitmap: ISA bitmap to use
+ * @bit: bit position of the desired extension
+ * Return: true or false
+ *
+ * NOTE: If isa_bitmap is NULL then Host ISA bitmap will be used.
+ */
+bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit)
+{
+   const unsigned long *bmap = (isa_bitmap) ? isa_bitmap : riscv_isa;
+
+   if (bit >= RISCV_ISA_EXT_MAX)
+   return false;
+
+   return test_bit(bit, bmap) ? true : false;
+}
+EXPORT_SYMBOL_GPL(__riscv_isa_extension_available);
+
 void riscv_fill_hwcap(void)
 {
struct device_node *node;
const char *isa;
-   size_t i;
+   char print_str[BITS_PER_LONG+1];
+   size_t i, j, isa_len;
static unsigned long isa2hwcap[256] = {0};
 
isa2hwcap['i'] = isa2hwcap['I'] = COMPAT_HWCAP_ISA_I;
@@ -32,8 +75,11 @@ void riscv_fill_hwcap(void)
 
elf_hwcap = 0;
 
+   bitmap_zero(riscv_isa, RISCV_ISA_EXT_MAX);
+
for_each_of_cpu_node(node) {
unsigned long this_hwcap = 0;
+   unsigned long this_isa = 0;
 
if (riscv_of_processor_hartid(node) < 0)
continue;
@@ -41,8 +87,24 @@ void riscv_fill_hwcap(void)
if (riscv_read_check_isa(node, &isa) < 0)
continue;
 
-   for (i = 0; i < strlen(isa); ++i)
+   i = 0;
+   isa_len = strlen(isa);
+#if IS_ENABLED(CONFIG_32BIT)
+   if (!strncmp(isa, "rv32", 4))
+   i += 4;
+#elif IS_ENABLED(CONFIG_64BIT)
+   if (!strncmp(isa, "rv64", 4))
+   i += 4;
+#endif
+   for (; i < isa_len; ++i) {
this_hwcap |= isa2hwcap[(unsigned char)(isa[i])];
+   /*
+* TODO: X, Y a
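
Typical consumers of this bitmap look like the following; the first form is
exactly what the KVM patches in this series do at init time, the second is
a sketch of masking a guest ISA against a whitelist:

	/* does the host have the hypervisor extension? */
	if (!riscv_isa_extension_available(NULL, h))
		return -ENODEV;

	/* restrict a guest's ISA to a subset of the host's */
	unsigned long guest_isa = riscv_isa_extension_base(NULL) &
				  (riscv_isa_extension_mask(i) |
				   riscv_isa_extension_mask(m) |
				   riscv_isa_extension_mask(a));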

[PATCH v9 00/22] KVM RISC-V Support

2019-10-16 Thread Anup Patel
_update_pgtbl() to kvm_riscv_stage2_update_hgatp()
- Configure HIDELEG and HEDELEG in kvm_arch_hardware_enable()
- Updated ONE_REG interface for CSR access to user-space
- Removed irqs_pending_lock and use atomic bitops instead
- Added separate patch for FP ONE_REG interface
- Added separate patch for updating MAINTAINERS file

Anup Patel (18):
  RISC-V: Add bitmap representing ISA features common across CPUs
  RISC-V: Add hypervisor extension related CSR defines
  RISC-V: Add initial skeletal KVM support
  RISC-V: KVM: Implement VCPU create, init and destroy functions
  RISC-V: KVM: Implement VCPU interrupts and requests handling
  RISC-V: KVM: Implement KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls
  RISC-V: KVM: Implement VCPU world-switch
  RISC-V: KVM: Handle MMIO exits for VCPU
  RISC-V: KVM: Handle WFI exits for VCPU
  RISC-V: KVM: Implement VMID allocator
  RISC-V: KVM: Implement stage2 page table programming
  RISC-V: KVM: Implement MMU notifiers
  RISC-V: KVM: Forward unhandled SBI calls to userspace
  RISC-V: KVM: Simplify stage2 page table programming
  RISC-V: KVM: Remove per-CPU vsip_shadow variable
  RISC-V: KVM: Fix race-condition in kvm_riscv_vcpu_sync_interrupts()
  RISC-V: KVM: Document RISC-V specific parts of KVM API.
  RISC-V: KVM: Add MAINTAINERS entry

Atish Patra (4):
  RISC-V: KVM: Add timer functionality
  RISC-V: KVM: FP lazy save/restore
  RISC-V: KVM: Implement ONE REG interface for FP registers
  RISC-V: KVM: Add SBI v0.1 support

 Documentation/virt/kvm/api.txt  | 158 +++-
 MAINTAINERS |  10 +
 arch/riscv/Kconfig  |   2 +
 arch/riscv/Makefile |   2 +
 arch/riscv/include/asm/csr.h|  58 ++
 arch/riscv/include/asm/hwcap.h  |  22 +
 arch/riscv/include/asm/kvm_host.h   | 260 +++
 arch/riscv/include/asm/kvm_vcpu_timer.h |  30 +
 arch/riscv/include/asm/pgtable-bits.h   |   1 +
 arch/riscv/include/uapi/asm/kvm.h   | 111 +++
 arch/riscv/kernel/asm-offsets.c | 148 
 arch/riscv/kernel/cpufeature.c  |  83 +-
 arch/riscv/kvm/Kconfig  |  34 +
 arch/riscv/kvm/Makefile |  14 +
 arch/riscv/kvm/main.c   |  86 ++
 arch/riscv/kvm/mmu.c| 773 ++
 arch/riscv/kvm/tlb.S|  43 +
 arch/riscv/kvm/vcpu.c   | 995 
 arch/riscv/kvm/vcpu_exit.c  | 610 +++
 arch/riscv/kvm/vcpu_sbi.c   | 151 
 arch/riscv/kvm/vcpu_switch.S| 382 +
 arch/riscv/kvm/vcpu_timer.c | 110 +++
 arch/riscv/kvm/vm.c |  86 ++
 arch/riscv/kvm/vmid.c   | 123 +++
 drivers/clocksource/timer-riscv.c   |   8 +
 include/clocksource/timer-riscv.h   |  16 +
 include/uapi/linux/kvm.h|   8 +
 27 files changed, 4314 insertions(+), 10 deletions(-)
 create mode 100644 arch/riscv/include/asm/kvm_host.h
 create mode 100644 arch/riscv/include/asm/kvm_vcpu_timer.h
 create mode 100644 arch/riscv/include/uapi/asm/kvm.h
 create mode 100644 arch/riscv/kvm/Kconfig
 create mode 100644 arch/riscv/kvm/Makefile
 create mode 100644 arch/riscv/kvm/main.c
 create mode 100644 arch/riscv/kvm/mmu.c
 create mode 100644 arch/riscv/kvm/tlb.S
 create mode 100644 arch/riscv/kvm/vcpu.c
 create mode 100644 arch/riscv/kvm/vcpu_exit.c
 create mode 100644 arch/riscv/kvm/vcpu_sbi.c
 create mode 100644 arch/riscv/kvm/vcpu_switch.S
 create mode 100644 arch/riscv/kvm/vcpu_timer.c
 create mode 100644 arch/riscv/kvm/vm.c
 create mode 100644 arch/riscv/kvm/vmid.c
 create mode 100644 include/clocksource/timer-riscv.h

--
2.17.1


Re: [PATCH] RISC-V: fix virtual address overlapped in FIXADDR_START and VMEMMAP_START

2019-10-16 Thread Anup Patel
On Wed, Oct 16, 2019 at 1:04 PM  wrote:
>
> From: Greentime Hu 
>
> This patch fixes the virtual address layout in pgtable.h.
> The virtual address regions at FIXADDR_START and VMEMMAP_START should not
> overlap. Both regions exist at the same time in the Linux kernel, so they
> must not overlap.
>
> Fixes: d95f1a542c3d ("RISC-V: Implement sparsemem")
> Signed-off-by: Greentime Hu 
> ---
>  arch/riscv/include/asm/pgtable.h | 16 
>  1 file changed, 8 insertions(+), 8 deletions(-)
>
> diff --git a/arch/riscv/include/asm/pgtable.h 
> b/arch/riscv/include/asm/pgtable.h
> index 4f4162d90586..b927fb4ecf1c 100644
> --- a/arch/riscv/include/asm/pgtable.h
> +++ b/arch/riscv/include/asm/pgtable.h
> @@ -87,14 +87,6 @@ extern pgd_t swapper_pg_dir[];
>  #define VMALLOC_END  (PAGE_OFFSET - 1)
>  #define VMALLOC_START    (PAGE_OFFSET - VMALLOC_SIZE)
>
> -#define FIXADDR_TOP  VMALLOC_START
> -#ifdef CONFIG_64BIT
> -#define FIXADDR_SIZE PMD_SIZE
> -#else
> -#define FIXADDR_SIZE PGDIR_SIZE
> -#endif
> -#define FIXADDR_START    (FIXADDR_TOP - FIXADDR_SIZE)
> -
>  /*
>   * Roughly size the vmemmap space to be large enough to fit enough
>   * struct pages to map half the virtual address space. Then
> @@ -108,6 +100,14 @@ extern pgd_t swapper_pg_dir[];
>
>  #define vmemmap    ((struct page *)VMEMMAP_START)
>
> +#define FIXADDR_TOP  (VMEMMAP_START)
> +#ifdef CONFIG_64BIT
> +#define FIXADDR_SIZE PMD_SIZE
> +#else
> +#define FIXADDR_SIZE PGDIR_SIZE
> +#endif
> +#define FIXADDR_START    (FIXADDR_TOP - FIXADDR_SIZE)
> +
>  /*
>   * ZERO_PAGE is a global shared page that is always zero,
>   * used for zero-mapped memory areas, etc.
> --
> 2.17.1
>

Looks good to me.

Reviewed-by: Anup Patel 

Regards,
Anup
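
For reference, the resulting top-of-address-space layout after this change
is (schematic, highest address first):

	PAGE_OFFSET
	  vmalloc: VMALLOC_START .. VMALLOC_END (= PAGE_OFFSET - 1)
	  vmemmap: VMEMMAP_START .. VMEMMAP_END (just below vmalloc)
	  fixmap:  FIXADDR_START .. FIXADDR_TOP (= VMEMMAP_START)

so the fixmap now sits directly below vmemmap instead of on top of it.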


RE: [PATCH v2 2/2] RISC-V: defconfig: Enable Goldfish RTC driver

2019-10-14 Thread Anup Patel


> -Original Message-
> From: Palmer Dabbelt 
> Sent: Saturday, October 12, 2019 11:09 PM
> To: Anup Patel 
> Cc: Paul Walmsley ; a...@eecs.berkeley.edu;
> Greg KH ; r...@google.com; Atish Patra
> ; Alistair Francis ;
> Christoph Hellwig ; a...@brainfault.org; linux-
> ri...@lists.infradead.org; linux-kernel@vger.kernel.org; Anup Patel
> 
> Subject: Re: [PATCH v2 2/2] RISC-V: defconfig: Enable Goldfish RTC driver
> 
> On Tue, 24 Sep 2019 23:38:08 PDT (-0700), Anup Patel wrote:
> > We have Goldfish RTC device available on QEMU RISC-V virt machine
> > hence enable required driver in RV32 and RV64 defconfigs.
> >
> > Signed-off-by: Anup Patel 
> > ---
> >  arch/riscv/configs/defconfig  | 3 +++
> >  arch/riscv/configs/rv32_defconfig | 3 +++
> >  2 files changed, 6 insertions(+)
> >
> > diff --git a/arch/riscv/configs/defconfig
> > b/arch/riscv/configs/defconfig index 3efff552a261..57b4f67b0c0b 100644
> > --- a/arch/riscv/configs/defconfig
> > +++ b/arch/riscv/configs/defconfig
> > @@ -73,7 +73,10 @@ CONFIG_USB_STORAGE=y  CONFIG_USB_UAS=y
> > CONFIG_MMC=y  CONFIG_MMC_SPI=y
> > +CONFIG_RTC_CLASS=y
> > +CONFIG_RTC_DRV_GOLDFISH=y
> >  CONFIG_VIRTIO_MMIO=y
> > +CONFIG_GOLDFISH=y
> >  CONFIG_EXT4_FS=y
> >  CONFIG_EXT4_FS_POSIX_ACL=y
> >  CONFIG_AUTOFS4_FS=y
> > diff --git a/arch/riscv/configs/rv32_defconfig
> > b/arch/riscv/configs/rv32_defconfig
> > index 7da93e494445..50716c1395aa 100644
> > --- a/arch/riscv/configs/rv32_defconfig
> > +++ b/arch/riscv/configs/rv32_defconfig
> > @@ -69,7 +69,10 @@ CONFIG_USB_OHCI_HCD=y
> > CONFIG_USB_OHCI_HCD_PLATFORM=y  CONFIG_USB_STORAGE=y
> CONFIG_USB_UAS=y
> > +CONFIG_RTC_CLASS=y
> > +CONFIG_RTC_DRV_GOLDFISH=y
> >  CONFIG_VIRTIO_MMIO=y
> > +CONFIG_GOLDFISH=y
> >  CONFIG_SIFIVE_PLIC=y
> >  CONFIG_EXT4_FS=y
> >  CONFIG_EXT4_FS_POSIX_ACL=y
> > --
> > 2.17.1
> 
> Reviewed-by: Palmer Dabbelt 
> 
> IIRC there was supposed to be a follow-up to your QEMU patch set to rebase
> it on top of a refactoring of their RTC code, but I don't see it in my inbox. 
>  LMK
> if I missed it, as QEMU's soft freeze is in a few weeks and I'd like to make
> sure I get everything in.

I was hoping for the QEMU RTC refactoring to be merged soon but it has not
happened so far. I will wait a couple more days and then send v3 of the
QEMU patches.

> 
> Additionally: we should refactor our Kconfig to have some sort of
> CONFIG_SOC_VIRT that selects this stuff, like we have the
> CONFIG_SOC_SIFIVE.
> This will explicitly document why devices are in the defconfig, avoid
> duplicating a bunch of stuff between defconfigs, and provide an example of
> how we support multiple SOCs in a single image.

Yes, indeed we need CONFIG_SOC_VIRT but this will be a separate patch.

> 
> I don't see why either of these should block merging the patch, though.

Thanks,
Anup


Re: [PATCH v7 10/21] RISC-V: KVM: Handle MMIO exits for VCPU

2019-10-08 Thread Anup Patel
On Wed, Oct 9, 2019 at 4:14 AM Palmer Dabbelt  wrote:
>
> On Mon, 23 Sep 2019 04:12:17 PDT (-0700), pbonz...@redhat.com wrote:
> > On 04/09/19 18:15, Anup Patel wrote:
> >> +unsigned long guest_sstatus =
> >> +vcpu->arch.guest_context.sstatus | SR_MXR;
> >> +unsigned long guest_hstatus =
> >> +vcpu->arch.guest_context.hstatus | HSTATUS_SPRV;
> >> +unsigned long guest_vsstatus, old_stvec, tmp;
> >> +
> >> +guest_sstatus = csr_swap(CSR_SSTATUS, guest_sstatus);
> >> +old_stvec = csr_swap(CSR_STVEC, (ulong)&__kvm_riscv_unpriv_trap);
> >> +
> >> +if (read_insn) {
> >> +guest_vsstatus = csr_read_set(CSR_VSSTATUS, SR_MXR);
> >
> > Is this needed?  IIUC SSTATUS.MXR encompasses a wider set of permissions:
> >
> >   The HS-level MXR bit makes any executable page readable.  {\tt
> >   vsstatus}.MXR makes readable those pages marked executable at the VS
> >   translation level, but only if readable at the guest-physical
> >   translation level.
> >
> > So it should be enough to set SSTATUS.MXR=1 I think.  But you also
> > shouldn't set SSTATUS.MXR=1 in the !read_insn case.
> >
> > Also, you can drop the irq save/restore (which is already a save/restore
> > of SSTATUS) since you already write 0 to SSTATUS.SIE in your csr_swap.
> > Perhaps add a BUG_ON(guest_sstatus & SR_SIE) before the csr_swap?
> >
> >> +asm volatile ("\n"
> >> +"csrrw %[hstatus], " STR(CSR_HSTATUS) ", %[hstatus]\n"
> >> +"li %[tilen], 4\n"
> >> +"li %[tscause], 0\n"
> >> +"lhu %[val], (%[addr])\n"
> >> +"andi %[tmp], %[val], 3\n"
> >> +"addi %[tmp], %[tmp], -3\n"
> >> +"bne %[tmp], zero, 2f\n"
> >> +"lhu %[tmp], 2(%[addr])\n"
> >> +"sll %[tmp], %[tmp], 16\n"
> >> +"add %[val], %[val], %[tmp]\n"
> >> +"2: csrw " STR(CSR_HSTATUS) ", %[hstatus]"
> >> +: [hstatus] "+&r"(guest_hstatus), [val] "=&r" (val),
> >> +  [tmp] "=&r" (tmp), [tilen] "+&r" (tilen),
> >> +  [tscause] "+&r" (tscause)
> >> +: [addr] "r" (addr));
> >> +csr_write(CSR_VSSTATUS, guest_vsstatus);
> >
> >>
> >> +#ifndef CONFIG_RISCV_ISA_C
> >> +"li %[tilen], 4\n"
> >> +#else
> >> +"li %[tilen], 2\n"
> >> +#endif
> >
> > Can you use an assembler directive to force using a non-compressed
> > format for ld and lw?  This would get rid of tilen, which is costing 6
> > bytes (if I did the RVC math right) in order to save two. :)
> >
> > Paolo
> >
> >> +"li %[tscause], 0\n"
> >> +#ifdef CONFIG_64BIT
> >> +"ld %[val], (%[addr])\n"
> >> +#else
> >> +"lw %[val], (%[addr])\n"
> >> +#endif
>
> On Mon, 23 Sep 2019 06:09:43 PDT (-0700), a...@brainfault.org wrote:
> > On Mon, Sep 23, 2019 at 4:42 PM Paolo Bonzini  wrote:
> >>
> >> On 04/09/19 18:15, Anup Patel wrote:
> >> > + unsigned long guest_sstatus =
> >> > + vcpu->arch.guest_context.sstatus | SR_MXR;
> >> > + unsigned long guest_hstatus =
> >> > + vcpu->arch.guest_context.hstatus | HSTATUS_SPRV;
> >> > + unsigned long guest_vsstatus, old_stvec, tmp;
> >> > +
> >> > + guest_sstatus = csr_swap(CSR_SSTATUS, gu

Re: [PATCH v2 3/3] RISC-V: Move SBI related macros under uapi.

2019-10-02 Thread Anup Patel
On Sat, Sep 28, 2019 at 3:51 AM Christoph Hellwig  wrote:
>
> On Thu, Sep 26, 2019 at 05:09:15PM -0700, Atish Patra wrote:
> > All SBI related macros can be reused by KVM RISC-V and userspace tools
> > such as kvmtool, qemu-kvm. SBI calls can also be emulated by userspace
> > if required. Any future vendor extensions can leverage this to emulate
> > the specific extension in userspace instead of kernel.
>
> Just because userspace can use them that doesn't mean they are a
> userspace API.  Please don't do this as this limits how we can ever
> remove previously existing symbols.  Just copy over the current
> version of the file into the other project of your choice instead
> of creating an API we need to maintain.

These defines are indeed part of the KVM userspace API because we will
be forwarding SBI calls not handled by the KVM RISC-V kernel module to
KVM userspace (QEMU/KVMTOOL). The forwarded SBI call details
are passed to userspace via "struct kvm_run" of KVM_RUN ioctl.

Please refer PATCH17 and PATCH18 of KVM RISC-V v8 series.

Currently, we implement SBI v0.1 for the KVM Guest, hence we end up
forwarding CONSOLE_GETCHAR and CONSOLE_PUTCHAR to
KVM userspace.

In future we will implement SBI v0.2 for KVM Guest where we will be
forwarding the SBI v0.2 experimental and vendor extension calls
to KVM userspace.

Eventually, we will stop emulating SBI v0.1 for the Guest once we have
all required calls in SBI v0.2. At that time, all SBI v0.1 calls will be
always forwarded to KVM userspace.

Regards,
Anup


Re: [PATCH v2 2/3] RISC-V: Add basic support for SBI v0.2

2019-10-02 Thread Anup Patel
On Fri, Sep 27, 2019 at 5:39 AM Atish Patra  wrote:
>
> The SBI v0.2 introduces a base extension which is backward compatible
> with v0.1. Implement all helper functions and minimum required SBI
> calls from v0.2 for now. All other base extension function will be
> added later as per need.
> As v0.2 calling convention is backward compatible with v0.1, remove
> the v0.1 helper functions and just use v0.2 calling convention.
>
> Signed-off-by: Atish Patra 
> ---
>  arch/riscv/include/asm/sbi.h | 139 ++--
>  arch/riscv/kernel/Makefile   |   1 +
>  arch/riscv/kernel/sbi.c  | 241 +++
>  arch/riscv/kernel/setup.c|   2 +
>  4 files changed, 311 insertions(+), 72 deletions(-)
>  create mode 100644 arch/riscv/kernel/sbi.c
>
> diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
> index 2147f384fad0..279b7f10b3c2 100644
> --- a/arch/riscv/include/asm/sbi.h
> +++ b/arch/riscv/include/asm/sbi.h
> @@ -8,93 +8,88 @@
>
>  #include 
>
> -#define SBI_EXT_0_1_SET_TIMER 0x0
> -#define SBI_EXT_0_1_CONSOLE_PUTCHAR 0x1
> -#define SBI_EXT_0_1_CONSOLE_GETCHAR 0x2
> -#define SBI_EXT_0_1_CLEAR_IPI 0x3
> -#define SBI_EXT_0_1_SEND_IPI 0x4
> -#define SBI_EXT_0_1_REMOTE_FENCE_I 0x5
> -#define SBI_EXT_0_1_REMOTE_SFENCE_VMA 0x6
> -#define SBI_EXT_0_1_REMOTE_SFENCE_VMA_ASID 0x7
> -#define SBI_EXT_0_1_SHUTDOWN 0x8
> +enum sbi_ext_id {
> +   SBI_EXT_0_1_SET_TIMER = 0x0,
> +   SBI_EXT_0_1_CONSOLE_PUTCHAR = 0x1,
> +   SBI_EXT_0_1_CONSOLE_GETCHAR = 0x2,
> +   SBI_EXT_0_1_CLEAR_IPI = 0x3,
> +   SBI_EXT_0_1_SEND_IPI = 0x4,
> +   SBI_EXT_0_1_REMOTE_FENCE_I = 0x5,
> +   SBI_EXT_0_1_REMOTE_SFENCE_VMA = 0x6,
> +   SBI_EXT_0_1_REMOTE_SFENCE_VMA_ASID = 0x7,
> +   SBI_EXT_0_1_SHUTDOWN = 0x8,
> +   SBI_EXT_BASE = 0x10,
> +};
>
> -#define SBI_CALL(which, arg0, arg1, arg2, arg3) ({ \
> -   register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0);   \
> -   register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1);   \
> -   register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2);   \
> -   register uintptr_t a3 asm ("a3") = (uintptr_t)(arg3);   \
> -   register uintptr_t a7 asm ("a7") = (uintptr_t)(which);  \
> -   asm volatile ("ecall"   \
> - : "+r" (a0)   \
> - : "r" (a1), "r" (a2), "r" (a3), "r" (a7)  \
> - : "memory");  \
> -   a0; \
> -})
> +enum sbi_ext_base_fid {
> +   SBI_BASE_GET_SPEC_VERSION = 0,
> +   SBI_BASE_GET_IMP_ID,
> +   SBI_BASE_GET_IMP_VERSION,
> +   SBI_BASE_PROBE_EXT,
> +   SBI_BASE_GET_MVENDORID,
> +   SBI_BASE_GET_MARCHID,
> +   SBI_BASE_GET_MIMPID,
> +};
>
> -/* Lazy implementations until SBI is finalized */
> -#define SBI_CALL_0(which) SBI_CALL(which, 0, 0, 0, 0)
> -#define SBI_CALL_1(which, arg0) SBI_CALL(which, arg0, 0, 0, 0)
> -#define SBI_CALL_2(which, arg0, arg1) SBI_CALL(which, arg0, arg1, 0, 0)
> -#define SBI_CALL_3(which, arg0, arg1, arg2) \
> -   SBI_CALL(which, arg0, arg1, arg2, 0)
> -#define SBI_CALL_4(which, arg0, arg1, arg2, arg3) \
> -   SBI_CALL(which, arg0, arg1, arg2, arg3)
> +#define SBI_SPEC_VERSION_DEFAULT   0x1
> +#define SBI_SPEC_VERSION_MAJOR_OFFSET  24
> +#define SBI_SPEC_VERSION_MAJOR_MASK    0x7f
> +#define SBI_SPEC_VERSION_MINOR_MASK    0xff
>
> -static inline void sbi_console_putchar(int ch)
> -{
> -   SBI_CALL_1(SBI_EXT_0_1_CONSOLE_PUTCHAR, ch);
> -}
> +/* SBI return error codes */
> +#define SBI_SUCCESS            0
> +#define SBI_ERR_FAILURE        -1
> +#define SBI_ERR_NOT_SUPPORTED  -2
> +#define SBI_ERR_INVALID_PARAM   -3
> +#define SBI_ERR_DENIED -4
> +#define SBI_ERR_INVALID_ADDRESS -5
>
> -static inline int sbi_console_getchar(void)
> -{
> -   return SBI_CALL_0(SBI_EXT_0_1_CONSOLE_GETCHAR);
> -}
> -
> -static inline void sbi_set_timer(uint64_t stime_value)
> -{
> -#if __riscv_xlen == 32
> -   SBI_CALL_2(SBI_EXT_0_1_SET_TIMER, stime_value,
> - stime_value >> 32);
> -#else
> -   SBI_CALL_1(SBI_EXT_0_1_SET_TIMER, stime_value);
> -#endif
> -}
> +extern unsigned long sbi_spec_version;
> +struct sbiret {
> +   long error;
> +   long value;
> +};
>
> -static inline void sbi_shutdown(void)
> -{
> -   SBI_CALL_0(SBI_EXT_0_1_SHUTDOWN);
> -}
> +void sbi_init(void);
> +struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
> + unsigned long arg1, unsigned long arg2,
> + unsigned long arg3);
> +int sbi_err_map_linux_errorno(int err);
>
> -static inline void sbi_clear_ipi(void)
> -{
> -   SBI_CALL_0(SBI_EXT_0_1_CLEAR_IPI);
> -}
> +void sbi_console_putchar(int ch);
> +int sbi_console_getchar(void);
> +void sbi_set_timer(uint64_t stime_value);
> +void sbi_shutdown(void);
> +void 
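
As a usage illustration of the new calling convention, here is a sketch of
probing for an extension with the v0.2 base extension, built on the
sbi_ecall()/sbiret interface above (the helper name is hypothetical):

	static bool sbi_probe_extension(long ext)
	{
		struct sbiret ret;

		ret = sbi_ecall(SBI_EXT_BASE, SBI_BASE_PROBE_EXT,
				ext, 0, 0, 0);
		return !ret.error && ret.value;
	}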

[PATCH v8 19/19] RISC-V: KVM: Add MAINTAINERS entry

2019-10-02 Thread Anup Patel
Add myself as maintainer for KVM RISC-V and Atish as designated reviewer.

Signed-off-by: Atish Patra 
Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Alexander Graf 
---
 MAINTAINERS | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 296de2b51c83..67f6cb317866 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8980,6 +8980,16 @@ F:   arch/powerpc/include/asm/kvm*
 F: arch/powerpc/kvm/
 F: arch/powerpc/kernel/kvm*
 
+KERNEL VIRTUAL MACHINE FOR RISC-V (KVM/riscv)
+M: Anup Patel 
+R: Atish Patra 
+L: k...@vger.kernel.org
+T: git git://github.com/kvm-riscv/linux.git
+S: Maintained
+F: arch/riscv/include/uapi/asm/kvm*
+F: arch/riscv/include/asm/kvm*
+F: arch/riscv/kvm/
+
 KERNEL VIRTUAL MACHINE for s390 (KVM/s390)
 M: Christian Borntraeger 
 M: Janosch Frank 
-- 
2.17.1



[PATCH v8 17/19] RISC-V: KVM: Forward unhandled SBI calls to userspace

2019-10-02 Thread Anup Patel
Instead of returning an error to the Guest for unhandled SBI calls, we should
forward such SBI calls to KVM user-space tool (QEMU/KVMTOOL).

This way KVM userspace tool can do something about unhandled SBI calls:
1. Print unhandled SBI call details and kill the Guest
2. Emulate unhandled SBI call and resume the Guest

To achieve this, we end up having a RISC-V specific SBI exit reason
and riscv_sbi member under "struct kvm_run". The riscv_sbi member of
"struct kvm_run" added by this patch is compatible with both SBI v0.1
and SBI v0.2 specs.

Currently, we implement SBI v0.1 for Guest where CONSOLE_GETCHAR and
CONSOLE_PUTCHAR SBI calls are unhandled in the KVM RISC-V kernel module
so we forward these calls to userspace. In future when we implement
SBI v0.2 for Guest, we will forward SBI v0.2 experimental and vendor
extension calls to userspace.

Signed-off-by: Anup Patel 
---
 arch/riscv/include/asm/kvm_host.h |  8 
 arch/riscv/kvm/vcpu.c |  9 
 arch/riscv/kvm/vcpu_sbi.c | 69 +--
 include/uapi/linux/kvm.h  |  8 
 4 files changed, 81 insertions(+), 13 deletions(-)

diff --git a/arch/riscv/include/asm/kvm_host.h 
b/arch/riscv/include/asm/kvm_host.h
index 74ccd8d00ec5..6f44eefc1641 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -74,6 +74,10 @@ struct kvm_mmio_decode {
int return_handled;
 };
 
+struct kvm_sbi_context {
+   int return_handled;
+};
+
 #define KVM_MMU_PAGE_CACHE_NR_OBJS 32
 
 struct kvm_mmu_page_cache {
@@ -176,6 +180,9 @@ struct kvm_vcpu_arch {
/* MMIO instruction details */
struct kvm_mmio_decode mmio_decode;
 
+   /* SBI context */
+   struct kvm_sbi_context sbi_context;
+
/* Cache pages needed to program page tables with spinlock held */
struct kvm_mmu_page_cache mmu_page_cache;
 
@@ -250,6 +257,7 @@ bool kvm_riscv_vcpu_has_interrupt(struct kvm_vcpu *vcpu);
 void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu);
 void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu);
 
+int kvm_riscv_vcpu_sbi_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run);
 
 #endif /* __RISCV_KVM_HOST_H__ */
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index 8f2b058a4714..27174e2ec8a0 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -885,6 +885,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct 
kvm_run *run)
}
}
 
+   /* Process SBI value returned from user-space */
+   if (run->exit_reason == KVM_EXIT_RISCV_SBI) {
+   ret = kvm_riscv_vcpu_sbi_return(vcpu, vcpu->run);
+   if (ret) {
+   srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
+   return ret;
+   }
+   }
+
if (run->immediate_exit) {
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
return -EINTR;
diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c
index 88fa0faa3545..983ccaf2a54e 100644
--- a/arch/riscv/kvm/vcpu_sbi.c
+++ b/arch/riscv/kvm/vcpu_sbi.c
@@ -31,6 +31,44 @@ static void kvm_sbi_system_shutdown(struct kvm_vcpu *vcpu,
run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
 }
 
+static void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu,
+  struct kvm_run *run)
+{
+   struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+
+   vcpu->arch.sbi_context.return_handled = 0;
+   run->exit_reason = KVM_EXIT_RISCV_SBI;
+   run->riscv_sbi.extension_id = cp->a7;
+   run->riscv_sbi.function_id = cp->a6;
+   run->riscv_sbi.args[0] = cp->a0;
+   run->riscv_sbi.args[1] = cp->a1;
+   run->riscv_sbi.args[2] = cp->a2;
+   run->riscv_sbi.args[3] = cp->a3;
+   run->riscv_sbi.args[4] = cp->a4;
+   run->riscv_sbi.args[5] = cp->a5;
+   run->riscv_sbi.ret[0] = cp->a0;
+   run->riscv_sbi.ret[1] = cp->a1;
+}
+
+int kvm_riscv_vcpu_sbi_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+   struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+
+   /* Handle SBI return only once */
+   if (vcpu->arch.sbi_context.return_handled)
+   return 0;
+   vcpu->arch.sbi_context.return_handled = 1;
+
+   /* Update return values */
+   cp->a0 = run->riscv_sbi.ret[0];
+   cp->a1 = run->riscv_sbi.ret[1];
+
+   /* Move to next instruction */
+   vcpu->arch.guest_context.sepc += 4;
+
+   return 0;
+}
+
 int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
int i, ret = 1;
@@ -44,7 +82,16 @@ int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct 
kvm_run *run)
return -EINVAL;
 
switch (cp->a7) {
-   case SBI_SET_TI

[PATCH v8 18/19] RISC-V: KVM: Document RISC-V specific parts of KVM API.

2019-10-02 Thread Anup Patel
Document RISC-V specific parts of the KVM API, such as:
 - The interrupt numbers passed to the KVM_INTERRUPT ioctl.
 - The states supported by the KVM_{GET,SET}_MP_STATE ioctls.
 - The registers supported by the KVM_{GET,SET}_ONE_REG interface
   and the encoding of those register ids.
 - The exit reason KVM_EXIT_RISCV_SBI for SBI calls forwarded to
   userspace tool.

Signed-off-by: Anup Patel 
---
 Documentation/virt/kvm/api.txt | 158 +++--
 1 file changed, 151 insertions(+), 7 deletions(-)

diff --git a/Documentation/virt/kvm/api.txt b/Documentation/virt/kvm/api.txt
index 4833904d32a5..f9ea81fe1143 100644
--- a/Documentation/virt/kvm/api.txt
+++ b/Documentation/virt/kvm/api.txt
@@ -471,7 +471,7 @@ struct kvm_translation {
 4.16 KVM_INTERRUPT
 
 Capability: basic
-Architectures: x86, ppc, mips
+Architectures: x86, ppc, mips, riscv
 Type: vcpu ioctl
 Parameters: struct kvm_interrupt (in)
 Returns: 0 on success, negative on failure.
@@ -531,6 +531,22 @@ interrupt number dequeues the interrupt.
 
 This is an asynchronous vcpu ioctl and can be invoked from any thread.
 
+RISC-V:
+
+Queues an external interrupt to be injected into the virtual CPU. This ioctl
+is overloaded with 2 different irq values:
+
+a) KVM_INTERRUPT_SET
+
+  This sets the external interrupt for a virtual CPU and the virtual CPU
+  will receive it once it is ready.
+
+b) KVM_INTERRUPT_UNSET
+
+  This clears the pending external interrupt for a virtual CPU.
+
+This is an asynchronous vcpu ioctl and can be invoked from any thread.
+
 
 4.17 KVM_DEBUG_GUEST
 
@@ -1219,7 +1235,7 @@ for vm-wide capabilities.
 4.38 KVM_GET_MP_STATE
 
 Capability: KVM_CAP_MP_STATE
-Architectures: x86, s390, arm, arm64
+Architectures: x86, s390, arm, arm64, riscv
 Type: vcpu ioctl
 Parameters: struct kvm_mp_state (out)
 Returns: 0 on success; -1 on error
@@ -1233,7 +1249,8 @@ uniprocessor guests).
 
 Possible values are:
 
- - KVM_MP_STATE_RUNNABLE:the vcpu is currently running [x86,arm/arm64]
+ - KVM_MP_STATE_RUNNABLE:the vcpu is currently running
+ [x86,arm/arm64,riscv]
  - KVM_MP_STATE_UNINITIALIZED:   the vcpu is an application processor (AP)
  which has not yet received an INIT signal 
[x86]
  - KVM_MP_STATE_INIT_RECEIVED:   the vcpu has received an INIT signal, and is
@@ -1242,7 +1259,7 @@ Possible values are:
  is waiting for an interrupt [x86]
  - KVM_MP_STATE_SIPI_RECEIVED:   the vcpu has just received a SIPI (vector
  accessible via KVM_GET_VCPU_EVENTS) [x86]
- - KVM_MP_STATE_STOPPED: the vcpu is stopped [s390,arm/arm64]
+ - KVM_MP_STATE_STOPPED: the vcpu is stopped [s390,arm/arm64,riscv]
  - KVM_MP_STATE_CHECK_STOP:  the vcpu is in a special error state [s390]
  - KVM_MP_STATE_OPERATING:   the vcpu is operating (running or halted)
  [s390]
@@ -1253,7 +1270,7 @@ On x86, this ioctl is only useful after 
KVM_CREATE_IRQCHIP. Without an
 in-kernel irqchip, the multiprocessing state must be maintained by userspace on
 these architectures.
 
-For arm/arm64:
+For arm/arm64/riscv:
 
 The only states that are valid are KVM_MP_STATE_STOPPED and
 KVM_MP_STATE_RUNNABLE which reflect if the vcpu is paused or not.
@@ -1261,7 +1278,7 @@ KVM_MP_STATE_RUNNABLE which reflect if the vcpu is paused 
or not.
 4.39 KVM_SET_MP_STATE
 
 Capability: KVM_CAP_MP_STATE
-Architectures: x86, s390, arm, arm64
+Architectures: x86, s390, arm, arm64, riscv
 Type: vcpu ioctl
 Parameters: struct kvm_mp_state (in)
 Returns: 0 on success; -1 on error
@@ -1273,7 +1290,7 @@ On x86, this ioctl is only useful after 
KVM_CREATE_IRQCHIP. Without an
 in-kernel irqchip, the multiprocessing state must be maintained by userspace on
 these architectures.
 
-For arm/arm64:
+For arm/arm64/riscv:
 
 The only states that are valid are KVM_MP_STATE_STOPPED and
 KVM_MP_STATE_RUNNABLE which reflect if the vcpu should be paused or not.
@@ -2282,6 +2299,116 @@ following id bit patterns:
   0x7020  0003 02 <0:3> 
 
 
+RISC-V registers are mapped using the lower 32 bits. The upper 8 bits of
+that are the register group type.
+
+RISC-V config registers are meant for configuring a Guest VCPU and have
+the following id bit patterns:
+  0x8020 0000 01 <index into the kvm_riscv_config struct> (32bit Host)
+  0x8030 0000 01 <index into the kvm_riscv_config struct> (64bit Host)
+
+Following are the RISC-V config registers:
+
+Encoding                Register  Description
+------------------------------------------------------------------
+  0x80x0 0000 0100 0000 isa       ISA feature bitmap of Guest VCPU
+  0x80x0 0000 0100 0001 tbfreq    Time base frequency
+
+The isa config register can be read anytime but can only be written before
+a Guest VCPU runs. It will have ISA feature bits matching the underlying
+host set by default. The tbfreq config register is a read-only register and
+it will return the host timebase frequency.
+
+RISC-V core registers represent the general execution state of a
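
To make the encoding concrete, here is a sketch of a userspace read of the
isa config register on a 64-bit host (illustrative; it assumes KVM_REG_RISCV
and KVM_REG_SIZE_U64 are available from <linux/kvm.h> together with the
CONFIG macros from the RISC-V uapi header):

	unsigned long isa;
	struct kvm_one_reg reg = {
		.id   = KVM_REG_RISCV | KVM_REG_SIZE_U64 |
			KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(isa),
		.addr = (unsigned long)&isa,
	};

	/* vcpu_fd is the VCPU file descriptor */
	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);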

[PATCH v8 13/19] RISC-V: KVM: Add timer functionality

2019-10-02 Thread Anup Patel
From: Atish Patra 

The RISC-V hypervisor specification doesn't have any virtual timer
feature.

Due to this, the guest VCPU timer will be programmed via SBI calls.
The host will use a separate hrtimer event for each guest VCPU to
provide timer functionality. We inject a virtual timer interrupt to
the guest VCPU whenever the guest VCPU hrtimer event expires.

The following features are not supported yet and will be added in
future:
1. A time offset to adjust guest time from host time
2. A saved next event in guest vcpu for vm migration

Signed-off-by: Atish Patra 
Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
---
 arch/riscv/include/asm/kvm_host.h   |   4 +
 arch/riscv/include/asm/kvm_vcpu_timer.h |  30 +++
 arch/riscv/kvm/Makefile |   2 +-
 arch/riscv/kvm/vcpu.c   |   6 ++
 arch/riscv/kvm/vcpu_timer.c | 113 
 drivers/clocksource/timer-riscv.c   |   8 ++
 include/clocksource/timer-riscv.h   |  16 
 7 files changed, 178 insertions(+), 1 deletion(-)
 create mode 100644 arch/riscv/include/asm/kvm_vcpu_timer.h
 create mode 100644 arch/riscv/kvm/vcpu_timer.c
 create mode 100644 include/clocksource/timer-riscv.h

diff --git a/arch/riscv/include/asm/kvm_host.h 
b/arch/riscv/include/asm/kvm_host.h
index 79ceb2aa8ae6..9179ff019235 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -12,6 +12,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #ifdef CONFIG_64BIT
 #define KVM_MAX_VCPUS  (1U << 16)
@@ -168,6 +169,9 @@ struct kvm_vcpu_arch {
unsigned long irqs_pending;
unsigned long irqs_pending_mask;
 
+   /* VCPU Timer */
+   struct kvm_vcpu_timer timer;
+
/* MMIO instruction details */
struct kvm_mmio_decode mmio_decode;
 
diff --git a/arch/riscv/include/asm/kvm_vcpu_timer.h 
b/arch/riscv/include/asm/kvm_vcpu_timer.h
new file mode 100644
index ..6f904d49e27e
--- /dev/null
+++ b/arch/riscv/include/asm/kvm_vcpu_timer.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Atish Patra 
+ */
+
+#ifndef __KVM_VCPU_RISCV_TIMER_H
+#define __KVM_VCPU_RISCV_TIMER_H
+
+#include 
+
+struct kvm_vcpu_timer {
+   bool init_done;
+   /* Check if the timer is programmed */
+   bool next_set;
+   u64 next_cycles;
+   struct hrtimer hrt;
+   /* Mult & Shift values to get nanosec from cycles */
+   u32 mult;
+   u32 shift;
+};
+
+int kvm_riscv_vcpu_timer_init(struct kvm_vcpu *vcpu);
+int kvm_riscv_vcpu_timer_deinit(struct kvm_vcpu *vcpu);
+int kvm_riscv_vcpu_timer_reset(struct kvm_vcpu *vcpu);
+int kvm_riscv_vcpu_timer_next_event(struct kvm_vcpu *vcpu, u64 ncycles);
+
+#endif
diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile
index c0f57f26c13d..3e0c7558320d 100644
--- a/arch/riscv/kvm/Makefile
+++ b/arch/riscv/kvm/Makefile
@@ -9,6 +9,6 @@ ccflags-y := -Ivirt/kvm -Iarch/riscv/kvm
 kvm-objs := $(common-objs-y)
 
 kvm-objs += main.o vm.o vmid.o tlb.o mmu.o
-kvm-objs += vcpu.o vcpu_exit.o vcpu_switch.o
+kvm-objs += vcpu.o vcpu_exit.o vcpu_switch.o vcpu_timer.o
 
 obj-$(CONFIG_KVM)  += kvm.o
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index 12bd837f564a..2ca913f00570 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -54,6 +54,8 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
 
memcpy(cntx, reset_cntx, sizeof(*cntx));
 
+   kvm_riscv_vcpu_timer_reset(vcpu);
+
WRITE_ONCE(vcpu->arch.irqs_pending, 0);
WRITE_ONCE(vcpu->arch.irqs_pending_mask, 0);
 }
@@ -108,6 +110,9 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
cntx->hstatus |= HSTATUS_SP2P;
cntx->hstatus |= HSTATUS_SPV;
 
+   /* Setup VCPU timer */
+   kvm_riscv_vcpu_timer_init(vcpu);
+
/* Reset VCPU */
kvm_riscv_reset_vcpu(vcpu);
 
@@ -116,6 +121,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
+   kvm_riscv_vcpu_timer_deinit(vcpu);
kvm_riscv_stage2_flush_cache(vcpu);
kmem_cache_free(kvm_vcpu_cache, vcpu);
 }
diff --git a/arch/riscv/kvm/vcpu_timer.c b/arch/riscv/kvm/vcpu_timer.c
new file mode 100644
index ..9ffdd6ff8d6e
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_timer.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Atish Patra 
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define VCPU_TIMER_PROGRAM_THRESHOLD_NS    1000
+
+static enum hrtimer_restart kvm_riscv_vcpu_hrtimer_expired(struct hrtimer *h)
+{
+   struct kvm_vcpu_timer *t = container_of(h, struct kvm_vcpu_timer, hrt);
+   struct kvm_vcpu *vcpu = container_of(t, struct kvm_vcpu, ar
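
The mult/shift pair above implies the usual clocksource-style conversion
from cycles to nanoseconds. A sketch of the delta computation the hrtimer
programming needs (helper name and guards are illustrative, not the exact
code from this patch):

	static u64 kvm_riscv_delta_cycles2ns(u64 cycles, struct kvm_vcpu_timer *t)
	{
		u64 now = get_cycles64();

		/* clamp to zero if the requested event is already in the past */
		return (cycles > now) ? ((cycles - now) * t->mult) >> t->shift : 0;
	}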

[PATCH v8 11/19] RISC-V: KVM: Implement stage2 page table programming

2019-10-02 Thread Anup Patel
This patch implements all required functions for programming
the stage2 page table for each Guest/VM.

At high-level, the flow of stage2 related functions is similar
from KVM ARM/ARM64 implementation but the stage2 page table
format is quite different for KVM RISC-V.

Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
---
 arch/riscv/include/asm/kvm_host.h |  10 +
 arch/riscv/include/asm/pgtable-bits.h |   1 +
 arch/riscv/kvm/mmu.c  | 643 +-
 3 files changed, 644 insertions(+), 10 deletions(-)

diff --git a/arch/riscv/include/asm/kvm_host.h 
b/arch/riscv/include/asm/kvm_host.h
index 8aaf22a900be..bc27f664b443 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -73,6 +73,13 @@ struct kvm_mmio_decode {
int return_handled;
 };
 
+#define KVM_MMU_PAGE_CACHE_NR_OBJS 32
+
+struct kvm_mmu_page_cache {
+   int nobjs;
+   void *objects[KVM_MMU_PAGE_CACHE_NR_OBJS];
+};
+
 struct kvm_cpu_context {
unsigned long zero;
unsigned long ra;
@@ -164,6 +171,9 @@ struct kvm_vcpu_arch {
/* MMIO instruction details */
struct kvm_mmio_decode mmio_decode;
 
+   /* Cache pages needed to program page tables with spinlock held */
+   struct kvm_mmu_page_cache mmu_page_cache;
+
/* VCPU power-off state */
bool power_off;
 
diff --git a/arch/riscv/include/asm/pgtable-bits.h 
b/arch/riscv/include/asm/pgtable-bits.h
index bbaeb5d35842..be49d62fcc2b 100644
--- a/arch/riscv/include/asm/pgtable-bits.h
+++ b/arch/riscv/include/asm/pgtable-bits.h
@@ -26,6 +26,7 @@
 
 #define _PAGE_SPECIAL   _PAGE_SOFT
 #define _PAGE_TABLE _PAGE_PRESENT
+#define _PAGE_LEAF  (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)
 
 /*
  * _PAGE_PROT_NONE is set on not-present pages (and ignored by the hardware) to
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
index 2b965f9aac07..590669290139 100644
--- a/arch/riscv/kvm/mmu.c
+++ b/arch/riscv/kvm/mmu.c
@@ -18,6 +18,438 @@
 #include 
 #include 
 
+#ifdef CONFIG_64BIT
+#define stage2_have_pmd        true
+#define stage2_gpa_size        ((phys_addr_t)(1ULL << 39))
+#define stage2_cache_min_pages 2
+#else
+#define pmd_index(x)   0
+#define pfn_pmd(x, y)  ({ pmd_t __x = { 0 }; __x; })
+#define stage2_have_pmd        false
+#define stage2_gpa_size        ((phys_addr_t)(1ULL << 32))
+#define stage2_cache_min_pages 1
+#endif
+
+static int stage2_cache_topup(struct kvm_mmu_page_cache *pcache,
+ int min, int max)
+{
+   void *page;
+
+   BUG_ON(max > KVM_MMU_PAGE_CACHE_NR_OBJS);
+   if (pcache->nobjs >= min)
+   return 0;
+   while (pcache->nobjs < max) {
+   page = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+   if (!page)
+   return -ENOMEM;
+   pcache->objects[pcache->nobjs++] = page;
+   }
+
+   return 0;
+}
+
+static void stage2_cache_flush(struct kvm_mmu_page_cache *pcache)
+{
+   while (pcache && pcache->nobjs)
+   free_page((unsigned long)pcache->objects[--pcache->nobjs]);
+}
+
+static void *stage2_cache_alloc(struct kvm_mmu_page_cache *pcache)
+{
+   void *p;
+
+   if (!pcache)
+   return NULL;
+
+   BUG_ON(!pcache->nobjs);
+   p = pcache->objects[--pcache->nobjs];
+
+   return p;
+}
+
+struct local_guest_tlb_info {
+   struct kvm_vmid *vmid;
+   gpa_t addr;
+};
+
+static void local_guest_tlb_flush_vmid_gpa(void *info)
+{
+   struct local_guest_tlb_info *infop = info;
+
+   __kvm_riscv_hfence_gvma_vmid_gpa(READ_ONCE(infop->vmid->vmid_version),
+infop->addr);
+}
+
+static void stage2_remote_tlb_flush(struct kvm *kvm, gpa_t addr)
+{
+   struct local_guest_tlb_info info;
+   struct kvm_vmid *vmid = &kvm->arch.vmid;
+
+   /*
+* Ideally, we should have a SBI call OR some remote TLB instruction
+* but we don't have it so we explicitly flush TLBs using IPIs.
+*
+* TODO: Instead of cpu_online_mask, we should only target CPUs
+* where the Guest/VM is running.
+*/
+   info.vmid = vmid;
+   info.addr = addr;
+   preempt_disable();
+   smp_call_function_many(cpu_online_mask,
+  local_guest_tlb_flush_vmid_gpa, &info, true);
+   preempt_enable();
+}
+
+static int stage2_set_pgd(struct kvm *kvm, gpa_t addr, const pgd_t *new_pgd)
+{
+   pgd_t *pgdp = &kvm->arch.pgd[pgd_index(addr)];
+
+   *pgdp = *new_pgd;
+   if (pgd_val(*pgdp) & _PAGE_LEAF)
+   stage2_remote_tlb_flush(kvm, addr);
+
+   return 0;
+}
+
+static int stage2_set_pmd(struct kvm *kvm, struct kvm_mmu_page_cache *pcache,
+ gpa_t addr, const pmd_t *new_pmd)
+{
+   int rc;
+   pmd_
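
The page cache above exists so that page-table pages are never allocated
while the stage2 lock is held; the intended usage pattern is roughly as
follows (illustrative, not the exact call sites from this patch):

	/* sleepable context: refill the cache first */
	ret = stage2_cache_topup(&vcpu->arch.mmu_page_cache,
				 stage2_cache_min_pages,
				 KVM_MMU_PAGE_CACHE_NR_OBJS);
	if (ret)
		return ret;

	/* atomic context: consume pre-allocated pages */
	spin_lock(&kvm->mmu_lock);
	next_table = stage2_cache_alloc(&vcpu->arch.mmu_page_cache);
	/* ... install the missing page-table level ... */
	spin_unlock(&kvm->mmu_lock);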

[PATCH v8 10/19] RISC-V: KVM: Implement VMID allocator

2019-10-02 Thread Anup Patel
We implement a simple VMID allocator for Guests/VMs which:
1. Detects number of VMID bits at boot-time
2. Uses atomic number to track VMID version and increments
   VMID version whenever we run-out of VMIDs
3. Flushes Guest TLBs on all host CPUs whenever we run-out
   of VMIDs
4. Force updates HW Stage2 VMID for each Guest VCPU whenever
   VMID changes using VCPU request KVM_REQ_UPDATE_HGATP

Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Alexander Graf 
---
 arch/riscv/include/asm/kvm_host.h |  25 ++
 arch/riscv/kvm/Makefile   |   3 +-
 arch/riscv/kvm/main.c |   4 +
 arch/riscv/kvm/tlb.S  |  43 +++
 arch/riscv/kvm/vcpu.c |   9 +++
 arch/riscv/kvm/vm.c   |   6 ++
 arch/riscv/kvm/vmid.c | 123 ++
 7 files changed, 212 insertions(+), 1 deletion(-)
 create mode 100644 arch/riscv/kvm/tlb.S
 create mode 100644 arch/riscv/kvm/vmid.c

diff --git a/arch/riscv/include/asm/kvm_host.h 
b/arch/riscv/include/asm/kvm_host.h
index 2a5209fff68d..8aaf22a900be 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -27,6 +27,7 @@
 #define KVM_REQ_SLEEP \
KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(1)
+#define KVM_REQ_UPDATE_HGATP   KVM_ARCH_REQ(2)
 
 struct kvm_vm_stat {
ulong remote_tlb_flush;
@@ -47,7 +48,19 @@ struct kvm_vcpu_stat {
 struct kvm_arch_memory_slot {
 };
 
+struct kvm_vmid {
+   /*
+* Writes to vmid_version and vmid happen with vmid_lock held
+* whereas reads happen without any lock held.
+*/
+   unsigned long vmid_version;
+   unsigned long vmid;
+};
+
 struct kvm_arch {
+   /* stage2 vmid */
+   struct kvm_vmid vmid;
+
/* stage2 page table */
pgd_t *pgd;
phys_addr_t pgd_phys;
@@ -170,6 +183,12 @@ static inline void kvm_arch_vcpu_block_finish(struct 
kvm_vcpu *vcpu) {}
 int kvm_riscv_setup_vsip(void);
 void kvm_riscv_cleanup_vsip(void);
 
+void __kvm_riscv_hfence_gvma_vmid_gpa(unsigned long vmid,
+ unsigned long gpa);
+void __kvm_riscv_hfence_gvma_vmid(unsigned long vmid);
+void __kvm_riscv_hfence_gvma_gpa(unsigned long gpa);
+void __kvm_riscv_hfence_gvma_all(void);
+
 int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned long hva,
 bool is_write);
 void kvm_riscv_stage2_flush_cache(struct kvm_vcpu *vcpu);
@@ -177,6 +196,12 @@ int kvm_riscv_stage2_alloc_pgd(struct kvm *kvm);
 void kvm_riscv_stage2_free_pgd(struct kvm *kvm);
 void kvm_riscv_stage2_update_hgatp(struct kvm_vcpu *vcpu);
 
+void kvm_riscv_stage2_vmid_detect(void);
+unsigned long kvm_riscv_stage2_vmid_bits(void);
+int kvm_riscv_stage2_vmid_init(struct kvm *kvm);
+bool kvm_riscv_stage2_vmid_ver_changed(struct kvm_vmid *vmid);
+void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu);
+
 void __kvm_riscv_unpriv_trap(void);
 
 unsigned long kvm_riscv_vcpu_unpriv_read(struct kvm_vcpu *vcpu,
diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile
index 845579273727..c0f57f26c13d 100644
--- a/arch/riscv/kvm/Makefile
+++ b/arch/riscv/kvm/Makefile
@@ -8,6 +8,7 @@ ccflags-y := -Ivirt/kvm -Iarch/riscv/kvm
 
 kvm-objs := $(common-objs-y)
 
-kvm-objs += main.o vm.o mmu.o vcpu.o vcpu_exit.o vcpu_switch.o
+kvm-objs += main.o vm.o vmid.o tlb.o mmu.o
+kvm-objs += vcpu.o vcpu_exit.o vcpu_switch.o
 
 obj-$(CONFIG_KVM)  += kvm.o
diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c
index d088247843c5..55df85184241 100644
--- a/arch/riscv/kvm/main.c
+++ b/arch/riscv/kvm/main.c
@@ -72,8 +72,12 @@ int kvm_arch_init(void *opaque)
if (ret)
return ret;
 
+   kvm_riscv_stage2_vmid_detect();
+
kvm_info("hypervisor extension available\n");
 
+   kvm_info("host has %ld VMID bits\n", kvm_riscv_stage2_vmid_bits());
+
return 0;
 }
 
diff --git a/arch/riscv/kvm/tlb.S b/arch/riscv/kvm/tlb.S
new file mode 100644
index ..453fca8d7940
--- /dev/null
+++ b/arch/riscv/kvm/tlb.S
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Anup Patel 
+ */
+
+#include 
+#include 
+
+   .text
+   .altmacro
+   .option norelax
+
+   /*
+* Instruction encoding of hfence.gvma is:
+* 0110001 rs2(5) rs1(5) 000 0 1110011
+*/
+
+ENTRY(__kvm_riscv_hfence_gvma_vmid_gpa)
+   /* hfence.gvma a1, a0 */
+   .word 0x62a60073
+   ret
+ENDPROC(__kvm_riscv_hfence_gvma_vmid_gpa)
+
+ENTRY(__kvm_riscv_hfence_gvma_vmid)
+   /* hfence.gvma zero, a0 */
+   .word 0x62a00073
+   ret
+ENDPROC(__kvm_riscv_hfence_gvma_vmid)
+
+ENTRY(__kvm_riscv_hfence_gvma_gpa)
+   /* hfence.gvma a0 */
+   .word 0x62050073
+   ret
+ENDPROC(__kvm_r
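
A sketch of how the declarations above fit together in the VCPU run loop
(bodies illustrative; only the function names come from this patch):

	/* before entering the Guest */
	if (kvm_riscv_stage2_vmid_ver_changed(&vcpu->kvm->arch.vmid))
		kvm_riscv_stage2_vmid_update(vcpu);

	/* a VMID change raises KVM_REQ_UPDATE_HGATP on every VCPU */
	if (kvm_check_request(KVM_REQ_UPDATE_HGATP, vcpu))
		kvm_riscv_stage2_update_hgatp(vcpu);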

[PATCH v8 15/19] RISC-V: KVM: Implement ONE REG interface for FP registers

2019-10-02 Thread Anup Patel
From: Atish Patra 

Add a KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctl interface for floating
point registers such as F0-F31 and FCSR. This support is added for
both 'F' and 'D' extensions.

Signed-off-by: Atish Patra 
Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Alexander Graf 
---
 arch/riscv/include/uapi/asm/kvm.h |  10 +++
 arch/riscv/kvm/vcpu.c | 104 ++
 2 files changed, 114 insertions(+)

diff --git a/arch/riscv/include/uapi/asm/kvm.h 
b/arch/riscv/include/uapi/asm/kvm.h
index 997b85f6fded..19811823ab70 100644
--- a/arch/riscv/include/uapi/asm/kvm.h
+++ b/arch/riscv/include/uapi/asm/kvm.h
@@ -96,6 +96,16 @@ struct kvm_riscv_csr {
 #define KVM_REG_RISCV_CSR_REG(name)\
(offsetof(struct kvm_riscv_csr, name) / sizeof(unsigned long))
 
+/* F extension registers are mapped as type4 */
+#define KVM_REG_RISCV_FP_F (0x04 << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_FP_F_REG(name)   \
+   (offsetof(struct __riscv_f_ext_state, name) / sizeof(u32))
+
+/* D extension registers are mapped as type 5 */
+#define KVM_REG_RISCV_FP_D (0x05 << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_FP_D_REG(name)   \
+   (offsetof(struct __riscv_d_ext_state, name) / sizeof(u64))
+
 #endif
 
 #endif /* __LINUX_KVM_RISCV_H */
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index 67f9dd66f2db..8f2b058a4714 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -431,6 +431,98 @@ static int kvm_riscv_vcpu_set_reg_csr(struct kvm_vcpu 
*vcpu,
return 0;
 }
 
+static int kvm_riscv_vcpu_get_reg_fp(struct kvm_vcpu *vcpu,
+const struct kvm_one_reg *reg,
+unsigned long rtype)
+{
+   struct kvm_cpu_context *cntx = >arch.guest_context;
+   unsigned long isa = vcpu->arch.isa;
+   unsigned long __user *uaddr =
+   (unsigned long __user *)(unsigned long)reg->addr;
+   unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+   KVM_REG_SIZE_MASK |
+   rtype);
+   void *reg_val;
+
+   if ((rtype == KVM_REG_RISCV_FP_F) &&
+   riscv_isa_extension_available(&isa, f)) {
+   if (KVM_REG_SIZE(reg->id) != sizeof(u32))
+   return -EINVAL;
+   if (reg_num == KVM_REG_RISCV_FP_F_REG(fcsr))
+   reg_val = &cntx->fp.f.fcsr;
+   else if ((KVM_REG_RISCV_FP_F_REG(f[0]) <= reg_num) &&
+ reg_num <= KVM_REG_RISCV_FP_F_REG(f[31]))
+   reg_val = &cntx->fp.f.f[reg_num];
+   else
+   return -EINVAL;
+   } else if ((rtype == KVM_REG_RISCV_FP_D) &&
+  riscv_isa_extension_available(&isa, d)) {
+   if (reg_num == KVM_REG_RISCV_FP_D_REG(fcsr)) {
+   if (KVM_REG_SIZE(reg->id) != sizeof(u32))
+   return -EINVAL;
+   reg_val = &cntx->fp.d.fcsr;
+   } else if ((KVM_REG_RISCV_FP_D_REG(f[0]) <= reg_num) &&
+  reg_num <= KVM_REG_RISCV_FP_D_REG(f[31])) {
+   if (KVM_REG_SIZE(reg->id) != sizeof(u64))
+   return -EINVAL;
+   reg_val = &cntx->fp.d.f[reg_num];
+   } else
+   return -EINVAL;
+   } else
+   return -EINVAL;
+
+   if (copy_to_user(uaddr, reg_val, KVM_REG_SIZE(reg->id)))
+   return -EFAULT;
+
+   return 0;
+}
+
+static int kvm_riscv_vcpu_set_reg_fp(struct kvm_vcpu *vcpu,
+const struct kvm_one_reg *reg,
+unsigned long rtype)
+{
+   struct kvm_cpu_context *cntx = >arch.guest_context;
+   unsigned long isa = vcpu->arch.isa;
+   unsigned long __user *uaddr =
+   (unsigned long __user *)(unsigned long)reg->addr;
+   unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+   KVM_REG_SIZE_MASK |
+   rtype);
+   void *reg_val;
+
+   if ((rtype == KVM_REG_RISCV_FP_F) &&
+   riscv_isa_extension_available(&isa, f)) {
+   if (KVM_REG_SIZE(reg->id) != sizeof(u32))
+   return -EINVAL;
+   if (reg_num == KVM_REG_RISCV_FP_F_REG(fcsr))
+   reg_val = &cntx->fp.f.fcsr;
+   else if ((KVM_REG_RISCV_FP_F_REG(f[0]) <= reg_num) &&
+ reg_num <= KVM_REG_RISCV_FP_F_REG(f[31]))
+   reg_val = &cntx->fp.f.f[reg_num];
+   else
+   return -E

[PATCH v8 14/19] RISC-V: KVM: FP lazy save/restore

2019-10-02 Thread Anup Patel
From: Atish Patra 

This patch adds floating point (F and D extension) context save/restore
for guest VCPUs. The FP context is saved and restored lazily only when
the kernel enters/exits the in-kernel run loop and not during the KVM world
switch. This way FP save/restore has minimal impact on KVM performance.

Signed-off-by: Atish Patra 
Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Alexander Graf 
---
 arch/riscv/include/asm/kvm_host.h |   5 +
 arch/riscv/kernel/asm-offsets.c   |  72 +
 arch/riscv/kvm/vcpu.c |  81 ++
 arch/riscv/kvm/vcpu_switch.S  | 174 ++
 4 files changed, 332 insertions(+)

diff --git a/arch/riscv/include/asm/kvm_host.h 
b/arch/riscv/include/asm/kvm_host.h
index 9179ff019235..928c67828b1b 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -117,6 +117,7 @@ struct kvm_cpu_context {
unsigned long sepc;
unsigned long sstatus;
unsigned long hstatus;
+   union __riscv_fp_state fp;
 };
 
 struct kvm_vcpu_csr {
@@ -236,6 +237,10 @@ int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct 
kvm_run *run,
unsigned long scause, unsigned long stval);
 
 void __kvm_riscv_switch_to(struct kvm_vcpu_arch *vcpu_arch);
+void __kvm_riscv_fp_f_save(struct kvm_cpu_context *context);
+void __kvm_riscv_fp_f_restore(struct kvm_cpu_context *context);
+void __kvm_riscv_fp_d_save(struct kvm_cpu_context *context);
+void __kvm_riscv_fp_d_restore(struct kvm_cpu_context *context);
 
 int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
 int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
index 711656710190..9980069a1acf 100644
--- a/arch/riscv/kernel/asm-offsets.c
+++ b/arch/riscv/kernel/asm-offsets.c
@@ -185,6 +185,78 @@ void asm_offsets(void)
OFFSET(KVM_ARCH_HOST_SSCRATCH, kvm_vcpu_arch, host_sscratch);
OFFSET(KVM_ARCH_HOST_STVEC, kvm_vcpu_arch, host_stvec);
 
+   /* F extension */
+
+   OFFSET(KVM_ARCH_FP_F_F0, kvm_cpu_context, fp.f.f[0]);
+   OFFSET(KVM_ARCH_FP_F_F1, kvm_cpu_context, fp.f.f[1]);
+   OFFSET(KVM_ARCH_FP_F_F2, kvm_cpu_context, fp.f.f[2]);
+   OFFSET(KVM_ARCH_FP_F_F3, kvm_cpu_context, fp.f.f[3]);
+   OFFSET(KVM_ARCH_FP_F_F4, kvm_cpu_context, fp.f.f[4]);
+   OFFSET(KVM_ARCH_FP_F_F5, kvm_cpu_context, fp.f.f[5]);
+   OFFSET(KVM_ARCH_FP_F_F6, kvm_cpu_context, fp.f.f[6]);
+   OFFSET(KVM_ARCH_FP_F_F7, kvm_cpu_context, fp.f.f[7]);
+   OFFSET(KVM_ARCH_FP_F_F8, kvm_cpu_context, fp.f.f[8]);
+   OFFSET(KVM_ARCH_FP_F_F9, kvm_cpu_context, fp.f.f[9]);
+   OFFSET(KVM_ARCH_FP_F_F10, kvm_cpu_context, fp.f.f[10]);
+   OFFSET(KVM_ARCH_FP_F_F11, kvm_cpu_context, fp.f.f[11]);
+   OFFSET(KVM_ARCH_FP_F_F12, kvm_cpu_context, fp.f.f[12]);
+   OFFSET(KVM_ARCH_FP_F_F13, kvm_cpu_context, fp.f.f[13]);
+   OFFSET(KVM_ARCH_FP_F_F14, kvm_cpu_context, fp.f.f[14]);
+   OFFSET(KVM_ARCH_FP_F_F15, kvm_cpu_context, fp.f.f[15]);
+   OFFSET(KVM_ARCH_FP_F_F16, kvm_cpu_context, fp.f.f[16]);
+   OFFSET(KVM_ARCH_FP_F_F17, kvm_cpu_context, fp.f.f[17]);
+   OFFSET(KVM_ARCH_FP_F_F18, kvm_cpu_context, fp.f.f[18]);
+   OFFSET(KVM_ARCH_FP_F_F19, kvm_cpu_context, fp.f.f[19]);
+   OFFSET(KVM_ARCH_FP_F_F20, kvm_cpu_context, fp.f.f[20]);
+   OFFSET(KVM_ARCH_FP_F_F21, kvm_cpu_context, fp.f.f[21]);
+   OFFSET(KVM_ARCH_FP_F_F22, kvm_cpu_context, fp.f.f[22]);
+   OFFSET(KVM_ARCH_FP_F_F23, kvm_cpu_context, fp.f.f[23]);
+   OFFSET(KVM_ARCH_FP_F_F24, kvm_cpu_context, fp.f.f[24]);
+   OFFSET(KVM_ARCH_FP_F_F25, kvm_cpu_context, fp.f.f[25]);
+   OFFSET(KVM_ARCH_FP_F_F26, kvm_cpu_context, fp.f.f[26]);
+   OFFSET(KVM_ARCH_FP_F_F27, kvm_cpu_context, fp.f.f[27]);
+   OFFSET(KVM_ARCH_FP_F_F28, kvm_cpu_context, fp.f.f[28]);
+   OFFSET(KVM_ARCH_FP_F_F29, kvm_cpu_context, fp.f.f[29]);
+   OFFSET(KVM_ARCH_FP_F_F30, kvm_cpu_context, fp.f.f[30]);
+   OFFSET(KVM_ARCH_FP_F_F31, kvm_cpu_context, fp.f.f[31]);
+   OFFSET(KVM_ARCH_FP_F_FCSR, kvm_cpu_context, fp.f.fcsr);
+
+   /* D extension */
+
+   OFFSET(KVM_ARCH_FP_D_F0, kvm_cpu_context, fp.d.f[0]);
+   OFFSET(KVM_ARCH_FP_D_F1, kvm_cpu_context, fp.d.f[1]);
+   OFFSET(KVM_ARCH_FP_D_F2, kvm_cpu_context, fp.d.f[2]);
+   OFFSET(KVM_ARCH_FP_D_F3, kvm_cpu_context, fp.d.f[3]);
+   OFFSET(KVM_ARCH_FP_D_F4, kvm_cpu_context, fp.d.f[4]);
+   OFFSET(KVM_ARCH_FP_D_F5, kvm_cpu_context, fp.d.f[5]);
+   OFFSET(KVM_ARCH_FP_D_F6, kvm_cpu_context, fp.d.f[6]);
+   OFFSET(KVM_ARCH_FP_D_F7, kvm_cpu_context, fp.d.f[7]);
+   OFFSET(KVM_ARCH_FP_D_F8, kvm_cpu_context, fp.d.f[8]);
+   OFFSET(KVM_ARCH_FP_D_F9, kvm_cpu_context, fp.d.f[9]);
+   OFFSET(KVM_ARCH_FP_D_F10, kvm_cpu_context, fp.d.f[10]);
+   OFFSET(KVM_ARCH_FP_D_F11
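
The lazy restore path described in the commit message has roughly this
shape, gated on the Guest ISA bits (a sketch; the exact helpers live in
this patch and the call sites are the run-loop entry/exit, not the world
switch):

	static void kvm_riscv_vcpu_guest_fp_restore(struct kvm_cpu_context *cntx,
						    unsigned long isa)
	{
		if ((cntx->sstatus & SR_FS) != SR_FS_OFF) {
			if (riscv_isa_extension_available(&isa, d))
				__kvm_riscv_fp_d_restore(cntx);
			else if (riscv_isa_extension_available(&isa, f))
				__kvm_riscv_fp_f_restore(cntx);
		}
	}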

[PATCH v8 16/19] RISC-V: KVM: Add SBI v0.1 support

2019-10-02 Thread Anup Patel
From: Atish Patra 

The KVM host kernel running in HS-mode needs to handle SBI calls coming
from guest kernel running in VS-mode.

This patch adds SBI v0.1 support in KVM RISC-V. All the SBI calls are
implemented correctly except remote tlb flushes. For remote TLB flushes,
we are doing a full TLB flush and this will be optimized in the future.

Signed-off-by: Atish Patra 
Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
---
 arch/riscv/include/asm/kvm_host.h |   2 +
 arch/riscv/kvm/Makefile   |   2 +-
 arch/riscv/kvm/vcpu_exit.c|   4 ++
 arch/riscv/kvm/vcpu_sbi.c | 106 ++
 4 files changed, 113 insertions(+), 1 deletion(-)
 create mode 100644 arch/riscv/kvm/vcpu_sbi.c

diff --git a/arch/riscv/include/asm/kvm_host.h 
b/arch/riscv/include/asm/kvm_host.h
index 928c67828b1b..74ccd8d00ec5 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -250,4 +250,6 @@ bool kvm_riscv_vcpu_has_interrupt(struct kvm_vcpu *vcpu);
 void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu);
 void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu);
 
+int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run);
+
 #endif /* __RISCV_KVM_HOST_H__ */
diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile
index 3e0c7558320d..b56dc1650d2c 100644
--- a/arch/riscv/kvm/Makefile
+++ b/arch/riscv/kvm/Makefile
@@ -9,6 +9,6 @@ ccflags-y := -Ivirt/kvm -Iarch/riscv/kvm
 kvm-objs := $(common-objs-y)
 
 kvm-objs += main.o vm.o vmid.o tlb.o mmu.o
-kvm-objs += vcpu.o vcpu_exit.o vcpu_switch.o vcpu_timer.o
+kvm-objs += vcpu.o vcpu_exit.o vcpu_switch.o vcpu_timer.o vcpu_sbi.o
 
 obj-$(CONFIG_KVM)  += kvm.o
diff --git a/arch/riscv/kvm/vcpu_exit.c b/arch/riscv/kvm/vcpu_exit.c
index 7507b859246b..0e9b0ffa169d 100644
--- a/arch/riscv/kvm/vcpu_exit.c
+++ b/arch/riscv/kvm/vcpu_exit.c
@@ -587,6 +587,10 @@ int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct 
kvm_run *run,
(vcpu->arch.guest_context.hstatus & HSTATUS_STL))
ret = stage2_page_fault(vcpu, run, scause, stval);
break;
+   case EXC_SUPERVISOR_SYSCALL:
+   if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV)
+   ret = kvm_riscv_vcpu_sbi_ecall(vcpu, run);
+   break;
default:
break;
};
diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c
new file mode 100644
index ..88fa0faa3545
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_sbi.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0
+/**
+ * Copyright (c) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Atish Patra 
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define SBI_VERSION_MAJOR  0
+#define SBI_VERSION_MINOR  1
+
+static void kvm_sbi_system_shutdown(struct kvm_vcpu *vcpu,
+   struct kvm_run *run, u32 type)
+{
+   int i;
+   struct kvm_vcpu *tmp;
+
+   kvm_for_each_vcpu(i, tmp, vcpu->kvm)
+   tmp->arch.power_off = true;
+   kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP);
+
+   memset(&run->system_event, 0, sizeof(run->system_event));
+   run->system_event.type = type;
+   run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
+}
+
+int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+   int i, ret = 1;
+   u64 next_cycle;
+   struct kvm_vcpu *rvcpu;
+   bool next_sepc = true;
+   ulong hmask, ut_scause = 0;
+   struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+
+   if (!cp)
+   return -EINVAL;
+
+   switch (cp->a7) {
+   case SBI_SET_TIMER:
+#if __riscv_xlen == 32
+   next_cycle = ((u64)cp->a1 << 32) | (u64)cp->a0;
+#else
+   next_cycle = (u64)cp->a0;
+#endif
+   kvm_riscv_vcpu_timer_next_event(vcpu, next_cycle);
+   break;
+   case SBI_CLEAR_IPI:
+   kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_S_SOFT);
+   break;
+   case SBI_SEND_IPI:
+   hmask = kvm_riscv_vcpu_unpriv_read(vcpu, false, cp->a0,
+  &ut_scause);
+   if (ut_scause) {
+   kvm_riscv_vcpu_trap_redirect(vcpu, ut_scause,
+cp->a0);
+   next_sepc = false;
+   } else {
+   for_each_set_bit(i, &hmask, BITS_PER_LONG) {
+   rvcpu = kvm_get_vcpu_by_id(vcpu->kvm, i);
+   kvm_riscv_vcpu_set_interrupt(rvcpu, IRQ_S_SOFT);
+   }
+   }
+   break;
+   case SBI_SHUTDOWN:
+   kvm_sbi_system_shutdown(vcpu, run, KVM_SYSTEM_EVENT_SHUTDOWN);
+ 
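
For reference, the Guest-side calling convention this handler services is
the legacy v0.1 one from the old SBI_CALL macro: call number in a7,
arguments in a0/a1, return value in a0. A sketch:

	static inline long sbi_legacy_call(unsigned long which,
					   unsigned long arg0,
					   unsigned long arg1)
	{
		register uintptr_t a0 asm ("a0") = arg0;
		register uintptr_t a1 asm ("a1") = arg1;
		register uintptr_t a7 asm ("a7") = which;

		asm volatile ("ecall"
			      : "+r" (a0)
			      : "r" (a1), "r" (a7)
			      : "memory");
		return a0;
	}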

[PATCH v8 12/19] RISC-V: KVM: Implement MMU notifiers

2019-10-02 Thread Anup Patel
This patch implements MMU notifiers for KVM RISC-V so that the Guest
physical address space stays in sync with the Host physical address space.

This will allow swapping, page migration, etc to work transparently
with KVM RISC-V.

Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Alexander Graf 
---
 arch/riscv/include/asm/kvm_host.h |   7 ++
 arch/riscv/kvm/Kconfig|   1 +
 arch/riscv/kvm/mmu.c  | 200 +-
 arch/riscv/kvm/vm.c   |   1 +
 4 files changed, 208 insertions(+), 1 deletion(-)

diff --git a/arch/riscv/include/asm/kvm_host.h 
b/arch/riscv/include/asm/kvm_host.h
index bc27f664b443..79ceb2aa8ae6 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -193,6 +193,13 @@ static inline void kvm_arch_vcpu_block_finish(struct 
kvm_vcpu *vcpu) {}
 int kvm_riscv_setup_vsip(void);
 void kvm_riscv_cleanup_vsip(void);
 
+#define KVM_ARCH_WANT_MMU_NOTIFIER
+int kvm_unmap_hva_range(struct kvm *kvm,
+   unsigned long start, unsigned long end);
+int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
+
 void __kvm_riscv_hfence_gvma_vmid_gpa(unsigned long vmid,
  unsigned long gpa);
 void __kvm_riscv_hfence_gvma_vmid(unsigned long vmid);
diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig
index 9cca98c4673b..d8fa13b0da18 100644
--- a/arch/riscv/kvm/Kconfig
+++ b/arch/riscv/kvm/Kconfig
@@ -20,6 +20,7 @@ if VIRTUALIZATION
 config KVM
tristate "Kernel-based Virtual Machine (KVM) support (EXPERIMENTAL)"
depends on OF
+   select MMU_NOTIFIER
select PREEMPT_NOTIFIERS
select ANON_INODES
select KVM_MMIO
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
index 590669290139..d8a692d3e640 100644
--- a/arch/riscv/kvm/mmu.c
+++ b/arch/riscv/kvm/mmu.c
@@ -67,6 +67,66 @@ static void *stage2_cache_alloc(struct kvm_mmu_page_cache 
*pcache)
return p;
 }
 
+static int stage2_pgdp_test_and_clear_young(pgd_t *pgd)
+{
+   return ptep_test_and_clear_young(NULL, 0, (pte_t *)pgd);
+}
+
+static int stage2_pmdp_test_and_clear_young(pmd_t *pmd)
+{
+   return ptep_test_and_clear_young(NULL, 0, (pte_t *)pmd);
+}
+
+static int stage2_ptep_test_and_clear_young(pte_t *pte)
+{
+   return ptep_test_and_clear_young(NULL, 0, pte);
+}
+
+static bool stage2_get_leaf_entry(struct kvm *kvm, gpa_t addr,
+ pgd_t **pgdpp, pmd_t **pmdpp, pte_t **ptepp)
+{
+   pgd_t *pgdp;
+   pmd_t *pmdp;
+   pte_t *ptep;
+
+   *pgdpp = NULL;
+   *pmdpp = NULL;
+   *ptepp = NULL;
+
+   pgdp = &kvm->arch.pgd[pgd_index(addr)];
+   if (!pgd_val(*pgdp))
+   return false;
+   if (pgd_val(*pgdp) & _PAGE_LEAF) {
+   *pgdpp = pgdp;
+   return true;
+   }
+
+   if (stage2_have_pmd) {
+   pmdp = (void *)pgd_page_vaddr(*pgdp);
+   pmdp = &pmdp[pmd_index(addr)];
+   if (!pmd_present(*pmdp))
+   return false;
+   if (pmd_val(*pmdp) & _PAGE_LEAF) {
+   *pmdpp = pmdp;
+   return true;
+   }
+
+   ptep = (void *)pmd_page_vaddr(*pmdp);
+   } else {
+   ptep = (void *)pgd_page_vaddr(*pgdp);
+   }
+
+   ptep = &ptep[pte_index(addr)];
+   if (!pte_present(*ptep))
+   return false;
+   if (pte_val(*ptep) & _PAGE_LEAF) {
+   *ptepp = ptep;
+   return true;
+   }
+
+   return false;
+}
+
 struct local_guest_tlb_info {
struct kvm_vmid *vmid;
gpa_t addr;
@@ -450,6 +510,38 @@ int stage2_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t 
hpa,
 
 }
 
+static int handle_hva_to_gpa(struct kvm *kvm,
+unsigned long start,
+unsigned long end,
+int (*handler)(struct kvm *kvm,
+   gpa_t gpa, u64 size,
+   void *data),
+void *data)
+{
+   struct kvm_memslots *slots;
+   struct kvm_memory_slot *memslot;
+   int ret = 0;
+
+   slots = kvm_memslots(kvm);
+
+   /* we only care about the pages that the guest sees */
+   kvm_for_each_memslot(memslot, slots) {
+   unsigned long hva_start, hva_end;
+   gfn_t gpa;
+
+   hva_start = max(start, memslot->userspace_addr);
+   hva_end = min(end, memslot->userspace_addr +
+   (memslot->npages << PAGE_SHIFT));
+   if (hva_start >= hva_end)
+   continue;
+
+   gpa = hva_to_gf
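
The walker above backs all four notifier hooks; kvm_unmap_hva_range(), for
example, reduces to a single handler call, along these lines (a sketch; the
unmap helper name is hypothetical):

	static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size,
					 void *data)
	{
		stage2_unmap_range(kvm, gpa, size);	/* hypothetical helper */
		return 0;
	}

	int kvm_unmap_hva_range(struct kvm *kvm,
				unsigned long start, unsigned long end)
	{
		handle_hva_to_gpa(kvm, start, end, kvm_unmap_hva_handler, NULL);
		return 0;
	}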

[PATCH v8 06/19] RISC-V: KVM: Implement KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls

2019-10-02 Thread Anup Patel
For KVM RISC-V, we use KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls to access
VCPU config and registers from user-space.

We have three types of VCPU registers:
1. CONFIG - these are VCPU config and capabilities
2. CORE   - these are VCPU general purpose registers
3. CSR- these are VCPU control and status registers

The CONFIG registers available to user-space are ISA and TIMEBASE. Out
of these, TIMEBASE is a read-only register which informs user-space about
the VCPU timer base frequency. The ISA register is a read and write register
where user-space can only write the desired VCPU ISA capabilities before
running the VCPU.

The CORE registers available to user-space are PC, RA, SP, GP, TP, A0-A7,
T0-T6, S0-S11 and MODE. Most of these are RISC-V general registers except
PC and MODE. The PC register represents the program counter whereas the MODE
register represents the VCPU privilege mode (i.e. S/U-mode).

The CSRs available to user-space are SSTATUS, SIE, STVEC, SSCRATCH, SEPC,
SCAUSE, STVAL, SIP, and SATP. All of these are read/write registers.

In future, more VCPU register types will be added (such as FP) for the
KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls.

Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
---
 arch/riscv/include/uapi/asm/kvm.h |  53 ++-
 arch/riscv/kvm/vcpu.c | 239 +-
 2 files changed, 289 insertions(+), 3 deletions(-)

diff --git a/arch/riscv/include/uapi/asm/kvm.h 
b/arch/riscv/include/uapi/asm/kvm.h
index 6dbc056d58ba..997b85f6fded 100644
--- a/arch/riscv/include/uapi/asm/kvm.h
+++ b/arch/riscv/include/uapi/asm/kvm.h
@@ -41,10 +41,61 @@ struct kvm_guest_debug_arch {
 struct kvm_sync_regs {
 };
 
-/* dummy definition */
+/* for KVM_GET_SREGS and KVM_SET_SREGS */
 struct kvm_sregs {
 };
 
+/* CONFIG registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
+struct kvm_riscv_config {
+   unsigned long isa;
+   unsigned long tbfreq;
+};
+
+/* CORE registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
+struct kvm_riscv_core {
+   struct user_regs_struct regs;
+   unsigned long mode;
+};
+
+/* Possible privilege modes for kvm_riscv_core */
+#define KVM_RISCV_MODE_S   1
+#define KVM_RISCV_MODE_U   0
+
+/* CSR registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
+struct kvm_riscv_csr {
+   unsigned long sstatus;
+   unsigned long sie;
+   unsigned long stvec;
+   unsigned long sscratch;
+   unsigned long sepc;
+   unsigned long scause;
+   unsigned long stval;
+   unsigned long sip;
+   unsigned long satp;
+};
+
+#define KVM_REG_SIZE(id)   \
+   (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
+
+/* If you need to interpret the index values, here is the key: */
+#define KVM_REG_RISCV_TYPE_MASK    0xFF000000
+#define KVM_REG_RISCV_TYPE_SHIFT   24
+
+/* Config registers are mapped as type 1 */
+#define KVM_REG_RISCV_CONFIG   (0x01 << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_CONFIG_REG(name) \
+   (offsetof(struct kvm_riscv_config, name) / sizeof(unsigned long))
+
+/* Core registers are mapped as type 2 */
+#define KVM_REG_RISCV_CORE (0x02 << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_CORE_REG(name)   \
+   (offsetof(struct kvm_riscv_core, name) / sizeof(unsigned long))
+
+/* Control and status registers are mapped as type 3 */
+#define KVM_REG_RISCV_CSR  (0x03 << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_CSR_REG(name)\
+   (offsetof(struct kvm_riscv_csr, name) / sizeof(unsigned long))
+
 #endif
 
 #endif /* __LINUX_KVM_RISCV_H */
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index 3223f723f79e..c9faca14f8cd 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -165,6 +165,219 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, 
struct vm_fault *vmf)
return VM_FAULT_SIGBUS;
 }
 
+static int kvm_riscv_vcpu_get_reg_config(struct kvm_vcpu *vcpu,
+const struct kvm_one_reg *reg)
+{
+   unsigned long __user *uaddr =
+   (unsigned long __user *)(unsigned long)reg->addr;
+   unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+   KVM_REG_SIZE_MASK |
+   KVM_REG_RISCV_CONFIG);
+   unsigned long reg_val;
+
+   if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+   return -EINVAL;
+
+   switch (reg_num) {
+   case KVM_REG_RISCV_CONFIG_REG(isa):
+   reg_val = vcpu->arch.isa;
+   break;
+   case KVM_REG_RISCV_CONFIG_REG(tbfreq):
+   reg_val = riscv_timebase;
+   break;
+   default:
+   return -EINVAL;
+   };
+
+   if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
+   return -EFAULT;
+
+   return 0;
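
From userspace, setting the Guest entry point before the first KVM_RUN then
looks roughly like this (illustrative; assumes KVM_REG_RISCV and
KVM_REG_SIZE_U64 from <linux/kvm.h>, and that the entry address is
hypothetical):

	unsigned long entry = 0x80000000UL;	/* hypothetical load address */
	struct kvm_one_reg reg = {
		.id   = KVM_REG_RISCV | KVM_REG_SIZE_U64 |
			KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.pc),
		.addr = (unsigned long)&entry,
	};

	ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);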

[PATCH v8 08/19] RISC-V: KVM: Handle MMIO exits for VCPU

2019-10-02 Thread Anup Patel
We will get stage2 page faults whenever a Guest/VM accesses a SW-emulated
MMIO device or unmapped Guest RAM.

This patch implements MMIO read/write emulation by extracting MMIO
details from the trapped load/store instruction and forwarding the
MMIO read/write to user-space. The actual MMIO emulation will happen
in user-space and the KVM kernel module will only take care of register
updates before resuming the trapped VCPU.

The handling of stage2 page faults for unmapped Guest RAM will be
implemented by a separate patch later.

Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Alexander Graf 
---
 arch/riscv/include/asm/kvm_host.h |  20 ++
 arch/riscv/kvm/mmu.c  |   7 +
 arch/riscv/kvm/vcpu_exit.c| 505 +-
 arch/riscv/kvm/vcpu_switch.S  |  14 +
 4 files changed, 543 insertions(+), 3 deletions(-)

diff --git a/arch/riscv/include/asm/kvm_host.h 
b/arch/riscv/include/asm/kvm_host.h
index 18f1097f1d8d..2a5209fff68d 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -53,6 +53,13 @@ struct kvm_arch {
phys_addr_t pgd_phys;
 };
 
+struct kvm_mmio_decode {
+   unsigned long insn;
+   int len;
+   int shift;
+   int return_handled;
+};
+
 struct kvm_cpu_context {
unsigned long zero;
unsigned long ra;
@@ -141,6 +148,9 @@ struct kvm_vcpu_arch {
unsigned long irqs_pending;
unsigned long irqs_pending_mask;
 
+   /* MMIO instruction details */
+   struct kvm_mmio_decode mmio_decode;
+
/* VCPU power-off state */
bool power_off;
 
@@ -160,11 +170,21 @@ static inline void kvm_arch_vcpu_block_finish(struct 
kvm_vcpu *vcpu) {}
 int kvm_riscv_setup_vsip(void);
 void kvm_riscv_cleanup_vsip(void);
 
+int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned long hva,
+bool is_write);
 void kvm_riscv_stage2_flush_cache(struct kvm_vcpu *vcpu);
 int kvm_riscv_stage2_alloc_pgd(struct kvm *kvm);
 void kvm_riscv_stage2_free_pgd(struct kvm *kvm);
 void kvm_riscv_stage2_update_hgatp(struct kvm_vcpu *vcpu);
 
+void __kvm_riscv_unpriv_trap(void);
+
+unsigned long kvm_riscv_vcpu_unpriv_read(struct kvm_vcpu *vcpu,
+bool read_insn,
+unsigned long guest_addr,
+unsigned long *trap_scause);
+void kvm_riscv_vcpu_trap_redirect(struct kvm_vcpu *vcpu,
+ unsigned long scause, unsigned long stval);
 int kvm_riscv_vcpu_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
unsigned long scause, unsigned long stval);
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
index 04dd089b86ff..2b965f9aac07 100644
--- a/arch/riscv/kvm/mmu.c
+++ b/arch/riscv/kvm/mmu.c
@@ -61,6 +61,13 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
return 0;
 }
 
+int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned long hva,
+bool is_write)
+{
+   /* TODO: */
+   return 0;
+}
+
 void kvm_riscv_stage2_flush_cache(struct kvm_vcpu *vcpu)
 {
/* TODO: */
diff --git a/arch/riscv/kvm/vcpu_exit.c b/arch/riscv/kvm/vcpu_exit.c
index e4d7c8f0807a..f1378c0a447f 100644
--- a/arch/riscv/kvm/vcpu_exit.c
+++ b/arch/riscv/kvm/vcpu_exit.c
@@ -6,9 +6,430 @@
  * Anup Patel 
  */
 
+#include 
 #include 
 #include 
 #include 
+#include 
+
+#define INSN_MATCH_LB  0x3
+#define INSN_MASK_LB   0x707f
+#define INSN_MATCH_LH  0x1003
+#define INSN_MASK_LH   0x707f
+#define INSN_MATCH_LW  0x2003
+#define INSN_MASK_LW   0x707f
+#define INSN_MATCH_LD  0x3003
+#define INSN_MASK_LD   0x707f
+#define INSN_MATCH_LBU 0x4003
+#define INSN_MASK_LBU  0x707f
+#define INSN_MATCH_LHU 0x5003
+#define INSN_MASK_LHU  0x707f
+#define INSN_MATCH_LWU 0x6003
+#define INSN_MASK_LWU  0x707f
+#define INSN_MATCH_SB  0x23
+#define INSN_MASK_SB   0x707f
+#define INSN_MATCH_SH  0x1023
+#define INSN_MASK_SH   0x707f
+#define INSN_MATCH_SW  0x2023
+#define INSN_MASK_SW   0x707f
+#define INSN_MATCH_SD  0x3023
+#define INSN_MASK_SD   0x707f
+
+#define INSN_MATCH_C_LD0x6000
+#define INSN_MASK_C_LD 0xe003
+#define INSN_MATCH_C_SD0xe000
+#define INSN_MASK_C_SD 0xe003
+#define INSN_MATCH_C_LW0x4000
+#define INSN_MASK_C_LW 0xe003
+#define INSN_MATCH_C_SW0xc000
+#define INSN_MASK_C_SW 0xe003
+#define INSN_MATCH_C_LDSP  0x6002
+#define INSN_MASK_C_LDSP   0xe003
+#define INSN_MATCH_C_SDSP  0xe002
+#define INSN_MASK_C_SDSP   0xe003
+#define INSN_MATCH_C_LWSP  0x4002
+#define INSN_MASK_C_LWSP   0xe003
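
On the userspace side, the forwarded access shows up as a standard KVM MMIO
exit; a sketch of the VMM half of this flow (the device-model calls are
hypothetical):

	if (run->exit_reason == KVM_EXIT_MMIO) {
		if (run->mmio.is_write)
			device_write(run->mmio.phys_addr,	/* hypothetical */
				     run->mmio.data, run->mmio.len);
		else
			device_read(run->mmio.phys_addr,	/* hypothetical */
				    run->mmio.data, run->mmio.len);
		/* the next KVM_RUN lets the kernel finish the trapped insn */
	}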

[PATCH v8 07/19] RISC-V: KVM: Implement VCPU world-switch

2019-10-02 Thread Anup Patel
This patch implements the VCPU world-switch for KVM RISC-V.

The KVM RISC-V world-switch (i.e. __kvm_riscv_switch_to()) mostly
switches general purpose registers, SSTATUS, STVEC, SSCRATCH and
HSTATUS CSRs. Other CSRs are switched via vcpu_load() and vcpu_put()
interface in kvm_arch_vcpu_load() and kvm_arch_vcpu_put() functions
respectively.

Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Alexander Graf 
---
 arch/riscv/include/asm/kvm_host.h |   9 +-
 arch/riscv/kernel/asm-offsets.c   |  76 
 arch/riscv/kvm/Makefile   |   2 +-
 arch/riscv/kvm/vcpu.c |  32 -
 arch/riscv/kvm/vcpu_switch.S  | 194 ++
 5 files changed, 309 insertions(+), 4 deletions(-)
 create mode 100644 arch/riscv/kvm/vcpu_switch.S

diff --git a/arch/riscv/include/asm/kvm_host.h 
b/arch/riscv/include/asm/kvm_host.h
index d801216da6d0..18f1097f1d8d 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -110,6 +110,13 @@ struct kvm_vcpu_arch {
/* ISA feature bits (similar to MISA) */
unsigned long isa;
 
+   /* SSCRATCH and STVEC of Host */
+   unsigned long host_sscratch;
+   unsigned long host_stvec;
+
+   /* CPU context of Host */
+   struct kvm_cpu_context host_context;
+
/* CPU context of Guest VCPU */
struct kvm_cpu_context guest_context;
 
@@ -162,7 +169,7 @@ int kvm_riscv_vcpu_mmio_return(struct kvm_vcpu *vcpu, 
struct kvm_run *run);
 int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
unsigned long scause, unsigned long stval);
 
-static inline void __kvm_riscv_switch_to(struct kvm_vcpu_arch *vcpu_arch) {}
+void __kvm_riscv_switch_to(struct kvm_vcpu_arch *vcpu_arch);
 
 int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
 int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
index 9f5628c38ac9..711656710190 100644
--- a/arch/riscv/kernel/asm-offsets.c
+++ b/arch/riscv/kernel/asm-offsets.c
@@ -7,7 +7,9 @@
 #define GENERATING_ASM_OFFSETS
 
 #include 
+#include 
 #include 
+#include 
 #include 
 #include 
 
@@ -109,6 +111,80 @@ void asm_offsets(void)
OFFSET(PT_SBADADDR, pt_regs, sbadaddr);
OFFSET(PT_SCAUSE, pt_regs, scause);
 
+   OFFSET(KVM_ARCH_GUEST_ZERO, kvm_vcpu_arch, guest_context.zero);
+   OFFSET(KVM_ARCH_GUEST_RA, kvm_vcpu_arch, guest_context.ra);
+   OFFSET(KVM_ARCH_GUEST_SP, kvm_vcpu_arch, guest_context.sp);
+   OFFSET(KVM_ARCH_GUEST_GP, kvm_vcpu_arch, guest_context.gp);
+   OFFSET(KVM_ARCH_GUEST_TP, kvm_vcpu_arch, guest_context.tp);
+   OFFSET(KVM_ARCH_GUEST_T0, kvm_vcpu_arch, guest_context.t0);
+   OFFSET(KVM_ARCH_GUEST_T1, kvm_vcpu_arch, guest_context.t1);
+   OFFSET(KVM_ARCH_GUEST_T2, kvm_vcpu_arch, guest_context.t2);
+   OFFSET(KVM_ARCH_GUEST_S0, kvm_vcpu_arch, guest_context.s0);
+   OFFSET(KVM_ARCH_GUEST_S1, kvm_vcpu_arch, guest_context.s1);
+   OFFSET(KVM_ARCH_GUEST_A0, kvm_vcpu_arch, guest_context.a0);
+   OFFSET(KVM_ARCH_GUEST_A1, kvm_vcpu_arch, guest_context.a1);
+   OFFSET(KVM_ARCH_GUEST_A2, kvm_vcpu_arch, guest_context.a2);
+   OFFSET(KVM_ARCH_GUEST_A3, kvm_vcpu_arch, guest_context.a3);
+   OFFSET(KVM_ARCH_GUEST_A4, kvm_vcpu_arch, guest_context.a4);
+   OFFSET(KVM_ARCH_GUEST_A5, kvm_vcpu_arch, guest_context.a5);
+   OFFSET(KVM_ARCH_GUEST_A6, kvm_vcpu_arch, guest_context.a6);
+   OFFSET(KVM_ARCH_GUEST_A7, kvm_vcpu_arch, guest_context.a7);
+   OFFSET(KVM_ARCH_GUEST_S2, kvm_vcpu_arch, guest_context.s2);
+   OFFSET(KVM_ARCH_GUEST_S3, kvm_vcpu_arch, guest_context.s3);
+   OFFSET(KVM_ARCH_GUEST_S4, kvm_vcpu_arch, guest_context.s4);
+   OFFSET(KVM_ARCH_GUEST_S5, kvm_vcpu_arch, guest_context.s5);
+   OFFSET(KVM_ARCH_GUEST_S6, kvm_vcpu_arch, guest_context.s6);
+   OFFSET(KVM_ARCH_GUEST_S7, kvm_vcpu_arch, guest_context.s7);
+   OFFSET(KVM_ARCH_GUEST_S8, kvm_vcpu_arch, guest_context.s8);
+   OFFSET(KVM_ARCH_GUEST_S9, kvm_vcpu_arch, guest_context.s9);
+   OFFSET(KVM_ARCH_GUEST_S10, kvm_vcpu_arch, guest_context.s10);
+   OFFSET(KVM_ARCH_GUEST_S11, kvm_vcpu_arch, guest_context.s11);
+   OFFSET(KVM_ARCH_GUEST_T3, kvm_vcpu_arch, guest_context.t3);
+   OFFSET(KVM_ARCH_GUEST_T4, kvm_vcpu_arch, guest_context.t4);
+   OFFSET(KVM_ARCH_GUEST_T5, kvm_vcpu_arch, guest_context.t5);
+   OFFSET(KVM_ARCH_GUEST_T6, kvm_vcpu_arch, guest_context.t6);
+   OFFSET(KVM_ARCH_GUEST_SEPC, kvm_vcpu_arch, guest_context.sepc);
+   OFFSET(KVM_ARCH_GUEST_SSTATUS, kvm_vcpu_arch, guest_context.sstatus);
+   OFFSET(KVM_ARCH_GUEST_HSTATUS, kvm_vcpu_arch, guest_context.hstatus);
+
+   OFFSET(KVM_ARCH_HOST_ZERO, kvm_vcpu_arch, host_context.zero);
+   OFFSET(KVM_ARCH_HOST_RA, kvm_vcpu_arch, host_context.ra);
   OFFSET(KVM_ARCH_HOST_SP, kvm_vcpu_arch, host_context.sp);
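
As a standalone illustration of what the OFFSET() entries above are for
(a hedged sketch, not part of the patch): each entry is scraped by the
kbuild asm-offsets machinery into include/generated/asm-offsets.h as a
plain #define, so that vcpu_switch.S can address the host/guest context
fields through a kvm_vcpu_arch pointer. The structs below are trimmed
stand-ins, so the printed offset differs from the kernel's real one.

#include <stddef.h>
#include <stdio.h>

struct kvm_cpu_context_sketch {
	unsigned long zero, ra, sp;		/* ...remaining GPRs... */
	unsigned long sepc, sstatus, hstatus;
};

struct kvm_vcpu_arch_sketch {
	unsigned long host_sscratch, host_stvec;
	struct kvm_cpu_context_sketch host_context;
	struct kvm_cpu_context_sketch guest_context;
};

int main(void)
{
	/* The kind of value that OFFSET(KVM_ARCH_GUEST_RA, ...) emits: */
	printf("KVM_ARCH_GUEST_RA = %zu\n",
	       offsetof(struct kvm_vcpu_arch_sketch, guest_context.ra));
	return 0;
}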

[PATCH v8 09/19] RISC-V: KVM: Handle WFI exits for VCPU

2019-10-02 Thread Anup Patel
We get an illegal instruction trap whenever a Guest/VM executes the WFI
instruction.

This patch handles the WFI trap by blocking the trapped VCPU using the
kvm_vcpu_block() API. The blocked VCPU is automatically resumed
whenever a VCPU interrupt is injected from user-space or by the
in-kernel IRQCHIP emulation.
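
A standalone sketch of the decode steps used in this patch (illustrative,
not the patch's code; INSN_IS_16BIT()/INSN_LEN() are assumed helpers
matching what vcpu_exit.c defines earlier, and the wfi encoding
0x10500073 comes from the RISC-V privileged spec):

#include <stdint.h>
#include <stdio.h>

#define INSN_OPCODE_MASK   0x007c
#define INSN_OPCODE_SHIFT  2
#define INSN_OPCODE_SYSTEM 28

/* Encodings with bits [1:0] != 0b11 are 16-bit (compressed). */
#define INSN_IS_16BIT(insn) (((insn) & 0x3) != 0x3)
#define INSN_LEN(insn)      (INSN_IS_16BIT(insn) ? 2 : 4)

int main(void)
{
	uint32_t insn = 0x10500073;	/* wfi */

	/* The major opcode is bits [6:2]; SYSTEM is 0b11100 == 28, so
	 * the exit handler routes wfi to system_opcode_insn(), which
	 * advances sepc by INSN_LEN(insn) == 4 after blocking. */
	printf("opcode=%u len=%d\n",
	       (unsigned)((insn & INSN_OPCODE_MASK) >> INSN_OPCODE_SHIFT),
	       INSN_LEN(insn));
	return 0;
}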

Signed-off-by: Anup Patel 
Acked-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
---
 arch/riscv/kvm/vcpu_exit.c | 72 ++
 1 file changed, 72 insertions(+)

diff --git a/arch/riscv/kvm/vcpu_exit.c b/arch/riscv/kvm/vcpu_exit.c
index f1378c0a447f..7507b859246b 100644
--- a/arch/riscv/kvm/vcpu_exit.c
+++ b/arch/riscv/kvm/vcpu_exit.c
@@ -12,6 +12,13 @@
 #include 
 #include 
 
+#define INSN_OPCODE_MASK   0x007c
+#define INSN_OPCODE_SHIFT  2
+#define INSN_OPCODE_SYSTEM 28
+
+#define INSN_MASK_WFI  0xffffffff
+#define INSN_MATCH_WFI 0x10500073
+
 #define INSN_MATCH_LB  0x3
 #define INSN_MASK_LB   0x707f
 #define INSN_MATCH_LH  0x1003
@@ -116,6 +123,67 @@
 (s32)(((insn) >> 7) & 0x1f))
 #define MASK_FUNCT3    0x7000
 
+static int truly_illegal_insn(struct kvm_vcpu *vcpu,
+ struct kvm_run *run,
+ ulong insn)
+{
+   /* Redirect trap to Guest VCPU */
+   kvm_riscv_vcpu_trap_redirect(vcpu, EXC_INST_ILLEGAL, insn);
+
+   return 1;
+}
+
+static int system_opcode_insn(struct kvm_vcpu *vcpu,
+ struct kvm_run *run,
+ ulong insn)
+{
+   if ((insn & INSN_MASK_WFI) == INSN_MATCH_WFI) {
+   vcpu->stat.wfi_exit_stat++;
+   if (!kvm_arch_vcpu_runnable(vcpu)) {
+   srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
+   kvm_vcpu_block(vcpu);
+   vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+   kvm_clear_request(KVM_REQ_UNHALT, vcpu);
+   }
+   vcpu->arch.guest_context.sepc += INSN_LEN(insn);
+   return 1;
+   }
+
+   return truly_illegal_insn(vcpu, run, insn);
+}
+
+static int illegal_inst_fault(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ unsigned long insn)
+{
+   unsigned long ut_scause = 0;
+   struct kvm_cpu_context *ct;
+
+   if (unlikely(INSN_IS_16BIT(insn))) {
+   if (insn == 0) {
+   ct = &vcpu->arch.guest_context;
+   insn = kvm_riscv_vcpu_unpriv_read(vcpu, true,
+ ct->sepc,
+ &ut_scause);
+   if (ut_scause) {
+   if (ut_scause == EXC_LOAD_PAGE_FAULT)
+   ut_scause = EXC_INST_PAGE_FAULT;
+   kvm_riscv_vcpu_trap_redirect(vcpu, ut_scause,
+ct->sepc);
+   return 1;
+   }
+   }
+   if (INSN_IS_16BIT(insn))
+   return truly_illegal_insn(vcpu, run, insn);
+   }
+
+   switch ((insn & INSN_OPCODE_MASK) >> INSN_OPCODE_SHIFT) {
+   case INSN_OPCODE_SYSTEM:
+   return system_opcode_insn(vcpu, run, insn);
+   default:
+   return truly_illegal_insn(vcpu, run, insn);
+   }
+}
+
 static int emulate_load(struct kvm_vcpu *vcpu, struct kvm_run *run,
unsigned long fault_addr)
 {
@@ -508,6 +576,10 @@ int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
ret = -EFAULT;
run->exit_reason = KVM_EXIT_UNKNOWN;
switch (scause) {
+   case EXC_INST_ILLEGAL:
+   if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV)
+   ret = illegal_inst_fault(vcpu, run, stval);
+   break;
case EXC_INST_PAGE_FAULT:
case EXC_LOAD_PAGE_FAULT:
case EXC_STORE_PAGE_FAULT:
-- 
2.17.1



[PATCH v8 01/19] RISC-V: Add bitmap representing ISA features common across CPUs

2019-10-02 Thread Anup Patel
This patch adds a riscv_isa bitmap which represents the Host ISA
features common across all Host CPUs. The riscv_isa bitmap is not the
same as elf_hwcap because elf_hwcap only has ISA features relevant to
user-space apps, whereas riscv_isa has ISA features relevant to both
the kernel and user-space apps.

One use case for the riscv_isa bitmap is the KVM hypervisor, where we
will use it for the following operations (see the sketch after this list):

1. Check whether hypervisor extension is available
2. Find ISA features that need to be virtualized (e.g. floating
   point support, vector extension, etc.)
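
A minimal standalone sketch of the new API's macro plumbing (illustrative,
not the kernel code: the kernel versions use the bitmap helpers from
linux/bitmap.h, and the local riscv_isa array here is a stand-in):

#include <stdbool.h>
#include <stdio.h>

#define RISCV_ISA_EXT_h    ('h' - 'a')
#define RISCV_ISA_EXT_MAX  256
#define BITS_PER_LONG_SK   (8 * (int)sizeof(unsigned long))

static unsigned long riscv_isa[RISCV_ISA_EXT_MAX / BITS_PER_LONG_SK];

static bool __riscv_isa_extension_available(const unsigned long *isa_bitmap,
					    int bit)
{
	/* NULL means "use the Host ISA bitmap", as in the patch. */
	const unsigned long *bmap = isa_bitmap ? isa_bitmap : riscv_isa;

	if (bit >= RISCV_ISA_EXT_MAX)
		return false;
	return (bmap[bit / BITS_PER_LONG_SK] >> (bit % BITS_PER_LONG_SK)) & 1;
}

/* The ext argument is token-pasted onto RISCV_ISA_EXT_, so callers name
 * extensions by letter, e.g. riscv_isa_extension_available(NULL, h). */
#define riscv_isa_extension_available(isa_bitmap, ext) \
	__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext)

int main(void)
{
	riscv_isa[0] |= 1UL << RISCV_ISA_EXT_h;	/* pretend H is present */
	printf("H extension available: %d\n",
	       riscv_isa_extension_available(NULL, h));
	return 0;
}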

Signed-off-by: Anup Patel 
Signed-off-by: Atish Patra 
Reviewed-by: Alexander Graf 
---
 arch/riscv/include/asm/hwcap.h | 22 +
 arch/riscv/kernel/cpufeature.c | 83 --
 2 files changed, 102 insertions(+), 3 deletions(-)

diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index 7ecb7c6a57b1..5989dd4426d1 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -8,6 +8,7 @@
 #ifndef __ASM_HWCAP_H
 #define __ASM_HWCAP_H
 
+#include 
 #include 
 
 #ifndef __ASSEMBLY__
@@ -22,5 +23,26 @@ enum {
 };
 
 extern unsigned long elf_hwcap;
+
+#define RISCV_ISA_EXT_a    ('a' - 'a')
+#define RISCV_ISA_EXT_c    ('c' - 'a')
+#define RISCV_ISA_EXT_d    ('d' - 'a')
+#define RISCV_ISA_EXT_f    ('f' - 'a')
+#define RISCV_ISA_EXT_h    ('h' - 'a')
+#define RISCV_ISA_EXT_i    ('i' - 'a')
+#define RISCV_ISA_EXT_m    ('m' - 'a')
+#define RISCV_ISA_EXT_s    ('s' - 'a')
+#define RISCV_ISA_EXT_u    ('u' - 'a')
+
+#define RISCV_ISA_EXT_MAX  256
+
+unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap);
+
+#define riscv_isa_extension_mask(ext) BIT_MASK(RISCV_ISA_EXT_##ext)
+
+bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit);
+#define riscv_isa_extension_available(isa_bitmap, ext) \
+   __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext)
+
 #endif
 #endif
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index b1ade9a49347..941aeb33f85b 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -6,21 +6,64 @@
  * Copyright (C) 2017 SiFive
  */
 
+#include 
 #include 
 #include 
 #include 
 #include 
 
 unsigned long elf_hwcap __read_mostly;
+
+/* Host ISA bitmap */
+static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly;
+
 #ifdef CONFIG_FPU
 bool has_fpu __read_mostly;
 #endif
 
+/**
+ * riscv_isa_extension_base() - Get base extension word
+ *
+ * @isa_bitmap: ISA bitmap to use
+ * Return: base extension word as unsigned long value
+ *
+ * NOTE: If isa_bitmap is NULL then Host ISA bitmap will be used.
+ */
+unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap)
+{
+   if (!isa_bitmap)
+   return riscv_isa[0];
+   return isa_bitmap[0];
+}
+EXPORT_SYMBOL_GPL(riscv_isa_extension_base);
+
+/**
+ * __riscv_isa_extension_available() - Check whether given extension
+ * is available or not
+ *
+ * @isa_bitmap: ISA bitmap to use
+ * @bit: bit position of the desired extension
+ * Return: true or false
+ *
+ * NOTE: If isa_bitmap is NULL then Host ISA bitmap will be used.
+ */
+bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit)
+{
+   const unsigned long *bmap = (isa_bitmap) ? isa_bitmap : riscv_isa;
+
+   if (bit >= RISCV_ISA_EXT_MAX)
+   return false;
+
+   return test_bit(bit, bmap) ? true : false;
+}
+EXPORT_SYMBOL_GPL(__riscv_isa_extension_available);
+
 void riscv_fill_hwcap(void)
 {
struct device_node *node;
const char *isa;
-   size_t i;
+   char print_str[BITS_PER_LONG+1];
+   size_t i, j, isa_len;
static unsigned long isa2hwcap[256] = {0};
 
isa2hwcap['i'] = isa2hwcap['I'] = COMPAT_HWCAP_ISA_I;
@@ -32,8 +75,11 @@ void riscv_fill_hwcap(void)
 
elf_hwcap = 0;
 
+   bitmap_zero(riscv_isa, RISCV_ISA_EXT_MAX);
+
for_each_of_cpu_node(node) {
unsigned long this_hwcap = 0;
+   unsigned long this_isa = 0;
 
if (riscv_of_processor_hartid(node) < 0)
continue;
@@ -43,8 +89,24 @@ void riscv_fill_hwcap(void)
continue;
}
 
-   for (i = 0; i < strlen(isa); ++i)
+   i = 0;
+   isa_len = strlen(isa);
+#if defined(CONFIG_32BIT)
+   if (!strncmp(isa, "rv32", 4))
+   i += 4;
+#elif defined(CONFIG_64BIT)
+   if (!strncmp(isa, "rv64", 4))
+   i += 4;
+#endif
+   for (; i < isa_len; ++i) {
this_hwcap |= isa2hwcap[(unsigned char)(isa[i])];
+   /*
+* TODO: X, Y and Z extension parsing for Host ISA
+* bitmap will be added in the future.
+*/
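
A standalone sketch of the parsing loop above (illustrative, not the
patch's code; the single-letter range check is an assumption about how
this loop fills this_isa, and "rv64imafdcsu" is just an example string):

#include <stdio.h>
#include <string.h>

int main(void)
{
	const char *isa = "rv64imafdcsu";
	unsigned long this_isa = 0;
	size_t i = 0, isa_len = strlen(isa);

	if (!strncmp(isa, "rv64", 4))
		i += 4;				/* skip the base-ISA prefix */
	for (; i < isa_len; ++i)
		if ('a' <= isa[i] && isa[i] < 'x')	/* single letters only */
			this_isa |= 1UL << (isa[i] - 'a');

	printf("isa bitmap: 0x%lx\n", this_isa);	/* i,m,a,f,d,c,s,u bits */
	return 0;
}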
