date:20191217

[Xen-devel] [PATCH 1/9] xen/sched: move schedulers and cpupool coding to dedicated directory

2019-12-17 Thread Juergen Gross

Move sched*c and cpupool.c to a new directory common/sched.

Signed-off-by: Juergen Gross 
---
 MAINTAINERS|  8 +--
 xen/common/Kconfig | 66 +-
 xen/common/Makefile|  8 +--
 xen/common/sched/Kconfig   | 65 +
 xen/common/sched/Makefile  |  7 +++
 .../{compat/schedule.c => sched/compat_schedule.c} |  2 +-
 xen/common/{ => sched}/cpupool.c   |  0
 xen/common/{ => sched}/sched_arinc653.c|  0
 xen/common/{ => sched}/sched_credit.c  |  0
 xen/common/{ => sched}/sched_credit2.c |  0
 xen/common/{ => sched}/sched_null.c|  0
 xen/common/{ => sched}/sched_rt.c  |  0
 xen/common/{ => sched}/schedule.c  |  2 +-
 13 files changed, 80 insertions(+), 78 deletions(-)
 create mode 100644 xen/common/sched/Kconfig
 create mode 100644 xen/common/sched/Makefile
 rename xen/common/{compat/schedule.c => sched/compat_schedule.c} (97%)
 rename xen/common/{ => sched}/cpupool.c (100%)
 rename xen/common/{ => sched}/sched_arinc653.c (100%)
 rename xen/common/{ => sched}/sched_credit.c (100%)
 rename xen/common/{ => sched}/sched_credit2.c (100%)
 rename xen/common/{ => sched}/sched_null.c (100%)
 rename xen/common/{ => sched}/sched_rt.c (100%)
 rename xen/common/{ => sched}/schedule.c (99%)

diff --git a/MAINTAINERS b/MAINTAINERS
index 012c847ebd..37d4da2bc2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -174,7 +174,7 @@ M:  Josh Whitehead 
 M: Stewart Hildebrand 
 S: Supported
 L: DornerWorks Xen-Devel 
-F: xen/common/sched_arinc653.c
+F: xen/common/sched/sched_arinc653.c
 F: tools/libxc/xc_arinc653.c
 
 ARM (W/ VIRTUALISATION EXTENSIONS) ARCHITECTURE
@@ -212,7 +212,7 @@ CPU POOLS
 M: Juergen Gross 
 M: Dario Faggioli 
 S: Supported
-F: xen/common/cpupool.c
+F: xen/common/sched/cpupool.c
 
 DEVICE TREE
 M: Stefano Stabellini 
@@ -378,13 +378,13 @@ RTDS SCHEDULER
 M: Dario Faggioli 
 M: Meng Xu 
 S: Supported
-F: xen/common/sched_rt.c
+F: xen/common/sched/sched_rt.c
 
 SCHEDULING
 M: George Dunlap 
 M: Dario Faggioli 
 S: Supported
-F: xen/common/sched*
+F: xen/common/sched/
 
 SEABIOS UPSTREAM
 M: Wei Liu 
diff --git a/xen/common/Kconfig b/xen/common/Kconfig
index 2f516da101..79465fc1f9 100644
--- a/xen/common/Kconfig
+++ b/xen/common/Kconfig
@@ -278,71 +278,7 @@ config ARGO
 
  If unsure, say N.
 
-menu "Schedulers"
-   visible if EXPERT = "y"
-
-config SCHED_CREDIT
-   bool "Credit scheduler support"
-   default y
-   ---help---
- The traditional credit scheduler is a general purpose scheduler.
-
-config SCHED_CREDIT2
-   bool "Credit2 scheduler support"
-   default y
-   ---help---
- The credit2 scheduler is a general purpose scheduler that is
- optimized for lower latency and higher VM density.
-
-config SCHED_RTDS
-   bool "RTDS scheduler support (EXPERIMENTAL)"
-   default y
-   ---help---
- The RTDS scheduler is a soft and firm real-time scheduler for
- multicore, targeted for embedded, automotive, graphics and gaming
- in the cloud, and general low-latency workloads.
-
-config SCHED_ARINC653
-   bool "ARINC653 scheduler support (EXPERIMENTAL)"
-   default DEBUG
-   ---help---
- The ARINC653 scheduler is a hard real-time scheduler for single
- cores, targeted for avionics, drones, and medical devices.
-
-config SCHED_NULL
-   bool "Null scheduler support (EXPERIMENTAL)"
-   default y
-   ---help---
- The null scheduler is a static, zero overhead scheduler,
- for when there always are less vCPUs than pCPUs, typically
- in embedded or HPC scenarios.
-
-choice
-   prompt "Default Scheduler?"
-   default SCHED_CREDIT2_DEFAULT
-
-   config SCHED_CREDIT_DEFAULT
-   bool "Credit Scheduler" if SCHED_CREDIT
-   config SCHED_CREDIT2_DEFAULT
-   bool "Credit2 Scheduler" if SCHED_CREDIT2
-   config SCHED_RTDS_DEFAULT
-   bool "RT Scheduler" if SCHED_RTDS
-   config SCHED_ARINC653_DEFAULT
-   bool "ARINC653 Scheduler" if SCHED_ARINC653
-   config SCHED_NULL_DEFAULT
-   bool "Null Scheduler" if SCHED_NULL
-endchoice
-
-config SCHED_DEFAULT
-   string
-   default "credit" if SCHED_CREDIT_DEFAULT
-   default "credit2" if SCHED_CREDIT2_DEFAULT
-   default "rtds" if SCHED_RTDS_DEFAULT
-   default "arinc653" if SCHED_ARINC653_DEFAULT
-   default "null" if SCHED_NULL_DEFAULT
-   default "credit2"
-
-endmenu
+source "common/sched/Kconfig"
 
 config CRYPTO
bool
diff --git a/xen/common/Makefile b/xen/common/Makefile
index 62b34e69e9..2abb8250b0 100644
--- a/xen/common/Makefile
+++

[Xen-devel] [PATCH 6/9] xen/sched: replace null scheduler percpu-variable with pdata hook

2019-12-17 Thread Juergen Gross

Instead of using a dedicated percpu variable for the per-cpu private
data, the generic scheduler interface for that purpose should be used.

Signed-off-by: Juergen Gross 
---
 xen/common/sched/sched_null.c | 89 +--
 1 file changed, 60 insertions(+), 29 deletions(-)

diff --git a/xen/common/sched/sched_null.c b/xen/common/sched/sched_null.c
index 5a23a7e7dc..11aab25743 100644
--- a/xen/common/sched/sched_null.c
+++ b/xen/common/sched/sched_null.c
@@ -89,7 +89,6 @@ struct null_private {
 struct null_pcpu {
 struct sched_unit *unit;
 };
-DEFINE_PER_CPU(struct null_pcpu, npc);
 
 /*
  * Schedule unit
@@ -159,32 +158,48 @@ static void null_deinit(struct scheduler *ops)
 ops->sched_data = NULL;
 }
 
-static void init_pdata(struct null_private *prv, unsigned int cpu)
+static void init_pdata(struct null_private *prv, struct null_pcpu *npc,
+   unsigned int cpu)
 {
 /* Mark the pCPU as free, and with no unit assigned */
     cpumask_set_cpu(cpu, &prv->cpus_free);
-per_cpu(npc, cpu).unit = NULL;
+npc->unit = NULL;
 }
 
 static void null_init_pdata(const struct scheduler *ops, void *pdata, int cpu)
 {
 struct null_private *prv = null_priv(ops);
 
-/* alloc_pdata is not implemented, so we want this to be NULL. */
-ASSERT(!pdata);
+ASSERT(pdata);
 
-init_pdata(prv, cpu);
+init_pdata(prv, pdata, cpu);
 }
 
 static void null_deinit_pdata(const struct scheduler *ops, void *pcpu, int cpu)
 {
 struct null_private *prv = null_priv(ops);
+struct null_pcpu *npc = pcpu;
 
-/* alloc_pdata not implemented, so this must have stayed NULL */
-ASSERT(!pcpu);
+ASSERT(npc);
 
     cpumask_clear_cpu(cpu, &prv->cpus_free);
-per_cpu(npc, cpu).unit = NULL;
+npc->unit = NULL;
+}
+
+static void *null_alloc_pdata(const struct scheduler *ops, int cpu)
+{
+struct null_pcpu *npc;
+
+npc = xzalloc(struct null_pcpu);
+if ( npc == NULL )
+return ERR_PTR(-ENOMEM);
+
+return npc;
+}
+
+static void null_free_pdata(const struct scheduler *ops, void *pcpu, int cpu)
+{
+xfree(pcpu);
 }
 
 static void *null_alloc_udata(const struct scheduler *ops,
@@ -268,6 +283,7 @@ pick_res(struct null_private *prv, const struct sched_unit 
*unit)
 unsigned int bs;
 unsigned int cpu = sched_unit_master(unit), new_cpu;
 cpumask_t *cpus = cpupool_domain_master_cpumask(unit->domain);
+struct null_pcpu *npc = get_sched_res(cpu)->sched_priv;
 
 ASSERT(spin_is_locked(get_sched_res(cpu)->schedule_lock));
 
@@ -286,8 +302,7 @@ pick_res(struct null_private *prv, const struct sched_unit 
*unit)
  * don't, so we get to keep in the scratch cpumask what we have just
  * put in it.)
  */
-if ( likely((per_cpu(npc, cpu).unit == NULL ||
- per_cpu(npc, cpu).unit == unit)
+if ( likely((npc->unit == NULL || npc->unit == unit)
 && cpumask_test_cpu(cpu, cpumask_scratch_cpu(cpu))) )
 {
 new_cpu = cpu;
@@ -336,9 +351,11 @@ pick_res(struct null_private *prv, const struct sched_unit 
*unit)
 static void unit_assign(struct null_private *prv, struct sched_unit *unit,
 unsigned int cpu)
 {
+struct null_pcpu *npc = get_sched_res(cpu)->sched_priv;
+
 ASSERT(is_unit_online(unit));
 
-per_cpu(npc, cpu).unit = unit;
+npc->unit = unit;
 sched_set_res(unit, get_sched_res(cpu));
     cpumask_clear_cpu(cpu, &prv->cpus_free);
 
@@ -363,12 +380,13 @@ static bool unit_deassign(struct null_private *prv, 
struct sched_unit *unit)
 unsigned int bs;
 unsigned int cpu = sched_unit_master(unit);
 struct null_unit *wvc;
+struct null_pcpu *npc = get_sched_res(cpu)->sched_priv;
 
     ASSERT(list_empty(&null_unit(unit)->waitq_elem));
-ASSERT(per_cpu(npc, cpu).unit == unit);
+ASSERT(npc->unit == unit);
     ASSERT(!cpumask_test_cpu(cpu, &prv->cpus_free));
 
-per_cpu(npc, cpu).unit = NULL;
+npc->unit = NULL;
     cpumask_set_cpu(cpu, &prv->cpus_free);
 
 dprintk(XENLOG_G_INFO, "%d <-- NULL (%pdv%d)\n", cpu, unit->domain,
@@ -436,7 +454,7 @@ static spinlock_t *null_switch_sched(struct scheduler 
*new_ops,
  */
 ASSERT(!local_irq_is_enabled());
 
-init_pdata(prv, cpu);
+init_pdata(prv, pdata, cpu);
 
     return &sr->_lock;
 }
@@ -446,6 +464,7 @@ static void null_unit_insert(const struct scheduler *ops,
 {
 struct null_private *prv = null_priv(ops);
 struct null_unit *nvc = null_unit(unit);
+struct null_pcpu *npc;
 unsigned int cpu;
 spinlock_t *lock;
 
@@ -462,6 +481,7 @@ static void null_unit_insert(const struct scheduler *ops,
  retry:
 sched_set_res(unit, pick_res(prv, unit));
 cpu = sched_unit_master(unit);
+npc = get_sched_res(cpu)->sched_priv;
 
 spin_unlock(lock);
 
@@ -471,7 +491,7 @@ static void null_unit_insert(const struct scheduler *ops,
 cpupool_domain_master_cpumask(unit->domain));
 
 /* If the pCPU is free, we assign unit

[Xen-devel] [PATCH 4/9] xen/sched: remove special cases for free cpus in schedulers

2019-12-17 Thread Juergen Gross

With the idle scheduler now taking care of all cpus not in any cpupool,
the special cases in the other schedulers for cpus without an associated
cpupool can be removed.

Signed-off-by: Juergen Gross 
---
 xen/common/sched/sched_credit.c  |  7 ++-
 xen/common/sched/sched_credit2.c | 30 --
 2 files changed, 2 insertions(+), 35 deletions(-)

diff --git a/xen/common/sched/sched_credit.c b/xen/common/sched/sched_credit.c
index a098ca0f3a..8b1de9b033 100644
--- a/xen/common/sched/sched_credit.c
+++ b/xen/common/sched/sched_credit.c
@@ -1690,11 +1690,8 @@ csched_load_balance(struct csched_private *prv, int cpu,
 
 BUG_ON(get_sched_res(cpu) != snext->unit->res);
 
-/*
- * If this CPU is going offline, or is not (yet) part of any cpupool
- * (as it happens, e.g., during cpu bringup), we shouldn't steal work.
- */
-if ( unlikely(!cpumask_test_cpu(cpu, online) || c == NULL) )
+/* If this CPU is going offline, we shouldn't steal work.  */
+if ( unlikely(!cpumask_test_cpu(cpu, online)) )
 goto out;
 
 if ( snext->pri == CSCHED_PRI_IDLE )
diff --git a/xen/common/sched/sched_credit2.c b/xen/common/sched/sched_credit2.c
index 5bfe1441a2..f9e521a3a8 100644
--- a/xen/common/sched/sched_credit2.c
+++ b/xen/common/sched/sched_credit2.c
@@ -2744,40 +2744,10 @@ static void
 csched2_unit_migrate(
 const struct scheduler *ops, struct sched_unit *unit, unsigned int new_cpu)
 {
-struct domain *d = unit->domain;
 struct csched2_unit * const svc = csched2_unit(unit);
 struct csched2_runqueue_data *trqd;
 s_time_t now = NOW();
 
-/*
- * Being passed a target pCPU which is outside of our cpupool is only
- * valid if we are shutting down (or doing ACPI suspend), and we are
- * moving everyone to BSP, no matter whether or not BSP is inside our
- * cpupool.
- *
- * And since there indeed is the chance that it is not part of it, all
- * we must do is remove _and_ unassign the unit from any runqueue, as
- * well as updating v->processor with the target, so that the suspend
- * process can continue.
- *
- * It will then be during resume that a new, meaningful, value for
- * v->processor will be chosen, and during actual domain unpause that
- * the unit will be assigned to and added to the proper runqueue.
- */
-if ( unlikely(!cpumask_test_cpu(new_cpu, 
cpupool_domain_master_cpumask(d))) )
-{
-ASSERT(system_state == SYS_STATE_suspend);
-if ( unit_on_runq(svc) )
-{
-runq_remove(svc);
-update_load(ops, svc->rqd, NULL, -1, now);
-}
-_runq_deassign(svc);
-sched_set_res(unit, get_sched_res(new_cpu));
-return;
-}
-
-/* If here, new_cpu must be a valid Credit2 pCPU, and in our affinity. */
     ASSERT(cpumask_test_cpu(new_cpu, &csched2_priv(ops)->initialized));
 ASSERT(cpumask_test_cpu(new_cpu, unit->cpu_hard_affinity));
 
-- 
2.16.4


___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] [PATCH 9/9] xen/sched: add const qualifier where appropriate

2019-12-17 Thread Juergen Gross

Make use of the const qualifier more often in scheduling code.

Signed-off-by: Juergen Gross 
---
 xen/common/sched/cpupool.c|  2 +-
 xen/common/sched/sched_arinc653.c |  4 +--
 xen/common/sched/sched_credit.c   | 44 +
 xen/common/sched/sched_credit2.c  | 52 ---
 xen/common/sched/sched_null.c | 17 +++--
 xen/common/sched/sched_rt.c   | 32 
 xen/common/sched/schedule.c   | 25 ++-
 xen/include/xen/sched.h   |  9 ---
 8 files changed, 96 insertions(+), 89 deletions(-)

diff --git a/xen/common/sched/cpupool.c b/xen/common/sched/cpupool.c
index 14212bb4ae..a6c04c46cb 100644
--- a/xen/common/sched/cpupool.c
+++ b/xen/common/sched/cpupool.c
@@ -882,7 +882,7 @@ int cpupool_get_id(const struct domain *d)
 return d->cpupool ? d->cpupool->cpupool_id : CPUPOOLID_NONE;
 }
 
-cpumask_t *cpupool_valid_cpus(struct cpupool *pool)
+const cpumask_t *cpupool_valid_cpus(const struct cpupool *pool)
 {
 return pool->cpu_valid;
 }
diff --git a/xen/common/sched/sched_arinc653.c 
b/xen/common/sched/sched_arinc653.c
index dc45378952..0de4ba6b2c 100644
--- a/xen/common/sched/sched_arinc653.c
+++ b/xen/common/sched/sched_arinc653.c
@@ -608,7 +608,7 @@ static struct sched_resource *
 a653sched_pick_resource(const struct scheduler *ops,
 const struct sched_unit *unit)
 {
-cpumask_t *online;
+const cpumask_t *online;
 unsigned int cpu;
 
 /*
@@ -639,7 +639,7 @@ a653_switch_sched(struct scheduler *new_ops, unsigned int 
cpu,
   void *pdata, void *vdata)
 {
 struct sched_resource *sr = get_sched_res(cpu);
-arinc653_unit_t *svc = vdata;
+const arinc653_unit_t *svc = vdata;
 
 ASSERT(!pdata && svc && is_idle_unit(svc->unit));
 
diff --git a/xen/common/sched/sched_credit.c b/xen/common/sched/sched_credit.c
index 05930261d9..f2fc1cca5a 100644
--- a/xen/common/sched/sched_credit.c
+++ b/xen/common/sched/sched_credit.c
@@ -233,7 +233,7 @@ static void csched_tick(void *_cpu);
 static void csched_acct(void *dummy);
 
 static inline int
-__unit_on_runq(struct csched_unit *svc)
+__unit_on_runq(const struct csched_unit *svc)
 {
     return !list_empty(&svc->runq_elem);
 }
@@ -349,11 +349,11 @@ boolean_param("tickle_one_idle_cpu", opt_tickle_one_idle);
 
 DEFINE_PER_CPU(unsigned int, last_tickle_cpu);
 
-static inline void __runq_tickle(struct csched_unit *new)
+static inline void __runq_tickle(const struct csched_unit *new)
 {
 unsigned int cpu = sched_unit_master(new->unit);
-struct sched_resource *sr = get_sched_res(cpu);
-struct sched_unit *unit = new->unit;
+const struct sched_resource *sr = get_sched_res(cpu);
+const struct sched_unit *unit = new->unit;
 struct csched_unit * const cur = CSCHED_UNIT(curr_on_cpu(cpu));
 struct csched_private *prv = CSCHED_PRIV(sr->scheduler);
 cpumask_t mask, idle_mask, *online;
@@ -509,7 +509,7 @@ static inline void __runq_tickle(struct csched_unit *new)
 static void
 csched_free_pdata(const struct scheduler *ops, void *pcpu, int cpu)
 {
-struct csched_private *prv = CSCHED_PRIV(ops);
+const struct csched_private *prv = CSCHED_PRIV(ops);
 
 /*
  * pcpu either points to a valid struct csched_pcpu, or is NULL, if we're
@@ -652,7 +652,7 @@ csched_switch_sched(struct scheduler *new_ops, unsigned int 
cpu,
 
 #ifndef NDEBUG
 static inline void
-__csched_unit_check(struct sched_unit *unit)
+__csched_unit_check(const struct sched_unit *unit)
 {
 struct csched_unit * const svc = CSCHED_UNIT(unit);
 struct csched_dom * const sdom = svc->sdom;
@@ -700,8 +700,8 @@ __csched_vcpu_is_cache_hot(const struct csched_private *prv,
 
 static inline int
 __csched_unit_is_migrateable(const struct csched_private *prv,
- struct sched_unit *unit,
- int dest_cpu, cpumask_t *mask)
+ const struct sched_unit *unit,
+ int dest_cpu, const cpumask_t *mask)
 {
 const struct csched_unit *svc = CSCHED_UNIT(unit);
 /*
@@ -725,7 +725,7 @@ _csched_cpu_pick(const struct scheduler *ops, const struct 
sched_unit *unit,
 /* We must always use cpu's scratch space */
 cpumask_t *cpus = cpumask_scratch_cpu(cpu);
 cpumask_t idlers;
-cpumask_t *online = cpupool_domain_master_cpumask(unit->domain);
+const cpumask_t *online = cpupool_domain_master_cpumask(unit->domain);
 struct csched_pcpu *spc = NULL;
 int balance_step;
 
@@ -932,7 +932,7 @@ csched_unit_acct(struct csched_private *prv, unsigned int 
cpu)
 {
 struct sched_unit *currunit = current->sched_unit;
 struct csched_unit * const svc = CSCHED_UNIT(currunit);
-struct sched_resource *sr = get_sched_res(cpu);
+const struct sched_resource *sr = get_sched_res(cpu);
 const struct scheduler *ops = sr->scheduler;
 
 ASSERT( sched_unit_master(currunit) == cpu );
@@

[Xen-devel] [PATCH 0/9] xen: scheduler cleanups

2019-12-17 Thread Juergen Gross

Move all scheduler related hypervisor code to xen/common/sched/ and
do a lot of cleanups.

Juergen Gross (9):
  xen/sched: move schedulers and cpupool coding to dedicated directory
  xen/sched: make sched-if.h really scheduler private
  xen/sched: cleanup sched.h
  xen/sched: remove special cases for free cpus in schedulers
  xen/sched: use scratch cpumask instead of allocating it on the stack
  xen/sched: replace null scheduler percpu-variable with pdata hook
  xen/sched: switch scheduling to bool where appropriate
  xen/sched: eliminate sched_tick_suspend() and sched_tick_resume()
  xen/sched: add const qualifier where appropriate

 MAINTAINERS|   8 +-
 xen/arch/arm/domain.c  |   6 +-
 xen/arch/x86/acpi/cpu_idle.c   |  15 +-
 xen/arch/x86/cpu/mwait-idle.c  |   8 +-
 xen/arch/x86/dom0_build.c  |   5 +-
 xen/common/Kconfig |  66 +-
 xen/common/Makefile|   8 +-
 xen/common/domain.c|  70 --
 xen/common/domctl.c| 135 +--
 xen/common/rcupdate.c  |   7 +-
 xen/common/sched/Kconfig   |  65 ++
 xen/common/sched/Makefile  |   7 +
 .../{compat/schedule.c => sched/compat_schedule.c} |   2 +-
 xen/common/{ => sched}/cpupool.c   |  23 +-
 xen/{include/xen => common/sched}/sched-if.h   |  18 +-
 xen/common/{ => sched}/sched_arinc653.c|  15 +-
 xen/common/{ => sched}/sched_credit.c  |  65 +++---
 xen/common/{ => sched}/sched_credit2.c |  85 +++
 xen/common/{ => sched}/sched_null.c| 105 ++---
 xen/common/{ => sched}/sched_rt.c  | 105 +
 xen/common/{ => sched}/schedule.c  | 246 ++---
 xen/include/xen/domain.h   |   3 +
 xen/include/xen/rcupdate.h |   3 -
 xen/include/xen/sched.h|  39 ++--
 24 files changed, 566 insertions(+), 543 deletions(-)
 create mode 100644 xen/common/sched/Kconfig
 create mode 100644 xen/common/sched/Makefile
 rename xen/common/{compat/schedule.c => sched/compat_schedule.c} (97%)
 rename xen/common/{ => sched}/cpupool.c (97%)
 rename xen/{include/xen => common/sched}/sched-if.h (96%)
 rename xen/common/{ => sched}/sched_arinc653.c (99%)
 rename xen/common/{ => sched}/sched_credit.c (97%)
 rename xen/common/{ => sched}/sched_credit2.c (98%)
 rename xen/common/{ => sched}/sched_null.c (92%)
 rename xen/common/{ => sched}/sched_rt.c (94%)
 rename xen/common/{ => sched}/schedule.c (92%)

-- 
2.16.4


___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] [PATCH 8/9] xen/sched: eliminate sched_tick_suspend() and sched_tick_resume()

2019-12-17 Thread Juergen Gross

sched_tick_suspend() and sched_tick_resume() only call rcu related
functions, so eliminate them and do the rcu_idle_timer*() calling in
rcu_idle_[enter|exit]().

Signed-off-by: Juergen Gross 
---
 xen/arch/arm/domain.c |  6 +++---
 xen/arch/x86/acpi/cpu_idle.c  | 15 ---
 xen/arch/x86/cpu/mwait-idle.c |  8 
 xen/common/rcupdate.c |  7 +--
 xen/common/sched/schedule.c   | 12 
 xen/include/xen/rcupdate.h|  3 ---
 xen/include/xen/sched.h   |  2 --
 7 files changed, 20 insertions(+), 33 deletions(-)

diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index c0a13aa0ab..aa3df3b3ba 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -46,8 +46,8 @@ static void do_idle(void)
 {
 unsigned int cpu = smp_processor_id();
 
-sched_tick_suspend();
-/* sched_tick_suspend() can raise TIMER_SOFTIRQ. Process it now. */
+rcu_idle_enter(cpu);
+/* rcu_idle_enter() can raise TIMER_SOFTIRQ. Process it now. */
 process_pending_softirqs();
 
 local_irq_disable();
@@ -58,7 +58,7 @@ static void do_idle(void)
 }
 local_irq_enable();
 
-sched_tick_resume();
+rcu_idle_exit(cpu);
 }
 
 void idle_loop(void)
diff --git a/xen/arch/x86/acpi/cpu_idle.c b/xen/arch/x86/acpi/cpu_idle.c
index 5edd1844f4..2676f0d7da 100644
--- a/xen/arch/x86/acpi/cpu_idle.c
+++ b/xen/arch/x86/acpi/cpu_idle.c
@@ -599,7 +599,8 @@ void update_idle_stats(struct acpi_processor_power *power,
 
 static void acpi_processor_idle(void)
 {
-struct acpi_processor_power *power = processor_powers[smp_processor_id()];
+unsigned int cpu = smp_processor_id();
+struct acpi_processor_power *power = processor_powers[cpu];
 struct acpi_processor_cx *cx = NULL;
 int next_state;
 uint64_t t1, t2 = 0;
@@ -648,8 +649,8 @@ static void acpi_processor_idle(void)
 
 cpufreq_dbs_timer_suspend();
 
-sched_tick_suspend();
-/* sched_tick_suspend() can raise TIMER_SOFTIRQ. Process it now. */
+rcu_idle_enter(cpu);
+/* rcu_idle_enter() can raise TIMER_SOFTIRQ. Process it now. */
 process_pending_softirqs();
 
 /*
@@ -658,10 +659,10 @@ static void acpi_processor_idle(void)
  */
 local_irq_disable();
 
-if ( !cpu_is_haltable(smp_processor_id()) )
+if ( !cpu_is_haltable(cpu) )
 {
 local_irq_enable();
-sched_tick_resume();
+rcu_idle_exit(cpu);
 cpufreq_dbs_timer_resume();
 return;
 }
@@ -786,7 +787,7 @@ static void acpi_processor_idle(void)
 /* Now in C0 */
 power->last_state = >states[0];
 local_irq_enable();
-sched_tick_resume();
+rcu_idle_exit(cpu);
 cpufreq_dbs_timer_resume();
 return;
 }
@@ -794,7 +795,7 @@ static void acpi_processor_idle(void)
 /* Now in C0 */
 power->last_state = >states[0];
 
-sched_tick_resume();
+rcu_idle_exit(cpu);
 cpufreq_dbs_timer_resume();
 
 if ( cpuidle_current_governor->reflect )
diff --git a/xen/arch/x86/cpu/mwait-idle.c b/xen/arch/x86/cpu/mwait-idle.c
index 52413e6da1..f49b04c45b 100644
--- a/xen/arch/x86/cpu/mwait-idle.c
+++ b/xen/arch/x86/cpu/mwait-idle.c
@@ -755,8 +755,8 @@ static void mwait_idle(void)
 
cpufreq_dbs_timer_suspend();
 
-   sched_tick_suspend();
-   /* sched_tick_suspend() can raise TIMER_SOFTIRQ. Process it now. */
+   rcu_idle_enter(cpu);
+   /* rcu_idle_enter() can raise TIMER_SOFTIRQ. Process it now. */
process_pending_softirqs();
 
/* Interrupts must be disabled for C2 and higher transitions. */
@@ -764,7 +764,7 @@ static void mwait_idle(void)
 
if (!cpu_is_haltable(cpu)) {
local_irq_enable();
-   sched_tick_resume();
+   rcu_idle_exit(cpu);
cpufreq_dbs_timer_resume();
return;
}
@@ -806,7 +806,7 @@ static void mwait_idle(void)
if (!(lapic_timer_reliable_states & (1 << cstate)))
lapic_timer_on();
 
-   sched_tick_resume();
+   rcu_idle_exit(cpu);
cpufreq_dbs_timer_resume();
 
if ( cpuidle_current_governor->reflect )
diff --git a/xen/common/rcupdate.c b/xen/common/rcupdate.c
index a56103c6f7..cb712c8690 100644
--- a/xen/common/rcupdate.c
+++ b/xen/common/rcupdate.c
@@ -459,7 +459,7 @@ int rcu_needs_cpu(int cpu)
  * periodically poke rcu_pedning(), so that it will invoke the callback
  * not too late after the end of the grace period.
  */
-void rcu_idle_timer_start()
+static void rcu_idle_timer_start(void)
 {
     struct rcu_data *rdp = &this_cpu(rcu_data);
 
@@ -475,7 +475,7 @@ void rcu_idle_timer_start()
 rdp->idle_timer_active = true;
 }
 
-void rcu_idle_timer_stop()
+static void rcu_idle_timer_stop(void)
 {
     struct rcu_data *rdp = &this_cpu(rcu_data);
 
@@ -633,10 +633,13 @@ void rcu_idle_enter(unsigned int cpu)
  * Se the comment before cpumask_andnot() in  rcu_start_batch().
  */
 smp_mb();
+
+rcu_idle_timer_start();
 }
 
 void

[Xen-devel] [PATCH 7/9] xen/sched: switch scheduling to bool where appropriate

2019-12-17 Thread Juergen Gross

Scheduling code has several places using int or bool_t instead of bool.
Switch those.

Signed-off-by: Juergen Gross 
---
 xen/common/sched/cpupool.c| 10 +-
 xen/common/sched/sched-if.h   |  2 +-
 xen/common/sched/sched_arinc653.c |  8 
 xen/common/sched/sched_credit.c   | 12 ++--
 xen/common/sched/sched_rt.c   | 14 +++---
 xen/common/sched/schedule.c   | 14 +++---
 xen/include/xen/sched.h   |  6 +++---
 7 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/xen/common/sched/cpupool.c b/xen/common/sched/cpupool.c
index d5b64d0a6a..14212bb4ae 100644
--- a/xen/common/sched/cpupool.c
+++ b/xen/common/sched/cpupool.c
@@ -154,7 +154,7 @@ static struct cpupool *alloc_cpupool_struct(void)
  * the searched id is returned
  * returns NULL if not found.
  */
-static struct cpupool *__cpupool_find_by_id(int id, int exact)
+static struct cpupool *__cpupool_find_by_id(int id, bool exact)
 {
 struct cpupool **q;
 
@@ -169,10 +169,10 @@ static struct cpupool *__cpupool_find_by_id(int id, int 
exact)
 
 static struct cpupool *cpupool_find_by_id(int poolid)
 {
-return __cpupool_find_by_id(poolid, 1);
+return __cpupool_find_by_id(poolid, true);
 }
 
-static struct cpupool *__cpupool_get_by_id(int poolid, int exact)
+static struct cpupool *__cpupool_get_by_id(int poolid, bool exact)
 {
 struct cpupool *c;
     spin_lock(&cpupool_lock);
@@ -185,12 +185,12 @@ static struct cpupool *__cpupool_get_by_id(int poolid, 
int exact)
 
 struct cpupool *cpupool_get_by_id(int poolid)
 {
-return __cpupool_get_by_id(poolid, 1);
+return __cpupool_get_by_id(poolid, true);
 }
 
 static struct cpupool *cpupool_get_next_by_id(int poolid)
 {
-return __cpupool_get_by_id(poolid, 0);
+return __cpupool_get_by_id(poolid, false);
 }
 
 void cpupool_put(struct cpupool *pool)
diff --git a/xen/common/sched/sched-if.h b/xen/common/sched/sched-if.h
index edce354dc7..9d0db75cbb 100644
--- a/xen/common/sched/sched-if.h
+++ b/xen/common/sched/sched-if.h
@@ -589,7 +589,7 @@ unsigned int cpupool_get_granularity(const struct cpupool 
*c);
  * * The hard affinity is not a subset of soft affinity
  * * There is an overlap between the soft and hard affinity masks
  */
-static inline int has_soft_affinity(const struct sched_unit *unit)
+static inline bool has_soft_affinity(const struct sched_unit *unit)
 {
 return unit->soft_aff_effective &&
!cpumask_subset(cpupool_domain_master_cpumask(unit->domain),
diff --git a/xen/common/sched/sched_arinc653.c 
b/xen/common/sched/sched_arinc653.c
index fe15754900..dc45378952 100644
--- a/xen/common/sched/sched_arinc653.c
+++ b/xen/common/sched/sched_arinc653.c
@@ -75,7 +75,7 @@ typedef struct arinc653_unit_s
  * arinc653_unit_t pointer. */
 struct sched_unit * unit;
 /* awake holds whether the UNIT has been woken with vcpu_wake() */
-bool_t  awake;
+bool  awake;
 /* list holds the linked list information for the list this UNIT
  * is stored in */
     struct list_head list;
@@ -427,7 +427,7 @@ a653sched_alloc_udata(const struct scheduler *ops, struct 
sched_unit *unit,
  * will mark the UNIT awake.
  */
 svc->unit = unit;
-svc->awake = 0;
+svc->awake = false;
 if ( !is_idle_unit(unit) )
 list_add(&svc->list, &SCHED_PRIV(ops)->unit_list);
 update_schedule_units(ops);
@@ -473,7 +473,7 @@ static void
 a653sched_unit_sleep(const struct scheduler *ops, struct sched_unit *unit)
 {
 if ( AUNIT(unit) != NULL )
-AUNIT(unit)->awake = 0;
+AUNIT(unit)->awake = false;
 
 /*
  * If the UNIT being put to sleep is the same one that is currently
@@ -493,7 +493,7 @@ static void
 a653sched_unit_wake(const struct scheduler *ops, struct sched_unit *unit)
 {
 if ( AUNIT(unit) != NULL )
-AUNIT(unit)->awake = 1;
+AUNIT(unit)->awake = true;
 
 cpu_raise_softirq(sched_unit_master(unit), SCHEDULE_SOFTIRQ);
 }
diff --git a/xen/common/sched/sched_credit.c b/xen/common/sched/sched_credit.c
index 8b1de9b033..05930261d9 100644
--- a/xen/common/sched/sched_credit.c
+++ b/xen/common/sched/sched_credit.c
@@ -245,7 +245,7 @@ __runq_elem(struct list_head *elem)
 }
 
 /* Is the first element of cpu's runq (if any) cpu's idle unit? */
-static inline bool_t is_runq_idle(unsigned int cpu)
+static inline bool is_runq_idle(unsigned int cpu)
 {
 /*
  * We're peeking at cpu's runq, we must hold the proper lock.
@@ -344,7 +344,7 @@ static void burn_credits(struct csched_unit *svc, s_time_t 
now)
 svc->start_time += (credits * MILLISECS(1)) / CSCHED_CREDITS_PER_MSEC;
 }
 
-static bool_t __read_mostly opt_tickle_one_idle = 1;
+static bool __read_mostly opt_tickle_one_idle = true;
 boolean_param("tickle_one_idle_cpu", opt_tickle_one_idle);
 
 DEFINE_PER_CPU(unsigned int, last_tickle_cpu);
@@ -719,7 +719,7 @@ __csched_unit_is_migrateable(const struct csched_private 
*prv,
 
 static int
 _csched_cpu_pick(const struct

[Xen-devel] [PATCH 5/9] xen/sched: use scratch cpumask instead of allocating it on the stack

2019-12-17 Thread Juergen Gross

In sched_rt there are three instances of cpumasks allocated on the
stack. Replace them by using cpumask_scratch.

Signed-off-by: Juergen Gross 
---
 xen/common/sched/sched_rt.c | 56 ++---
 1 file changed, 37 insertions(+), 19 deletions(-)

diff --git a/xen/common/sched/sched_rt.c b/xen/common/sched/sched_rt.c
index 379b56bc2a..264a753116 100644
--- a/xen/common/sched/sched_rt.c
+++ b/xen/common/sched/sched_rt.c
@@ -637,23 +637,38 @@ replq_reinsert(const struct scheduler *ops, struct 
rt_unit *svc)
  * and available resources
  */
 static struct sched_resource *
-rt_res_pick(const struct scheduler *ops, const struct sched_unit *unit)
+rt_res_pick_locked(const struct sched_unit *unit, unsigned int locked_cpu)
 {
-cpumask_t cpus;
+cpumask_t *cpus = cpumask_scratch_cpu(locked_cpu);
 cpumask_t *online;
 int cpu;
 
 online = cpupool_domain_master_cpumask(unit->domain);
-cpumask_and(&cpus, online, unit->cpu_hard_affinity);
+cpumask_and(cpus, online, unit->cpu_hard_affinity);
 
-cpu = cpumask_test_cpu(sched_unit_master(unit), &cpus)
+cpu = cpumask_test_cpu(sched_unit_master(unit), cpus)
 ? sched_unit_master(unit)
-: cpumask_cycle(sched_unit_master(unit), &cpus);
-ASSERT( !cpumask_empty(&cpus) && cpumask_test_cpu(cpu, &cpus) );
+: cpumask_cycle(sched_unit_master(unit), cpus);
+ASSERT( !cpumask_empty(cpus) && cpumask_test_cpu(cpu, cpus) );
 
 return get_sched_res(cpu);
 }
 
+/*
+ * Pick a valid resource for the unit vc
+ * Valid resource of an unit is intesection of unit's affinity
+ * and available resources
+ */
+static struct sched_resource *
+rt_res_pick(const struct scheduler *ops, const struct sched_unit *unit)
+{
+struct sched_resource *res;
+
+res = rt_res_pick_locked(unit, unit->res->master_cpu);
+
+return res;
+}
+
 /*
  * Init/Free related code
  */
@@ -886,11 +901,14 @@ rt_unit_insert(const struct scheduler *ops, struct 
sched_unit *unit)
 struct rt_unit *svc = rt_unit(unit);
 s_time_t now;
 spinlock_t *lock;
+unsigned int cpu = smp_processor_id();
 
 BUG_ON( is_idle_unit(unit) );
 
 /* This is safe because unit isn't yet being scheduled */
-sched_set_res(unit, rt_res_pick(ops, unit));
+lock = pcpu_schedule_lock_irq(cpu);
+sched_set_res(unit, rt_res_pick_locked(unit, cpu));
+pcpu_schedule_unlock_irq(lock, cpu);
 
 lock = unit_schedule_lock_irq(unit);
 
@@ -1003,13 +1021,13 @@ burn_budget(const struct scheduler *ops, struct rt_unit 
*svc, s_time_t now)
  * lock is grabbed before calling this function
  */
 static struct rt_unit *
-runq_pick(const struct scheduler *ops, const cpumask_t *mask)
+runq_pick(const struct scheduler *ops, const cpumask_t *mask, unsigned int cpu)
 {
 struct list_head *runq = rt_runq(ops);
 struct list_head *iter;
 struct rt_unit *svc = NULL;
 struct rt_unit *iter_svc = NULL;
-cpumask_t cpu_common;
+cpumask_t *cpu_common = cpumask_scratch_cpu(cpu);
 cpumask_t *online;
 
 list_for_each ( iter, runq )
@@ -1018,9 +1036,9 @@ runq_pick(const struct scheduler *ops, const cpumask_t 
*mask)
 
 /* mask cpu_hard_affinity & cpupool & mask */
 online = cpupool_domain_master_cpumask(iter_svc->unit->domain);
-cpumask_and(&cpu_common, online, iter_svc->unit->cpu_hard_affinity);
-cpumask_and(&cpu_common, mask, &cpu_common);
-if ( cpumask_empty(&cpu_common) )
+cpumask_and(cpu_common, online, iter_svc->unit->cpu_hard_affinity);
+cpumask_and(cpu_common, mask, cpu_common);
+if ( cpumask_empty(cpu_common) )
 continue;
 
 ASSERT( iter_svc->cur_budget > 0 );
@@ -1092,7 +1110,7 @@ rt_schedule(const struct scheduler *ops, struct 
sched_unit *currunit,
 }
 else
 {
-snext = runq_pick(ops, cpumask_of(sched_cpu));
+snext = runq_pick(ops, cpumask_of(sched_cpu), cur_cpu);
 
 if ( snext == NULL )
 snext = rt_unit(sched_idle_unit(sched_cpu));
@@ -1186,22 +1204,22 @@ runq_tickle(const struct scheduler *ops, struct rt_unit 
*new)
 struct rt_unit *iter_svc;
 struct sched_unit *iter_unit;
 int cpu = 0, cpu_to_tickle = 0;
-cpumask_t not_tickled;
+cpumask_t *not_tickled = cpumask_scratch_cpu(smp_processor_id());
 cpumask_t *online;
 
 if ( new == NULL || is_idle_unit(new->unit) )
 return;
 
 online = cpupool_domain_master_cpumask(new->unit->domain);
-cpumask_and(_tickled, online, new->unit->cpu_hard_affinity);
-cpumask_andnot(_tickled, _tickled, >tickled);
+cpumask_and(not_tickled, online, new->unit->cpu_hard_affinity);
+cpumask_andnot(not_tickled, not_tickled, >tickled);
 
 /*
  * 1) If there are any idle CPUs, kick one.
  *For cache benefit,we first search new->cpu.
  *The same loop also find the one with lowest priority.
  */
-cpu = cpumask_test_or_cycle(sched_unit_master(new->unit), _tickled);
+cpu =

[Xen-devel] [PATCH 3/9] xen/sched: cleanup sched.h

2019-12-17 Thread Juergen Gross

There are some items in include/xen/sched.h which can be moved to
sched-if.h as they are scheduler private.

Signed-off-by: Juergen Gross 
---
 xen/common/sched/sched-if.h | 13 +
 xen/common/sched/schedule.c |  2 +-
 xen/include/xen/sched.h | 17 -
 3 files changed, 14 insertions(+), 18 deletions(-)

diff --git a/xen/common/sched/sched-if.h b/xen/common/sched/sched-if.h
index a702fd23b1..edce354dc7 100644
--- a/xen/common/sched/sched-if.h
+++ b/xen/common/sched/sched-if.h
@@ -533,6 +533,7 @@ static inline void sched_unit_unpause(const struct 
sched_unit *unit)
 struct cpupool
 {
 int  cpupool_id;
+#define CPUPOOLID_NONE-1
 unsigned int n_dom;
 cpumask_var_tcpu_valid;  /* all cpus assigned to pool */
 cpumask_var_tres_valid;  /* all scheduling resources of pool */
@@ -618,5 +619,17 @@ affinity_balance_cpumask(const struct sched_unit *unit, 
int step,
 
 void sched_rm_cpu(unsigned int cpu);
 const cpumask_t *sched_get_opt_cpumask(enum sched_gran opt, unsigned int cpu);
+void schedule_dump(struct cpupool *c);
+struct scheduler *scheduler_get_default(void);
+struct scheduler *scheduler_alloc(unsigned int sched_id, int *perr);
+void scheduler_free(struct scheduler *sched);
+int cpu_disable_scheduler(unsigned int cpu);
+int schedule_cpu_add(unsigned int cpu, struct cpupool *c);
+int schedule_cpu_rm(unsigned int cpu);
+int sched_move_domain(struct domain *d, struct cpupool *c);
+struct cpupool *cpupool_get_by_id(int poolid);
+void cpupool_put(struct cpupool *pool);
+int cpupool_add_domain(struct domain *d, int poolid);
+void cpupool_rm_domain(struct domain *d);
 
 #endif /* __XEN_SCHED_IF_H__ */
diff --git a/xen/common/sched/schedule.c b/xen/common/sched/schedule.c
index c751faa741..db8ce146ca 100644
--- a/xen/common/sched/schedule.c
+++ b/xen/common/sched/schedule.c
@@ -1346,7 +1346,7 @@ int vcpu_set_hard_affinity(struct vcpu *v, const 
cpumask_t *affinity)
 return vcpu_set_affinity(v, affinity, v->sched_unit->cpu_hard_affinity);
 }
 
-int vcpu_set_soft_affinity(struct vcpu *v, const cpumask_t *affinity)
+static int vcpu_set_soft_affinity(struct vcpu *v, const cpumask_t *affinity)
 {
 return vcpu_set_affinity(v, affinity, v->sched_unit->cpu_soft_affinity);
 }
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 2507a833c2..55335d6ab3 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -685,7 +685,6 @@ int  sched_init_vcpu(struct vcpu *v);
 void sched_destroy_vcpu(struct vcpu *v);
 int  sched_init_domain(struct domain *d, int poolid);
 void sched_destroy_domain(struct domain *d);
-int sched_move_domain(struct domain *d, struct cpupool *c);
 long sched_adjust(struct domain *, struct xen_domctl_scheduler_op *);
 long sched_adjust_global(struct xen_sysctl_scheduler_op *);
 int  sched_id(void);
@@ -918,19 +917,10 @@ static inline bool sched_has_urgent_vcpu(void)
 return atomic_read(_cpu(sched_urgent_count));
 }
 
-struct scheduler;
-
-struct scheduler *scheduler_get_default(void);
-struct scheduler *scheduler_alloc(unsigned int sched_id, int *perr);
-void scheduler_free(struct scheduler *sched);
-int schedule_cpu_add(unsigned int cpu, struct cpupool *c);
-int schedule_cpu_rm(unsigned int cpu);
 void vcpu_set_periodic_timer(struct vcpu *v, s_time_t value);
-int cpu_disable_scheduler(unsigned int cpu);
 void sched_setup_dom0_vcpus(struct domain *d);
 int vcpu_temporary_affinity(struct vcpu *v, unsigned int cpu, uint8_t reason);
 int vcpu_set_hard_affinity(struct vcpu *v, const cpumask_t *affinity);
-int vcpu_set_soft_affinity(struct vcpu *v, const cpumask_t *affinity);
 void restore_vcpu_affinity(struct domain *d);
 int vcpu_affinity_domctl(struct domain *d, uint32_t cmd,
  struct xen_domctl_vcpuaffinity *vcpuaff);
@@ -1051,17 +1041,10 @@ extern enum cpufreq_controller {
 FREQCTL_none, FREQCTL_dom0_kernel, FREQCTL_xen
 } cpufreq_controller;
 
-#define CPUPOOLID_NONE-1
-
-struct cpupool *cpupool_get_by_id(int poolid);
-void cpupool_put(struct cpupool *pool);
-int cpupool_add_domain(struct domain *d, int poolid);
-void cpupool_rm_domain(struct domain *d);
 int cpupool_move_domain(struct domain *d, struct cpupool *c);
 int cpupool_do_sysctl(struct xen_sysctl_cpupool_op *op);
 int cpupool_get_id(const struct domain *d);
 cpumask_t *cpupool_valid_cpus(struct cpupool *pool);
-void schedule_dump(struct cpupool *c);
 extern void dump_runq(unsigned char key);
 
 void arch_do_physinfo(struct xen_sysctl_physinfo *pi);
-- 
2.16.4


___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] [PATCH 2/9] xen/sched: make sched-if.h really scheduler private

2019-12-17 Thread Juergen Gross

include/xen/sched-if.h should be private to scheduler code, so move it
to common/sched/sched-if.h and move the remaining use cases to
cpupool.c and schedule.c.

Signed-off-by: Juergen Gross 
---
 xen/arch/x86/dom0_build.c|   5 +-
 xen/common/domain.c  |  70 --
 xen/common/domctl.c  | 135 +--
 xen/common/sched/cpupool.c   |  13 +-
 xen/{include/xen => common/sched}/sched-if.h |   3 -
 xen/common/sched/sched_arinc653.c|   3 +-
 xen/common/sched/sched_credit.c  |   2 +-
 xen/common/sched/sched_credit2.c |   3 +-
 xen/common/sched/sched_null.c|   3 +-
 xen/common/sched/sched_rt.c  |   3 +-
 xen/common/sched/schedule.c  | 191 ++-
 xen/include/xen/domain.h |   3 +
 xen/include/xen/sched.h  |   7 +
 13 files changed, 228 insertions(+), 213 deletions(-)
 rename xen/{include/xen => common/sched}/sched-if.h (99%)

diff --git a/xen/arch/x86/dom0_build.c b/xen/arch/x86/dom0_build.c
index 28b964e018..56c2dee0fc 100644
--- a/xen/arch/x86/dom0_build.c
+++ b/xen/arch/x86/dom0_build.c
@@ -9,7 +9,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 
 #include 
@@ -227,9 +226,9 @@ unsigned int __init dom0_max_vcpus(void)
 dom0_nodes = node_online_map;
 for_each_node_mask ( node, dom0_nodes )
 cpumask_or(_cpus, _cpus, _to_cpumask(node));
-cpumask_and(_cpus, _cpus, cpupool0->cpu_valid);
+cpumask_and(_cpus, _cpus, cpupool_valid_cpus(cpupool0));
 if ( cpumask_empty(_cpus) )
-cpumask_copy(_cpus, cpupool0->cpu_valid);
+cpumask_copy(_cpus, cpupool_valid_cpus(cpupool0));
 
 max_vcpus = cpumask_weight(_cpus);
 if ( opt_dom0_max_vcpus_min > max_vcpus )
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 66c7fc..f4f0a66262 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -10,7 +10,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -565,75 +564,6 @@ void __init setup_system_domains(void)
 #endif
 }
 
-void domain_update_node_affinity(struct domain *d)
-{
-cpumask_var_t dom_cpumask, dom_cpumask_soft;
-cpumask_t *dom_affinity;
-const cpumask_t *online;
-struct sched_unit *unit;
-unsigned int cpu;
-
-/* Do we have vcpus already? If not, no need to update node-affinity. */
-if ( !d->vcpu || !d->vcpu[0] )
-return;
-
-if ( !zalloc_cpumask_var(_cpumask) )
-return;
-if ( !zalloc_cpumask_var(_cpumask_soft) )
-{
-free_cpumask_var(dom_cpumask);
-return;
-}
-
-online = cpupool_domain_master_cpumask(d);
-
-spin_lock(>node_affinity_lock);
-
-/*
- * If d->auto_node_affinity is true, let's compute the domain's
- * node-affinity and update d->node_affinity accordingly. if false,
- * just leave d->auto_node_affinity alone.
- */
-if ( d->auto_node_affinity )
-{
-/*
- * We want the narrowest possible set of pcpus (to get the narowest
- * possible set of nodes). What we need is the cpumask of where the
- * domain can run (the union of the hard affinity of all its vcpus),
- * and the full mask of where it would prefer to run (the union of
- * the soft affinity of all its various vcpus). Let's build them.
- */
-for_each_sched_unit ( d, unit )
-{
-cpumask_or(dom_cpumask, dom_cpumask, unit->cpu_hard_affinity);
-cpumask_or(dom_cpumask_soft, dom_cpumask_soft,
-   unit->cpu_soft_affinity);
-}
-/* Filter out non-online cpus */
-cpumask_and(dom_cpumask, dom_cpumask, online);
-ASSERT(!cpumask_empty(dom_cpumask));
-/* And compute the intersection between hard, online and soft */
-cpumask_and(dom_cpumask_soft, dom_cpumask_soft, dom_cpumask);
-
-/*
- * If not empty, the intersection of hard, soft and online is the
- * narrowest set we want. If empty, we fall back to hard
- */
-dom_affinity = cpumask_empty(dom_cpumask_soft) ?
-   dom_cpumask : dom_cpumask_soft;
-
-nodes_clear(d->node_affinity);
-for_each_cpu ( cpu, dom_affinity )
-node_set(cpu_to_node(cpu), d->node_affinity);
-}
-
-spin_unlock(>node_affinity_lock);
-
-free_cpumask_var(dom_cpumask_soft);
-free_cpumask_var(dom_cpumask);
-}
-
-
 int domain_set_node_affinity(struct domain *d, const nodemask_t *affinity)
 {
 /* Being disjoint with the system is just wrong. */
diff --git a/xen/common/domctl.c b/xen/common/domctl.c
index 03d0226039..3407db44fd 100644
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -11,7 +11,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -65,9 +64,9 @@ static int bitmap_to_xenctl_bitmap(struct

Re: [Xen-devel] REGRESSION: Xen 4.13 RC5 fails to bootstrap Dom0 on ARM

2019-12-17 Thread Roman Shaposhnik

On Tue, Dec 17, 2019 at 6:56 PM Roman Shaposhnik  wrote:
>
> On Tue, Dec 17, 2019 at 5:51 PM Stefano Stabellini
>  wrote:
> >
> > On Tue, 17 Dec 2019, Roman Shaposhnik wrote:
> > > On Tue, Dec 17, 2019 at 11:26 AM Stefano Stabellini
> > >  wrote:
> > > >
> > > > On Tue, 17 Dec 2019, Roman Shaposhnik wrote:
> > > > > On Tue, Dec 17, 2019 at 10:30 AM Stefano Stabellini
> > > > >  wrote:
> > > > > >
> > > > > > On Tue, 17 Dec 2019, Julien Grall wrote:
> > > > > > > Hi,
> > > > > > >
> > > > > > > On 17/12/2019 04:39, Roman Shaposhnik wrote:
> > > > > > > > On Mon, Dec 16, 2019 at 6:55 PM Stefano Stabellini
> > > > > > > >  wrote:
> > > > > > > > > On Mon, 16 Dec 2019, Roman Shaposhnik wrote:
> > > > > > > > > If I sum all the memory sizes together I get 0x3ddfd000 which 
> > > > > > > > > is 990M.
> > > > > > > > > > If so, I wonder how you could boot successfully with 
> > > > > > > > > dom0_mem=1024M even
> > > > > > > > > on Xen 4.12... :-?
> > > > > > > >
> > > > > > > > That is a very interesting observation indeed! I actually don't
> > > > > > > > remember where that device tree came from, but I think it was 
> > > > > > > > from one
> > > > > > > > of the Linaro sites.
> > > > > > >
> > > > > > > > This is most likely because of:
> > > > > > >
> > > > > > > commit 6341a674573f1834f083f0ab0f5b36b075f9e02e
> > > > > > > Author: Julien Grall 
> > > > > > > Date:   Wed Aug 21 22:42:31 2019 +0100
> > > > > > >
> > > > > > > xen/arm: domain_build: Don't continue if unable to allocate 
> > > > > > > all dom0 banks
> > > > > > >
> > > > > > > Xen will only print a warning if there are memory unallocated 
> > > > > > > when using
> > > > > > > 1:1 mapping (only used by dom0). This also includes the case 
> > > > > > > where no
> > > > > > > memory has been allocated.
> > > > > > >
> > > > > > > It will bring to all sort of issues that can be hard to 
> > > > > > > diagnostic for
> > > > > > > users (the warning can be difficult to spot or disregard).
> > > > > > >
> > > > > > > If the users request 1GB of memory, then most likely they 
> > > > > > > want the exact
> > > > > > > amount and not 512MB. So panic if all the memory has not been 
> > > > > > > allocated.
> > > > > > >
> > > > > > > After this change, the behavior is the same as for non-1:1 
> > > > > > > memory
> > > > > > > allocation (used by domU).
> > > > > > >
> > > > > > > At the same time, reflow the message to have the format on a 
> > > > > > > single
> > > > > > > line.
> > > > > > >
> > > > > > > Signed-off-by: Julien Grall 
> > > > > > > Acked-by: Stefano Stabellini 
> > > > > >
> > > > > > Ah! Roman, could you please post the full boot log of a successful 
> > > > > > 4.12
> > > > > > boot?
> > > > > >
> > > > > > If it has a "Failed to allocate requested dom0 memory" message, 
> > > > > > then we
> > > > > > know what the issue is.
> > > > >
> > > > > Aha! Our messages seems to have crossed ;-) Full log is attached and
> > > > > yes -- that's
> > > > > the problem indeed.
> > > > >
> > > > > So at least that mystery is solved. But I'm still not able to get to a
> > > > > full 1G of memory
> > > > > even with your update to the device tree file. Any chance you can 
> > > > > send me the
> > > > > device tree file that works for you?
> > > >
> > > > I didn't try on real hardware, I only tried on QEMU with a similar
> > > > configuration. I went back and check the HiKey device tree I used and it
> > > > is the same as yours (including the ramoops reserved-memory error).
> > > >
> > > > Apparently there are 1G and 2G variants of the HiKey, obviously both
> > > > yours and my device tree are for the 1G variant. I try to dig through
> > > > the docs but couldn't find the details of the 2G variant. I cannot find
> > > > anywhere the memory range for the top 1G of memory not even on the
> > > > LeMaker docs! :-/
> > >
> > > Yup. That's exactly the issue on my end as well - can't seem to find an
> > > authoritative source for that devicetree.
> > >
> > > I did find this, though:
> > >  https://releases.linaro.org/96boards/hikey/linaro/debian/15.11/
> > > which looks like it has the latest (at least file timestamp-wise) 
> > > devicetree.
> > >
> > > If you look at the memory and reserved memory nodes there, they
> > > are actually much simpler than what we've got:
> > >
> > > memory {
> > > device_type = "memory";
> > > reg = <0x0 0x0 0x0 0x4000>;
> > > };
> >
> > Which is still 1G, but it is surprisingly simpler.
> >
> >
> > > reserved-memory {
> > > #address-cells = <0x2>;
> > > #size-cells = <0x2>;
> > > ranges;
> > >
> > > mcu-buf@05e0 {
> > > no-map;
> > > reg = <0x0 0x5e0 0x0 0x10 0x0
> > > 0x740f000 0x0 0x1000>;
> > > };
> > >
> > > mbox-buf@06dff000 {
> > >

Re: [Xen-devel] [PATCH net-next 3/3] xen-netback: remove 'hotplug-status' once it has served its purpose

2019-12-17 Thread David Miller

From: Paul Durrant 
Date: Tue, 17 Dec 2019 13:32:18 +

> Removing the 'hotplug-status' node in netback_remove() is wrong; the script
> may not have completed. Only remove the node once the watch has fired and
> has been unregistered.
> 
> Signed-off-by: Paul Durrant 

Applied.

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH net-next 2/3] xen-netback: switch state to InitWait at the end of netback_probe()...

2019-12-17 Thread David Miller

From: Paul Durrant 
Date: Tue, 17 Dec 2019 13:32:17 +

> ...as the comment above the function states.
> 
> The switch to Initialising at the start of the function is somewhat bogus
> as the toolstack will have set that initial state anyway. To behave
> correctly, a backend should switch to InitWait once it has set up all
> xenstore values that may be required by an initialising frontend. This
> patch calls backend_switch_state() to make the transition at the
> appropriate point.
> 
> NOTE: backend_switch_state() ignores errors from xenbus_switch_state()
>   and so this patch removes an error path from netback_probe(). This
>   means a failure to change state at this stage (in the absence of
>   other failures) will leave the device instantiated. This is highly
>   unlikely to happen as a failure to change state would indicate a
>   failure to write to xenstore, and that will trigger other error
>   paths. Also, a 'stuck' device can still be cleaned up using 'unbind'
>   in any case.
> 
> Signed-off-by: Paul Durrant 

Applied.

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH net-next 1/3] xen-netback: move netback_probe() and netback_remove() to the end...

2019-12-17 Thread David Miller

From: Paul Durrant 
Date: Tue, 17 Dec 2019 13:32:16 +

> ...of xenbus.c
> 
> This is a cosmetic function re-ordering to reduce churn in a subsequent
> patch. Some style fix-up was done to make checkpatch.pl happier.
> 
> No functional change.
> 
> Signed-off-by: Paul Durrant 

Applied.

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] [xen-unstable test] 144905: tolerable FAIL - PUSHED

2019-12-17 Thread osstest service owner

flight 144905 xen-unstable real [real]
http://logs.test-lab.xenproject.org/osstest/logs/144905/

Failures :-/ but no regressions.

Tests which did not succeed, but are not blocking:
 test-xtf-amd64-amd64-1   72 xtf/test-hvm64-xsa-308  fail blocked in 144850
 test-xtf-amd64-amd64-3   72 xtf/test-hvm64-xsa-308  fail blocked in 144850
 test-amd64-amd64-xl-rtds 18 guest-localmigrate/x10   fail  like 144813
 test-amd64-amd64-xl-qemut-win7-amd64 17 guest-stopfail like 144827
 test-amd64-amd64-xl-qemuu-win7-amd64 17 guest-stopfail like 144850
 test-armhf-armhf-libvirt 14 saverestore-support-checkfail  like 144850
 test-amd64-i386-xl-qemuu-win7-amd64 17 guest-stop fail like 144850
 test-amd64-i386-xl-qemut-win7-amd64 17 guest-stop fail like 144850
 test-armhf-armhf-libvirt-raw 13 saverestore-support-checkfail  like 144850
 test-amd64-amd64-xl-qemuu-ws16-amd64 17 guest-stopfail like 144850
 test-amd64-amd64-xl-qemut-ws16-amd64 17 guest-stopfail like 144850
 test-amd64-i386-xl-qemuu-ws16-amd64 17 guest-stop fail like 144850
 test-amd64-i386-xl-pvshim12 guest-start  fail   never pass
 test-amd64-i386-libvirt  13 migrate-support-checkfail   never pass
 test-amd64-amd64-libvirt-xsm 13 migrate-support-checkfail   never pass
 test-amd64-amd64-libvirt 13 migrate-support-checkfail   never pass
 test-amd64-i386-libvirt-xsm  13 migrate-support-checkfail   never pass
 test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 11 migrate-support-check 
fail never pass
 test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm 11 migrate-support-check 
fail never pass
 test-arm64-arm64-xl-credit2  13 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-credit2  14 saverestore-support-checkfail   never pass
 test-arm64-arm64-xl-thunderx 13 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-credit1  13 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-thunderx 14 saverestore-support-checkfail   never pass
 test-arm64-arm64-xl-credit1  14 saverestore-support-checkfail   never pass
 test-arm64-arm64-xl  13 migrate-support-checkfail   never pass
 test-arm64-arm64-xl  14 saverestore-support-checkfail   never pass
 test-arm64-arm64-libvirt-xsm 13 migrate-support-checkfail   never pass
 test-arm64-arm64-libvirt-xsm 14 saverestore-support-checkfail   never pass
 test-amd64-amd64-qemuu-nested-amd 17 debian-hvm-install/l1/l2  fail never pass
 test-armhf-armhf-xl-arndale  13 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-arndale  14 saverestore-support-checkfail   never pass
 test-arm64-arm64-xl-xsm  13 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-xsm  14 saverestore-support-checkfail   never pass
 test-amd64-amd64-libvirt-vhd 12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl  13 migrate-support-checkfail   never pass
 test-armhf-armhf-xl  14 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl-multivcpu 13 migrate-support-checkfail  never pass
 test-armhf-armhf-xl-multivcpu 14 saverestore-support-checkfail  never pass
 test-armhf-armhf-xl-rtds 13 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-rtds 14 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl-cubietruck 13 migrate-support-checkfail never pass
 test-armhf-armhf-xl-cubietruck 14 saverestore-support-checkfail never pass
 test-armhf-armhf-libvirt 13 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-credit2  13 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-credit2  14 saverestore-support-checkfail   never pass
 test-arm64-arm64-xl-seattle  13 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-seattle  14 saverestore-support-checkfail   never pass
 test-armhf-armhf-libvirt-raw 12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-vhd  12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-vhd  13 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl-credit1  13 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-credit1  14 saverestore-support-checkfail   never pass
 test-amd64-i386-xl-qemut-ws16-amd64 17 guest-stop  fail never pass

version targeted for testing:
 xen  f50a4f6e244cfc8e773300c03aaf4db391f3028a
baseline version:
 xen  c9115affa6f83aebe29ae9cbf503aa163911a5bb

Last test of basis   144850  2019-12-16 01:51:10 Z2 days
Failing since144878  2019-12-16 19:06:11 Z1 days3 attempts
Testing same since   144905  2019-12-17 18:36:21 Z0 days1 attempts


People who touched

[Xen-devel] [ovmf test] 144918: all pass - PUSHED

2019-12-17 Thread osstest service owner

flight 144918 ovmf real [real]
http://logs.test-lab.xenproject.org/osstest/logs/144918/

Perfect :-)
All tests in this flight passed as required
version targeted for testing:
 ovmf 69ebe8280672589d8f5826f74c0fa92c103c8042
baseline version:
 ovmf 804666c86e7b6f04fe5c5cfdb13199c19e0e99b0

Last test of basis   144637  2019-12-09 09:09:49 Z8 days
Failing since144646  2019-12-10 01:39:53 Z8 days   73 attempts
Testing same since   144918  2019-12-18 02:39:46 Z0 days1 attempts


People who touched revisions under test:
  Antoine Coeur 
  Ard Biesheuvel 
  Bob Feng 
  Fan, Zhiju 
  Jiewen Yao 
  Michael Kubacki 
  Pete Batard 
  Philippe Mathieu-Daude 
  Steven Shi 
  Zhiju.Fan 

jobs:
 build-amd64-xsm  pass
 build-i386-xsm   pass
 build-amd64  pass
 build-i386   pass
 build-amd64-libvirt  pass
 build-i386-libvirt   pass
 build-amd64-pvopspass
 build-i386-pvops pass
 test-amd64-amd64-xl-qemuu-ovmf-amd64 pass
 test-amd64-i386-xl-qemuu-ovmf-amd64  pass



sg-report-flight on osstest.test-lab.xenproject.org
logs: /home/logs/logs
images: /home/logs/images

Logs, config files, etc. are available at
http://logs.test-lab.xenproject.org/osstest/logs

Explanation of these reports, and of osstest in general, is at
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master

Test harness code can be found at
http://xenbits.xen.org/gitweb?p=osstest.git;a=summary


Pushing revision :

To xenbits.xen.org:/home/xen/git/osstest/ovmf.git
   804666c86e..69ebe82806  69ebe8280672589d8f5826f74c0fa92c103c8042 -> 
xen-tested-master

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] [XEN PATCH v2] x86/vm_event: add short-circuit for breakpoints (aka, "fast single step")

2019-12-17 Thread Sergey Kovalev


When using DRAKVUF (or another system using altp2m with shadow pages similar
to what is described in
https://xenproject.org/2016/04/13/stealthy-monitoring-with-xen-altp2m),
after a breakpoint is hit the system switches to the default
unrestricted altp2m view with singlestep enabled. When the singlestep
traps to Xen another vm_event is sent to the monitor agent, which then
normally disables singlestepping and switches the altp2m view back to
the restricted view.

This patch short-circuits that last part so that it doesn't need to send the
vm_event out for the singlestep event and should switch back to the restricted
view in Xen automatically.

This optimization gains about 35% speed-up.

Was tested on Debian branch of Xen 4.12. See at:
https://github.com/skvl/xen/tree/debian/knorrie/4.12/fast-singlestep

Rebased on master:
https://github.com/skvl/xen/tree/fast-singlestep

Signed-off-by: Sergey Kovalev 
---
 xen/arch/x86/hvm/hvm.c | 12 
 xen/arch/x86/hvm/monitor.c |  9 +
 xen/arch/x86/vm_event.c|  8 ++--
 xen/include/asm-x86/hvm/hvm.h  |  1 +
 xen/include/asm-x86/hvm/vcpu.h |  4 
 xen/include/public/vm_event.h  | 10 ++
 6 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 47573f71b8..4999569503 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -5126,6 +5126,18 @@ void hvm_toggle_singlestep(struct vcpu *v)
 v->arch.hvm.single_step = !v->arch.hvm.single_step;
 }

+void hvm_fast_singlestep(struct vcpu *v, uint16_t p2midx)
+{
+ASSERT(atomic_read(&v->pause_count));
+
+if ( !hvm_is_singlestep_supported() )
+return;
+
+v->arch.hvm.single_step = true;
+v->arch.hvm.fast_single_step.enabled = true;
+v->arch.hvm.fast_single_step.p2midx = p2midx;
+}
+
 /*
  * Segment caches in VMCB/VMCS are inconsistent about which bits are checked,
  * important, and preserved across vmentry/exit.  Cook the values to make them
diff --git a/xen/arch/x86/hvm/monitor.c b/xen/arch/x86/hvm/monitor.c
index 1f23fe25e8..85996a3edd 100644
--- a/xen/arch/x86/hvm/monitor.c
+++ b/xen/arch/x86/hvm/monitor.c
@@ -28,6 +28,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -159,6 +160,14 @@ int hvm_monitor_debug(unsigned long rip, enum 
hvm_monitor_debug_type type,
 case HVM_MONITOR_SINGLESTEP_BREAKPOINT:
 if ( !ad->monitor.singlestep_enabled )
 return 0;
+if ( curr->arch.hvm.fast_single_step.enabled )
+{
+p2m_altp2m_check(curr, curr->arch.hvm.fast_single_step.p2midx);
+curr->arch.hvm.single_step = false;
+curr->arch.hvm.fast_single_step.enabled = false;
+curr->arch.hvm.fast_single_step.p2midx = 0;
+return 0;
+}
 req.reason = VM_EVENT_REASON_SINGLESTEP;
 req.u.singlestep.gfn = gfn_of_rip(rip);
 sync = true;
diff --git a/xen/arch/x86/vm_event.c b/xen/arch/x86/vm_event.c
index 52c2a71fa0..3788d103f9 100644
--- a/xen/arch/x86/vm_event.c
+++ b/xen/arch/x86/vm_event.c
@@ -61,7 +61,8 @@ void vm_event_cleanup_domain(struct domain *d)
 void vm_event_toggle_singlestep(struct domain *d, struct vcpu *v,
 vm_event_response_t *rsp)
 {
-if ( !(rsp->flags & VM_EVENT_FLAG_TOGGLE_SINGLESTEP) )
+if ( !(rsp->flags & VM_EVENT_FLAG_TOGGLE_SINGLESTEP ||
+   rsp->flags & VM_EVENT_FLAG_FAST_SINGLESTEP) )
 return;

 if ( !is_hvm_domain(d) )
@@ -69,7 +70,10 @@ void vm_event_toggle_singlestep(struct domain *d, struct 
vcpu *v,

 ASSERT(atomic_read(>vm_event_pause_count));

-hvm_toggle_singlestep(v);
+if ( rsp->flags & VM_EVENT_FLAG_TOGGLE_SINGLESTEP )
+hvm_toggle_singlestep(v);
+else
+hvm_fast_singlestep(v, rsp->u.fast_singlestep.p2midx);
 }

 void vm_event_register_write_resume(struct vcpu *v, vm_event_response_t *rsp)
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index 1d7b66f927..09793c12e9 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -323,6 +323,7 @@ int hvm_debug_op(struct vcpu *v, int32_t op);

 /* Caller should pause vcpu before calling this function */
 void hvm_toggle_singlestep(struct vcpu *v);
+void hvm_fast_singlestep(struct vcpu *v, uint16_t p2midx);

 int hvm_hap_nested_page_fault(paddr_t gpa, unsigned long gla,
   struct npfec npfec);
diff --git a/xen/include/asm-x86/hvm/vcpu.h b/xen/include/asm-x86/hvm/vcpu.h
index 38f5c2bb9b..8b8494 100644
--- a/xen/include/asm-x86/hvm/vcpu.h
+++ b/xen/include/asm-x86/hvm/vcpu.h
@@ -172,6 +172,10 @@ struct hvm_vcpu {
 boolflag_dr_dirty;
 booldebug_state_latch;
 boolsingle_step;
+struct {
+bool enabled;
+uint16_t p2midx;
+} fast_single_step;

 struct hvm_vcpu_asid n1asid;

diff --git a/xen/include/public/vm_event.h

Re: [Xen-devel] [RFC 6/6] arm64: hyperv: Enable vDSO

2019-12-17 Thread Boqun Feng

On Tue, Dec 17, 2019 at 03:10:16PM +0100, Vitaly Kuznetsov wrote:
> Boqun Feng  writes:
> 
> > Similar to x86, add a new vclock_mode VCLOCK_HVCLOCK, and reuse the
> > hv_read_tsc_page() for userspace to read tsc page clocksource.
> >
> > Signed-off-by: Boqun Feng (Microsoft) 
> > ---
> >  arch/arm64/include/asm/clocksource.h   |  3 ++-
> >  arch/arm64/include/asm/mshyperv.h  |  2 +-
> >  arch/arm64/include/asm/vdso/gettimeofday.h | 19 +++
> >  3 files changed, 22 insertions(+), 2 deletions(-)
> >
> > diff --git a/arch/arm64/include/asm/clocksource.h 
> > b/arch/arm64/include/asm/clocksource.h
> > index fbe80057468c..c6acd45fe748 100644
> > --- a/arch/arm64/include/asm/clocksource.h
> > +++ b/arch/arm64/include/asm/clocksource.h
> > @@ -4,7 +4,8 @@
> >  
> >  #define VCLOCK_NONE0   /* No vDSO clock available. 
> > */
> >  #define VCLOCK_CNTVCT  1   /* vDSO should use cntvcnt  
> > */
> > -#define VCLOCK_MAX 1
> > +#define VCLOCK_HVCLOCK 2   /* vDSO should use vread_hvclock()  
> > */
> > +#define VCLOCK_MAX 2
> >  
> >  struct arch_clocksource_data {
> > int vclock_mode;
> > diff --git a/arch/arm64/include/asm/mshyperv.h 
> > b/arch/arm64/include/asm/mshyperv.h
> > index 0afb00e3501d..7c85dd816dca 100644
> > --- a/arch/arm64/include/asm/mshyperv.h
> > +++ b/arch/arm64/include/asm/mshyperv.h
> > @@ -90,7 +90,7 @@ extern void hv_get_vpreg_128(u32 reg, struct 
> > hv_get_vp_register_output *result);
> >  #define hv_set_reference_tsc(val) \
> > hv_set_vpreg(HV_REGISTER_REFERENCE_TSC, val)
> >  #define hv_set_clocksource_vdso(val) \
> > -   ((val).archdata.vclock_mode = VCLOCK_NONE)
> > +   ((val).archdata.vclock_mode = VCLOCK_HVCLOCK)
> >  
> >  #if IS_ENABLED(CONFIG_HYPERV)
> >  #define hv_enable_stimer0_percpu_irq(irq)  enable_percpu_irq(irq, 0)
> > diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h 
> > b/arch/arm64/include/asm/vdso/gettimeofday.h
> > index e6e3fe0488c7..7e689b903f4d 100644
> > --- a/arch/arm64/include/asm/vdso/gettimeofday.h
> > +++ b/arch/arm64/include/asm/vdso/gettimeofday.h
> > @@ -67,6 +67,20 @@ int clock_getres_fallback(clockid_t _clkid, struct 
> > __kernel_timespec *_ts)
> > return ret;
> >  }
> >  
> > +#ifdef CONFIG_HYPERV_TIMER
> > +/* This will override the default hv_get_raw_timer() */
> > +#define hv_get_raw_timer() __arch_counter_get_cntvct()
> > +#include 
> > +
> > +extern struct ms_hyperv_tsc_page
> > +_hvclock_page __attribute__((visibility("hidden")));
> > +
> > +static u64 vread_hvclock(void)
> > +{
> > +   return hv_read_tsc_page(&_hvclock_page);
> > +}
> > +#endif
> 
> The function is almost the same on x86 (&_hvclock_page ->
> &hvclock_page), would it maybe make sense to move this to arch neutral
> clocksource/hyperv_timer.h?
> 

I'm not sure whether the underscore matters in the vDSO data symbol, so
I follow the architectural name convention. If the leading underscore
doesn't have special purpose I'm happy to move this to arch neutral
header file.

> > +
> >  static __always_inline u64 __arch_get_hw_counter(s32 clock_mode)
> >  {
> > u64 res;
> > @@ -78,6 +92,11 @@ static __always_inline u64 __arch_get_hw_counter(s32 
> > clock_mode)
> > if (clock_mode == VCLOCK_NONE)
> > return __VDSO_USE_SYSCALL;
> >  
> > +#ifdef CONFIG_HYPERV_TIMER
> > +   if (likely(clock_mode == VCLOCK_HVCLOCK))
> > +   return vread_hvclock();
> 
> I'm not sure likely() is justified here: it'll make ALL builds which
> enable CONFIG_HYPERV_TIMER (e.g. distro kernels) to prefer
> VCLOCK_HVCLOCK, even if the kernel is not running on Hyper-V.
> 

Makes sense. Thanks for pointing this out! I will change it in the next
version.

Regards,
Boqun

> > +#endif
> > +
> > /*
> >  * This isb() is required to prevent that the counter value
> >  * is speculated.
> 
> -- 
> Vitaly
> 

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] clock source in PV Linux

2019-12-17 Thread Roman Shaposhnik

On Wed, Dec 11, 2019 at 12:41 AM Jan Beulich  wrote:
>
> On 11.12.2019 09:16, Jürgen Groß wrote:
> > On 11.12.19 08:28, Jan Beulich wrote:
> >> Jürgen, Boris,
> >>
> >> I've noticed
> >>
> >> <6>clocksource: Switched to clocksource tsc
> >>
> >> as the final clocksource related boot message in a PV Dom0's
> >> log with 5.4.2. Is it intentional that it's not the "xen" one
> >> that gets used by default?
> >
> > I think this is fine. I just tested it and I'm seeing the same in dom0,
> > while in a PV domU "xen" is used per default.
> >
> > In dom0 "tsc" should be okay in case it is stable. Or are you expecting
> > problems with that setting?
>
> Well, first of all I found this surprising. Whether there are problems to
> be expected largely depends on the reliability of the "stable" detection
> in PV Dom0.

Related question: does this mean that tsc is now default for PVH as well?

The reason I'm asking is because I'm still a bit worried about the
clock drift with tsc.

Thanks,
Roman.

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] REGRESSION: Xen 4.13 RC5 fails to bootstrap Dom0 on ARM

2019-12-17 Thread Roman Shaposhnik

On Tue, Dec 17, 2019 at 5:51 PM Stefano Stabellini
 wrote:
>
> On Tue, 17 Dec 2019, Roman Shaposhnik wrote:
> > On Tue, Dec 17, 2019 at 11:26 AM Stefano Stabellini
> >  wrote:
> > >
> > > On Tue, 17 Dec 2019, Roman Shaposhnik wrote:
> > > > On Tue, Dec 17, 2019 at 10:30 AM Stefano Stabellini
> > > >  wrote:
> > > > >
> > > > > On Tue, 17 Dec 2019, Julien Grall wrote:
> > > > > > Hi,
> > > > > >
> > > > > > On 17/12/2019 04:39, Roman Shaposhnik wrote:
> > > > > > > On Mon, Dec 16, 2019 at 6:55 PM Stefano Stabellini
> > > > > > >  wrote:
> > > > > > > > On Mon, 16 Dec 2019, Roman Shaposhnik wrote:
> > > > > > > > If I sum all the memory sizes together I get 0x3ddfd000 which 
> > > > > > > > is 990M.
> > > > > > > > If so, I wonder how you could boot successfully with 
> > > > > > > > dom0_mem=1024M even
> > > > > > > > on Xen 4.12... :-?
> > > > > > >
> > > > > > > That is a very interesting observation indeed! I actually don't
> > > > > > > remember where that device tree came from, but I think it was 
> > > > > > > from one
> > > > > > > of the Linaro sites.
> > > > > >
> > > > > > This is mostly likely because of:
> > > > > >
> > > > > > commit 6341a674573f1834f083f0ab0f5b36b075f9e02e
> > > > > > Author: Julien Grall 
> > > > > > Date:   Wed Aug 21 22:42:31 2019 +0100
> > > > > >
> > > > > > xen/arm: domain_build: Don't continue if unable to allocate all 
> > > > > > dom0 banks
> > > > > >
> > > > > > Xen will only print a warning if there are memory unallocated 
> > > > > > when using
> > > > > > 1:1 mapping (only used by dom0). This also includes the case 
> > > > > > where no
> > > > > > memory has been allocated.
> > > > > >
> > > > > > It will bring to all sort of issues that can be hard to 
> > > > > > diagnostic for
> > > > > > users (the warning can be difficult to spot or disregard).
> > > > > >
> > > > > > If the users request 1GB of memory, then most likely they want 
> > > > > > the exact
> > > > > > amount and not 512MB. So panic if all the memory has not been 
> > > > > > allocated.
> > > > > >
> > > > > > After this change, the behavior is the same as for non-1:1 
> > > > > > memory
> > > > > > allocation (used by domU).
> > > > > >
> > > > > > At the same time, reflow the message to have the format on a 
> > > > > > single
> > > > > > line.
> > > > > >
> > > > > > Signed-off-by: Julien Grall 
> > > > > > Acked-by: Stefano Stabellini 
> > > > >
> > > > > Ah! Roman, could you please post the full boot log of a successful 
> > > > > 4.12
> > > > > boot?
> > > > >
> > > > > If it has a "Failed to allocate requested dom0 memory" message, then 
> > > > > we
> > > > > know what the issue is.
> > > >
> > > > Aha! Our messages seems to have crossed ;-) Full log is attached and
> > > > yes -- that's
> > > > the problem indeed.
> > > >
> > > > So at least that mystery is solved. But I'm still not able to get to a
> > > > full 1G of memory
> > > > even with your update to the device tree file. Any chance you can send 
> > > > me the
> > > > device tree file that works for you?
> > >
> > > I didn't try on real hardware, I only tried on QEMU with a similar
> > > configuration. I went back and check the HiKey device tree I used and it
> > > is the same as yours (including the ramoops reserved-memory error).
> > >
> > > Apparently there are 1G and 2G variants of the HiKey, obviously both
> > > yours and my device tree are for the 1G variant. I try to dig through
> > > the docs but couldn't find the details of the 2G variant. I cannot find
> > > anywhere the memory range for the top 1G of memory not even on the
> > > LeMaker docs! :-/
> >
> > Yup. That's exactly the issue on my end as well - can't seem to find an
> > authoritative source for that devicetree.
> >
> > I did find this, though:
> >  https://releases.linaro.org/96boards/hikey/linaro/debian/15.11/
> > which looks like it has the latest (at least file timestamp-wise) 
> > devicetree.
> >
> > If you look at the memory and reserved memory nodes there, they
> > are actually much simpler than what we've got:
> >
> > memory {
> > device_type = "memory";
> > reg = <0x0 0x0 0x0 0x4000>;
> > };
>
> Which is still 1G, but it is surprisingly simpler.
>
>
> > reserved-memory {
> > #address-cells = <0x2>;
> > #size-cells = <0x2>;
> > ranges;
> >
> > mcu-buf@05e0 {
> > no-map;
> > reg = <0x0 0x5e0 0x0 0x10 0x0
> > 0x740f000 0x0 0x1000>;
> > };
> >
> > mbox-buf@06dff000 {
> > no-map;
> > reg = <0x0 0x6dff000 0x0 0x1000>;
> > };
> > };
> >
> > So -- just on a whim -- I changed it to:
> > reg = <0x0 0x0 0x0 0x8000>;
>
> I would have tried that too :-)
>
>
> > Interestingly enough, Xen booted, and complained about

[Xen-devel] [ovmf test] 144914: regressions - FAIL

2019-12-17 Thread osstest service owner

flight 144914 ovmf real [real]
http://logs.test-lab.xenproject.org/osstest/logs/144914/

Regressions :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
 build-i386-xsm6 xen-buildfail REGR. vs. 144637
 build-amd64   6 xen-buildfail REGR. vs. 144637
 build-amd64-xsm   6 xen-buildfail REGR. vs. 144637
 build-i3866 xen-buildfail REGR. vs. 144637

Tests which did not succeed, but are not blocking:
 build-i386-libvirt1 build-check(1)   blocked  n/a
 test-amd64-amd64-xl-qemuu-ovmf-amd64  1 build-check(1) blocked n/a
 build-amd64-libvirt   1 build-check(1)   blocked  n/a
 test-amd64-i386-xl-qemuu-ovmf-amd64  1 build-check(1)  blocked n/a

version targeted for testing:
 ovmf bfb141cf19dd6f9b8df8b9d0914a5b3b15e1a798
baseline version:
 ovmf 804666c86e7b6f04fe5c5cfdb13199c19e0e99b0

Last test of basis   144637  2019-12-09 09:09:49 Z8 days
Failing since144646  2019-12-10 01:39:53 Z8 days   72 attempts
Testing same since   144770  2019-12-12 18:41:26 Z5 days   61 attempts


People who touched revisions under test:
  Antoine Coeur 
  Ard Biesheuvel 
  Bob Feng 
  Jiewen Yao 
  Michael Kubacki 
  Pete Batard 
  Philippe Mathieu-Daude 
  Steven Shi 

jobs:
 build-amd64-xsm  fail
 build-i386-xsm   fail
 build-amd64  fail
 build-i386   fail
 build-amd64-libvirt  blocked 
 build-i386-libvirt   blocked 
 build-amd64-pvopspass
 build-i386-pvops pass
 test-amd64-amd64-xl-qemuu-ovmf-amd64 blocked 
 test-amd64-i386-xl-qemuu-ovmf-amd64  blocked 



sg-report-flight on osstest.test-lab.xenproject.org
logs: /home/logs/logs
images: /home/logs/images

Logs, config files, etc. are available at
http://logs.test-lab.xenproject.org/osstest/logs

Explanation of these reports, and of osstest in general, is at
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master

Test harness code can be found at
http://xenbits.xen.org/gitweb?p=osstest.git;a=summary


Not pushing.


commit bfb141cf19dd6f9b8df8b9d0914a5b3b15e1a798
Author: Pete Batard 
Date:   Tue Dec 10 18:23:04 2019 +

MdePkg/Include: Add DCC and BCM2835 SPCR UART types

As per the Microsoft Debug Port Table 2 (DBG2) documentation, that
can be found online, we are missing 2 serial interface types for
Arm DCC and Bcm2835 (the latter being used with the Raspberry Pi).

These same types are present in DebugPort2Table.h so add them to
SerialPortConsoleRedirectionTable.h too.

Note that we followed the same idiosyncrasies as DebugPort2Table
for naming these new macros.

Signed-off-by: Pete Batard 
Acked-by: Ard Biesheuvel 
Reviewed-by: Liming Gao 

commit 2fe25a74d6fee3c2ac0b930f7f3596cb432e766e
Author: Ard Biesheuvel 
Date:   Tue Mar 5 14:32:48 2019 +0100

ArmPkg/MmCommunicationDxe: relay architected PI events to MM context

PI defines a few architected events that have significance in the MM
context as well as in the non-secure DXE context. So register notify
handlers for these events, and relay them into the standalone MM world.

Signed-off-by: Ard Biesheuvel 
Reviewed-by: Jiewen Yao 
Reviewed-by: Achin Gupta 

commit d3add11e87dace180387562d6f1951f2bffbd3d9
Author: Michael Kubacki 
Date:   Wed Nov 20 17:31:24 2019 -0800

MdeModulePkg PeiCore: Improve comment semantics

This patch clarifies wording in several PeiCore comments to improve
reading comprehension.

Cc: Dandan Bi 
Cc: Liming Gao 
Cc: Jian J Wang 
Cc: Hao A Wu 
Signed-off-by: Michael Kubacki 
Reviewed-by: Liming Gao 
Reviewed-by: Jian J Wang 

commit d39d1260c615b716675f67f5c4e1f4f52df01dad
Author: Michael Kubacki 
Date:   Wed Nov 20 17:10:48 2019 -0800

MdeModulePkg PeiCore: Fix typos

Cc: Dandan Bi 
Cc: Liming Gao 
Cc: Jian J Wang 
Cc: Hao A Wu 
Signed-off-by: Michael Kubacki 
Reviewed-by: Liming Gao 
Reviewed-by: Philippe Mathieu-Daude 
Reviewed-by: Jian J Wang 

commit 97eedf5dfbaffde33210fd88066247cf0b7d3325
Author: Antoine Coeur 
Date:   Wed Dec 4 12:14:53

Re: [Xen-devel] REGRESSION: Xen 4.13 RC5 fails to bootstrap Dom0 on ARM

2019-12-17 Thread Stefano Stabellini

On Tue, 17 Dec 2019, Roman Shaposhnik wrote:
> On Tue, Dec 17, 2019 at 11:26 AM Stefano Stabellini
>  wrote:
> >
> > On Tue, 17 Dec 2019, Roman Shaposhnik wrote:
> > > On Tue, Dec 17, 2019 at 10:30 AM Stefano Stabellini
> > >  wrote:
> > > >
> > > > On Tue, 17 Dec 2019, Julien Grall wrote:
> > > > > Hi,
> > > > >
> > > > > On 17/12/2019 04:39, Roman Shaposhnik wrote:
> > > > > > On Mon, Dec 16, 2019 at 6:55 PM Stefano Stabellini
> > > > > >  wrote:
> > > > > > > On Mon, 16 Dec 2019, Roman Shaposhnik wrote:
> > > > > > > If I sum all the memory sizes together I get 0x3ddfd000 which is 
> > > > > > > 990M.
> > > > > > > If so, I wonder how you could boot successfully with 
> > > > > > > dom0_mem=1024M even
> > > > > > > on Xen 4.12... :-?
> > > > > >
> > > > > > That is a very interesting observation indeed! I actually don't
> > > > > > remember where that device tree came from, but I think it was from 
> > > > > > one
> > > > > > of the Linaro sites.
> > > > >
> > > > > This is mostly likely because of:
> > > > >
> > > > > commit 6341a674573f1834f083f0ab0f5b36b075f9e02e
> > > > > Author: Julien Grall 
> > > > > Date:   Wed Aug 21 22:42:31 2019 +0100
> > > > >
> > > > > xen/arm: domain_build: Don't continue if unable to allocate all 
> > > > > dom0 banks
> > > > >
> > > > > Xen will only print a warning if there are memory unallocated 
> > > > > when using
> > > > > 1:1 mapping (only used by dom0). This also includes the case 
> > > > > where no
> > > > > memory has been allocated.
> > > > >
> > > > > It will bring to all sort of issues that can be hard to 
> > > > > diagnostic for
> > > > > users (the warning can be difficult to spot or disregard).
> > > > >
> > > > > If the users request 1GB of memory, then most likely they want 
> > > > > the exact
> > > > > amount and not 512MB. So panic if all the memory has not been 
> > > > > allocated.
> > > > >
> > > > > After this change, the behavior is the same as for non-1:1 memory
> > > > > allocation (used by domU).
> > > > >
> > > > > At the same time, reflow the message to have the format on a 
> > > > > single
> > > > > line.
> > > > >
> > > > > Signed-off-by: Julien Grall 
> > > > > Acked-by: Stefano Stabellini 
> > > >
> > > > Ah! Roman, could you please post the full boot log of a successful 4.12
> > > > boot?
> > > >
> > > > If it has a "Failed to allocate requested dom0 memory" message, then we
> > > > know what the issue is.
> > >
> > > Aha! Our messages seems to have crossed ;-) Full log is attached and
> > > yes -- that's
> > > the problem indeed.
> > >
> > > So at least that mystery is solved. But I'm still not able to get to a
> > > full 1G of memory
> > > even with your update to the device tree file. Any chance you can send me 
> > > the
> > > device tree file that works for you?
> >
> > I didn't try on real hardware, I only tried on QEMU with a similar
> > configuration. I went back and check the HiKey device tree I used and it
> > is the same as yours (including the ramoops reserved-memory error).
> >
> > Apparently there are 1G and 2G variants of the HiKey, obviously both
> > yours and my device tree are for the 1G variant. I try to dig through
> > the docs but couldn't find the details of the 2G variant. I cannot find
> > anywhere the memory range for the top 1G of memory not even on the
> > LeMaker docs! :-/
> 
> Yup. That's exactly the issue on my end as well - can't seem to find an
> authoritative source for that devicetree.
> 
> I did find this, though:
>  https://releases.linaro.org/96boards/hikey/linaro/debian/15.11/
> which looks like it has the latest (at least file timestamp-wise) devicetree.
> 
> If you look at the memory and reserved memory nodes there, they
> are actually much simpler than what we've got:
> 
> memory {
> device_type = "memory";
> reg = <0x0 0x0 0x0 0x4000>;
> };

Which is still 1G, but it is surprisingly simpler.


> reserved-memory {
> #address-cells = <0x2>;
> #size-cells = <0x2>;
> ranges;
> 
> mcu-buf@05e0 {
> no-map;
> reg = <0x0 0x5e0 0x0 0x10 0x0
> 0x740f000 0x0 0x1000>;
> };
> 
> mbox-buf@06dff000 {
> no-map;
> reg = <0x0 0x6dff000 0x0 0x1000>;
> };
> };
> 
> So -- just on a whim -- I changed it to:
> reg = <0x0 0x0 0x0 0x8000>;

I would have tried that too :-)


> Interestingly enough, Xen booted, and complained about only 192MB
> unallocated this time.
> So, I dropped the size of Dom0 to 640M and I got it boot and here's
> what I'm seeing as
> an output of xl info:
>total_memory   : 1120
>free_memory: 390
> It still nowhere close to 2G.
> 
> Then I booted the Linux kernel without Xen and it correctly identified
> all 2G worth

[Xen-devel] [PATCH v2 2/4] x86/microcode: avoid unnecessary xmalloc/memcpy of ucode data

2019-12-17 Thread Eslam Elnikety

When using `ucode=scan` and if a matching module is found, the microcode
payload is maintained in an xmalloc()'d region. This is unnecessary since
the bootmap would just do. Remove the xmalloc and xfree on the microcode
module scan path.

This commit also does away with the restriction on the microcode module
size limit. The concern that a large microcode module would consume too
much memory, preventing guests from launching, is misplaced since this is all
on the init path. While having such safeguards is valuable, this should apply
across the board for all early/late microcode loading. Having it just on
the `scan` path is confusing.

Looking forward, we are a bit closer (i.e., one xmalloc down) to pulling
the early microcode loading of the BSP a bit earlier in the early boot
process. This commit is the low hanging fruit. There is still a sizable
amount of work to get there as there are still a handful of xmalloc in
microcode_{amd,intel}.c.

First, there are xmallocs on the path of finding a matching microcode
update. Similar to the commit at hand, searching through the microcode
blob can be done on the already present buffer with no need to xmalloc
any further. Even better, do the filtering in microcode.c before
requesting the microcode update on all CPUs. The latter requires careful
restructuring and exposing the arch-specific logic for iterating over
patches and declaring a match.

Second, there are xmallocs for the microcode cache. Here, we would need
to ensure that the cache corresponding to the BSP gets xmalloc()'d and
populated after the fact.

Signed-off-by: Eslam Elnikety 
---
 xen/arch/x86/microcode.c | 32 
 1 file changed, 4 insertions(+), 28 deletions(-)

diff --git a/xen/arch/x86/microcode.c b/xen/arch/x86/microcode.c
index 8b4d87782c..c878fc71ff 100644
--- a/xen/arch/x86/microcode.c
+++ b/xen/arch/x86/microcode.c
@@ -138,11 +138,6 @@ static int __init parse_ucode_param(const char *s)
 }
 custom_param("ucode", parse_ucode_param);
 
-/*
- * 8MB ought to be enough.
- */
-#define MAX_EARLY_CPIO_MICROCODE (8 << 20)
-
 void __init microcode_scan_module(
 unsigned long *module_map,
 const multiboot_info_t *mbi)
@@ -187,31 +182,12 @@ void __init microcode_scan_module(
 cd = find_cpio_data(p, _blob_start, _blob_size,  /* ignore */);
 if ( cd.data )
 {
-/*
- * This is an arbitrary check - it would be sad if the blob
- * consumed most of the memory and did not allow guests
- * to launch.
- */
-if ( cd.size > MAX_EARLY_CPIO_MICROCODE )
-{
-printk("Multiboot %d microcode payload too big! (%ld, we 
can do %d)\n",
-   i, cd.size, MAX_EARLY_CPIO_MICROCODE);
-goto err;
-}
-ucode_blob.size = cd.size;
-ucode_blob.data = xmalloc_bytes(cd.size);
-if ( !ucode_blob.data )
-cd.data = NULL;
-else
-memcpy(ucode_blob.data, cd.data, cd.size);
+ucode_blob.size = cd.size;
+ucode_blob.data = cd.data;
+break;
 }
 bootstrap_map(NULL);
-if ( cd.data )
-break;
 }
-return;
-err:
-bootstrap_map(NULL);
 }
 void __init microcode_grab_module(
 unsigned long *module_map,
@@ -725,7 +701,7 @@ static int __init microcode_init(void)
  */
 if ( ucode_blob.size )
 {
-xfree(ucode_blob.data);
+bootstrap_map(NULL);
 ucode_blob.size = 0;
 ucode_blob.data = NULL;
 }
-- 
2.17.1


___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] [PATCH v2 4/4] x86/microcode: Support builtin CPU microcode

2019-12-17 Thread Eslam Elnikety

Xen relies on boot modules to perform early microcode updates. This commit adds
another mode, namely "builtin" via the BUILTIN_UCODE config parameter. If set,
the Xen image itself will contain the microcode updates. Upon boot, Xen
inspects its image for microcode blobs and performs the update.

A Xen image with builtin microcode will, by default, attempt the microcode
update. Disabling the builtin microcode update can be done via the Xen command
line parameter 'ucode=no-builtin'. Moreover, the microcode provided via other
options (such as 'ucode=<integer>|scan' or 'ucode=<filename>' config when
booting via EFI) takes precedence over the builtin one.

Signed-off-by: Eslam Elnikety 

---
Changes in v2:
- Allow for ucode=<integer>|scan,{no-}builtin and detail the model. Reflect
  those changes onto microcode.c and docs/misc/xen-command-line.pandoc
- Add documentation to the existing docs/admin-guide/microcode-loading.rst
- Build on Patches 1--3 to avoid xmalloc/memcpy for the builtin microcode
- Work configuration in order to specify the individual microcode blobs to use
  for the builtin microcode, and rework the microcode/Makefile accordingly
---
 docs/admin-guide/microcode-loading.rst | 31 +++
 docs/misc/xen-command-line.pandoc  | 10 -
 xen/arch/x86/Kconfig   | 30 +++
 xen/arch/x86/Makefile  |  1 +
 xen/arch/x86/microcode.c   | 52 ++
 xen/arch/x86/microcode/Makefile| 46 +++
 xen/arch/x86/xen.lds.S | 12 ++
 7 files changed, 180 insertions(+), 2 deletions(-)
 create mode 100644 xen/arch/x86/microcode/Makefile

diff --git a/docs/admin-guide/microcode-loading.rst 
b/docs/admin-guide/microcode-loading.rst
index e83cadd2c2..989e8d446b 100644
--- a/docs/admin-guide/microcode-loading.rst
+++ b/docs/admin-guide/microcode-loading.rst
@@ -104,6 +104,37 @@ The ``ucode=scan`` command line option will cause Xen to 
search through all
 modules to find any CPIO archives, and search the archive for the applicable
 file.  Xen will stop searching at the first match.
 
+Loading microcode built within the Xen image
+
+
+Xen can bundle microcode updates within its image. This support is conditional
+on the build configuration BUILTIN_UCODE being enabled. Builtin microcode is
+useful to ensure that, by default, a minimum microcode patch level will be
+applied to the underlying CPU.
+
+To use microcode updates available on the build system as builtin,
+use BUILTIN_UCODE_DIR to refer to the directory containing the firmware updates
+and specify the individual microcode patches via either BUILTIN_UCODE_AMD or
+BUILTIN_UCODE_INTEL for AMD microcode or INTEL microcode, respectively. For
+instance, the configuration below is suitable for a build system which has a
+``/lib/firmware/`` directory which, in turn, includes the individual microcode
+patches ``amd-ucode/microcode_amd_fam15h.bin``, ``intel-ucode/06-3a-09``, and
+``intel-ucode/06-2f-02``.
+
+  CONFIG_BUILTIN_UCODE=y
+  CONFIG_BUILTIN_UCODE_DIR="/lib/firmware/"
+  CONFIG_BUILTIN_UCODE_AMD="amd-ucode/microcode_amd_fam15h.bin"
+  CONFIG_BUILTIN_UCODE_INTEL="intel-ucode/06-3a-09 intel-ucode/06-2f-02"
+
+Alternatively, CONFIG_BUILTIN_UCODE_{AMD,INTEL} can directly point to the
+concatenation of the individual microcode blobs. For instance, assuming that
+``amd-ucode/AuthenticAMD.bin`` and ``intel-ucode/GenuineIntel.bin`` hold
+multiple microcode updates for AMD and INTEL, respectively, you may use the
+configuration below.
+
+  CONFIG_BUILTIN_UCODE_AMD="amd-ucode/AuthenticAMD.bin"
+  CONFIG_BUILTIN_UCODE_INTEL="intel-ucode/GenuineIntel.bin"
+
 
 Run time microcode loading
 --
diff --git a/docs/misc/xen-command-line.pandoc 
b/docs/misc/xen-command-line.pandoc
index 40faf3bc3a..9cfc2df05a 100644
--- a/docs/misc/xen-command-line.pandoc
+++ b/docs/misc/xen-command-line.pandoc
@@ -2126,10 +2126,10 @@ logic applies:
active by default.
 
 ### ucode (x86)
-> `= List of [  | scan=, nmi= ]`
+> `= List of [  | scan=, builtin=, nmi= ]`
 
 Applicability: x86
-Default: `nmi`
+Default: `nmi` if BUILTIN_UCODE is not enabled, `builtin,nmi` otherwise
 
 Controls for CPU microcode loading. For early loading, this parameter can
 specify how and where to find the microcode update blob. For late loading,
@@ -2150,6 +2150,12 @@ microcode in the cpio name space must be:
   - on Intel: kernel/x86/microcode/GenuineIntel.bin
   - on AMD  : kernel/x86/microcode/AuthenticAMD.bin
 
+'builtin' instructs the hypervisor to use the builtin microcode update. This
+option is available only if option BUILTIN_UCODE is enabled at build. The
+default value is `true`. If a microcode is provided via other options (such
+as 'integer', 'scan', or `ucode=` config when booting via EFI),
+the provided microcode takes precedence over the builtin one.
+
 'nmi' determines late loading is performed in NMI handler or just in

[Xen-devel] [PATCH v2 1/4] x86/microcode: Improve documentation and parsing for ucode=

2019-12-17 Thread Eslam Elnikety

Decouple the microcode referencing mechanism when using GRUB from that
when using EFI. This allows us to avoid the "unspecified effect" of
using `<integer> | scan` along with xen.efi. With that, Xen can explicitly
ignore those named options when using EFI. As an added benefit,
we get a straightforward parsing of the ucode parameter. While at it,
simplify the logic in microcode_grab_module().

Update the command line documentation for consistency. Also, drop the
leading comment for parse_ucode_param. (No practical use for it given
this commit).

Signed-off-by: Eslam Elnikety 
---
 docs/misc/xen-command-line.pandoc | 18 ---
 xen/arch/x86/microcode.c  | 51 ++-
 2 files changed, 36 insertions(+), 33 deletions(-)

diff --git a/docs/misc/xen-command-line.pandoc 
b/docs/misc/xen-command-line.pandoc
index 7a1be84ca9..40faf3bc3a 100644
--- a/docs/misc/xen-command-line.pandoc
+++ b/docs/misc/xen-command-line.pandoc
@@ -2128,7 +2128,13 @@ logic applies:
 ### ucode (x86)
 > `= List of [  | scan=, nmi= ]`
 
-Specify how and where to find CPU microcode update blob.
+Applicability: x86
+Default: `nmi`
+
+Controls for CPU microcode loading. For early loading, this parameter can
+specify how and where to find the microcode update blob. For late loading,
+this parameter specifies if the update happens within a NMI handler or in
+a stop_machine context.
 
 'integer' specifies the CPU microcode update blob module index. When positive,
 this specifies the n-th module (in the GrUB entry, zero based) to be used
@@ -2136,10 +2142,7 @@ for updating CPU micrcode. When negative, counting 
starts at the end of
 the modules in the GrUB entry (so with the blob commonly being last,
 one could specify `ucode=-1`). Note that the value of zero is not valid
 here (entry zero, i.e. the first module, is always the Dom0 kernel
-image). Note further that use of this option has an unspecified effect
-when used with xen.efi (there the concept of modules doesn't exist, and
-the blob gets specified via the `ucode=` config file/section
-entry; see [EFI configuration file description](efi.html)).
+image).
 
 'scan' instructs the hypervisor to scan the multiboot images for an cpio
 image that contains microcode. Depending on the platform the blob with the
@@ -2151,6 +2154,11 @@ microcode in the cpio name space must be:
 stop_machine context. In NMI handler, even NMIs are blocked, which is
 considered safer. The default value is `true`.
 
+Note: When booting via EFI, both options 'integer' and 'scan' are ignored.
+Here, the concept of modules does not exist. The microcode update blob for
+early loading gets specified via the `ucode=` config file/section
+entry; see [EFI configuration file description](efi.html)).
+
 ### unrestricted_guest (Intel)
 > `= `
 
diff --git a/xen/arch/x86/microcode.c b/xen/arch/x86/microcode.c
index 6ced293d88..8b4d87782c 100644
--- a/xen/arch/x86/microcode.c
+++ b/xen/arch/x86/microcode.c
@@ -60,7 +60,7 @@
 
 static module_t __initdata ucode_mod;
 static signed int __initdata ucode_mod_idx;
-static bool_t __initdata ucode_mod_forced;
+static signed int __initdata ucode_mod_efi_idx;
 static unsigned int nr_cores;
 
 /*
@@ -105,16 +105,10 @@ static struct microcode_patch *microcode_cache;
 
 void __init microcode_set_module(unsigned int idx)
 {
-ucode_mod_idx = idx;
-ucode_mod_forced = 1;
+ucode_mod_efi_idx = idx;
 }
 
-/*
- * The format is '[|scan=, nmi=]'. Both options are
- * optional. If the EFI has forced which of the multiboot payloads is to be
- * used, only nmi= is parsed.
- */
-static int __init parse_ucode(const char *s)
+static int __init parse_ucode_param(const char *s)
 {
 const char *ss;
 int val, rc = 0;
@@ -126,18 +120,15 @@ static int __init parse_ucode(const char *s)
 
 if ( (val = parse_boolean("nmi", s, ss)) >= 0 )
 ucode_in_nmi = val;
-else if ( !ucode_mod_forced ) /* Not forced by EFI */
+else if ( (val = parse_boolean("scan", s, ss)) >= 0 )
+ucode_scan = val;
+else
 {
-if ( (val = parse_boolean("scan", s, ss)) >= 0 )
-ucode_scan = val;
-else
-{
-const char *q;
-
-ucode_mod_idx = simple_strtol(s, , 0);
-if ( q != ss )
-rc = -EINVAL;
-}
+const char *q;
+
+ucode_mod_idx = simple_strtol(s, , 0);
+if ( q != ss )
+rc = -EINVAL;
 }
 
 s = ss + 1;
@@ -145,7 +136,7 @@ static int __init parse_ucode(const char *s)
 
 return rc;
 }
-custom_param("ucode", parse_ucode);
+custom_param("ucode", parse_ucode_param);
 
 /*
  * 8MB ought to be enough.
@@ -228,14 +219,18 @@ void __init microcode_grab_module(
 {
 module_t *mod = (module_t *)__va(mbi->mods_addr);
 
-if ( ucode_mod_idx < 0 )
+if ( ucode_mod_efi_idx ) /* Microcode specified by EFI */
+{
+ucode_mod =

[Xen-devel] [PATCH v2 0/4] x86/microcode: Support builtin CPU microcode

2019-12-17 Thread Eslam Elnikety

The main goal of this patch series is to add support for builtin microcode.
Towards that end, the series starts with a few improvements for the
documentation and parsing of the ucode= Xen command line parameter that
controls early loading of microcode (Patches 1--3), and follows with the
main builtin support (Patch 4).

Changes in v2:
- An earlier version of Patch 4 was submitted in isolation. Refer to the
  patch itself for details regarding the relevant changes.
- Patches 1--3 are additions.

Eslam Elnikety (4):
  x86/microcode: Improve documentation and parsing for ucode=
  x86/microcode: avoid unnecessary xmalloc/memcpy of ucode data
  x86/microcode: use const qualifier for microcode buffer
  x86/microcode: Support builtin CPU microcode

 docs/admin-guide/microcode-loading.rst |  31 ++
 docs/misc/xen-command-line.pandoc  |  26 +++--
 xen/arch/x86/Kconfig   |  30 ++
 xen/arch/x86/Makefile  |   1 +
 xen/arch/x86/microcode.c   | 139 ++---
 xen/arch/x86/microcode/Makefile|  46 
 xen/arch/x86/xen.lds.S |  12 +++
 7 files changed, 221 insertions(+), 64 deletions(-)
 create mode 100644 xen/arch/x86/microcode/Makefile

-- 
2.17.1


___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] [PATCH v2 3/4] x86/microcode: use const qualifier for microcode buffer

2019-12-17 Thread Eslam Elnikety

The buffer holding the microcode bits should be marked as const.

Signed-off-by: Eslam Elnikety 
---
 xen/arch/x86/microcode.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/xen/arch/x86/microcode.c b/xen/arch/x86/microcode.c
index c878fc71ff..4616fa9d2e 100644
--- a/xen/arch/x86/microcode.c
+++ b/xen/arch/x86/microcode.c
@@ -86,7 +86,7 @@ static enum {
  * memory.
  */
 struct ucode_mod_blob {
-void *data;
+const void *data;
 size_t size;
 };
 
@@ -744,7 +744,7 @@ int microcode_update_one(bool start_update)
 int __init early_microcode_update_cpu(void)
 {
 int rc = 0;
-void *data = NULL;
+const void *data = NULL;
 size_t len;
 struct microcode_patch *patch;
 
-- 
2.17.1


___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] [PATCH] [tools/hotplug] Use ip on systems where brctl is not available

2019-12-17 Thread Steven Haigh

Newer distros like CentOS 8 do not have brctl available. As such, we
can't use it to configure networking anymore.

This patch will fall back to 'ip' or 'bridge' commands if brctl is not
available in the working PATH.

This would be a likely backport candidate to any version expected to be
built on CentOS 8 etc.

---
 tools/hotplug/Linux/colo-proxy-setup  | 30 +--
 tools/hotplug/Linux/vif-bridge| 16 
 tools/hotplug/Linux/vif2  | 12 +++--
 tools/hotplug/Linux/xen-network-common.sh | 16 +---
 4 files changed, 55 insertions(+), 19 deletions(-)

diff --git a/tools/hotplug/Linux/colo-proxy-setup 
b/tools/hotplug/Linux/colo-proxy-setup
index 94e2034452..d709146c47 100755
--- a/tools/hotplug/Linux/colo-proxy-setup
+++ b/tools/hotplug/Linux/colo-proxy-setup
@@ -76,10 +76,17 @@ function teardown_primary()
 
 function setup_secondary()
 {
-do_without_error brctl delif $bridge $vifname
-do_without_error brctl addbr $forwardbr
-do_without_error brctl addif $forwardbr $vifname
-do_without_error brctl addif $forwardbr $forwarddev
+if which brctl >&/dev/null; then
+do_without_error brctl delif $bridge $vifname
+do_without_error brctl addbr $forwardbr
+do_without_error brctl addif $forwardbr $vifname
+do_without_error brctl addif $forwardbr $forwarddev
+else
+do_without_error ip link set $vifname nomaster
+do_without_error ip link add name $forwardbr type bridge
+do_without_error ip link set $vifname master $forwardbr
+do_without_error ip link set $forwarddev master $forwardbr
+fi
 do_without_error ip link set dev $forwardbr up
 do_without_error modprobe xt_SECCOLO
 
@@ -91,10 +98,17 @@ function setup_secondary()
 
 function teardown_secondary()
 {
-do_without_error brctl delif $forwardbr $forwarddev
-do_without_error brctl delif $forwardbr $vifname
-do_without_error brctl delbr $forwardbr
-do_without_error brctl addif $bridge $vifname
+if which brctl >&/dev/null; then
+do_without_error brctl delif $forwardbr $forwarddev
+do_without_error brctl delif $forwardbr $vifname
+do_without_error brctl delbr $forwardbr
+do_without_error brctl addif $bridge $vifname
+else
+do_without_error ip link set $forwarddev nomaster
+do_without_error ip link set $vifname nomaster
+do_without_error ip link delete $forwardbr type bridge
+do_without_error ip link set $vifname master $bridge
+fi
 
 do_without_error iptables -t mangle -D PREROUTING -m physdev --physdev-in \
 $vifname -j SECCOLO --index $index
diff --git a/tools/hotplug/Linux/vif-bridge b/tools/hotplug/Linux/vif-bridge
index 6956dea66a..e722090ca8 100644
--- a/tools/hotplug/Linux/vif-bridge
+++ b/tools/hotplug/Linux/vif-bridge
@@ -31,10 +31,12 @@ dir=$(dirname "$0")
 bridge=${bridge:-}
 bridge=$(xenstore_read_default "$XENBUS_PATH/bridge" "$bridge")
 
-if [ -z "$bridge" ]
-then
-  bridge=$(brctl show | awk 'NR==2{print$1}')
-
+if [ -z "$bridge" ]; then
+if which brctl >&/dev/null; then
+bridge=$(brctl show | awk 'NR==2{print$1}')
+else
+bridge=$(bridge link | cut -d" " -f7)
+fi
   if [ -z "$bridge" ]
   then
  fatal "Could not find bridge, and none was specified"
@@ -82,7 +84,11 @@ case "$command" in
 ;;
 
 offline)
-do_without_error brctl delif "$bridge" "$dev"
+if which brctl >&/dev/null; then
+do_without_error brctl delif "$bridge" "$dev"
+else
+do_without_error ip link set "$dev" nomaster
+fi
 do_without_error ifconfig "$dev" down
 ;;
 
diff --git a/tools/hotplug/Linux/vif2 b/tools/hotplug/Linux/vif2
index 2c155be68c..5bd555c6f0 100644
--- a/tools/hotplug/Linux/vif2
+++ b/tools/hotplug/Linux/vif2
@@ -7,13 +7,21 @@ dir=$(dirname "$0")
 bridge=$(xenstore_read_default "$XENBUS_PATH/bridge" "$bridge")
 if [ -z "$bridge" ]
 then
-nr_bridges=$(($(brctl show | cut -f 1 | grep -v "^$" | wc -l) - 1))
+if which brctl >&/dev/null; then
+nr_bridges=$(($(brctl show | cut -f 1 | grep -v "^$" | wc -l) - 1))
+else
+nr_bridges=$(bridge link | wc -l)
+fi
 if [ "$nr_bridges" != 1 ]
then
fatal "no bridge specified, and don't know which one to use 
($nr_bridges found)"
 fi
-bridge=$(brctl show | cut -d "
+if which brctl >&/dev/null; then
+bridge=$(brctl show | cut -d "
 " -f 2 | cut -f 1)
+else
+bridge=$(bridge link | cut -d" " -f6)
+fi
 fi
 
 command="$1"
diff --git a/tools/hotplug/Linux/xen-network-common.sh 
b/tools/hotplug/Linux/xen-network-common.sh
index 92ffa603f7..8dd3a62068 100644
--- a/tools/hotplug/Linux/xen-network-common.sh
+++ b/tools/hotplug/Linux/xen-network-common.sh
@@ -111,9 +111,13 @@ create_bridge () {
 
 # Don't create the bridge if it already exists.
 if [ ! -e

[Xen-devel] [xen-unstable-smoke test] 144912: tolerable all pass - PUSHED

2019-12-17 Thread osstest service owner

flight 144912 xen-unstable-smoke real [real]
http://logs.test-lab.xenproject.org/osstest/logs/144912/

Failures :-/ but no regressions.

Tests which did not succeed, but are not blocking:
 test-amd64-amd64-libvirt 13 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-xsm  13 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-xsm  14 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl  13 migrate-support-checkfail   never pass
 test-armhf-armhf-xl  14 saverestore-support-checkfail   never pass

version targeted for testing:
 xen  704fa1532801bc02c4500462f0b913b3c137db4d
baseline version:
 xen  c61c1b49430527ee16fbf5b55aca195c325b1a23

Last test of basis   144906  2019-12-17 19:00:23 Z0 days
Testing same since   144912  2019-12-17 22:02:21 Z0 days1 attempts


People who touched revisions under test:
  Andrew Cooper 

jobs:
 build-arm64-xsm  pass
 build-amd64  pass
 build-armhf  pass
 build-amd64-libvirt  pass
 test-armhf-armhf-xl  pass
 test-arm64-arm64-xl-xsm  pass
 test-amd64-amd64-xl-qemuu-debianhvm-amd64pass
 test-amd64-amd64-libvirt pass



sg-report-flight on osstest.test-lab.xenproject.org
logs: /home/logs/logs
images: /home/logs/images

Logs, config files, etc. are available at
http://logs.test-lab.xenproject.org/osstest/logs

Explanation of these reports, and of osstest in general, is at
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master

Test harness code can be found at
http://xenbits.xen.org/gitweb?p=osstest.git;a=summary


Pushing revision :

To xenbits.xen.org:/home/xen/git/xen.git
   c61c1b4943..704fa15328  704fa1532801bc02c4500462f0b913b3c137db4d -> smoke

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH] x86/microcode: Support builtin CPU microcode

2019-12-17 Thread Eslam Elnikety


On 13.12.19 14:40, Andrew Cooper wrote:

On 09/12/2019 21:49, Eslam Elnikety wrote:

+
+extern const char __builtin_intel_ucode_start[],
__builtin_intel_ucode_end[];
+extern const char __builtin_amd_ucode_start[],
__builtin_amd_ucode_end[];
+#endif
+
   /* By default, ucode loading is done in NMI handler */
   static bool ucode_in_nmi = true;
   @@ -110,9 +118,9 @@ void __init microcode_set_module(unsigned int
idx)
   }
     /*
- * The format is '[|scan=, nmi=]'. Both
options are
- * optional. If the EFI has forced which of the multiboot payloads
is to be
- * used, only nmi= is parsed.
+ * The format is '[|scan=|builtin=,
nmi=]'. All
+ * options are optional. If the EFI has forced which of the
multiboot payloads
+ * is to be used, only nmi= is parsed.
    */


Please delete this, or I'll do a prereq patch to fix it and the command
line docs.  (Both are in a poor state.)



Unless you are planning that along your on-going
docs/hypervisor-guide/microcode-loading.rst effort, I can pick up this
clean-up/prereq patch myself. What do you have in mind? (Or point me
to a good example and I will figure things out).


c/s 3c5552954, 53a84f672, 633a40947 or 3136dee9c are good examples.
ucode= is definitely more complicated to explain because of its implicit
EFI behaviour.



Currently massaging a patch to that effect.


+    else if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
+    ucode_blob.size = (size_t)(__builtin_intel_ucode_end
+   - __builtin_intel_ucode_start);
+    else
+    return;
+
+    if ( !ucode_blob.size )
+    {
+    printk("No builtin ucode! 'ucode=builtin' is nullified.\n");
+    return;
+    }
+    else if ( ucode_blob.size > MAX_EARLY_CPIO_MICROCODE )
+    {
+    printk("Builtin microcode payload too big! (%ld, we can do
%d)\n",
+   ucode_blob.size, MAX_EARLY_CPIO_MICROCODE);
+    ucode_blob.size = 0;
+    return;
+    }
+
+    ucode_blob.data = xmalloc_bytes(ucode_blob.size);
+    if ( !ucode_blob.data )
+    return;


Any chance we can reuse the "fits" logic to avoid holding every
inapplicable blob in memory as well?



I think this would be a welcomed change. It seems to me that we have
two ways to go about it.

1) We factor the code in the intel-/amd-specific cpu_request_microcode
to extract logic for finding a match into its own new function, expose
that through microcode_ops, and finally do xalloc only for the
matching microcode when early loading is scan or builtin.

2) Cannot we just do away completely with xalloc? I see that each
individual microcode update gets allocated anyway in
microcode_intel.c/get_next_ucode_from_buffer() and in
microcode_amd.c/cpu_request_microcode(). Unless I am missing
something, the xmalloc_bytes for ucode_blob.data is redundant.

Thoughts?


I'm certain the code is more complicated than it needs to be.
Cleanup/simplification would be very welcome.  And if you're up for
that, there is a related area which would be a great improvement.

At the moment, BSP microcode loading is very late because it depends on
this xmalloc() to begin with.  However, no memory allocation is needed
to load microcode from a multiboot module or from the initrd, or from
this future builtin location - all loading can be done from a
directmap/bootmap pointer if needs be.

This would allow moving the BSP microcode to much earlier on boot,
probably somewhere between console setup and E820 handling.

One way or another, the microcode cache which persists past boot has to
be xmalloc()'d, because we will free the module/initrd/builtin.  It
would however be more friendly to AP's to only give them the single
correct piece of ucode, rather than everything to scan through.

(These behaviours and expectations are going to be a chunk of my
intended second microcode.rst doc, including a "be aware that machines
exist which do $X" section to cover some of the weirder corner cases we
have encountered.)



Avoiding the xmalloc/memcpy on the scan for microcode is one of the 
patches that I will share shortly. In particular, the ucode_blob.data 
would directly point to the buffer matching the canonical name within 
the cpio name space.


We are still a bit away from pushing the BSP microcode update earlier 
though. We will need to surgically remove all the unnecessary 
xmalloc/memcpy from within microcode_{amd,intel}.c. Also, as you hinted, 
the challenging bit is the per-cpu microcode cache.



+
+builtin_ucode.o: Makefile $(amd-blobs) $(intel-blobs)
+    # Create AMD microcode blob if there are AMD updates on the
build system
+    if [ ! -z "$(amd-blobs)" ]; then \
+    cat $(amd-blobs) > $@.bin ; \
+    $(OBJCOPY) -I binary -O elf64-x86-64 -B i386:x86-64
--rename-section
.data=.builtin_amd_ucode,alloc,load,readonly,data,contents $@.bin
$@.amd; \
+    rm -f $@.bin; \
+    fi
+    # Create INTEL microcode blob if there are INTEL updates on the
build system
+    if [ ! -z "$(intel-blobs)" ]; then \
+    cat

Re: [Xen-devel] REGRESSION: Xen 4.13 RC5 fails to bootstrap Dom0 on ARM

2019-12-17 Thread Roman Shaposhnik

On Tue, Dec 17, 2019 at 11:26 AM Stefano Stabellini
 wrote:
>
> On Tue, 17 Dec 2019, Roman Shaposhnik wrote:
> > On Tue, Dec 17, 2019 at 10:30 AM Stefano Stabellini
> >  wrote:
> > >
> > > On Tue, 17 Dec 2019, Julien Grall wrote:
> > > > Hi,
> > > >
> > > > On 17/12/2019 04:39, Roman Shaposhnik wrote:
> > > > > On Mon, Dec 16, 2019 at 6:55 PM Stefano Stabellini
> > > > >  wrote:
> > > > > > On Mon, 16 Dec 2019, Roman Shaposhnik wrote:
> > > > > > If I sum all the memory sizes together I get 0x3ddfd000 which is 
> > > > > > 990M.
> > > > > > > If so, I wonder how you could boot successfully with dom0_mem=1024M 
> > > > > > even
> > > > > > on Xen 4.12... :-?
> > > > >
> > > > > That is a very interesting observation indeed! I actually don't
> > > > > remember where that device tree came from, but I think it was from one
> > > > > of the Linaro sites.
> > > >
> > > > This is mostly likely because of:
> > > >
> > > > commit 6341a674573f1834f083f0ab0f5b36b075f9e02e
> > > > Author: Julien Grall 
> > > > Date:   Wed Aug 21 22:42:31 2019 +0100
> > > >
> > > > xen/arm: domain_build: Don't continue if unable to allocate all 
> > > > dom0 banks
> > > >
> > > > Xen will only print a warning if there are memory unallocated when 
> > > > using
> > > > 1:1 mapping (only used by dom0). This also includes the case where 
> > > > no
> > > > memory has been allocated.
> > > >
> > > > It will bring to all sort of issues that can be hard to diagnostic 
> > > > for
> > > > users (the warning can be difficult to spot or disregard).
> > > >
> > > > If the users request 1GB of memory, then most likely they want the 
> > > > exact
> > > > amount and not 512MB. So panic if all the memory has not been 
> > > > allocated.
> > > >
> > > > After this change, the behavior is the same as for non-1:1 memory
> > > > allocation (used by domU).
> > > >
> > > > At the same time, reflow the message to have the format on a single
> > > > line.
> > > >
> > > > Signed-off-by: Julien Grall 
> > > > Acked-by: Stefano Stabellini 
> > >
> > > Ah! Roman, could you please post the full boot log of a successful 4.12
> > > boot?
> > >
> > > If it has a "Failed to allocate requested dom0 memory" message, then we
> > > know what the issue is.
> >
> > Aha! Our messages seems to have crossed ;-) Full log is attached and
> > yes -- that's
> > the problem indeed.
> >
> > So at least that mystery is solved. But I'm still not able to get to a
> > full 1G of memory
> > even with your update to the device tree file. Any chance you can send me 
> > the
> > device tree file that works for you?
>
> I didn't try on real hardware, I only tried on QEMU with a similar
> configuration. I went back and check the HiKey device tree I used and it
> is the same as yours (including the ramoops reserved-memory error).
>
> Apparently there are 1G and 2G variants of the HiKey, obviously both
> yours and my device tree are for the 1G variant. I try to dig through
> the docs but couldn't find the details of the 2G variant. I cannot find
> anywhere the memory range for the top 1G of memory not even on the
> LeMaker docs! :-/

Yup. That's exactly the issue on my end as well - can't seem to find an
authoritative source for that devicetree.

I did find this, though:
 https://releases.linaro.org/96boards/hikey/linaro/debian/15.11/
which looks like it has the latest (at least file timestamp-wise) devicetree.

If you look at the memory and reserved memory nodes there, they
are actually much simpler than what we've got:

memory {
device_type = "memory";
reg = <0x0 0x0 0x0 0x4000>;
};

reserved-memory {
#address-cells = <0x2>;
#size-cells = <0x2>;
ranges;

mcu-buf@05e0 {
no-map;
reg = <0x0 0x5e0 0x0 0x10 0x0
0x740f000 0x0 0x1000>;
};

mbox-buf@06dff000 {
no-map;
reg = <0x0 0x6dff000 0x0 0x1000>;
};
};

So -- just on a whim -- I changed it to:
reg = <0x0 0x0 0x0 0x8000>;

Interestingly enough, Xen booted, and complained about only 192MB
unallocated this time.
So, I dropped the size of Dom0 to 640M and I got it boot and here's
what I'm seeing as
an output of xl info:
   total_memory   : 1120
   free_memory: 390
It is still nowhere close to 2G.

Then I booted the Linux kernel without Xen and it correctly identified
all 2G worth of RAM, and in fact,
when I converted /sys/firmware/devicetree/base back into dts, here's
what I've got:

memory {
device_type = "memory";
reg = <0x0 0x0 0x0 0x5e0 0x0 0x5f0 0x0 0x1000
0x0 0x5f02000 0x0 0xefd000 0x0 0x6e0 0x0 0x60f000 0x0 0x741
0x0 0x1aaf 0x0 0x21f0 0x0 0x10 0x0 0x2200 0x0
0x1c00>;
};

Re: [Xen-devel] [PATCH] x86/microcode: Support builtin CPU microcode

2019-12-17 Thread Andrew Cooper

On 17/12/2019 22:41, Eslam Elnikety wrote:
> On 13.12.19 14:57, Andrew Cooper wrote:
>> On 12/12/2019 22:13, Eslam Elnikety wrote:
> Second, there is often need to couple a Xen build with a minimum
> microcode patch level. Having the microcode built within the Xen
> image
> itself is a streamlined, natural way of achieving that.

 Okay, I can accept this as a reason, to some degree at least. Yet
 as said elsewhere, I don't think you want then to override a
 possible "external" ucode module with the builtin blobs. Instead
 the newest of everything that's available should then be loaded.
>>>
>>> Extending Xen to work around tools shortcomings is absolutely not what
>>> I have in mind. I should have started with the second reason. Read
>>> this as: Xen relies on a minimum microcode feature set, and it makes
>>> sense to couple both in one binary. This coupling just happens to
>>> provide an added benefit in the face of tools shortcoming.
>>
>> Do we have anything which strictly relies on a minimum version?
>
> I had in mind microcode speculation mitigation features when reasoning
> with the minimum patch level argument.

Considering how well the first round of speculative microcode went,
mandating it would have been a rather bad thing...

But yes - as a usecase of "I wish to bundle the minimum microcode I'd
like to work with", this seems entirely reasonable.

~Andrew

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH] x86/microcode: Support builtin CPU microcode

2019-12-17 Thread Eslam Elnikety


On 13.12.19 14:57, Andrew Cooper wrote:

On 12/12/2019 22:13, Eslam Elnikety wrote:

Second, there is often need to couple a Xen build with a minimum
microcode patch level. Having the microcode built within the Xen image
itself is a streamlined, natural way of achieving that.


Okay, I can accept this as a reason, to some degree at least. Yet
as said elsewhere, I don't think you want then to override a
possible "external" ucode module with the builtin blobs. Instead
the newest of everything that's available should then be loaded.


Extending Xen to work around tools shortcomings is absolutely not what
I have in mind. I should have started with the second reason. Read
this as: Xen relies on a minimum microcode feature set, and it makes
sense to couple both in one binary. This coupling just happens to
provide an added benefit in the face of tools shortcoming.


Do we have anything which strictly relies on a minimum version?


I had in mind microcode speculation mitigation features when reasoning 
with the minimum patch level argument.


-- Eslam

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] [ovmf test] 144910: regressions - FAIL

2019-12-17 Thread osstest service owner

flight 144910 ovmf real [real]
http://logs.test-lab.xenproject.org/osstest/logs/144910/

Regressions :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
 build-i386-xsm6 xen-buildfail REGR. vs. 144637
 build-amd64   6 xen-buildfail REGR. vs. 144637
 build-amd64-xsm   6 xen-buildfail REGR. vs. 144637
 build-i3866 xen-buildfail REGR. vs. 144637

Tests which did not succeed, but are not blocking:
 test-amd64-amd64-xl-qemuu-ovmf-amd64  1 build-check(1) blocked n/a
 build-amd64-libvirt   1 build-check(1)   blocked  n/a
 test-amd64-i386-xl-qemuu-ovmf-amd64  1 build-check(1)  blocked n/a
 build-i386-libvirt1 build-check(1)   blocked  n/a

version targeted for testing:
 ovmf bfb141cf19dd6f9b8df8b9d0914a5b3b15e1a798
baseline version:
 ovmf 804666c86e7b6f04fe5c5cfdb13199c19e0e99b0

Last test of basis   144637  2019-12-09 09:09:49 Z8 days
Failing since144646  2019-12-10 01:39:53 Z7 days   71 attempts
Testing same since   144770  2019-12-12 18:41:26 Z5 days   60 attempts


People who touched revisions under test:
  Antoine Coeur 
  Ard Biesheuvel 
  Bob Feng 
  Jiewen Yao 
  Michael Kubacki 
  Pete Batard 
  Philippe Mathieu-Daude 
  Steven Shi 

jobs:
 build-amd64-xsm  fail
 build-i386-xsm   fail
 build-amd64  fail
 build-i386   fail
 build-amd64-libvirt  blocked 
 build-i386-libvirt   blocked 
 build-amd64-pvopspass
 build-i386-pvops pass
 test-amd64-amd64-xl-qemuu-ovmf-amd64 blocked 
 test-amd64-i386-xl-qemuu-ovmf-amd64  blocked 



sg-report-flight on osstest.test-lab.xenproject.org
logs: /home/logs/logs
images: /home/logs/images

Logs, config files, etc. are available at
http://logs.test-lab.xenproject.org/osstest/logs

Explanation of these reports, and of osstest in general, is at
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master

Test harness code can be found at
http://xenbits.xen.org/gitweb?p=osstest.git;a=summary


Not pushing.


commit bfb141cf19dd6f9b8df8b9d0914a5b3b15e1a798
Author: Pete Batard 
Date:   Tue Dec 10 18:23:04 2019 +

MdePkg/Include: Add DCC and BCM2835 SPCR UART types

As per the Microsoft Debug Port Table 2 (DBG2) documentation, that
can be found online, we are missing 2 serial interface types for
Arm DCC and Bcm2835 (the latter being used with the Raspberry Pi).

These same types are present in DebugPort2Table.h so add them to
SerialPortConsoleRedirectionTable.h too.

Note that we followed the same idiosyncrasies as DebugPort2Table
for naming these new macros.

Signed-off-by: Pete Batard 
Acked-by: Ard Biesheuvel 
Reviewed-by: Liming Gao 

commit 2fe25a74d6fee3c2ac0b930f7f3596cb432e766e
Author: Ard Biesheuvel 
Date:   Tue Mar 5 14:32:48 2019 +0100

ArmPkg/MmCommunicationDxe: relay architected PI events to MM context

PI defines a few architected events that have significance in the MM
context as well as in the non-secure DXE context. So register notify
handlers for these events, and relay them into the standalone MM world.

Signed-off-by: Ard Biesheuvel 
Reviewed-by: Jiewen Yao 
Reviewed-by: Achin Gupta 

commit d3add11e87dace180387562d6f1951f2bffbd3d9
Author: Michael Kubacki 
Date:   Wed Nov 20 17:31:24 2019 -0800

MdeModulePkg PeiCore: Improve comment semantics

This patch clarifies wording in several PeiCore comments to improve
reading comprehension.

Cc: Dandan Bi 
Cc: Liming Gao 
Cc: Jian J Wang 
Cc: Hao A Wu 
Signed-off-by: Michael Kubacki 
Reviewed-by: Liming Gao 
Reviewed-by: Jian J Wang 

commit d39d1260c615b716675f67f5c4e1f4f52df01dad
Author: Michael Kubacki 
Date:   Wed Nov 20 17:10:48 2019 -0800

MdeModulePkg PeiCore: Fix typos

Cc: Dandan Bi 
Cc: Liming Gao 
Cc: Jian J Wang 
Cc: Hao A Wu 
Signed-off-by: Michael Kubacki 
Reviewed-by: Liming Gao 
Reviewed-by: Philippe Mathieu-Daude 
Reviewed-by: Jian J Wang 

commit 97eedf5dfbaffde33210fd88066247cf0b7d3325
Author: Antoine Coeur 
Date:   Wed Dec 4 12:14:53

[Xen-devel] [xen-unstable-smoke test] 144906: tolerable all pass - PUSHED

2019-12-17 Thread osstest service owner

flight 144906 xen-unstable-smoke real [real]
http://logs.test-lab.xenproject.org/osstest/logs/144906/

Failures :-/ but no regressions.

Tests which did not succeed, but are not blocking:
 test-amd64-amd64-libvirt 13 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-xsm  13 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-xsm  14 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl  13 migrate-support-checkfail   never pass
 test-armhf-armhf-xl  14 saverestore-support-checkfail   never pass

version targeted for testing:
 xen  c61c1b49430527ee16fbf5b55aca195c325b1a23
baseline version:
 xen  f50a4f6e244cfc8e773300c03aaf4db391f3028a

Last test of basis   144898  2019-12-17 15:00:35 Z0 days
Testing same since   144906  2019-12-17 19:00:23 Z0 days1 attempts


People who touched revisions under test:
  Andre Przywara 
  Hongyan Xia 
  Julien Grall 
  Yangtao Li 

jobs:
 build-arm64-xsm  pass
 build-amd64  pass
 build-armhf  pass
 build-amd64-libvirt  pass
 test-armhf-armhf-xl  pass
 test-arm64-arm64-xl-xsm  pass
 test-amd64-amd64-xl-qemuu-debianhvm-amd64pass
 test-amd64-amd64-libvirt pass



sg-report-flight on osstest.test-lab.xenproject.org
logs: /home/logs/logs
images: /home/logs/images

Logs, config files, etc. are available at
http://logs.test-lab.xenproject.org/osstest/logs

Explanation of these reports, and of osstest in general, is at
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master

Test harness code can be found at
http://xenbits.xen.org/gitweb?p=osstest.git;a=summary


Pushing revision :

To xenbits.xen.org:/home/xen/git/xen.git
   f50a4f6e24..c61c1b4943  c61c1b49430527ee16fbf5b55aca195c325b1a23 -> smoke

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] [PATCH v2] xen/grant-table: remove multiple BUG_ON on gnttab_interface

2019-12-17 Thread Aditya Pakki

gnttab_request_version() always sets the gnttab_interface variable
and the assertions to check for empty gnttab_interface are unnecessary.
The patch eliminates multiple such assertions.

Signed-off-by: Aditya Pakki 
---
v1: Eliminate more BUG_ON calls, as suggested by Juergen Gross.
---
 drivers/xen/grant-table.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 49b381e104ef..7b36b51cdb9f 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -664,7 +664,6 @@ static int grow_gnttab_list(unsigned int more_frames)
unsigned int nr_glist_frames, new_nr_glist_frames;
unsigned int grefs_per_frame;
 
-   BUG_ON(gnttab_interface == NULL);
grefs_per_frame = gnttab_interface->grefs_per_grant_frame;
 
new_nr_grant_frames = nr_grant_frames + more_frames;
@@ -1160,7 +1159,6 @@ EXPORT_SYMBOL_GPL(gnttab_unmap_refs_sync);
 
 static unsigned int nr_status_frames(unsigned int nr_grant_frames)
 {
-   BUG_ON(gnttab_interface == NULL);
return gnttab_frames(nr_grant_frames, SPP);
 }
 
@@ -1388,7 +1386,6 @@ static int gnttab_expand(unsigned int req_entries)
int rc;
unsigned int cur, extra;
 
-   BUG_ON(gnttab_interface == NULL);
cur = nr_grant_frames;
extra = ((req_entries + gnttab_interface->grefs_per_grant_frame - 1) /
 gnttab_interface->grefs_per_grant_frame);
@@ -1423,7 +1420,6 @@ int gnttab_init(void)
/* Determine the maximum number of frames required for the
 * grant reference free list on the current hypervisor.
 */
-   BUG_ON(gnttab_interface == NULL);
max_nr_glist_frames = (max_nr_grant_frames *
   gnttab_interface->grefs_per_grant_frame / RPP);
 
-- 
2.20.1


___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH 6/6] x86/suspend: Drop save_rest_processor_state() completely

2019-12-17 Thread Andrew Cooper

On 17/12/2019 12:38, Roger Pau Monné wrote:
>> @@ -46,25 +14,9 @@ void restore_rest_processor_state(void)
>>  /* Restore full CR4 (inc MCE) now that the IDT is in place. */
>>  write_cr4(mmu_cr4_features);
>>  
>> -/* Recover syscall MSRs */
>> -wrmsrl(MSR_LSTAR, saved_lstar);
>> -wrmsrl(MSR_CSTAR, saved_cstar);
>> -wrmsrl(MSR_STAR, XEN_MSR_STAR);
>> -wrmsrl(MSR_SYSCALL_MASK, XEN_SYSCALL_MASK);
>> -
>> -wrfsbase(saved_fs_base);
>> -wrgsbase(saved_gs_base);
>> -wrmsrl(MSR_SHADOW_GS_BASE, saved_kernel_gs_base);
>> -
>> -if ( cpu_has_sep )
>> -{
>> -/* Recover sysenter MSRs */
>> -wrmsrl(MSR_IA32_SYSENTER_ESP, saved_sysenter_esp);
>> -wrmsrl(MSR_IA32_SYSENTER_EIP, saved_sysenter_eip);
>> -wrmsr(MSR_IA32_SYSENTER_CS, __HYPERVISOR_CS, 0);
>> -}
>> +percpu_traps_init();
>>  
>> -if ( cpu_has_xsave && !set_xcr0(saved_xcr0) )
>> +if ( cpu_has_xsave && !set_xcr0(get_xcr0()) )
>>  BUG();
>>  
>>  wrmsrl(MSR_IA32_CR_PAT, XEN_MSR_PAT);
> Given what this function does after this change, would it be feasible
> to place such calls directly in enter_state?
>
> AFAICT there's already some restoring done there anyway.

Hmm - we already appear to double up CR4/EFER restoration, so there is
clearly more cleanup to do.  I'll see if I can make
restore_rest_processor_state() disappear completely.

~Andrew

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] [qemu-mainline test] 144891: regressions - FAIL

2019-12-17 Thread osstest service owner

flight 144891 qemu-mainline real [real]
http://logs.test-lab.xenproject.org/osstest/logs/144891/

Regressions :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
 test-amd64-i386-freebsd10-i386 14 guest-saverestore  fail REGR. vs. 144861
 test-amd64-i386-freebsd10-amd64 14 guest-saverestore fail REGR. vs. 144861
 test-amd64-amd64-xl-qemuu-debianhvm-amd64 13 guest-saverestore fail REGR. vs. 
144861
 test-amd64-amd64-xl-qemuu-win7-amd64 13 guest-saverestore fail REGR. vs. 144861
 test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 13 guest-saverestore fail 
REGR. vs. 144861
 test-amd64-amd64-xl-qemuu-debianhvm-i386-xsm 13 guest-saverestore fail REGR. 
vs. 144861
 test-amd64-i386-xl-qemuu-debianhvm-amd64-shadow 13 guest-saverestore fail 
REGR. vs. 144861
 test-amd64-amd64-xl-qemuu-debianhvm-amd64-shadow 13 guest-saverestore fail 
REGR. vs. 144861
 test-amd64-amd64-xl-qemuu-ovmf-amd64 13 guest-saverestore fail REGR. vs. 144861
 test-amd64-i386-xl-qemuu-debianhvm-amd64 13 guest-saverestore fail REGR. vs. 
144861
 test-amd64-i386-xl-qemuu-ovmf-amd64 13 guest-saverestore fail REGR. vs. 144861
 test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm 13 guest-saverestore fail 
REGR. vs. 144861
 test-amd64-i386-xl-qemuu-debianhvm-i386-xsm 13 guest-saverestore fail REGR. 
vs. 144861
 test-amd64-i386-xl-qemuu-win7-amd64 13 guest-saverestore fail REGR. vs. 144861
 test-amd64-amd64-xl-qemuu-ws16-amd64 13 guest-saverestore fail REGR. vs. 144861
 test-amd64-i386-xl-qemuu-ws16-amd64 13 guest-saverestore fail REGR. vs. 144861

Regressions which are regarded as allowable (not blocking):
 test-amd64-amd64-xl-rtds 16 guest-localmigrate   fail REGR. vs. 144861
 test-armhf-armhf-xl-rtds 16 guest-start/debian.repeat fail REGR. vs. 144861

Tests which did not succeed, but are not blocking:
 test-armhf-armhf-libvirt 14 saverestore-support-checkfail  like 144861
 test-armhf-armhf-libvirt-raw 13 saverestore-support-checkfail  like 144861
 test-amd64-amd64-libvirt 13 migrate-support-checkfail   never pass
 test-amd64-i386-libvirt  13 migrate-support-checkfail   never pass
 test-amd64-amd64-libvirt-xsm 13 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-seattle  13 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-seattle  14 saverestore-support-checkfail   never pass
 test-amd64-i386-libvirt-xsm  13 migrate-support-checkfail   never pass
 test-amd64-i386-xl-pvshim12 guest-start  fail   never pass
 test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 11 migrate-support-check 
fail never pass
 test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm 11 migrate-support-check 
fail never pass
 test-arm64-arm64-xl  13 migrate-support-checkfail   never pass
 test-arm64-arm64-xl  14 saverestore-support-checkfail   never pass
 test-arm64-arm64-xl-credit2  13 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-credit2  14 saverestore-support-checkfail   never pass
 test-arm64-arm64-xl-credit1  13 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-credit1  14 saverestore-support-checkfail   never pass
 test-arm64-arm64-xl-thunderx 13 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-thunderx 14 saverestore-support-checkfail   never pass
 test-arm64-arm64-xl-xsm  13 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-xsm  14 saverestore-support-checkfail   never pass
 test-amd64-amd64-qemuu-nested-amd 17 debian-hvm-install/l1/l2  fail never pass
 test-arm64-arm64-libvirt-xsm 13 migrate-support-checkfail   never pass
 test-arm64-arm64-libvirt-xsm 14 saverestore-support-checkfail   never pass
 test-amd64-amd64-libvirt-vhd 12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-arndale  13 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-arndale  14 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl-multivcpu 13 migrate-support-checkfail  never pass
 test-armhf-armhf-xl-multivcpu 14 saverestore-support-checkfail  never pass
 test-armhf-armhf-xl-credit2  13 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-credit2  14 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl-cubietruck 13 migrate-support-checkfail never pass
 test-armhf-armhf-xl-cubietruck 14 saverestore-support-checkfail never pass
 test-armhf-armhf-xl-credit1  13 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-credit1  14 saverestore-support-checkfail   never pass
 test-armhf-armhf-libvirt 13 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-rtds 13 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-rtds 14 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl  13 migrate-support-checkfail   never pass

[Xen-devel] [PATCH 4/4] tools/dombuilder: Don't allocate dom->p2m_host[] for translated domains

2019-12-17 Thread Andrew Cooper

xc_dom_p2m() and dom->p2m_host[] implement a linear transform for translated
domains, but waste a substantial chunk of RAM doing so.

ARM literally never reads dom->p2m_host[] (because of the xc_dom_translated()
short circuit in xc_dom_p2m()).  Drop it all.

x86 HVM does use dom->p2m_host[] for xc_domain_populate_physmap_exact() calls
when populating 4k pages.  Reuse the same tactic from 2M/1G ranges and use an
on-stack array instead.  Drop the memory allocation.

x86 PV guests do use dom->p2m_host[] as a non-identity transform.  Rename the
field to pv_p2m to make it clear it is PV-only.

No change in the constructed guests.

Reported-by: Varad Gautam 
Reported-by: Julien Grall 
Signed-off-by: Andrew Cooper 
---
CC: Ian Jackson 
CC: Wei Liu 
CC: Stefano Stabellini 
CC: Julien Grall 
CC: Volodymyr Babchuk 
CC: Varad Gautam 
---
 stubdom/grub/kexec.c | 28 -
 tools/libxc/include/xc_dom.h | 19 ++--
 tools/libxc/xc_dom_arm.c |  9 --
 tools/libxc/xc_dom_x86.c | 72 ++--
 4 files changed, 52 insertions(+), 76 deletions(-)

diff --git a/stubdom/grub/kexec.c b/stubdom/grub/kexec.c
index 10891eabcc..0e68b969a2 100644
--- a/stubdom/grub/kexec.c
+++ b/stubdom/grub/kexec.c
@@ -87,17 +87,17 @@ static void do_exchange(struct xc_dom_image *dom, xen_pfn_t 
target_pfn, xen_pfn_
 xen_pfn_t target_mfn;
 
 for (source_pfn = 0; source_pfn < start_info.nr_pages; source_pfn++)
-if (dom->p2m_host[source_pfn] == source_mfn)
+if (dom->pv_p2m[source_pfn] == source_mfn)
 break;
 ASSERT(source_pfn < start_info.nr_pages);
 
-target_mfn = dom->p2m_host[target_pfn];
+target_mfn = dom->pv_p2m[target_pfn];
 
 /* Put target MFN at source PFN */
-dom->p2m_host[source_pfn] = target_mfn;
+dom->pv_p2m[source_pfn] = target_mfn;
 
 /* Put source MFN at target PFN */
-dom->p2m_host[target_pfn] = source_mfn;
+dom->pv_p2m[target_pfn] = source_mfn;
 }
 
 int kexec_allocate(struct xc_dom_image *dom)
@@ -110,7 +110,7 @@ int kexec_allocate(struct xc_dom_image *dom)
 pages_moved2pfns = realloc(pages_moved2pfns, new_allocated * 
sizeof(*pages_moved2pfns));
 for (i = allocated; i < new_allocated; i++) {
 /* Exchange old page of PFN i with a newly allocated page.  */
-xen_pfn_t old_mfn = dom->p2m_host[i];
+xen_pfn_t old_mfn = dom->pv_p2m[i];
 xen_pfn_t new_pfn;
 xen_pfn_t new_mfn;
 
@@ -122,7 +122,7 @@ int kexec_allocate(struct xc_dom_image *dom)
/*
 * If PFN of newly allocated page (new_pfn) is less then currently
 * requested PFN (i) then look for relevant PFN/MFN pair. In this
-* situation dom->p2m_host[new_pfn] no longer contains proper MFN
+* situation dom->pv_p2m[new_pfn] no longer contains proper MFN
 * because original page with new_pfn was moved earlier
 * to different location.
 */
@@ -132,10 +132,10 @@ int kexec_allocate(struct xc_dom_image *dom)
pages_moved2pfns[i] = new_pfn;
 
 /* Put old page at new PFN */
-dom->p2m_host[new_pfn] = old_mfn;
+dom->pv_p2m[new_pfn] = old_mfn;
 
 /* Put new page at PFN i */
-dom->p2m_host[i] = new_mfn;
+dom->pv_p2m[i] = new_mfn;
 }
 
 allocated = new_allocated;
@@ -282,11 +282,11 @@ void kexec(void *kernel, long kernel_size, void *module, 
long module_size, char
 dom->p2m_size = dom->total_pages;
 
 /* setup initial p2m */
-dom->p2m_host = malloc(sizeof(*dom->p2m_host) * dom->p2m_size);
+dom->pv_p2m = malloc(sizeof(*dom->pv_p2m) * dom->p2m_size);
 
 /* Start with our current P2M */
 for (i = 0; i < dom->p2m_size; i++)
-dom->p2m_host[i] = pfn_to_mfn(i);
+dom->pv_p2m[i] = pfn_to_mfn(i);
 
 if ( (rc = xc_dom_build_image(dom)) != 0 ) {
 printk("xc_dom_build_image returned %d\n", rc);
@@ -373,7 +373,7 @@ void kexec(void *kernel, long kernel_size, void *module, 
long module_size, char
 _boot_oldpdmfn = virt_to_mfn(start_info.pt_base);
 DEBUG("boot old pd mfn %lx\n", _boot_oldpdmfn);
 DEBUG("boot pd virt %lx\n", dom->pgtables_seg.vstart);
-_boot_pdmfn = dom->p2m_host[PHYS_PFN(dom->pgtables_seg.vstart - 
dom->parms.virt_base)];
+_boot_pdmfn = dom->pv_p2m[PHYS_PFN(dom->pgtables_seg.vstart - 
dom->parms.virt_base)];
 DEBUG("boot pd mfn %lx\n", _boot_pdmfn);
 _boot_stack = _boot_target + PAGE_SIZE;
 DEBUG("boot stack %lx\n", _boot_stack);
@@ -384,13 +384,13 @@ void kexec(void *kernel, long kernel_size, void *module, 
long module_size, char
 
 /* Keep only useful entries */
 for (nr_m2p_updates = pfn = 0; pfn < start_info.nr_pages; pfn++)
-if (dom->p2m_host[pfn] != pfn_to_mfn(pfn))
+if (dom->pv_p2m[pfn] != pfn_to_mfn(pfn))
 nr_m2p_updates++;
 
 m2p_updates = malloc(sizeof(*m2p_updates) * nr_m2p_updates);
 for (i = pfn = 0; pfn < start_info.nr_pages; pfn++)
-if

[Xen-devel] [PATCH 0/4] Don't allocate dom->p2m_host[] for translated domains

2019-12-17 Thread Andrew Cooper

Vastly reduce xl's memory usage for HVM (x86 and ARM) guest construction.

See
https://lore.kernel.org/xen-devel/1562159202-11316-1-git-send-email-...@amazon.de/T/#u
for the origins of this work, but ultimately I think this is a far cleaner
solution to the problem.

Andrew Cooper (4):
  tools/dombuilder: xc_dom_x86 cleanup
  tools/dombuilder: Remove PV-only, mandatory hooks
  tools/dombuilder: Remove p2m_guest from the common interface
  tools/dombuilder: Don't allocate dom->p2m_host[] for translated
domains

 stubdom/grub/kexec.c |  36 --
 tools/libxc/include/xc_dom.h |  24 +++
 tools/libxc/xc_dom_arm.c |  30 
 tools/libxc/xc_dom_boot.c|   6 +-
 tools/libxc/xc_dom_core.c|  43 +--
 tools/libxc/xc_dom_x86.c | 166 ++-
 6 files changed, 114 insertions(+), 191 deletions(-)

-- 
2.11.0


___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] [PATCH 3/4] tools/dombuilder: Remove p2m_guest from the common interface

2019-12-17 Thread Andrew Cooper

In-guest p2m's are a concept specific to x86 PV guests.  alloc_p2m_list() is
the only hook which initialises dom->p2m_guest, making
xc_dom_update_guest_p2m() a nop for non-PV guests.

Move p2m_guest into xc_dom_image_x86 and adjust alloc_p2m_list() to match.

Drop xc_dom_update_guest_p2m() entirely.

One caller, move_l3_below_4G(), only uses it to modify a single entry, so
rewriting the whole guest p2m is wasteful - opencode the single update
instead.  The other caller is common code.  Instead, move the logic into the
setup_pgtables() hooks, which know their own sizeof_pfn and can do away with
the switch statement.

No change in the constructed guests.

Signed-off-by: Andrew Cooper 
---
CC: Ian Jackson 
CC: Wei Liu 
CC: Stefano Stabellini 
CC: Julien Grall 
CC: Volodymyr Babchuk 
CC: Varad Gautam 
---
 stubdom/grub/kexec.c |  8 
 tools/libxc/include/xc_dom.h |  2 --
 tools/libxc/xc_dom_boot.c|  2 --
 tools/libxc/xc_dom_core.c| 40 
 tools/libxc/xc_dom_x86.c | 41 +++--
 5 files changed, 35 insertions(+), 58 deletions(-)

diff --git a/stubdom/grub/kexec.c b/stubdom/grub/kexec.c
index 61ca082d42..10891eabcc 100644
--- a/stubdom/grub/kexec.c
+++ b/stubdom/grub/kexec.c
@@ -320,14 +320,6 @@ void kexec(void *kernel, long kernel_size, void *module, 
long module_size, char
 do_exchange(dom, PHYS_PFN(_boot_target - dom->parms.virt_base),
 virt_to_mfn(&_boot_page));
 
-/* Make sure the bootstrap page table does not RW-map any of our current
- * page table frames */
-if ( (rc = xc_dom_update_guest_p2m(dom))) {
-printk("xc_dom_update_guest_p2m returned %d\n", rc);
-errnum = ERR_BOOT_FAILURE;
-goto out;
-}
-
 if ( dom->arch_hooks->setup_pgtables )
 if ( (rc = dom->arch_hooks->setup_pgtables(dom))) {
 printk("setup_pgtables returned %d\n", rc);
diff --git a/tools/libxc/include/xc_dom.h b/tools/libxc/include/xc_dom.h
index 9ff1cb8b07..b7d0faf7e1 100644
--- a/tools/libxc/include/xc_dom.h
+++ b/tools/libxc/include/xc_dom.h
@@ -133,7 +133,6 @@ struct xc_dom_image {
  * Note that the input is offset by rambase.
  */
 xen_pfn_t *p2m_host;
-void *p2m_guest;
 
 /* physical memory
  *
@@ -331,7 +330,6 @@ int xc_dom_devicetree_mem(struct xc_dom_image *dom, const 
void *mem,
 int xc_dom_parse_image(struct xc_dom_image *dom);
 int xc_dom_set_arch_hooks(struct xc_dom_image *dom);
 int xc_dom_build_image(struct xc_dom_image *dom);
-int xc_dom_update_guest_p2m(struct xc_dom_image *dom);
 
 int xc_dom_boot_xen_init(struct xc_dom_image *dom, xc_interface *xch,
  uint32_t domid);
diff --git a/tools/libxc/xc_dom_boot.c b/tools/libxc/xc_dom_boot.c
index 79dbbf6571..bb599b33ba 100644
--- a/tools/libxc/xc_dom_boot.c
+++ b/tools/libxc/xc_dom_boot.c
@@ -197,8 +197,6 @@ int xc_dom_boot_image(struct xc_dom_image *dom)
 return -1;
 
 /* initial mm setup */
-if ( (rc = xc_dom_update_guest_p2m(dom)) != 0 )
-return rc;
 if ( dom->arch_hooks->setup_pgtables &&
  (rc = dom->arch_hooks->setup_pgtables(dom)) != 0 )
 return rc;
diff --git a/tools/libxc/xc_dom_core.c b/tools/libxc/xc_dom_core.c
index fc77804a7e..f30c73b5e8 100644
--- a/tools/libxc/xc_dom_core.c
+++ b/tools/libxc/xc_dom_core.c
@@ -969,46 +969,6 @@ int xc_dom_mem_init(struct xc_dom_image *dom, unsigned int 
mem_mb)
 return 0;
 }
 
-int xc_dom_update_guest_p2m(struct xc_dom_image *dom)
-{
-uint32_t *p2m_32;
-uint64_t *p2m_64;
-xen_pfn_t i;
-
-if ( !dom->p2m_guest )
-return 0;
-
-switch ( dom->arch_hooks->sizeof_pfn )
-{
-case 4:
-DOMPRINTF("%s: dst 32bit, pages 0x%" PRIpfn "",
-  __FUNCTION__, dom->p2m_size);
-p2m_32 = dom->p2m_guest;
-for ( i = 0; i < dom->p2m_size; i++ )
-if ( dom->p2m_host[i] != INVALID_PFN )
-p2m_32[i] = dom->p2m_host[i];
-else
-p2m_32[i] = (uint32_t) - 1;
-break;
-case 8:
-DOMPRINTF("%s: dst 64bit, pages 0x%" PRIpfn "",
-  __FUNCTION__, dom->p2m_size);
-p2m_64 = dom->p2m_guest;
-for ( i = 0; i < dom->p2m_size; i++ )
-if ( dom->p2m_host[i] != INVALID_PFN )
-p2m_64[i] = dom->p2m_host[i];
-else
-p2m_64[i] = (uint64_t) - 1;
-break;
-default:
-xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
- "sizeof_pfn is invalid (is %d, can be 4 or 8)",
- dom->arch_hooks->sizeof_pfn);
-return -1;
-}
-return 0;
-}
-
 static int xc_dom_build_module(struct xc_dom_image *dom, unsigned int mod)
 {
 size_t unziplen, modulelen;
diff --git a/tools/libxc/xc_dom_x86.c b/tools/libxc/xc_dom_x86.c
index d2acff1061..f21662c8b9 100644
--- a/tools/libxc/xc_dom_x86.c
+++ b/tools/libxc/xc_dom_x86.c
@@ -104,6 +104,9 @@ struct

[Xen-devel] [PATCH 2/4] tools/dombuilder: Remove PV-only, mandatory hooks

2019-12-17 Thread Andrew Cooper

Currently, the setup_pgtables() hook is optional, but the alloc_pgtables() hook
is not.  Both are specific to x86 PV guests, and stubbed in various ways by the
dombuilders for translated guests (x86 HVM, ARM).

Make alloc_pgtables() optional, and drop all the stubs for translated guest
types.

No change in the constructed guests.

Signed-off-by: Andrew Cooper 
---
CC: Ian Jackson 
CC: Wei Liu 
CC: Stefano Stabellini 
CC: Julien Grall 
CC: Volodymyr Babchuk 
CC: Varad Gautam 
---
 tools/libxc/include/xc_dom.h |  3 ++-
 tools/libxc/xc_dom_arm.c | 21 -
 tools/libxc/xc_dom_boot.c|  6 +++---
 tools/libxc/xc_dom_core.c|  3 ++-
 tools/libxc/xc_dom_x86.c |  7 ---
 5 files changed, 7 insertions(+), 33 deletions(-)

diff --git a/tools/libxc/include/xc_dom.h b/tools/libxc/include/xc_dom.h
index 5900bbe8fa..9ff1cb8b07 100644
--- a/tools/libxc/include/xc_dom.h
+++ b/tools/libxc/include/xc_dom.h
@@ -253,8 +253,9 @@ void xc_dom_register_loader(struct xc_dom_loader *loader);
 /* --- arch specific hooks - */
 
 struct xc_dom_arch {
-/* pagetable setup */
 int (*alloc_magic_pages) (struct xc_dom_image * dom);
+
+/* pagetable setup - x86 PV only */
 int (*alloc_pgtables) (struct xc_dom_image * dom);
 int (*alloc_p2m_list) (struct xc_dom_image * dom);
 int (*setup_pgtables) (struct xc_dom_image * dom);
diff --git a/tools/libxc/xc_dom_arm.c b/tools/libxc/xc_dom_arm.c
index 5b9eca6087..7e0fb9169f 100644
--- a/tools/libxc/xc_dom_arm.c
+++ b/tools/libxc/xc_dom_arm.c
@@ -47,23 +47,6 @@ const char *xc_domain_get_native_protocol(xc_interface *xch,
 }
 
 /*  */
-/*
- * arm guests are hybrid and start off with paging disabled, therefore no
- * pagetables and nothing to do here.
- */
-static int alloc_pgtables_arm(struct xc_dom_image *dom)
-{
-DOMPRINTF_CALLED(dom->xch);
-return 0;
-}
-
-static int setup_pgtables_arm(struct xc_dom_image *dom)
-{
-DOMPRINTF_CALLED(dom->xch);
-return 0;
-}
-
-/*  */
 
 static int alloc_magic_pages(struct xc_dom_image *dom)
 {
@@ -539,8 +522,6 @@ static struct xc_dom_arch xc_dom_32 = {
 .page_shift = PAGE_SHIFT_ARM,
 .sizeof_pfn = 8,
 .alloc_magic_pages = alloc_magic_pages,
-.alloc_pgtables = alloc_pgtables_arm,
-.setup_pgtables = setup_pgtables_arm,
 .start_info = start_info_arm,
 .shared_info = shared_info_arm,
 .vcpu = vcpu_arm32,
@@ -555,8 +536,6 @@ static struct xc_dom_arch xc_dom_64 = {
 .page_shift = PAGE_SHIFT_ARM,
 .sizeof_pfn = 8,
 .alloc_magic_pages = alloc_magic_pages,
-.alloc_pgtables = alloc_pgtables_arm,
-.setup_pgtables = setup_pgtables_arm,
 .start_info = start_info_arm,
 .shared_info = shared_info_arm,
 .vcpu = vcpu_arm64,
diff --git a/tools/libxc/xc_dom_boot.c b/tools/libxc/xc_dom_boot.c
index 918ee4d045..79dbbf6571 100644
--- a/tools/libxc/xc_dom_boot.c
+++ b/tools/libxc/xc_dom_boot.c
@@ -199,9 +199,9 @@ int xc_dom_boot_image(struct xc_dom_image *dom)
 /* initial mm setup */
 if ( (rc = xc_dom_update_guest_p2m(dom)) != 0 )
 return rc;
-if ( dom->arch_hooks->setup_pgtables )
-if ( (rc = dom->arch_hooks->setup_pgtables(dom)) != 0 )
-return rc;
+if ( dom->arch_hooks->setup_pgtables &&
+ (rc = dom->arch_hooks->setup_pgtables(dom)) != 0 )
+return rc;
 
 /* start info page */
 if ( dom->arch_hooks->start_info )
diff --git a/tools/libxc/xc_dom_core.c b/tools/libxc/xc_dom_core.c
index 9bd04cb2d5..fc77804a7e 100644
--- a/tools/libxc/xc_dom_core.c
+++ b/tools/libxc/xc_dom_core.c
@@ -1247,7 +1247,8 @@ int xc_dom_build_image(struct xc_dom_image *dom)
 goto err;
 if ( dom->arch_hooks->alloc_magic_pages(dom) != 0 )
 goto err;
-if ( dom->arch_hooks->alloc_pgtables(dom) != 0 )
+if ( dom->arch_hooks->alloc_pgtables &&
+ dom->arch_hooks->alloc_pgtables(dom) != 0 )
 goto err;
 if ( dom->alloc_bootstack )
 {
diff --git a/tools/libxc/xc_dom_x86.c b/tools/libxc/xc_dom_x86.c
index 1ce3c798ef..d2acff1061 100644
--- a/tools/libxc/xc_dom_x86.c
+++ b/tools/libxc/xc_dom_x86.c
@@ -1690,12 +1690,6 @@ static int bootlate_pv(struct xc_dom_image *dom)
 return 0;
 }
 
-static int alloc_pgtables_hvm(struct xc_dom_image *dom)
-{
-DOMPRINTF("%s: doing nothing", __func__);
-return 0;
-}
-
 /*
  * The memory layout of the start_info page and the modules, and where the
  * addresses are stored:
@@ -1906,7 +1900,6 @@ static struct xc_dom_arch xc_hvm_32 = {
 .page_shift = PAGE_SHIFT_X86,
 .sizeof_pfn = 4,
 .alloc_magic_pages = alloc_magic_pages_hvm,
-.alloc_pgtables = alloc_pgtables_hvm,
 .vcpu = vcpu_hvm,
 .meminit = meminit_hvm,
 .bootearly = bootearly,
-- 
2.11.0


___
Xen-devel mailing list

[Xen-devel] [PATCH 1/4] tools/dombuilder: xc_dom_x86 cleanup

2019-12-17 Thread Andrew Cooper

The two xc_dom_params structures for PV pagetables are never modified and can
live in .rodata.  Reduce their scope to the alloc_pgtable_*() functions which
construct xc_dom_image_x86 appropriately.

Rename {alloc,setup}_pgtables() to {alloc,setup}_pgtables_pv() to highlight
that they are PV only, and drop some _x86() suffixes from static helpers.

No functional change.

Signed-off-by: Andrew Cooper 
---
CC: Ian Jackson 
CC: Wei Liu 
CC: Stefano Stabellini 
CC: Julien Grall 
CC: Volodymyr Babchuk 
CC: Varad Gautam 
---
 tools/libxc/xc_dom_x86.c | 60 ++--
 1 file changed, 28 insertions(+), 32 deletions(-)

diff --git a/tools/libxc/xc_dom_x86.c b/tools/libxc/xc_dom_x86.c
index 9e279d6768..1ce3c798ef 100644
--- a/tools/libxc/xc_dom_x86.c
+++ b/tools/libxc/xc_dom_x86.c
@@ -103,7 +103,7 @@ struct xc_dom_image_x86 {
 unsigned n_mappings;
 #define MAPPING_MAX 2
 struct xc_dom_x86_mapping maps[MAPPING_MAX];
-struct xc_dom_params *params;
+const struct xc_dom_params *params;
 };
 
 /* get guest IO ABI protocol */
@@ -235,7 +235,7 @@ static int count_pgtables(struct xc_dom_image *dom, 
xen_vaddr_t from,
 return 0;
 }
 
-static int alloc_pgtables(struct xc_dom_image *dom)
+static int alloc_pgtables_pv(struct xc_dom_image *dom)
 {
 int pages, extra_pages;
 xen_vaddr_t try_virt_end;
@@ -268,20 +268,20 @@ static int alloc_pgtables(struct xc_dom_image *dom)
 /*  */
 /* i386 pagetables  */
 
-static struct xc_dom_params x86_32_params = {
-.levels = PGTBL_LEVELS_I386,
-.vaddr_mask = bits_to_mask(VIRT_BITS_I386),
-.lvl_prot[0] = _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED,
-.lvl_prot[1] = 
_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER,
-.lvl_prot[2] = _PAGE_PRESENT,
-};
-
 static int alloc_pgtables_x86_32_pae(struct xc_dom_image *dom)
 {
+static const struct xc_dom_params x86_32_params = {
+.levels = PGTBL_LEVELS_I386,
+.vaddr_mask = bits_to_mask(VIRT_BITS_I386),
+.lvl_prot[0] = _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED,
+.lvl_prot[1] = 
_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER,
+.lvl_prot[2] = _PAGE_PRESENT,
+};
 struct xc_dom_image_x86 *domx86 = dom->arch_private;
 
 domx86->params = _32_params;
-return alloc_pgtables(dom);
+
+return alloc_pgtables_pv(dom);
 }
 
 #define pfn_to_paddr(pfn) ((xen_paddr_t)(pfn) << PAGE_SHIFT_X86)
@@ -355,7 +355,7 @@ static xen_pfn_t move_l3_below_4G(struct xc_dom_image *dom,
 return l3mfn;
 }
 
-static x86_pgentry_t *get_pg_table_x86(struct xc_dom_image *dom, int m, int l)
+static x86_pgentry_t *get_pg_table(struct xc_dom_image *dom, int m, int l)
 {
 struct xc_dom_image_x86 *domx86 = dom->arch_private;
 struct xc_dom_x86_mapping *map;
@@ -371,8 +371,7 @@ static x86_pgentry_t *get_pg_table_x86(struct xc_dom_image 
*dom, int m, int l)
 return NULL;
 }
 
-static x86_pgentry_t get_pg_prot_x86(struct xc_dom_image *dom, int l,
- xen_pfn_t pfn)
+static x86_pgentry_t get_pg_prot(struct xc_dom_image *dom, int l, xen_pfn_t 
pfn)
 {
 struct xc_dom_image_x86 *domx86 = dom->arch_private;
 struct xc_dom_x86_mapping *map;
@@ -396,7 +395,7 @@ static x86_pgentry_t get_pg_prot_x86(struct xc_dom_image 
*dom, int l,
 return prot;
 }
 
-static int setup_pgtables_x86(struct xc_dom_image *dom)
+static int setup_pgtables_pv(struct xc_dom_image *dom)
 {
 struct xc_dom_image_x86 *domx86 = dom->arch_private;
 struct xc_dom_x86_mapping *map1, *map2;
@@ -413,7 +412,7 @@ static int setup_pgtables_x86(struct xc_dom_image *dom)
 map1 = domx86->maps + m1;
 from = map1->lvls[l].from;
 to = map1->lvls[l].to;
-pg = get_pg_table_x86(dom, m1, l);
+pg = get_pg_table(dom, m1, l);
 if ( !pg )
 return -1;
 for ( m2 = 0; m2 < domx86->n_mappings; m2++ )
@@ -433,7 +432,7 @@ static int setup_pgtables_x86(struct xc_dom_image *dom)
 for ( p = p_s; p <= p_e; p++ )
 {
 pg[p] = pfn_to_paddr(xc_dom_p2m(dom, pfn)) |
-get_pg_prot_x86(dom, l, pfn);
+get_pg_prot(dom, l, pfn);
 pfn++;
 }
 }
@@ -464,32 +463,32 @@ static int setup_pgtables_x86_32_pae(struct xc_dom_image 
*dom)
 }
 }
 
-return setup_pgtables_x86(dom);
+return setup_pgtables_pv(dom);
 }
 
 /*  */
 /* x86_64 pagetables*/
 
-static struct xc_dom_params x86_64_params = {
-.levels = PGTBL_LEVELS_X86_64,
-.vaddr_mask = bits_to_mask(VIRT_BITS_X86_64),
-.lvl_prot[0] = _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED,
-

Re: [Xen-devel] REGRESSION: Xen 4.13 RC5 fails to bootstrap Dom0 on ARM

2019-12-17 Thread Stefano Stabellini

On Tue, 17 Dec 2019, Roman Shaposhnik wrote:
> On Tue, Dec 17, 2019 at 10:30 AM Stefano Stabellini
>  wrote:
> >
> > On Tue, 17 Dec 2019, Julien Grall wrote:
> > > Hi,
> > >
> > > On 17/12/2019 04:39, Roman Shaposhnik wrote:
> > > > On Mon, Dec 16, 2019 at 6:55 PM Stefano Stabellini
> > > >  wrote:
> > > > > On Mon, 16 Dec 2019, Roman Shaposhnik wrote:
> > > > > If I sum all the memory sizes together I get 0x3ddfd000 which is 990M.
> > > > > > If so, I wonder how you could boot successfully with dom0_mem=1024M 
> > > > > even
> > > > > on Xen 4.12... :-?
> > > >
> > > > That is a very interesting observation indeed! I actually don't
> > > > remember where that device tree came from, but I think it was from one
> > > > of the Linaro sites.
> > >
> > > > This is most likely because of:
> > >
> > > commit 6341a674573f1834f083f0ab0f5b36b075f9e02e
> > > Author: Julien Grall 
> > > Date:   Wed Aug 21 22:42:31 2019 +0100
> > >
> > > xen/arm: domain_build: Don't continue if unable to allocate all dom0 
> > > banks
> > >
> > > Xen will only print a warning if there are memory unallocated when 
> > > using
> > > 1:1 mapping (only used by dom0). This also includes the case where no
> > > memory has been allocated.
> > >
> > > > It will bring to all sort of issues that can be hard to diagnose for
> > > > users (the warning can be difficult to spot or disregard).
> > >
> > > If the users request 1GB of memory, then most likely they want the 
> > > exact
> > > amount and not 512MB. So panic if all the memory has not been 
> > > allocated.
> > >
> > > After this change, the behavior is the same as for non-1:1 memory
> > > allocation (used by domU).
> > >
> > > At the same time, reflow the message to have the format on a single
> > > line.
> > >
> > > Signed-off-by: Julien Grall 
> > > Acked-by: Stefano Stabellini 
> >
> > Ah! Roman, could you please post the full boot log of a successful 4.12
> > boot?
> >
> > If it has a "Failed to allocate requested dom0 memory" message, then we
> > know what the issue is.
> 
> Aha! Our messages seem to have crossed ;-) Full log is attached and
> yes -- that's
> the problem indeed.
> 
> So at least that mystery is solved. But I'm still not able to get to a
> full 1G of memory
> even with your update to the device tree file. Any chance you can send me the
> device tree file that works for you?

I didn't try on real hardware, I only tried on QEMU with a similar
configuration. I went back and checked the HiKey device tree I used and it
is the same as yours (including the ramoops reserved-memory error).

Apparently there are 1G and 2G variants of the HiKey; obviously both
your device tree and mine are for the 1G variant. I tried to dig through
the docs but couldn't find the details of the 2G variant. I cannot find
the memory range for the top 1G of memory anywhere, not even in the
LeMaker docs! :-/

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] [ovmf test] 144903: regressions - FAIL

2019-12-17 Thread osstest service owner

flight 144903 ovmf real [real]
http://logs.test-lab.xenproject.org/osstest/logs/144903/

Regressions :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
 build-i386-xsm                6 xen-build                fail REGR. vs. 144637
 build-amd64                   6 xen-build                fail REGR. vs. 144637
 build-amd64-xsm               6 xen-build                fail REGR. vs. 144637
 build-i386                    6 xen-build                fail REGR. vs. 144637

Tests which did not succeed, but are not blocking:
 test-amd64-amd64-xl-qemuu-ovmf-amd64  1 build-check(1) blocked n/a
 build-amd64-libvirt   1 build-check(1)   blocked  n/a
 build-i386-libvirt1 build-check(1)   blocked  n/a
 test-amd64-i386-xl-qemuu-ovmf-amd64  1 build-check(1)  blocked n/a

version targeted for testing:
 ovmf bfb141cf19dd6f9b8df8b9d0914a5b3b15e1a798
baseline version:
 ovmf 804666c86e7b6f04fe5c5cfdb13199c19e0e99b0

Last test of basis   144637  2019-12-09 09:09:49 Z    8 days
Failing since        144646  2019-12-10 01:39:53 Z    7 days   70 attempts
Testing same since   144770  2019-12-12 18:41:26 Z    5 days   59 attempts


People who touched revisions under test:
  Antoine Coeur 
  Ard Biesheuvel 
  Bob Feng 
  Jiewen Yao 
  Michael Kubacki 
  Pete Batard 
  Philippe Mathieu-Daude 
  Steven Shi 

jobs:
 build-amd64-xsm  fail
 build-i386-xsm   fail
 build-amd64  fail
 build-i386   fail
 build-amd64-libvirt  blocked 
 build-i386-libvirt   blocked 
 build-amd64-pvopspass
 build-i386-pvops pass
 test-amd64-amd64-xl-qemuu-ovmf-amd64 blocked 
 test-amd64-i386-xl-qemuu-ovmf-amd64  blocked 



sg-report-flight on osstest.test-lab.xenproject.org
logs: /home/logs/logs
images: /home/logs/images

Logs, config files, etc. are available at
http://logs.test-lab.xenproject.org/osstest/logs

Explanation of these reports, and of osstest in general, is at
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master

Test harness code can be found at
http://xenbits.xen.org/gitweb?p=osstest.git;a=summary


Not pushing.


commit bfb141cf19dd6f9b8df8b9d0914a5b3b15e1a798
Author: Pete Batard 
Date:   Tue Dec 10 18:23:04 2019 +

MdePkg/Include: Add DCC and BCM2835 SPCR UART types

As per the Microsoft Debug Port Table 2 (DBG2) documentation, that
can be found online, we are missing 2 serial interface types for
Arm DCC and Bcm2835 (the latter being used with the Raspberry Pi).

These same types are present in DebugPort2Table.h so add them to
SerialPortConsoleRedirectionTable.h too.

Note that we followed the same idiosyncrasies as DebugPort2Table
for naming these new macros.

Signed-off-by: Pete Batard 
Acked-by: Ard Biesheuvel 
Reviewed-by: Liming Gao 

commit 2fe25a74d6fee3c2ac0b930f7f3596cb432e766e
Author: Ard Biesheuvel 
Date:   Tue Mar 5 14:32:48 2019 +0100

ArmPkg/MmCommunicationDxe: relay architected PI events to MM context

PI defines a few architected events that have significance in the MM
context as well as in the non-secure DXE context. So register notify
handlers for these events, and relay them into the standalone MM world.

Signed-off-by: Ard Biesheuvel 
Reviewed-by: Jiewen Yao 
Reviewed-by: Achin Gupta 

commit d3add11e87dace180387562d6f1951f2bffbd3d9
Author: Michael Kubacki 
Date:   Wed Nov 20 17:31:24 2019 -0800

MdeModulePkg PeiCore: Improve comment semantics

This patch clarifies wording in several PeiCore comments to improve
reading comprehension.

Cc: Dandan Bi 
Cc: Liming Gao 
Cc: Jian J Wang 
Cc: Hao A Wu 
Signed-off-by: Michael Kubacki 
Reviewed-by: Liming Gao 
Reviewed-by: Jian J Wang 

commit d39d1260c615b716675f67f5c4e1f4f52df01dad
Author: Michael Kubacki 
Date:   Wed Nov 20 17:10:48 2019 -0800

MdeModulePkg PeiCore: Fix typos

Cc: Dandan Bi 
Cc: Liming Gao 
Cc: Jian J Wang 
Cc: Hao A Wu 
Signed-off-by: Michael Kubacki 
Reviewed-by: Liming Gao 
Reviewed-by: Philippe Mathieu-Daude 
Reviewed-by: Jian J Wang 

commit 97eedf5dfbaffde33210fd88066247cf0b7d3325
Author: Antoine Coeur 
Date:   Wed Dec 4 12:14:53

Re: [Xen-devel] REGRESSION: Xen 4.13 RC5 fails to bootstrap Dom0 on ARM

2019-12-17 Thread Roman Shaposhnik

On Tue, Dec 17, 2019 at 10:30 AM Stefano Stabellini
 wrote:
>
> On Tue, 17 Dec 2019, Julien Grall wrote:
> > Hi,
> >
> > On 17/12/2019 04:39, Roman Shaposhnik wrote:
> > > On Mon, Dec 16, 2019 at 6:55 PM Stefano Stabellini
> > >  wrote:
> > > > On Mon, 16 Dec 2019, Roman Shaposhnik wrote:
> > > > If I sum all the memory sizes together I get 0x3ddfd000 which is 990M.
> > > > If so, I wonder how you could boot successfully with dom0_mem=1024M even
> > > > on Xen 4.12... :-?
> > >
> > > That is a very interesting observation indeed! I actually don't
> > > remember where that device tree came from, but I think it was from one
> > > of the Linaro sites.
> >
> > This is most likely because of:
> >
> > commit 6341a674573f1834f083f0ab0f5b36b075f9e02e
> > Author: Julien Grall 
> > Date:   Wed Aug 21 22:42:31 2019 +0100
> >
> > xen/arm: domain_build: Don't continue if unable to allocate all dom0 
> > banks
> >
> > Xen will only print a warning if there are memory unallocated when using
> > 1:1 mapping (only used by dom0). This also includes the case where no
> > memory has been allocated.
> >
> > It will bring to all sort of issues that can be hard to diagnostic for
> > users (the warning can be difficult to spot or disregard).
> >
> > If the users request 1GB of memory, then most likely they want the exact
> > amount and not 512MB. So panic if all the memory has not been allocated.
> >
> > After this change, the behavior is the same as for non-1:1 memory
> > allocation (used by domU).
> >
> > At the same time, reflow the message to have the format on a single
> > line.
> >
> > Signed-off-by: Julien Grall 
> > Acked-by: Stefano Stabellini 
>
> Ah! Roman, could you please post the full boot log of a successful 4.12
> boot?
>
> If it has a "Failed to allocate requested dom0 memory" message, then we
> know what the issue is.

Aha! Our messages seem to have crossed ;-) Full log is attached and
yes -- that's
the problem indeed.

So at least that mystery is solved. But I'm still not able to get to a
full 1G of memory
even with your update to the device tree file. Any chance you can send me the
device tree file that works for you?

Thanks,
Roman.
Using modules provided by bootloader in FDT
Xen 4.12.0 (c/s ) EFI loader
 Xen 4.12.0
(XEN) Xen version 4.12.0 (@) (gcc (Alpine 6.4.0) 6.4.0) debug=n  Fri Jun  7 
17:32:08 UTC 2019
(XEN) Latest ChangeSet:
(XEN) Processor: 410fd033: "ARM Limited", variant: 0x0, part 0xd03, rev 0x3
(XEN) 64-bit Execution:
(XEN)   Processor Features:  
(XEN) Exception Levels: EL3:64+32 EL2:64+32 EL1:64+32 EL0:64+32
(XEN) Extensions: FloatingPoint AdvancedSIMD
(XEN)   Debug Features: 10305106 
(XEN)   Auxiliary Features:  
(XEN)   Memory Model Features: 1122 
(XEN)   ISA Features:  00011120 
(XEN) 32-bit Execution:
(XEN)   Processor Features: 0131:00011011
(XEN) Instruction Sets: AArch32 A32 Thumb Thumb-2 Jazelle
(XEN) Extensions: GenericTimer Security
(XEN)   Debug Features: 03010066
(XEN)   Auxiliary Features: 
(XEN)   Memory Model Features: 10101105 4000 0126 02102211
(XEN)  ISA Features: 02101110 13112111 21232042 01112131 00011142 00011121
(XEN) Generic Timer IRQ: phys=30 hyp=26 virt=27 Freq: 1200 KHz
(XEN) GICv2 initialization:
(XEN) gic_dist_addr=f6801000
(XEN) gic_cpu_addr=f6802000
(XEN) gic_hyp_addr=f6804000
(XEN) gic_vcpu_addr=f6806000
(XEN) gic_maintenance_irq=25
(XEN) GICv2: 160 lines, 8 cpus, secure (IID 0200143b).
(XEN) Using scheduler: SMP Credit Scheduler rev2 (credit2)
(XEN) Initializing Credit2 scheduler
(XEN) Allocated console ring of 16 KiB.
(XEN) Bringing up CPU1
(XEN) Bringing up CPU2
(XEN) Bringing up CPU3
(XEN) Bringing up CPU4
(XEN) Bringing up CPU5
(XEN) Bringing up CPU6
(XEN) Bringing up CPU7
(XEN) Brought up 8 CPUs
(XEN) P2M: 40-bit IPA with 40-bit PA and 8-bit VMID
(XEN) P2M: 3 levels with order-1 root, VTCR 0x80023558
(XEN) I/O virtualisation disabled
(XEN) *** LOADING DOMAIN 0 ***
(XEN) Loading Domd0 kernel from boot module @ 48d38000
(XEN) Loading ramdisk from boot module @ 47aa6000
(XEN) Allocating 1:1 mappings totalling 1024MB for dom0:
(XEN) WARNING: Failed to allocate requested dom0 memory. 624MB unallocated
(XEN) BANK[0] 0x000800-0x001000 (128MB)
(XEN) BANK[1] 0x003600-0x003e00 (128MB)
(XEN) BANK[2] 0x004000-0x004700 (112MB)
(XEN) BANK[3] 0x007b00-0x007c00 (16MB)
(XEN) BANK[4] 0x007e00-0x007f00 (16MB)
(XEN) Grant table range: 0x0047998000-0x00479d8000
(XEN) Allocating PPI 16 for event channel interrupt
(XEN) Loading zImage from 48d38000 to 0808-09233200
(XEN) Loading dom0 initrd from 47aa6000 to

Re: [Xen-devel] REGRESSION: Xen 4.13 RC5 fails to bootstrap Dom0 on ARM

2019-12-17 Thread Roman Shaposhnik

Hi Julien,

On Tue, Dec 17, 2019 at 3:30 AM Julien Grall  wrote:
>
> Hi,
>
> On 17/12/2019 04:39, Roman Shaposhnik wrote:
> > On Mon, Dec 16, 2019 at 6:55 PM Stefano Stabellini
> >  wrote:
> >> On Mon, 16 Dec 2019, Roman Shaposhnik wrote:
> >> If I sum all the memory sizes together I get 0x3ddfd000 which is 990M.
> >> If so, I wonder how you could boot successfully with dom0_mem=1024M even
> >> on Xen 4.12... :-?
> >
> > That is a very interesting observation indeed! I actually don't
> > remember where that device tree came from, but I think it was from one
> > of the Linaro sites.
>
> This is most likely because of:
>
> commit 6341a674573f1834f083f0ab0f5b36b075f9e02e
> Author: Julien Grall 
> Date:   Wed Aug 21 22:42:31 2019 +0100
>
>  xen/arm: domain_build: Don't continue if unable to allocate all
> dom0 banks
>
>  Xen will only print a warning if there are memory unallocated when
> using
>  1:1 mapping (only used by dom0). This also includes the case where no
>  memory has been allocated.
>
>  It will bring to all sort of issues that can be hard to diagnostic for
>  users (the warning can be difficult to spot or disregard).
>
>  If the users request 1GB of memory, then most likely they want the
> exact
>  amount and not 512MB. So panic if all the memory has not been
> allocated.
>
>  After this change, the behavior is the same as for non-1:1 memory
>  allocation (used by domU).
>
>  At the same time, reflow the message to have the format on a single
>  line.
>
>  Signed-off-by: Julien Grall 
>  Acked-by: Stefano Stabellini 

It seems you're absolutely right. Looking at the logs from Xen 4.12 I'm seeing:

(XEN) Allocating 1:1 mappings totalling 1024MB for dom0:
(XEN) WARNING: Failed to allocate requested dom0 memory. 624MB unallocated
(XEN) BANK[0] 0x000800-0x001000 (128MB)
(XEN) BANK[1] 0x003600-0x003e00 (128MB)
(XEN) BANK[2] 0x004000-0x004700 (112MB)
(XEN) BANK[3] 0x007b00-0x007c00 (16MB)
(XEN) BANK[4] 0x007e00-0x007f00 (16MB)
(XEN) Grant table range: 0x0047998000-0x00479d8000
(XEN) Allocating PPI 16 for event channel interrupt

So yes -- it was a warning that has now turned into an ERROR. So at least that
part is clear now.

What isn't clear still is the interplay between device trees and Xen
memory allocation -- I'll reply to Stefano on that.

Thanks,
Roman.

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] REGRESSION: Xen 4.13 RC5 fails to bootstrap Dom0 on ARM

2019-12-17 Thread Stefano Stabellini

On Tue, 17 Dec 2019, Julien Grall wrote:
> Hi,
> 
> On 17/12/2019 04:39, Roman Shaposhnik wrote:
> > On Mon, Dec 16, 2019 at 6:55 PM Stefano Stabellini
> >  wrote:
> > > On Mon, 16 Dec 2019, Roman Shaposhnik wrote:
> > > If I sum all the memory sizes together I get 0x3ddfd000 which is 990M.
> > > If so, I wonder how you could boot successfully with dom0_mem=1024M even
> > > on Xen 4.12... :-?
> > 
> > That is a very interesting observation indeed! I actually don't
> > remember where that device tree came from, but I think it was from one
> > of the Linaro sites.
> 
> This is most likely because of:
> 
> commit 6341a674573f1834f083f0ab0f5b36b075f9e02e
> Author: Julien Grall 
> Date:   Wed Aug 21 22:42:31 2019 +0100
> 
> xen/arm: domain_build: Don't continue if unable to allocate all dom0 banks
> 
> Xen will only print a warning if there are memory unallocated when using
> 1:1 mapping (only used by dom0). This also includes the case where no
> memory has been allocated.
> 
> It will bring to all sort of issues that can be hard to diagnostic for
> users (the warning can be difficult to spot or disregard).
> 
> If the users request 1GB of memory, then most likely they want the exact
> amount and not 512MB. So panic if all the memory has not been allocated.
> 
> After this change, the behavior is the same as for non-1:1 memory
> allocation (used by domU).
> 
> At the same time, reflow the message to have the format on a single
> line.
> 
> Signed-off-by: Julien Grall 
> Acked-by: Stefano Stabellini 

Ah! Roman, could you please post the full boot log of a successful 4.12
boot?

If it has a "Failed to allocate requested dom0 memory" message, then we
know what the issue is.

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] Xen ARM Dom0less passthrough without IOMMU

2019-12-17 Thread Stefano Stabellini

On Tue, 17 Dec 2019, Andrei Cherechesu wrote:
> > On Mon, 16 Dec 2019, Julien Grall wrote:
> > > On 16/12/2019 23:05, Stefano Stabellini wrote:
> > > > On Mon, 16 Dec 2019, Julien Grall wrote:
> > > > > On 16/12/2019 18:02, Andrei Cherechesu wrote:
> > > > > But even with this patch, RAM in DomU is not direct mapped (i.e Guest
> > > > > Physical
> > > > > Address == Host Physical Address). This means that DMA-capable device
> > > > > would
> > > > > not work properly in DomU.
> > > > >
> > > > > We could theoretically map DomU direct mapped, but this would break 
> > > > > the
> > > > > isolation provided by the hypervisor.
> > > >
> > > > Yes, being able to map the DomU memory 1:1 can be pretty useful for some
> > > > very embedded dom0less configurations, in fact I was surprised that a
> > > > couple of Xilinx users asked me for that recently. Typically, the users
> > > > are aware of the consequences but they still find them better than the
> > > > alternative (i.e. the lack of isolation is bad but is tolerable in their
> > > > configuration.)
> > > This does not make much sense... The whole point of a hypervisor is to 
> > > isolate
> > > guest between each other... So if you are happy with the lack of 
> > > isolation,
> > > then why are you using a hypervisor in the first place?
> >
> > There are a number of reasons, although they are all variations of the
> > same theme. In all these cases the IOMMU cannot be used for one reason
> > or the other (a device is not behind the IOMMU, or due to an errata,
> > etc.)
> >
> > - multiple baremetal apps
> > The user wants to run two or more baremetal (unikernel-like)
> > applications. The user owns both applications and she is not much
> > concerned about isolation (although it is always desirable when
> > possible.)
> >
> > - multiple OSes
> > This is similar to the one before, however, instead of multiple
> > baremetal apps, we are talking about multiple full OSes. For instance,
> > Linux and Android or Linux and VxWorks. Again, they are both maintained
> > by the same user (no multi-tenancy) so isolation is desirable but it is
> > not the top concern.
> >
> > - real-time / no real-time
> > The user wants to run a real-time OS or real-time baremetal app and a
> > non real-time OS. For instance a tiny baremetal app controlling one
> > specific device and Linux. Again, the user is responsible for both
> > systems so isolation is not a concern.
> >
> > In all these cases the user has to run multiple OSes or baremetal apps
> > so she needs a hypervisor. However, it is tolerable that the apps are
> > not actually fully isolated from each others because they are both
> > developed and deployed together by the same "owner".
> >
> 
> Basically, since we do not have an IOMMU, we would be able
> to ensure memory isolation via a NXP IP named xRDC (Extended
> Resource Domain Controller) that our boards have, which supervises
> the access to memory buses.
> 
> But before we get to think about isolation, we need to enable
> basic passthrough functionality (via 1:1 mapping, since no IOMMU).
> 
> Firstly, a good step forward would be to get any non-DMA-capable
> device passed-through and working. 
> I rebased onto upstream/staging branch and applied the hack
> that skips the setting of XEN_DOMCTL_CDF_iommu flag,
> that Julien specified.
> 
> Then I tried to passthrough the eMMC, but I got the following
> error:
> (XEN) DOM1: [0.879151] sdhci-esdhc-imx 4005d000.usdhc: can't request 
> region for resource [mem 0x4005d000-0x4005dfff]
> (XEN) DOM1: [0.891137] sdhci-esdhc-imx 4005d000.usdhc: sdhci_pltfm_init 
> failed -16
> (XEN) DOM1: [0.900249] sdhci-esdhc-imx: probe of 4005d000.usdhc failed 
> with error -16
> 
> Where 0x4005d000 is the physical address of the uSDHC(eMMC) node in the DT.
> It seems that the DomU1 kernel does not have access to that memory zone.

It looks like drivers/mmc/host/sdhci-pltfm.c:sdhci_pltfm_init failed,
but I cannot see a simple reason why it would. As Julien mentioned the
device tree snippet would be useful. Also the domU config and the full
device tree would be useful. i.e. did you add "xen,passthrough;" under
the related uSDHC node on the host device tree?


> I'm trying to passthrough the eMMC in order to mount DomU1's root
> on a SDCard partition, because I couldn't get to DomU1's Linux prompt
> when I tried to boot with a ramdisk module. I always get this error:
> (XEN) DOM1: [1.544199] RAMDISK: Couldn't find valid RAM disk image 
> starting at 0.
> 
> Could this be because the ramdisk is too big? The smallest I've tried with
> Is approximately 60MB in size. What size are the ramdisks that you
> are using in your dom0less booting demos?

I don't think so, I could boot with ramdisk 120MB in size or even
larger. It is probably an address calculation error: it is easy to make
a small mistake in the addresses so that they end up overlapping.
Sometimes it is even U-Boot that causes the overlaps.

I would suggest to use

[Xen-devel] [xen-unstable test] 144887: regressions - FAIL

2019-12-17 Thread osstest service owner

flight 144887 xen-unstable real [real]
http://logs.test-lab.xenproject.org/osstest/logs/144887/

Regressions :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
 test-armhf-armhf-libvirt 19 leak-check/check fail REGR. vs. 144850

Tests which did not succeed, but are not blocking:
 test-amd64-amd64-xl-qemut-win7-amd64 17 guest-stop            fail like 144827
 test-amd64-amd64-xl-rtds 16 guest-localmigrate   fail  like 144850
 test-amd64-amd64-xl-qemuu-win7-amd64 17 guest-stop            fail like 144850
 test-armhf-armhf-libvirt 14 saverestore-support-check    fail  like 144850
 test-amd64-i386-xl-qemuu-win7-amd64 17 guest-stop fail like 144850
 test-amd64-i386-xl-qemut-win7-amd64 17 guest-stop fail like 144850
 test-armhf-armhf-libvirt-raw 13 saverestore-support-check    fail  like 144850
 test-amd64-amd64-xl-qemuu-ws16-amd64 17 guest-stop            fail like 144850
 test-amd64-amd64-xl-qemut-ws16-amd64 17 guest-stop            fail like 144850
 test-amd64-i386-xl-qemuu-ws16-amd64 17 guest-stop fail like 144850
 test-xtf-amd64-amd64-4   72 xtf/test-hvm64-xsa-308   fail   never pass
 test-amd64-i386-xl-pvshim    12 guest-start                  fail   never pass
 test-amd64-amd64-libvirt-xsm 13 migrate-support-check        fail   never pass
 test-amd64-amd64-libvirt     13 migrate-support-check        fail   never pass
 test-amd64-i386-libvirt-xsm  13 migrate-support-check        fail   never pass
 test-amd64-i386-libvirt      13 migrate-support-check        fail   never pass
 test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 11 migrate-support-check 
fail never pass
 test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm 11 migrate-support-check 
fail never pass
 test-arm64-arm64-xl-credit2  13 migrate-support-check        fail   never pass
 test-arm64-arm64-xl-credit2  14 saverestore-support-check    fail   never pass
 test-arm64-arm64-xl-thunderx 13 migrate-support-check        fail   never pass
 test-arm64-arm64-xl-credit1  13 migrate-support-check        fail   never pass
 test-arm64-arm64-xl-thunderx 14 saverestore-support-check    fail   never pass
 test-arm64-arm64-xl-credit1  14 saverestore-support-check    fail   never pass
 test-arm64-arm64-xl          13 migrate-support-check        fail   never pass
 test-arm64-arm64-xl          14 saverestore-support-check    fail   never pass
 test-arm64-arm64-libvirt-xsm 13 migrate-support-check        fail   never pass
 test-arm64-arm64-libvirt-xsm 14 saverestore-support-check    fail   never pass
 test-armhf-armhf-xl-arndale  13 migrate-support-check        fail   never pass
 test-armhf-armhf-xl-arndale  14 saverestore-support-check    fail   never pass
 test-amd64-amd64-qemuu-nested-amd 17 debian-hvm-install/l1/l2  fail never pass
 test-arm64-arm64-xl-xsm      13 migrate-support-check        fail   never pass
 test-arm64-arm64-xl-xsm      14 saverestore-support-check    fail   never pass
 test-amd64-amd64-libvirt-vhd 12 migrate-support-check        fail   never pass
 test-armhf-armhf-xl          13 migrate-support-check        fail   never pass
 test-armhf-armhf-xl          14 saverestore-support-check    fail   never pass
 test-armhf-armhf-xl-multivcpu 13 migrate-support-check        fail  never pass
 test-armhf-armhf-xl-multivcpu 14 saverestore-support-check    fail  never pass
 test-armhf-armhf-xl-rtds     13 migrate-support-check        fail   never pass
 test-armhf-armhf-xl-rtds     14 saverestore-support-check    fail   never pass
 test-armhf-armhf-xl-cubietruck 13 migrate-support-check        fail never pass
 test-armhf-armhf-xl-cubietruck 14 saverestore-support-check    fail never pass
 test-armhf-armhf-libvirt     13 migrate-support-check        fail   never pass
 test-armhf-armhf-xl-credit2  13 migrate-support-check        fail   never pass
 test-armhf-armhf-xl-credit2  14 saverestore-support-check    fail   never pass
 test-arm64-arm64-xl-seattle  13 migrate-support-check        fail   never pass
 test-arm64-arm64-xl-seattle  14 saverestore-support-check    fail   never pass
 test-armhf-armhf-libvirt-raw 12 migrate-support-check        fail   never pass
 test-armhf-armhf-xl-vhd      12 migrate-support-check        fail   never pass
 test-armhf-armhf-xl-vhd      13 saverestore-support-check    fail   never pass
 test-armhf-armhf-xl-credit1  13 migrate-support-check        fail   never pass
 test-armhf-armhf-xl-credit1  14 saverestore-support-check    fail   never pass
 test-amd64-i386-xl-qemut-ws16-amd64 17 guest-stop  fail never pass

version targeted for testing:
 xen  21cb0bdcf4c12b8edd34f3d086edd76f2e974c32
baseline version:
 xen  c9115affa6f83aebe29ae9cbf503aa163911a5bb

Last test of basis   144850  2019-12-16 01:51:10 Z    1 days
Failing since        144878  2019-12-16 19:06:11 Z    0 days    2 attempts
Testing same since   144887  2019-12-17 04:24:45 Z    0 days    1 attempts

[Xen-devel] [ovmf test] 144900: regressions - FAIL

2019-12-17 Thread osstest service owner

flight 144900 ovmf real [real]
http://logs.test-lab.xenproject.org/osstest/logs/144900/

Regressions :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
 build-i386-xsm                6 xen-build                fail REGR. vs. 144637
 build-amd64                   6 xen-build                fail REGR. vs. 144637
 build-amd64-xsm               6 xen-build                fail REGR. vs. 144637
 build-i386                    6 xen-build                fail REGR. vs. 144637

Tests which did not succeed, but are not blocking:
 build-i386-libvirt            1 build-check(1)               blocked  n/a
 build-amd64-libvirt           1 build-check(1)               blocked  n/a
 test-amd64-amd64-xl-qemuu-ovmf-amd64  1 build-check(1) blocked n/a
 test-amd64-i386-xl-qemuu-ovmf-amd64  1 build-check(1)  blocked n/a

version targeted for testing:
 ovmf bfb141cf19dd6f9b8df8b9d0914a5b3b15e1a798
baseline version:
 ovmf 804666c86e7b6f04fe5c5cfdb13199c19e0e99b0

Last test of basis   144637  2019-12-09 09:09:49 Z    8 days
Failing since        144646  2019-12-10 01:39:53 Z    7 days   69 attempts
Testing same since   144770  2019-12-12 18:41:26 Z    4 days   58 attempts


People who touched revisions under test:
  Antoine Coeur 
  Ard Biesheuvel 
  Bob Feng 
  Jiewen Yao 
  Michael Kubacki 
  Pete Batard 
  Philippe Mathieu-Daude 
  Steven Shi 

jobs:
 build-amd64-xsm  fail
 build-i386-xsm   fail
 build-amd64  fail
 build-i386   fail
 build-amd64-libvirt  blocked 
 build-i386-libvirt   blocked 
 build-amd64-pvops pass
 build-i386-pvops pass
 test-amd64-amd64-xl-qemuu-ovmf-amd64 blocked 
 test-amd64-i386-xl-qemuu-ovmf-amd64  blocked 



sg-report-flight on osstest.test-lab.xenproject.org
logs: /home/logs/logs
images: /home/logs/images

Logs, config files, etc. are available at
http://logs.test-lab.xenproject.org/osstest/logs

Explanation of these reports, and of osstest in general, is at
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master

Test harness code can be found at
http://xenbits.xen.org/gitweb?p=osstest.git;a=summary


Not pushing.


commit bfb141cf19dd6f9b8df8b9d0914a5b3b15e1a798
Author: Pete Batard 
Date:   Tue Dec 10 18:23:04 2019 +

MdePkg/Include: Add DCC and BCM2835 SPCR UART types

As per the Microsoft Debug Port Table 2 (DBG2) documentation, that
can be found online, we are missing 2 serial interface types for
Arm DCC and Bcm2835 (the latter being used with the Raspberry Pi).

These same types are present in DebugPort2Table.h so add them to
SerialPortConsoleRedirectionTable.h too.

Note that we followed the same idiosyncrasies as DebugPort2Table
for naming these new macros.

Signed-off-by: Pete Batard 
Acked-by: Ard Biesheuvel 
Reviewed-by: Liming Gao 

commit 2fe25a74d6fee3c2ac0b930f7f3596cb432e766e
Author: Ard Biesheuvel 
Date:   Tue Mar 5 14:32:48 2019 +0100

ArmPkg/MmCommunicationDxe: relay architected PI events to MM context

PI defines a few architected events that have significance in the MM
context as well as in the non-secure DXE context. So register notify
handlers for these events, and relay them into the standalone MM world.

Signed-off-by: Ard Biesheuvel 
Reviewed-by: Jiewen Yao 
Reviewed-by: Achin Gupta 

commit d3add11e87dace180387562d6f1951f2bffbd3d9
Author: Michael Kubacki 
Date:   Wed Nov 20 17:31:24 2019 -0800

MdeModulePkg PeiCore: Improve comment semantics

This patch clarifies wording in several PeiCore comments to improve
reading comprehension.

Cc: Dandan Bi 
Cc: Liming Gao 
Cc: Jian J Wang 
Cc: Hao A Wu 
Signed-off-by: Michael Kubacki 
Reviewed-by: Liming Gao 
Reviewed-by: Jian J Wang 

commit d39d1260c615b716675f67f5c4e1f4f52df01dad
Author: Michael Kubacki 
Date:   Wed Nov 20 17:10:48 2019 -0800

MdeModulePkg PeiCore: Fix typos

Cc: Dandan Bi 
Cc: Liming Gao 
Cc: Jian J Wang 
Cc: Hao A Wu 
Signed-off-by: Michael Kubacki 
Reviewed-by: Liming Gao 
Reviewed-by: Philippe Mathieu-Daude 
Reviewed-by: Jian J Wang 

commit 97eedf5dfbaffde33210fd88066247cf0b7d3325
Author: Antoine Coeur 
Date:   Wed Dec 4 12:14:53

Re: [Xen-devel] [RFC PATCH 0/3] basic KASAN support for Xen PV domains

2019-12-17 Thread Boris Ostrovsky



> On Dec 17, 2019, at 9:08 AM, Sergey Dyasli  wrote:
> 
> This series allows to boot and run Xen PV kernels (Dom0 and DomU) with
> CONFIG_KASAN=y. It has been used internally for some time now with good
> results for finding memory corruption issues in Dom0 kernel.
> 
> Only Outline instrumentation is supported at the moment.
> 
> Patch 1 is of RFC quality
> Patches 2-3 are independent and quite self-contained.


Don’t you need to initialize kasan before, for example, calling 
kasan_alloc_pages() in patch 2?

-boris


___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] [xen-unstable-smoke test] 144898: tolerable all pass - PUSHED

2019-12-17 Thread osstest service owner

flight 144898 xen-unstable-smoke real [real]
http://logs.test-lab.xenproject.org/osstest/logs/144898/

Failures :-/ but no regressions.

Tests which did not succeed, but are not blocking:
 test-amd64-amd64-libvirt     13 migrate-support-check        fail   never pass
 test-arm64-arm64-xl-xsm      13 migrate-support-check        fail   never pass
 test-arm64-arm64-xl-xsm      14 saverestore-support-check    fail   never pass
 test-armhf-armhf-xl          13 migrate-support-check        fail   never pass
 test-armhf-armhf-xl          14 saverestore-support-check    fail   never pass

version targeted for testing:
 xen  f50a4f6e244cfc8e773300c03aaf4db391f3028a
baseline version:
 xen  21cb0bdcf4c12b8edd34f3d086edd76f2e974c32

Last test of basis   144877  2019-12-16 19:00:23 Z    0 days
Testing same since   144898  2019-12-17 15:00:35 Z    0 days    1 attempts


People who touched revisions under test:
  Andrew Cooper 
  Wei Liu 

jobs:
 build-arm64-xsm  pass
 build-amd64  pass
 build-armhf  pass
 build-amd64-libvirt  pass
 test-armhf-armhf-xl  pass
 test-arm64-arm64-xl-xsm  pass
 test-amd64-amd64-xl-qemuu-debianhvm-amd64pass
 test-amd64-amd64-libvirt pass



sg-report-flight on osstest.test-lab.xenproject.org
logs: /home/logs/logs
images: /home/logs/images

Logs, config files, etc. are available at
http://logs.test-lab.xenproject.org/osstest/logs

Explanation of these reports, and of osstest in general, is at
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master

Test harness code can be found at
http://xenbits.xen.org/gitweb?p=osstest.git;a=summary


Pushing revision :

To xenbits.xen.org:/home/xen/git/xen.git
   21cb0bdcf4..f50a4f6e24  f50a4f6e244cfc8e773300c03aaf4db391f3028a -> smoke

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] Xen ARM Dom0less passthrough without IOMMU

2019-12-17 Thread Julien Grall


Hi Andrei,

On 17/12/2019 17:20, Andrei Cherechesu wrote:

On Mon, 16 Dec 2019, Julien Grall wrote:

On 16/12/2019 23:05, Stefano Stabellini wrote:

On Mon, 16 Dec 2019, Julien Grall wrote:

On 16/12/2019 18:02, Andrei Cherechesu wrote:
But even with this patch, RAM in DomU is not direct mapped (i.e Guest
Physical
Address == Host Physical Address). This means that DMA-capable device
would
not work properly in DomU.

We could theoretically map DomU direct mapped, but this would break the
isolation provided by the hypervisor.


Yes, being able to map the DomU memory 1:1 can be pretty useful for some
very embedded dom0less configurations, in fact I was surprised that a
couple of Xilinx users asked me for that recently. Typically, the users
are aware of the consequences but they still find them better than the
alternative (i.e. the lack of isolation is bad but is tolerable in their
configuration.)

This does not make much sense... The whole point of a hypervisor is to isolate
guest between each other... So if you are happy with the lack of isolation,
then why are you using a hypervisor in the first place?


There are a number of reasons, although they are all variations of the
same theme. In all these cases the IOMMU cannot be used for one reason
or the other (a device is not behind the IOMMU, or due to an errata,
etc.)

- multiple baremetal apps
The user wants to run two or more baremetal (unikernel-like)
applications. The user owns both applications and she is not much
concerned about isolation (although it is always desirable when
possible.)

- multiple OSes
This is similar to the one before, however, instead of multiple
baremetal apps, we are talking about multiple full OSes. For instance,
Linux and Android or Linux and VxWorks. Again, they are both maintained
by the same user (no multi-tenancy) so isolation is desirable but it is
not the top concern.

- real-time / no real-time
The user wants to run a real-time OS or real-time baremetal app and a
non real-time OS. For instance a tiny baremetal app controlling one
specific device and Linux. Again, the user is responsible for both
systems so isolation is not a concern.

In all these cases the user has to run multiple OSes or baremetal apps
so she needs a hypervisor. However, it is tolerable that the apps are
not actually fully isolated from each others because they are both
developed and deployed together by the same "owner".



Basically, since we do not have an IOMMU, we would be able
to ensure memory isolation via a NXP IP named xRDC (Extended
Resource Domain Controller) that our boards have, which supervises
the access to memory buses.


Ok, so you have some sort of MPU. I assume this will be between the 
devices and the memory, am I right?


But before we get to think about isolation, we need to enable
basic passthrough functionality (via 1:1 mapping, since no IOMMU).


So you are in better place than what Stefano described. Your use case is 
probably the only place where a 1:1 mapping would be warranted as 
isolation is still provided by the HW.




Firstly, a good step forward would be to get any non-DMA-capable
device passed-through and working.
I rebased onto upstream/staging branch and applied the hack
that skips the setting of XEN_DOMCTL_CDF_iommu flag,
that Julien specified.

Then I tried to passthrough the eMMC, but I got the following
error:
(XEN) DOM1: [0.879151] sdhci-esdhc-imx 4005d000.usdhc: can't request region 
for resource [mem 0x4005d000-0x4005dfff]
(XEN) DOM1: [0.891137] sdhci-esdhc-imx 4005d000.usdhc: sdhci_pltfm_init 
failed -16
(XEN) DOM1: [0.900249] sdhci-esdhc-imx: probe of 4005d000.usdhc failed with 
error -16

Where 0x4005d000 is the physical address of the uSDHC(eMMC) node in the DT.
It seems that the DomU1 kernel does not have access to that memory zone.


Could you paste your partial Device-Tree and domain node?



I'm trying to passthrough the eMMC in order to mount DomU1's root
on a SDCard partition, because I couldn't get to DomU1's Linux prompt
when I tried to boot with a ramdisk module. I always get this error:
(XEN) DOM1: [1.544199] RAMDISK: Couldn't find valid RAM disk image starting 
at 0.


How did you pass the ramdisk to dom1?



Could this be because the ramdisk is too big? The smallest I've tried with
Is approximately 60MB in size. What size are the ramdisks that you
are using in your dom0less booting demos?


How much memory did you give to your guest?

[...]


I'll gladly write the patch if you give me some basic
instructions regarding it, because I'm not that familiar with
all the Xen internal mechanisms, and I wouldn't know where
to look in order to ensure everything is properly done.


I am going to suggest a quick and dirty way but it should get you to the 
point where 1:1 mapping will work in basic use case:


1) Update the guest memory map in xen/include/public/arch-arm.h (see 
GUEST_*) so all the regions don't overlap your RAM. The best way would 
be to re-use the same address for the

Re: [Xen-devel] [PATCH v3 22/22] golang/xenlight: add error return type to Context.Cpupoolinfo

2019-12-17 Thread George Dunlap

On 12/10/19 3:47 PM, Nick Rosbrook wrote:
> From: Nick Rosbrook 
> 
> A previous commit that removed Context.CheckOpen revealed
> an ineffectual assignment to err in Context.Cpupoolinfo, as
> there is no error return type.
> 
> Since it appears that the intent is to return an error here,
> add an error return value to the function signature.
> 
> Signed-off-by: Nick Rosbrook 

Reviewed-by: George Dunlap 

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH v3 21/22] golang/xenlight: revise use of Context type

2019-12-17 Thread George Dunlap

On 12/10/19 3:47 PM, Nick Rosbrook wrote:
> From: Nick Rosbrook 
> 
> Remove the exported global context variable, 'Ctx.' Generally, it is
> better to not export global variables for use through a Go package.
> However, there are some exceptions that can be found in the standard
> library.
> 
> Add a NewContext function instead, and remove the Open, IsOpen, and
> CheckOpen functions as a result.
> 
> Also, comment-out an ineffectual assignment to 'err' inside the function
> Context.CpupoolInfo so that compilation does not fail.
> 
> Signed-off-by: Nick Rosbrook 

Reviewed-by: George Dunlap 

With one requested change...

> ---
>  tools/golang/xenlight/xenlight.go | 219 +-
>  1 file changed, 34 insertions(+), 185 deletions(-)
> 
> diff --git a/tools/golang/xenlight/xenlight.go 
> b/tools/golang/xenlight/xenlight.go
> index f32eb11384..1c431fa4e5 100644
> --- a/tools/golang/xenlight/xenlight.go
> +++ b/tools/golang/xenlight/xenlight.go
> @@ -74,6 +74,39 @@ func (e Error) Error() string {
>   return fmt.Sprintf("libxl error: %d", -e)
>  }
>  
> +// Context represents a libxl_ctx.
> +type Context struct {
> + ctx*C.libxl_ctx
> + logger *C.xentoollog_logger_stdiostream
> +}
> +
> +// NewContext returns a new Context.
> +func NewContext() (*Context, error) {
> + var ctx Context
> +
> + ctx.logger = C.xtl_createlogger_stdiostream(C.stderr, C.XTL_ERROR, 0)
> +
> + ret := C.libxl_ctx_alloc(, C.LIBXL_VERSION, 0, 
> (*C.xentoollog_logger)(unsafe.Pointer(ctx.logger)))

This line looks to be 114 characters long, which seems a bit much. :-)
Mind breaking it just before the last argument?

Thanks,
 -George

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH v11 2/6] xenbus/backend: Protect xenbus callback with lock

2019-12-17 Thread SeongJae Park

On Tue, 17 Dec 2019 18:10:19 +0100 "Jürgen Groß"  wrote:

> On 17.12.19 17:24, SeongJae Park wrote:
> > On Tue, 17 Dec 2019 17:13:42 +0100 "Jürgen Groß"  wrote:
> > 
> >> On 17.12.19 17:07, SeongJae Park wrote:
> >>> From: SeongJae Park 
> >>>
> >>> 'reclaim_memory' callback can race with a driver code as this callback
> >>> will be called from any memory pressure detected context.  To deal with
> >>> the case, this commit adds a spinlock in the 'xenbus_device'.  Whenever
> >>> 'reclaim_memory' callback is called, the lock of the device which passed
> >>> to the callback as its argument is locked.  Thus, drivers registering
> >>> their 'reclaim_memory' callback should protect the data that might race
> >>> with the callback with the lock by themselves.
> >>>
> >>> Signed-off-by: SeongJae Park 
> >>> ---
> >>>drivers/xen/xenbus/xenbus_probe.c |  1 +
> >>>drivers/xen/xenbus/xenbus_probe_backend.c | 10 --
> >>>include/xen/xenbus.h  |  2 ++
> >>>3 files changed, 11 insertions(+), 2 deletions(-)
> >>>
> >>> diff --git a/drivers/xen/xenbus/xenbus_probe.c 
> >>> b/drivers/xen/xenbus/xenbus_probe.c
> >>> index 5b471889d723..b86393f172e6 100644
> >>> --- a/drivers/xen/xenbus/xenbus_probe.c
> >>> +++ b/drivers/xen/xenbus/xenbus_probe.c
> >>> @@ -472,6 +472,7 @@ int xenbus_probe_node(struct xen_bus_type *bus,
> >>>   goto fail;
> >>>
> >>>   dev_set_name(>dev, "%s", devname);
> >>> + spin_lock_init(>reclaim_lock);
> >>>
> >>>   /* Register with generic device framework. */
> >>>   err = device_register(>dev);
> >>> diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c 
> >>> b/drivers/xen/xenbus/xenbus_probe_backend.c
> >>> index 7e78ebef7c54..516aa64b9967 100644
> >>> --- a/drivers/xen/xenbus/xenbus_probe_backend.c
> >>> +++ b/drivers/xen/xenbus/xenbus_probe_backend.c
> >>> @@ -251,12 +251,18 @@ static int backend_probe_and_watch(struct 
> >>> notifier_block *notifier,
> >>>static int backend_reclaim_memory(struct device *dev, void *data)
> >>>{
> >>>   const struct xenbus_driver *drv;
> >>> + struct xenbus_device *xdev;
> >>> + unsigned long flags;
> >>>
> >>>   if (!dev->driver)
> >>>   return 0;
> >>>   drv = to_xenbus_driver(dev->driver);
> >>> - if (drv && drv->reclaim_memory)
> >>> - drv->reclaim_memory(to_xenbus_device(dev));
> >>> + if (drv && drv->reclaim_memory) {
> >>> + xdev = to_xenbus_device(dev);
> >>> + spin_trylock_irqsave(>reclaim_lock, flags);
> >>
> >> You need spin_lock_irqsave() here. Or maybe spin_lock() would be fine,
> >> too? I can't see a reason why you'd want to disable irqs here.
> > 
> > I needed to disable irq here as this is called from the memory shrinker 
> > context.
> 
> Okay.
> 
> > 
> > Also, used 'trylock' because the 'probe()' and 'remove()' code of the driver
> > might include memory allocation.  And the xen-blkback actually does.  If the
> > allocation shows a memory pressure during the allocation, it will trigger 
> > this
> > shrinker callback again and then deadlock.
> 
> In that case you need to either return when you didn't get the lock or

Yes, it should.  Cannot believe how I posted this code.  Seems I made some
terrible mistake while formatting patches.  Anyway, will return if it fails to
acquire the lock, in the next version.


Thanks,
SeongJae Park

> 
> - when obtaining the lock during probe() and remove() set a variable
>containing the current cpu number
> - and reset that to e.g NR_CPUS before releasing the lock again
> - in the shrinker callback do trylock, and if you didn't get the lock
>test whether the cpu-variable above is set to your current cpu and
>continue only if yes; if not, redo the trylock
> 
> 
> Juergen

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] Xen ARM Dom0less passthrough without IOMMU

2019-12-17 Thread Andrei Cherechesu

> On Mon, 16 Dec 2019, Julien Grall wrote:
> > On 16/12/2019 23:05, Stefano Stabellini wrote:
> > > On Mon, 16 Dec 2019, Julien Grall wrote:
> > > > On 16/12/2019 18:02, Andrei Cherechesu wrote:
> > > > But even with this patch, RAM in DomU is not direct mapped (i.e Guest
> > > > Physical
> > > > Address == Host Physical Address). This means that DMA-capable device
> > > > would
> > > > not work properly in DomU.
> > > >
> > > > We could theoretically map DomU direct mapped, but this would break the
> > > > isolation provided by the hypervisor.
> > >
> > > Yes, being able to map the DomU memory 1:1 can be pretty useful for some
> > > very embedded dom0less configurations, in fact I was surprised that a
> > > couple of Xilinx users asked me for that recently. Typically, the users
> > > are aware of the consequences but they still find them better than the
> > > alternative (i.e. the lack of isolation is bad but is tolerable in their
> > > configuration.)
> > > This does not make much sense... The whole point of a hypervisor is to 
> > > isolate
> > > guests from each other... So if you are happy with the lack of isolation,
> > > then why are you using a hypervisor in the first place?
>
> There are a number of reasons, although they are all variation of the
> same theme. In all these cases the IOMMU cannot be used for one reason
> or the other (a device is not behind the IOMMU, or due to an errata,
> etc.)
>
> - multiple baremetal apps
> The user wants to run two or more baremetal (unikernel-like)
> applications. The user owns both applications and she is not much
> concerned about isolation (although it is always desirable when
> possible.)
>
> - multiple OSes
> This is similar to the one before, however, instead of multiple
> baremetal apps, we are talking about multiple full OSes. For instance,
> Linux and Android or Linux and VxWorks. Again, they are both maintained
> by the same user (no multi-tenancy) so isolation is desirable but it is
> not the top concern.
>
> - real-time / no real-time
> The user wants to run a real-time OS or real-time baremetal app and a
> non real-time OS. For instance a tiny baremetal app controlling one
> specific device and Linux. Again, the user is responsible for both
> systems so isolation is not a concern.
>
> In all these cases the user has to run multiple OSes or baremetal apps
> so she needs a hypervisor. However, it is tolerable that the apps are
> not actually fully isolated from each others because they are both
> developed and deployed together by the same "owner".
>

Basically, since we do not have an IOMMU, we would be able
to ensure memory isolation via a NXP IP named xRDC (Extended
Resource Domain Controller) that our boards have, which supervises
the access to memory buses.

But before we get to think about isolation, we need to enable
basic passthrough functionality (via 1:1 mapping, since no IOMMU).

Firstly, a good step forward would be to get any non-DMA-capable
device passed-through and working. 
I rebased onto upstream/staging branch and applied the hack
that skips the setting of XEN_DOMCTL_CDF_iommu flag,
that Julien specified.

Then I tried to passthrough the eMMC, but I got the following
error:
(XEN) DOM1: [0.879151] sdhci-esdhc-imx 4005d000.usdhc: can't request region 
for resource [mem 0x4005d000-0x4005dfff]
(XEN) DOM1: [0.891137] sdhci-esdhc-imx 4005d000.usdhc: sdhci_pltfm_init 
failed -16
(XEN) DOM1: [0.900249] sdhci-esdhc-imx: probe of 4005d000.usdhc failed with 
error -16

Where 0x4005d000 is the physical address of the uSDHC(eMMC) node in the DT.
It seems that the DomU1 kernel does not have access to that memory zone.

I'm trying to passthrough the eMMC in order to mount DomU1's root
on a SDCard partition, because I couldn't get to DomU1's Linux prompt
when I tried to boot with a ramdisk module. I always get this error:
(XEN) DOM1: [1.544199] RAMDISK: Couldn't find valid RAM disk image starting 
at 0.

Could this be because the ramdisk is too big? The smallest I've tried with
is approximately 60MB in size. What size are the ramdisks that you
are using in your dom0less booting demos?

> > >  From an implementation perspective, it should be a matter of calling
> > > allocate_memory_11 instead of allocate_memory from construct_domU. I
> > > wanted to experiment with it myself but I haven't had the time. If
> > > nothing else, it would be useful to have a patch around to do it if
> > > needed.
> > This is not that simple. You at least also need to:
> > - Update the code to generate the DT based on the new 1:1 address
> > - Modify the various emulation in Xen because they rely on Xen guest
> > memory layout
> > - Modify is_domain_direct_mapped() to deal with guest
> >
> > I probably missed other bits. Anyway, this is not something I am willing to
> > accept upstream as this breaks the core idea of a hypervisor.
>
> If you prefer not to have it upstream, I would be happy to maintain it
> downstream in Xilinx/Xen or another

Re: [Xen-devel] [PATCH v11 2/6] xenbus/backend: Protect xenbus callback with lock

2019-12-17 Thread Jürgen Groß


On 17.12.19 17:24, SeongJae Park wrote:

On Tue, 17 Dec 2019 17:13:42 +0100 "Jürgen Groß"  wrote:


On 17.12.19 17:07, SeongJae Park wrote:

From: SeongJae Park 

'reclaim_memory' callback can race with a driver code as this callback
will be called from any memory pressure detected context.  To deal with
the case, this commit adds a spinlock in the 'xenbus_device'.  Whenever
'reclaim_memory' callback is called, the lock of the device which passed
to the callback as its argument is locked.  Thus, drivers registering
their 'reclaim_memory' callback should protect the data that might race
with the callback with the lock by themselves.

Signed-off-by: SeongJae Park 
---
   drivers/xen/xenbus/xenbus_probe.c |  1 +
   drivers/xen/xenbus/xenbus_probe_backend.c | 10 --
   include/xen/xenbus.h  |  2 ++
   3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/xen/xenbus/xenbus_probe.c 
b/drivers/xen/xenbus/xenbus_probe.c
index 5b471889d723..b86393f172e6 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -472,6 +472,7 @@ int xenbus_probe_node(struct xen_bus_type *bus,
goto fail;
   
   	dev_set_name(>dev, "%s", devname);

+   spin_lock_init(>reclaim_lock);
   
   	/* Register with generic device framework. */

err = device_register(>dev);
diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c 
b/drivers/xen/xenbus/xenbus_probe_backend.c
index 7e78ebef7c54..516aa64b9967 100644
--- a/drivers/xen/xenbus/xenbus_probe_backend.c
+++ b/drivers/xen/xenbus/xenbus_probe_backend.c
@@ -251,12 +251,18 @@ static int backend_probe_and_watch(struct notifier_block 
*notifier,
   static int backend_reclaim_memory(struct device *dev, void *data)
   {
const struct xenbus_driver *drv;
+   struct xenbus_device *xdev;
+   unsigned long flags;
   
   	if (!dev->driver)

return 0;
drv = to_xenbus_driver(dev->driver);
-   if (drv && drv->reclaim_memory)
-   drv->reclaim_memory(to_xenbus_device(dev));
+   if (drv && drv->reclaim_memory) {
+   xdev = to_xenbus_device(dev);
+   spin_trylock_irqsave(>reclaim_lock, flags);


You need spin_lock_irqsave() here. Or maybe spin_lock() would be fine,
too? I can't see a reason why you'd want to disable irqs here.


I needed to disable irq here as this is called from the memory shrinker context.


Okay.



Also, used 'trylock' because the 'probe()' and 'remove()' code of the driver
might include memory allocation.  And the xen-blkback actually does.  If the
allocation shows a memory pressure during the allocation, it will trigger this
shrinker callback again and then deadlock.


In that case you need to either return when you didn't get the lock or

- when obtaining the lock during probe() and remove() set a variable
  containing the current cpu number
- and reset that to e.g NR_CPUS before releasing the lock again
- in the shrinker callback do trylock, and if you didn't get the lock
  test whether the cpu-variable above is set to your current cpu and
  continue only if yes; if not, redo the trylock


Juergen

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH V4 2/4] x86/altp2m: Add hypercall to set a range of sve bits

2019-12-17 Thread Jan Beulich

On 17.12.2019 16:12, Alexandru Stefan ISAILA wrote:
> @@ -4711,6 +4712,20 @@ static int do_altp2m_op(
>  }
>  break;
>  
> +case HVMOP_altp2m_set_suppress_ve_multi:
> +if ( a.u.suppress_ve_multi.pad1 ||
> + a.u.suppress_ve_multi.first_error_code ||
> + a.u.suppress_ve_multi.first_error ||
> + a.u.suppress_ve_multi.first_gfn > 
> a.u.suppress_ve_multi.last_gfn )
> +rc = -EINVAL;

An error having occurred doesn't prevent scheduling of a
continuation. When you come back here, you'll then return
-EINVAL instead of continuing the prior operation.

> --- a/xen/arch/x86/mm/p2m.c
> +++ b/xen/arch/x86/mm/p2m.c
> @@ -3064,6 +3064,70 @@ out:
>  return rc;
>  }
>  
> +/*
> + * Set/clear the #VE suppress bit for multiple pages.  Only available on VMX.
> + */
> +int p2m_set_suppress_ve_multi(struct domain *d,
> +  struct xen_hvm_altp2m_suppress_ve_multi *sve)
> +{
> +struct p2m_domain *host_p2m = p2m_get_hostp2m(d);
> +struct p2m_domain *ap2m = NULL;
> +struct p2m_domain *p2m = host_p2m;
> +uint64_t start = sve->first_gfn;
> +int rc = 0;
> +uint64_t max_phys_addr = (1UL << d->arch.cpuid->extd.maxphysaddr) - 1;
> +
> +if ( sve->view > 0 )
> +{
> +if ( sve->view >= MAX_ALTP2M ||
> + d->arch.altp2m_eptp[array_index_nospec(sve->view, MAX_EPTP)] ==
> + mfn_x(INVALID_MFN) )
> +return -EINVAL;
> +
> +p2m = ap2m = d->arch.altp2m_p2m[array_index_nospec(sve->view,
> +   MAX_ALTP2M)];
> +}
> +
> +p2m_lock(host_p2m);
> +
> +if ( ap2m )
> +p2m_lock(ap2m);
> +
> +while ( sve->last_gfn >= start && start < max_phys_addr )

Why don't you clip ->last_gfn ahead of the loop, saving one
comparison per iteration?

> +{
> +p2m_access_t a;
> +p2m_type_t t;
> +mfn_t mfn;
> +int err = 0;
> +
> +if ( altp2m_get_effective_entry(p2m, _gfn(start), , , , 
> AP2MGET_query) )
> +a = p2m->default_access;
> +
> +if ( (err = p2m->set_entry(p2m, _gfn(start), mfn, PAGE_ORDER_4K, t, 
> a,
> +   sve->suppress_ve)) && !sve->first_error )
> +{
> +sve->first_error = start; /* Save the gfn of the first error */
> +sve->first_error_code = err; /* Save the first error code */
> +}

What if the first error occurs on GFN 0? I guess you want to check
->first_error_code against zero in the condition.

> --- a/xen/include/public/hvm/hvm_op.h
> +++ b/xen/include/public/hvm/hvm_op.h
> @@ -46,6 +46,16 @@ struct xen_hvm_altp2m_suppress_ve {
>  uint64_t gfn;
>  };
>  
> +struct xen_hvm_altp2m_suppress_ve_multi {
> +uint16_t view;
> +uint8_t suppress_ve; /* Boolean type. */
> +uint8_t pad1;
> +uint32_t first_error_code; /* Must be set to 0 . */

int32_t perhaps, since error codes are negative?

Jan

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH V4 1/4] x86/mm: Add array_index_nospec to guest provided index values

2019-12-17 Thread Jan Beulich

On 17.12.2019 16:12, Alexandru Stefan ISAILA wrote:
> --- a/xen/arch/x86/mm/mem_access.c
> +++ b/xen/arch/x86/mm/mem_access.c
> @@ -367,10 +367,11 @@ long p2m_set_mem_access(struct domain *d, gfn_t gfn, 
> uint32_t nr,
>  if ( altp2m_idx )
>  {
>  if ( altp2m_idx >= MAX_ALTP2M ||
> - d->arch.altp2m_eptp[altp2m_idx] == mfn_x(INVALID_MFN) )
> + d->arch.altp2m_eptp[array_index_nospec(altp2m_idx, MAX_EPTP)] ==

The bounds check is against MAX_ALTP2M. Both MAX_ values look to be
independent, which means bounds check and value passed to the
helper need to match up (not just here).

> --- a/xen/arch/x86/mm/p2m-ept.c
> +++ b/xen/arch/x86/mm/p2m-ept.c
> @@ -1353,7 +1353,8 @@ void setup_ept_dump(void)
>  
>  void p2m_init_altp2m_ept(struct domain *d, unsigned int i)
>  {
> -struct p2m_domain *p2m = d->arch.altp2m_p2m[i];
> +struct p2m_domain *p2m =
> +   d->arch.altp2m_p2m[array_index_nospec(i, MAX_ALTP2M)];
>  struct p2m_domain *hostp2m = p2m_get_hostp2m(d);
>  struct ept_data *ept;
>  
> @@ -1366,7 +1367,7 @@ void p2m_init_altp2m_ept(struct domain *d, unsigned int 
> i)
>  p2m->max_mapped_pfn = p2m->max_remapped_gfn = 0;
>  ept = >ept;
>  ept->mfn = pagetable_get_pfn(p2m_get_pagetable(p2m));
> -d->arch.altp2m_eptp[i] = ept->eptp;
> +d->arch.altp2m_eptp[array_index_nospec(i, MAX_EPTP)] = ept->eptp;
>  }
>  
>  unsigned int p2m_find_altp2m_by_eptp(struct domain *d, uint64_t eptp)
> --- a/xen/arch/x86/mm/p2m.c
> +++ b/xen/arch/x86/mm/p2m.c
> @@ -2499,7 +2499,7 @@ static void p2m_reset_altp2m(struct domain *d, unsigned 
> int idx,
>  struct p2m_domain *p2m;
>  
>  ASSERT(idx < MAX_ALTP2M);
> -p2m = d->arch.altp2m_p2m[idx];
> +p2m = d->arch.altp2m_p2m[array_index_nospec(idx, MAX_ALTP2M)];
>  
>  p2m_lock(p2m);
>  
> @@ -2540,7 +2540,7 @@ static int p2m_activate_altp2m(struct domain *d, 
> unsigned int idx)
>  
>  ASSERT(idx < MAX_ALTP2M);
>  
> -p2m = d->arch.altp2m_p2m[idx];
> +p2m = d->arch.altp2m_p2m[array_index_nospec(idx, MAX_ALTP2M)];

All of the above have a more or less significant disconnect between
the bounds check and the use as array index. I think it would be
quite helpful if these could live close to one another, so one can
(see further up) easily prove that both specified bounds actually
match up.

Jan

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH] xen/page_alloc: statically allocate bootmem_region_list

2019-12-17 Thread Julien Grall


Hi,

On 17/12/2019 16:37, Jan Beulich wrote:

I'm sorry for the non-threaded reply, but my mail client has
mixed up this mail with another one, so I have nothing to
properly reply to. With one stylistic issue taken care of
(blanks around the binary operator / )
Reviewed-by: Jan Beulich 
The change would be easy enough to do while committing, but
said mailbox issue would either require someone else to
apply the change, or you to send a v2 (which then hopefully
won't end up garbled).


I am in the middle of committing other patches on Arm, so I can commit it.



Iirc this was suggested before, so it would be nice if the
patch could also gain a suitable Suggested-by.


I suggested it on [1] but this was based on a previous discussion about 
an Arm bug (see [2]). So I am not sure who to put in the Suggested-by 
tag here.


I will commit without it.

Cheers,

[1] <3d7f6e45-4c62-b314-7110-2e998bcdd...@arm.com>
[2] <5f71588b-274a-bdb7-d324-5ff9177a0...@arm.com>



Jan



--
Julien Grall

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] [ovmf test] 144895: regressions - FAIL

2019-12-17 Thread osstest service owner

flight 144895 ovmf real [real]
http://logs.test-lab.xenproject.org/osstest/logs/144895/

Regressions :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
 build-i386-xsm                6 xen-build                fail REGR. vs. 144637
 build-amd64                   6 xen-build                fail REGR. vs. 144637
 build-amd64-xsm               6 xen-build                fail REGR. vs. 144637
 build-i386                    6 xen-build                fail REGR. vs. 144637

Tests which did not succeed, but are not blocking:
 test-amd64-amd64-xl-qemuu-ovmf-amd64  1 build-check(1) blocked n/a
 build-amd64-libvirt   1 build-check(1)   blocked  n/a
 build-i386-libvirt1 build-check(1)   blocked  n/a
 test-amd64-i386-xl-qemuu-ovmf-amd64  1 build-check(1)  blocked n/a

version targeted for testing:
 ovmf bfb141cf19dd6f9b8df8b9d0914a5b3b15e1a798
baseline version:
 ovmf 804666c86e7b6f04fe5c5cfdb13199c19e0e99b0

Last test of basis   144637  2019-12-09 09:09:49 Z    8 days
Failing since        144646  2019-12-10 01:39:53 Z    7 days   68 attempts
Testing same since   144770  2019-12-12 18:41:26 Z    4 days   57 attempts


People who touched revisions under test:
  Antoine Coeur 
  Ard Biesheuvel 
  Bob Feng 
  Jiewen Yao 
  Michael Kubacki 
  Pete Batard 
  Philippe Mathieu-Daude 
  Steven Shi 

jobs:
 build-amd64-xsm  fail
 build-i386-xsm   fail
 build-amd64  fail
 build-i386   fail
 build-amd64-libvirt  blocked 
 build-i386-libvirt   blocked 
 build-amd64-pvops pass
 build-i386-pvops pass
 test-amd64-amd64-xl-qemuu-ovmf-amd64 blocked 
 test-amd64-i386-xl-qemuu-ovmf-amd64  blocked 



sg-report-flight on osstest.test-lab.xenproject.org
logs: /home/logs/logs
images: /home/logs/images

Logs, config files, etc. are available at
http://logs.test-lab.xenproject.org/osstest/logs

Explanation of these reports, and of osstest in general, is at
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master

Test harness code can be found at
http://xenbits.xen.org/gitweb?p=osstest.git;a=summary


Not pushing.


commit bfb141cf19dd6f9b8df8b9d0914a5b3b15e1a798
Author: Pete Batard 
Date:   Tue Dec 10 18:23:04 2019 +

MdePkg/Include: Add DCC and BCM2835 SPCR UART types

As per the Microsoft Debug Port Table 2 (DBG2) documentation, that
can be found online, we are missing 2 serial interface types for
Arm DCC and Bcm2835 (the latter being used with the Raspberry Pi).

These same types are present in DebugPort2Table.h so add them to
SerialPortConsoleRedirectionTable.h too.

Note that we followed the same idiosyncrasies as DebugPort2Table
for naming these new macros.

Signed-off-by: Pete Batard 
Acked-by: Ard Biesheuvel 
Reviewed-by: Liming Gao 

commit 2fe25a74d6fee3c2ac0b930f7f3596cb432e766e
Author: Ard Biesheuvel 
Date:   Tue Mar 5 14:32:48 2019 +0100

ArmPkg/MmCommunicationDxe: relay architected PI events to MM context

PI defines a few architected events that have significance in the MM
context as well as in the non-secure DXE context. So register notify
handlers for these events, and relay them into the standalone MM world.

Signed-off-by: Ard Biesheuvel 
Reviewed-by: Jiewen Yao 
Reviewed-by: Achin Gupta 

commit d3add11e87dace180387562d6f1951f2bffbd3d9
Author: Michael Kubacki 
Date:   Wed Nov 20 17:31:24 2019 -0800

MdeModulePkg PeiCore: Improve comment semantics

This patch clarifies wording in several PeiCore comments to improve
reading comprehension.

Cc: Dandan Bi 
Cc: Liming Gao 
Cc: Jian J Wang 
Cc: Hao A Wu 
Signed-off-by: Michael Kubacki 
Reviewed-by: Liming Gao 
Reviewed-by: Jian J Wang 

commit d39d1260c615b716675f67f5c4e1f4f52df01dad
Author: Michael Kubacki 
Date:   Wed Nov 20 17:10:48 2019 -0800

MdeModulePkg PeiCore: Fix typos

Cc: Dandan Bi 
Cc: Liming Gao 
Cc: Jian J Wang 
Cc: Hao A Wu 
Signed-off-by: Michael Kubacki 
Reviewed-by: Liming Gao 
Reviewed-by: Philippe Mathieu-Daude 
Reviewed-by: Jian J Wang 

commit 97eedf5dfbaffde33210fd88066247cf0b7d3325
Author: Antoine Coeur 
Date:   Wed Dec 4 12:14:53

Re: [Xen-devel] [PATCH 0/6] x86/suspend: State cleanup

2019-12-17 Thread Jan Beulich

On 17.12.2019 17:33, Andrew Cooper wrote:
> On 17/12/2019 16:17, Jan Beulich wrote:
>> On 13.12.2019 20:04, Andrew Cooper wrote:
>>> Andrew Cooper (6):
>>>   x86/suspend: Clarify and improve the behaviour of do_suspend_lowlevel()
>>>   x86/suspend: Don't bother saving %cr3, %ss or flags
>>>   x86/suspend: Don't save unnecessary GPRs
>>>   x86/suspend: Restore cr4 later during resume
>>>   x86/suspend: Expand macros in wakeup_prot.S
>>>   x86/suspend: Drop save_rest_processor_state() completely
>>>
>>>  xen/arch/x86/acpi/suspend.c |  55 ++--
>>>  xen/arch/x86/acpi/wakeup_prot.S | 136 
>>> 
>>>  xen/arch/x86/boot/wakeup.S  |   2 +-
>>>  3 files changed, 46 insertions(+), 147 deletions(-)
>> Based on Roger's review
>> Acked-by: Jan Beulich 
>>
>> One remark on the combination of patches 2 and 5: The loading of
>> the stack related registers would now seem to be a fair candidate
>> for using LSS (generally to be preferred over MOV-to-SS).
> 
> Well... You've just fixed c/s ffa21ea5303 in the emulator, and it
> demonstrates why LSS can't be used.

Hmm, indeed, how did I forget? (It's really very counter-intuitive
for this insn to not be universally usable.)

Jan

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH] xen/page_alloc: statically allocate bootmem_region_list

2019-12-17 Thread Jan Beulich

I'm sorry for the non-threaded reply, but my mail client has
mixed up this mail with another one, so I have nothing to
properly reply to. With one stylistic issue taken care of
(blanks around the binary operator / )
Reviewed-by: Jan Beulich 
The change would be easy enough to do while committing, but
said mailbox issue would either require someone else to
apply the change, or you to send a v2 (which then hopefully
won't end up garbled).

Iirc this was suggested before, so it would be nice if the
patch could also gain a suitable Suggested-by.

Jan

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH 0/6] x86/suspend: State cleanup

2019-12-17 Thread Andrew Cooper

On 17/12/2019 16:17, Jan Beulich wrote:
> On 13.12.2019 20:04, Andrew Cooper wrote:
>> Andrew Cooper (6):
>>   x86/suspend: Clarify and improve the behaviour of do_suspend_lowlevel()
>>   x86/suspend: Don't bother saving %cr3, %ss or flags
>>   x86/suspend: Don't save unnecessary GPRs
>>   x86/suspend: Restore cr4 later during resume
>>   x86/suspend: Expand macros in wakeup_prot.S
>>   x86/suspend: Drop save_rest_processor_state() completely
>>
>>  xen/arch/x86/acpi/suspend.c |  55 ++--
>>  xen/arch/x86/acpi/wakeup_prot.S | 136 
>> 
>>  xen/arch/x86/boot/wakeup.S  |   2 +-
>>  3 files changed, 46 insertions(+), 147 deletions(-)
> Based on Roger's review
> Acked-by: Jan Beulich 
>
> One remark on the combination of patches 2 and 5: The loading of
> the stack related registers would now seem to be a fair candidate
> for using LSS (generally to be preferred over MOV-to-SS).

Well... You've just fixed c/s ffa21ea5303 in the emulator, and it
demonstrates why LSS can't be used.

~Andrew

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH] xen/arm: Basic support for sunxi/sun50i h6 platform.

2019-12-17 Thread Julien Grall


Hi,

On 04/12/2019 09:27, Andre Przywara wrote:

On Tue, 3 Dec 2019 16:52:45 +
Julien Grall  wrote:

Hi,


On 03/12/2019 14:38, Andre Przywara wrote:

On Tue, 3 Dec 2019 11:39:58 +
Julien Grall  wrote:

Hi,
   

(+Andre)

Hi,

@Andre, IIRC you originally added the support for sunxi in Xen. Could
you have a look at this patch?


Looks alright, and indeed the H6 needs it. Even though Allwinner totally 
re-arranged the memory map, they missed the opportunity to put each device at 
least in their own 4K page.

Reviewed-by: Andre Przywara 


Thank you for the review!



If you can wait till this evening, I can even test it.


I can wait until tomorrow before committing the patch.


I booted Xen 4.12.1 to the Dom0 prompt on the Pine H64, and saw all the serial 
devices in /sys/firmware/devicetree/base/soc. Then I applied the patch, and the 
serial devices were gone. And yes, all the four main serial ports share one 4K 
page on the H6.

Tested-by: Andre Przywara 


Thank you for the testing!

Acked-by: Julien Grall 

I have also committed it.

Cheers,

--
Julien Grall

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] [PATCH 1/3] xen/blkback: Squeeze page pools if a memory pressure is detected

2019-12-17 Thread SeongJae Park

From: SeongJae Park 

I thought it would be better to review separate patches, but it seems that
was my mistake.  As Juergen asked, I merged them again and am posting them here.
Also, dropped Roger's reviewed-by.


Thanks,
SeongJae Park


 >8 ---
Subject: [PATCH 1/3] xen/blkback: Squeeze page pools if a memory pressure is
 detected

Each `blkif` has a free pages pool for the grant mapping.  The size of
the pool starts from zero and is increased on demand while processing
the I/O requests.  If current I/O requests handling is finished or 100
milliseconds has passed since last I/O requests handling, it checks and
shrinks the pool to not exceed the size limit, `max_buffer_pages`.

Therefore, host administrators can cause memory pressure in blkback by
attaching a large number of block devices and inducing I/O.  Such
problematic situations can be avoided by limiting the maximum number of
devices that can be attached, but finding the optimal limit is not so
easy.  An improper setting of the limit can result in memory pressure or
resource underutilization.  This commit avoids such problematic
situations by squeezing the pools (returns every free page in the pool
to the system) for a while (users can set this duration via a module
parameter) if memory pressure is detected.

Discussions
===

The `blkback`'s original shrinking mechanism returns only pages in the
pool which are not currently being used by `blkback` to the system.  In
other words, the pages that are not mapped with granted pages.  Because
this commit is changing only the shrink limit but still uses the same
freeing mechanism it does not touch pages which are currently mapping
grants.

Once memory pressure is detected, this commit keeps the squeezing limit
for a user-specified time duration.  The duration should be neither too
long nor too short.  If it is too long, the overhead incurred by squeezing
can reduce the I/O performance.  If it is too short, `blkback` will not
free enough pages to reduce the memory pressure.  This commit sets the
value as `10 milliseconds` by default because it is a short time in
terms of I/O while it is a long time in terms of memory operations.
Also, as the original shrinking mechanism works for at least every 100
milliseconds, this could be a somewhat reasonable choice.  I also tested
other durations (refer to the below section for more details) and
confirmed that 10 milliseconds is the one that works best with the test.
That said, the proper duration depends on actual configurations and
workloads.  That's why this commit allows users to set the duration as a
module parameter.

Memory Pressure Test
====================

To show how this commit fixes the memory pressure situation well, I
configured a test environment on a xen-running virtualization system.
On the `blkfront` running guest instances, I attach a large number of
network-backed volume devices and induce I/O to those.  Meanwhile, I
measure the number of pages that swapped in (pswpin) and out (pswpout)
on the `blkback` running guest.  The test ran twice, once for the
`blkback` before this commit and once for that after this commit.  As
shown below, this commit has dramatically reduced the memory pressure:

        pswpin  pswpout
before  76,672  185,799
after      212    3,325

Optimal Aggressive Shrinking Duration
-------------------------------------

To find a best squeezing duration, I repeated the test with three
different durations (1ms, 10ms, and 100ms).  The results are as below:

duration  pswpin  pswpout
1         852     6,424
10        212     3,325
100       203     3,340

As expected, the memory pressure has decreased as the duration is
increased, but the reduction leveled off at `10ms`.  Based on these
results, I chose the default duration as 10ms.

Performance Overhead Test
=========================

This commit could incur I/O performance degradation under severe memory
pressure because the squeezing will require more page allocations per
I/O.  To show the overhead, I artificially made a worst-case squeezing
situation and measured the I/O performance of a `blkfront` running
guest.

For the artificial squeezing, I set the `blkback.max_buffer_pages` using
the `/sys/module/xen_blkback/parameters/max_buffer_pages` file.  In this
test, I set the value to `1024` and `0`.  The `1024` is the default
value.  Setting the value to `0` is the same as a situation where the
squeezing is always done (worst-case).

If the underlying block device is slow enough, the squeezing overhead
could be hidden.  For this reason, I use a fast block device, namely the
rbd[1]:

# xl block-attach guest phy:/dev/ram0 xvdb w

For the I/O performance measurement, I run a simple `dd` command 5 times
directly to the device as below and collect the 'MB/s' results.

$ for i in {1..5}; do dd if=/dev/zero of=/dev/xvdb \
 bs=4k count=$((256*512)); sync; done

The results

Re: [Xen-devel] [PATCH v11 2/6] xenbus/backend: Protect xenbus callback with lock

2019-12-17 Thread SeongJae Park

On Tue, 17 Dec 2019 17:13:42 +0100 "Jürgen Groß"  wrote:

> On 17.12.19 17:07, SeongJae Park wrote:
> > From: SeongJae Park 
> > 
> > 'reclaim_memory' callback can race with a driver code as this callback
> > will be called from any memory pressure detected context.  To deal with
> > the case, this commit adds a spinlock in the 'xenbus_device'.  Whenever
> > 'reclaim_memory' callback is called, the lock of the device which passed
> > to the callback as its argument is locked.  Thus, drivers registering
> > their 'reclaim_memory' callback should protect the data that might race
> > with the callback with the lock by themselves.
> > 
> > Signed-off-by: SeongJae Park 
> > ---
> >   drivers/xen/xenbus/xenbus_probe.c |  1 +
> >   drivers/xen/xenbus/xenbus_probe_backend.c | 10 --
> >   include/xen/xenbus.h  |  2 ++
> >   3 files changed, 11 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/xen/xenbus/xenbus_probe.c 
> > b/drivers/xen/xenbus/xenbus_probe.c
> > index 5b471889d723..b86393f172e6 100644
> > --- a/drivers/xen/xenbus/xenbus_probe.c
> > +++ b/drivers/xen/xenbus/xenbus_probe.c
> > @@ -472,6 +472,7 @@ int xenbus_probe_node(struct xen_bus_type *bus,
> > goto fail;
> >   
> > dev_set_name(>dev, "%s", devname);
> > +   spin_lock_init(>reclaim_lock);
> >   
> > /* Register with generic device framework. */
> > err = device_register(>dev);
> > diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c 
> > b/drivers/xen/xenbus/xenbus_probe_backend.c
> > index 7e78ebef7c54..516aa64b9967 100644
> > --- a/drivers/xen/xenbus/xenbus_probe_backend.c
> > +++ b/drivers/xen/xenbus/xenbus_probe_backend.c
> > @@ -251,12 +251,18 @@ static int backend_probe_and_watch(struct 
> > notifier_block *notifier,
> >   static int backend_reclaim_memory(struct device *dev, void *data)
> >   {
> > const struct xenbus_driver *drv;
> > +   struct xenbus_device *xdev;
> > +   unsigned long flags;
> >   
> > if (!dev->driver)
> > return 0;
> > drv = to_xenbus_driver(dev->driver);
> > -   if (drv && drv->reclaim_memory)
> > -   drv->reclaim_memory(to_xenbus_device(dev));
> > +   if (drv && drv->reclaim_memory) {
> > +   xdev = to_xenbus_device(dev);
> > +   spin_trylock_irqsave(>reclaim_lock, flags);
> 
> You need spin_lock_irqsave() here. Or maybe spin_lock() would be fine,
> too? I can't see a reason why you'd want to disable irqs here.

I needed to disable irqs here as this is called from the memory shrinker context.

Also, used 'trylock' because the 'probe()' and 'remove()' code of the driver
might include memory allocation.  And the xen-blkback actually does.  If the
allocation shows a memory pressure during the allocation, it will trigger this
shrinker callback again and then deadlock.


Thanks,
SeongJae Park

> 
> > +   drv->reclaim_memory(xdev);
> > +   spin_unlock_irqrestore(>reclaim_lock, flags);
> > +   }
> > return 0;
> >   }
> >   
> > diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
> > index c861cfb6f720..d9468313061d 100644
> > --- a/include/xen/xenbus.h
> > +++ b/include/xen/xenbus.h
> > @@ -76,6 +76,8 @@ struct xenbus_device {
> > enum xenbus_state state;
> > struct completion down;
> > struct work_struct work;
> > +   /* 'reclaim_memory' callback is called while this lock is acquired */
> > +   spinlock_t reclaim_lock;
> >   };
> >   
> >   static inline struct xenbus_device *to_xenbus_device(struct device *dev)
> > 
> 
> 
> Juergen
> 

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH] xen/page_alloc: statically allocate bootmem_region_list

2019-12-17 Thread Julien Grall


Hi Hongyan,

On 17/12/2019 14:33, Hongyan Xia wrote:

The existing code assumes that the first mfn passed to the boot
allocator is mapped, which creates problems when, e.g., we do not have
a direct map, and may create other bootstrapping problems in the
future. Make it static. The size is kept the same as before (1 page).

Signed-off-by: Hongyan Xia 
---
  xen/common/page_alloc.c | 11 +--
  1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 7cb1bd368b..7afb651b79 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -244,9 +244,12 @@ PAGE_LIST_HEAD(page_broken_list);
   */
  mfn_t first_valid_mfn = INVALID_MFN_INITIALIZER;
  
-static struct bootmem_region {

+struct bootmem_region {
  unsigned long s, e; /* MFNs @s through @e-1 inclusive are free */
-} *__initdata bootmem_region_list;
+};
+/* Statically allocate a page for bootmem_region_list. */
+static struct bootmem_region __initdata
+bootmem_region_list[PAGE_SIZE/sizeof(struct bootmem_region)];


NIT: space before and after /.

Other than that:

Reviewed-by: Julien Grall 

Cheers,

--
Julien Grall

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH 0/6] x86/suspend: State cleanup

2019-12-17 Thread Jan Beulich

On 13.12.2019 20:04, Andrew Cooper wrote:
> Andrew Cooper (6):
>   x86/suspend: Clarify and improve the behaviour of do_suspend_lowlevel()
>   x86/suspend: Don't bother saving %cr3, %ss or flags
>   x86/suspend: Don't save unnecessary GPRs
>   x86/suspend: Restore cr4 later during resume
>   x86/suspend: Expand macros in wakeup_prot.S
>   x86/suspend: Drop save_rest_processor_state() completely
> 
>  xen/arch/x86/acpi/suspend.c |  55 ++--
>  xen/arch/x86/acpi/wakeup_prot.S | 136 
> 
>  xen/arch/x86/boot/wakeup.S  |   2 +-
>  3 files changed, 46 insertions(+), 147 deletions(-)

Based on Roger's review
Acked-by: Jan Beulich 

One remark on the combination of patches 2 and 5: The loading of
the stack related registers would now seem to be a fair candidate
for using LSS (generally to be preferred over MOV-to-SS).

Jan


___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH v10 2/4] xen/blkback: Squeeze page pools if a memory pressure is detected

2019-12-17 Thread SeongJae Park

On Tue, 17 Dec 2019 09:30:32 +0100 SeongJae Park  wrote:

> On Tue, 17 Dec 2019 09:16:47 +0100 "Jürgen Groß"  wrote:
> 
> > On 17.12.19 08:59, SeongJae Park wrote:
> > > On Tue, 17 Dec 2019 07:23:12 +0100 "Jürgen Groß"  wrote:
> > > 
> > >> On 16.12.19 20:48, SeongJae Park wrote:
> > >>> On Mon, 16 Dec 2019 17:23:44 +0100, Jürgen Groß wrote:
> > >>>
> >  On 16.12.19 17:15, SeongJae Park wrote:
> > > On Mon, 16 Dec 2019 15:37:20 +0100 SeongJae Park  
> > > wrote:
> > >
> > >> On Mon, 16 Dec 2019 13:45:25 +0100 SeongJae Park  
> > >> wrote:
> > >>
> > >>> From: SeongJae Park 
> > >>>
> > > [...]
> > >>> --- a/drivers/block/xen-blkback/xenbus.c
> > >>> +++ b/drivers/block/xen-blkback/xenbus.c
> > >>> @@ -824,6 +824,24 @@ static void frontend_changed(struct 
> > >>> xenbus_device *dev,
> > >>> }
> > >>> 
> > >>> 
> > >>> +/* Once a memory pressure is detected, squeeze free page pools for 
> > >>> a while. */
> > >>> +static unsigned int buffer_squeeze_duration_ms = 10;
> > >>> +module_param_named(buffer_squeeze_duration_ms,
> > >>> +   buffer_squeeze_duration_ms, int, 0644);
> > >>> +MODULE_PARM_DESC(buffer_squeeze_duration_ms,
> > >>> +"Duration in ms to squeeze pages buffer when a memory pressure is 
> > >>> detected");
> > >>> +
> > >>> +/*
> > >>> + * Callback received when the memory pressure is detected.
> > >>> + */
> > >>> +static void reclaim_memory(struct xenbus_device *dev)
> > >>> +{
> > >>> +   struct backend_info *be = dev_get_drvdata(>dev);
> > >>> +
> > >>> +   be->blkif->buffer_squeeze_end = jiffies +
> > >>> +   msecs_to_jiffies(buffer_squeeze_duration_ms);
> > >>
> > >> This callback might race with 'xen_blkbk_probe()'.  The race could 
> > >> result in
> > >> __NULL dereferencing__, as 'xen_blkbk_probe()' sets '->blkif' after 
> > >> it links
> > >> 'be' to the 'dev'.  Please _don't merge_ this patch now!
> > >>
> > >> I will do more test and share results.  Meanwhile, if you have any 
> > >> opinion,
> > >> please let me know.
> > >>>
> > >>> I reduced system memory and attached bunch of devices in short time so 
> > >>> that
> > >>> memory pressure occurs while device attachments are ongoing.  Under this
> > >>> circumstance, I was able to see the race.
> > >>>
> > >
> > > Not only '->blkif', but 'be' itself could also be NULL.  As similar
> > > concurrency issues could be in other drivers in their way, I suggest 
> > > to change
> > > the reclaim callback ('->reclaim_memory') to be called for each 
> > > driver instead
> > > of each device.  Then, each driver could be able to deal with its 
> > > concurrency
> > > issues by itself.
> > 
> >  Hmm, I don't like that. This would need to be changed back in case we
> >  add per-guest quota.
> > >>>
> > >>> Extending this callback in that way would be still not too hard.  We 
> > >>> could use
> > >>> the argument to the callback.  I would keep the argument of the 
> > >>> callback to
> > >>> 'struct device *' as is, and will add a comment saying 'NULL' value of 
> > >>> the
> > >>> argument means every devices.  As an example, xenbus would pass 
> > >>> NULL-ending
> > >>> array of the device pointers that need to free its resources.
> > >>>
> > >>> After seeing this race, I am now also thinking it could be better to 
> > >>> delegate
> > >>> detailed control of each device to its driver, as some drivers have some
> > >>> complicated and unique relation with its devices.
> > >>>
> > 
> >  Wouldn't a get_device() before calling the callback and a put_device()
> >  afterwards avoid that problem?
> > >>>
> > >>> I didn't use the reference count manipulation operations because other 
> > >>> similar
> > >>> parts also didn't.  But, if there is no implicit reference count 
> > >>> guarantee, it
> > >>> seems those operations are indeed necessary.
> > >>>
> > >>> That said, as get/put operations only adjust the reference count, those 
> > >>> will
> > >>> not make the callback to wait until the linking of the 'backend' and 
> > >>> 'blkif' to
> > >>> the device (xen_blkbk_probe()) is finished.  Thus, the race could still 
> > >>> happen.
> > >>> Or, am I missing something?
> > >>
> > >> No, I think we need a xenbus lock per device which will need to be
> > >> taken in xen_blkbk_probe(), xenbus_dev_remove() and while calling the
> > >> callback.
> > > 
> > > I also agree that locking should be used at last.  But, as each driver 
> > > manages
> > > its devices and resources in their way, it could have its unique race
> > > conditions.  And, each unique race condition might have its unique 
> > > efficient
> > > way to synchronize it.  Therefore, I think the synchronization should be 
> > > done
> > > by each driver, not by xenbus and thus we should make the callback to be 
> > >

Re: [Xen-devel] [PATCH v11 4/6] xen/blkback: Protect 'reclaim_memory()' with 'reclaim_lock'

2019-12-17 Thread Jürgen Groß


On 17.12.19 17:07, SeongJae Park wrote:

From: SeongJae Park 

The 'reclaim_memory()' callback of blkback could race with
'xen_blkbk_probe()' and 'xen_blkbk_remove()'.  In the case, incompletely
linked 'backend_info' and 'blkif' might be exposed to the callback, thus
result in bad results including NULL dereference.  This commit fixes the
problem by applying the 'reclaim_lock' protection to those.

Note that this commit is separated for review purpose only.  As the
previous commit might result in a race condition and might make bisecting
confusing, please squash this commit into the previous commit if possible.

Signed-off-by: SeongJae Park 


Please merge this patch into patch 2.


Juergen

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH v11 2/6] xenbus/backend: Protect xenbus callback with lock

2019-12-17 Thread Jürgen Groß


On 17.12.19 17:07, SeongJae Park wrote:

From: SeongJae Park 

'reclaim_memory' callback can race with a driver code as this callback
will be called from any memory pressure detected context.  To deal with
the case, this commit adds a spinlock in the 'xenbus_device'.  Whenever
'reclaim_memory' callback is called, the lock of the device which passed
to the callback as its argument is locked.  Thus, drivers registering
their 'reclaim_memory' callback should protect the data that might race
with the callback with the lock by themselves.

Signed-off-by: SeongJae Park 
---
  drivers/xen/xenbus/xenbus_probe.c |  1 +
  drivers/xen/xenbus/xenbus_probe_backend.c | 10 --
  include/xen/xenbus.h  |  2 ++
  3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/xen/xenbus/xenbus_probe.c 
b/drivers/xen/xenbus/xenbus_probe.c
index 5b471889d723..b86393f172e6 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -472,6 +472,7 @@ int xenbus_probe_node(struct xen_bus_type *bus,
goto fail;
  
  	dev_set_name(>dev, "%s", devname);

+   spin_lock_init(>reclaim_lock);
  
  	/* Register with generic device framework. */

err = device_register(>dev);
diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c 
b/drivers/xen/xenbus/xenbus_probe_backend.c
index 7e78ebef7c54..516aa64b9967 100644
--- a/drivers/xen/xenbus/xenbus_probe_backend.c
+++ b/drivers/xen/xenbus/xenbus_probe_backend.c
@@ -251,12 +251,18 @@ static int backend_probe_and_watch(struct notifier_block 
*notifier,
  static int backend_reclaim_memory(struct device *dev, void *data)
  {
const struct xenbus_driver *drv;
+   struct xenbus_device *xdev;
+   unsigned long flags;
  
  	if (!dev->driver)

return 0;
drv = to_xenbus_driver(dev->driver);
-   if (drv && drv->reclaim_memory)
-   drv->reclaim_memory(to_xenbus_device(dev));
+   if (drv && drv->reclaim_memory) {
+   xdev = to_xenbus_device(dev);
+   spin_trylock_irqsave(>reclaim_lock, flags);


You need spin_lock_irqsave() here. Or maybe spin_lock() would be fine,
too? I can't see a reason why you'd want to disable irqs here.


+   drv->reclaim_memory(xdev);
+   spin_unlock_irqrestore(>reclaim_lock, flags);
+   }
return 0;
  }
  
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h

index c861cfb6f720..d9468313061d 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -76,6 +76,8 @@ struct xenbus_device {
enum xenbus_state state;
struct completion down;
struct work_struct work;
+   /* 'reclaim_memory' callback is called while this lock is acquired */
+   spinlock_t reclaim_lock;
  };
  
  static inline struct xenbus_device *to_xenbus_device(struct device *dev)





Juergen

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] [PATCH v11 6/6] xen/blkback: Consistently insert one empty line between functions

2019-12-17 Thread SeongJae Park

From: SeongJae Park 

The number of empty lines between functions in the xenbus.c is
inconsistent.  This trivial style cleanup commit fixes the file to
consistently place only one empty line.

Acked-by: Roger Pau Monné 
Signed-off-by: SeongJae Park 
---
 drivers/block/xen-blkback/xenbus.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/block/xen-blkback/xenbus.c 
b/drivers/block/xen-blkback/xenbus.c
index 20045827a391..453f97dd533d 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -432,7 +432,6 @@ static void xenvbd_sysfs_delif(struct xenbus_device *dev)
device_remove_file(>dev, _attr_physical_device);
 }
 
-
 static void xen_vbd_free(struct xen_vbd *vbd)
 {
if (vbd->bdev)
@@ -489,6 +488,7 @@ static int xen_vbd_create(struct xen_blkif *blkif, 
blkif_vdev_t handle,
handle, blkif->domid);
return 0;
 }
+
 static int xen_blkbk_remove(struct xenbus_device *dev)
 {
struct backend_info *be = dev_get_drvdata(>dev);
@@ -575,6 +575,7 @@ static void xen_blkbk_discard(struct xenbus_transaction 
xbt, struct backend_info
if (err)
dev_warn(>dev, "writing feature-discard (%d)", err);
 }
+
 int xen_blkbk_barrier(struct xenbus_transaction xbt,
  struct backend_info *be, int state)
 {
@@ -663,7 +664,6 @@ static int xen_blkbk_probe(struct xenbus_device *dev,
return err;
 }
 
-
 /*
  * Callback received when the hotplug scripts have placed the physical-device
  * node.  Read it and the mode node, and create a vbd.  If the frontend is
@@ -755,7 +755,6 @@ static void backend_changed(struct xenbus_watch *watch,
}
 }
 
-
 /*
  * Callback received when the frontend's state changes.
  */
@@ -830,7 +829,6 @@ static void frontend_changed(struct xenbus_device *dev,
}
 }
 
-
 /* Once a memory pressure is detected, squeeze free page pools for a while. */
 static unsigned int buffer_squeeze_duration_ms = 10;
 module_param_named(buffer_squeeze_duration_ms,
@@ -855,7 +853,6 @@ static void reclaim_memory(struct xenbus_device *dev)
 
 /* ** Connection ** */
 
-
 /*
  * Write the physical details regarding the block device to the store, and
  * switch to Connected state.
-- 
2.17.1


___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] [PATCH v11 5/6] xen/blkback: Remove unnecessary static variable name prefixes

2019-12-17 Thread SeongJae Park

From: SeongJae Park 

A few static variables in blkback have the 'xen_blkif_' prefix, though it
is unnecessary for static variables.  This commit removes such prefixes.

Reviewed-by: Roger Pau Monné 
Signed-off-by: SeongJae Park 
---
 drivers/block/xen-blkback/blkback.c | 37 +
 1 file changed, 17 insertions(+), 20 deletions(-)

diff --git a/drivers/block/xen-blkback/blkback.c 
b/drivers/block/xen-blkback/blkback.c
index 79f677aeb5cc..fbd67f8e4e4e 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -62,8 +62,8 @@
  * IO workloads.
  */
 
-static int xen_blkif_max_buffer_pages = 1024;
-module_param_named(max_buffer_pages, xen_blkif_max_buffer_pages, int, 0644);
+static int max_buffer_pages = 1024;
+module_param_named(max_buffer_pages, max_buffer_pages, int, 0644);
 MODULE_PARM_DESC(max_buffer_pages,
 "Maximum number of free pages to keep in each block backend buffer");
 
@@ -78,8 +78,8 @@ MODULE_PARM_DESC(max_buffer_pages,
  * algorithm.
  */
 
-static int xen_blkif_max_pgrants = 1056;
-module_param_named(max_persistent_grants, xen_blkif_max_pgrants, int, 0644);
+static int max_pgrants = 1056;
+module_param_named(max_persistent_grants, max_pgrants, int, 0644);
 MODULE_PARM_DESC(max_persistent_grants,
  "Maximum number of grants to map persistently");
 
@@ -88,8 +88,8 @@ MODULE_PARM_DESC(max_persistent_grants,
  * use. The time is in seconds, 0 means indefinitely long.
  */
 
-static unsigned int xen_blkif_pgrant_timeout = 60;
-module_param_named(persistent_grant_unused_seconds, xen_blkif_pgrant_timeout,
+static unsigned int pgrant_timeout = 60;
+module_param_named(persistent_grant_unused_seconds, pgrant_timeout,
   uint, 0644);
 MODULE_PARM_DESC(persistent_grant_unused_seconds,
 "Time in seconds an unused persistent grant is allowed to "
@@ -137,9 +137,8 @@ module_param(log_stats, int, 0644);
 
 static inline bool persistent_gnt_timeout(struct persistent_gnt 
*persistent_gnt)
 {
-   return xen_blkif_pgrant_timeout &&
-  (jiffies - persistent_gnt->last_used >=
-   HZ * xen_blkif_pgrant_timeout);
+   return pgrant_timeout && (jiffies - persistent_gnt->last_used >=
+   HZ * pgrant_timeout);
 }
 
 static inline int get_free_page(struct xen_blkif_ring *ring, struct page 
**page)
@@ -234,7 +233,7 @@ static int add_persistent_gnt(struct xen_blkif_ring *ring,
struct persistent_gnt *this;
struct xen_blkif *blkif = ring->blkif;
 
-   if (ring->persistent_gnt_c >= xen_blkif_max_pgrants) {
+   if (ring->persistent_gnt_c >= max_pgrants) {
if (!blkif->vbd.overflow_max_grants)
blkif->vbd.overflow_max_grants = 1;
return -EBUSY;
@@ -397,14 +396,13 @@ static void purge_persistent_gnt(struct xen_blkif_ring 
*ring)
goto out;
}
 
-   if (ring->persistent_gnt_c < xen_blkif_max_pgrants ||
-   (ring->persistent_gnt_c == xen_blkif_max_pgrants &&
+   if (ring->persistent_gnt_c < max_pgrants ||
+   (ring->persistent_gnt_c == max_pgrants &&
!ring->blkif->vbd.overflow_max_grants)) {
num_clean = 0;
} else {
-   num_clean = (xen_blkif_max_pgrants / 100) * LRU_PERCENT_CLEAN;
-   num_clean = ring->persistent_gnt_c - xen_blkif_max_pgrants +
-   num_clean;
+   num_clean = (max_pgrants / 100) * LRU_PERCENT_CLEAN;
+   num_clean = ring->persistent_gnt_c - max_pgrants + num_clean;
num_clean = min(ring->persistent_gnt_c, num_clean);
pr_debug("Going to purge at least %u persistent grants\n",
 num_clean);
@@ -599,8 +597,7 @@ static void print_stats(struct xen_blkif_ring *ring)
 current->comm, ring->st_oo_req,
 ring->st_rd_req, ring->st_wr_req,
 ring->st_f_req, ring->st_ds_req,
-ring->persistent_gnt_c,
-xen_blkif_max_pgrants);
+ring->persistent_gnt_c, max_pgrants);
ring->st_print = jiffies + msecs_to_jiffies(10 * 1000);
ring->st_rd_req = 0;
ring->st_wr_req = 0;
@@ -660,7 +657,7 @@ int xen_blkif_schedule(void *arg)
if (time_before(jiffies, blkif->buffer_squeeze_end))
shrink_free_pagepool(ring, 0);
else
-   shrink_free_pagepool(ring, xen_blkif_max_buffer_pages);
+   shrink_free_pagepool(ring, max_buffer_pages);
 
if (log_stats && time_after(jiffies, ring->st_print))
print_stats(ring);
@@ -887,7 +884,7 @@ static int xen_blkbk_map(struct xen_blkif_ring *ring,
continue;
}
if (use_persistent_gnts &&
-   ring->persistent_gnt_c < xen_blkif_max_pgrants) {
+

[Xen-devel] [PATCH v11 4/6] xen/blkback: Protect 'reclaim_memory()' with 'reclaim_lock'

2019-12-17 Thread SeongJae Park

From: SeongJae Park 

The 'reclaim_memory()' callback of blkback could race with
'xen_blkbk_probe()' and 'xen_blkbk_remove()'.  In the case, incompletely
linked 'backend_info' and 'blkif' might be exposed to the callback, thus
result in bad results including NULL dereference.  This commit fixes the
problem by applying the 'reclaim_lock' protection to those.

Note that this commit is separated for review purpose only.  As the
previous commit might result in a race condition and might make bisecting
confusing, please squash this commit into the previous commit if possible.

Signed-off-by: SeongJae Park 

---
 drivers/block/xen-blkback/xenbus.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/drivers/block/xen-blkback/xenbus.c 
b/drivers/block/xen-blkback/xenbus.c
index 4f6ea4feca79..20045827a391 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -492,6 +492,7 @@ static int xen_vbd_create(struct xen_blkif *blkif, 
blkif_vdev_t handle,
 static int xen_blkbk_remove(struct xenbus_device *dev)
 {
struct backend_info *be = dev_get_drvdata(>dev);
+   unsigned long flags;
 
pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);
 
@@ -504,6 +505,7 @@ static int xen_blkbk_remove(struct xenbus_device *dev)
be->backend_watch.node = NULL;
}
 
+   spin_lock_irqsave(>reclaim_lock, flags);
dev_set_drvdata(>dev, NULL);
 
if (be->blkif) {
@@ -512,6 +514,7 @@ static int xen_blkbk_remove(struct xenbus_device *dev)
/* Put the reference we set in xen_blkif_alloc(). */
xen_blkif_put(be->blkif);
}
+   spin_unlock_irqrestore(>reclaim_lock, flags);
 
return 0;
 }
@@ -597,6 +600,7 @@ static int xen_blkbk_probe(struct xenbus_device *dev,
int err;
struct backend_info *be = kzalloc(sizeof(struct backend_info),
  GFP_KERNEL);
+   unsigned long flags;
 
/* match the pr_debug in xen_blkbk_remove */
pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);
@@ -607,6 +611,7 @@ static int xen_blkbk_probe(struct xenbus_device *dev,
return -ENOMEM;
}
be->dev = dev;
+   spin_lock_irqsave(>reclaim_lock, flags);
dev_set_drvdata(>dev, be);
 
be->blkif = xen_blkif_alloc(dev->otherend_id);
@@ -614,8 +619,10 @@ static int xen_blkbk_probe(struct xenbus_device *dev,
err = PTR_ERR(be->blkif);
be->blkif = NULL;
xenbus_dev_fatal(dev, err, "creating block interface");
+   spin_unlock_irqrestore(>reclaim_lock, flags);
goto fail;
}
+   spin_unlock_irqrestore(>reclaim_lock, flags);
 
err = xenbus_printf(XBT_NIL, dev->nodename,
"feature-max-indirect-segments", "%u",
@@ -838,6 +845,10 @@ static void reclaim_memory(struct xenbus_device *dev)
 {
struct backend_info *be = dev_get_drvdata(>dev);
 
+   /* Device is registered but not probed yet */
+   if (!be)
+   return;
+
be->blkif->buffer_squeeze_end = jiffies +
msecs_to_jiffies(buffer_squeeze_duration_ms);
 }
-- 
2.17.1


___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] [PATCH v11 3/6] xen/blkback: Squeeze page pools if a memory pressure is detected

2019-12-17 Thread SeongJae Park

From: SeongJae Park 

Each `blkif` has a free pages pool for the grant mapping.  The size of
the pool starts from zero and is increased on demand while processing
the I/O requests.  If current I/O requests handling is finished or 100
milliseconds has passed since last I/O requests handling, it checks and
shrinks the pool to not exceed the size limit, `max_buffer_pages`.

Therefore, host administrators can cause memory pressure in blkback by
attaching a large number of block devices and inducing I/O.  Such
problematic situations can be avoided by limiting the maximum number of
devices that can be attached, but finding the optimal limit is not so
easy.  An improper setting of the limit can result in memory pressure or
resource underutilization.  This commit avoids such problematic
situations by squeezing the pools (returns every free page in the pool
to the system) for a while (users can set this duration via a module
parameter) if memory pressure is detected.

Discussions
===========

The `blkback`'s original shrinking mechanism returns only pages in the
pool which are not currently being used by `blkback` to the system.  In
other words, the pages that are not mapped with granted pages.  Because
this commit is changing only the shrink limit but still uses the same
freeing mechanism it does not touch pages which are currently mapping
grants.

Once memory pressure is detected, this commit keeps the squeezing limit
for a user-specified time duration.  The duration should be neither too
long nor too short.  If it is too long, the squeezing incurring overhead
can reduce the I/O performance.  If it is too short, `blkback` will not
free enough pages to reduce the memory pressure.  This commit sets the
value as `10 milliseconds` by default because it is a short time in
terms of I/O while it is a long time in terms of memory operations.
Also, as the original shrinking mechanism works for at least every 100
milliseconds, this could be a somewhat reasonable choice.  I also tested
other durations (refer to the below section for more details) and
confirmed that 10 milliseconds is the one that works best with the test.
That said, the proper duration depends on actual configurations and
workloads.  That's why this commit allows users to set the duration as a
module parameter.

Memory Pressure Test
====================

To show how this commit fixes the memory pressure situation well, I
configured a test environment on a xen-running virtualization system.
On the `blkfront` running guest instances, I attach a large number of
network-backed volume devices and induce I/O to those.  Meanwhile, I
measure the number of pages that swapped in (pswpin) and out (pswpout)
on the `blkback` running guest.  The test ran twice, once for the
`blkback` before this commit and once for that after this commit.  As
shown below, this commit has dramatically reduced the memory pressure:

        pswpin  pswpout
before  76,672  185,799
after      212    3,325

Optimal Aggressive Shrinking Duration
-------------------------------------

To find a best squeezing duration, I repeated the test with three
different durations (1ms, 10ms, and 100ms).  The results are as below:

duration  pswpin  pswpout
1         852     6,424
10        212     3,325
100       203     3,340

As expected, the memory pressure has decreased as the duration is
increased, but the reduction leveled off at `10ms`.  Based on these
results, I chose the default duration as 10ms.

Performance Overhead Test
=========================

This commit could incur I/O performance degradation under severe memory
pressure because the squeezing will require more page allocations per
I/O.  To show the overhead, I artificially made a worst-case squeezing
situation and measured the I/O performance of a `blkfront` running
guest.

For the artificial squeezing, I set the `blkback.max_buffer_pages` using
the `/sys/module/xen_blkback/parameters/max_buffer_pages` file.  In this
test, I set the value to `1024` and `0`.  The `1024` is the default
value.  Setting the value to `0` is the same as a situation where the
squeezing is always done (worst-case).

If the underlying block device is slow enough, the squeezing overhead
could be hidden.  For this reason, I use a fast block device, namely the
rbd[1]:

# xl block-attach guest phy:/dev/ram0 xvdb w

For the I/O performance measurement, I run a simple `dd` command 5 times
directly to the device as below and collect the 'MB/s' results.

$ for i in {1..5}; do dd if=/dev/zero of=/dev/xvdb \
 bs=4k count=$((256*512)); sync; done

The results are as below.  'max_pgs' represents the value of the
`blkback.max_buffer_pages` parameter.

max_pgs   Min   Max   Median  Avg    Stddev
0         417   423   420     419.4  2.5099801
1024      414   425   416     417.8  4.4384682
No difference proven at 95.0% confidence

In short, even worst case squeezing

[Xen-devel] [PATCH v11 2/6] xenbus/backend: Protect xenbus callback with lock

2019-12-17 Thread SeongJae Park

From: SeongJae Park 

'reclaim_memory' callback can race with a driver code as this callback
will be called from any memory pressure detected context.  To deal with
the case, this commit adds a spinlock in the 'xenbus_device'.  Whenever
'reclaim_memory' callback is called, the lock of the device which passed
to the callback as its argument is locked.  Thus, drivers registering
their 'reclaim_memory' callback should protect the data that might race
with the callback with the lock by themselves.

Signed-off-by: SeongJae Park 
---
 drivers/xen/xenbus/xenbus_probe.c |  1 +
 drivers/xen/xenbus/xenbus_probe_backend.c | 10 --
 include/xen/xenbus.h  |  2 ++
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/xen/xenbus/xenbus_probe.c 
b/drivers/xen/xenbus/xenbus_probe.c
index 5b471889d723..b86393f172e6 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -472,6 +472,7 @@ int xenbus_probe_node(struct xen_bus_type *bus,
goto fail;
 
dev_set_name(>dev, "%s", devname);
+   spin_lock_init(>reclaim_lock);
 
/* Register with generic device framework. */
err = device_register(>dev);
diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c 
b/drivers/xen/xenbus/xenbus_probe_backend.c
index 7e78ebef7c54..516aa64b9967 100644
--- a/drivers/xen/xenbus/xenbus_probe_backend.c
+++ b/drivers/xen/xenbus/xenbus_probe_backend.c
@@ -251,12 +251,18 @@ static int backend_probe_and_watch(struct notifier_block 
*notifier,
 static int backend_reclaim_memory(struct device *dev, void *data)
 {
const struct xenbus_driver *drv;
+   struct xenbus_device *xdev;
+   unsigned long flags;
 
if (!dev->driver)
return 0;
drv = to_xenbus_driver(dev->driver);
-   if (drv && drv->reclaim_memory)
-   drv->reclaim_memory(to_xenbus_device(dev));
+   if (drv && drv->reclaim_memory) {
+   xdev = to_xenbus_device(dev);
+   spin_trylock_irqsave(>reclaim_lock, flags);
+   drv->reclaim_memory(xdev);
+   spin_unlock_irqrestore(>reclaim_lock, flags);
+   }
return 0;
 }
 
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
index c861cfb6f720..d9468313061d 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -76,6 +76,8 @@ struct xenbus_device {
enum xenbus_state state;
struct completion down;
struct work_struct work;
+   /* 'reclaim_memory' callback is called while this lock is acquired */
+   spinlock_t reclaim_lock;
 };
 
 static inline struct xenbus_device *to_xenbus_device(struct device *dev)
-- 
2.17.1


___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] [PATCH v11 0/6] xenbus/backend: Add a memory pressure handler callback

2019-12-17 Thread SeongJae Park

Granting pages consumes backend system memory.  In systems configured
with insufficient spare memory for those pages, it can cause a memory
pressure situation.  However, finding the optimal amount of the spare
memory is challenging for large systems having dynamic resource
utilization patterns.  Also, such a static configuration might lack
flexibility.

To mitigate such problems, this patchset adds a memory reclaim callback
to 'xenbus_driver' (patch 1) and then introduces a lock for race
condition avoidance (patch 2).  Those two patches could be merged into
one patch if necessary.

The third patch applies the callback mechanism to mitigate the problem
in 'xen-blkback' (patch 3), but it lacks use of the race condition
mitigation.  The following change (patch 4) applies the race protection
mechanism to the blkback.  Patches 3 and 4 have been separated only for
review convenience.  I highly recommend merging them into one patch, as
a tree with only patch 3 applied might confuse bisecting.

The fifth and sixth patches are trivial cleanups; those fix nits we
found during the development of this patchset.

Note that patches 1, 3, 5 and 6 are the same as in the previous version.
I made the changes in this version in different commits (only the second
and fourth patches) to make review more comfortable.  In particular, the
third and fourth patches should be merged into one patch, as the third
one alone might make bisecting confusing.  The next version of this
patchset will also merge those.


Base Version


This patch is based on v5.4.  A complete tree is also available at my
public git repo:
https://github.com/sjp38/linux/tree/patches/blkback/buffer_squeeze/v11


Patch History
-

Changes from v10
(https://lore.kernel.org/xen-devel/20191216124527.30306-1-sjp...@amazon.com/)
 - Fix race condition (reported by SeongJae, suggested by Juergen)

Changes from v9
(https://lore.kernel.org/xen-devel/20191213153546.17425-1-sjp...@amazon.de/)
 - Add 'Reviewed-by' and 'Acked-by' from Roger Pau Monné
 - Update the commit message for overhead test of the 2nd patch

Changes from v8
(https://lore.kernel.org/xen-devel/20191213130211.24011-1-sjp...@amazon.de/)
 - Drop 'Reviewed-by: Juergen' from the second patch
   (suggested by Roger Pau Monné)
 - Update contact of the new module param to SeongJae Park
   
   (suggested by Roger Pau Monné)
 - Wordsmith the description of the parameter
   (suggested by Roger Pau Monné)
 - Fix dumb bugs
   (suggested by Roger Pau Monné)
 - Move module param definition to xenbus.c and reduce the number of
   lines for this change
   (suggested by Roger Pau Monné)
 - Add a comment for the new callback, reclaim_memory, as other
   callbacks also have
 - Add another trivial cleanup of xenbus.c file (4th patch)

Changes from v7
(https://lore.kernel.org/xen-devel/20191211181016.14366-1-sjp...@amazon.de/)
 - Update sysfs-driver-xen-blkback for new parameter
   (suggested by Roger Pau Monné)
 - Use per-xen_blkif buffer_squeeze_end instead of global variable
   (suggested by Roger Pau Monné)

Changes from v6
(https://lore.kernel.org/linux-block/20191211042428.5961-1-sjp...@amazon.de/)
 - Remove more unnecessary prefixes (suggested by Roger Pau Monné)
 - Constify a variable (suggested by Roger Pau Monné)
 - Rename 'reclaim' into 'reclaim_memory' (suggested by Roger Pau Monné)
 - More wordsmith of the commit message (suggested by Roger Pau Monné)

Changes from v5
(https://lore.kernel.org/linux-block/20191210080628.5264-1-sjp...@amazon.de/)
 - Wordsmith the commit messages (suggested by Roger Pau Monné)
 - Change the reclaim callback return type (suggested by Roger Pau
   Monné)
 - Change the type of the blkback squeeze duration variable
   (suggested by Roger Pau Monné)
 - Add a patch for removal of unnecessary static variable name prefixes
   (suggested by Roger Pau Monné)
 - Fix checkpatch.pl warnings

Changes from v4
(https://lore.kernel.org/xen-devel/20191209194305.20828-1-sjp...@amazon.com/)
 - Remove domain id parameter from the callback (suggested by Juergen
   Gross)
 - Rename xen-blkback module parameter (suggested by Stefan Nuernburger)

Changes from v3
(https://lore.kernel.org/xen-devel/20191209085839.21215-1-sjp...@amazon.com/)
 - Add general callback in xen_driver and use it (suggested by Juergen
   Gross)

Changes from v2
(https://lore.kernel.org/linux-block/af195033-23d5-38ed-b73b-f6e2e3b34...@amazon.com)
 - Rename the module parameter and variables for brevity
   (aggressive shrinking -> squeezing)

Changes from v1
(https://lore.kernel.org/xen-devel/20191204113419.2298-1-sjp...@amazon.com/)
 - Adjust the description to not use the term, `arbitrarily`
   (suggested by Paul Durrant)
 - Specify time unit of the duration in the parameter description,
   (suggested by Maximilian Heyne)
 - Change default aggressive shrinking duration from 1ms to 10ms
 - Merge two patches into one single patch


SeongJae Park (6):
  xenbus/backend: Add memory pressure handler callback
  xenbus/backend: Protect xenbus callback with

[Xen-devel] [PATCH v11 1/6] xenbus/backend: Add memory pressure handler callback

2019-12-17 Thread SeongJae Park

From: SeongJae Park 

Granting pages consumes backend system memory.  In systems configured
with insufficient spare memory for those pages, it can cause a memory
pressure situation.  However, finding the optimal amount of the spare
memory is challenging for large systems having dynamic resource
utilization patterns.  Also, such a static configuration might lack
flexibility.

To mitigate such problems, this commit adds a memory reclaim callback to
'xenbus_driver'.  If a memory pressure is detected, 'xenbus' requests
every backend driver to voluntarily release its memory.

Note that the callback facility could be improved for more
sophisticated handling of general pressure.  For example, it would be
possible to monitor the memory consumption of each device and issue the
release requests only to the devices which are causing the pressure.
Also, the callback could be extended to handle not only memory, but
general resources.  Nevertheless, this version of the implementation
defers such sophisticated goals as future work.

Reviewed-by: Juergen Gross 
Reviewed-by: Roger Pau Monné 
Signed-off-by: SeongJae Park 
---
 drivers/xen/xenbus/xenbus_probe_backend.c | 32 +++
 include/xen/xenbus.h  |  1 +
 2 files changed, 33 insertions(+)

diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c 
b/drivers/xen/xenbus/xenbus_probe_backend.c
index b0bed4faf44c..7e78ebef7c54 100644
--- a/drivers/xen/xenbus/xenbus_probe_backend.c
+++ b/drivers/xen/xenbus/xenbus_probe_backend.c
@@ -248,6 +248,35 @@ static int backend_probe_and_watch(struct notifier_block 
*notifier,
return NOTIFY_DONE;
 }
 
+static int backend_reclaim_memory(struct device *dev, void *data)
+{
+   const struct xenbus_driver *drv;
+
+   if (!dev->driver)
+   return 0;
+   drv = to_xenbus_driver(dev->driver);
+   if (drv && drv->reclaim_memory)
+   drv->reclaim_memory(to_xenbus_device(dev));
+   return 0;
+}
+
+/*
+ * Returns 0 always because we are using shrinker to only detect memory
+ * pressure.
+ */
+static unsigned long backend_shrink_memory_count(struct shrinker *shrinker,
+   struct shrink_control *sc)
+{
+   bus_for_each_dev(_backend.bus, NULL, NULL,
+   backend_reclaim_memory);
+   return 0;
+}
+
+static struct shrinker backend_memory_shrinker = {
+   .count_objects = backend_shrink_memory_count,
+   .seeks = DEFAULT_SEEKS,
+};
+
 static int __init xenbus_probe_backend_init(void)
 {
static struct notifier_block xenstore_notifier = {
@@ -264,6 +293,9 @@ static int __init xenbus_probe_backend_init(void)
 
register_xenstore_notifier(_notifier);
 
+   if (register_shrinker(_memory_shrinker))
+   pr_warn("shrinker registration failed\n");
+
return 0;
 }
 subsys_initcall(xenbus_probe_backend_init);
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
index 869c816d5f8c..c861cfb6f720 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -104,6 +104,7 @@ struct xenbus_driver {
struct device_driver driver;
int (*read_otherend_details)(struct xenbus_device *dev);
int (*is_ready)(struct xenbus_device *dev);
+   void (*reclaim_memory)(struct xenbus_device *dev);
 };
 
 static inline struct xenbus_driver *to_xenbus_driver(struct device_driver *drv)
-- 
2.17.1


___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [XEN PATCH v1 1/1] x86/vm_event: add fast single step

2019-12-17 Thread Ковалёв Сергей

Andrew, Tamas thank you very much. I will improve the patch.

December 17, 2019 3:13:42 PM UTC, Andrew Cooper  
пишет:
>On 17/12/2019 15:10, Tamas K Lengyel wrote:
>> On Tue, Dec 17, 2019 at 8:08 AM Tamas K Lengyel 
>wrote:
>>> On Tue, Dec 17, 2019 at 7:48 AM Andrew Cooper
> wrote:
 On 17/12/2019 14:40, Sergey Kovalev wrote:
> On break point event eight context switches occures.
>
> With fast single step it is possible to shorten path for two
>context
> switches
> and gain 35% spead-up.
>
> Was tested on Debian branch of Xen 4.12. See at:
>
>https://github.com/skvl/xen/tree/debian/knorrie/4.12/fast-singlestep
>
> Rebased on master:
> https://github.com/skvl/xen/tree/fast-singlestep
>
> Signed-off-by: Sergey Kovalev 
 35% looks like a good number, but what is "fast single step"?  All
>this
 appears to be is plumbing for to cause an altp2m switch on single
>step.
>>> Yes, a better explanation would be much needed here and I'm not 100%
>>> sure it correctly implements what I think it tries to.
>>>
>>> This is my interpretation of what the idea is: when using DRAKVUF
>(or
>>> another system using altp2m with shadow pages similar to what I
>>> describe in
>https://xenproject.org/2016/04/13/stealthy-monitoring-with-xen-altp2m),
>>> after a breakpoint is hit the system switches to the default
>>> unrestricted altp2m view with singlestep enabled. When the
>singlestep
>>> traps to Xen another vm_event is sent to the monitor agent, which
>then
>>> normally disables singlestepping and switches the altp2m view back
>to
>>> the restricted view. This patch looks like its short-circuiting that
>>> last part so that it doesn't need to send the vm_event out for the
>>> singlestep event and should switch back to the restricted view in
>Xen
>>> automatically. It's a nice optimization. But what seems to be
>missing
>>> is the altp2m switch itself.
>> Never mind, p2m_altp2m_check does the altp2m switch as well, so this
>> patch implements what I described above. Please update the patch
>> message to be more descriptive (you can copy my description from
>> above).
>
>Also please read CODING_STYLE in the root of the xen repository.  The
>important ones you need to fix are spaces in "if ( ... )" statements,
>and binary operators on the end of the first line rather than the
>beginning of the continuation.
>
>~Andrew

-- 
Простите за краткость, создано в K-9 Mail.

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH 1/2] Tidy up whitespace and formatting in file to be consistent.

2019-12-17 Thread Wei Liu

On Wed, Dec 18, 2019 at 02:44:51AM +1100, Steven Haigh wrote:
> Ok, if its going to be 4 spaces for each file, I can batch convert & tidy
> stuff up...
> 
> The file I changed had both types, so I went with my own preference :)
> 
> If it might be a better approach, I'll sort out the majority of scripts in
> that directory - and do no function changes and post a series that does
> nothing but cleanup - then do the brctl / ip changes on top of that in a
> different patch.
> 

Like Juergen said on IRC, if you think that patch should be backported
(either by upstream or downstream maintainers), it will make people's
life easier if that goes in first.

Wei.

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] [PATCH] xsm: hide detailed Xen version from unprivileged guests

2019-12-17 Thread Sergey Dyasli

Hide the following information that can help identify the running Xen
binary version:

XENVER_extraversion
XENVER_compile_info
XENVER_capabilities
XENVER_changeset
XENVER_commandline
XENVER_build_id

Return a more customer friendly empty string instead of "<denied>"
which would be shown in tools like dmidecode.

But allow guests to see this information in Debug builds of Xen.

Signed-off-by: Sergey Dyasli 
---
CC: Andrew Cooper 
CC: George Dunlap 
CC: Ian Jackson 
CC: Jan Beulich 
CC: Julien Grall 
CC: Konrad Rzeszutek Wilk 
CC: Stefano Stabellini 
CC: Wei Liu 
CC: Daniel De Graaf 
---
 xen/common/version.c|  2 +-
 xen/include/xsm/dummy.h | 15 ++-
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/xen/common/version.c b/xen/common/version.c
index 937eb1281c..cc621ab76a 100644
--- a/xen/common/version.c
+++ b/xen/common/version.c
@@ -67,7 +67,7 @@ const char *xen_banner(void)
 
 const char *xen_deny(void)
 {
-return "";
+return "";
 }
 
 static const void *build_id_p __read_mostly;
diff --git a/xen/include/xsm/dummy.h b/xen/include/xsm/dummy.h
index b8e185e6fa..4a1a1bf2bd 100644
--- a/xen/include/xsm/dummy.h
+++ b/xen/include/xsm/dummy.h
@@ -750,16 +750,21 @@ static XSM_INLINE int xsm_xen_version (XSM_DEFAULT_ARG 
uint32_t op)
 case XENVER_get_features:
 /* These sub-ops ignore the permission checks and return data. */
 return 0;
-case XENVER_extraversion:
-case XENVER_compile_info:
-case XENVER_capabilities:
-case XENVER_changeset:
 case XENVER_pagesize:
 case XENVER_guest_handle:
 /* These MUST always be accessible to any guest by default. */
 return xsm_default_action(XSM_HOOK, current->domain, NULL);
+
+case XENVER_extraversion:
+case XENVER_compile_info:
+case XENVER_capabilities:
+case XENVER_changeset:
+case XENVER_commandline:
+case XENVER_build_id:
 default:
-return xsm_default_action(XSM_PRIV, current->domain, NULL);
+/* Hide information from guests only in Release builds. */
+return xsm_default_action(debug_build() ? XSM_HOOK : XSM_PRIV,
+  current->domain, NULL);
 }
 }
 
-- 
2.17.1


___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH 1/2] Tidy up whitespace and formatting in file to be consistent.

2019-12-17 Thread Steven Haigh

Ok, if its going to be 4 spaces for each file, I can batch convert & 
tidy stuff up...


The file I changed had both types, so I went with my own preference :)

If it might be a better approach, I'll sort out the majority of scripts 
in that directory - and do no function changes and post a series that 
does nothing but cleanup - then do the brctl / ip changes on top of 
that in a different patch.


I might as well do them all - and it makes sense to do nothing but 
cleanup, then functional changes based on the cleaned up code.

Steven Haigh

 net...@crc.id.au  https://www.crc.id.au


On Tue, Dec 17, 2019 at 14:13, Wei Liu  wrote:

On Fri, Dec 13, 2019 at 03:08:34PM +1100, Steven Haigh wrote:

 Signed-off-by: Steven Haigh 


Acked-by: Wei Liu 

I will need to add tools/hotplug to the subject line and the following
commit message:

   Use 4 spaces for indentation throughout the file. No functional
   change.

Wei.

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel




___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH net-next 2/3] xen-netback: switch state to InitWait at the end of netback_probe()...

2019-12-17 Thread Wei Liu

On Tue, Dec 17, 2019 at 01:32:17PM +, Paul Durrant wrote:
> ...as the comment above the function states.
> 
> The switch to Initialising at the start of the function is somewhat bogus
> as the toolstack will have set that initial state anyway. To behave
> correctly, a backend should switch to InitWait once it has set up all
> xenstore values that may be required by a initialising frontend. This
> patch calls backend_switch_state() to make the transition at the
> appropriate point.
> 
> NOTE: backend_switch_state() ignores errors from xenbus_switch_state()
>   and so this patch removes an error path from netback_probe(). This
>   means a failure to change state at this stage (in the absence of
>   other failures) will leave the device instantiated. This is highly
>   unlikley to happen as a failure to change state would indicate a
>   failure to write to xenstore, and that will trigger other error
>   paths. Also, a 'stuck' device can still be cleaned up using 'unbind'
>   in any case.
> 
> Signed-off-by: Paul Durrant 

Acked-by: Wei Liu 

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH net-next 1/3] xen-netback: move netback_probe() and netback_remove() to the end...

2019-12-17 Thread Wei Liu

On Tue, Dec 17, 2019 at 01:32:16PM +, Paul Durrant wrote:
> ...of xenbus.c
> 
> This is a cosmetic function re-ordering to reduce churn in a subsequent
> patch. Some style fix-up was done to make checkpatch.pl happier.
> 
> No functional change.
> 
> Signed-off-by: Paul Durrant 

Acked-by: Wei Liu 

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH net-next 3/3] xen-netback: remove 'hotplug-status' once it has served its purpose

2019-12-17 Thread Wei Liu

On Tue, Dec 17, 2019 at 01:32:18PM +, Paul Durrant wrote:
> Removing the 'hotplug-status' node in netback_remove() is wrong; the script
> may not have completed. Only remove the node once the watch has fired and
> has been unregistered.
> 
> Signed-off-by: Paul Durrant 

Acked-by: Wei Liu 

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH V4 1/4] x86/mm: Add array_index_nospec to guest provided index values

2019-12-17 Thread Tamas K Lengyel

On Tue, Dec 17, 2019 at 8:12 AM Alexandru Stefan ISAILA
 wrote:
>
> This patch aims to sanitize indexes, potentially guest provided
> values, for altp2m_eptp[] and altp2m_p2m[] arrays.
>
> Signed-off-by: Alexandru Isaila 

LGTM, thanks!

Acked-by: Tamas K Lengyel 

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [RFC PATCH 3/3] xen/netback: Fix grant copy across page boundary with KASAN

2019-12-17 Thread Durrant, Paul

> -Original Message-
> From: Xen-devel  On Behalf Of
> Sergey Dyasli
> Sent: 17 December 2019 14:08
> To: xen-de...@lists.xen.org; kasan-...@googlegroups.com; linux-
> ker...@vger.kernel.org
> Cc: Juergen Gross ; Sergey Dyasli
> ; Stefano Stabellini ;
> George Dunlap ; Ross Lagerwall
> ; Alexander Potapenko ;
> Andrey Ryabinin ; Boris Ostrovsky
> ; Dmitry Vyukov 
> Subject: [Xen-devel] [RFC PATCH 3/3] xen/netback: Fix grant copy across
> page boundary with KASAN
> 
> From: Ross Lagerwall 
> 
> When KASAN (or SLUB_DEBUG) is turned on, the normal expectation that
> allocations are aligned to the next power of 2 of the size does not
> hold. Therefore, handle grant copies that cross page boundaries.
> 
> Signed-off-by: Ross Lagerwall 
> Signed-off-by: Sergey Dyasli 

Would have been nice to cc netback maintainers...

> ---
>  drivers/net/xen-netback/common.h  |  2 +-
>  drivers/net/xen-netback/netback.c | 55 ---
>  2 files changed, 45 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-
> netback/common.h
> index 05847eb91a1b..e57684415edd 100644
> --- a/drivers/net/xen-netback/common.h
> +++ b/drivers/net/xen-netback/common.h
> @@ -155,7 +155,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */
>   struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
>   grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
> 
> - struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS];
> + struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS * 2];
>   struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
>   struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
>   /* passed to gnttab_[un]map_refs with pages under (un)mapping */
> diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-
> netback/netback.c
> index 0020b2e8c279..1541b6e0cc62 100644
> --- a/drivers/net/xen-netback/netback.c
> +++ b/drivers/net/xen-netback/netback.c
> @@ -320,6 +320,7 @@ static int xenvif_count_requests(struct xenvif_queue
> *queue,
> 
>  struct xenvif_tx_cb {
>   u16 pending_idx;
> + u8 copies;
>  };

I know we're a way off the limit (48 bytes) but I wonder if we ought to have a 
compile time check here that we're not overflowing skb->cb.

> 
>  #define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb)
> @@ -439,6 +440,7 @@ static int xenvif_tx_check_gop(struct xenvif_queue
> *queue,
>  {
>   struct gnttab_map_grant_ref *gop_map = *gopp_map;
>   u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
> + u8 copies = XENVIF_TX_CB(skb)->copies;
>   /* This always points to the shinfo of the skb being checked, which
>* could be either the first or the one on the frag_list
>*/
> @@ -450,23 +452,27 @@ static int xenvif_tx_check_gop(struct xenvif_queue
> *queue,
>   int nr_frags = shinfo->nr_frags;
>   const bool sharedslot = nr_frags &&
>   frag_get_pending_idx(>frags[0]) ==
> pending_idx;
> - int i, err;
> + int i, err = 0;
> 
> - /* Check status of header. */
> - err = (*gopp_copy)->status;
> - if (unlikely(err)) {
> - if (net_ratelimit())
> - netdev_dbg(queue->vif->dev,
> + while (copies) {
> + /* Check status of header. */
> + int newerr = (*gopp_copy)->status;
> + if (unlikely(newerr)) {
> + if (net_ratelimit())
> + netdev_dbg(queue->vif->dev,
>  "Grant copy of header failed! status: %d
> pending_idx: %u ref: %u\n",
>  (*gopp_copy)->status,
>  pending_idx,
>  (*gopp_copy)->source.u.ref);
> - /* The first frag might still have this slot mapped */
> - if (!sharedslot)
> - xenvif_idx_release(queue, pending_idx,
> -XEN_NETIF_RSP_ERROR);
> + /* The first frag might still have this slot mapped */
> + if (!sharedslot && !err)
> + xenvif_idx_release(queue, pending_idx,
> +XEN_NETIF_RSP_ERROR);

Can't this be done after the loop, if there is an accumulated err? I think it 
would make the code slightly neater.

> + err = newerr;
> + }
> + (*gopp_copy)++;
> + copies--;
>   }
> - (*gopp_copy)++;
> 
>  check_frags:
>   for (i = 0; i < nr_frags; i++, gop_map++) {
> @@ -910,6 +916,7 @@ static void xenvif_tx_build_gops(struct xenvif_queue
> *queue,
>   xenvif_tx_err(queue, , extra_count, idx);
>   break;
>   }
> + XENVIF_TX_CB(skb)->copies = 0;
> 
>   skb_shinfo(skb)->nr_frags = ret;
>   if (data_len < txreq.size)
> @@ -933,6 +940,7 @@ static void

[Xen-devel] [PATCH V4 4/4] x86/mm: Make use of the default access param from xc_altp2m_create_view

2019-12-17 Thread Alexandru Stefan ISAILA

At this moment the default_access param from xc_altp2m_create_view is
not used.

This patch assigns default_access to p2m->default_access at the time of
initializing a new altp2m view.

Signed-off-by: Alexandru Isaila 
---
CC: Jan Beulich 
CC: Andrew Cooper 
CC: Wei Liu 
CC: "Roger Pau Monné" 
CC: George Dunlap 
CC: Ian Jackson 
CC: Julien Grall 
CC: Konrad Rzeszutek Wilk 
CC: Stefano Stabellini 
CC: Razvan Cojocaru 
CC: Tamas K Lengyel 
CC: Petre Pircalabu 
CC: George Dunlap 
---
Changes since V3:
- Change type of hvmmem_default_access to xenmem_access_t
- Fix style issues
- Release lock before return.
---
 xen/arch/x86/hvm/hvm.c  |  3 ++-
 xen/arch/x86/mm/mem_access.c|  6 +++---
 xen/arch/x86/mm/p2m.c   | 27 ++-
 xen/include/asm-x86/p2m.h   |  3 ++-
 xen/include/public/hvm/hvm_op.h |  2 --
 xen/include/xen/mem_access.h|  4 
 6 files changed, 33 insertions(+), 12 deletions(-)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index a129049d6b..d4b19d2412 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -4687,7 +4687,8 @@ static int do_altp2m_op(
 }
 
 case HVMOP_altp2m_create_p2m:
-if ( !(rc = p2m_init_next_altp2m(d, )) )
+if ( !(rc = p2m_init_next_altp2m(d, ,
+ a.u.view.hvmmem_default_access)) )
 rc = __copy_to_guest(arg, , 1) ? -EFAULT : 0;
 break;
 
diff --git a/xen/arch/x86/mm/mem_access.c b/xen/arch/x86/mm/mem_access.c
index 70f3528bb1..288c865ffa 100644
--- a/xen/arch/x86/mm/mem_access.c
+++ b/xen/arch/x86/mm/mem_access.c
@@ -314,9 +314,9 @@ static int set_mem_access(struct domain *d, struct 
p2m_domain *p2m,
 return rc;
 }
 
-static bool xenmem_access_to_p2m_access(struct p2m_domain *p2m,
-xenmem_access_t xaccess,
-p2m_access_t *paccess)
+bool xenmem_access_to_p2m_access(struct p2m_domain *p2m,
+ xenmem_access_t xaccess,
+ p2m_access_t *paccess)
 {
 static const p2m_access_t memaccess[] = {
 #define ACCESS(ac) [XENMEM_access_##ac] = p2m_access_##ac
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index d381f6877f..d67326f8b7 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -25,6 +25,7 @@
 
 #include  /* copy_from_guest() */
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -2533,7 +2534,8 @@ void p2m_flush_altp2m(struct domain *d)
 altp2m_list_unlock(d);
 }
 
-static int p2m_activate_altp2m(struct domain *d, unsigned int idx)
+static int p2m_activate_altp2m(struct domain *d, unsigned int idx,
+   p2m_access_t hvmmem_default_access)
 {
 struct p2m_domain *hostp2m, *p2m;
 int rc;
@@ -2559,7 +2561,7 @@ static int p2m_activate_altp2m(struct domain *d, unsigned 
int idx)
 goto out;
 }
 
-p2m->default_access = hostp2m->default_access;
+p2m->default_access = hvmmem_default_access;
 p2m->domain = hostp2m->domain;
 p2m->global_logdirty = hostp2m->global_logdirty;
 p2m->min_remapped_gfn = gfn_x(INVALID_GFN);
@@ -2576,6 +2578,7 @@ static int p2m_activate_altp2m(struct domain *d, unsigned 
int idx)
 int p2m_init_altp2m_by_id(struct domain *d, unsigned int idx)
 {
 int rc = -EINVAL;
+struct p2m_domain *hostp2m = p2m_get_hostp2m(d);
 
 if ( idx >= MAX_ALTP2M )
 return rc;
@@ -2583,16 +2586,22 @@ int p2m_init_altp2m_by_id(struct domain *d, unsigned 
int idx)
 altp2m_list_lock(d);
 
 if ( d->arch.altp2m_eptp[idx] == mfn_x(INVALID_MFN) )
-rc = p2m_activate_altp2m(d, idx);
+rc = p2m_activate_altp2m(d, idx, hostp2m->default_access);
 
 altp2m_list_unlock(d);
 return rc;
 }
 
-int p2m_init_next_altp2m(struct domain *d, uint16_t *idx)
+int p2m_init_next_altp2m(struct domain *d, uint16_t *idx,
+ xenmem_access_t hvmmem_default_access)
 {
 int rc = -EINVAL;
 unsigned int i;
+p2m_access_t a;
+struct p2m_domain *p2m;
+
+if ( hvmmem_default_access > XENMEM_access_default )
+return rc;
 
 altp2m_list_lock(d);
 
@@ -2601,7 +2610,15 @@ int p2m_init_next_altp2m(struct domain *d, uint16_t *idx)
 if ( d->arch.altp2m_eptp[i] != mfn_x(INVALID_MFN) )
 continue;
 
-rc = p2m_activate_altp2m(d, i);
+p2m = d->arch.altp2m_p2m[i];
+
+if ( !xenmem_access_to_p2m_access(p2m, hvmmem_default_access, ) )
+{
+altp2m_list_unlock(d);
+return -EINVAL;
+}
+
+rc = p2m_activate_altp2m(d, i, a);
 
 if ( !rc )
 *idx = i;
diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
index 94285db1b4..ac2d2787f4 100644
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -884,7 +884,8 @@ bool p2m_altp2m_get_or_propagate(struct p2m_domain *ap2m, 
unsigned long gfn_l,
 int

Re: [Xen-devel] [XEN PATCH v1 1/1] x86/vm_event: add fast single step

2019-12-17 Thread Andrew Cooper

On 17/12/2019 15:10, Tamas K Lengyel wrote:
> On Tue, Dec 17, 2019 at 8:08 AM Tamas K Lengyel  wrote:
>> On Tue, Dec 17, 2019 at 7:48 AM Andrew Cooper  
>> wrote:
>>> On 17/12/2019 14:40, Sergey Kovalev wrote:
 On break point event eight context switches occures.

 With fast single step it is possible to shorten path for two context
 switches
 and gain 35% spead-up.

 Was tested on Debian branch of Xen 4.12. See at:
 https://github.com/skvl/xen/tree/debian/knorrie/4.12/fast-singlestep

 Rebased on master:
 https://github.com/skvl/xen/tree/fast-singlestep

 Signed-off-by: Sergey Kovalev 
>>> 35% looks like a good number, but what is "fast single step"?  All this
>>> appears to be is plumbing for to cause an altp2m switch on single step.
>> Yes, a better explanation would be much needed here and I'm not 100%
>> sure it correctly implements what I think it tries to.
>>
>> This is my interpretation of what the idea is: when using DRAKVUF (or
>> another system using altp2m with shadow pages similar to what I
>> describe in 
>> https://xenproject.org/2016/04/13/stealthy-monitoring-with-xen-altp2m),
>> after a breakpoint is hit the system switches to the default
>> unrestricted altp2m view with singlestep enabled. When the singlestep
>> traps to Xen another vm_event is sent to the monitor agent, which then
>> normally disables singlestepping and switches the altp2m view back to
>> the restricted view. This patch looks like its short-circuiting that
>> last part so that it doesn't need to send the vm_event out for the
>> singlestep event and should switch back to the restricted view in Xen
>> automatically. It's a nice optimization. But what seems to be missing
>> is the altp2m switch itself.
> Never mind, p2m_altp2m_check does the altp2m switch as well, so this
> patch implements what I described above. Please update the patch
> message to be more descriptive (you can copy my description from
> above).

Also please read CODING_STYLE in the root of the xen repository.  The
important ones you need to fix are spaces in "if ( ... )" statements,
and binary operators on the end of the first line rather than the
beginning of the continuation.

~Andrew

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] [PATCH V4 3/4] x86/mm: Pull out the p2m specifics from p2m_init_altp2m_ept

2019-12-17 Thread Alexandru Stefan ISAILA

Signed-off-by: Alexandru Isaila 
---
CC: Jun Nakajima 
CC: Kevin Tian 
CC: George Dunlap 
CC: Jan Beulich 
CC: Andrew Cooper 
CC: Wei Liu 
CC: "Roger Pau Monné" 
---
 xen/arch/x86/mm/p2m-ept.c | 6 --
 xen/arch/x86/mm/p2m.c | 6 ++
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c
index e088a63f56..362f7079ab 100644
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -1358,13 +1358,7 @@ void p2m_init_altp2m_ept(struct domain *d, unsigned int 
i)
 struct p2m_domain *hostp2m = p2m_get_hostp2m(d);
 struct ept_data *ept;
 
-p2m->default_access = hostp2m->default_access;
-p2m->domain = hostp2m->domain;
-
-p2m->global_logdirty = hostp2m->global_logdirty;
 p2m->ept.ad = hostp2m->ept.ad;
-p2m->min_remapped_gfn = gfn_x(INVALID_GFN);
-p2m->max_mapped_pfn = p2m->max_remapped_gfn = 0;
 ept = &p2m->ept;
 ept->mfn = pagetable_get_pfn(p2m_get_pagetable(p2m));
 d->arch.altp2m_eptp[array_index_nospec(i, MAX_EPTP)] = ept->eptp;
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index 253cab3458..d381f6877f 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -2559,6 +2559,12 @@ static int p2m_activate_altp2m(struct domain *d, 
unsigned int idx)
 goto out;
 }
 
+p2m->default_access = hostp2m->default_access;
+p2m->domain = hostp2m->domain;
+p2m->global_logdirty = hostp2m->global_logdirty;
+p2m->min_remapped_gfn = gfn_x(INVALID_GFN);
+p2m->max_mapped_pfn = p2m->max_remapped_gfn = 0;
+
 p2m_init_altp2m_ept(d, idx);
 
  out:
-- 
2.17.1

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] [PATCH V4 2/4] x86/altp2m: Add hypercall to set a range of sve bits

2019-12-17 Thread Alexandru Stefan ISAILA

By default the sve bits are not set.
This patch adds a new hypercall, xc_altp2m_set_supress_ve_multi(),
to set a range of sve bits.
The core function, p2m_set_suppress_ve_multi(), does not break in case
of an error and makes a best effort to set the bits in the
given range. A check for continuation is made in order to have
preemption on big ranges.
The gfn of the first error is stored in
xen_hvm_altp2m_suppress_ve_multi.first_error and the error code is
stored in xen_hvm_altp2m_suppress_ve_multi.first_error_code.
If no error occurred the values will be 0.

Signed-off-by: Alexandru Isaila 
---
CC: Ian Jackson 
CC: Wei Liu 
CC: Andrew Cooper 
CC: George Dunlap 
CC: Jan Beulich 
CC: Julien Grall 
CC: Konrad Rzeszutek Wilk 
CC: Stefano Stabellini 
CC: "Roger Pau Monné" 
CC: George Dunlap 
CC: Razvan Cojocaru 
CC: Tamas K Lengyel 
CC: Petre Pircalabu 
---
Changes since V3:
- Update commit message
- Check rc and __copy_to_guest() in the same if
- Fix style issue
- Fix comment typo
- Init p2m with host_p2m
- Use array_index_nospec() in altp2m_p2m[] and altp2m_eptp[]
- Drop opaque
- Use pad2 to return first error code
- Update first_gfn
- Stop the range loop at cpuid->extd.maxphysaddr.
---
 tools/libxc/include/xenctrl.h   |  4 +++
 tools/libxc/xc_altp2m.c | 33 +
 xen/arch/x86/hvm/hvm.c  | 15 
 xen/arch/x86/mm/p2m.c   | 64 +
 xen/include/public/hvm/hvm_op.h | 13 +++
 xen/include/xen/mem_access.h|  3 ++
 6 files changed, 132 insertions(+)

diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
index f4431687b3..2ace8ea80e 100644
--- a/tools/libxc/include/xenctrl.h
+++ b/tools/libxc/include/xenctrl.h
@@ -1923,6 +1923,10 @@ int xc_altp2m_switch_to_view(xc_interface *handle, 
uint32_t domid,
  uint16_t view_id);
 int xc_altp2m_set_suppress_ve(xc_interface *handle, uint32_t domid,
   uint16_t view_id, xen_pfn_t gfn, bool sve);
+int xc_altp2m_set_supress_ve_multi(xc_interface *handle, uint32_t domid,
+   uint16_t view_id, xen_pfn_t first_gfn,
+   xen_pfn_t last_gfn, bool sve,
+   xen_pfn_t *error_gfn, uint32_t *error_code);
 int xc_altp2m_get_suppress_ve(xc_interface *handle, uint32_t domid,
   uint16_t view_id, xen_pfn_t gfn, bool *sve);
 int xc_altp2m_set_mem_access(xc_interface *handle, uint32_t domid,
diff --git a/tools/libxc/xc_altp2m.c b/tools/libxc/xc_altp2m.c
index 09dad0355e..9f7e8315b3 100644
--- a/tools/libxc/xc_altp2m.c
+++ b/tools/libxc/xc_altp2m.c
@@ -234,6 +234,39 @@ int xc_altp2m_set_suppress_ve(xc_interface *handle, 
uint32_t domid,
 return rc;
 }
 
+int xc_altp2m_set_supress_ve_multi(xc_interface *handle, uint32_t domid,
+   uint16_t view_id, xen_pfn_t first_gfn,
+   xen_pfn_t last_gfn, bool sve,
+   xen_pfn_t *error_gfn, uint32_t *error_code)
+{
+int rc;
+DECLARE_HYPERCALL_BUFFER(xen_hvm_altp2m_op_t, arg);
+
+arg = xc_hypercall_buffer_alloc(handle, arg, sizeof(*arg));
+if ( arg == NULL )
+return -1;
+
+arg->version = HVMOP_ALTP2M_INTERFACE_VERSION;
+arg->cmd = HVMOP_altp2m_set_suppress_ve_multi;
+arg->domain = domid;
+arg->u.suppress_ve_multi.view = view_id;
+arg->u.suppress_ve_multi.first_gfn = first_gfn;
+arg->u.suppress_ve_multi.last_gfn = last_gfn;
+arg->u.suppress_ve_multi.suppress_ve = sve;
+
+rc = xencall2(handle->xcall, __HYPERVISOR_hvm_op, HVMOP_altp2m,
+  HYPERCALL_BUFFER_AS_ARG(arg));
+
+if ( arg->u.suppress_ve_multi.first_error )
+{
+*error_gfn = arg->u.suppress_ve_multi.first_error;
+*error_code = arg->u.suppress_ve_multi.first_error_code;
+}
+
+xc_hypercall_buffer_free(handle, arg);
+return rc;
+}
+
 int xc_altp2m_set_mem_access(xc_interface *handle, uint32_t domid,
  uint16_t view_id, xen_pfn_t gfn,
  xenmem_access_t access)
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 47573f71b8..a129049d6b 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -4553,6 +4553,7 @@ static int do_altp2m_op(
 case HVMOP_altp2m_destroy_p2m:
 case HVMOP_altp2m_switch_p2m:
 case HVMOP_altp2m_set_suppress_ve:
+case HVMOP_altp2m_set_suppress_ve_multi:
 case HVMOP_altp2m_get_suppress_ve:
 case HVMOP_altp2m_set_mem_access:
 case HVMOP_altp2m_set_mem_access_multi:
@@ -4711,6 +4712,20 @@ static int do_altp2m_op(
 }
 break;
 
+case HVMOP_altp2m_set_suppress_ve_multi:
+if ( a.u.suppress_ve_multi.pad1 ||
+ a.u.suppress_ve_multi.first_error_code ||
+

[Xen-devel] [PATCH V4 1/4] x86/mm: Add array_index_nospec to guest provided index values

2019-12-17 Thread Alexandru Stefan ISAILA

This patch aims to sanitize indexes, potentially guest provided
values, for altp2m_eptp[] and altp2m_p2m[] arrays.

Signed-off-by: Alexandru Isaila 
---
CC: Razvan Cojocaru 
CC: Tamas K Lengyel 
CC: Petre Pircalabu 
CC: George Dunlap 
CC: Jan Beulich 
CC: Andrew Cooper 
CC: Wei Liu 
CC: "Roger Pau Monné" 
CC: Jun Nakajima 
CC: Kevin Tian 
---
 xen/arch/x86/mm/mem_access.c | 15 +--
 xen/arch/x86/mm/p2m-ept.c|  5 +++--
 xen/arch/x86/mm/p2m.c| 27 +--
 3 files changed, 29 insertions(+), 18 deletions(-)

diff --git a/xen/arch/x86/mm/mem_access.c b/xen/arch/x86/mm/mem_access.c
index 320b9fe621..70f3528bb1 100644
--- a/xen/arch/x86/mm/mem_access.c
+++ b/xen/arch/x86/mm/mem_access.c
@@ -367,10 +367,11 @@ long p2m_set_mem_access(struct domain *d, gfn_t gfn, 
uint32_t nr,
 if ( altp2m_idx )
 {
 if ( altp2m_idx >= MAX_ALTP2M ||
- d->arch.altp2m_eptp[altp2m_idx] == mfn_x(INVALID_MFN) )
+ d->arch.altp2m_eptp[array_index_nospec(altp2m_idx, MAX_EPTP)] ==
+ mfn_x(INVALID_MFN) )
 return -EINVAL;
 
-ap2m = d->arch.altp2m_p2m[altp2m_idx];
+ap2m = d->arch.altp2m_p2m[array_index_nospec(altp2m_idx, MAX_ALTP2M)];
 }
 #else
 ASSERT(!altp2m_idx);
@@ -426,10 +427,11 @@ long p2m_set_mem_access_multi(struct domain *d,
 if ( altp2m_idx )
 {
 if ( altp2m_idx >= MAX_ALTP2M ||
- d->arch.altp2m_eptp[altp2m_idx] == mfn_x(INVALID_MFN) )
+ d->arch.altp2m_eptp[array_index_nospec(altp2m_idx, MAX_EPTP)] ==
+ mfn_x(INVALID_MFN) )
 return -EINVAL;
 
-ap2m = d->arch.altp2m_p2m[altp2m_idx];
+ap2m = d->arch.altp2m_p2m[array_index_nospec(altp2m_idx, MAX_ALTP2M)];
 }
 #else
 ASSERT(!altp2m_idx);
@@ -492,10 +494,11 @@ int p2m_get_mem_access(struct domain *d, gfn_t gfn, 
xenmem_access_t *access,
 else if ( altp2m_idx ) /* altp2m view 0 is treated as the hostp2m */
 {
 if ( altp2m_idx >= MAX_ALTP2M ||
- d->arch.altp2m_eptp[altp2m_idx] == mfn_x(INVALID_MFN) )
+ d->arch.altp2m_eptp[array_index_nospec(altp2m_idx, MAX_EPTP)] ==
+ mfn_x(INVALID_MFN) )
 return -EINVAL;
 
-p2m = d->arch.altp2m_p2m[altp2m_idx];
+p2m = d->arch.altp2m_p2m[array_index_nospec(altp2m_idx, MAX_ALTP2M)];
 }
 #else
 ASSERT(!altp2m_idx);
diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c
index b5517769c9..e088a63f56 100644
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -1353,7 +1353,8 @@ void setup_ept_dump(void)
 
 void p2m_init_altp2m_ept(struct domain *d, unsigned int i)
 {
-struct p2m_domain *p2m = d->arch.altp2m_p2m[i];
+struct p2m_domain *p2m =
+   d->arch.altp2m_p2m[array_index_nospec(i, MAX_ALTP2M)];
 struct p2m_domain *hostp2m = p2m_get_hostp2m(d);
 struct ept_data *ept;
 
@@ -1366,7 +1367,7 @@ void p2m_init_altp2m_ept(struct domain *d, unsigned int i)
 p2m->max_mapped_pfn = p2m->max_remapped_gfn = 0;
 ept = &p2m->ept;
 ept->mfn = pagetable_get_pfn(p2m_get_pagetable(p2m));
-d->arch.altp2m_eptp[i] = ept->eptp;
+d->arch.altp2m_eptp[array_index_nospec(i, MAX_EPTP)] = ept->eptp;
 }
 
 unsigned int p2m_find_altp2m_by_eptp(struct domain *d, uint64_t eptp)
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index ba126f790a..7e7f4f1a7c 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -2499,7 +2499,7 @@ static void p2m_reset_altp2m(struct domain *d, unsigned 
int idx,
 struct p2m_domain *p2m;
 
 ASSERT(idx < MAX_ALTP2M);
-p2m = d->arch.altp2m_p2m[idx];
+p2m = d->arch.altp2m_p2m[array_index_nospec(idx, MAX_ALTP2M)];
 
 p2m_lock(p2m);
 
@@ -2540,7 +2540,7 @@ static int p2m_activate_altp2m(struct domain *d, unsigned 
int idx)
 
 ASSERT(idx < MAX_ALTP2M);
 
-p2m = d->arch.altp2m_p2m[idx];
+p2m = d->arch.altp2m_p2m[array_index_nospec(idx, MAX_ALTP2M)];
 hostp2m = p2m_get_hostp2m(d);
 
 p2m_lock(p2m);
@@ -2622,9 +2622,10 @@ int p2m_destroy_altp2m_by_id(struct domain *d, unsigned 
int idx)
 rc = -EBUSY;
 altp2m_list_lock(d);
 
-if ( d->arch.altp2m_eptp[idx] != mfn_x(INVALID_MFN) )
+if ( d->arch.altp2m_eptp[array_index_nospec(idx, MAX_EPTP)] !=
+ mfn_x(INVALID_MFN) )
 {
-p2m = d->arch.altp2m_p2m[idx];
+p2m = d->arch.altp2m_p2m[array_index_nospec(idx, MAX_ALTP2M)];
 
 if ( !_atomic_read(p2m->active_vcpus) )
 {
@@ -2686,11 +2687,13 @@ int p2m_change_altp2m_gfn(struct domain *d, unsigned 
int idx,
 mfn_t mfn;
 int rc = -EINVAL;
 
-if ( idx >= MAX_ALTP2M || d->arch.altp2m_eptp[idx] == mfn_x(INVALID_MFN) )
+if ( idx >= MAX_ALTP2M ||
+ d->arch.altp2m_eptp[array_index_nospec(idx, MAX_EPTP)] ==
+ mfn_x(INVALID_MFN) )
 return rc;
 
 hp2m = p2m_get_hostp2m(d);
-ap2m = d->arch.altp2m_p2m[idx];
+ap2m = d->arch.altp2m_p2m[array_index_nospec(idx,

Re: [Xen-devel] [XEN PATCH v1 1/1] x86/vm_event: add fast single step

2019-12-17 Thread Tamas K Lengyel

On Tue, Dec 17, 2019 at 8:08 AM Tamas K Lengyel  wrote:
>
> On Tue, Dec 17, 2019 at 7:48 AM Andrew Cooper  
> wrote:
> >
> > On 17/12/2019 14:40, Sergey Kovalev wrote:
> > > On break point event eight context switches occures.
> > >
> > > With fast single step it is possible to shorten path for two context
> > > switches
> > > and gain 35% spead-up.
> > >
> > > Was tested on Debian branch of Xen 4.12. See at:
> > > https://github.com/skvl/xen/tree/debian/knorrie/4.12/fast-singlestep
> > >
> > > Rebased on master:
> > > https://github.com/skvl/xen/tree/fast-singlestep
> > >
> > > Signed-off-by: Sergey Kovalev 
> >
> > 35% looks like a good number, but what is "fast single step"?  All this
> > appears to be is plumbing for to cause an altp2m switch on single step.
>
> Yes, a better explanation would be much needed here and I'm not 100%
> sure it correctly implements what I think it tries to.
>
> This is my interpretation of what the idea is: when using DRAKVUF (or
> another system using altp2m with shadow pages similar to what I
> describe in 
> https://xenproject.org/2016/04/13/stealthy-monitoring-with-xen-altp2m),
> after a breakpoint is hit the system switches to the default
> unrestricted altp2m view with singlestep enabled. When the singlestep
> traps to Xen another vm_event is sent to the monitor agent, which then
> normally disables singlestepping and switches the altp2m view back to
> the restricted view. This patch looks like its short-circuiting that
> last part so that it doesn't need to send the vm_event out for the
> singlestep event and should switch back to the restricted view in Xen
> automatically. It's a nice optimization. But what seems to be missing
> is the altp2m switch itself.

Never mind, p2m_altp2m_check does the altp2m switch as well, so this
patch implements what I described above. Please update the patch
message to be more descriptive (you can copy my description from
above).

Thanks!
Tamas

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH 2/6] x86/suspend: Don't bother saving %cr3, %ss or flags

2019-12-17 Thread Roger Pau Monné

On Tue, Dec 17, 2019 at 12:26:24PM +, Andrew Cooper wrote:
> On 17/12/2019 12:18, Roger Pau Monné wrote:
> > On Tue, Dec 17, 2019 at 12:06:01PM +, Andrew Cooper wrote:
> >> On 17/12/2019 11:52, Roger Pau Monné wrote:
> >>> On Fri, Dec 13, 2019 at 07:04:32PM +, Andrew Cooper wrote:
>  The trampoline has already set up the idle pagetables (which are the 
>  correct
>  ones to use), and sanitised the flags state.
> >>> I wonder why do we have wakeup.S and wakeup_prot.S, it would be easier
> >>> to follow if it all was in the same file IMO.
> >> wakeup.S is the 16bit entry point, and lives in the trampoline below 1M.
> >>
> >> wakeup_prot.S is a bit of logic which lives in the main hypervisor.
> >>
> >> The naming could probably do with some improvement, but they can't
> >> feasibly be part of the same file.
> > Hm, I'm not sure I follow. Isn't this trampoline copied by Xen in a
> > suitable position below the 1M boundary, and hence could use symbols
> > in order to figure out which part to copy?
> >
> > Ie: both the low and the high part could live in the same file as long
> > as Xen knows how to differentiate those and which chunk needs
> > positioning below 1M?
> 
> There is one trampoline.S (and trampoline.o) which gathers together
> various files (including wakeup.S) to construct the trampoline.

Oh, I see it's all included to make a single unit, and the symbols
used to mark the start and end of the trampoline chunk are defined
outside of the included file.

> It is not something which can be constructed simply by putting code/data
> in the requisite sections.  There are two main entrypoints, one with a
> 4k alignment requirement, one with 16 byte alignment, and we split the
> trampoline into two parts - one which is BSP-only and is several pages
> in size, and one which is post-boot which is only a single page.

Given the size of s3_resume I would guess there's space in that single
page to fit it, but since it doesn't need to live below the 1M
boundary it could be seen as a waste.

Anyway, leaving it as-is is fine since placing it in wakeup.S would be
a waste of space or require some restructuring of how the trampoline
code is assembled.

Thanks, Roger.

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [XEN PATCH v1 1/1] x86/vm_event: add fast single step

2019-12-17 Thread Tamas K Lengyel

On Tue, Dec 17, 2019 at 7:48 AM Andrew Cooper  wrote:
>
> On 17/12/2019 14:40, Sergey Kovalev wrote:
> > On break point event eight context switches occures.
> >
> > With fast single step it is possible to shorten path for two context
> > switches
> > and gain 35% spead-up.
> >
> > Was tested on Debian branch of Xen 4.12. See at:
> > https://github.com/skvl/xen/tree/debian/knorrie/4.12/fast-singlestep
> >
> > Rebased on master:
> > https://github.com/skvl/xen/tree/fast-singlestep
> >
> > Signed-off-by: Sergey Kovalev 
>
> 35% looks like a good number, but what is "fast single step"?  All this
> appears to be is plumbing for to cause an altp2m switch on single step.

Yes, a better explanation would be much needed here and I'm not 100%
sure it correctly implements what I think it tries to.

This is my interpretation of what the idea is: when using DRAKVUF (or
another system using altp2m with shadow pages similar to what I
describe in 
https://xenproject.org/2016/04/13/stealthy-monitoring-with-xen-altp2m),
after a breakpoint is hit the system switches to the default
unrestricted altp2m view with singlestep enabled. When the singlestep
traps to Xen another vm_event is sent to the monitor agent, which then
normally disables singlestepping and switches the altp2m view back to
the restricted view. This patch looks like it's short-circuiting that
last part so that it doesn't need to send the vm_event out for the
singlestep event and should switch back to the restricted view in Xen
automatically. It's a nice optimization. But what seems to be missing
is the altp2m switch itself.

Tamas

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [XEN PATCH v1 1/1] x86/vm_event: add fast single step

2019-12-17 Thread Sergey Kovalev


On 17.12.2019 17:48, Andrew Cooper wrote:

On 17/12/2019 14:40, Sergey Kovalev wrote:

On a breakpoint event, eight context switches occur.

With fast single step it is possible to shorten the path by two context
switches
and gain a 35% speed-up.

Was tested on Debian branch of Xen 4.12. See at:
https://github.com/skvl/xen/tree/debian/knorrie/4.12/fast-singlestep

Rebased on master:
https://github.com/skvl/xen/tree/fast-singlestep

Signed-off-by: Sergey Kovalev 


35% looks like a good number, but what is "fast single step"?  All this
appears to be is plumbing for to cause an altp2m switch on single step.

~Andrew



You are right. I should have quoted "fast single step".

Original INT#3 path is like this (in PlantUML):
@startuml
VM->Xen : EXIT_REASON_EXCEPTION_NMI
Xen->LibVMI: request(VM_EVENT_REASON_SOFTWARE_BREAKPOINT)
LibVMI->Xen: response(singlestep | altp2m)
Xen->VM:
VM->Xen: EXIT_REASON_MONITOR_TRAP_FLAG
Xen->LibVMI: request(VM_EVENT_REASON_SINGLESTEP)
LibVMI->Xen: response(altp2m)
Xen->VM:
@enduml

With fast single step it looks like this:
@startuml
VM->Xen : EXIT_REASON_EXCEPTION_NMI
Xen->LibVMI: request(VM_EVENT_REASON_SOFTWARE_BREAKPOINT)
LibVMI->Xen: response(fast singlestep | altp2m)
Xen->VM:
VM->Xen: EXIT_REASON_MONITOR_TRAP_FLAG
Xen->Xen: fast singlestep
Xen->VM:
@enduml

So we just store altp2m index and switch to it on MTF.

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH v3 17/22] golang/xenlight: implement array C to Go marshaling

2019-12-17 Thread Nick Rosbrook

On Tue, Dec 17, 2019 at 6:16 AM George Dunlap  wrote:
>
> On 12/10/19 3:47 PM, Nick Rosbrook wrote:
> > From: Nick Rosbrook 
> >
> > Signed-off-by: Nick Rosbrook 
> > ---
> >  tools/golang/xenlight/gengotypes.py  |  39 +++-
> >  tools/golang/xenlight/helpers.gen.go | 300 +++
> >  2 files changed, 338 insertions(+), 1 deletion(-)
> >
> > diff --git a/tools/golang/xenlight/gengotypes.py 
> > b/tools/golang/xenlight/gengotypes.py
> > index b68c1aa66b..ee9aaf9eff 100644
> > --- a/tools/golang/xenlight/gengotypes.py
> > +++ b/tools/golang/xenlight/gengotypes.py
> > @@ -252,7 +252,7 @@ def xenlight_golang_define_from_C(ty = None):
> >  for f in ty.fields:
> >  if f.type.typename is not None:
> >  if isinstance(f.type, idl.Array):
> > -# TODO
> > +body += xenlight_golang_array_from_C(f)
> >  continue
> >
> >  body += xenlight_golang_convert_from_C(f)
> > @@ -399,6 +399,43 @@ def xenlight_golang_union_from_C(ty = None, union_name 
> > = '', struct_name = ''):
> >
> >  return (s,extras)
> >
> > +def xenlight_golang_array_from_C(ty = None):
> > +"""
> > +Convert C array to Go slice using the method
> > +described here:
> > +
> > +https://github.com/golang/go/wiki/cgo#turning-c-arrays-into-go-slices
> > +"""
> > +s = ''
> > +
> > +gotypename = xenlight_golang_fmt_name(ty.type.elem_type.typename)
> > +goname = xenlight_golang_fmt_name(ty.name)
> > +ctypename  = ty.type.elem_type.typename
> > +cname  = ty.name
> > +cslice = 'c{}'.format(goname)
> > +clenvar= ty.type.lenvar.name
> > +golenvar   = xenlight_golang_fmt_name(clenvar,exported=False)
> > +
> > +s += '{} := int(xc.{})\n'.format(golenvar, clenvar)
> > +s += '{} := '.format(cslice)
> > +s 
> > +='(*[1<<28]C.{})(unsafe.Pointer(xc.{}))[:{}:{}]\n'.format(ctypename, cname,
> > +golenvar, 
> > golenvar)
> > +s += 'x.{} = make([]{}, {})\n'.format(goname, gotypename, golenvar)
> > +s += 'for i, v := range {} {{\n'.format(cslice)
> > +
> > +is_enum = isinstance(ty.type.elem_type,idl.Enumeration)
> > +if gotypename in go_builtin_types or is_enum:
> > +s += 'x.{}[i] = {}(v)\n'.format(goname, gotypename)
> > +else:
> > +s += 'var e {}\n'.format(gotypename)
> > +s += 'if err := e.fromC(); err != nil {\n'
> > +s += 'return err }\n'
> > +s += 'x.{}[i] = e\n'.format(goname)
> > +
> > +s += '}\n'
> > +
> > +return s
> > +
> >  def xenlight_golang_fmt_name(name, exported = True):
> >  """
> >  Take a given type name and return an
> > diff --git a/tools/golang/xenlight/helpers.gen.go 
> > b/tools/golang/xenlight/helpers.gen.go
> > index e6eee234c0..2f917cac58 100644
> > --- a/tools/golang/xenlight/helpers.gen.go
> > +++ b/tools/golang/xenlight/helpers.gen.go
> > @@ -263,6 +263,16 @@ func (x *SchedParams) fromC(xc *C.libxl_sched_params) 
> > error {
> >
> >  func (x *VcpuSchedParams) fromC(xc *C.libxl_vcpu_sched_params) error {
> >   x.Sched = Scheduler(xc.sched)
> > + numVcpus := int(xc.num_vcpus)
> > + cVcpus := (*[1 << 
> > 28]C.libxl_sched_params)(unsafe.Pointer(xc.vcpus))[:numVcpus:numVcpus]
> > + x.Vcpus = make([]SchedParams, numVcpus)
> > + for i, v := range cVcpus {
> > + var e SchedParams
> > + if err := e.fromC(); err != nil {
> > + return err
> > + }
> > + x.Vcpus[i] = e
>
> Along the same lines, any reason not to do the following?
>
> if err := x.Vcpus[i].fromC(); err != nil {
> return err
> }

Nope, no problem with that.

Thanks,
-NR

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

1 2 >

1 - 100 of 152 matches

Mail list logo