[Xen-devel] [PATCH 1/9] xen/sched: move schedulers and cpupool coding to dedicated directory
Move sched*c and cpupool.c to a new directory common/sched. Signed-off-by: Juergen Gross --- MAINTAINERS| 8 +-- xen/common/Kconfig | 66 +- xen/common/Makefile| 8 +-- xen/common/sched/Kconfig | 65 + xen/common/sched/Makefile | 7 +++ .../{compat/schedule.c => sched/compat_schedule.c} | 2 +- xen/common/{ => sched}/cpupool.c | 0 xen/common/{ => sched}/sched_arinc653.c| 0 xen/common/{ => sched}/sched_credit.c | 0 xen/common/{ => sched}/sched_credit2.c | 0 xen/common/{ => sched}/sched_null.c| 0 xen/common/{ => sched}/sched_rt.c | 0 xen/common/{ => sched}/schedule.c | 2 +- 13 files changed, 80 insertions(+), 78 deletions(-) create mode 100644 xen/common/sched/Kconfig create mode 100644 xen/common/sched/Makefile rename xen/common/{compat/schedule.c => sched/compat_schedule.c} (97%) rename xen/common/{ => sched}/cpupool.c (100%) rename xen/common/{ => sched}/sched_arinc653.c (100%) rename xen/common/{ => sched}/sched_credit.c (100%) rename xen/common/{ => sched}/sched_credit2.c (100%) rename xen/common/{ => sched}/sched_null.c (100%) rename xen/common/{ => sched}/sched_rt.c (100%) rename xen/common/{ => sched}/schedule.c (99%) diff --git a/MAINTAINERS b/MAINTAINERS index 012c847ebd..37d4da2bc2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -174,7 +174,7 @@ M: Josh Whitehead M: Stewart Hildebrand S: Supported L: DornerWorks Xen-Devel -F: xen/common/sched_arinc653.c +F: xen/common/sched/sched_arinc653.c F: tools/libxc/xc_arinc653.c ARM (W/ VIRTUALISATION EXTENSIONS) ARCHITECTURE @@ -212,7 +212,7 @@ CPU POOLS M: Juergen Gross M: Dario Faggioli S: Supported -F: xen/common/cpupool.c +F: xen/common/sched/cpupool.c DEVICE TREE M: Stefano Stabellini @@ -378,13 +378,13 @@ RTDS SCHEDULER M: Dario Faggioli M: Meng Xu S: Supported -F: xen/common/sched_rt.c +F: xen/common/sched/sched_rt.c SCHEDULING M: George Dunlap M: Dario Faggioli S: Supported -F: xen/common/sched* +F: xen/common/sched/ SEABIOS UPSTREAM M: Wei Liu diff --git a/xen/common/Kconfig b/xen/common/Kconfig index 
2f516da101..79465fc1f9 100644 --- a/xen/common/Kconfig +++ b/xen/common/Kconfig @@ -278,71 +278,7 @@ config ARGO If unsure, say N. -menu "Schedulers" - visible if EXPERT = "y" - -config SCHED_CREDIT - bool "Credit scheduler support" - default y - ---help--- - The traditional credit scheduler is a general purpose scheduler. - -config SCHED_CREDIT2 - bool "Credit2 scheduler support" - default y - ---help--- - The credit2 scheduler is a general purpose scheduler that is - optimized for lower latency and higher VM density. - -config SCHED_RTDS - bool "RTDS scheduler support (EXPERIMENTAL)" - default y - ---help--- - The RTDS scheduler is a soft and firm real-time scheduler for - multicore, targeted for embedded, automotive, graphics and gaming - in the cloud, and general low-latency workloads. - -config SCHED_ARINC653 - bool "ARINC653 scheduler support (EXPERIMENTAL)" - default DEBUG - ---help--- - The ARINC653 scheduler is a hard real-time scheduler for single - cores, targeted for avionics, drones, and medical devices. - -config SCHED_NULL - bool "Null scheduler support (EXPERIMENTAL)" - default y - ---help--- - The null scheduler is a static, zero overhead scheduler, - for when there always are less vCPUs than pCPUs, typically - in embedded or HPC scenarios. - -choice - prompt "Default Scheduler?" 
- default SCHED_CREDIT2_DEFAULT - - config SCHED_CREDIT_DEFAULT - bool "Credit Scheduler" if SCHED_CREDIT - config SCHED_CREDIT2_DEFAULT - bool "Credit2 Scheduler" if SCHED_CREDIT2 - config SCHED_RTDS_DEFAULT - bool "RT Scheduler" if SCHED_RTDS - config SCHED_ARINC653_DEFAULT - bool "ARINC653 Scheduler" if SCHED_ARINC653 - config SCHED_NULL_DEFAULT - bool "Null Scheduler" if SCHED_NULL -endchoice - -config SCHED_DEFAULT - string - default "credit" if SCHED_CREDIT_DEFAULT - default "credit2" if SCHED_CREDIT2_DEFAULT - default "rtds" if SCHED_RTDS_DEFAULT - default "arinc653" if SCHED_ARINC653_DEFAULT - default "null" if SCHED_NULL_DEFAULT - default "credit2" - -endmenu +source "common/sched/Kconfig" config CRYPTO bool diff --git a/xen/common/Makefile b/xen/common/Makefile index 62b34e69e9..2abb8250b0 100644 --- a/xen/common/Makefile +++
[Xen-devel] [PATCH 6/9] xen/sched: replace null scheduler percpu-variable with pdata hook
Instead of keeping the per-cpu private data of the null scheduler in a dedicated percpu variable, use the generic scheduler interface (the pdata hooks) provided for that purpose. Signed-off-by: Juergen Gross --- xen/common/sched/sched_null.c | 89 +-- 1 file changed, 60 insertions(+), 29 deletions(-) diff --git a/xen/common/sched/sched_null.c b/xen/common/sched/sched_null.c index 5a23a7e7dc..11aab25743 100644 --- a/xen/common/sched/sched_null.c +++ b/xen/common/sched/sched_null.c @@ -89,7 +89,6 @@ struct null_private { struct null_pcpu { struct sched_unit *unit; }; -DEFINE_PER_CPU(struct null_pcpu, npc); /* * Schedule unit @@ -159,32 +158,48 @@ static void null_deinit(struct scheduler *ops) ops->sched_data = NULL; } -static void init_pdata(struct null_private *prv, unsigned int cpu) +static void init_pdata(struct null_private *prv, struct null_pcpu *npc, + unsigned int cpu) { /* Mark the pCPU as free, and with no unit assigned */ cpumask_set_cpu(cpu, &prv->cpus_free); -per_cpu(npc, cpu).unit = NULL; +npc->unit = NULL; } static void null_init_pdata(const struct scheduler *ops, void *pdata, int cpu) { struct null_private *prv = null_priv(ops); -/* alloc_pdata is not implemented, so we want this to be NULL.
*/ -ASSERT(!pdata); +ASSERT(pdata); -init_pdata(prv, cpu); +init_pdata(prv, pdata, cpu); } static void null_deinit_pdata(const struct scheduler *ops, void *pcpu, int cpu) { struct null_private *prv = null_priv(ops); +struct null_pcpu *npc = pcpu; -/* alloc_pdata not implemented, so this must have stayed NULL */ -ASSERT(!pcpu); +ASSERT(npc); cpumask_clear_cpu(cpu, >cpus_free); -per_cpu(npc, cpu).unit = NULL; +npc->unit = NULL; +} + +static void *null_alloc_pdata(const struct scheduler *ops, int cpu) +{ +struct null_pcpu *npc; + +npc = xzalloc(struct null_pcpu); +if ( npc == NULL ) +return ERR_PTR(-ENOMEM); + +return npc; +} + +static void null_free_pdata(const struct scheduler *ops, void *pcpu, int cpu) +{ +xfree(pcpu); } static void *null_alloc_udata(const struct scheduler *ops, @@ -268,6 +283,7 @@ pick_res(struct null_private *prv, const struct sched_unit *unit) unsigned int bs; unsigned int cpu = sched_unit_master(unit), new_cpu; cpumask_t *cpus = cpupool_domain_master_cpumask(unit->domain); +struct null_pcpu *npc = get_sched_res(cpu)->sched_priv; ASSERT(spin_is_locked(get_sched_res(cpu)->schedule_lock)); @@ -286,8 +302,7 @@ pick_res(struct null_private *prv, const struct sched_unit *unit) * don't, so we get to keep in the scratch cpumask what we have just * put in it.) 
*/ -if ( likely((per_cpu(npc, cpu).unit == NULL || - per_cpu(npc, cpu).unit == unit) +if ( likely((npc->unit == NULL || npc->unit == unit) && cpumask_test_cpu(cpu, cpumask_scratch_cpu(cpu))) ) { new_cpu = cpu; @@ -336,9 +351,11 @@ pick_res(struct null_private *prv, const struct sched_unit *unit) static void unit_assign(struct null_private *prv, struct sched_unit *unit, unsigned int cpu) { +struct null_pcpu *npc = get_sched_res(cpu)->sched_priv; + ASSERT(is_unit_online(unit)); -per_cpu(npc, cpu).unit = unit; +npc->unit = unit; sched_set_res(unit, get_sched_res(cpu)); cpumask_clear_cpu(cpu, >cpus_free); @@ -363,12 +380,13 @@ static bool unit_deassign(struct null_private *prv, struct sched_unit *unit) unsigned int bs; unsigned int cpu = sched_unit_master(unit); struct null_unit *wvc; +struct null_pcpu *npc = get_sched_res(cpu)->sched_priv; ASSERT(list_empty(_unit(unit)->waitq_elem)); -ASSERT(per_cpu(npc, cpu).unit == unit); +ASSERT(npc->unit == unit); ASSERT(!cpumask_test_cpu(cpu, >cpus_free)); -per_cpu(npc, cpu).unit = NULL; +npc->unit = NULL; cpumask_set_cpu(cpu, >cpus_free); dprintk(XENLOG_G_INFO, "%d <-- NULL (%pdv%d)\n", cpu, unit->domain, @@ -436,7 +454,7 @@ static spinlock_t *null_switch_sched(struct scheduler *new_ops, */ ASSERT(!local_irq_is_enabled()); -init_pdata(prv, cpu); +init_pdata(prv, pdata, cpu); return >_lock; } @@ -446,6 +464,7 @@ static void null_unit_insert(const struct scheduler *ops, { struct null_private *prv = null_priv(ops); struct null_unit *nvc = null_unit(unit); +struct null_pcpu *npc; unsigned int cpu; spinlock_t *lock; @@ -462,6 +481,7 @@ static void null_unit_insert(const struct scheduler *ops, retry: sched_set_res(unit, pick_res(prv, unit)); cpu = sched_unit_master(unit); +npc = get_sched_res(cpu)->sched_priv; spin_unlock(lock); @@ -471,7 +491,7 @@ static void null_unit_insert(const struct scheduler *ops, cpupool_domain_master_cpumask(unit->domain)); /* If the pCPU is free, we assign unit
[Xen-devel] [PATCH 4/9] xen/sched: remove special cases for free cpus in schedulers
With the idle scheduler now taking care of all cpus not in any cpupool, the special cases in the other schedulers for cpus without an associated cpupool can be removed. Signed-off-by: Juergen Gross --- xen/common/sched/sched_credit.c | 7 ++- xen/common/sched/sched_credit2.c | 30 -- 2 files changed, 2 insertions(+), 35 deletions(-) diff --git a/xen/common/sched/sched_credit.c b/xen/common/sched/sched_credit.c index a098ca0f3a..8b1de9b033 100644 --- a/xen/common/sched/sched_credit.c +++ b/xen/common/sched/sched_credit.c @@ -1690,11 +1690,8 @@ csched_load_balance(struct csched_private *prv, int cpu, BUG_ON(get_sched_res(cpu) != snext->unit->res); -/* - * If this CPU is going offline, or is not (yet) part of any cpupool - * (as it happens, e.g., during cpu bringup), we shouldn't steal work. - */ -if ( unlikely(!cpumask_test_cpu(cpu, online) || c == NULL) ) +/* If this CPU is going offline, we shouldn't steal work. */ +if ( unlikely(!cpumask_test_cpu(cpu, online)) ) goto out; if ( snext->pri == CSCHED_PRI_IDLE ) diff --git a/xen/common/sched/sched_credit2.c b/xen/common/sched/sched_credit2.c index 5bfe1441a2..f9e521a3a8 100644 --- a/xen/common/sched/sched_credit2.c +++ b/xen/common/sched/sched_credit2.c @@ -2744,40 +2744,10 @@ static void csched2_unit_migrate( const struct scheduler *ops, struct sched_unit *unit, unsigned int new_cpu) { -struct domain *d = unit->domain; struct csched2_unit * const svc = csched2_unit(unit); struct csched2_runqueue_data *trqd; s_time_t now = NOW(); -/* - * Being passed a target pCPU which is outside of our cpupool is only - * valid if we are shutting down (or doing ACPI suspend), and we are - * moving everyone to BSP, no matter whether or not BSP is inside our - * cpupool. - * - * And since there indeed is the chance that it is not part of it, all - * we must do is remove _and_ unassign the unit from any runqueue, as - * well as updating v->processor with the target, so that the suspend - * process can continue.
- * - * It will then be during resume that a new, meaningful, value for - * v->processor will be chosen, and during actual domain unpause that - * the unit will be assigned to and added to the proper runqueue. - */ -if ( unlikely(!cpumask_test_cpu(new_cpu, cpupool_domain_master_cpumask(d))) ) -{ -ASSERT(system_state == SYS_STATE_suspend); -if ( unit_on_runq(svc) ) -{ -runq_remove(svc); -update_load(ops, svc->rqd, NULL, -1, now); -} -_runq_deassign(svc); -sched_set_res(unit, get_sched_res(new_cpu)); -return; -} - -/* If here, new_cpu must be a valid Credit2 pCPU, and in our affinity. */ ASSERT(cpumask_test_cpu(new_cpu, _priv(ops)->initialized)); ASSERT(cpumask_test_cpu(new_cpu, unit->cpu_hard_affinity)); -- 2.16.4 ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [PATCH 9/9] xen/sched: add const qualifier where appropriate
Make use of the const qualifier more often in scheduling code. Signed-off-by: Juergen Gross --- xen/common/sched/cpupool.c| 2 +- xen/common/sched/sched_arinc653.c | 4 +-- xen/common/sched/sched_credit.c | 44 + xen/common/sched/sched_credit2.c | 52 --- xen/common/sched/sched_null.c | 17 +++-- xen/common/sched/sched_rt.c | 32 xen/common/sched/schedule.c | 25 ++- xen/include/xen/sched.h | 9 --- 8 files changed, 96 insertions(+), 89 deletions(-) diff --git a/xen/common/sched/cpupool.c b/xen/common/sched/cpupool.c index 14212bb4ae..a6c04c46cb 100644 --- a/xen/common/sched/cpupool.c +++ b/xen/common/sched/cpupool.c @@ -882,7 +882,7 @@ int cpupool_get_id(const struct domain *d) return d->cpupool ? d->cpupool->cpupool_id : CPUPOOLID_NONE; } -cpumask_t *cpupool_valid_cpus(struct cpupool *pool) +const cpumask_t *cpupool_valid_cpus(const struct cpupool *pool) { return pool->cpu_valid; } diff --git a/xen/common/sched/sched_arinc653.c b/xen/common/sched/sched_arinc653.c index dc45378952..0de4ba6b2c 100644 --- a/xen/common/sched/sched_arinc653.c +++ b/xen/common/sched/sched_arinc653.c @@ -608,7 +608,7 @@ static struct sched_resource * a653sched_pick_resource(const struct scheduler *ops, const struct sched_unit *unit) { -cpumask_t *online; +const cpumask_t *online; unsigned int cpu; /* @@ -639,7 +639,7 @@ a653_switch_sched(struct scheduler *new_ops, unsigned int cpu, void *pdata, void *vdata) { struct sched_resource *sr = get_sched_res(cpu); -arinc653_unit_t *svc = vdata; +const arinc653_unit_t *svc = vdata; ASSERT(!pdata && svc && is_idle_unit(svc->unit)); diff --git a/xen/common/sched/sched_credit.c b/xen/common/sched/sched_credit.c index 05930261d9..f2fc1cca5a 100644 --- a/xen/common/sched/sched_credit.c +++ b/xen/common/sched/sched_credit.c @@ -233,7 +233,7 @@ static void csched_tick(void *_cpu); static void csched_acct(void *dummy); static inline int -__unit_on_runq(struct csched_unit *svc) +__unit_on_runq(const struct csched_unit *svc) { return !list_empty(>runq_elem); } @@ 
-349,11 +349,11 @@ boolean_param("tickle_one_idle_cpu", opt_tickle_one_idle); DEFINE_PER_CPU(unsigned int, last_tickle_cpu); -static inline void __runq_tickle(struct csched_unit *new) +static inline void __runq_tickle(const struct csched_unit *new) { unsigned int cpu = sched_unit_master(new->unit); -struct sched_resource *sr = get_sched_res(cpu); -struct sched_unit *unit = new->unit; +const struct sched_resource *sr = get_sched_res(cpu); +const struct sched_unit *unit = new->unit; struct csched_unit * const cur = CSCHED_UNIT(curr_on_cpu(cpu)); struct csched_private *prv = CSCHED_PRIV(sr->scheduler); cpumask_t mask, idle_mask, *online; @@ -509,7 +509,7 @@ static inline void __runq_tickle(struct csched_unit *new) static void csched_free_pdata(const struct scheduler *ops, void *pcpu, int cpu) { -struct csched_private *prv = CSCHED_PRIV(ops); +const struct csched_private *prv = CSCHED_PRIV(ops); /* * pcpu either points to a valid struct csched_pcpu, or is NULL, if we're @@ -652,7 +652,7 @@ csched_switch_sched(struct scheduler *new_ops, unsigned int cpu, #ifndef NDEBUG static inline void -__csched_unit_check(struct sched_unit *unit) +__csched_unit_check(const struct sched_unit *unit) { struct csched_unit * const svc = CSCHED_UNIT(unit); struct csched_dom * const sdom = svc->sdom; @@ -700,8 +700,8 @@ __csched_vcpu_is_cache_hot(const struct csched_private *prv, static inline int __csched_unit_is_migrateable(const struct csched_private *prv, - struct sched_unit *unit, - int dest_cpu, cpumask_t *mask) + const struct sched_unit *unit, + int dest_cpu, const cpumask_t *mask) { const struct csched_unit *svc = CSCHED_UNIT(unit); /* @@ -725,7 +725,7 @@ _csched_cpu_pick(const struct scheduler *ops, const struct sched_unit *unit, /* We must always use cpu's scratch space */ cpumask_t *cpus = cpumask_scratch_cpu(cpu); cpumask_t idlers; -cpumask_t *online = cpupool_domain_master_cpumask(unit->domain); +const cpumask_t *online = cpupool_domain_master_cpumask(unit->domain); struct 
csched_pcpu *spc = NULL; int balance_step; @@ -932,7 +932,7 @@ csched_unit_acct(struct csched_private *prv, unsigned int cpu) { struct sched_unit *currunit = current->sched_unit; struct csched_unit * const svc = CSCHED_UNIT(currunit); -struct sched_resource *sr = get_sched_res(cpu); +const struct sched_resource *sr = get_sched_res(cpu); const struct scheduler *ops = sr->scheduler; ASSERT( sched_unit_master(currunit) == cpu ); @@
[Xen-devel] [PATCH 0/9] xen: scheduler cleanups
Move all scheduler related hypervisor code to xen/common/sched/ and do a lot of cleanups. Juergen Gross (9): xen/sched: move schedulers and cpupool coding to dedicated directory xen/sched: make sched-if.h really scheduler private xen/sched: cleanup sched.h xen/sched: remove special cases for free cpus in schedulers xen/sched: use scratch cpumask instead of allocating it on the stack xen/sched: replace null scheduler percpu-variable with pdata hook xen/sched: switch scheduling to bool where appropriate xen/sched: eliminate sched_tick_suspend() and sched_tick_resume() xen/sched: add const qualifier where appropriate MAINTAINERS| 8 +- xen/arch/arm/domain.c | 6 +- xen/arch/x86/acpi/cpu_idle.c | 15 +- xen/arch/x86/cpu/mwait-idle.c | 8 +- xen/arch/x86/dom0_build.c | 5 +- xen/common/Kconfig | 66 +- xen/common/Makefile| 8 +- xen/common/domain.c| 70 -- xen/common/domctl.c| 135 +-- xen/common/rcupdate.c | 7 +- xen/common/sched/Kconfig | 65 ++ xen/common/sched/Makefile | 7 + .../{compat/schedule.c => sched/compat_schedule.c} | 2 +- xen/common/{ => sched}/cpupool.c | 23 +- xen/{include/xen => common/sched}/sched-if.h | 18 +- xen/common/{ => sched}/sched_arinc653.c| 15 +- xen/common/{ => sched}/sched_credit.c | 65 +++--- xen/common/{ => sched}/sched_credit2.c | 85 +++ xen/common/{ => sched}/sched_null.c| 105 ++--- xen/common/{ => sched}/sched_rt.c | 105 + xen/common/{ => sched}/schedule.c | 246 ++--- xen/include/xen/domain.h | 3 + xen/include/xen/rcupdate.h | 3 - xen/include/xen/sched.h| 39 ++-- 24 files changed, 566 insertions(+), 543 deletions(-) create mode 100644 xen/common/sched/Kconfig create mode 100644 xen/common/sched/Makefile rename xen/common/{compat/schedule.c => sched/compat_schedule.c} (97%) rename xen/common/{ => sched}/cpupool.c (97%) rename xen/{include/xen => common/sched}/sched-if.h (96%) rename xen/common/{ => sched}/sched_arinc653.c (99%) rename xen/common/{ => sched}/sched_credit.c (97%) rename xen/common/{ => sched}/sched_credit2.c (98%) rename 
xen/common/{ => sched}/sched_null.c (92%) rename xen/common/{ => sched}/sched_rt.c (94%) rename xen/common/{ => sched}/schedule.c (92%) -- 2.16.4 ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [PATCH 8/9] xen/sched: eliminate sched_tick_suspend() and sched_tick_resume()
sched_tick_suspend() and sched_tick_resume() only call rcu related functions, so eliminate them and do the rcu_idle_timer*() calling in rcu_idle_[enter|exit](). Signed-off-by: Juergen Gross --- xen/arch/arm/domain.c | 6 +++--- xen/arch/x86/acpi/cpu_idle.c | 15 --- xen/arch/x86/cpu/mwait-idle.c | 8 xen/common/rcupdate.c | 7 +-- xen/common/sched/schedule.c | 12 xen/include/xen/rcupdate.h| 3 --- xen/include/xen/sched.h | 2 -- 7 files changed, 20 insertions(+), 33 deletions(-) diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c index c0a13aa0ab..aa3df3b3ba 100644 --- a/xen/arch/arm/domain.c +++ b/xen/arch/arm/domain.c @@ -46,8 +46,8 @@ static void do_idle(void) { unsigned int cpu = smp_processor_id(); -sched_tick_suspend(); -/* sched_tick_suspend() can raise TIMER_SOFTIRQ. Process it now. */ +rcu_idle_enter(cpu); +/* rcu_idle_enter() can raise TIMER_SOFTIRQ. Process it now. */ process_pending_softirqs(); local_irq_disable(); @@ -58,7 +58,7 @@ static void do_idle(void) } local_irq_enable(); -sched_tick_resume(); +rcu_idle_exit(cpu); } void idle_loop(void) diff --git a/xen/arch/x86/acpi/cpu_idle.c b/xen/arch/x86/acpi/cpu_idle.c index 5edd1844f4..2676f0d7da 100644 --- a/xen/arch/x86/acpi/cpu_idle.c +++ b/xen/arch/x86/acpi/cpu_idle.c @@ -599,7 +599,8 @@ void update_idle_stats(struct acpi_processor_power *power, static void acpi_processor_idle(void) { -struct acpi_processor_power *power = processor_powers[smp_processor_id()]; +unsigned int cpu = smp_processor_id(); +struct acpi_processor_power *power = processor_powers[cpu]; struct acpi_processor_cx *cx = NULL; int next_state; uint64_t t1, t2 = 0; @@ -648,8 +649,8 @@ static void acpi_processor_idle(void) cpufreq_dbs_timer_suspend(); -sched_tick_suspend(); -/* sched_tick_suspend() can raise TIMER_SOFTIRQ. Process it now. */ +rcu_idle_enter(cpu); +/* rcu_idle_enter() can raise TIMER_SOFTIRQ. Process it now. 
*/ process_pending_softirqs(); /* @@ -658,10 +659,10 @@ static void acpi_processor_idle(void) */ local_irq_disable(); -if ( !cpu_is_haltable(smp_processor_id()) ) +if ( !cpu_is_haltable(cpu) ) { local_irq_enable(); -sched_tick_resume(); +rcu_idle_exit(cpu); cpufreq_dbs_timer_resume(); return; } @@ -786,7 +787,7 @@ static void acpi_processor_idle(void) /* Now in C0 */ power->last_state = >states[0]; local_irq_enable(); -sched_tick_resume(); +rcu_idle_exit(cpu); cpufreq_dbs_timer_resume(); return; } @@ -794,7 +795,7 @@ static void acpi_processor_idle(void) /* Now in C0 */ power->last_state = >states[0]; -sched_tick_resume(); +rcu_idle_exit(cpu); cpufreq_dbs_timer_resume(); if ( cpuidle_current_governor->reflect ) diff --git a/xen/arch/x86/cpu/mwait-idle.c b/xen/arch/x86/cpu/mwait-idle.c index 52413e6da1..f49b04c45b 100644 --- a/xen/arch/x86/cpu/mwait-idle.c +++ b/xen/arch/x86/cpu/mwait-idle.c @@ -755,8 +755,8 @@ static void mwait_idle(void) cpufreq_dbs_timer_suspend(); - sched_tick_suspend(); - /* sched_tick_suspend() can raise TIMER_SOFTIRQ. Process it now. */ + rcu_idle_enter(cpu); + /* rcu_idle_enter() can raise TIMER_SOFTIRQ. Process it now. */ process_pending_softirqs(); /* Interrupts must be disabled for C2 and higher transitions. */ @@ -764,7 +764,7 @@ static void mwait_idle(void) if (!cpu_is_haltable(cpu)) { local_irq_enable(); - sched_tick_resume(); + rcu_idle_exit(cpu); cpufreq_dbs_timer_resume(); return; } @@ -806,7 +806,7 @@ static void mwait_idle(void) if (!(lapic_timer_reliable_states & (1 << cstate))) lapic_timer_on(); - sched_tick_resume(); + rcu_idle_exit(cpu); cpufreq_dbs_timer_resume(); if ( cpuidle_current_governor->reflect ) diff --git a/xen/common/rcupdate.c b/xen/common/rcupdate.c index a56103c6f7..cb712c8690 100644 --- a/xen/common/rcupdate.c +++ b/xen/common/rcupdate.c @@ -459,7 +459,7 @@ int rcu_needs_cpu(int cpu) * periodically poke rcu_pedning(), so that it will invoke the callback * not too late after the end of the grace period. 
*/ -void rcu_idle_timer_start() +static void rcu_idle_timer_start(void) { struct rcu_data *rdp = &this_cpu(rcu_data); @@ -475,7 +475,7 @@ void rcu_idle_timer_start() rdp->idle_timer_active = true; } -void rcu_idle_timer_stop() +static void rcu_idle_timer_stop(void) { struct rcu_data *rdp = &this_cpu(rcu_data); @@ -633,10 +633,13 @@ void rcu_idle_enter(unsigned int cpu) * Se the comment before cpumask_andnot() in rcu_start_batch(). */ smp_mb(); + +rcu_idle_timer_start(); } void
[Xen-devel] [PATCH 7/9] xen/sched: switch scheduling to bool where appropriate
Scheduling code has several places using int or bool_t instead of bool. Switch those. Signed-off-by: Juergen Gross --- xen/common/sched/cpupool.c| 10 +- xen/common/sched/sched-if.h | 2 +- xen/common/sched/sched_arinc653.c | 8 xen/common/sched/sched_credit.c | 12 ++-- xen/common/sched/sched_rt.c | 14 +++--- xen/common/sched/schedule.c | 14 +++--- xen/include/xen/sched.h | 6 +++--- 7 files changed, 33 insertions(+), 33 deletions(-) diff --git a/xen/common/sched/cpupool.c b/xen/common/sched/cpupool.c index d5b64d0a6a..14212bb4ae 100644 --- a/xen/common/sched/cpupool.c +++ b/xen/common/sched/cpupool.c @@ -154,7 +154,7 @@ static struct cpupool *alloc_cpupool_struct(void) * the searched id is returned * returns NULL if not found. */ -static struct cpupool *__cpupool_find_by_id(int id, int exact) +static struct cpupool *__cpupool_find_by_id(int id, bool exact) { struct cpupool **q; @@ -169,10 +169,10 @@ static struct cpupool *__cpupool_find_by_id(int id, int exact) static struct cpupool *cpupool_find_by_id(int poolid) { -return __cpupool_find_by_id(poolid, 1); +return __cpupool_find_by_id(poolid, true); } -static struct cpupool *__cpupool_get_by_id(int poolid, int exact) +static struct cpupool *__cpupool_get_by_id(int poolid, bool exact) { struct cpupool *c; spin_lock(_lock); @@ -185,12 +185,12 @@ static struct cpupool *__cpupool_get_by_id(int poolid, int exact) struct cpupool *cpupool_get_by_id(int poolid) { -return __cpupool_get_by_id(poolid, 1); +return __cpupool_get_by_id(poolid, true); } static struct cpupool *cpupool_get_next_by_id(int poolid) { -return __cpupool_get_by_id(poolid, 0); +return __cpupool_get_by_id(poolid, false); } void cpupool_put(struct cpupool *pool) diff --git a/xen/common/sched/sched-if.h b/xen/common/sched/sched-if.h index edce354dc7..9d0db75cbb 100644 --- a/xen/common/sched/sched-if.h +++ b/xen/common/sched/sched-if.h @@ -589,7 +589,7 @@ unsigned int cpupool_get_granularity(const struct cpupool *c); * * The hard affinity is not a subset of soft 
affinity * * There is an overlap between the soft and hard affinity masks */ -static inline int has_soft_affinity(const struct sched_unit *unit) +static inline bool has_soft_affinity(const struct sched_unit *unit) { return unit->soft_aff_effective && !cpumask_subset(cpupool_domain_master_cpumask(unit->domain), diff --git a/xen/common/sched/sched_arinc653.c b/xen/common/sched/sched_arinc653.c index fe15754900..dc45378952 100644 --- a/xen/common/sched/sched_arinc653.c +++ b/xen/common/sched/sched_arinc653.c @@ -75,7 +75,7 @@ typedef struct arinc653_unit_s * arinc653_unit_t pointer. */ struct sched_unit * unit; /* awake holds whether the UNIT has been woken with vcpu_wake() */ -bool_t awake; +boolawake; /* list holds the linked list information for the list this UNIT * is stored in */ struct list_headlist; @@ -427,7 +427,7 @@ a653sched_alloc_udata(const struct scheduler *ops, struct sched_unit *unit, * will mark the UNIT awake. */ svc->unit = unit; -svc->awake = 0; +svc->awake = false; if ( !is_idle_unit(unit) ) list_add(>list, _PRIV(ops)->unit_list); update_schedule_units(ops); @@ -473,7 +473,7 @@ static void a653sched_unit_sleep(const struct scheduler *ops, struct sched_unit *unit) { if ( AUNIT(unit) != NULL ) -AUNIT(unit)->awake = 0; +AUNIT(unit)->awake = false; /* * If the UNIT being put to sleep is the same one that is currently @@ -493,7 +493,7 @@ static void a653sched_unit_wake(const struct scheduler *ops, struct sched_unit *unit) { if ( AUNIT(unit) != NULL ) -AUNIT(unit)->awake = 1; +AUNIT(unit)->awake = true; cpu_raise_softirq(sched_unit_master(unit), SCHEDULE_SOFTIRQ); } diff --git a/xen/common/sched/sched_credit.c b/xen/common/sched/sched_credit.c index 8b1de9b033..05930261d9 100644 --- a/xen/common/sched/sched_credit.c +++ b/xen/common/sched/sched_credit.c @@ -245,7 +245,7 @@ __runq_elem(struct list_head *elem) } /* Is the first element of cpu's runq (if any) cpu's idle unit? 
*/ -static inline bool_t is_runq_idle(unsigned int cpu) +static inline bool is_runq_idle(unsigned int cpu) { /* * We're peeking at cpu's runq, we must hold the proper lock. @@ -344,7 +344,7 @@ static void burn_credits(struct csched_unit *svc, s_time_t now) svc->start_time += (credits * MILLISECS(1)) / CSCHED_CREDITS_PER_MSEC; } -static bool_t __read_mostly opt_tickle_one_idle = 1; +static bool __read_mostly opt_tickle_one_idle = true; boolean_param("tickle_one_idle_cpu", opt_tickle_one_idle); DEFINE_PER_CPU(unsigned int, last_tickle_cpu); @@ -719,7 +719,7 @@ __csched_unit_is_migrateable(const struct csched_private *prv, static int _csched_cpu_pick(const struct
[Xen-devel] [PATCH 5/9] xen/sched: use scratch cpumask instead of allocating it on the stack
In sched_rt there are three instances of cpumasks allocated on the stack. Replace them by using cpumask_scratch. Signed-off-by: Juergen Gross --- xen/common/sched/sched_rt.c | 56 ++--- 1 file changed, 37 insertions(+), 19 deletions(-) diff --git a/xen/common/sched/sched_rt.c b/xen/common/sched/sched_rt.c index 379b56bc2a..264a753116 100644 --- a/xen/common/sched/sched_rt.c +++ b/xen/common/sched/sched_rt.c @@ -637,23 +637,38 @@ replq_reinsert(const struct scheduler *ops, struct rt_unit *svc) * and available resources */ static struct sched_resource * -rt_res_pick(const struct scheduler *ops, const struct sched_unit *unit) +rt_res_pick_locked(const struct sched_unit *unit, unsigned int locked_cpu) { -cpumask_t cpus; +cpumask_t *cpus = cpumask_scratch_cpu(locked_cpu); cpumask_t *online; int cpu; online = cpupool_domain_master_cpumask(unit->domain); -cpumask_and(, online, unit->cpu_hard_affinity); +cpumask_and(cpus, online, unit->cpu_hard_affinity); -cpu = cpumask_test_cpu(sched_unit_master(unit), ) +cpu = cpumask_test_cpu(sched_unit_master(unit), cpus) ? 
sched_unit_master(unit) -: cpumask_cycle(sched_unit_master(unit), ); -ASSERT( !cpumask_empty() && cpumask_test_cpu(cpu, ) ); +: cpumask_cycle(sched_unit_master(unit), cpus); +ASSERT( !cpumask_empty(cpus) && cpumask_test_cpu(cpu, cpus) ); return get_sched_res(cpu); } +/* + * Pick a valid resource for the unit vc + * Valid resource of an unit is intesection of unit's affinity + * and available resources + */ +static struct sched_resource * +rt_res_pick(const struct scheduler *ops, const struct sched_unit *unit) +{ +struct sched_resource *res; + +res = rt_res_pick_locked(unit, unit->res->master_cpu); + +return res; +} + /* * Init/Free related code */ @@ -886,11 +901,14 @@ rt_unit_insert(const struct scheduler *ops, struct sched_unit *unit) struct rt_unit *svc = rt_unit(unit); s_time_t now; spinlock_t *lock; +unsigned int cpu = smp_processor_id(); BUG_ON( is_idle_unit(unit) ); /* This is safe because unit isn't yet being scheduled */ -sched_set_res(unit, rt_res_pick(ops, unit)); +lock = pcpu_schedule_lock_irq(cpu); +sched_set_res(unit, rt_res_pick_locked(unit, cpu)); +pcpu_schedule_unlock_irq(lock, cpu); lock = unit_schedule_lock_irq(unit); @@ -1003,13 +1021,13 @@ burn_budget(const struct scheduler *ops, struct rt_unit *svc, s_time_t now) * lock is grabbed before calling this function */ static struct rt_unit * -runq_pick(const struct scheduler *ops, const cpumask_t *mask) +runq_pick(const struct scheduler *ops, const cpumask_t *mask, unsigned int cpu) { struct list_head *runq = rt_runq(ops); struct list_head *iter; struct rt_unit *svc = NULL; struct rt_unit *iter_svc = NULL; -cpumask_t cpu_common; +cpumask_t *cpu_common = cpumask_scratch_cpu(cpu); cpumask_t *online; list_for_each ( iter, runq ) @@ -1018,9 +1036,9 @@ runq_pick(const struct scheduler *ops, const cpumask_t *mask) /* mask cpu_hard_affinity & cpupool & mask */ online = cpupool_domain_master_cpumask(iter_svc->unit->domain); -cpumask_and(_common, online, iter_svc->unit->cpu_hard_affinity); 
-cpumask_and(_common, mask, _common); -if ( cpumask_empty(_common) ) +cpumask_and(cpu_common, online, iter_svc->unit->cpu_hard_affinity); +cpumask_and(cpu_common, mask, cpu_common); +if ( cpumask_empty(cpu_common) ) continue; ASSERT( iter_svc->cur_budget > 0 ); @@ -1092,7 +1110,7 @@ rt_schedule(const struct scheduler *ops, struct sched_unit *currunit, } else { -snext = runq_pick(ops, cpumask_of(sched_cpu)); +snext = runq_pick(ops, cpumask_of(sched_cpu), cur_cpu); if ( snext == NULL ) snext = rt_unit(sched_idle_unit(sched_cpu)); @@ -1186,22 +1204,22 @@ runq_tickle(const struct scheduler *ops, struct rt_unit *new) struct rt_unit *iter_svc; struct sched_unit *iter_unit; int cpu = 0, cpu_to_tickle = 0; -cpumask_t not_tickled; +cpumask_t *not_tickled = cpumask_scratch_cpu(smp_processor_id()); cpumask_t *online; if ( new == NULL || is_idle_unit(new->unit) ) return; online = cpupool_domain_master_cpumask(new->unit->domain); -cpumask_and(_tickled, online, new->unit->cpu_hard_affinity); -cpumask_andnot(_tickled, _tickled, >tickled); +cpumask_and(not_tickled, online, new->unit->cpu_hard_affinity); +cpumask_andnot(not_tickled, not_tickled, >tickled); /* * 1) If there are any idle CPUs, kick one. *For cache benefit,we first search new->cpu. *The same loop also find the one with lowest priority. */ -cpu = cpumask_test_or_cycle(sched_unit_master(new->unit), _tickled); +cpu =
[Xen-devel] [PATCH 3/9] xen/sched: cleanup sched.h
There are some items in include/xen/sched.h which can be moved to sched-if.h as they are scheduler private. Signed-off-by: Juergen Gross --- xen/common/sched/sched-if.h | 13 + xen/common/sched/schedule.c | 2 +- xen/include/xen/sched.h | 17 - 3 files changed, 14 insertions(+), 18 deletions(-) diff --git a/xen/common/sched/sched-if.h b/xen/common/sched/sched-if.h index a702fd23b1..edce354dc7 100644 --- a/xen/common/sched/sched-if.h +++ b/xen/common/sched/sched-if.h @@ -533,6 +533,7 @@ static inline void sched_unit_unpause(const struct sched_unit *unit) struct cpupool { int cpupool_id; +#define CPUPOOLID_NONE-1 unsigned int n_dom; cpumask_var_tcpu_valid; /* all cpus assigned to pool */ cpumask_var_tres_valid; /* all scheduling resources of pool */ @@ -618,5 +619,17 @@ affinity_balance_cpumask(const struct sched_unit *unit, int step, void sched_rm_cpu(unsigned int cpu); const cpumask_t *sched_get_opt_cpumask(enum sched_gran opt, unsigned int cpu); +void schedule_dump(struct cpupool *c); +struct scheduler *scheduler_get_default(void); +struct scheduler *scheduler_alloc(unsigned int sched_id, int *perr); +void scheduler_free(struct scheduler *sched); +int cpu_disable_scheduler(unsigned int cpu); +int schedule_cpu_add(unsigned int cpu, struct cpupool *c); +int schedule_cpu_rm(unsigned int cpu); +int sched_move_domain(struct domain *d, struct cpupool *c); +struct cpupool *cpupool_get_by_id(int poolid); +void cpupool_put(struct cpupool *pool); +int cpupool_add_domain(struct domain *d, int poolid); +void cpupool_rm_domain(struct domain *d); #endif /* __XEN_SCHED_IF_H__ */ diff --git a/xen/common/sched/schedule.c b/xen/common/sched/schedule.c index c751faa741..db8ce146ca 100644 --- a/xen/common/sched/schedule.c +++ b/xen/common/sched/schedule.c @@ -1346,7 +1346,7 @@ int vcpu_set_hard_affinity(struct vcpu *v, const cpumask_t *affinity) return vcpu_set_affinity(v, affinity, v->sched_unit->cpu_hard_affinity); } -int vcpu_set_soft_affinity(struct vcpu *v, const cpumask_t 
*affinity) +static int vcpu_set_soft_affinity(struct vcpu *v, const cpumask_t *affinity) { return vcpu_set_affinity(v, affinity, v->sched_unit->cpu_soft_affinity); } diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h index 2507a833c2..55335d6ab3 100644 --- a/xen/include/xen/sched.h +++ b/xen/include/xen/sched.h @@ -685,7 +685,6 @@ int sched_init_vcpu(struct vcpu *v); void sched_destroy_vcpu(struct vcpu *v); int sched_init_domain(struct domain *d, int poolid); void sched_destroy_domain(struct domain *d); -int sched_move_domain(struct domain *d, struct cpupool *c); long sched_adjust(struct domain *, struct xen_domctl_scheduler_op *); long sched_adjust_global(struct xen_sysctl_scheduler_op *); int sched_id(void); @@ -918,19 +917,10 @@ static inline bool sched_has_urgent_vcpu(void) return atomic_read(_cpu(sched_urgent_count)); } -struct scheduler; - -struct scheduler *scheduler_get_default(void); -struct scheduler *scheduler_alloc(unsigned int sched_id, int *perr); -void scheduler_free(struct scheduler *sched); -int schedule_cpu_add(unsigned int cpu, struct cpupool *c); -int schedule_cpu_rm(unsigned int cpu); void vcpu_set_periodic_timer(struct vcpu *v, s_time_t value); -int cpu_disable_scheduler(unsigned int cpu); void sched_setup_dom0_vcpus(struct domain *d); int vcpu_temporary_affinity(struct vcpu *v, unsigned int cpu, uint8_t reason); int vcpu_set_hard_affinity(struct vcpu *v, const cpumask_t *affinity); -int vcpu_set_soft_affinity(struct vcpu *v, const cpumask_t *affinity); void restore_vcpu_affinity(struct domain *d); int vcpu_affinity_domctl(struct domain *d, uint32_t cmd, struct xen_domctl_vcpuaffinity *vcpuaff); @@ -1051,17 +1041,10 @@ extern enum cpufreq_controller { FREQCTL_none, FREQCTL_dom0_kernel, FREQCTL_xen } cpufreq_controller; -#define CPUPOOLID_NONE-1 - -struct cpupool *cpupool_get_by_id(int poolid); -void cpupool_put(struct cpupool *pool); -int cpupool_add_domain(struct domain *d, int poolid); -void cpupool_rm_domain(struct domain *d); 
int cpupool_move_domain(struct domain *d, struct cpupool *c); int cpupool_do_sysctl(struct xen_sysctl_cpupool_op *op); int cpupool_get_id(const struct domain *d); cpumask_t *cpupool_valid_cpus(struct cpupool *pool); -void schedule_dump(struct cpupool *c); extern void dump_runq(unsigned char key); void arch_do_physinfo(struct xen_sysctl_physinfo *pi); -- 2.16.4 ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [PATCH 2/9] xen/sched: make sched-if.h really scheduler private
include/xen/sched-if.h should be private to scheduler code, so move it to common/sched/sched-if.h and move the remaining use cases to cpupool.c and schedule.c. Signed-off-by: Juergen Gross --- xen/arch/x86/dom0_build.c| 5 +- xen/common/domain.c | 70 -- xen/common/domctl.c | 135 +-- xen/common/sched/cpupool.c | 13 +- xen/{include/xen => common/sched}/sched-if.h | 3 - xen/common/sched/sched_arinc653.c| 3 +- xen/common/sched/sched_credit.c | 2 +- xen/common/sched/sched_credit2.c | 3 +- xen/common/sched/sched_null.c| 3 +- xen/common/sched/sched_rt.c | 3 +- xen/common/sched/schedule.c | 191 ++- xen/include/xen/domain.h | 3 + xen/include/xen/sched.h | 7 + 13 files changed, 228 insertions(+), 213 deletions(-) rename xen/{include/xen => common/sched}/sched-if.h (99%) diff --git a/xen/arch/x86/dom0_build.c b/xen/arch/x86/dom0_build.c index 28b964e018..56c2dee0fc 100644 --- a/xen/arch/x86/dom0_build.c +++ b/xen/arch/x86/dom0_build.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include @@ -227,9 +226,9 @@ unsigned int __init dom0_max_vcpus(void) dom0_nodes = node_online_map; for_each_node_mask ( node, dom0_nodes ) cpumask_or(_cpus, _cpus, _to_cpumask(node)); -cpumask_and(_cpus, _cpus, cpupool0->cpu_valid); +cpumask_and(_cpus, _cpus, cpupool_valid_cpus(cpupool0)); if ( cpumask_empty(_cpus) ) -cpumask_copy(_cpus, cpupool0->cpu_valid); +cpumask_copy(_cpus, cpupool_valid_cpus(cpupool0)); max_vcpus = cpumask_weight(_cpus); if ( opt_dom0_max_vcpus_min > max_vcpus ) diff --git a/xen/common/domain.c b/xen/common/domain.c index 66c7fc..f4f0a66262 100644 --- a/xen/common/domain.c +++ b/xen/common/domain.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include @@ -565,75 +564,6 @@ void __init setup_system_domains(void) #endif } -void domain_update_node_affinity(struct domain *d) -{ -cpumask_var_t dom_cpumask, dom_cpumask_soft; -cpumask_t *dom_affinity; -const cpumask_t *online; -struct sched_unit *unit; -unsigned int cpu; - -/* Do we have 
vcpus already? If not, no need to update node-affinity. */ -if ( !d->vcpu || !d->vcpu[0] ) -return; - -if ( !zalloc_cpumask_var(_cpumask) ) -return; -if ( !zalloc_cpumask_var(_cpumask_soft) ) -{ -free_cpumask_var(dom_cpumask); -return; -} - -online = cpupool_domain_master_cpumask(d); - -spin_lock(>node_affinity_lock); - -/* - * If d->auto_node_affinity is true, let's compute the domain's - * node-affinity and update d->node_affinity accordingly. if false, - * just leave d->auto_node_affinity alone. - */ -if ( d->auto_node_affinity ) -{ -/* - * We want the narrowest possible set of pcpus (to get the narowest - * possible set of nodes). What we need is the cpumask of where the - * domain can run (the union of the hard affinity of all its vcpus), - * and the full mask of where it would prefer to run (the union of - * the soft affinity of all its various vcpus). Let's build them. - */ -for_each_sched_unit ( d, unit ) -{ -cpumask_or(dom_cpumask, dom_cpumask, unit->cpu_hard_affinity); -cpumask_or(dom_cpumask_soft, dom_cpumask_soft, - unit->cpu_soft_affinity); -} -/* Filter out non-online cpus */ -cpumask_and(dom_cpumask, dom_cpumask, online); -ASSERT(!cpumask_empty(dom_cpumask)); -/* And compute the intersection between hard, online and soft */ -cpumask_and(dom_cpumask_soft, dom_cpumask_soft, dom_cpumask); - -/* - * If not empty, the intersection of hard, soft and online is the - * narrowest set we want. If empty, we fall back to hard - */ -dom_affinity = cpumask_empty(dom_cpumask_soft) ? - dom_cpumask : dom_cpumask_soft; - -nodes_clear(d->node_affinity); -for_each_cpu ( cpu, dom_affinity ) -node_set(cpu_to_node(cpu), d->node_affinity); -} - -spin_unlock(>node_affinity_lock); - -free_cpumask_var(dom_cpumask_soft); -free_cpumask_var(dom_cpumask); -} - - int domain_set_node_affinity(struct domain *d, const nodemask_t *affinity) { /* Being disjoint with the system is just wrong. 
*/ diff --git a/xen/common/domctl.c b/xen/common/domctl.c index 03d0226039..3407db44fd 100644 --- a/xen/common/domctl.c +++ b/xen/common/domctl.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -65,9 +64,9 @@ static int bitmap_to_xenctl_bitmap(struct
Re: [Xen-devel] REGRESSION: Xen 4.13 RC5 fails to bootstrap Dom0 on ARM
On Tue, Dec 17, 2019 at 6:56 PM Roman Shaposhnik wrote: > > On Tue, Dec 17, 2019 at 5:51 PM Stefano Stabellini > wrote: > > > > On Tue, 17 Dec 2019, Roman Shaposhnik wrote: > > > On Tue, Dec 17, 2019 at 11:26 AM Stefano Stabellini > > > wrote: > > > > > > > > On Tue, 17 Dec 2019, Roman Shaposhnik wrote: > > > > > On Tue, Dec 17, 2019 at 10:30 AM Stefano Stabellini > > > > > wrote: > > > > > > > > > > > > On Tue, 17 Dec 2019, Julien Grall wrote: > > > > > > > Hi, > > > > > > > > > > > > > > On 17/12/2019 04:39, Roman Shaposhnik wrote: > > > > > > > > On Mon, Dec 16, 2019 at 6:55 PM Stefano Stabellini > > > > > > > > wrote: > > > > > > > > > On Mon, 16 Dec 2019, Roman Shaposhnik wrote: > > > > > > > > > If I sum all the memory sizes together I get 0x3ddfd000 which > > > > > > > > > is 990M. > > > > > > > > > If so, I wonder how you could boot succesfully with > > > > > > > > > dom0_mem=1024M even > > > > > > > > > on Xen 4.12... :-? > > > > > > > > > > > > > > > > That is a very interesting observation indeed! I actually don't > > > > > > > > remember where that device tree came from, but I think it was > > > > > > > > from one > > > > > > > > of the Linaro sites. > > > > > > > > > > > > > > This is mostly likely because of: > > > > > > > > > > > > > > commit 6341a674573f1834f083f0ab0f5b36b075f9e02e > > > > > > > Author: Julien Grall > > > > > > > Date: Wed Aug 21 22:42:31 2019 +0100 > > > > > > > > > > > > > > xen/arm: domain_build: Don't continue if unable to allocate > > > > > > > all dom0 banks > > > > > > > > > > > > > > Xen will only print a warning if there are memory unallocated > > > > > > > when using > > > > > > > 1:1 mapping (only used by dom0). This also includes the case > > > > > > > where no > > > > > > > memory has been allocated. > > > > > > > > > > > > > > It will bring to all sort of issues that can be hard to > > > > > > > diagnostic for > > > > > > > users (the warning can be difficult to spot or disregard). 
> > > > > > > > > > > > > > If the users request 1GB of memory, then most likely they > > > > > > > want the exact > > > > > > > amount and not 512MB. So panic if all the memory has not been > > > > > > > allocated. > > > > > > > > > > > > > > After this change, the behavior is the same as for non-1:1 > > > > > > > memory > > > > > > > allocation (used by domU). > > > > > > > > > > > > > > At the same time, reflow the message to have the format on a > > > > > > > single > > > > > > > line. > > > > > > > > > > > > > > Signed-off-by: Julien Grall > > > > > > > Acked-by: Stefano Stabellini > > > > > > > > > > > > Ah! Roman, could you please post the full boot log of a successful > > > > > > 4.12 > > > > > > boot? > > > > > > > > > > > > If it has a "Failed to allocate requested dom0 memory" message, > > > > > > then we > > > > > > know what the issue is. > > > > > > > > > > Aha! Our messages seems to have crossed ;-) Full log is attached and > > > > > yes -- that's > > > > > the problem indeed. > > > > > > > > > > So at least that mystery is solved. But I'm still not able to get to a > > > > > full 1G of memory > > > > > even with your update to the device tree file. Any chance you can > > > > > send me the > > > > > device tree file that works for you? > > > > > > > > I didn't try on real hardware, I only tried on QEMU with a similar > > > > configuration. I went back and check the HiKey device tree I used and it > > > > is the same as yours (including the ramoops reserved-memory error). > > > > > > > > Apparently there are 1G and 2G variants of the HiKey, obviously both > > > > yours and my device tree are for the 1G variant. I try to dig through > > > > the docs but couldn't find the details of the 2G variant. I cannot find > > > > anywhere the memory range for the top 1G of memory not even on the > > > > LeMaker docs! :-/ > > > > > > Yup. That's exactly the issue on my end as well - can't seem to find an > > > authoritative source for that devicetree. 
> > > > > > I did find this, though: > > > https://releases.linaro.org/96boards/hikey/linaro/debian/15.11/ > > > which looks like it has the latest (at least file timestamp-wise) > > > devicetree. > > > > > > If you look at the memory and reserved memory nodes there, they > > > are actually much simpler than what we've got: > > > > > > memory { > > > device_type = "memory"; > > > reg = <0x0 0x0 0x0 0x4000>; > > > }; > > > > Which is still 1G, but it is surprisingly simpler. > > > > > > > reserved-memory { > > > #address-cells = <0x2>; > > > #size-cells = <0x2>; > > > ranges; > > > > > > mcu-buf@05e0 { > > > no-map; > > > reg = <0x0 0x5e0 0x0 0x10 0x0 > > > 0x740f000 0x0 0x1000>; > > > }; > > > > > > mbox-buf@06dff000 { > > >
Re: [Xen-devel] [PATCH net-next 3/3] xen-netback: remove 'hotplug-status' once it has served its purpose
From: Paul Durrant Date: Tue, 17 Dec 2019 13:32:18 + > Removing the 'hotplug-status' node in netback_remove() is wrong; the script > may not have completed. Only remove the node once the watch has fired and > has been unregistered. > > Signed-off-by: Paul Durrant Applied. ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH net-next 2/3] xen-netback: switch state to InitWait at the end of netback_probe()...
From: Paul Durrant Date: Tue, 17 Dec 2019 13:32:17 + > ...as the comment above the function states. > > The switch to Initialising at the start of the function is somewhat bogus > as the toolstack will have set that initial state anyway. To behave > correctly, a backend should switch to InitWait once it has set up all > xenstore values that may be required by an initialising frontend. This > patch calls backend_switch_state() to make the transition at the > appropriate point. > > NOTE: backend_switch_state() ignores errors from xenbus_switch_state() > and so this patch removes an error path from netback_probe(). This > means a failure to change state at this stage (in the absence of > other failures) will leave the device instantiated. This is highly > unlikely to happen as a failure to change state would indicate a > failure to write to xenstore, and that will trigger other error > paths. Also, a 'stuck' device can still be cleaned up using 'unbind' > in any case. > > Signed-off-by: Paul Durrant Applied. ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH net-next 1/3] xen-netback: move netback_probe() and netback_remove() to the end...
From: Paul Durrant Date: Tue, 17 Dec 2019 13:32:16 + > ...of xenbus.c > > This is a cosmetic function re-ordering to reduce churn in a subsequent > patch. Some style fix-up was done to make checkpatch.pl happier. > > No functional change. > > Signed-off-by: Paul Durrant Applied. ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [xen-unstable test] 144905: tolerable FAIL - PUSHED
flight 144905 xen-unstable real [real] http://logs.test-lab.xenproject.org/osstest/logs/144905/ Failures :-/ but no regressions. Tests which did not succeed, but are not blocking: test-xtf-amd64-amd64-1 72 xtf/test-hvm64-xsa-308 fail blocked in 144850 test-xtf-amd64-amd64-3 72 xtf/test-hvm64-xsa-308 fail blocked in 144850 test-amd64-amd64-xl-rtds 18 guest-localmigrate/x10 fail like 144813 test-amd64-amd64-xl-qemut-win7-amd64 17 guest-stopfail like 144827 test-amd64-amd64-xl-qemuu-win7-amd64 17 guest-stopfail like 144850 test-armhf-armhf-libvirt 14 saverestore-support-checkfail like 144850 test-amd64-i386-xl-qemuu-win7-amd64 17 guest-stop fail like 144850 test-amd64-i386-xl-qemut-win7-amd64 17 guest-stop fail like 144850 test-armhf-armhf-libvirt-raw 13 saverestore-support-checkfail like 144850 test-amd64-amd64-xl-qemuu-ws16-amd64 17 guest-stopfail like 144850 test-amd64-amd64-xl-qemut-ws16-amd64 17 guest-stopfail like 144850 test-amd64-i386-xl-qemuu-ws16-amd64 17 guest-stop fail like 144850 test-amd64-i386-xl-pvshim12 guest-start fail never pass test-amd64-i386-libvirt 13 migrate-support-checkfail never pass test-amd64-amd64-libvirt-xsm 13 migrate-support-checkfail never pass test-amd64-amd64-libvirt 13 migrate-support-checkfail never pass test-amd64-i386-libvirt-xsm 13 migrate-support-checkfail never pass test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 11 migrate-support-check fail never pass test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm 11 migrate-support-check fail never pass test-arm64-arm64-xl-credit2 13 migrate-support-checkfail never pass test-arm64-arm64-xl-credit2 14 saverestore-support-checkfail never pass test-arm64-arm64-xl-thunderx 13 migrate-support-checkfail never pass test-arm64-arm64-xl-credit1 13 migrate-support-checkfail never pass test-arm64-arm64-xl-thunderx 14 saverestore-support-checkfail never pass test-arm64-arm64-xl-credit1 14 saverestore-support-checkfail never pass test-arm64-arm64-xl 13 migrate-support-checkfail never pass 
test-arm64-arm64-xl 14 saverestore-support-checkfail never pass test-arm64-arm64-libvirt-xsm 13 migrate-support-checkfail never pass test-arm64-arm64-libvirt-xsm 14 saverestore-support-checkfail never pass test-amd64-amd64-qemuu-nested-amd 17 debian-hvm-install/l1/l2 fail never pass test-armhf-armhf-xl-arndale 13 migrate-support-checkfail never pass test-armhf-armhf-xl-arndale 14 saverestore-support-checkfail never pass test-arm64-arm64-xl-xsm 13 migrate-support-checkfail never pass test-arm64-arm64-xl-xsm 14 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-vhd 12 migrate-support-checkfail never pass test-armhf-armhf-xl 13 migrate-support-checkfail never pass test-armhf-armhf-xl 14 saverestore-support-checkfail never pass test-armhf-armhf-xl-multivcpu 13 migrate-support-checkfail never pass test-armhf-armhf-xl-multivcpu 14 saverestore-support-checkfail never pass test-armhf-armhf-xl-rtds 13 migrate-support-checkfail never pass test-armhf-armhf-xl-rtds 14 saverestore-support-checkfail never pass test-armhf-armhf-xl-cubietruck 13 migrate-support-checkfail never pass test-armhf-armhf-xl-cubietruck 14 saverestore-support-checkfail never pass test-armhf-armhf-libvirt 13 migrate-support-checkfail never pass test-armhf-armhf-xl-credit2 13 migrate-support-checkfail never pass test-armhf-armhf-xl-credit2 14 saverestore-support-checkfail never pass test-arm64-arm64-xl-seattle 13 migrate-support-checkfail never pass test-arm64-arm64-xl-seattle 14 saverestore-support-checkfail never pass test-armhf-armhf-libvirt-raw 12 migrate-support-checkfail never pass test-armhf-armhf-xl-vhd 12 migrate-support-checkfail never pass test-armhf-armhf-xl-vhd 13 saverestore-support-checkfail never pass test-armhf-armhf-xl-credit1 13 migrate-support-checkfail never pass test-armhf-armhf-xl-credit1 14 saverestore-support-checkfail never pass test-amd64-i386-xl-qemut-ws16-amd64 17 guest-stop fail never pass version targeted for testing: xen f50a4f6e244cfc8e773300c03aaf4db391f3028a 
baseline version: xen c9115affa6f83aebe29ae9cbf503aa163911a5bb Last test of basis 144850 2019-12-16 01:51:10 Z2 days Failing since144878 2019-12-16 19:06:11 Z1 days3 attempts Testing same since 144905 2019-12-17 18:36:21 Z0 days1 attempts People who touched
[Xen-devel] [ovmf test] 144918: all pass - PUSHED
flight 144918 ovmf real [real] http://logs.test-lab.xenproject.org/osstest/logs/144918/ Perfect :-) All tests in this flight passed as required version targeted for testing: ovmf 69ebe8280672589d8f5826f74c0fa92c103c8042 baseline version: ovmf 804666c86e7b6f04fe5c5cfdb13199c19e0e99b0 Last test of basis 144637 2019-12-09 09:09:49 Z8 days Failing since144646 2019-12-10 01:39:53 Z8 days 73 attempts Testing same since 144918 2019-12-18 02:39:46 Z0 days1 attempts People who touched revisions under test: Antoine Coeur Ard Biesheuvel Bob Feng Fan, Zhiju Jiewen Yao Michael Kubacki Pete Batard Philippe Mathieu-Daude Steven Shi Zhiju.Fan jobs: build-amd64-xsm pass build-i386-xsm pass build-amd64 pass build-i386 pass build-amd64-libvirt pass build-i386-libvirt pass build-amd64-pvopspass build-i386-pvops pass test-amd64-amd64-xl-qemuu-ovmf-amd64 pass test-amd64-i386-xl-qemuu-ovmf-amd64 pass sg-report-flight on osstest.test-lab.xenproject.org logs: /home/logs/logs images: /home/logs/images Logs, config files, etc. are available at http://logs.test-lab.xenproject.org/osstest/logs Explanation of these reports, and of osstest in general, is at http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master Test harness code can be found at http://xenbits.xen.org/gitweb?p=osstest.git;a=summary Pushing revision : To xenbits.xen.org:/home/xen/git/osstest/ovmf.git 804666c86e..69ebe82806 69ebe8280672589d8f5826f74c0fa92c103c8042 -> xen-tested-master ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [XEN PATCH v2] x86/vm_event: add short-circuit for breakpoints (aka, "fast single step")
When using DRAKVUF (or another system using altp2m with shadow pages similar to what is described in https://xenproject.org/2016/04/13/stealthy-monitoring-with-xen-altp2m), after a breakpoint is hit the system switches to the default unrestricted altp2m view with singlestep enabled. When the singlestep traps to Xen another vm_event is sent to the monitor agent, which then normally disables singlestepping and switches the altp2m view back to the restricted view. This patch short-circuits that last part so that it doesn't need to send the vm_event out for the singlestep event and should switch back to the restricted view in Xen automatically. This optimization gains about 35% speed-up. It was tested on the Debian branch of Xen 4.12. See at: https://github.com/skvl/xen/tree/debian/knorrie/4.12/fast-singlestep Rebased on master: https://github.com/skvl/xen/tree/fast-singlestep Signed-off-by: Sergey Kovalev --- xen/arch/x86/hvm/hvm.c | 12 xen/arch/x86/hvm/monitor.c | 9 + xen/arch/x86/vm_event.c| 8 ++-- xen/include/asm-x86/hvm/hvm.h | 1 + xen/include/asm-x86/hvm/vcpu.h | 4 xen/include/public/vm_event.h | 10 ++ 6 files changed, 42 insertions(+), 2 deletions(-) diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c index 47573f71b8..4999569503 100644 --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -5126,6 +5126,18 @@ void hvm_toggle_singlestep(struct vcpu *v) v->arch.hvm.single_step = !v->arch.hvm.single_step; } +void hvm_fast_singlestep(struct vcpu *v, uint16_t p2midx) +{ +ASSERT(atomic_read(&v->pause_count)); + +if ( !hvm_is_singlestep_supported() ) +return; + +v->arch.hvm.single_step = true; +v->arch.hvm.fast_single_step.enabled = true; +v->arch.hvm.fast_single_step.p2midx = p2midx; +} + /* * Segment caches in VMCB/VMCS are inconsistent about which bits are checked, * important, and preserved across vmentry/exit.
Cook the values to make them diff --git a/xen/arch/x86/hvm/monitor.c b/xen/arch/x86/hvm/monitor.c index 1f23fe25e8..85996a3edd 100644 --- a/xen/arch/x86/hvm/monitor.c +++ b/xen/arch/x86/hvm/monitor.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -159,6 +160,14 @@ int hvm_monitor_debug(unsigned long rip, enum hvm_monitor_debug_type type, case HVM_MONITOR_SINGLESTEP_BREAKPOINT: if ( !ad->monitor.singlestep_enabled ) return 0; +if ( curr->arch.hvm.fast_single_step.enabled ) +{ +p2m_altp2m_check(curr, curr->arch.hvm.fast_single_step.p2midx); +curr->arch.hvm.single_step = false; +curr->arch.hvm.fast_single_step.enabled = false; +curr->arch.hvm.fast_single_step.p2midx = 0; +return 0; +} req.reason = VM_EVENT_REASON_SINGLESTEP; req.u.singlestep.gfn = gfn_of_rip(rip); sync = true; diff --git a/xen/arch/x86/vm_event.c b/xen/arch/x86/vm_event.c index 52c2a71fa0..3788d103f9 100644 --- a/xen/arch/x86/vm_event.c +++ b/xen/arch/x86/vm_event.c @@ -61,7 +61,8 @@ void vm_event_cleanup_domain(struct domain *d) void vm_event_toggle_singlestep(struct domain *d, struct vcpu *v, vm_event_response_t *rsp) { -if ( !(rsp->flags & VM_EVENT_FLAG_TOGGLE_SINGLESTEP) ) +if ( !(rsp->flags & VM_EVENT_FLAG_TOGGLE_SINGLESTEP || + rsp->flags & VM_EVENT_FLAG_FAST_SINGLESTEP) ) return; if ( !is_hvm_domain(d) ) @@ -69,7 +70,10 @@ void vm_event_toggle_singlestep(struct domain *d, struct vcpu *v, ASSERT(atomic_read(>vm_event_pause_count)); -hvm_toggle_singlestep(v); +if ( rsp->flags & VM_EVENT_FLAG_TOGGLE_SINGLESTEP ) +hvm_toggle_singlestep(v); +else +hvm_fast_singlestep(v, rsp->u.fast_singlestep.p2midx); } void vm_event_register_write_resume(struct vcpu *v, vm_event_response_t *rsp) diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h index 1d7b66f927..09793c12e9 100644 --- a/xen/include/asm-x86/hvm/hvm.h +++ b/xen/include/asm-x86/hvm/hvm.h @@ -323,6 +323,7 @@ int hvm_debug_op(struct vcpu *v, int32_t op); /* Caller should pause vcpu before 
calling this function */ void hvm_toggle_singlestep(struct vcpu *v); +void hvm_fast_singlestep(struct vcpu *v, uint16_t p2midx); int hvm_hap_nested_page_fault(paddr_t gpa, unsigned long gla, struct npfec npfec); diff --git a/xen/include/asm-x86/hvm/vcpu.h b/xen/include/asm-x86/hvm/vcpu.h index 38f5c2bb9b..8b8494 100644 --- a/xen/include/asm-x86/hvm/vcpu.h +++ b/xen/include/asm-x86/hvm/vcpu.h @@ -172,6 +172,10 @@ struct hvm_vcpu { boolflag_dr_dirty; booldebug_state_latch; boolsingle_step; +struct { +bool enabled; +uint16_t p2midx; +} fast_single_step; struct hvm_vcpu_asid n1asid; diff --git a/xen/include/public/vm_event.h
Re: [Xen-devel] [RFC 6/6] arm64: hyperv: Enable vDSO
On Tue, Dec 17, 2019 at 03:10:16PM +0100, Vitaly Kuznetsov wrote: > Boqun Feng writes: > > > Similar to x86, add a new vclock_mode VCLOCK_HVCLOCK, and reuse the > > hv_read_tsc_page() for userspace to read tsc page clocksource. > > > > Signed-off-by: Boqun Feng (Microsoft) > > --- > > arch/arm64/include/asm/clocksource.h | 3 ++- > > arch/arm64/include/asm/mshyperv.h | 2 +- > > arch/arm64/include/asm/vdso/gettimeofday.h | 19 +++ > > 3 files changed, 22 insertions(+), 2 deletions(-) > > > > diff --git a/arch/arm64/include/asm/clocksource.h > > b/arch/arm64/include/asm/clocksource.h > > index fbe80057468c..c6acd45fe748 100644 > > --- a/arch/arm64/include/asm/clocksource.h > > +++ b/arch/arm64/include/asm/clocksource.h > > @@ -4,7 +4,8 @@ > > > > #define VCLOCK_NONE0 /* No vDSO clock available. > > */ > > #define VCLOCK_CNTVCT 1 /* vDSO should use cntvcnt > > */ > > -#define VCLOCK_MAX 1 > > +#define VCLOCK_HVCLOCK 2 /* vDSO should use vread_hvclock() > > */ > > +#define VCLOCK_MAX 2 > > > > struct arch_clocksource_data { > > int vclock_mode; > > diff --git a/arch/arm64/include/asm/mshyperv.h > > b/arch/arm64/include/asm/mshyperv.h > > index 0afb00e3501d..7c85dd816dca 100644 > > --- a/arch/arm64/include/asm/mshyperv.h > > +++ b/arch/arm64/include/asm/mshyperv.h > > @@ -90,7 +90,7 @@ extern void hv_get_vpreg_128(u32 reg, struct > > hv_get_vp_register_output *result); > > #define hv_set_reference_tsc(val) \ > > hv_set_vpreg(HV_REGISTER_REFERENCE_TSC, val) > > #define hv_set_clocksource_vdso(val) \ > > - ((val).archdata.vclock_mode = VCLOCK_NONE) > > + ((val).archdata.vclock_mode = VCLOCK_HVCLOCK) > > > > #if IS_ENABLED(CONFIG_HYPERV) > > #define hv_enable_stimer0_percpu_irq(irq) enable_percpu_irq(irq, 0) > > diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h > > b/arch/arm64/include/asm/vdso/gettimeofday.h > > index e6e3fe0488c7..7e689b903f4d 100644 > > --- a/arch/arm64/include/asm/vdso/gettimeofday.h > > +++ b/arch/arm64/include/asm/vdso/gettimeofday.h > > @@ -67,6 
+67,20 @@ int clock_getres_fallback(clockid_t _clkid, struct > > __kernel_timespec *_ts) > > return ret; > > } > > > > +#ifdef CONFIG_HYPERV_TIMER > > +/* This will override the default hv_get_raw_timer() */ > > +#define hv_get_raw_timer() __arch_counter_get_cntvct() > > +#include > > + > > +extern struct ms_hyperv_tsc_page > > +_hvclock_page __attribute__((visibility("hidden"))); > > + > > +static u64 vread_hvclock(void) > > +{ > > + return hv_read_tsc_page(&_hvclock_page); > > +} > > +#endif > > The function is almost the same on x86 (&_hvclock_page -> > &hvclock_page), would it maybe make sense to move this to arch neutral > clocksource/hyperv_timer.h? > I'm not sure whether the underscore matters in the vDSO data symbol, so I follow the architectural name convention. If the leading underscore doesn't have a special purpose I'm happy to move this to arch neutral header file. > > + > > static __always_inline u64 __arch_get_hw_counter(s32 clock_mode) > > { > > u64 res; > > @@ -78,6 +92,11 @@ static __always_inline u64 __arch_get_hw_counter(s32 > > clock_mode) > > if (clock_mode == VCLOCK_NONE) > > return __VDSO_USE_SYSCALL; > > > > +#ifdef CONFIG_HYPERV_TIMER > > + if (likely(clock_mode == VCLOCK_HVCLOCK)) > > + return vread_hvclock(); > > I'm not sure likely() is justified here: it'll make ALL builds which > enable CONFIG_HYPERV_TIMER (e.g. distro kernels) to prefer > VCLOCK_HVCLOCK, even if the kernel is not running on Hyper-V. > Makes sense. Thanks for pointing this out! I will change it in the next version. Regards, Boqun > > +#endif > > + > > /* > > * This isb() is required to prevent that the counter value > > * is speculated. > > -- > Vitaly > ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] clock source in PV Linux
On Wed, Dec 11, 2019 at 12:41 AM Jan Beulich wrote: > > On 11.12.2019 09:16, Jürgen Groß wrote: > > On 11.12.19 08:28, Jan Beulich wrote: > >> Jürgen, Boris, > >> > >> I've noticed > >> > >> <6>clocksource: Switched to clocksource tsc > >> > >> as the final clocksource related boot message in a PV Dom0's > >> log with 5.4.2. Is it intentional that it's not the "xen" one > >> that gets used by default? > > > > I think this is fine. I just tested it and I'm seeing the same in dom0, > > while in a PV domU "xen" is used per default. > > > > In dom0 "tsc" should be okay in case it is stable. Or are you expecting > > problems with that setting? > > Well, first of all I found this surprising. Whether there are problems to > be expected largely depends on the reliability of the "stable" detection > in PV Dom0. Related question: does this mean that tsc is now default for PVH as well? The reason I'm asking is because I'm still a bit worried about the clock drift with tsc. Thanks, Roman. ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] REGRESSION: Xen 4.13 RC5 fails to bootstrap Dom0 on ARM
On Tue, Dec 17, 2019 at 5:51 PM Stefano Stabellini wrote: > > On Tue, 17 Dec 2019, Roman Shaposhnik wrote: > > On Tue, Dec 17, 2019 at 11:26 AM Stefano Stabellini > > wrote: > > > > > > On Tue, 17 Dec 2019, Roman Shaposhnik wrote: > > > > On Tue, Dec 17, 2019 at 10:30 AM Stefano Stabellini > > > > wrote: > > > > > > > > > > On Tue, 17 Dec 2019, Julien Grall wrote: > > > > > > Hi, > > > > > > > > > > > > On 17/12/2019 04:39, Roman Shaposhnik wrote: > > > > > > > On Mon, Dec 16, 2019 at 6:55 PM Stefano Stabellini > > > > > > > wrote: > > > > > > > > On Mon, 16 Dec 2019, Roman Shaposhnik wrote: > > > > > > > > If I sum all the memory sizes together I get 0x3ddfd000 which > > > > > > > > is 990M. > > > > > > > > If so, I wonder how you could boot succesfully with > > > > > > > > dom0_mem=1024M even > > > > > > > > on Xen 4.12... :-? > > > > > > > > > > > > > > That is a very interesting observation indeed! I actually don't > > > > > > > remember where that device tree came from, but I think it was > > > > > > > from one > > > > > > > of the Linaro sites. > > > > > > > > > > > > This is mostly likely because of: > > > > > > > > > > > > commit 6341a674573f1834f083f0ab0f5b36b075f9e02e > > > > > > Author: Julien Grall > > > > > > Date: Wed Aug 21 22:42:31 2019 +0100 > > > > > > > > > > > > xen/arm: domain_build: Don't continue if unable to allocate all > > > > > > dom0 banks > > > > > > > > > > > > Xen will only print a warning if there are memory unallocated > > > > > > when using > > > > > > 1:1 mapping (only used by dom0). This also includes the case > > > > > > where no > > > > > > memory has been allocated. > > > > > > > > > > > > It will bring to all sort of issues that can be hard to > > > > > > diagnostic for > > > > > > users (the warning can be difficult to spot or disregard). > > > > > > > > > > > > If the users request 1GB of memory, then most likely they want > > > > > > the exact > > > > > > amount and not 512MB. 
So panic if all the memory has not been > > > > > > allocated. > > > > > > > > > > > > After this change, the behavior is the same as for non-1:1 > > > > > > memory > > > > > > allocation (used by domU). > > > > > > > > > > > > At the same time, reflow the message to have the format on a > > > > > > single > > > > > > line. > > > > > > > > > > > > Signed-off-by: Julien Grall > > > > > > Acked-by: Stefano Stabellini > > > > > > > > > > Ah! Roman, could you please post the full boot log of a successful > > > > > 4.12 > > > > > boot? > > > > > > > > > > If it has a "Failed to allocate requested dom0 memory" message, then > > > > > we > > > > > know what the issue is. > > > > > > > > Aha! Our messages seems to have crossed ;-) Full log is attached and > > > > yes -- that's > > > > the problem indeed. > > > > > > > > So at least that mystery is solved. But I'm still not able to get to a > > > > full 1G of memory > > > > even with your update to the device tree file. Any chance you can send > > > > me the > > > > device tree file that works for you? > > > > > > I didn't try on real hardware, I only tried on QEMU with a similar > > > configuration. I went back and check the HiKey device tree I used and it > > > is the same as yours (including the ramoops reserved-memory error). > > > > > > Apparently there are 1G and 2G variants of the HiKey, obviously both > > > yours and my device tree are for the 1G variant. I try to dig through > > > the docs but couldn't find the details of the 2G variant. I cannot find > > > anywhere the memory range for the top 1G of memory not even on the > > > LeMaker docs! :-/ > > > > Yup. That's exactly the issue on my end as well - can't seem to find an > > authoritative source for that devicetree. > > > > I did find this, though: > > https://releases.linaro.org/96boards/hikey/linaro/debian/15.11/ > > which looks like it has the latest (at least file timestamp-wise) > > devicetree. 
> > > > If you look at the memory and reserved memory nodes there, they > > are actually much simpler than what we've got: > > > > memory { > > device_type = "memory"; > > reg = <0x0 0x0 0x0 0x4000>; > > }; > > Which is still 1G, but it is surprisingly simpler. > > > > reserved-memory { > > #address-cells = <0x2>; > > #size-cells = <0x2>; > > ranges; > > > > mcu-buf@05e0 { > > no-map; > > reg = <0x0 0x5e0 0x0 0x10 0x0 > > 0x740f000 0x0 0x1000>; > > }; > > > > mbox-buf@06dff000 { > > no-map; > > reg = <0x0 0x6dff000 0x0 0x1000>; > > }; > > }; > > > > So -- just on a whim -- I changed it to: > > reg = <0x0 0x0 0x0 0x8000>; > > I would have tried that too :-) > > > > Interestingly enough, Xen booted, and complained about
[Xen-devel] [ovmf test] 144914: regressions - FAIL
flight 144914 ovmf real [real] http://logs.test-lab.xenproject.org/osstest/logs/144914/ Regressions :-( Tests which did not succeed and are blocking, including tests which could not be run: build-i386-xsm6 xen-buildfail REGR. vs. 144637 build-amd64 6 xen-buildfail REGR. vs. 144637 build-amd64-xsm 6 xen-buildfail REGR. vs. 144637 build-i3866 xen-buildfail REGR. vs. 144637 Tests which did not succeed, but are not blocking: build-i386-libvirt1 build-check(1) blocked n/a test-amd64-amd64-xl-qemuu-ovmf-amd64 1 build-check(1) blocked n/a build-amd64-libvirt 1 build-check(1) blocked n/a test-amd64-i386-xl-qemuu-ovmf-amd64 1 build-check(1) blocked n/a version targeted for testing: ovmf bfb141cf19dd6f9b8df8b9d0914a5b3b15e1a798 baseline version: ovmf 804666c86e7b6f04fe5c5cfdb13199c19e0e99b0 Last test of basis 144637 2019-12-09 09:09:49 Z8 days Failing since144646 2019-12-10 01:39:53 Z8 days 72 attempts Testing same since 144770 2019-12-12 18:41:26 Z5 days 61 attempts People who touched revisions under test: Antoine Coeur Ard Biesheuvel Bob Feng Jiewen Yao Michael Kubacki Pete Batard Philippe Mathieu-Daude Steven Shi jobs: build-amd64-xsm fail build-i386-xsm fail build-amd64 fail build-i386 fail build-amd64-libvirt blocked build-i386-libvirt blocked build-amd64-pvopspass build-i386-pvops pass test-amd64-amd64-xl-qemuu-ovmf-amd64 blocked test-amd64-i386-xl-qemuu-ovmf-amd64 blocked sg-report-flight on osstest.test-lab.xenproject.org logs: /home/logs/logs images: /home/logs/images Logs, config files, etc. are available at http://logs.test-lab.xenproject.org/osstest/logs Explanation of these reports, and of osstest in general, is at http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master Test harness code can be found at http://xenbits.xen.org/gitweb?p=osstest.git;a=summary Not pushing. 
commit bfb141cf19dd6f9b8df8b9d0914a5b3b15e1a798 Author: Pete Batard Date: Tue Dec 10 18:23:04 2019 + MdePkg/Include: Add DCC and BCM2835 SPCR UART types As per the Microsoft Debug Port Table 2 (DBG2) documentation, that can be found online, we are missing 2 serial interface types for Arm DCC and Bcm2835 (the latter being used with the Raspberry Pi). These same types are present in DebugPort2Table.h so add them to SerialPortConsoleRedirectionTable.h too. Note that we followed the same idiosyncrasies as DebugPort2Table for naming these new macros. Signed-off-by: Pete Batard Acked-by: Ard Biesheuvel Reviewed-by: Liming Gao commit 2fe25a74d6fee3c2ac0b930f7f3596cb432e766e Author: Ard Biesheuvel Date: Tue Mar 5 14:32:48 2019 +0100 ArmPkg/MmCommunicationDxe: relay architected PI events to MM context PI defines a few architected events that have significance in the MM context as well as in the non-secure DXE context. So register notify handlers for these events, and relay them into the standalone MM world. Signed-off-by: Ard Biesheuvel Reviewed-by: Jiewen Yao Reviewed-by: Achin Gupta commit d3add11e87dace180387562d6f1951f2bffbd3d9 Author: Michael Kubacki Date: Wed Nov 20 17:31:24 2019 -0800 MdeModulePkg PeiCore: Improve comment semantics This patch clarifies wording in several PeiCore comments to improve reading comprehension. Cc: Dandan Bi Cc: Liming Gao Cc: Jian J Wang Cc: Hao A Wu Signed-off-by: Michael Kubacki Reviewed-by: Liming Gao Reviewed-by: Jian J Wang commit d39d1260c615b716675f67f5c4e1f4f52df01dad Author: Michael Kubacki Date: Wed Nov 20 17:10:48 2019 -0800 MdeModulePkg PeiCore: Fix typos Cc: Dandan Bi Cc: Liming Gao Cc: Jian J Wang Cc: Hao A Wu Signed-off-by: Michael Kubacki Reviewed-by: Liming Gao Reviewed-by: Philippe Mathieu-Daude Reviewed-by: Jian J Wang commit 97eedf5dfbaffde33210fd88066247cf0b7d3325 Author: Antoine Coeur Date: Wed Dec 4 12:14:53
Re: [Xen-devel] REGRESSION: Xen 4.13 RC5 fails to bootstrap Dom0 on ARM
On Tue, 17 Dec 2019, Roman Shaposhnik wrote: > On Tue, Dec 17, 2019 at 11:26 AM Stefano Stabellini > wrote: > > > > On Tue, 17 Dec 2019, Roman Shaposhnik wrote: > > > On Tue, Dec 17, 2019 at 10:30 AM Stefano Stabellini > > > wrote: > > > > > > > > On Tue, 17 Dec 2019, Julien Grall wrote: > > > > > Hi, > > > > > > > > > > On 17/12/2019 04:39, Roman Shaposhnik wrote: > > > > > > On Mon, Dec 16, 2019 at 6:55 PM Stefano Stabellini > > > > > > wrote: > > > > > > > On Mon, 16 Dec 2019, Roman Shaposhnik wrote: > > > > > > > If I sum all the memory sizes together I get 0x3ddfd000 which is > > > > > > > 990M. > > > > > > > If so, I wonder how you could boot succesfully with > > > > > > > dom0_mem=1024M even > > > > > > > on Xen 4.12... :-? > > > > > > > > > > > > That is a very interesting observation indeed! I actually don't > > > > > > remember where that device tree came from, but I think it was from > > > > > > one > > > > > > of the Linaro sites. > > > > > > > > > > This is mostly likely because of: > > > > > > > > > > commit 6341a674573f1834f083f0ab0f5b36b075f9e02e > > > > > Author: Julien Grall > > > > > Date: Wed Aug 21 22:42:31 2019 +0100 > > > > > > > > > > xen/arm: domain_build: Don't continue if unable to allocate all > > > > > dom0 banks > > > > > > > > > > Xen will only print a warning if there are memory unallocated > > > > > when using > > > > > 1:1 mapping (only used by dom0). This also includes the case > > > > > where no > > > > > memory has been allocated. > > > > > > > > > > It will bring to all sort of issues that can be hard to > > > > > diagnostic for > > > > > users (the warning can be difficult to spot or disregard). > > > > > > > > > > If the users request 1GB of memory, then most likely they want > > > > > the exact > > > > > amount and not 512MB. So panic if all the memory has not been > > > > > allocated. > > > > > > > > > > After this change, the behavior is the same as for non-1:1 memory > > > > > allocation (used by domU). 
> > > > > > > > > > At the same time, reflow the message to have the format on a > > > > > single > > > > > line. > > > > > > > > > > Signed-off-by: Julien Grall > > > > > Acked-by: Stefano Stabellini > > > > > > > > Ah! Roman, could you please post the full boot log of a successful 4.12 > > > > boot? > > > > > > > > If it has a "Failed to allocate requested dom0 memory" message, then we > > > > know what the issue is. > > > > > > Aha! Our messages seems to have crossed ;-) Full log is attached and > > > yes -- that's > > > the problem indeed. > > > > > > So at least that mystery is solved. But I'm still not able to get to a > > > full 1G of memory > > > even with your update to the device tree file. Any chance you can send me > > > the > > > device tree file that works for you? > > > > I didn't try on real hardware, I only tried on QEMU with a similar > > configuration. I went back and check the HiKey device tree I used and it > > is the same as yours (including the ramoops reserved-memory error). > > > > Apparently there are 1G and 2G variants of the HiKey, obviously both > > yours and my device tree are for the 1G variant. I try to dig through > > the docs but couldn't find the details of the 2G variant. I cannot find > > anywhere the memory range for the top 1G of memory not even on the > > LeMaker docs! :-/ > > Yup. That's exactly the issue on my end as well - can't seem to find an > authoritative source for that devicetree. > > I did find this, though: > https://releases.linaro.org/96boards/hikey/linaro/debian/15.11/ > which looks like it has the latest (at least file timestamp-wise) devicetree. > > If you look at the memory and reserved memory nodes there, they > are actually much simpler than what we've got: > > memory { > device_type = "memory"; > reg = <0x0 0x0 0x0 0x4000>; > }; Which is still 1G, but it is surprisingly simpler. 
> reserved-memory { > #address-cells = <0x2>; > #size-cells = <0x2>; > ranges; > > mcu-buf@05e0 { > no-map; > reg = <0x0 0x5e0 0x0 0x10 0x0 > 0x740f000 0x0 0x1000>; > }; > > mbox-buf@06dff000 { > no-map; > reg = <0x0 0x6dff000 0x0 0x1000>; > }; > }; > > So -- just on a whim -- I changed it to: > reg = <0x0 0x0 0x0 0x8000>; I would have tried that too :-) > Interestingly enough, Xen booted, and complained about only 192MB > unallocated this time. > So, I dropped the size of Dom0 to 640M and I got it boot and here's > what I'm seeing as > an output of xl info: >total_memory : 1120 >free_memory: 390 > It still nowhere close to 2G. > > Then I booted the Linux kernel without Xen and it correctly identified > all 2G worth
[Xen-devel] [PATCH v2 2/4] x86/microcode: avoid unnecessary xmalloc/memcpy of ucode data
When using `ucode=scan` and if a matching module is found, the microcode payload is maintained in an xmalloc()'d region. This is unnecessary since the bootmap would just do. Remove the xmalloc and xfree on the microcode module scan path. This commit also does away with the restriction on the microcode module size limit. The concern that a large microcode module would consume too much memory preventing guests launch is misplaced since this is all the init path. While having such safeguards is valuable, this should apply across the board for all early/late microcode loading. Having it just on the `scan` path is confusing. Looking forward, we are a bit closer (i.e., one xmalloc down) to pulling the early microcode loading of the BSP a bit earlier in the early boot process. This commit is the low hanging fruit. There is still a sizable amount of work to get there as there are still a handful of xmalloc in microcode_{amd,intel}.c. First, there are xmallocs on the path of finding a matching microcode update. Similar to the commit at hand, searching through the microcode blob can be done on the already present buffer with no need to xmalloc any further. Even better, do the filtering in microcode.c before requesting the microcode update on all CPUs. The latter requires careful restructuring and exposing the arch-specific logic for iterating over patches and declaring a match. Second, there are xmallocs for the microcode cache. Here, we would need to ensure that the cache corresponding to the BSP gets xmalloc()'d and populated after the fact. Signed-off-by: Eslam Elnikety --- xen/arch/x86/microcode.c | 32 1 file changed, 4 insertions(+), 28 deletions(-) diff --git a/xen/arch/x86/microcode.c b/xen/arch/x86/microcode.c index 8b4d87782c..c878fc71ff 100644 --- a/xen/arch/x86/microcode.c +++ b/xen/arch/x86/microcode.c @@ -138,11 +138,6 @@ static int __init parse_ucode_param(const char *s) } custom_param("ucode", parse_ucode_param); -/* - * 8MB ought to be enough. 
- */ -#define MAX_EARLY_CPIO_MICROCODE (8 << 20) - void __init microcode_scan_module( unsigned long *module_map, const multiboot_info_t *mbi) @@ -187,31 +182,12 @@ void __init microcode_scan_module( cd = find_cpio_data(p, _blob_start, _blob_size, /* ignore */); if ( cd.data ) { -/* - * This is an arbitrary check - it would be sad if the blob - * consumed most of the memory and did not allow guests - * to launch. - */ -if ( cd.size > MAX_EARLY_CPIO_MICROCODE ) -{ -printk("Multiboot %d microcode payload too big! (%ld, we can do %d)\n", - i, cd.size, MAX_EARLY_CPIO_MICROCODE); -goto err; -} -ucode_blob.size = cd.size; -ucode_blob.data = xmalloc_bytes(cd.size); -if ( !ucode_blob.data ) -cd.data = NULL; -else -memcpy(ucode_blob.data, cd.data, cd.size); +ucode_blob.size = cd.size; +ucode_blob.data = cd.data; +break; } bootstrap_map(NULL); -if ( cd.data ) -break; } -return; -err: -bootstrap_map(NULL); } void __init microcode_grab_module( unsigned long *module_map, @@ -725,7 +701,7 @@ static int __init microcode_init(void) */ if ( ucode_blob.size ) { -xfree(ucode_blob.data); +bootstrap_map(NULL); ucode_blob.size = 0; ucode_blob.data = NULL; } -- 2.17.1 ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [PATCH v2 4/4] x86/microcode: Support builtin CPU microcode
Xen relies on boot modules to perform early microcode updates. This commit adds another mode, namely "builtin" via the BUILTIN_UCODE config parameter. If set, the Xen image itself will contain the microcode updates. Upon boot, Xen inspects its image for microcode blobs and performs the update. A Xen image with builtin microcode will, by default, attempt the microcode update. Disabling the builtin microcode update can be done via the Xen command line parameter 'ucode=no-builtin'. Moreover, the microcode provided via other options (such as 'ucode=|scan' or 'ucode=' config when booting via EFI) takes precedence over the builtin one. Signed-off-by: Eslam Elnikety --- Changes in v2: - Allow for ucode=|scan,{no-}builtin and detail the model. Reflect those changes onto microcode.c and docs/misc/xen-command-line.pandoc - Add documentation to the existing docs/admin-guide/microcode-loading.rst - Build on Patches 1--3 to avoid xmalloc/memcpy for the builtin microcode - Work configuration in order to specify the individual microcode blobs to use for the builtin microcode, and rework the microcode/Makefile accordingly --- docs/admin-guide/microcode-loading.rst | 31 +++ docs/misc/xen-command-line.pandoc | 10 - xen/arch/x86/Kconfig | 30 +++ xen/arch/x86/Makefile | 1 + xen/arch/x86/microcode.c | 52 ++ xen/arch/x86/microcode/Makefile| 46 +++ xen/arch/x86/xen.lds.S | 12 ++ 7 files changed, 180 insertions(+), 2 deletions(-) create mode 100644 xen/arch/x86/microcode/Makefile diff --git a/docs/admin-guide/microcode-loading.rst b/docs/admin-guide/microcode-loading.rst index e83cadd2c2..989e8d446b 100644 --- a/docs/admin-guide/microcode-loading.rst +++ b/docs/admin-guide/microcode-loading.rst @@ -104,6 +104,37 @@ The ``ucode=scan`` command line option will cause Xen to search through all modules to find any CPIO archives, and search the archive for the applicable file. Xen will stop searching at the first match. 
+Loading microcode built within the Xen image + + +Xen can bundle microcode updates within its image. This support is conditional +on the build configuration BUILTIN_UCODE being enabled. Builtin microcode is +useful to ensure that, by default, a minimum microcode patch level will be +applied to the underlying CPU. + +To use microcode updates available on the build system as builtin, +use BUILTIN_UCODE_DIR to refer to the directory containing the firmware updates +and specify the individual microcode patches via either BUILTIN_UCODE_AMD or +BUILTIN_UCODE_INTEL for AMD microcode or INTEL microcode, respectively. For +instance, the configuration below is suitable for a build system which has a +``/lib/firmware/`` directory which, in turn, includes the individual microcode +patches ``amd-ucode/microcode_amd_fam15h.bin``, ``intel-ucode/06-3a-09``, and +``intel-ucode/06-2f-02``. + + CONFIG_BUILTIN_UCODE=y + CONFIG_BUILTIN_UCODE_DIR="/lib/firmware/" + CONFIG_BUILTIN_UCODE_AMD="amd-ucode/microcode_amd_fam15h.bin" + CONFIG_BUILTIN_UCODE_INTEL="intel-ucode/06-3a-09 intel-ucode/06-2f-02" + +Alternatively, CONFIG_BUILTIN_UCODE_{AMD,INTEL} can directly point to the +concatenation of the individual microcode blobs. For instance, assuming that +``amd-ucode/AuthenticAMD.bin`` and ``intel-ucode/GenuineIntel.bin`` hold +multiple microcode updates for AMD and INTEL, respectively, you may use the +configuration below. + + CONFIG_BUILTIN_UCODE_AMD="amd-ucode/AuthenticAMD.bin" + CONFIG_BUILTIN_UCODE_INTEL="intel-ucode/GenuineIntel.bin" + Run time microcode loading -- diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc index 40faf3bc3a..9cfc2df05a 100644 --- a/docs/misc/xen-command-line.pandoc +++ b/docs/misc/xen-command-line.pandoc @@ -2126,10 +2126,10 @@ logic applies: active by default. 
### ucode (x86) -> `= List of [ | scan=, nmi= ]` +> `= List of [ | scan=, builtin=, nmi= ]` Applicability: x86 -Default: `nmi` +Default: `nmi` if BUILTIN_UCODE is not enabled, `builtin,nmi` otherwise Controls for CPU microcode loading. For early loading, this parameter can specify how and where to find the microcode update blob. For late loading, @@ -2150,6 +2150,12 @@ microcode in the cpio name space must be: - on Intel: kernel/x86/microcode/GenuineIntel.bin - on AMD : kernel/x86/microcode/AuthenticAMD.bin +'builtin' instructs the hypervisor to use the builtin microcode update. This +option is available only if option BUILTIN_UCODE is enabled at build. The +default value is `true`. If a microcode is provided via other options (such +as 'integer', 'scan', or `ucode=` config when booting via EFI), +the provided microcode takes precedence over the builtin one. + 'nmi' determines late loading is performed in NMI handler or just in
[Xen-devel] [PATCH v2 1/4] x86/microcode: Improve documentation and parsing for ucode=
Decouple the microcode referencing mechanism when using GRUB from that when using EFI. This allows us to avoid the "unspecified effect" of using ` | scan` along with xen.efi. With that, Xen can explicitly ignore those named options when using EFI. As an added benefit, we get a straightforward parsing of the ucode parameter. While at it, simplify the logic in microcode_grab_module(). Update the command line documentation for consistency. Also, drop the leading comment for parse_ucode_param. (No practical use for it given this commit). Signed-off-by: Eslam Elnikety --- docs/misc/xen-command-line.pandoc | 18 --- xen/arch/x86/microcode.c | 51 ++- 2 files changed, 36 insertions(+), 33 deletions(-) diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc index 7a1be84ca9..40faf3bc3a 100644 --- a/docs/misc/xen-command-line.pandoc +++ b/docs/misc/xen-command-line.pandoc @@ -2128,7 +2128,13 @@ logic applies: ### ucode (x86) > `= List of [ | scan=, nmi= ]` -Specify how and where to find CPU microcode update blob. +Applicability: x86 +Default: `nmi` + +Controls for CPU microcode loading. For early loading, this parameter can +specify how and where to find the microcode update blob. For late loading, +this parameter specifies if the update happens within a NMI handler or in +a stop_machine context. 'integer' specifies the CPU microcode update blob module index. When positive, this specifies the n-th module (in the GrUB entry, zero based) to be used @@ -2136,10 +2142,7 @@ for updating CPU micrcode. When negative, counting starts at the end of the modules in the GrUB entry (so with the blob commonly being last, one could specify `ucode=-1`). Note that the value of zero is not valid here (entry zero, i.e. the first module, is always the Dom0 kernel -image). 
Note further that use of this option has an unspecified effect -when used with xen.efi (there the concept of modules doesn't exist, and -the blob gets specified via the `ucode=` config file/section -entry; see [EFI configuration file description](efi.html)). +image). 'scan' instructs the hypervisor to scan the multiboot images for an cpio image that contains microcode. Depending on the platform the blob with the @@ -2151,6 +2154,11 @@ microcode in the cpio name space must be: stop_machine context. In NMI handler, even NMIs are blocked, which is considered safer. The default value is `true`. +Note: When booting via EFI, both options 'integer' and 'scan' are ignored. +Here, the concept of modules does not exist. The microcode update blob for +early loading gets specified via the `ucode=` config file/section +entry; see [EFI configuration file description](efi.html)). + ### unrestricted_guest (Intel) > `= ` diff --git a/xen/arch/x86/microcode.c b/xen/arch/x86/microcode.c index 6ced293d88..8b4d87782c 100644 --- a/xen/arch/x86/microcode.c +++ b/xen/arch/x86/microcode.c @@ -60,7 +60,7 @@ static module_t __initdata ucode_mod; static signed int __initdata ucode_mod_idx; -static bool_t __initdata ucode_mod_forced; +static signed int __initdata ucode_mod_efi_idx; static unsigned int nr_cores; /* @@ -105,16 +105,10 @@ static struct microcode_patch *microcode_cache; void __init microcode_set_module(unsigned int idx) { -ucode_mod_idx = idx; -ucode_mod_forced = 1; +ucode_mod_efi_idx = idx; } -/* - * The format is '[|scan=, nmi=]'. Both options are - * optional. If the EFI has forced which of the multiboot payloads is to be - * used, only nmi= is parsed. 
- */ -static int __init parse_ucode(const char *s) +static int __init parse_ucode_param(const char *s) { const char *ss; int val, rc = 0; @@ -126,18 +120,15 @@ static int __init parse_ucode(const char *s) if ( (val = parse_boolean("nmi", s, ss)) >= 0 ) ucode_in_nmi = val; -else if ( !ucode_mod_forced ) /* Not forced by EFI */ +else if ( (val = parse_boolean("scan", s, ss)) >= 0 ) +ucode_scan = val; +else { -if ( (val = parse_boolean("scan", s, ss)) >= 0 ) -ucode_scan = val; -else -{ -const char *q; - -ucode_mod_idx = simple_strtol(s, , 0); -if ( q != ss ) -rc = -EINVAL; -} +const char *q; + +ucode_mod_idx = simple_strtol(s, , 0); +if ( q != ss ) +rc = -EINVAL; } s = ss + 1; @@ -145,7 +136,7 @@ static int __init parse_ucode(const char *s) return rc; } -custom_param("ucode", parse_ucode); +custom_param("ucode", parse_ucode_param); /* * 8MB ought to be enough. @@ -228,14 +219,18 @@ void __init microcode_grab_module( { module_t *mod = (module_t *)__va(mbi->mods_addr); -if ( ucode_mod_idx < 0 ) +if ( ucode_mod_efi_idx ) /* Microcode specified by EFI */ +{ +ucode_mod =
[Xen-devel] [PATCH v2 0/4] x86/microcode: Support builtin CPU microcode
The main goal of this patch series is to add support for builtin microcode. Towards that end, the series starts with a few improvements for the documentation and parsing of the ucode= Xen command line parameter that controls early loading of microcode (Patches 1--3), and follows with the main builtin support (Patch 4). Changes in v2: - An earlier version of Patch 4 was submitted in isolation. Refer to the patch itself for details regarding the relevant changes. - Patches 1--3 are additions. Eslam Elnikety (4): x86/microcode: Improve documentation and parsing for ucode= x86/microcode: avoid unnecessary xmalloc/memcpy of ucode data x86/microcode: use const qualifier for microcode buffer x86/microcode: Support builtin CPU microcode docs/admin-guide/microcode-loading.rst | 31 ++ docs/misc/xen-command-line.pandoc | 26 +++-- xen/arch/x86/Kconfig | 30 ++ xen/arch/x86/Makefile | 1 + xen/arch/x86/microcode.c | 139 ++--- xen/arch/x86/microcode/Makefile| 46 xen/arch/x86/xen.lds.S | 12 +++ 7 files changed, 221 insertions(+), 64 deletions(-) create mode 100644 xen/arch/x86/microcode/Makefile -- 2.17.1 ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [PATCH v2 3/4] x86/microcode: use const qualifier for microcode buffer
The buffer holding the microcode bits should be marked as const. Signed-off-by: Eslam Elnikety --- xen/arch/x86/microcode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xen/arch/x86/microcode.c b/xen/arch/x86/microcode.c index c878fc71ff..4616fa9d2e 100644 --- a/xen/arch/x86/microcode.c +++ b/xen/arch/x86/microcode.c @@ -86,7 +86,7 @@ static enum { * memory. */ struct ucode_mod_blob { -void *data; +const void *data; size_t size; }; @@ -744,7 +744,7 @@ int microcode_update_one(bool start_update) int __init early_microcode_update_cpu(void) { int rc = 0; -void *data = NULL; +const void *data = NULL; size_t len; struct microcode_patch *patch; -- 2.17.1 ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [PATCH] [tools/hotplug] Use ip on systems where brctl is not available
Newer distros like CentOS 8 do not have brctl available. As such, we can't use it to configure networking anymore. This patch will fall back to 'ip' or 'bridge' commands if brctl is not available in the working PATH. This would be a likely backport candidate to any version expected to be built on CentOS 8 etc. --- tools/hotplug/Linux/colo-proxy-setup | 30 +-- tools/hotplug/Linux/vif-bridge| 16 tools/hotplug/Linux/vif2 | 12 +++-- tools/hotplug/Linux/xen-network-common.sh | 16 +--- 4 files changed, 55 insertions(+), 19 deletions(-) diff --git a/tools/hotplug/Linux/colo-proxy-setup b/tools/hotplug/Linux/colo-proxy-setup index 94e2034452..d709146c47 100755 --- a/tools/hotplug/Linux/colo-proxy-setup +++ b/tools/hotplug/Linux/colo-proxy-setup @@ -76,10 +76,17 @@ function teardown_primary() function setup_secondary() { -do_without_error brctl delif $bridge $vifname -do_without_error brctl addbr $forwardbr -do_without_error brctl addif $forwardbr $vifname -do_without_error brctl addif $forwardbr $forwarddev +if which brctl >&/dev/null; then +do_without_error brctl delif $bridge $vifname +do_without_error brctl addbr $forwardbr +do_without_error brctl addif $forwardbr $vifname +do_without_error brctl addif $forwardbr $forwarddev +else +do_without_error ip link set $vifname nomaster +do_without_error ip link add name $forwardbr type bridge +do_without_error ip link set $vifname master $forwardbr +do_without_error ip link set $forwarddev master $forwardbr +fi do_without_error ip link set dev $forwardbr up do_without_error modprobe xt_SECCOLO @@ -91,10 +98,17 @@ function setup_secondary() function teardown_secondary() { -do_without_error brctl delif $forwardbr $forwarddev -do_without_error brctl delif $forwardbr $vifname -do_without_error brctl delbr $forwardbr -do_without_error brctl addif $bridge $vifname +if which brctl >&/dev/null; then +do_without_error brctl delif $forwardbr $forwarddev +do_without_error brctl delif $forwardbr $vifname +do_without_error brctl delbr 
$forwardbr +do_without_error brctl addif $bridge $vifname +else +do_without_error ip link set $forwarddev nomaster +do_without_error ip link set $vifname nomaster +do_without_error ip link delete $forwardbr type bridge +do_without_error ip link set $vifname master $bridge +fi do_without_error iptables -t mangle -D PREROUTING -m physdev --physdev-in \ $vifname -j SECCOLO --index $index diff --git a/tools/hotplug/Linux/vif-bridge b/tools/hotplug/Linux/vif-bridge index 6956dea66a..e722090ca8 100644 --- a/tools/hotplug/Linux/vif-bridge +++ b/tools/hotplug/Linux/vif-bridge @@ -31,10 +31,12 @@ dir=$(dirname "$0") bridge=${bridge:-} bridge=$(xenstore_read_default "$XENBUS_PATH/bridge" "$bridge") -if [ -z "$bridge" ] -then - bridge=$(brctl show | awk 'NR==2{print$1}') - +if [ -z "$bridge" ]; then +if which brctl >&/dev/null; then +bridge=$(brctl show | awk 'NR==2{print$1}') +else +bridge=$(bridge link | cut -d" " -f7) +fi if [ -z "$bridge" ] then fatal "Could not find bridge, and none was specified" @@ -82,7 +84,11 @@ case "$command" in ;; offline) -do_without_error brctl delif "$bridge" "$dev" +if which brctl >&/dev/null; then +do_without_error brctl delif "$bridge" "$dev" +else +do_without_error ip link set "$dev" nomaster +fi do_without_error ifconfig "$dev" down ;; diff --git a/tools/hotplug/Linux/vif2 b/tools/hotplug/Linux/vif2 index 2c155be68c..5bd555c6f0 100644 --- a/tools/hotplug/Linux/vif2 +++ b/tools/hotplug/Linux/vif2 @@ -7,13 +7,21 @@ dir=$(dirname "$0") bridge=$(xenstore_read_default "$XENBUS_PATH/bridge" "$bridge") if [ -z "$bridge" ] then -nr_bridges=$(($(brctl show | cut -f 1 | grep -v "^$" | wc -l) - 1)) +if which brctl >&/dev/null; then +nr_bridges=$(($(brctl show | cut -f 1 | grep -v "^$" | wc -l) - 1)) +else +nr_bridges=$(bridge link | wc -l) +fi if [ "$nr_bridges" != 1 ] then fatal "no bridge specified, and don't know which one to use ($nr_bridges found)" fi -bridge=$(brctl show | cut -d " +if which brctl >&/dev/null; then +bridge=$(brctl show | cut -d 
" " -f 2 | cut -f 1) +else +bridge=$(bridge link | cut -d" " -f6) +fi fi command="$1" diff --git a/tools/hotplug/Linux/xen-network-common.sh b/tools/hotplug/Linux/xen-network-common.sh index 92ffa603f7..8dd3a62068 100644 --- a/tools/hotplug/Linux/xen-network-common.sh +++ b/tools/hotplug/Linux/xen-network-common.sh @@ -111,9 +111,13 @@ create_bridge () { # Don't create the bridge if it already exists. if [ ! -e
[Xen-devel] [xen-unstable-smoke test] 144912: tolerable all pass - PUSHED
flight 144912 xen-unstable-smoke real [real] http://logs.test-lab.xenproject.org/osstest/logs/144912/ Failures :-/ but no regressions. Tests which did not succeed, but are not blocking: test-amd64-amd64-libvirt 13 migrate-support-checkfail never pass test-arm64-arm64-xl-xsm 13 migrate-support-checkfail never pass test-arm64-arm64-xl-xsm 14 saverestore-support-checkfail never pass test-armhf-armhf-xl 13 migrate-support-checkfail never pass test-armhf-armhf-xl 14 saverestore-support-checkfail never pass version targeted for testing: xen 704fa1532801bc02c4500462f0b913b3c137db4d baseline version: xen c61c1b49430527ee16fbf5b55aca195c325b1a23 Last test of basis 144906 2019-12-17 19:00:23 Z0 days Testing same since 144912 2019-12-17 22:02:21 Z0 days1 attempts People who touched revisions under test: Andrew Cooper jobs: build-arm64-xsm pass build-amd64 pass build-armhf pass build-amd64-libvirt pass test-armhf-armhf-xl pass test-arm64-arm64-xl-xsm pass test-amd64-amd64-xl-qemuu-debianhvm-amd64pass test-amd64-amd64-libvirt pass sg-report-flight on osstest.test-lab.xenproject.org logs: /home/logs/logs images: /home/logs/images Logs, config files, etc. are available at http://logs.test-lab.xenproject.org/osstest/logs Explanation of these reports, and of osstest in general, is at http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master Test harness code can be found at http://xenbits.xen.org/gitweb?p=osstest.git;a=summary Pushing revision : To xenbits.xen.org:/home/xen/git/xen.git c61c1b4943..704fa15328 704fa1532801bc02c4500462f0b913b3c137db4d -> smoke ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH] x86/microcode: Support builtin CPU microcode
On 13.12.19 14:40, Andrew Cooper wrote: On 09/12/2019 21:49, Eslam Elnikety wrote: + +extern const char __builtin_intel_ucode_start[], __builtin_intel_ucode_end[]; +extern const char __builtin_amd_ucode_start[], __builtin_amd_ucode_end[]; +#endif + /* By default, ucode loading is done in NMI handler */ static bool ucode_in_nmi = true; @@ -110,9 +118,9 @@ void __init microcode_set_module(unsigned int idx) } /* - * The format is '[|scan=, nmi=]'. Both options are - * optional. If the EFI has forced which of the multiboot payloads is to be - * used, only nmi= is parsed. + * The format is '[|scan=|builtin=, nmi=]'. All + * options are optional. If the EFI has forced which of the multiboot payloads + * is to be used, only nmi= is parsed. */ Please delete this, or I'll do a prereq patch to fix it and the command line docs. (Both are in a poor state.) Unless you are planning that along your on-going docs/hypervisor-guide/microcode-loading.rst effort, I can pick up this clean-up/prereq patch myself. What do you have in mind? (Or point me to a good example and I will figure things out). c/s 3c5552954, 53a84f672, 633a40947 or 3136dee9c are good examples. ucode= is definitely more complicated to explain because of its implicit EFI behaviour. Currently massaging a patch to that effect. + else if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ) + ucode_blob.size = (size_t)(__builtin_intel_ucode_end + - __builtin_intel_ucode_start); + else + return; + + if ( !ucode_blob.size ) + { + printk("No builtin ucode! 'ucode=builtin' is nullified.\n"); + return; + } + else if ( ucode_blob.size > MAX_EARLY_CPIO_MICROCODE ) + { + printk("Builtin microcode payload too big! (%ld, we can do %d)\n", + ucode_blob.size, MAX_EARLY_CPIO_MICROCODE); + ucode_blob.size = 0; + return; + } + + ucode_blob.data = xmalloc_bytes(ucode_blob.size); + if ( !ucode_blob.data ) + return; Any chance we can reuse the "fits" logic to avoid holding every inapplicable blob in memory as well? 
I think this would be a welcomed change. It seems to me that we have two ways to go about it. 1) We factor the code in the intel-/amd-specific cpu_request_microcode to extract logic for finding a match into its own new function, expose that through microcode_ops, and finally do xalloc only for the matching microcode when early loading is scan or builtin. 2) Cannot we just do away completely with xalloc? I see that each individual microcode update gets allocated anyway in microcode_intel.c/get_next_ucode_from_buffer() and in microcode_amd.c/cpu_request_microcode(). Unless I am missing something, the xmalloc_bytes for ucode_blob.data is redundant. Thoughts? I'm certain the code is more complicated than it needs to be. Cleanup/simplification would be very welcome. And if you're up for that, there is a related area which would be a great improvement. At the moment, BSP microcode loading is very late because it depends on this xmalloc() to begin with. However, no memory allocation is needed to load microcode from a multiboot module or from the initrd, or from this future builtin location - all loading can be done from a directmap/bootmap pointer if needs be. This would allow moving the BSP microcode to much earlier on boot, probably somewhere between console setup and E820 handling. One way or another, the microcode cache which persists past boot has to be xmalloc()'d, because we will free the module/initrd/builtin. It would however be more friendly to AP's to only give them the single correct piece of ucode, rather than everything to scan through. (These behaviours and expectations are going to be a chunk of my intended second microcode.rst doc, including a "be aware that machines exist which do $X" section to cover some of the weirder corner cases we have encountered.) Avoiding the xmalloc/memcpy on the scan for microcode is one of the patches that I will share shortly. 
In particular, the ucode_blob.data would directly point to the buffer matching the canonical name within the cpio name space. We are still a bit away from pushing the BSP microcode update earlier though. We will need to surgically remove all the unnecessary xmalloc/memcpy from within microcode_{amd,intel}.c. Also, as you hinted, the challenging bit is the per-cpu microcode cache. + +builtin_ucode.o: Makefile $(amd-blobs) $(intel-blobs) + # Create AMD microcode blob if there are AMD updates on the build system + if [ ! -z "$(amd-blobs)" ]; then \ + cat $(amd-blobs) > $@.bin ; \ + $(OBJCOPY) -I binary -O elf64-x86-64 -B i386:x86-64 --rename-section .data=.builtin_amd_ucode,alloc,load,readonly,data,contents $@.bin $@.amd; \ + rm -f $@.bin; \ + fi + # Create INTEL microcode blob if there are INTEL updates on the build system + if [ ! -z "$(intel-blobs)" ]; then \ + cat
Re: [Xen-devel] REGRESSION: Xen 4.13 RC5 fails to bootstrap Dom0 on ARM
On Tue, Dec 17, 2019 at 11:26 AM Stefano Stabellini wrote: > > On Tue, 17 Dec 2019, Roman Shaposhnik wrote: > > On Tue, Dec 17, 2019 at 10:30 AM Stefano Stabellini > > wrote: > > > > > > On Tue, 17 Dec 2019, Julien Grall wrote: > > > > Hi, > > > > > > > > On 17/12/2019 04:39, Roman Shaposhnik wrote: > > > > > On Mon, Dec 16, 2019 at 6:55 PM Stefano Stabellini > > > > > wrote: > > > > > > On Mon, 16 Dec 2019, Roman Shaposhnik wrote: > > > > > > If I sum all the memory sizes together I get 0x3ddfd000 which is > > > > > > 990M. > > > > > > If so, I wonder how you could boot succesfully with dom0_mem=1024M > > > > > > even > > > > > > on Xen 4.12... :-? > > > > > > > > > > That is a very interesting observation indeed! I actually don't > > > > > remember where that device tree came from, but I think it was from one > > > > > of the Linaro sites. > > > > > > > > This is mostly likely because of: > > > > > > > > commit 6341a674573f1834f083f0ab0f5b36b075f9e02e > > > > Author: Julien Grall > > > > Date: Wed Aug 21 22:42:31 2019 +0100 > > > > > > > > xen/arm: domain_build: Don't continue if unable to allocate all > > > > dom0 banks > > > > > > > > Xen will only print a warning if there are memory unallocated when > > > > using > > > > 1:1 mapping (only used by dom0). This also includes the case where > > > > no > > > > memory has been allocated. > > > > > > > > It will bring to all sort of issues that can be hard to diagnostic > > > > for > > > > users (the warning can be difficult to spot or disregard). > > > > > > > > If the users request 1GB of memory, then most likely they want the > > > > exact > > > > amount and not 512MB. So panic if all the memory has not been > > > > allocated. > > > > > > > > After this change, the behavior is the same as for non-1:1 memory > > > > allocation (used by domU). > > > > > > > > At the same time, reflow the message to have the format on a single > > > > line. 
> > > > > > > > Signed-off-by: Julien Grall > > > > Acked-by: Stefano Stabellini > > > > > > Ah! Roman, could you please post the full boot log of a successful 4.12 > > > boot? > > > > > > If it has a "Failed to allocate requested dom0 memory" message, then we > > > know what the issue is. > > > > Aha! Our messages seems to have crossed ;-) Full log is attached and > > yes -- that's > > the problem indeed. > > > > So at least that mystery is solved. But I'm still not able to get to a > > full 1G of memory > > even with your update to the device tree file. Any chance you can send me > > the > > device tree file that works for you? > > I didn't try on real hardware, I only tried on QEMU with a similar > configuration. I went back and check the HiKey device tree I used and it > is the same as yours (including the ramoops reserved-memory error). > > Apparently there are 1G and 2G variants of the HiKey, obviously both > yours and my device tree are for the 1G variant. I try to dig through > the docs but couldn't find the details of the 2G variant. I cannot find > anywhere the memory range for the top 1G of memory not even on the > LeMaker docs! :-/ Yup. That's exactly the issue on my end as well - can't seem to find an authoritative source for that devicetree. I did find this, though: https://releases.linaro.org/96boards/hikey/linaro/debian/15.11/ which looks like it has the latest (at least file timestamp-wise) devicetree. If you look at the memory and reserved memory nodes there, they are actually much simpler than what we've got: memory { device_type = "memory"; reg = <0x0 0x0 0x0 0x4000>; }; reserved-memory { #address-cells = <0x2>; #size-cells = <0x2>; ranges; mcu-buf@05e0 { no-map; reg = <0x0 0x5e0 0x0 0x10 0x0 0x740f000 0x0 0x1000>; }; mbox-buf@06dff000 { no-map; reg = <0x0 0x6dff000 0x0 0x1000>; }; }; So -- just on a whim -- I changed it to: reg = <0x0 0x0 0x0 0x8000>; Interestingly enough, Xen booted, and complained about only 192MB unallocated this time. 
So, I dropped the size of Dom0 to 640M and I got it to boot and here's what I'm seeing as an output of xl info: total_memory : 1120 free_memory: 390 It's still nowhere close to 2G. Then I booted the Linux kernel without Xen and it correctly identified all 2G worth of RAM, and in fact, when I converted /sys/firmware/devicetree/base back into dts, here's what I've got: memory { device_type = "memory"; reg = <0x0 0x0 0x0 0x5e0 0x0 0x5f0 0x0 0x1000 0x0 0x5f02000 0x0 0xefd000 0x0 0x6e0 0x0 0x60f000 0x0 0x741 0x0 0x1aaf 0x0 0x21f0 0x0 0x10 0x0 0x2200 0x0 0x1c00>; };
Re: [Xen-devel] [PATCH] x86/microcode: Support builtin CPU microcode
On 17/12/2019 22:41, Eslam Elnikety wrote: > On 13.12.19 14:57, Andrew Cooper wrote: >> On 12/12/2019 22:13, Eslam Elnikety wrote: > Second, there is often need to couple a Xen build with a minimum > microcode patch level. Having the microcode built within the Xen > image > itself is a streamlined, natural way of achieving that. Okay, I can accept this as a reason, to some degree at least. Yet as said elsewhere, I don't think you want then to override a possible "external" ucode module with the builtin blobs. Instead the newest of everything that's available should then be loaded. >>> >>> Extending Xen to work around tools shortcomings is absolutely not what >>> I have in mind. I should have started with the second reason. Read >>> this as: Xen relies on a minimum microcode feature set, and it makes >>> sense to couple both in one binary. This coupling just happens to >>> provide an added benefit in the face of tools shortcomings. >> >> Do we have anything which strictly relies on a minimum version? > > I had in mind microcode speculation mitigation features when reasoning > with the minimum patch level argument. Considering how well the first round of speculative microcode went, mandating it would have been a rather bad thing... But yes - as a usecase of "I wish to bundle the minimum microcode I'd like to work with", this seems entirely reasonable. ~Andrew ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH] x86/microcode: Support builtin CPU microcode
On 13.12.19 14:57, Andrew Cooper wrote: On 12/12/2019 22:13, Eslam Elnikety wrote: Second, there is often need to couple a Xen build with a minimum microcode patch level. Having the microcode built within the Xen image itself is a streamlined, natural way of achieving that. Okay, I can accept this as a reason, to some degree at least. Yet as said elsewhere, I don't think you want then to override a possible "external" ucode module with the builtin blobs. Instead the newest of everything that's available should then be loaded. Extending Xen to work around tools shortcomings is absolutely not what I have in mind. I should have started with the second reason. Read this as: Xen relies on a minimum microcode feature set, and it makes sense to couple both in one binary. This coupling just happens to provide an added benefit in the face of tools shortcomings. Do we have anything which strictly relies on a minimum version? I had in mind microcode speculation mitigation features when reasoning with the minimum patch level argument. -- Eslam ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [ovmf test] 144910: regressions - FAIL
flight 144910 ovmf real [real] http://logs.test-lab.xenproject.org/osstest/logs/144910/ Regressions :-( Tests which did not succeed and are blocking, including tests which could not be run: build-i386-xsm6 xen-buildfail REGR. vs. 144637 build-amd64 6 xen-buildfail REGR. vs. 144637 build-amd64-xsm 6 xen-buildfail REGR. vs. 144637 build-i3866 xen-buildfail REGR. vs. 144637 Tests which did not succeed, but are not blocking: test-amd64-amd64-xl-qemuu-ovmf-amd64 1 build-check(1) blocked n/a build-amd64-libvirt 1 build-check(1) blocked n/a test-amd64-i386-xl-qemuu-ovmf-amd64 1 build-check(1) blocked n/a build-i386-libvirt1 build-check(1) blocked n/a version targeted for testing: ovmf bfb141cf19dd6f9b8df8b9d0914a5b3b15e1a798 baseline version: ovmf 804666c86e7b6f04fe5c5cfdb13199c19e0e99b0 Last test of basis 144637 2019-12-09 09:09:49 Z8 days Failing since144646 2019-12-10 01:39:53 Z7 days 71 attempts Testing same since 144770 2019-12-12 18:41:26 Z5 days 60 attempts People who touched revisions under test: Antoine Coeur Ard Biesheuvel Bob Feng Jiewen Yao Michael Kubacki Pete Batard Philippe Mathieu-Daude Steven Shi jobs: build-amd64-xsm fail build-i386-xsm fail build-amd64 fail build-i386 fail build-amd64-libvirt blocked build-i386-libvirt blocked build-amd64-pvopspass build-i386-pvops pass test-amd64-amd64-xl-qemuu-ovmf-amd64 blocked test-amd64-i386-xl-qemuu-ovmf-amd64 blocked sg-report-flight on osstest.test-lab.xenproject.org logs: /home/logs/logs images: /home/logs/images Logs, config files, etc. are available at http://logs.test-lab.xenproject.org/osstest/logs Explanation of these reports, and of osstest in general, is at http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master Test harness code can be found at http://xenbits.xen.org/gitweb?p=osstest.git;a=summary Not pushing. 
commit bfb141cf19dd6f9b8df8b9d0914a5b3b15e1a798 Author: Pete Batard Date: Tue Dec 10 18:23:04 2019 + MdePkg/Include: Add DCC and BCM2835 SPCR UART types As per the Microsoft Debug Port Table 2 (DBG2) documentation, that can be found online, we are missing 2 serial interface types for Arm DCC and Bcm2835 (the latter being used with the Raspberry Pi). These same types are present in DebugPort2Table.h so add them to SerialPortConsoleRedirectionTable.h too. Note that we followed the same idiosyncrasies as DebugPort2Table for naming these new macros. Signed-off-by: Pete Batard Acked-by: Ard Biesheuvel Reviewed-by: Liming Gao commit 2fe25a74d6fee3c2ac0b930f7f3596cb432e766e Author: Ard Biesheuvel Date: Tue Mar 5 14:32:48 2019 +0100 ArmPkg/MmCommunicationDxe: relay architected PI events to MM context PI defines a few architected events that have significance in the MM context as well as in the non-secure DXE context. So register notify handlers for these events, and relay them into the standalone MM world. Signed-off-by: Ard Biesheuvel Reviewed-by: Jiewen Yao Reviewed-by: Achin Gupta commit d3add11e87dace180387562d6f1951f2bffbd3d9 Author: Michael Kubacki Date: Wed Nov 20 17:31:24 2019 -0800 MdeModulePkg PeiCore: Improve comment semantics This patch clarifies wording in several PeiCore comments to improve reading comprehension. Cc: Dandan Bi Cc: Liming Gao Cc: Jian J Wang Cc: Hao A Wu Signed-off-by: Michael Kubacki Reviewed-by: Liming Gao Reviewed-by: Jian J Wang commit d39d1260c615b716675f67f5c4e1f4f52df01dad Author: Michael Kubacki Date: Wed Nov 20 17:10:48 2019 -0800 MdeModulePkg PeiCore: Fix typos Cc: Dandan Bi Cc: Liming Gao Cc: Jian J Wang Cc: Hao A Wu Signed-off-by: Michael Kubacki Reviewed-by: Liming Gao Reviewed-by: Philippe Mathieu-Daude Reviewed-by: Jian J Wang commit 97eedf5dfbaffde33210fd88066247cf0b7d3325 Author: Antoine Coeur Date: Wed Dec 4 12:14:53
[Xen-devel] [xen-unstable-smoke test] 144906: tolerable all pass - PUSHED
flight 144906 xen-unstable-smoke real [real] http://logs.test-lab.xenproject.org/osstest/logs/144906/ Failures :-/ but no regressions. Tests which did not succeed, but are not blocking: test-amd64-amd64-libvirt 13 migrate-support-checkfail never pass test-arm64-arm64-xl-xsm 13 migrate-support-checkfail never pass test-arm64-arm64-xl-xsm 14 saverestore-support-checkfail never pass test-armhf-armhf-xl 13 migrate-support-checkfail never pass test-armhf-armhf-xl 14 saverestore-support-checkfail never pass version targeted for testing: xen c61c1b49430527ee16fbf5b55aca195c325b1a23 baseline version: xen f50a4f6e244cfc8e773300c03aaf4db391f3028a Last test of basis 144898 2019-12-17 15:00:35 Z0 days Testing same since 144906 2019-12-17 19:00:23 Z0 days1 attempts People who touched revisions under test: Andre Przywara Hongyan Xia Julien Grall Yangtao Li jobs: build-arm64-xsm pass build-amd64 pass build-armhf pass build-amd64-libvirt pass test-armhf-armhf-xl pass test-arm64-arm64-xl-xsm pass test-amd64-amd64-xl-qemuu-debianhvm-amd64pass test-amd64-amd64-libvirt pass sg-report-flight on osstest.test-lab.xenproject.org logs: /home/logs/logs images: /home/logs/images Logs, config files, etc. are available at http://logs.test-lab.xenproject.org/osstest/logs Explanation of these reports, and of osstest in general, is at http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master Test harness code can be found at http://xenbits.xen.org/gitweb?p=osstest.git;a=summary Pushing revision : To xenbits.xen.org:/home/xen/git/xen.git f50a4f6e24..c61c1b4943 c61c1b49430527ee16fbf5b55aca195c325b1a23 -> smoke ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [PATCH v2] xen/grant-table: remove multiple BUG_ON on gnttab_interface
gnttab_request_version() always sets the gnttab_interface variable and the assertions to check for empty gnttab_interface are unnecessary. The patch eliminates multiple such assertions. Signed-off-by: Aditya Pakki --- v1: Eliminate more BUG_ON calls, as suggested by Juergen Gross. --- drivers/xen/grant-table.c | 4 1 file changed, 4 deletions(-) diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 49b381e104ef..7b36b51cdb9f 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -664,7 +664,6 @@ static int grow_gnttab_list(unsigned int more_frames) unsigned int nr_glist_frames, new_nr_glist_frames; unsigned int grefs_per_frame; - BUG_ON(gnttab_interface == NULL); grefs_per_frame = gnttab_interface->grefs_per_grant_frame; new_nr_grant_frames = nr_grant_frames + more_frames; @@ -1160,7 +1159,6 @@ EXPORT_SYMBOL_GPL(gnttab_unmap_refs_sync); static unsigned int nr_status_frames(unsigned int nr_grant_frames) { - BUG_ON(gnttab_interface == NULL); return gnttab_frames(nr_grant_frames, SPP); } @@ -1388,7 +1386,6 @@ static int gnttab_expand(unsigned int req_entries) int rc; unsigned int cur, extra; - BUG_ON(gnttab_interface == NULL); cur = nr_grant_frames; extra = ((req_entries + gnttab_interface->grefs_per_grant_frame - 1) / gnttab_interface->grefs_per_grant_frame); @@ -1423,7 +1420,6 @@ int gnttab_init(void) /* Determine the maximum number of frames required for the * grant reference free list on the current hypervisor. */ - BUG_ON(gnttab_interface == NULL); max_nr_glist_frames = (max_nr_grant_frames * gnttab_interface->grefs_per_grant_frame / RPP); -- 2.20.1 ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH 6/6] x86/suspend: Drop save_rest_processor_state() completely
On 17/12/2019 12:38, Roger Pau Monné wrote: >> @@ -46,25 +14,9 @@ void restore_rest_processor_state(void) >> /* Restore full CR4 (inc MCE) now that the IDT is in place. */ >> write_cr4(mmu_cr4_features); >> >> -/* Recover syscall MSRs */ >> -wrmsrl(MSR_LSTAR, saved_lstar); >> -wrmsrl(MSR_CSTAR, saved_cstar); >> -wrmsrl(MSR_STAR, XEN_MSR_STAR); >> -wrmsrl(MSR_SYSCALL_MASK, XEN_SYSCALL_MASK); >> - >> -wrfsbase(saved_fs_base); >> -wrgsbase(saved_gs_base); >> -wrmsrl(MSR_SHADOW_GS_BASE, saved_kernel_gs_base); >> - >> -if ( cpu_has_sep ) >> -{ >> -/* Recover sysenter MSRs */ >> -wrmsrl(MSR_IA32_SYSENTER_ESP, saved_sysenter_esp); >> -wrmsrl(MSR_IA32_SYSENTER_EIP, saved_sysenter_eip); >> -wrmsr(MSR_IA32_SYSENTER_CS, __HYPERVISOR_CS, 0); >> -} >> +percpu_traps_init(); >> >> -if ( cpu_has_xsave && !set_xcr0(saved_xcr0) ) >> +if ( cpu_has_xsave && !set_xcr0(get_xcr0()) ) >> BUG(); >> >> wrmsrl(MSR_IA32_CR_PAT, XEN_MSR_PAT); > Given what this function does after this change, would it be feasible > to place such calls directly in enter_state? > > AFAICT there's already some restoring done there anyway. Hmm - we already appear to double up CR4/EFER restoration, so there is clearly more cleanup to do. I'll see if I can make restore_rest_processor_state() disappear completely. ~Andrew ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [qemu-mainline test] 144891: regressions - FAIL
flight 144891 qemu-mainline real [real] http://logs.test-lab.xenproject.org/osstest/logs/144891/ Regressions :-( Tests which did not succeed and are blocking, including tests which could not be run: test-amd64-i386-freebsd10-i386 14 guest-saverestore fail REGR. vs. 144861 test-amd64-i386-freebsd10-amd64 14 guest-saverestore fail REGR. vs. 144861 test-amd64-amd64-xl-qemuu-debianhvm-amd64 13 guest-saverestore fail REGR. vs. 144861 test-amd64-amd64-xl-qemuu-win7-amd64 13 guest-saverestore fail REGR. vs. 144861 test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 13 guest-saverestore fail REGR. vs. 144861 test-amd64-amd64-xl-qemuu-debianhvm-i386-xsm 13 guest-saverestore fail REGR. vs. 144861 test-amd64-i386-xl-qemuu-debianhvm-amd64-shadow 13 guest-saverestore fail REGR. vs. 144861 test-amd64-amd64-xl-qemuu-debianhvm-amd64-shadow 13 guest-saverestore fail REGR. vs. 144861 test-amd64-amd64-xl-qemuu-ovmf-amd64 13 guest-saverestore fail REGR. vs. 144861 test-amd64-i386-xl-qemuu-debianhvm-amd64 13 guest-saverestore fail REGR. vs. 144861 test-amd64-i386-xl-qemuu-ovmf-amd64 13 guest-saverestore fail REGR. vs. 144861 test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm 13 guest-saverestore fail REGR. vs. 144861 test-amd64-i386-xl-qemuu-debianhvm-i386-xsm 13 guest-saverestore fail REGR. vs. 144861 test-amd64-i386-xl-qemuu-win7-amd64 13 guest-saverestore fail REGR. vs. 144861 test-amd64-amd64-xl-qemuu-ws16-amd64 13 guest-saverestore fail REGR. vs. 144861 test-amd64-i386-xl-qemuu-ws16-amd64 13 guest-saverestore fail REGR. vs. 144861 Regressions which are regarded as allowable (not blocking): test-amd64-amd64-xl-rtds 16 guest-localmigrate fail REGR. vs. 144861 test-armhf-armhf-xl-rtds16 guest-start/debian.repeat fail REGR. vs. 
144861 Tests which did not succeed, but are not blocking: test-armhf-armhf-libvirt 14 saverestore-support-checkfail like 144861 test-armhf-armhf-libvirt-raw 13 saverestore-support-checkfail like 144861 test-amd64-amd64-libvirt 13 migrate-support-checkfail never pass test-amd64-i386-libvirt 13 migrate-support-checkfail never pass test-amd64-amd64-libvirt-xsm 13 migrate-support-checkfail never pass test-arm64-arm64-xl-seattle 13 migrate-support-checkfail never pass test-arm64-arm64-xl-seattle 14 saverestore-support-checkfail never pass test-amd64-i386-libvirt-xsm 13 migrate-support-checkfail never pass test-amd64-i386-xl-pvshim12 guest-start fail never pass test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 11 migrate-support-check fail never pass test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm 11 migrate-support-check fail never pass test-arm64-arm64-xl 13 migrate-support-checkfail never pass test-arm64-arm64-xl 14 saverestore-support-checkfail never pass test-arm64-arm64-xl-credit2 13 migrate-support-checkfail never pass test-arm64-arm64-xl-credit2 14 saverestore-support-checkfail never pass test-arm64-arm64-xl-credit1 13 migrate-support-checkfail never pass test-arm64-arm64-xl-credit1 14 saverestore-support-checkfail never pass test-arm64-arm64-xl-thunderx 13 migrate-support-checkfail never pass test-arm64-arm64-xl-thunderx 14 saverestore-support-checkfail never pass test-arm64-arm64-xl-xsm 13 migrate-support-checkfail never pass test-arm64-arm64-xl-xsm 14 saverestore-support-checkfail never pass test-amd64-amd64-qemuu-nested-amd 17 debian-hvm-install/l1/l2 fail never pass test-arm64-arm64-libvirt-xsm 13 migrate-support-checkfail never pass test-arm64-arm64-libvirt-xsm 14 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-vhd 12 migrate-support-checkfail never pass test-armhf-armhf-xl-arndale 13 migrate-support-checkfail never pass test-armhf-armhf-xl-arndale 14 saverestore-support-checkfail never pass test-armhf-armhf-xl-multivcpu 13 
migrate-support-checkfail never pass test-armhf-armhf-xl-multivcpu 14 saverestore-support-checkfail never pass test-armhf-armhf-xl-credit2 13 migrate-support-checkfail never pass test-armhf-armhf-xl-credit2 14 saverestore-support-checkfail never pass test-armhf-armhf-xl-cubietruck 13 migrate-support-checkfail never pass test-armhf-armhf-xl-cubietruck 14 saverestore-support-checkfail never pass test-armhf-armhf-xl-credit1 13 migrate-support-checkfail never pass test-armhf-armhf-xl-credit1 14 saverestore-support-checkfail never pass test-armhf-armhf-libvirt 13 migrate-support-checkfail never pass test-armhf-armhf-xl-rtds 13 migrate-support-checkfail never pass test-armhf-armhf-xl-rtds 14 saverestore-support-checkfail never pass test-armhf-armhf-xl 13 migrate-support-checkfail never pass
[Xen-devel] [PATCH 4/4] tools/dombuilder: Don't allocate dom->p2m_host[] for translated domains
xc_dom_p2m() and dom->p2m_host[] implement a linear transform for translated domains, but waste a substantial chunk of RAM doing so. ARM literally never reads dom->p2m_host[] (because of the xc_dom_translated() short circuit in xc_dom_p2m()). Drop it all. x86 HVM does use dom->p2m_host[] for xc_domain_populate_physmap_exact() calls when populating 4k pages. Reuse the same tactic from 2M/1G ranges and use an on-stack array instead. Drop the memory allocation. x86 PV guests do use dom->p2m_host[] as a non-identity transform. Rename the field to pv_p2m to make it clear it is PV-only. No change in the constructed guests. Reported-by: Varad Gautam Reported-by: Julien Grall Signed-off-by: Andrew Cooper --- CC: Ian Jackson CC: Wei Liu CC: Stefano Stabellini CC: Julien Grall CC: Volodymyr Babchuk CC: Varad Gautam --- stubdom/grub/kexec.c | 28 - tools/libxc/include/xc_dom.h | 19 ++-- tools/libxc/xc_dom_arm.c | 9 -- tools/libxc/xc_dom_x86.c | 72 ++-- 4 files changed, 52 insertions(+), 76 deletions(-) diff --git a/stubdom/grub/kexec.c b/stubdom/grub/kexec.c index 10891eabcc..0e68b969a2 100644 --- a/stubdom/grub/kexec.c +++ b/stubdom/grub/kexec.c @@ -87,17 +87,17 @@ static void do_exchange(struct xc_dom_image *dom, xen_pfn_t target_pfn, xen_pfn_ xen_pfn_t target_mfn; for (source_pfn = 0; source_pfn < start_info.nr_pages; source_pfn++) -if (dom->p2m_host[source_pfn] == source_mfn) +if (dom->pv_p2m[source_pfn] == source_mfn) break; ASSERT(source_pfn < start_info.nr_pages); -target_mfn = dom->p2m_host[target_pfn]; +target_mfn = dom->pv_p2m[target_pfn]; /* Put target MFN at source PFN */ -dom->p2m_host[source_pfn] = target_mfn; +dom->pv_p2m[source_pfn] = target_mfn; /* Put source MFN at target PFN */ -dom->p2m_host[target_pfn] = source_mfn; +dom->pv_p2m[target_pfn] = source_mfn; } int kexec_allocate(struct xc_dom_image *dom) @@ -110,7 +110,7 @@ int kexec_allocate(struct xc_dom_image *dom) pages_moved2pfns = realloc(pages_moved2pfns, new_allocated * sizeof(*pages_moved2pfns)); for 
(i = allocated; i < new_allocated; i++) { /* Exchange old page of PFN i with a newly allocated page. */ -xen_pfn_t old_mfn = dom->p2m_host[i]; +xen_pfn_t old_mfn = dom->pv_p2m[i]; xen_pfn_t new_pfn; xen_pfn_t new_mfn; @@ -122,7 +122,7 @@ int kexec_allocate(struct xc_dom_image *dom) /* * If PFN of newly allocated page (new_pfn) is less then currently * requested PFN (i) then look for relevant PFN/MFN pair. In this -* situation dom->p2m_host[new_pfn] no longer contains proper MFN +* situation dom->pv_p2m[new_pfn] no longer contains proper MFN * because original page with new_pfn was moved earlier * to different location. */ @@ -132,10 +132,10 @@ int kexec_allocate(struct xc_dom_image *dom) pages_moved2pfns[i] = new_pfn; /* Put old page at new PFN */ -dom->p2m_host[new_pfn] = old_mfn; +dom->pv_p2m[new_pfn] = old_mfn; /* Put new page at PFN i */ -dom->p2m_host[i] = new_mfn; +dom->pv_p2m[i] = new_mfn; } allocated = new_allocated; @@ -282,11 +282,11 @@ void kexec(void *kernel, long kernel_size, void *module, long module_size, char dom->p2m_size = dom->total_pages; /* setup initial p2m */ -dom->p2m_host = malloc(sizeof(*dom->p2m_host) * dom->p2m_size); +dom->pv_p2m = malloc(sizeof(*dom->pv_p2m) * dom->p2m_size); /* Start with our current P2M */ for (i = 0; i < dom->p2m_size; i++) -dom->p2m_host[i] = pfn_to_mfn(i); +dom->pv_p2m[i] = pfn_to_mfn(i); if ( (rc = xc_dom_build_image(dom)) != 0 ) { printk("xc_dom_build_image returned %d\n", rc); @@ -373,7 +373,7 @@ void kexec(void *kernel, long kernel_size, void *module, long module_size, char _boot_oldpdmfn = virt_to_mfn(start_info.pt_base); DEBUG("boot old pd mfn %lx\n", _boot_oldpdmfn); DEBUG("boot pd virt %lx\n", dom->pgtables_seg.vstart); -_boot_pdmfn = dom->p2m_host[PHYS_PFN(dom->pgtables_seg.vstart - dom->parms.virt_base)]; +_boot_pdmfn = dom->pv_p2m[PHYS_PFN(dom->pgtables_seg.vstart - dom->parms.virt_base)]; DEBUG("boot pd mfn %lx\n", _boot_pdmfn); _boot_stack = _boot_target + PAGE_SIZE; DEBUG("boot stack %lx\n", 
_boot_stack); @@ -384,13 +384,13 @@ void kexec(void *kernel, long kernel_size, void *module, long module_size, char /* Keep only useful entries */ for (nr_m2p_updates = pfn = 0; pfn < start_info.nr_pages; pfn++) -if (dom->p2m_host[pfn] != pfn_to_mfn(pfn)) +if (dom->pv_p2m[pfn] != pfn_to_mfn(pfn)) nr_m2p_updates++; m2p_updates = malloc(sizeof(*m2p_updates) * nr_m2p_updates); for (i = pfn = 0; pfn < start_info.nr_pages; pfn++) -if
[Xen-devel] [PATCH 0/4] Don't allocate dom->p2m_host[] for translated domains
Vastly reduce xl's memory usage for HVM (x86 and ARM) guest construction. See https://lore.kernel.org/xen-devel/1562159202-11316-1-git-send-email-...@amazon.de/T/#u for the origins of this work, but ultimately I think this is a far cleaner solution to the problem. Andrew Cooper (4): tools/dombuilder: xc_dom_x86 cleanup tools/dombuilder: Remove PV-only, mandatory hooks tools/dombuilder: Remove p2m_guest from the common interface tools/dombuilder: Don't allocate dom->p2m_host[] for translated domains stubdom/grub/kexec.c | 36 -- tools/libxc/include/xc_dom.h | 24 +++ tools/libxc/xc_dom_arm.c | 30 tools/libxc/xc_dom_boot.c| 6 +- tools/libxc/xc_dom_core.c| 43 +-- tools/libxc/xc_dom_x86.c | 166 ++- 6 files changed, 114 insertions(+), 191 deletions(-) -- 2.11.0 ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [PATCH 3/4] tools/dombuilder: Remove p2m_guest from the common interface
In-guest p2m's are a concept specific to x86 PV guests. alloc_p2m_list() is the only hook which initialises dom->p2m_guest, making xc_dom_update_guest_p2m() a nop for non-PV guests. Move p2m_guest into xc_dom_image_x86 and adjust alloc_p2m_list() to match. Drop xc_dom_update_guest_p2m() entirely. One caller, move_l3_below_4G(), only uses it to modify a single entry, so rewriting the whole guest p2m is wasteful - opencode the single update instead. The other caller is common code. Instead, move the logic into the setup_pgtables() hooks, which know their own sizeof_pfn and can do away with the switch statement. No change in the constructed guests. Signed-off-by: Andrew Cooper --- CC: Ian Jackson CC: Wei Liu CC: Stefano Stabellini CC: Julien Grall CC: Volodymyr Babchuk CC: Varad Gautam --- stubdom/grub/kexec.c | 8 tools/libxc/include/xc_dom.h | 2 -- tools/libxc/xc_dom_boot.c| 2 -- tools/libxc/xc_dom_core.c| 40 tools/libxc/xc_dom_x86.c | 41 +++-- 5 files changed, 35 insertions(+), 58 deletions(-) diff --git a/stubdom/grub/kexec.c b/stubdom/grub/kexec.c index 61ca082d42..10891eabcc 100644 --- a/stubdom/grub/kexec.c +++ b/stubdom/grub/kexec.c @@ -320,14 +320,6 @@ void kexec(void *kernel, long kernel_size, void *module, long module_size, char do_exchange(dom, PHYS_PFN(_boot_target - dom->parms.virt_base), virt_to_mfn(&_boot_page)); -/* Make sure the bootstrap page table does not RW-map any of our current - * page table frames */ -if ( (rc = xc_dom_update_guest_p2m(dom))) { -printk("xc_dom_update_guest_p2m returned %d\n", rc); -errnum = ERR_BOOT_FAILURE; -goto out; -} - if ( dom->arch_hooks->setup_pgtables ) if ( (rc = dom->arch_hooks->setup_pgtables(dom))) { printk("setup_pgtables returned %d\n", rc); diff --git a/tools/libxc/include/xc_dom.h b/tools/libxc/include/xc_dom.h index 9ff1cb8b07..b7d0faf7e1 100644 --- a/tools/libxc/include/xc_dom.h +++ b/tools/libxc/include/xc_dom.h @@ -133,7 +133,6 @@ struct xc_dom_image { * Note that the input is offset by rambase. 
*/ xen_pfn_t *p2m_host; -void *p2m_guest; /* physical memory * @@ -331,7 +330,6 @@ int xc_dom_devicetree_mem(struct xc_dom_image *dom, const void *mem, int xc_dom_parse_image(struct xc_dom_image *dom); int xc_dom_set_arch_hooks(struct xc_dom_image *dom); int xc_dom_build_image(struct xc_dom_image *dom); -int xc_dom_update_guest_p2m(struct xc_dom_image *dom); int xc_dom_boot_xen_init(struct xc_dom_image *dom, xc_interface *xch, uint32_t domid); diff --git a/tools/libxc/xc_dom_boot.c b/tools/libxc/xc_dom_boot.c index 79dbbf6571..bb599b33ba 100644 --- a/tools/libxc/xc_dom_boot.c +++ b/tools/libxc/xc_dom_boot.c @@ -197,8 +197,6 @@ int xc_dom_boot_image(struct xc_dom_image *dom) return -1; /* initial mm setup */ -if ( (rc = xc_dom_update_guest_p2m(dom)) != 0 ) -return rc; if ( dom->arch_hooks->setup_pgtables && (rc = dom->arch_hooks->setup_pgtables(dom)) != 0 ) return rc; diff --git a/tools/libxc/xc_dom_core.c b/tools/libxc/xc_dom_core.c index fc77804a7e..f30c73b5e8 100644 --- a/tools/libxc/xc_dom_core.c +++ b/tools/libxc/xc_dom_core.c @@ -969,46 +969,6 @@ int xc_dom_mem_init(struct xc_dom_image *dom, unsigned int mem_mb) return 0; } -int xc_dom_update_guest_p2m(struct xc_dom_image *dom) -{ -uint32_t *p2m_32; -uint64_t *p2m_64; -xen_pfn_t i; - -if ( !dom->p2m_guest ) -return 0; - -switch ( dom->arch_hooks->sizeof_pfn ) -{ -case 4: -DOMPRINTF("%s: dst 32bit, pages 0x%" PRIpfn "", - __FUNCTION__, dom->p2m_size); -p2m_32 = dom->p2m_guest; -for ( i = 0; i < dom->p2m_size; i++ ) -if ( dom->p2m_host[i] != INVALID_PFN ) -p2m_32[i] = dom->p2m_host[i]; -else -p2m_32[i] = (uint32_t) - 1; -break; -case 8: -DOMPRINTF("%s: dst 64bit, pages 0x%" PRIpfn "", - __FUNCTION__, dom->p2m_size); -p2m_64 = dom->p2m_guest; -for ( i = 0; i < dom->p2m_size; i++ ) -if ( dom->p2m_host[i] != INVALID_PFN ) -p2m_64[i] = dom->p2m_host[i]; -else -p2m_64[i] = (uint64_t) - 1; -break; -default: -xc_dom_panic(dom->xch, XC_INTERNAL_ERROR, - "sizeof_pfn is invalid (is %d, can be 4 or 8)", - 
dom->arch_hooks->sizeof_pfn); -return -1; -} -return 0; -} - static int xc_dom_build_module(struct xc_dom_image *dom, unsigned int mod) { size_t unziplen, modulelen; diff --git a/tools/libxc/xc_dom_x86.c b/tools/libxc/xc_dom_x86.c index d2acff1061..f21662c8b9 100644 --- a/tools/libxc/xc_dom_x86.c +++ b/tools/libxc/xc_dom_x86.c @@ -104,6 +104,9 @@ struct
[Xen-devel] [PATCH 2/4] tools/dombuilder: Remove PV-only, mandatory hooks
Currently, the setup_pgtable() hook is optional, but alloc_pgtable() hook is not. Both are specific to x86 PV guests, and stubbed in various ways by the dombuilders for translated guests (x86 HVM, ARM). Make alloc_pgtables() optional, and drop all the stubs for translated guest types. No change in the constructed guests. Signed-off-by: Andrew Cooper --- CC: Ian Jackson CC: Wei Liu CC: Stefano Stabellini CC: Julien Grall CC: Volodymyr Babchuk CC: Varad Gautam --- tools/libxc/include/xc_dom.h | 3 ++- tools/libxc/xc_dom_arm.c | 21 - tools/libxc/xc_dom_boot.c| 6 +++--- tools/libxc/xc_dom_core.c| 3 ++- tools/libxc/xc_dom_x86.c | 7 --- 5 files changed, 7 insertions(+), 33 deletions(-) diff --git a/tools/libxc/include/xc_dom.h b/tools/libxc/include/xc_dom.h index 5900bbe8fa..9ff1cb8b07 100644 --- a/tools/libxc/include/xc_dom.h +++ b/tools/libxc/include/xc_dom.h @@ -253,8 +253,9 @@ void xc_dom_register_loader(struct xc_dom_loader *loader); /* --- arch specific hooks - */ struct xc_dom_arch { -/* pagetable setup */ int (*alloc_magic_pages) (struct xc_dom_image * dom); + +/* pagetable setup - x86 PV only */ int (*alloc_pgtables) (struct xc_dom_image * dom); int (*alloc_p2m_list) (struct xc_dom_image * dom); int (*setup_pgtables) (struct xc_dom_image * dom); diff --git a/tools/libxc/xc_dom_arm.c b/tools/libxc/xc_dom_arm.c index 5b9eca6087..7e0fb9169f 100644 --- a/tools/libxc/xc_dom_arm.c +++ b/tools/libxc/xc_dom_arm.c @@ -47,23 +47,6 @@ const char *xc_domain_get_native_protocol(xc_interface *xch, } /* */ -/* - * arm guests are hybrid and start off with paging disabled, therefore no - * pagetables and nothing to do here. 
- */ -static int alloc_pgtables_arm(struct xc_dom_image *dom) -{ -DOMPRINTF_CALLED(dom->xch); -return 0; -} - -static int setup_pgtables_arm(struct xc_dom_image *dom) -{ -DOMPRINTF_CALLED(dom->xch); -return 0; -} - -/* */ static int alloc_magic_pages(struct xc_dom_image *dom) { @@ -539,8 +522,6 @@ static struct xc_dom_arch xc_dom_32 = { .page_shift = PAGE_SHIFT_ARM, .sizeof_pfn = 8, .alloc_magic_pages = alloc_magic_pages, -.alloc_pgtables = alloc_pgtables_arm, -.setup_pgtables = setup_pgtables_arm, .start_info = start_info_arm, .shared_info = shared_info_arm, .vcpu = vcpu_arm32, @@ -555,8 +536,6 @@ static struct xc_dom_arch xc_dom_64 = { .page_shift = PAGE_SHIFT_ARM, .sizeof_pfn = 8, .alloc_magic_pages = alloc_magic_pages, -.alloc_pgtables = alloc_pgtables_arm, -.setup_pgtables = setup_pgtables_arm, .start_info = start_info_arm, .shared_info = shared_info_arm, .vcpu = vcpu_arm64, diff --git a/tools/libxc/xc_dom_boot.c b/tools/libxc/xc_dom_boot.c index 918ee4d045..79dbbf6571 100644 --- a/tools/libxc/xc_dom_boot.c +++ b/tools/libxc/xc_dom_boot.c @@ -199,9 +199,9 @@ int xc_dom_boot_image(struct xc_dom_image *dom) /* initial mm setup */ if ( (rc = xc_dom_update_guest_p2m(dom)) != 0 ) return rc; -if ( dom->arch_hooks->setup_pgtables ) -if ( (rc = dom->arch_hooks->setup_pgtables(dom)) != 0 ) -return rc; +if ( dom->arch_hooks->setup_pgtables && + (rc = dom->arch_hooks->setup_pgtables(dom)) != 0 ) +return rc; /* start info page */ if ( dom->arch_hooks->start_info ) diff --git a/tools/libxc/xc_dom_core.c b/tools/libxc/xc_dom_core.c index 9bd04cb2d5..fc77804a7e 100644 --- a/tools/libxc/xc_dom_core.c +++ b/tools/libxc/xc_dom_core.c @@ -1247,7 +1247,8 @@ int xc_dom_build_image(struct xc_dom_image *dom) goto err; if ( dom->arch_hooks->alloc_magic_pages(dom) != 0 ) goto err; -if ( dom->arch_hooks->alloc_pgtables(dom) != 0 ) +if ( dom->arch_hooks->alloc_pgtables && + dom->arch_hooks->alloc_pgtables(dom) != 0 ) goto err; if ( dom->alloc_bootstack ) { diff --git 
a/tools/libxc/xc_dom_x86.c b/tools/libxc/xc_dom_x86.c index 1ce3c798ef..d2acff1061 100644 --- a/tools/libxc/xc_dom_x86.c +++ b/tools/libxc/xc_dom_x86.c @@ -1690,12 +1690,6 @@ static int bootlate_pv(struct xc_dom_image *dom) return 0; } -static int alloc_pgtables_hvm(struct xc_dom_image *dom) -{ -DOMPRINTF("%s: doing nothing", __func__); -return 0; -} - /* * The memory layout of the start_info page and the modules, and where the * addresses are stored: @@ -1906,7 +1900,6 @@ static struct xc_dom_arch xc_hvm_32 = { .page_shift = PAGE_SHIFT_X86, .sizeof_pfn = 4, .alloc_magic_pages = alloc_magic_pages_hvm, -.alloc_pgtables = alloc_pgtables_hvm, .vcpu = vcpu_hvm, .meminit = meminit_hvm, .bootearly = bootearly, -- 2.11.0 ___ Xen-devel mailing list
[Xen-devel] [PATCH 1/4] tools/dombuilder: xc_dom_x86 cleanup
The two xc_dom_params structures for PV pagetables are never modified and can live in .rodata. Reduce their scope to the alloc_pgtable_*() functions which construct xc_dom_image_x86 appropriately. Rename {alloc,setup}_pgtables() to {alloc,setup}_pgtables_pv() to highlight that they are PV only, and drop some _x86() suffixes from static helpers. No functional change. Signed-off-by: Andrew Cooper --- CC: Ian Jackson CC: Wei Liu CC: Stefano Stabellini CC: Julien Grall CC: Volodymyr Babchuk CC: Varad Gautam --- tools/libxc/xc_dom_x86.c | 60 ++-- 1 file changed, 28 insertions(+), 32 deletions(-) diff --git a/tools/libxc/xc_dom_x86.c b/tools/libxc/xc_dom_x86.c index 9e279d6768..1ce3c798ef 100644 --- a/tools/libxc/xc_dom_x86.c +++ b/tools/libxc/xc_dom_x86.c @@ -103,7 +103,7 @@ struct xc_dom_image_x86 { unsigned n_mappings; #define MAPPING_MAX 2 struct xc_dom_x86_mapping maps[MAPPING_MAX]; -struct xc_dom_params *params; +const struct xc_dom_params *params; }; /* get guest IO ABI protocol */ @@ -235,7 +235,7 @@ static int count_pgtables(struct xc_dom_image *dom, xen_vaddr_t from, return 0; } -static int alloc_pgtables(struct xc_dom_image *dom) +static int alloc_pgtables_pv(struct xc_dom_image *dom) { int pages, extra_pages; xen_vaddr_t try_virt_end; @@ -268,20 +268,20 @@ static int alloc_pgtables(struct xc_dom_image *dom) /* */ /* i386 pagetables */ -static struct xc_dom_params x86_32_params = { -.levels = PGTBL_LEVELS_I386, -.vaddr_mask = bits_to_mask(VIRT_BITS_I386), -.lvl_prot[0] = _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED, -.lvl_prot[1] = _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER, -.lvl_prot[2] = _PAGE_PRESENT, -}; - static int alloc_pgtables_x86_32_pae(struct xc_dom_image *dom) { +static const struct xc_dom_params x86_32_params = { +.levels = PGTBL_LEVELS_I386, +.vaddr_mask = bits_to_mask(VIRT_BITS_I386), +.lvl_prot[0] = _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED, +.lvl_prot[1] = _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER, +.lvl_prot[2] = 
_PAGE_PRESENT, +}; struct xc_dom_image_x86 *domx86 = dom->arch_private; domx86->params = _32_params; -return alloc_pgtables(dom); + +return alloc_pgtables_pv(dom); } #define pfn_to_paddr(pfn) ((xen_paddr_t)(pfn) << PAGE_SHIFT_X86) @@ -355,7 +355,7 @@ static xen_pfn_t move_l3_below_4G(struct xc_dom_image *dom, return l3mfn; } -static x86_pgentry_t *get_pg_table_x86(struct xc_dom_image *dom, int m, int l) +static x86_pgentry_t *get_pg_table(struct xc_dom_image *dom, int m, int l) { struct xc_dom_image_x86 *domx86 = dom->arch_private; struct xc_dom_x86_mapping *map; @@ -371,8 +371,7 @@ static x86_pgentry_t *get_pg_table_x86(struct xc_dom_image *dom, int m, int l) return NULL; } -static x86_pgentry_t get_pg_prot_x86(struct xc_dom_image *dom, int l, - xen_pfn_t pfn) +static x86_pgentry_t get_pg_prot(struct xc_dom_image *dom, int l, xen_pfn_t pfn) { struct xc_dom_image_x86 *domx86 = dom->arch_private; struct xc_dom_x86_mapping *map; @@ -396,7 +395,7 @@ static x86_pgentry_t get_pg_prot_x86(struct xc_dom_image *dom, int l, return prot; } -static int setup_pgtables_x86(struct xc_dom_image *dom) +static int setup_pgtables_pv(struct xc_dom_image *dom) { struct xc_dom_image_x86 *domx86 = dom->arch_private; struct xc_dom_x86_mapping *map1, *map2; @@ -413,7 +412,7 @@ static int setup_pgtables_x86(struct xc_dom_image *dom) map1 = domx86->maps + m1; from = map1->lvls[l].from; to = map1->lvls[l].to; -pg = get_pg_table_x86(dom, m1, l); +pg = get_pg_table(dom, m1, l); if ( !pg ) return -1; for ( m2 = 0; m2 < domx86->n_mappings; m2++ ) @@ -433,7 +432,7 @@ static int setup_pgtables_x86(struct xc_dom_image *dom) for ( p = p_s; p <= p_e; p++ ) { pg[p] = pfn_to_paddr(xc_dom_p2m(dom, pfn)) | -get_pg_prot_x86(dom, l, pfn); +get_pg_prot(dom, l, pfn); pfn++; } } @@ -464,32 +463,32 @@ static int setup_pgtables_x86_32_pae(struct xc_dom_image *dom) } } -return setup_pgtables_x86(dom); +return setup_pgtables_pv(dom); } /* */ /* x86_64 pagetables*/ -static struct xc_dom_params x86_64_params = { 
-.levels = PGTBL_LEVELS_X86_64, -.vaddr_mask = bits_to_mask(VIRT_BITS_X86_64), -.lvl_prot[0] = _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED, -
Re: [Xen-devel] REGRESSION: Xen 4.13 RC5 fails to bootstrap Dom0 on ARM
On Tue, 17 Dec 2019, Roman Shaposhnik wrote: > On Tue, Dec 17, 2019 at 10:30 AM Stefano Stabellini > wrote: > > > > On Tue, 17 Dec 2019, Julien Grall wrote: > > > Hi, > > > > > > On 17/12/2019 04:39, Roman Shaposhnik wrote: > > > > On Mon, Dec 16, 2019 at 6:55 PM Stefano Stabellini > > > > wrote: > > > > > On Mon, 16 Dec 2019, Roman Shaposhnik wrote: > > > > > If I sum all the memory sizes together I get 0x3ddfd000 which is 990M. > > > > > If so, I wonder how you could boot successfully with dom0_mem=1024M > > > > > even > > > > > on Xen 4.12... :-? > > > > > > > > That is a very interesting observation indeed! I actually don't > > > > remember where that device tree came from, but I think it was from one > > > > of the Linaro sites. > > > > > > This is most likely because of: > > > > > > commit 6341a674573f1834f083f0ab0f5b36b075f9e02e > > > Author: Julien Grall > > > Date: Wed Aug 21 22:42:31 2019 +0100 > > > > > > xen/arm: domain_build: Don't continue if unable to allocate all dom0 > > > banks > > > > > > Xen will only print a warning if there is memory unallocated when > > > using > > > 1:1 mapping (only used by dom0). This also includes the case where no > > > memory has been allocated. > > > > > > It will lead to all sorts of issues that can be hard to diagnose for > > > users (the warning can be difficult to spot or disregard). > > > > > > If the users request 1GB of memory, then most likely they want the > > > exact > > > amount and not 512MB. So panic if all the memory has not been > > > allocated. > > > > > > After this change, the behavior is the same as for non-1:1 memory > > > allocation (used by domU). > > > > > > At the same time, reflow the message to have the format on a single > > > line. > > > > > > Signed-off-by: Julien Grall > > > Acked-by: Stefano Stabellini > > > > Ah! Roman, could you please post the full boot log of a successful 4.12 > > boot? 
> > > > If it has a "Failed to allocate requested dom0 memory" message, then we > > know what the issue is. > > Aha! Our messages seem to have crossed ;-) Full log is attached and > yes -- that's > the problem indeed. > > So at least that mystery is solved. But I'm still not able to get to a > full 1G of memory > even with your update to the device tree file. Any chance you can send me the > device tree file that works for you? I didn't try on real hardware, I only tried on QEMU with a similar configuration. I went back and checked the HiKey device tree I used and it is the same as yours (including the ramoops reserved-memory error). Apparently there are 1G and 2G variants of the HiKey; obviously both your device tree and mine are for the 1G variant. I tried to dig through the docs but couldn't find the details of the 2G variant. I cannot find the memory range for the top 1G of memory anywhere, not even in the LeMaker docs! :-/ ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [ovmf test] 144903: regressions - FAIL
flight 144903 ovmf real [real] http://logs.test-lab.xenproject.org/osstest/logs/144903/ Regressions :-( Tests which did not succeed and are blocking, including tests which could not be run: build-i386-xsm6 xen-buildfail REGR. vs. 144637 build-amd64 6 xen-buildfail REGR. vs. 144637 build-amd64-xsm 6 xen-buildfail REGR. vs. 144637 build-i3866 xen-buildfail REGR. vs. 144637 Tests which did not succeed, but are not blocking: test-amd64-amd64-xl-qemuu-ovmf-amd64 1 build-check(1) blocked n/a build-amd64-libvirt 1 build-check(1) blocked n/a build-i386-libvirt1 build-check(1) blocked n/a test-amd64-i386-xl-qemuu-ovmf-amd64 1 build-check(1) blocked n/a version targeted for testing: ovmf bfb141cf19dd6f9b8df8b9d0914a5b3b15e1a798 baseline version: ovmf 804666c86e7b6f04fe5c5cfdb13199c19e0e99b0 Last test of basis 144637 2019-12-09 09:09:49 Z8 days Failing since144646 2019-12-10 01:39:53 Z7 days 70 attempts Testing same since 144770 2019-12-12 18:41:26 Z5 days 59 attempts People who touched revisions under test: Antoine Coeur Ard Biesheuvel Bob Feng Jiewen Yao Michael Kubacki Pete Batard Philippe Mathieu-Daude Steven Shi jobs: build-amd64-xsm fail build-i386-xsm fail build-amd64 fail build-i386 fail build-amd64-libvirt blocked build-i386-libvirt blocked build-amd64-pvopspass build-i386-pvops pass test-amd64-amd64-xl-qemuu-ovmf-amd64 blocked test-amd64-i386-xl-qemuu-ovmf-amd64 blocked sg-report-flight on osstest.test-lab.xenproject.org logs: /home/logs/logs images: /home/logs/images Logs, config files, etc. are available at http://logs.test-lab.xenproject.org/osstest/logs Explanation of these reports, and of osstest in general, is at http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master Test harness code can be found at http://xenbits.xen.org/gitweb?p=osstest.git;a=summary Not pushing. 
commit bfb141cf19dd6f9b8df8b9d0914a5b3b15e1a798 Author: Pete Batard Date: Tue Dec 10 18:23:04 2019 + MdePkg/Include: Add DCC and BCM2835 SPCR UART types As per the Microsoft Debug Port Table 2 (DBG2) documentation, that can be found online, we are missing 2 serial interface types for Arm DCC and Bcm2835 (the latter being used with the Raspberry Pi). These same types are present in DebugPort2Table.h so add them to SerialPortConsoleRedirectionTable.h too. Note that we followed the same idiosyncrasies as DebugPort2Table for naming these new macros. Signed-off-by: Pete Batard Acked-by: Ard Biesheuvel Reviewed-by: Liming Gao commit 2fe25a74d6fee3c2ac0b930f7f3596cb432e766e Author: Ard Biesheuvel Date: Tue Mar 5 14:32:48 2019 +0100 ArmPkg/MmCommunicationDxe: relay architected PI events to MM context PI defines a few architected events that have significance in the MM context as well as in the non-secure DXE context. So register notify handlers for these events, and relay them into the standalone MM world. Signed-off-by: Ard Biesheuvel Reviewed-by: Jiewen Yao Reviewed-by: Achin Gupta commit d3add11e87dace180387562d6f1951f2bffbd3d9 Author: Michael Kubacki Date: Wed Nov 20 17:31:24 2019 -0800 MdeModulePkg PeiCore: Improve comment semantics This patch clarifies wording in several PeiCore comments to improve reading comprehension. Cc: Dandan Bi Cc: Liming Gao Cc: Jian J Wang Cc: Hao A Wu Signed-off-by: Michael Kubacki Reviewed-by: Liming Gao Reviewed-by: Jian J Wang commit d39d1260c615b716675f67f5c4e1f4f52df01dad Author: Michael Kubacki Date: Wed Nov 20 17:10:48 2019 -0800 MdeModulePkg PeiCore: Fix typos Cc: Dandan Bi Cc: Liming Gao Cc: Jian J Wang Cc: Hao A Wu Signed-off-by: Michael Kubacki Reviewed-by: Liming Gao Reviewed-by: Philippe Mathieu-Daude Reviewed-by: Jian J Wang commit 97eedf5dfbaffde33210fd88066247cf0b7d3325 Author: Antoine Coeur Date: Wed Dec 4 12:14:53
Re: [Xen-devel] REGRESSION: Xen 4.13 RC5 fails to bootstrap Dom0 on ARM
On Tue, Dec 17, 2019 at 10:30 AM Stefano Stabellini wrote: > > On Tue, 17 Dec 2019, Julien Grall wrote: > > Hi, > > > > On 17/12/2019 04:39, Roman Shaposhnik wrote: > > > On Mon, Dec 16, 2019 at 6:55 PM Stefano Stabellini > > > wrote: > > > > On Mon, 16 Dec 2019, Roman Shaposhnik wrote: > > > > If I sum all the memory sizes together I get 0x3ddfd000 which is 990M. > > > > If so, I wonder how you could boot succesfully with dom0_mem=1024M even > > > > on Xen 4.12... :-? > > > > > > That is a very interesting observation indeed! I actually don't > > > remember where that device tree came from, but I think it was from one > > > of the Linaro sites. > > > > This is mostly likely because of: > > > > commit 6341a674573f1834f083f0ab0f5b36b075f9e02e > > Author: Julien Grall > > Date: Wed Aug 21 22:42:31 2019 +0100 > > > > xen/arm: domain_build: Don't continue if unable to allocate all dom0 > > banks > > > > Xen will only print a warning if there are memory unallocated when using > > 1:1 mapping (only used by dom0). This also includes the case where no > > memory has been allocated. > > > > It will bring to all sort of issues that can be hard to diagnostic for > > users (the warning can be difficult to spot or disregard). > > > > If the users request 1GB of memory, then most likely they want the exact > > amount and not 512MB. So panic if all the memory has not been allocated. > > > > After this change, the behavior is the same as for non-1:1 memory > > allocation (used by domU). > > > > At the same time, reflow the message to have the format on a single > > line. > > > > Signed-off-by: Julien Grall > > Acked-by: Stefano Stabellini > > Ah! Roman, could you please post the full boot log of a successful 4.12 > boot? > > If it has a "Failed to allocate requested dom0 memory" message, then we > know what the issue is. Aha! Our messages seems to have crossed ;-) Full log is attached and yes -- that's the problem indeed. So at least that mystery is solved. 
But I'm still not able to get to a full 1G of memory even with your update to the device tree file. Any chance you can send me the device tree file that works for you? Thanks, Roman. Using modules provided by bootloader in FDT Xen 4.12.0 (c/s ) EFI loader Xen 4.12.0 (XEN) Xen version 4.12.0 (@) (gcc (Alpine 6.4.0) 6.4.0) debug=n Fri Jun 7 17:32:08 UTC 2019 (XEN) Latest ChangeSet: (XEN) Processor: 410fd033: "ARM Limited", variant: 0x0, part 0xd03, rev 0x3 (XEN) 64-bit Execution: (XEN) Processor Features: (XEN) Exception Levels: EL3:64+32 EL2:64+32 EL1:64+32 EL0:64+32 (XEN) Extensions: FloatingPoint AdvancedSIMD (XEN) Debug Features: 10305106 (XEN) Auxiliary Features: (XEN) Memory Model Features: 1122 (XEN) ISA Features: 00011120 (XEN) 32-bit Execution: (XEN) Processor Features: 0131:00011011 (XEN) Instruction Sets: AArch32 A32 Thumb Thumb-2 Jazelle (XEN) Extensions: GenericTimer Security (XEN) Debug Features: 03010066 (XEN) Auxiliary Features: (XEN) Memory Model Features: 10101105 4000 0126 02102211 (XEN) ISA Features: 02101110 13112111 21232042 01112131 00011142 00011121 (XEN) Generic Timer IRQ: phys=30 hyp=26 virt=27 Freq: 1200 KHz (XEN) GICv2 initialization: (XEN) gic_dist_addr=f6801000 (XEN) gic_cpu_addr=f6802000 (XEN) gic_hyp_addr=f6804000 (XEN) gic_vcpu_addr=f6806000 (XEN) gic_maintenance_irq=25 (XEN) GICv2: 160 lines, 8 cpus, secure (IID 0200143b). (XEN) Using scheduler: SMP Credit Scheduler rev2 (credit2) (XEN) Initializing Credit2 scheduler (XEN) Allocated console ring of 16 KiB. 
(XEN) Bringing up CPU1 (XEN) Bringing up CPU2 (XEN) Bringing up CPU3 (XEN) Bringing up CPU4 (XEN) Bringing up CPU5 (XEN) Bringing up CPU6 (XEN) Bringing up CPU7 (XEN) Brought up 8 CPUs (XEN) P2M: 40-bit IPA with 40-bit PA and 8-bit VMID (XEN) P2M: 3 levels with order-1 root, VTCR 0x80023558 (XEN) I/O virtualisation disabled (XEN) *** LOADING DOMAIN 0 *** (XEN) Loading Domd0 kernel from boot module @ 48d38000 (XEN) Loading ramdisk from boot module @ 47aa6000 (XEN) Allocating 1:1 mappings totalling 1024MB for dom0: (XEN) WARNING: Failed to allocate requested dom0 memory. 624MB unallocated (XEN) BANK[0] 0x000800-0x001000 (128MB) (XEN) BANK[1] 0x003600-0x003e00 (128MB) (XEN) BANK[2] 0x004000-0x004700 (112MB) (XEN) BANK[3] 0x007b00-0x007c00 (16MB) (XEN) BANK[4] 0x007e00-0x007f00 (16MB) (XEN) Grant table range: 0x0047998000-0x00479d8000 (XEN) Allocating PPI 16 for event channel interrupt (XEN) Loading zImage from 48d38000 to 0808-09233200 (XEN) Loading dom0 initrd from 47aa6000 to
Re: [Xen-devel] REGRESSION: Xen 4.13 RC5 fails to bootstrap Dom0 on ARM
Hi Julien, On Tue, Dec 17, 2019 at 3:30 AM Julien Grall wrote: > > Hi, > > On 17/12/2019 04:39, Roman Shaposhnik wrote: > > On Mon, Dec 16, 2019 at 6:55 PM Stefano Stabellini > > wrote: > >> On Mon, 16 Dec 2019, Roman Shaposhnik wrote: > >> If I sum all the memory sizes together I get 0x3ddfd000 which is 990M. > >> If so, I wonder how you could boot succesfully with dom0_mem=1024M even > >> on Xen 4.12... :-? > > > > That is a very interesting observation indeed! I actually don't > > remember where that device tree came from, but I think it was from one > > of the Linaro sites. > > This is mostly likely because of: > > commit 6341a674573f1834f083f0ab0f5b36b075f9e02e > Author: Julien Grall > Date: Wed Aug 21 22:42:31 2019 +0100 > > xen/arm: domain_build: Don't continue if unable to allocate all > dom0 banks > > Xen will only print a warning if there are memory unallocated when > using > 1:1 mapping (only used by dom0). This also includes the case where no > memory has been allocated. > > It will bring to all sort of issues that can be hard to diagnostic for > users (the warning can be difficult to spot or disregard). > > If the users request 1GB of memory, then most likely they want the > exact > amount and not 512MB. So panic if all the memory has not been > allocated. > > After this change, the behavior is the same as for non-1:1 memory > allocation (used by domU). > > At the same time, reflow the message to have the format on a single > line. > > Signed-off-by: Julien Grall > Acked-by: Stefano Stabellini It seems you're absolutely right. Looking at the logs from Xen 4.12 I'm seeing: (XEN) Allocating 1:1 mappings totalling 1024MB for dom0: (XEN) WARNING: Failed to allocate requested dom0 memory. 
624MB unallocated (XEN) BANK[0] 0x000800-0x001000 (128MB) (XEN) BANK[1] 0x003600-0x003e00 (128MB) (XEN) BANK[2] 0x004000-0x004700 (112MB) (XEN) BANK[3] 0x007b00-0x007c00 (16MB) (XEN) BANK[4] 0x007e00-0x007f00 (16MB) (XEN) Grant table range: 0x0047998000-0x00479d8000 (XEN) Allocating PPI 16 for event channel interrupt So yes -- it was a warning that has now turned into an ERROR. So at least that part is clear now. What still isn't clear is the interplay between device trees and Xen memory allocation -- I'll reply to Stefano on that. Thanks, Roman. ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] REGRESSION: Xen 4.13 RC5 fails to bootstrap Dom0 on ARM
On Tue, 17 Dec 2019, Julien Grall wrote: > Hi, > > On 17/12/2019 04:39, Roman Shaposhnik wrote: > > On Mon, Dec 16, 2019 at 6:55 PM Stefano Stabellini > > wrote: > > > On Mon, 16 Dec 2019, Roman Shaposhnik wrote: > > > If I sum all the memory sizes together I get 0x3ddfd000 which is 990M. > > > If so, I wonder how you could boot successfully with dom0_mem=1024M even > > > on Xen 4.12... :-? > > > > That is a very interesting observation indeed! I actually don't > > remember where that device tree came from, but I think it was from one > > of the Linaro sites. > > This is most likely because of: > > commit 6341a674573f1834f083f0ab0f5b36b075f9e02e > Author: Julien Grall > Date: Wed Aug 21 22:42:31 2019 +0100 > > xen/arm: domain_build: Don't continue if unable to allocate all dom0 banks > > Xen will only print a warning if there is memory unallocated when using > 1:1 mapping (only used by dom0). This also includes the case where no > memory has been allocated. > > It will lead to all sorts of issues that can be hard to diagnose for > users (the warning can be difficult to spot or disregard). > > If the users request 1GB of memory, then most likely they want the exact > amount and not 512MB. So panic if all the memory has not been allocated. > > After this change, the behavior is the same as for non-1:1 memory > allocation (used by domU). > > At the same time, reflow the message to have the format on a single > line. > > Signed-off-by: Julien Grall > Acked-by: Stefano Stabellini Ah! Roman, could you please post the full boot log of a successful 4.12 boot? If it has a "Failed to allocate requested dom0 memory" message, then we know what the issue is. 
___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] Xen ARM Dom0less passthrough without IOMMU
On Tue, 17 Dec 2019, Andrei Cherechesu wrote: > > On Mon, 16 Dec 2019, Julien Grall wrote: > > > On 16/12/2019 23:05, Stefano Stabellini wrote: > > > > On Mon, 16 Dec 2019, Julien Grall wrote: > > > > > On 16/12/2019 18:02, Andrei Cherechesu wrote: > > > > > But even with this patch, RAM in DomU is not direct mapped (i.e. Guest > > > > > Physical > > > > > Address == Host Physical Address). This means that DMA-capable devices > > > > > would > > > > > not work properly in DomU. > > > > > > > > > > We could theoretically map DomU direct mapped, but this would break > > > > > the > > > > > isolation provided by the hypervisor. > > > > > > > > Yes, being able to map the DomU memory 1:1 can be pretty useful for some > > > > very embedded dom0less configurations, in fact I was surprised that a > > > > couple of Xilinx users asked me for that recently. Typically, the users > > > > are aware of the consequences but they still find them better than the > > > > alternative (i.e. the lack of isolation is bad but is tolerable in their > > > > configuration.) > > > This does not make much sense... The whole point of a hypervisor is to > > > isolate > > > guests from each other... So if you are happy with the lack of > > > isolation, > > > then why are you using a hypervisor in the first place? > > > > There are a number of reasons, although they are all variations of the > > same theme. In all these cases the IOMMU cannot be used for one reason > > or the other (a device is not behind the IOMMU, or due to an erratum, > > etc.) > > > > - multiple baremetal apps > > The user wants to run two or more baremetal (unikernel-like) > > applications. The user owns both applications and she is not much > > concerned about isolation (although it is always desirable when > > possible.) > > > > - multiple OSes > > This is similar to the one before, however, instead of multiple > > baremetal apps, we are talking about multiple full OSes. 
For instance, > > Linux and Android or Linux and VxWorks. Again, they are both maintained > > by the same user (no multi-tenancy) so isolation is desirable but it is > > not the top concern. > > > > - real-time / no real-time > > The user wants to run a real-time OS or real-time baremetal app and a > > non real-time OS. For instance a tiny baremetal app controlling one > > specific device and Linux. Again, the user is responsible for both > > systems so isolation is not a concern. > > > > In all these cases the users has to run multiple OSes or baremetal apps > > so she needs a hypervisor. However, it is tolerable that the apps are > > not actually fully isolated from each others because they are both > > developed and deployed together by the same "owner". > > > > Basically, since we do not have an IOMMU, we would be able > to ensure memory isolation via a NXP IP named xRDC (Extended > Resource Domain Controller) that our boards have, which supervises > the access to memory buses. > > But before we get to think about isolation, we need to enable > basic passthrough functionality (via 1:1 mapping, since no IOMMU). > > Firstly, a good step forward would be to get any non-DMA-capable > device passed-through and working. > I rebased onto upstream/staging branch and applied the hack > that skips the setting of XEN_DOMCTL_CDF_iommu flag, > that Julien specified. > > Then I tried to passthrough the eMMC, but I got the following > error: > (XEN) DOM1: [0.879151] sdhci-esdhc-imx 4005d000.usdhc: can't request > region for resource [mem 0x4005d000-0x4005dfff] > (XEN) DOM1: [0.891137] sdhci-esdhc-imx 4005d000.usdhc: sdhci_pltfm_init > failed -16 > (XEN) DOM1: [0.900249] sdhci-esdhc-imx: probe of 4005d000.usdhc failed > with error -16 > > Where 0x4005d000 is the physical address of the uSDHC(eMMC) node in the DT. > It seems that the DomU1 kernel does not have access to that memory zone. 
It looks like drivers/mmc/host/sdhci-pltfm.c:sdhci_pltfm_init failed, but I cannot see a simple reason why it would. As Julien mentioned the device tree snippet would be useful. Also the domU config and the full device tree would be useful. i.e. did you add "xen,passthrough;" under the related uSDHC node on the host device tree? > I'm trying to passthrough the eMMC in order to mount DomU1's root > on a SDCard partition, because I couldn't get to DomU1's Linux prompt > when I tried to boot with a ramdisk module. I always get this error: > (XEN) DOM1: [1.544199] RAMDISK: Couldn't find valid RAM disk image > starting at 0. > > Could this be because the ramdisk is too big? The smallest I've tried with > Is approximately 60MB in size. What size are the ramdisks that you > are using in your dom0less booting demos? I don't think so, I could boot with ramdisk 120MB in size or even larger. It is probably an address calculation error: it is easy to make a small mistake in the addresses so that they end up overlapping. Sometimes it is even U-Boot that causes the overlaps. I would suggest to use
[Xen-devel] [xen-unstable test] 144887: regressions - FAIL
flight 144887 xen-unstable real [real] http://logs.test-lab.xenproject.org/osstest/logs/144887/ Regressions :-( Tests which did not succeed and are blocking, including tests which could not be run: test-armhf-armhf-libvirt 19 leak-check/check fail REGR. vs. 144850 Tests which did not succeed, but are not blocking: test-amd64-amd64-xl-qemut-win7-amd64 17 guest-stopfail like 144827 test-amd64-amd64-xl-rtds 16 guest-localmigrate fail like 144850 test-amd64-amd64-xl-qemuu-win7-amd64 17 guest-stopfail like 144850 test-armhf-armhf-libvirt 14 saverestore-support-checkfail like 144850 test-amd64-i386-xl-qemuu-win7-amd64 17 guest-stop fail like 144850 test-amd64-i386-xl-qemut-win7-amd64 17 guest-stop fail like 144850 test-armhf-armhf-libvirt-raw 13 saverestore-support-checkfail like 144850 test-amd64-amd64-xl-qemuu-ws16-amd64 17 guest-stopfail like 144850 test-amd64-amd64-xl-qemut-ws16-amd64 17 guest-stopfail like 144850 test-amd64-i386-xl-qemuu-ws16-amd64 17 guest-stop fail like 144850 test-xtf-amd64-amd64-4 72 xtf/test-hvm64-xsa-308 fail never pass test-amd64-i386-xl-pvshim12 guest-start fail never pass test-amd64-amd64-libvirt-xsm 13 migrate-support-checkfail never pass test-amd64-amd64-libvirt 13 migrate-support-checkfail never pass test-amd64-i386-libvirt-xsm 13 migrate-support-checkfail never pass test-amd64-i386-libvirt 13 migrate-support-checkfail never pass test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 11 migrate-support-check fail never pass test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm 11 migrate-support-check fail never pass test-arm64-arm64-xl-credit2 13 migrate-support-checkfail never pass test-arm64-arm64-xl-credit2 14 saverestore-support-checkfail never pass test-arm64-arm64-xl-thunderx 13 migrate-support-checkfail never pass test-arm64-arm64-xl-credit1 13 migrate-support-checkfail never pass test-arm64-arm64-xl-thunderx 14 saverestore-support-checkfail never pass test-arm64-arm64-xl-credit1 14 saverestore-support-checkfail never pass 
test-arm64-arm64-xl 13 migrate-support-checkfail never pass test-arm64-arm64-xl 14 saverestore-support-checkfail never pass test-arm64-arm64-libvirt-xsm 13 migrate-support-checkfail never pass test-arm64-arm64-libvirt-xsm 14 saverestore-support-checkfail never pass test-armhf-armhf-xl-arndale 13 migrate-support-checkfail never pass test-armhf-armhf-xl-arndale 14 saverestore-support-checkfail never pass test-amd64-amd64-qemuu-nested-amd 17 debian-hvm-install/l1/l2 fail never pass test-arm64-arm64-xl-xsm 13 migrate-support-checkfail never pass test-arm64-arm64-xl-xsm 14 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-vhd 12 migrate-support-checkfail never pass test-armhf-armhf-xl 13 migrate-support-checkfail never pass test-armhf-armhf-xl 14 saverestore-support-checkfail never pass test-armhf-armhf-xl-multivcpu 13 migrate-support-checkfail never pass test-armhf-armhf-xl-multivcpu 14 saverestore-support-checkfail never pass test-armhf-armhf-xl-rtds 13 migrate-support-checkfail never pass test-armhf-armhf-xl-rtds 14 saverestore-support-checkfail never pass test-armhf-armhf-xl-cubietruck 13 migrate-support-checkfail never pass test-armhf-armhf-xl-cubietruck 14 saverestore-support-checkfail never pass test-armhf-armhf-libvirt 13 migrate-support-checkfail never pass test-armhf-armhf-xl-credit2 13 migrate-support-checkfail never pass test-armhf-armhf-xl-credit2 14 saverestore-support-checkfail never pass test-arm64-arm64-xl-seattle 13 migrate-support-checkfail never pass test-arm64-arm64-xl-seattle 14 saverestore-support-checkfail never pass test-armhf-armhf-libvirt-raw 12 migrate-support-checkfail never pass test-armhf-armhf-xl-vhd 12 migrate-support-checkfail never pass test-armhf-armhf-xl-vhd 13 saverestore-support-checkfail never pass test-armhf-armhf-xl-credit1 13 migrate-support-checkfail never pass test-armhf-armhf-xl-credit1 14 saverestore-support-checkfail never pass test-amd64-i386-xl-qemut-ws16-amd64 17 guest-stop fail never pass version 
targeted for testing: xen 21cb0bdcf4c12b8edd34f3d086edd76f2e974c32 baseline version: xen c9115affa6f83aebe29ae9cbf503aa163911a5bb Last test of basis 144850 2019-12-16 01:51:10 Z1 days Failing since144878 2019-12-16 19:06:11 Z0 days2 attempts Testing same since 144887 2019-12-17 04:24:45 Z0 days1 attempts
[Xen-devel] [ovmf test] 144900: regressions - FAIL
flight 144900 ovmf real [real] http://logs.test-lab.xenproject.org/osstest/logs/144900/ Regressions :-( Tests which did not succeed and are blocking, including tests which could not be run: build-i386-xsm6 xen-buildfail REGR. vs. 144637 build-amd64 6 xen-buildfail REGR. vs. 144637 build-amd64-xsm 6 xen-buildfail REGR. vs. 144637 build-i3866 xen-buildfail REGR. vs. 144637 Tests which did not succeed, but are not blocking: build-i386-libvirt1 build-check(1) blocked n/a build-amd64-libvirt 1 build-check(1) blocked n/a test-amd64-amd64-xl-qemuu-ovmf-amd64 1 build-check(1) blocked n/a test-amd64-i386-xl-qemuu-ovmf-amd64 1 build-check(1) blocked n/a version targeted for testing: ovmf bfb141cf19dd6f9b8df8b9d0914a5b3b15e1a798 baseline version: ovmf 804666c86e7b6f04fe5c5cfdb13199c19e0e99b0 Last test of basis 144637 2019-12-09 09:09:49 Z8 days Failing since144646 2019-12-10 01:39:53 Z7 days 69 attempts Testing same since 144770 2019-12-12 18:41:26 Z4 days 58 attempts People who touched revisions under test: Antoine Coeur Ard Biesheuvel Bob Feng Jiewen Yao Michael Kubacki Pete Batard Philippe Mathieu-Daude Steven Shi jobs: build-amd64-xsm fail build-i386-xsm fail build-amd64 fail build-i386 fail build-amd64-libvirt blocked build-i386-libvirt blocked build-amd64-pvopspass build-i386-pvops pass test-amd64-amd64-xl-qemuu-ovmf-amd64 blocked test-amd64-i386-xl-qemuu-ovmf-amd64 blocked sg-report-flight on osstest.test-lab.xenproject.org logs: /home/logs/logs images: /home/logs/images Logs, config files, etc. are available at http://logs.test-lab.xenproject.org/osstest/logs Explanation of these reports, and of osstest in general, is at http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master Test harness code can be found at http://xenbits.xen.org/gitweb?p=osstest.git;a=summary Not pushing. 
commit bfb141cf19dd6f9b8df8b9d0914a5b3b15e1a798 Author: Pete Batard Date: Tue Dec 10 18:23:04 2019 + MdePkg/Include: Add DCC and BCM2835 SPCR UART types As per the Microsoft Debug Port Table 2 (DBG2) documentation, that can be found online, we are missing 2 serial interface types for Arm DCC and Bcm2835 (the latter being used with the Raspberry Pi). These same types are present in DebugPort2Table.h so add them to SerialPortConsoleRedirectionTable.h too. Note that we followed the same idiosyncrasies as DebugPort2Table for naming these new macros. Signed-off-by: Pete Batard Acked-by: Ard Biesheuvel Reviewed-by: Liming Gao commit 2fe25a74d6fee3c2ac0b930f7f3596cb432e766e Author: Ard Biesheuvel Date: Tue Mar 5 14:32:48 2019 +0100 ArmPkg/MmCommunicationDxe: relay architected PI events to MM context PI defines a few architected events that have significance in the MM context as well as in the non-secure DXE context. So register notify handlers for these events, and relay them into the standalone MM world. Signed-off-by: Ard Biesheuvel Reviewed-by: Jiewen Yao Reviewed-by: Achin Gupta commit d3add11e87dace180387562d6f1951f2bffbd3d9 Author: Michael Kubacki Date: Wed Nov 20 17:31:24 2019 -0800 MdeModulePkg PeiCore: Improve comment semantics This patch clarifies wording in several PeiCore comments to improve reading comprehension. Cc: Dandan Bi Cc: Liming Gao Cc: Jian J Wang Cc: Hao A Wu Signed-off-by: Michael Kubacki Reviewed-by: Liming Gao Reviewed-by: Jian J Wang commit d39d1260c615b716675f67f5c4e1f4f52df01dad Author: Michael Kubacki Date: Wed Nov 20 17:10:48 2019 -0800 MdeModulePkg PeiCore: Fix typos Cc: Dandan Bi Cc: Liming Gao Cc: Jian J Wang Cc: Hao A Wu Signed-off-by: Michael Kubacki Reviewed-by: Liming Gao Reviewed-by: Philippe Mathieu-Daude Reviewed-by: Jian J Wang commit 97eedf5dfbaffde33210fd88066247cf0b7d3325 Author: Antoine Coeur Date: Wed Dec 4 12:14:53
Re: [Xen-devel] [RFC PATCH 0/3] basic KASAN support for Xen PV domains
> On Dec 17, 2019, at 9:08 AM, Sergey Dyasli wrote: > > This series allows to boot and run Xen PV kernels (Dom0 and DomU) with > CONFIG_KASAN=y. It has been used internally for some time now with good > results for finding memory corruption issues in Dom0 kernel. > > Only Outline instrumentation is supported at the moment. > > Patch 1 is of RFC quality > Patches 2-3 are independent and quite self-contained. Don’t you need to initialize kasan before, for example, calling kasan_alloc_pages() in patch 2? -boris ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [xen-unstable-smoke test] 144898: tolerable all pass - PUSHED
flight 144898 xen-unstable-smoke real [real] http://logs.test-lab.xenproject.org/osstest/logs/144898/ Failures :-/ but no regressions. Tests which did not succeed, but are not blocking: test-amd64-amd64-libvirt 13 migrate-support-checkfail never pass test-arm64-arm64-xl-xsm 13 migrate-support-checkfail never pass test-arm64-arm64-xl-xsm 14 saverestore-support-checkfail never pass test-armhf-armhf-xl 13 migrate-support-checkfail never pass test-armhf-armhf-xl 14 saverestore-support-checkfail never pass version targeted for testing: xen f50a4f6e244cfc8e773300c03aaf4db391f3028a baseline version: xen 21cb0bdcf4c12b8edd34f3d086edd76f2e974c32 Last test of basis 144877 2019-12-16 19:00:23 Z0 days Testing same since 144898 2019-12-17 15:00:35 Z0 days1 attempts People who touched revisions under test: Andrew Cooper Wei Liu jobs: build-arm64-xsm pass build-amd64 pass build-armhf pass build-amd64-libvirt pass test-armhf-armhf-xl pass test-arm64-arm64-xl-xsm pass test-amd64-amd64-xl-qemuu-debianhvm-amd64pass test-amd64-amd64-libvirt pass sg-report-flight on osstest.test-lab.xenproject.org logs: /home/logs/logs images: /home/logs/images Logs, config files, etc. are available at http://logs.test-lab.xenproject.org/osstest/logs Explanation of these reports, and of osstest in general, is at http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master Test harness code can be found at http://xenbits.xen.org/gitweb?p=osstest.git;a=summary Pushing revision : To xenbits.xen.org:/home/xen/git/xen.git 21cb0bdcf4..f50a4f6e24 f50a4f6e244cfc8e773300c03aaf4db391f3028a -> smoke ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] Xen ARM Dom0less passthrough without IOMMU
Hi Andrei, On 17/12/2019 17:20, Andrei Cherechesu wrote: On Mon, 16 Dec 2019, Julien Grall wrote: On 16/12/2019 23:05, Stefano Stabellini wrote: On Mon, 16 Dec 2019, Julien Grall wrote: On 16/12/2019 18:02, Andrei Cherechesu wrote: But even with this patch, RAM in DomU is not direct mapped (i.e Guest Physical Address == Host Physical Address). This means that DMA-capable device would not work properly in DomU. We could theoritically map DomU direct mapped, but this would break the isolation provided by the hypervisor. Yes, being able to map the DomU memory 1:1 can be pretty useful for some very embedded dom0less configurations, in fact I was surprised that a couple of Xilinx users asked me for that recently. Typically, the users are aware of the consequences but they still find them better than the alternative (i.e. the lack of isolation is bad but is tolerable in their configuration.) This does not make much sense... The whole point of a hypervisor is to isolate guest between each other... So if you are happy with the lack of isolation, then why are you using an hypervisor at the first place? There are a number of reasons, although they are all variation of the same theme. In all these cases the IOMMU cannot be used for one reason or the other (a device is not behind the IOMMU, or due to an errata, etc.) - multiple baremetal apps The user wants to run two or more baremetal (unikernel-like) applications. The user owns both applications and she is not much concerned about isolation (although it is always desirable when possible.) - multiple OSes This is similar to the one before, however, instead of multiple baremetal apps, we are talking about multiple full OSes. For instance, Linux and Android or Linux and VxWorks. Again, they are both maintained by the same user (no multi-tenancy) so isolation is desirable but it is not the top concern. - real-time / no real-time The user wants to run a real-time OS or real-time baremetal app and a non real-time OS. 
For instance a tiny baremetal app controlling one specific device and Linux. Again, the user is responsible for both systems so isolation is not a concern. In all these cases the users has to run multiple OSes or baremetal apps so she needs a hypervisor. However, it is tolerable that the apps are not actually fully isolated from each others because they are both developed and deployed together by the same "owner". Basically, since we do not have an IOMMU, we would be able to ensure memory isolation via a NXP IP named xRDC (Extended Resource Domain Controller) that our boards have, which supervises the access to memory buses. Ok, so you have some sort of MPU. I assume this will be between the devices and the memory, am I right? But before we get to think about isolation, we need to enable basic passthrough functionality (via 1:1 mapping, since no IOMMU). So you are in better place than what Stefano described. Your use case is probably the only place where a 1:1 mapping would be warrant as isolation is still provided by the HW. Firstly, a good step forward would be to get any non-DMA-capable device passed-through and working. I rebased onto upstream/staging branch and applied the hack that skips the setting of XEN_DOMCTL_CDF_iommu flag, that Julien specified. Then I tried to passthrough the eMMC, but I got the following error: (XEN) DOM1: [0.879151] sdhci-esdhc-imx 4005d000.usdhc: can't request region for resource [mem 0x4005d000-0x4005dfff] (XEN) DOM1: [0.891137] sdhci-esdhc-imx 4005d000.usdhc: sdhci_pltfm_init failed -16 (XEN) DOM1: [0.900249] sdhci-esdhc-imx: probe of 4005d000.usdhc failed with error -16 Where 0x4005d000 is the physical address of the uSDHC(eMMC) node in the DT. It seems that the DomU1 kernel does not have access to that memory zone. Could you paste your partial Device-Tree and domain node? 
I'm trying to passthrough the eMMC in order to mount DomU1's root on a SDCard partition, because I couldn't get to DomU1's Linux prompt when I tried to boot with a ramdisk module. I always get this error: (XEN) DOM1: [1.544199] RAMDISK: Couldn't find valid RAM disk image starting at 0. How did you pass the ramdisk to dom1? Could this be because the ramdisk is too big? The smallest I've tried with is approximately 60MB in size. What size are the ramdisks that you are using in your dom0less booting demos? How much memory did you give to your guest? [...] I'll gladly write the patch if you give me some basic instructions regarding it, because I'm not that familiar with all the Xen internal mechanisms, and I wouldn't know where to look in order to ensure everything is properly done. I am going to suggest a quick and dirty way but it should get you to the point where 1:1 mapping will work in the basic use case: 1) Update the guest memory map in xen/include/public/arch-arm.h (see GUEST_*) so all the regions don't overlap your RAM. The best way would be to re-use the same address for the
Re: [Xen-devel] [PATCH v3 22/22] golang/xenlight: add error return type to Context.Cpupoolinfo
On 12/10/19 3:47 PM, Nick Rosbrook wrote: > From: Nick Rosbrook > > A previous commit that removed Context.CheckOpen revealed > an ineffectual assignment to err in Context.Cpupoolinfo, as > there is no error return type. > > Since it appears that the intent is to return an error here, > add an error return value to the function signature. > > Signed-off-by: Nick Rosbrook Reviewed-by: George Dunlap ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH v3 21/22] golang/xenlight: revise use of Context type
On 12/10/19 3:47 PM, Nick Rosbrook wrote: > From: Nick Rosbrook > > Remove the exported global context variable, 'Ctx.' Generally, it is > better to not export global variables for use through a Go package. > However, there are some exceptions that can be found in the standard > library. > > Add a NewContext function instead, and remove the Open, IsOpen, and > CheckOpen functions as a result. > > Also, comment-out an ineffectual assignment to 'err' inside the function > Context.CpupoolInfo so that compilation does not fail. > > Signed-off-by: Nick Rosbrook Reviewed-by: George Dunlap With one requested change... > --- > tools/golang/xenlight/xenlight.go | 219 +- > 1 file changed, 34 insertions(+), 185 deletions(-) > > diff --git a/tools/golang/xenlight/xenlight.go > b/tools/golang/xenlight/xenlight.go > index f32eb11384..1c431fa4e5 100644 > --- a/tools/golang/xenlight/xenlight.go > +++ b/tools/golang/xenlight/xenlight.go > @@ -74,6 +74,39 @@ func (e Error) Error() string { > return fmt.Sprintf("libxl error: %d", -e) > } > > +// Context represents a libxl_ctx. > +type Context struct { > + ctx *C.libxl_ctx > + logger *C.xentoollog_logger_stdiostream > +} > + > +// NewContext returns a new Context. > +func NewContext() (*Context, error) { > + var ctx Context > + > + ctx.logger = C.xtl_createlogger_stdiostream(C.stderr, C.XTL_ERROR, 0) > + > + ret := C.libxl_ctx_alloc(&ctx.ctx, C.LIBXL_VERSION, 0, > (*C.xentoollog_logger)(unsafe.Pointer(ctx.logger))) This line looks to be 114 characters long, which seems a bit much. :-) Mind breaking it just before the last argument? Thanks, -George ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH v11 2/6] xenbus/backend: Protect xenbus callback with lock
On Tue, 17 Dec 2019 18:10:19 +0100 "Jürgen Groß" wrote: > On 17.12.19 17:24, SeongJae Park wrote: > > On Tue, 17 Dec 2019 17:13:42 +0100 "Jürgen Groß" wrote: > > > >> On 17.12.19 17:07, SeongJae Park wrote: > >>> From: SeongJae Park > >>> > >>> 'reclaim_memory' callback can race with a driver code as this callback > >>> will be called from any memory pressure detected context. To deal with > >>> the case, this commit adds a spinlock in the 'xenbus_device'. Whenever > >>> 'reclaim_memory' callback is called, the lock of the device which passed > >>> to the callback as its argument is locked. Thus, drivers registering > >>> their 'reclaim_memory' callback should protect the data that might race > >>> with the callback with the lock by themselves. > >>> > >>> Signed-off-by: SeongJae Park > >>> --- > >>>drivers/xen/xenbus/xenbus_probe.c | 1 + > >>>drivers/xen/xenbus/xenbus_probe_backend.c | 10 -- > >>>include/xen/xenbus.h | 2 ++ > >>>3 files changed, 11 insertions(+), 2 deletions(-) > >>> > >>> diff --git a/drivers/xen/xenbus/xenbus_probe.c > >>> b/drivers/xen/xenbus/xenbus_probe.c > >>> index 5b471889d723..b86393f172e6 100644 > >>> --- a/drivers/xen/xenbus/xenbus_probe.c > >>> +++ b/drivers/xen/xenbus/xenbus_probe.c > >>> @@ -472,6 +472,7 @@ int xenbus_probe_node(struct xen_bus_type *bus, > >>> goto fail; > >>> > >>> dev_set_name(>dev, "%s", devname); > >>> + spin_lock_init(>reclaim_lock); > >>> > >>> /* Register with generic device framework. 
*/ > >>> err = device_register(>dev); > >>> diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c > >>> b/drivers/xen/xenbus/xenbus_probe_backend.c > >>> index 7e78ebef7c54..516aa64b9967 100644 > >>> --- a/drivers/xen/xenbus/xenbus_probe_backend.c > >>> +++ b/drivers/xen/xenbus/xenbus_probe_backend.c > >>> @@ -251,12 +251,18 @@ static int backend_probe_and_watch(struct > >>> notifier_block *notifier, > >>>static int backend_reclaim_memory(struct device *dev, void *data) > >>>{ > >>> const struct xenbus_driver *drv; > >>> + struct xenbus_device *xdev; > >>> + unsigned long flags; > >>> > >>> if (!dev->driver) > >>> return 0; > >>> drv = to_xenbus_driver(dev->driver); > >>> - if (drv && drv->reclaim_memory) > >>> - drv->reclaim_memory(to_xenbus_device(dev)); > >>> + if (drv && drv->reclaim_memory) { > >>> + xdev = to_xenbus_device(dev); > >>> + spin_trylock_irqsave(>reclaim_lock, flags); > >> > >> You need spin_lock_irqsave() here. Or maybe spin_lock() would be fine, > >> too? I can't see a reason why you'd want to disable irqs here. > > > > I needed to diable irq here as this is called from the memory shrinker > > context. > > Okay. > > > > > Also, used 'trylock' because the 'probe()' and 'remove()' code of the driver > > might include memory allocation. And the xen-blkback actually does. If the > > allocation shows a memory pressure during the allocation, it will trigger > > this > > shrinker callback again and then deadlock. > > In that case you need to either return when you didn't get the lock or Yes, it should. Cannot believe how I posted this code. Seems I made some terrible mistake while formatting patches. Anyway, will return if fail to acquire the lock, in the next version. 
Thanks, SeongJae Park > > - when obtaining the lock during probe() and remove() set a variable >containing the current cpu number > - and reset that to e.g. NR_CPUS before releasing the lock again > - in the shrinker callback do trylock, and if you didn't get the lock >test whether the cpu-variable above is set to your current cpu and >continue only if yes; if not, redo the trylock > > > Juergen ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] Xen ARM Dom0less passthrough without IOMMU
> On Mon, 16 Dec 2019, Julien Grall wrote: > > On 16/12/2019 23:05, Stefano Stabellini wrote: > > > On Mon, 16 Dec 2019, Julien Grall wrote: > > > > On 16/12/2019 18:02, Andrei Cherechesu wrote: > > > > But even with this patch, RAM in DomU is not direct mapped (i.e Guest > > > > Physical > > > > Address == Host Physical Address). This means that DMA-capable device > > > > would > > > > not work properly in DomU. > > > > > > > > We could theoritically map DomU direct mapped, but this would break the > > > > isolation provided by the hypervisor. > > > > > > Yes, being able to map the DomU memory 1:1 can be pretty useful for some > > > very embedded dom0less configurations, in fact I was surprised that a > > > couple of Xilinx users asked me for that recently. Typically, the users > > > are aware of the consequences but they still find them better than the > > > alternative (i.e. the lack of isolation is bad but is tolerable in their > > > configuration.) > > This does not make much sense... The whole point of a hypervisor is to > > isolate > > guest between each other... So if you are happy with the lack of isolation, > > then why are you using an hypervisor at the first place? > > There are a number of reasons, although they are all variation of the > same theme. In all these cases the IOMMU cannot be used for one reason > or the other (a device is not behind the IOMMU, or due to an errata, > etc.) > > - multiple baremetal apps > The user wants to run two or more baremetal (unikernel-like) > applications. The user owns both applications and she is not much > concerned about isolation (although it is always desirable when > possible.) > > - multiple OSes > This is similar to the one before, however, instead of multiple > baremetal apps, we are talking about multiple full OSes. For instance, > Linux and Android or Linux and VxWorks. Again, they are both maintained > by the same user (no multi-tenancy) so isolation is desirable but it is > not the top concern. 
> > - real-time / no real-time > The user wants to run a real-time OS or real-time baremetal app and a > non real-time OS. For instance a tiny baremetal app controlling one > specific device and Linux. Again, the user is responsible for both > systems so isolation is not a concern. > > In all these cases the users has to run multiple OSes or baremetal apps > so she needs a hypervisor. However, it is tolerable that the apps are > not actually fully isolated from each others because they are both > developed and deployed together by the same "owner". > Basically, since we do not have an IOMMU, we would be able to ensure memory isolation via a NXP IP named xRDC (Extended Resource Domain Controller) that our boards have, which supervises the access to memory buses. But before we get to think about isolation, we need to enable basic passthrough functionality (via 1:1 mapping, since no IOMMU). Firstly, a good step forward would be to get any non-DMA-capable device passed-through and working. I rebased onto upstream/staging branch and applied the hack that skips the setting of XEN_DOMCTL_CDF_iommu flag, that Julien specified. Then I tried to passthrough the eMMC, but I got the following error: (XEN) DOM1: [0.879151] sdhci-esdhc-imx 4005d000.usdhc: can't request region for resource [mem 0x4005d000-0x4005dfff] (XEN) DOM1: [0.891137] sdhci-esdhc-imx 4005d000.usdhc: sdhci_pltfm_init failed -16 (XEN) DOM1: [0.900249] sdhci-esdhc-imx: probe of 4005d000.usdhc failed with error -16 Where 0x4005d000 is the physical address of the uSDHC(eMMC) node in the DT. It seems that the DomU1 kernel does not have access to that memory zone. I'm trying to passthrough the eMMC in order to mount DomU1's root on a SDCard partition, because I couldn't get to DomU1's Linux prompt when I tried to boot with a ramdisk module. I always get this error: (XEN) DOM1: [1.544199] RAMDISK: Couldn't find valid RAM disk image starting at 0. Could this be because the ramdisk is too big? 
The smallest I've tried with Is approximately 60MB in size. What size are the ramdisks that you are using in your dom0less booting demos? > > > From an implementation perspective, it should be a matter of calling > > > allocate_memory_11 instead of allocate_memory from construct_domU. I > > > wanted to experiment with it myself but I haven't had the time. If > > > nothing else, it would be useful to have a patch around to do it if > > > needed. > > This is not that simple. You at least also need to: > > - Update the code to generate the DT based on the new 1:1 address > > - Modify the various emulation in Xen because they rely on Xen guest > > memory layout > > - Modify is_domain_direct_mapped() to deal with guest > > > > I probably missed other bits. Anyway, this is not something I am willing to > > accept upstream as this break the core idea of an hypervisor. > > If you prefer not to have it upstream, I would be happy to maintain it > downstream in Xilinx/Xen or another
Re: [Xen-devel] [PATCH v11 2/6] xenbus/backend: Protect xenbus callback with lock
On 17.12.19 17:24, SeongJae Park wrote: On Tue, 17 Dec 2019 17:13:42 +0100 "Jürgen Groß" wrote: On 17.12.19 17:07, SeongJae Park wrote: From: SeongJae Park 'reclaim_memory' callback can race with a driver code as this callback will be called from any memory pressure detected context. To deal with the case, this commit adds a spinlock in the 'xenbus_device'. Whenever 'reclaim_memory' callback is called, the lock of the device which passed to the callback as its argument is locked. Thus, drivers registering their 'reclaim_memory' callback should protect the data that might race with the callback with the lock by themselves. Signed-off-by: SeongJae Park --- drivers/xen/xenbus/xenbus_probe.c | 1 + drivers/xen/xenbus/xenbus_probe_backend.c | 10 -- include/xen/xenbus.h | 2 ++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 5b471889d723..b86393f172e6 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -472,6 +472,7 @@ int xenbus_probe_node(struct xen_bus_type *bus, goto fail; dev_set_name(>dev, "%s", devname); + spin_lock_init(>reclaim_lock); /* Register with generic device framework. 
*/ err = device_register(&xendev->dev); diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c index 7e78ebef7c54..516aa64b9967 100644 --- a/drivers/xen/xenbus/xenbus_probe_backend.c +++ b/drivers/xen/xenbus/xenbus_probe_backend.c @@ -251,12 +251,18 @@ static int backend_probe_and_watch(struct notifier_block *notifier, static int backend_reclaim_memory(struct device *dev, void *data) { const struct xenbus_driver *drv; + struct xenbus_device *xdev; + unsigned long flags; if (!dev->driver) return 0; drv = to_xenbus_driver(dev->driver); - if (drv && drv->reclaim_memory) - drv->reclaim_memory(to_xenbus_device(dev)); + if (drv && drv->reclaim_memory) { + xdev = to_xenbus_device(dev); + spin_trylock_irqsave(&xdev->reclaim_lock, flags); You need spin_lock_irqsave() here. Or maybe spin_lock() would be fine, too? I can't see a reason why you'd want to disable irqs here. I needed to disable irq here as this is called from the memory shrinker context. Okay. Also, used 'trylock' because the 'probe()' and 'remove()' code of the driver might include memory allocation. And the xen-blkback actually does. If the allocation shows a memory pressure during the allocation, it will trigger this shrinker callback again and then deadlock. In that case you need to either return when you didn't get the lock or - when obtaining the lock during probe() and remove() set a variable containing the current cpu number - and reset that to e.g. NR_CPUS before releasing the lock again - in the shrinker callback do trylock, and if you didn't get the lock test whether the cpu-variable above is set to your current cpu and continue only if yes; if not, redo the trylock Juergen ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH V4 2/4] x86/altp2m: Add hypercall to set a range of sve bits
On 17.12.2019 16:12, Alexandru Stefan ISAILA wrote: > @@ -4711,6 +4712,20 @@ static int do_altp2m_op( > } > break; > > +case HVMOP_altp2m_set_suppress_ve_multi: > +if ( a.u.suppress_ve_multi.pad1 || > + a.u.suppress_ve_multi.first_error_code || > + a.u.suppress_ve_multi.first_error || > + a.u.suppress_ve_multi.first_gfn > > a.u.suppress_ve_multi.last_gfn ) > +rc = -EINVAL; An error having occurred doesn't prevent scheduling of a continuation. When you come back here, you'll then return -EINVAL instead of continuing the prior operation. > --- a/xen/arch/x86/mm/p2m.c > +++ b/xen/arch/x86/mm/p2m.c > @@ -3064,6 +3064,70 @@ out: > return rc; > } > > +/* > + * Set/clear the #VE suppress bit for multiple pages. Only available on VMX. > + */ > +int p2m_set_suppress_ve_multi(struct domain *d, > + struct xen_hvm_altp2m_suppress_ve_multi *sve) > +{ > +struct p2m_domain *host_p2m = p2m_get_hostp2m(d); > +struct p2m_domain *ap2m = NULL; > +struct p2m_domain *p2m = host_p2m; > +uint64_t start = sve->first_gfn; > +int rc = 0; > +uint64_t max_phys_addr = (1UL << d->arch.cpuid->extd.maxphysaddr) - 1; > + > +if ( sve->view > 0 ) > +{ > +if ( sve->view >= MAX_ALTP2M || > + d->arch.altp2m_eptp[array_index_nospec(sve->view, MAX_EPTP)] == > + mfn_x(INVALID_MFN) ) > +return -EINVAL; > + > +p2m = ap2m = d->arch.altp2m_p2m[array_index_nospec(sve->view, > + MAX_ALTP2M)]; > +} > + > +p2m_lock(host_p2m); > + > +if ( ap2m ) > +p2m_lock(ap2m); > + > +while ( sve->last_gfn >= start && start < max_phys_addr ) Why don't you clip ->last_gfn ahead of the loop, saving one comparison per iteration? 
> +{ > +p2m_access_t a; > +p2m_type_t t; > +mfn_t mfn; > +int err = 0; > + > +if ( altp2m_get_effective_entry(p2m, _gfn(start), &mfn, &t, &a, > AP2MGET_query) ) > +a = p2m->default_access; > + > +if ( (err = p2m->set_entry(p2m, _gfn(start), mfn, PAGE_ORDER_4K, t, > a, > + sve->suppress_ve)) && !sve->first_error ) > +{ > +sve->first_error = start; /* Save the gfn of the first error */ > +sve->first_error_code = err; /* Save the first error code */ > +} What if the first error occurs on GFN 0? I guess you want to check ->first_error_code against zero in the condition. > --- a/xen/include/public/hvm/hvm_op.h > +++ b/xen/include/public/hvm/hvm_op.h > @@ -46,6 +46,16 @@ struct xen_hvm_altp2m_suppress_ve { > uint64_t gfn; > }; > > +struct xen_hvm_altp2m_suppress_ve_multi { > +uint16_t view; > +uint8_t suppress_ve; /* Boolean type. */ > +uint8_t pad1; > +uint32_t first_error_code; /* Must be set to 0 . */ int32_t perhaps, since error codes are negative? Jan ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH V4 1/4] x86/mm: Add array_index_nospec to guest provided index values
On 17.12.2019 16:12, Alexandru Stefan ISAILA wrote: > --- a/xen/arch/x86/mm/mem_access.c > +++ b/xen/arch/x86/mm/mem_access.c > @@ -367,10 +367,11 @@ long p2m_set_mem_access(struct domain *d, gfn_t gfn, > uint32_t nr, > if ( altp2m_idx ) > { > if ( altp2m_idx >= MAX_ALTP2M || > - d->arch.altp2m_eptp[altp2m_idx] == mfn_x(INVALID_MFN) ) > + d->arch.altp2m_eptp[array_index_nospec(altp2m_idx, MAX_EPTP)] == The bounds check is against MAX_ALTP2M. Both MAX_ values look to be independent, which means bounds check and value passed to the helper need to match up (not just here). > --- a/xen/arch/x86/mm/p2m-ept.c > +++ b/xen/arch/x86/mm/p2m-ept.c > @@ -1353,7 +1353,8 @@ void setup_ept_dump(void) > > void p2m_init_altp2m_ept(struct domain *d, unsigned int i) > { > -struct p2m_domain *p2m = d->arch.altp2m_p2m[i]; > +struct p2m_domain *p2m = > + d->arch.altp2m_p2m[array_index_nospec(i, MAX_ALTP2M)]; > struct p2m_domain *hostp2m = p2m_get_hostp2m(d); > struct ept_data *ept; > > @@ -1366,7 +1367,7 @@ void p2m_init_altp2m_ept(struct domain *d, unsigned int > i) > p2m->max_mapped_pfn = p2m->max_remapped_gfn = 0; > ept = &p2m->ept; > ept->mfn = pagetable_get_pfn(p2m_get_pagetable(p2m)); > -d->arch.altp2m_eptp[i] = ept->eptp; > +d->arch.altp2m_eptp[array_index_nospec(i, MAX_EPTP)] = ept->eptp; > } > > unsigned int p2m_find_altp2m_by_eptp(struct domain *d, uint64_t eptp) > --- a/xen/arch/x86/mm/p2m.c > +++ b/xen/arch/x86/mm/p2m.c > @@ -2499,7 +2499,7 @@ static void p2m_reset_altp2m(struct domain *d, unsigned > int idx, > struct p2m_domain *p2m; > > ASSERT(idx < MAX_ALTP2M); > -p2m = d->arch.altp2m_p2m[idx]; > +p2m = d->arch.altp2m_p2m[array_index_nospec(idx, MAX_ALTP2M)]; > > p2m_lock(p2m); > > @@ -2540,7 +2540,7 @@ static int p2m_activate_altp2m(struct domain *d, > unsigned int idx) > > ASSERT(idx < MAX_ALTP2M); > > -p2m = d->arch.altp2m_p2m[idx]; > +p2m = d->arch.altp2m_p2m[array_index_nospec(idx, MAX_ALTP2M)]; All of the above have a more or less significant disconnect between the 
bounds check and the use as array index. I think it would be quite helpful if these could live close to one another, so one can (see further up) easily prove that both specified bounds actually match up. Jan ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH] xen/page_alloc: statically allocate bootmem_region_list
Hi, On 17/12/2019 16:37, Jan Beulich wrote: I'm sorry for the non-threaded reply, but my mail client has mixed up this mail with another one, so I have nothing to properly reply to. With one stylistic issue taken care of (blanks around the binary operator / ) Reviewed-by: Jan Beulich The change would be easy enough to do while committing, but said mailbox issue would either require someone else to apply the change, or you to send a v2 (which then hopefully won't end up garbled). I am in the middle of committing other patches on Arm, so I can commit it. Iirc this was suggested before, so it would be nice if the patch could also gain a suitable Suggested-by. I suggested it on [1] but this was based on a previous discussion about an Arm bug (see [2]). So I am not sure who to put in the Suggested-by tag here. I will commit without it. Cheers, [1] <3d7f6e45-4c62-b314-7110-2e998bcdd...@arm.com> [2] <5f71588b-274a-bdb7-d324-5ff9177a0...@arm.com> Jan -- Julien Grall ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [ovmf test] 144895: regressions - FAIL
flight 144895 ovmf real [real] http://logs.test-lab.xenproject.org/osstest/logs/144895/ Regressions :-( Tests which did not succeed and are blocking, including tests which could not be run: build-i386-xsm 6 xen-build fail REGR. vs. 144637 build-amd64 6 xen-build fail REGR. vs. 144637 build-amd64-xsm 6 xen-build fail REGR. vs. 144637 build-i386 6 xen-build fail REGR. vs. 144637 Tests which did not succeed, but are not blocking: test-amd64-amd64-xl-qemuu-ovmf-amd64 1 build-check(1) blocked n/a build-amd64-libvirt 1 build-check(1) blocked n/a build-i386-libvirt 1 build-check(1) blocked n/a test-amd64-i386-xl-qemuu-ovmf-amd64 1 build-check(1) blocked n/a version targeted for testing: ovmf bfb141cf19dd6f9b8df8b9d0914a5b3b15e1a798 baseline version: ovmf 804666c86e7b6f04fe5c5cfdb13199c19e0e99b0 Last test of basis 144637 2019-12-09 09:09:49 Z 8 days Failing since 144646 2019-12-10 01:39:53 Z 7 days 68 attempts Testing same since 144770 2019-12-12 18:41:26 Z 4 days 57 attempts People who touched revisions under test: Antoine Coeur Ard Biesheuvel Bob Feng Jiewen Yao Michael Kubacki Pete Batard Philippe Mathieu-Daude Steven Shi jobs: build-amd64-xsm fail build-i386-xsm fail build-amd64 fail build-i386 fail build-amd64-libvirt blocked build-i386-libvirt blocked build-amd64-pvops pass build-i386-pvops pass test-amd64-amd64-xl-qemuu-ovmf-amd64 blocked test-amd64-i386-xl-qemuu-ovmf-amd64 blocked sg-report-flight on osstest.test-lab.xenproject.org logs: /home/logs/logs images: /home/logs/images Logs, config files, etc. are available at http://logs.test-lab.xenproject.org/osstest/logs Explanation of these reports, and of osstest in general, is at http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master Test harness code can be found at http://xenbits.xen.org/gitweb?p=osstest.git;a=summary Not pushing.
commit bfb141cf19dd6f9b8df8b9d0914a5b3b15e1a798 Author: Pete Batard Date: Tue Dec 10 18:23:04 2019 + MdePkg/Include: Add DCC and BCM2835 SPCR UART types As per the Microsoft Debug Port Table 2 (DBG2) documentation, that can be found online, we are missing 2 serial interface types for Arm DCC and Bcm2835 (the latter being used with the Raspberry Pi). These same types are present in DebugPort2Table.h so add them to SerialPortConsoleRedirectionTable.h too. Note that we followed the same idiosyncrasies as DebugPort2Table for naming these new macros. Signed-off-by: Pete Batard Acked-by: Ard Biesheuvel Reviewed-by: Liming Gao commit 2fe25a74d6fee3c2ac0b930f7f3596cb432e766e Author: Ard Biesheuvel Date: Tue Mar 5 14:32:48 2019 +0100 ArmPkg/MmCommunicationDxe: relay architected PI events to MM context PI defines a few architected events that have significance in the MM context as well as in the non-secure DXE context. So register notify handlers for these events, and relay them into the standalone MM world. Signed-off-by: Ard Biesheuvel Reviewed-by: Jiewen Yao Reviewed-by: Achin Gupta commit d3add11e87dace180387562d6f1951f2bffbd3d9 Author: Michael Kubacki Date: Wed Nov 20 17:31:24 2019 -0800 MdeModulePkg PeiCore: Improve comment semantics This patch clarifies wording in several PeiCore comments to improve reading comprehension. Cc: Dandan Bi Cc: Liming Gao Cc: Jian J Wang Cc: Hao A Wu Signed-off-by: Michael Kubacki Reviewed-by: Liming Gao Reviewed-by: Jian J Wang commit d39d1260c615b716675f67f5c4e1f4f52df01dad Author: Michael Kubacki Date: Wed Nov 20 17:10:48 2019 -0800 MdeModulePkg PeiCore: Fix typos Cc: Dandan Bi Cc: Liming Gao Cc: Jian J Wang Cc: Hao A Wu Signed-off-by: Michael Kubacki Reviewed-by: Liming Gao Reviewed-by: Philippe Mathieu-Daude Reviewed-by: Jian J Wang commit 97eedf5dfbaffde33210fd88066247cf0b7d3325 Author: Antoine Coeur Date: Wed Dec 4 12:14:53
Re: [Xen-devel] [PATCH 0/6] x86/suspend: State cleanup
On 17.12.2019 17:33, Andrew Cooper wrote: > On 17/12/2019 16:17, Jan Beulich wrote: >> On 13.12.2019 20:04, Andrew Cooper wrote: >>> Andrew Cooper (6): >>> x86/suspend: Clarify and improve the behaviour of do_suspend_lowlevel() >>> x86/suspend: Don't bother saving %cr3, %ss or flags >>> x86/suspend: Don't save unnecessary GPRs >>> x86/suspend: Restore cr4 later during resume >>> x86/suspend: Expand macros in wakeup_prot.S >>> x86/suspend: Drop save_rest_processor_state() completely >>> >>> xen/arch/x86/acpi/suspend.c | 55 ++-- >>> xen/arch/x86/acpi/wakeup_prot.S | 136 >>> >>> xen/arch/x86/boot/wakeup.S | 2 +- >>> 3 files changed, 46 insertions(+), 147 deletions(-) >> Based on Roger's review >> Acked-by: Jan Beulich >> >> One remark on the combination of patches 2 and 5: The loading of >> the stack related registers would now seem to be a fair candidate >> for using LSS (generally to be preferred over MOV-to-SS). > > Well... You've just fixed c/s ffa21ea5303 in the emulator, and it > demonstrates why LSS can't be used. Hmm, indeed, how did I forget? (It's really very counter-intuitive for this insn to not be universally usable.) Jan ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH] xen/page_alloc: statically allocate bootmem_region_list
I'm sorry for the non-threaded reply, but my mail client has mixed up this mail with another one, so I have nothing to properly reply to. With one stylistic issue taken care of (blanks around the binary operator / ) Reviewed-by: Jan Beulich The change would be easy enough to do while committing, but said mailbox issue would either require someone else to apply the change, or you to send a v2 (which then hopefully won't end up garbled). Iirc this was suggested before, so it would be nice if the patch could also gain a suitable Suggested-by. Jan ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH 0/6] x86/suspend: State cleanup
On 17/12/2019 16:17, Jan Beulich wrote: > On 13.12.2019 20:04, Andrew Cooper wrote: >> Andrew Cooper (6): >> x86/suspend: Clarify and improve the behaviour of do_suspend_lowlevel() >> x86/suspend: Don't bother saving %cr3, %ss or flags >> x86/suspend: Don't save unnecessary GPRs >> x86/suspend: Restore cr4 later during resume >> x86/suspend: Expand macros in wakeup_prot.S >> x86/suspend: Drop save_rest_processor_state() completely >> >> xen/arch/x86/acpi/suspend.c | 55 ++-- >> xen/arch/x86/acpi/wakeup_prot.S | 136 >> >> xen/arch/x86/boot/wakeup.S | 2 +- >> 3 files changed, 46 insertions(+), 147 deletions(-) > Based on Roger's review > Acked-by: Jan Beulich > > One remark on the combination of patches 2 and 5: The loading of > the stack related registers would now seem to be a fair candidate > for using LSS (generally to be preferred over MOV-to-SS). Well... You've just fixed c/s ffa21ea5303 in the emulator, and it demonstrates why LSS can't be used. ~Andrew ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH] xen/arm: Basic support for sunxi/sun50i h6 platform.
Hi, On 04/12/2019 09:27, Andre Przywara wrote: On Tue, 3 Dec 2019 16:52:45 + Julien Grall wrote: Hi, On 03/12/2019 14:38, Andre Przywara wrote: On Tue, 3 Dec 2019 11:39:58 + Julien Grall wrote: Hi, (+Andre) Hi, @Andre, IIRC you originally added the support for sunxi in Xen. Could you have a look at this patch? Looks alright, and indeed the H6 needs it. Even though Allwinner totally re-arranged the memory map, they missed the opportunity to put each device at least in their own 4K page. Reviewed-by: Andre Przywara Thank you for the review! If you can wait till this evening, I can even test it. I can wait until tomorrow before committing the patch. I booted Xen 4.12.1 to the Dom0 prompt on the Pine H64, and saw all the serial devices in /sys/firmware/devicetree/base/soc. Then I applied the patch, and the serial devices were gone. And yes, all the four main serial ports share one 4K page on the H6. Tested-by: Andre Przywara Thank you for the testing! Acked-by: Julien Grall I have also committed it. Cheers, -- Julien Grall ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [PATCH 1/3] xen/blkback: Squeeze page pools if a memory pressure is detected
From: SeongJae Park I thought it would be better to review separated patches, but seems it was my mistake. As Juergen asked, merged them again and post here. Also, dropped Roger's reviewed-by. Thanks, SeongJae Park >8 --- Subject: [PATCH 1/3] xen/blkback: Squeeze page pools if a memory pressure is detected Each `blkif` has a free pages pool for the grant mapping. The size of the pool starts from zero and is increased on demand while processing the I/O requests. If current I/O requests handling is finished or 100 milliseconds has passed since last I/O requests handling, it checks and shrinks the pool to not exceed the size limit, `max_buffer_pages`. Therefore, host administrators can cause memory pressure in blkback by attaching a large number of block devices and inducing I/O. Such problematic situations can be avoided by limiting the maximum number of devices that can be attached, but finding the optimal limit is not so easy. An improperly set limit can result in memory pressure or resource underutilization. This commit avoids such problematic situations by squeezing the pools (returns every free page in the pool to the system) for a while (users can set this duration via a module parameter) if memory pressure is detected. Discussions === The `blkback`'s original shrinking mechanism returns only pages in the pool which are not currently being used by `blkback` to the system. In other words, the pages that are not mapped with granted pages. Because this commit is changing only the shrink limit but still uses the same freeing mechanism it does not touch pages which are currently mapping grants. Once memory pressure is detected, this commit keeps the squeezing limit for a user-specified time duration. The duration should be neither too long nor too short. If it is too long, the squeezing incurring overhead can reduce the I/O performance. If it is too short, `blkback` will not free enough pages to reduce the memory pressure.
This commit sets the value as `10 milliseconds` by default because it is a short time in terms of I/O while it is a long time in terms of memory operations. Also, as the original shrinking mechanism works for at least every 100 milliseconds, this could be a somewhat reasonable choice. I also tested other durations (refer to the below section for more details) and confirmed that 10 milliseconds is the one that works best with the test. That said, the proper duration depends on actual configurations and workloads. That's why this commit allows users to set the duration as a module parameter. Memory Pressure Test To show how this commit fixes the memory pressure situation well, I configured a test environment on a xen-running virtualization system. On the `blkfront` running guest instances, I attach a large number of network-backed volume devices and induce I/O to those. Meanwhile, I measure the number of pages that swapped in (pswpin) and out (pswpout) on the `blkback` running guest. The test ran twice, once for the `blkback` before this commit and once for that after this commit. As shown below, this commit has dramatically reduced the memory pressure: pswpin pswpout before 76,672 185,799 after 212 3,325 Optimal Aggressive Shrinking Duration - To find a best squeezing duration, I repeated the test with three different durations (1ms, 10ms, and 100ms). The results are as below: duration pswpin pswpout 1 852 6,424 10 212 3,325 100 203 3,340 As expected, the memory pressure has decreased as the duration is increased, but the reduction stopped from the `10ms`. Based on these results, I chose the default duration as 10ms. Performance Overhead Test = This commit could incur I/O performance degradation under severe memory pressure because the squeezing will require more page allocations per I/O. To show the overhead, I artificially made a worst-case squeezing situation and measured the I/O performance of a `blkfront` running guest.
For the artificial squeezing, I set the `blkback.max_buffer_pages` using the `/sys/module/xen_blkback/parameters/max_buffer_pages` file. In this test, I set the value to `1024` and `0`. The `1024` is the default value. Setting the value as `0` is same to a situation doing the squeezing always (worst-case). If the underlying block device is slow enough, the squeezing overhead could be hidden. For the reason, I use a fast block device, namely the rbd[1]: # xl block-attach guest phy:/dev/ram0 xvdb w For the I/O performance measurement, I run a simple `dd` command 5 times directly to the device as below and collect the 'MB/s' results. $ for i in {1..5}; do dd if=/dev/zero of=/dev/xvdb \ bs=4k count=$((256*512)); sync; done The results
Re: [Xen-devel] [PATCH v11 2/6] xenbus/backend: Protect xenbus callback with lock
On Tue, 17 Dec 2019 17:13:42 +0100 "Jürgen Groß" wrote: > On 17.12.19 17:07, SeongJae Park wrote: > > From: SeongJae Park > > > > 'reclaim_memory' callback can race with a driver code as this callback > > will be called from any memory pressure detected context. To deal with > > the case, this commit adds a spinlock in the 'xenbus_device'. Whenever > > 'reclaim_memory' callback is called, the lock of the device which passed > > to the callback as its argument is locked. Thus, drivers registering > > their 'reclaim_memory' callback should protect the data that might race > > with the callback with the lock by themselves. > > > > Signed-off-by: SeongJae Park > > --- > > drivers/xen/xenbus/xenbus_probe.c | 1 + > > drivers/xen/xenbus/xenbus_probe_backend.c | 10 -- > > include/xen/xenbus.h | 2 ++ > > 3 files changed, 11 insertions(+), 2 deletions(-) > > > > diff --git a/drivers/xen/xenbus/xenbus_probe.c > > b/drivers/xen/xenbus/xenbus_probe.c > > index 5b471889d723..b86393f172e6 100644 > > --- a/drivers/xen/xenbus/xenbus_probe.c > > +++ b/drivers/xen/xenbus/xenbus_probe.c > > @@ -472,6 +472,7 @@ int xenbus_probe_node(struct xen_bus_type *bus, > > goto fail; > > > > dev_set_name(>dev, "%s", devname); > > + spin_lock_init(>reclaim_lock); > > > > /* Register with generic device framework. 
*/ > > err = device_register(>dev); > > diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c > > b/drivers/xen/xenbus/xenbus_probe_backend.c > > index 7e78ebef7c54..516aa64b9967 100644 > > --- a/drivers/xen/xenbus/xenbus_probe_backend.c > > +++ b/drivers/xen/xenbus/xenbus_probe_backend.c > > @@ -251,12 +251,18 @@ static int backend_probe_and_watch(struct > > notifier_block *notifier, > > static int backend_reclaim_memory(struct device *dev, void *data) > > { > > const struct xenbus_driver *drv; > > + struct xenbus_device *xdev; > > + unsigned long flags; > > > > if (!dev->driver) > > return 0; > > drv = to_xenbus_driver(dev->driver); > > - if (drv && drv->reclaim_memory) > > - drv->reclaim_memory(to_xenbus_device(dev)); > > + if (drv && drv->reclaim_memory) { > > + xdev = to_xenbus_device(dev); > > + spin_trylock_irqsave(>reclaim_lock, flags); > > You need spin_lock_irqsave() here. Or maybe spin_lock() would be fine, > too? I can't see a reason why you'd want to disable irqs here. I needed to disable irqs here as this is called from the memory shrinker context. Also, used 'trylock' because the 'probe()' and 'remove()' code of the driver might include memory allocation. And the xen-blkback actually does. If the allocation shows a memory pressure during the allocation, it will trigger this shrinker callback again and then deadlock.
Thanks, SeongJae Park > > > + drv->reclaim_memory(xdev); > > + spin_unlock_irqrestore(>reclaim_lock, flags); > > + } > > return 0; > > } > > > > diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h > > index c861cfb6f720..d9468313061d 100644 > > --- a/include/xen/xenbus.h > > +++ b/include/xen/xenbus.h > > @@ -76,6 +76,8 @@ struct xenbus_device { > > enum xenbus_state state; > > struct completion down; > > struct work_struct work; > > + /* 'reclaim_memory' callback is called while this lock is acquired */ > > + spinlock_t reclaim_lock; > > }; > > > > static inline struct xenbus_device *to_xenbus_device(struct device *dev) > > > > > Juergen > ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH] xen/page_alloc: statically allocate bootmem_region_list
Hi Hongyan, On 17/12/2019 14:33, Hongyan Xia wrote: The existing code assumes that the first mfn passed to the boot allocator is mapped, which creates problems when, e.g., we do not have a direct map, and may create other bootstrapping problems in the future. Make it static. The size is kept the same as before (1 page). Signed-off-by: Hongyan Xia --- xen/common/page_alloc.c | 11 +-- 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c index 7cb1bd368b..7afb651b79 100644 --- a/xen/common/page_alloc.c +++ b/xen/common/page_alloc.c @@ -244,9 +244,12 @@ PAGE_LIST_HEAD(page_broken_list); */ mfn_t first_valid_mfn = INVALID_MFN_INITIALIZER; -static struct bootmem_region { +struct bootmem_region { unsigned long s, e; /* MFNs @s through @e-1 inclusive are free */ -} *__initdata bootmem_region_list; +}; +/* Statically allocate a page for bootmem_region_list. */ +static struct bootmem_region __initdata +bootmem_region_list[PAGE_SIZE/sizeof(struct bootmem_region)]; NIT: space before and after /. Other than that: Reviewed-by: Julien Grall Cheers, -- Julien Grall ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH 0/6] x86/suspend: State cleanup
On 13.12.2019 20:04, Andrew Cooper wrote: > Andrew Cooper (6): > x86/suspend: Clarify and improve the behaviour of do_suspend_lowlevel() > x86/suspend: Don't bother saving %cr3, %ss or flags > x86/suspend: Don't save unnecessary GPRs > x86/suspend: Restore cr4 later during resume > x86/suspend: Expand macros in wakeup_prot.S > x86/suspend: Drop save_rest_processor_state() completely > > xen/arch/x86/acpi/suspend.c | 55 ++-- > xen/arch/x86/acpi/wakeup_prot.S | 136 > > xen/arch/x86/boot/wakeup.S | 2 +- > 3 files changed, 46 insertions(+), 147 deletions(-) Based on Roger's review Acked-by: Jan Beulich One remark on the combination of patches 2 and 5: The loading of the stack related registers would now seem to be a fair candidate for using LSS (generally to be preferred over MOV-to-SS). Jan ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH v10 2/4] xen/blkback: Squeeze page pools if a memory pressure is detected
On Tue, 17 Dec 2019 09:30:32 +0100 SeongJae Park wrote: > On Tue, 17 Dec 2019 09:16:47 +0100 "Jürgen Groß" wrote: > > > On 17.12.19 08:59, SeongJae Park wrote: > > > On Tue, 17 Dec 2019 07:23:12 +0100 "Jürgen Groß" wrote: > > > > > >> On 16.12.19 20:48, SeongJae Park wrote: > > >>> On on, 16 Dec 2019 17:23:44 +0100, Jürgen Groß wrote: > > >>> > > On 16.12.19 17:15, SeongJae Park wrote: > > > On Mon, 16 Dec 2019 15:37:20 +0100 SeongJae Park > > > wrote: > > > > > >> On Mon, 16 Dec 2019 13:45:25 +0100 SeongJae Park > > >> wrote: > > >> > > >>> From: SeongJae Park > > >>> > > > [...] > > >>> --- a/drivers/block/xen-blkback/xenbus.c > > >>> +++ b/drivers/block/xen-blkback/xenbus.c > > >>> @@ -824,6 +824,24 @@ static void frontend_changed(struct > > >>> xenbus_device *dev, > > >>> } > > >>> > > >>> > > >>> +/* Once a memory pressure is detected, squeeze free page pools for > > >>> a while. */ > > >>> +static unsigned int buffer_squeeze_duration_ms = 10; > > >>> +module_param_named(buffer_squeeze_duration_ms, > > >>> + buffer_squeeze_duration_ms, int, 0644); > > >>> +MODULE_PARM_DESC(buffer_squeeze_duration_ms, > > >>> +"Duration in ms to squeeze pages buffer when a memory pressure is > > >>> detected"); > > >>> + > > >>> +/* > > >>> + * Callback received when the memory pressure is detected. > > >>> + */ > > >>> +static void reclaim_memory(struct xenbus_device *dev) > > >>> +{ > > >>> + struct backend_info *be = dev_get_drvdata(>dev); > > >>> + > > >>> + be->blkif->buffer_squeeze_end = jiffies + > > >>> + msecs_to_jiffies(buffer_squeeze_duration_ms); > > >> > > >> This callback might race with 'xen_blkbk_probe()'. The race could > > >> result in > > >> __NULL dereferencing__, as 'xen_blkbk_probe()' sets '->blkif' after > > >> it links > > >> 'be' to the 'dev'. Please _don't merge_ this patch now! > > >> > > >> I will do more test and share results. Meanwhile, if you have any > > >> opinion, > > >> please let me know. 
> > >>> > > >>> I reduced system memory and attached bunch of devices in short time so > > >>> that > > >>> memory pressure occurs while device attachments are ongoing. Under this > > >>> circumstance, I was able to see the race. > > >>> > > > > > > Not only '->blkif', but 'be' itself also coule be a NULL. As similar > > > concurrency issues could be in other drivers in their way, I suggest > > > to change > > > the reclaim callback ('->reclaim_memory') to be called for each > > > driver instead > > > of each device. Then, each driver could be able to deal with its > > > concurrency > > > issues by itself. > > > > Hmm, I don't like that. This would need to be changed back in case we > > add per-guest quota. > > >>> > > >>> Extending this callback in that way would be still not too hard. We > > >>> could use > > >>> the argument to the callback. I would keep the argument of the > > >>> callback to > > >>> 'struct device *' as is, and will add a comment saying 'NULL' value of > > >>> the > > >>> argument means every devices. As an example, xenbus would pass > > >>> NULL-ending > > >>> array of the device pointers that need to free its resources. > > >>> > > >>> After seeing this race, I am now also thinking it could be better to > > >>> delegate > > >>> detailed control of each device to its driver, as some drivers have some > > >>> complicated and unique relation with its devices. > > >>> > > > > Wouldn't a get_device() before calling the callback and a put_device() > > afterwards avoid that problem? > > >>> > > >>> I didn't used the reference count manipulation operations because other > > >>> similar > > >>> parts also didn't. But, if there is no implicit reference count > > >>> guarantee, it > > >>> seems those operations are indeed necessary. 
> > >>> > > >>> That said, as get/put operations only adjust the reference count, those > > >>> will > > >>> not make the callback to wait until the linking of the 'backend' and > > >>> 'blkif' to > > >>> the device (xen_blkbk_probe()) is finished. Thus, the race could still > > >>> happen. > > >>> Or, am I missing something? > > >> > > >> No, I think we need a xenbus lock per device which will need to be > > >> taken in xen_blkbk_probe(), xenbus_dev_remove() and while calling the > > >> callback. > > > > > > I also agree that locking should be used at last. But, as each driver > > > manages > > > its devices and resources in their way, it could have its unique race > > > conditions. And, each unique race condition might have its unique > > > efficient > > > way to synchronize it. Therefore, I think the synchronization should be > > > done > > > by each driver, not by xenbus and thus we should make the callback to be > > >
Re: [Xen-devel] [PATCH v11 4/6] xen/blkback: Protect 'reclaim_memory()' with 'reclaim_lock'
On 17.12.19 17:07, SeongJae Park wrote: From: SeongJae Park The 'reclaim_memory()' callback of blkback could race with 'xen_blkbk_probe()' and 'xen_blkbk_remove()'. In that case, incompletely linked 'backend_info' and 'blkif' might be exposed to the callback, thus resulting in bad results including NULL dereference. This commit fixes the problem by applying the 'reclaim_lock' protection to those. Note that this commit is separated for review purpose only. As the previous commit might result in a race condition and might confuse bisection, please squash this commit into previous commit if possible. Signed-off-by: SeongJae Park Please merge this patch into patch 2. Juergen ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH v11 2/6] xenbus/backend: Protect xenbus callback with lock
On 17.12.19 17:07, SeongJae Park wrote: From: SeongJae Park 'reclaim_memory' callback can race with a driver code as this callback will be called from any memory pressure detected context. To deal with the case, this commit adds a spinlock in the 'xenbus_device'. Whenever 'reclaim_memory' callback is called, the lock of the device which passed to the callback as its argument is locked. Thus, drivers registering their 'reclaim_memory' callback should protect the data that might race with the callback with the lock by themselves. Signed-off-by: SeongJae Park --- drivers/xen/xenbus/xenbus_probe.c | 1 + drivers/xen/xenbus/xenbus_probe_backend.c | 10 -- include/xen/xenbus.h | 2 ++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 5b471889d723..b86393f172e6 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -472,6 +472,7 @@ int xenbus_probe_node(struct xen_bus_type *bus, goto fail; dev_set_name(>dev, "%s", devname); + spin_lock_init(>reclaim_lock); /* Register with generic device framework. */ err = device_register(>dev); diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c index 7e78ebef7c54..516aa64b9967 100644 --- a/drivers/xen/xenbus/xenbus_probe_backend.c +++ b/drivers/xen/xenbus/xenbus_probe_backend.c @@ -251,12 +251,18 @@ static int backend_probe_and_watch(struct notifier_block *notifier, static int backend_reclaim_memory(struct device *dev, void *data) { const struct xenbus_driver *drv; + struct xenbus_device *xdev; + unsigned long flags; if (!dev->driver) return 0; drv = to_xenbus_driver(dev->driver); - if (drv && drv->reclaim_memory) - drv->reclaim_memory(to_xenbus_device(dev)); + if (drv && drv->reclaim_memory) { + xdev = to_xenbus_device(dev); + spin_trylock_irqsave(>reclaim_lock, flags); You need spin_lock_irqsave() here. Or maybe spin_lock() would be fine, too? 
I can't see a reason why you'd want to disable irqs here. + drv->reclaim_memory(xdev); + spin_unlock_irqrestore(>reclaim_lock, flags); + } return 0; } diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index c861cfb6f720..d9468313061d 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -76,6 +76,8 @@ struct xenbus_device { enum xenbus_state state; struct completion down; struct work_struct work; + /* 'reclaim_memory' callback is called while this lock is acquired */ + spinlock_t reclaim_lock; }; static inline struct xenbus_device *to_xenbus_device(struct device *dev) Juergen ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [PATCH v11 6/6] xen/blkback: Consistently insert one empty line between functions
From: SeongJae Park The number of empty lines between functions in the xenbus.c is inconsistent. This trivial style cleanup commit fixes the file to consistently place only one empty line. Acked-by: Roger Pau Monné Signed-off-by: SeongJae Park --- drivers/block/xen-blkback/xenbus.c | 7 ++- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 20045827a391..453f97dd533d 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -432,7 +432,6 @@ static void xenvbd_sysfs_delif(struct xenbus_device *dev) device_remove_file(>dev, _attr_physical_device); } - static void xen_vbd_free(struct xen_vbd *vbd) { if (vbd->bdev) @@ -489,6 +488,7 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, handle, blkif->domid); return 0; } + static int xen_blkbk_remove(struct xenbus_device *dev) { struct backend_info *be = dev_get_drvdata(>dev); @@ -575,6 +575,7 @@ static void xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info if (err) dev_warn(>dev, "writing feature-discard (%d)", err); } + int xen_blkbk_barrier(struct xenbus_transaction xbt, struct backend_info *be, int state) { @@ -663,7 +664,6 @@ static int xen_blkbk_probe(struct xenbus_device *dev, return err; } - /* * Callback received when the hotplug scripts have placed the physical-device * node. Read it and the mode node, and create a vbd. If the frontend is @@ -755,7 +755,6 @@ static void backend_changed(struct xenbus_watch *watch, } } - /* * Callback received when the frontend's state changes. */ @@ -830,7 +829,6 @@ static void frontend_changed(struct xenbus_device *dev, } } - /* Once a memory pressure is detected, squeeze free page pools for a while. 
*/ static unsigned int buffer_squeeze_duration_ms = 10; module_param_named(buffer_squeeze_duration_ms, @@ -855,7 +853,6 @@ static void reclaim_memory(struct xenbus_device *dev) /* ** Connection ** */ - /* * Write the physical details regarding the block device to the store, and * switch to Connected state. -- 2.17.1 ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [PATCH v11 5/6] xen/blkback: Remove unnecessary static variable name prefixes
From: SeongJae Park A few of static variables in blkback have 'xen_blkif_' prefix, though it is unnecessary for static variables. This commit removes such prefixes. Reviewed-by: Roger Pau Monné Signed-off-by: SeongJae Park --- drivers/block/xen-blkback/blkback.c | 37 + 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 79f677aeb5cc..fbd67f8e4e4e 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -62,8 +62,8 @@ * IO workloads. */ -static int xen_blkif_max_buffer_pages = 1024; -module_param_named(max_buffer_pages, xen_blkif_max_buffer_pages, int, 0644); +static int max_buffer_pages = 1024; +module_param_named(max_buffer_pages, max_buffer_pages, int, 0644); MODULE_PARM_DESC(max_buffer_pages, "Maximum number of free pages to keep in each block backend buffer"); @@ -78,8 +78,8 @@ MODULE_PARM_DESC(max_buffer_pages, * algorithm. */ -static int xen_blkif_max_pgrants = 1056; -module_param_named(max_persistent_grants, xen_blkif_max_pgrants, int, 0644); +static int max_pgrants = 1056; +module_param_named(max_persistent_grants, max_pgrants, int, 0644); MODULE_PARM_DESC(max_persistent_grants, "Maximum number of grants to map persistently"); @@ -88,8 +88,8 @@ MODULE_PARM_DESC(max_persistent_grants, * use. The time is in seconds, 0 means indefinitely long. 
*/ -static unsigned int xen_blkif_pgrant_timeout = 60; -module_param_named(persistent_grant_unused_seconds, xen_blkif_pgrant_timeout, +static unsigned int pgrant_timeout = 60; +module_param_named(persistent_grant_unused_seconds, pgrant_timeout, uint, 0644); MODULE_PARM_DESC(persistent_grant_unused_seconds, "Time in seconds an unused persistent grant is allowed to " @@ -137,9 +137,8 @@ module_param(log_stats, int, 0644); static inline bool persistent_gnt_timeout(struct persistent_gnt *persistent_gnt) { - return xen_blkif_pgrant_timeout && - (jiffies - persistent_gnt->last_used >= - HZ * xen_blkif_pgrant_timeout); + return pgrant_timeout && (jiffies - persistent_gnt->last_used >= + HZ * pgrant_timeout); } static inline int get_free_page(struct xen_blkif_ring *ring, struct page **page) @@ -234,7 +233,7 @@ static int add_persistent_gnt(struct xen_blkif_ring *ring, struct persistent_gnt *this; struct xen_blkif *blkif = ring->blkif; - if (ring->persistent_gnt_c >= xen_blkif_max_pgrants) { + if (ring->persistent_gnt_c >= max_pgrants) { if (!blkif->vbd.overflow_max_grants) blkif->vbd.overflow_max_grants = 1; return -EBUSY; @@ -397,14 +396,13 @@ static void purge_persistent_gnt(struct xen_blkif_ring *ring) goto out; } - if (ring->persistent_gnt_c < xen_blkif_max_pgrants || - (ring->persistent_gnt_c == xen_blkif_max_pgrants && + if (ring->persistent_gnt_c < max_pgrants || + (ring->persistent_gnt_c == max_pgrants && !ring->blkif->vbd.overflow_max_grants)) { num_clean = 0; } else { - num_clean = (xen_blkif_max_pgrants / 100) * LRU_PERCENT_CLEAN; - num_clean = ring->persistent_gnt_c - xen_blkif_max_pgrants + - num_clean; + num_clean = (max_pgrants / 100) * LRU_PERCENT_CLEAN; + num_clean = ring->persistent_gnt_c - max_pgrants + num_clean; num_clean = min(ring->persistent_gnt_c, num_clean); pr_debug("Going to purge at least %u persistent grants\n", num_clean); @@ -599,8 +597,7 @@ static void print_stats(struct xen_blkif_ring *ring) current->comm, ring->st_oo_req, ring->st_rd_req, 
ring->st_wr_req, ring->st_f_req, ring->st_ds_req, -ring->persistent_gnt_c, -xen_blkif_max_pgrants); +ring->persistent_gnt_c, max_pgrants); ring->st_print = jiffies + msecs_to_jiffies(10 * 1000); ring->st_rd_req = 0; ring->st_wr_req = 0; @@ -660,7 +657,7 @@ int xen_blkif_schedule(void *arg) if (time_before(jiffies, blkif->buffer_squeeze_end)) shrink_free_pagepool(ring, 0); else - shrink_free_pagepool(ring, xen_blkif_max_buffer_pages); + shrink_free_pagepool(ring, max_buffer_pages); if (log_stats && time_after(jiffies, ring->st_print)) print_stats(ring); @@ -887,7 +884,7 @@ static int xen_blkbk_map(struct xen_blkif_ring *ring, continue; } if (use_persistent_gnts && - ring->persistent_gnt_c < xen_blkif_max_pgrants) { +
[Xen-devel] [PATCH v11 4/6] xen/blkback: Protect 'reclaim_memory()' with 'reclaim_lock'
From: SeongJae Park The 'reclaim_memory()' callback of blkback could race with 'xen_blkbk_probe()' and 'xen_blkbk_remove()'. In the case, incompletely linked 'backend_info' and 'blkif' might be exposed to the callback, thus result in bad results including NULL dereference. This commit fixes the problem by applying the 'reclaim_lock' protection to those. Note that this commit is separated for review purpose only. As the previous commit might result in race condition and might make bisect confuse, please squash this commit into previous commit if possible. Signed-off-by: SeongJae Park --- drivers/block/xen-blkback/xenbus.c | 11 +++ 1 file changed, 11 insertions(+) diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 4f6ea4feca79..20045827a391 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -492,6 +492,7 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, static int xen_blkbk_remove(struct xenbus_device *dev) { struct backend_info *be = dev_get_drvdata(>dev); + unsigned long flags; pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id); @@ -504,6 +505,7 @@ static int xen_blkbk_remove(struct xenbus_device *dev) be->backend_watch.node = NULL; } + spin_lock_irqsave(>reclaim_lock, flags); dev_set_drvdata(>dev, NULL); if (be->blkif) { @@ -512,6 +514,7 @@ static int xen_blkbk_remove(struct xenbus_device *dev) /* Put the reference we set in xen_blkif_alloc(). 
*/ xen_blkif_put(be->blkif); } + spin_unlock_irqrestore(>reclaim_lock, flags); return 0; } @@ -597,6 +600,7 @@ static int xen_blkbk_probe(struct xenbus_device *dev, int err; struct backend_info *be = kzalloc(sizeof(struct backend_info), GFP_KERNEL); + unsigned long flags; /* match the pr_debug in xen_blkbk_remove */ pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id); @@ -607,6 +611,7 @@ static int xen_blkbk_probe(struct xenbus_device *dev, return -ENOMEM; } be->dev = dev; + spin_lock_irqsave(>reclaim_lock, flags); dev_set_drvdata(>dev, be); be->blkif = xen_blkif_alloc(dev->otherend_id); @@ -614,8 +619,10 @@ static int xen_blkbk_probe(struct xenbus_device *dev, err = PTR_ERR(be->blkif); be->blkif = NULL; xenbus_dev_fatal(dev, err, "creating block interface"); + spin_unlock_irqrestore(>reclaim_lock, flags); goto fail; } + spin_unlock_irqrestore(>reclaim_lock, flags); err = xenbus_printf(XBT_NIL, dev->nodename, "feature-max-indirect-segments", "%u", @@ -838,6 +845,10 @@ static void reclaim_memory(struct xenbus_device *dev) { struct backend_info *be = dev_get_drvdata(>dev); + /* Device is registered but not probed yet */ + if (!be) + return; + be->blkif->buffer_squeeze_end = jiffies + msecs_to_jiffies(buffer_squeeze_duration_ms); } -- 2.17.1 ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [PATCH v11 3/6] xen/blkback: Squeeze page pools if a memory pressure is detected
From: SeongJae Park Each `blkif` has a free pages pool for the grant mapping. The size of the pool starts from zero and is increased on demand while processing the I/O requests. If the current I/O request handling is finished or 100 milliseconds have passed since the last I/O request handling, it checks and shrinks the pool to not exceed the size limit, `max_buffer_pages`. Therefore, host administrators can cause memory pressure in blkback by attaching a large number of block devices and inducing I/O. Such problematic situations can be avoided by limiting the maximum number of devices that can be attached, but finding the optimal limit is not so easy. An improperly set limit can result in memory pressure or resource underutilization. This commit avoids such problematic situations by squeezing the pools (returns every free page in the pool to the system) for a while (users can set this duration via a module parameter) if memory pressure is detected. Discussions === The `blkback`'s original shrinking mechanism returns only pages in the pool which are not currently being used by `blkback` to the system. In other words, the pages that are not mapped with granted pages. Because this commit is changing only the shrink limit but still uses the same freeing mechanism it does not touch pages which are currently mapping grants. Once memory pressure is detected, this commit keeps the squeezing limit for a user-specified time duration. The duration should be neither too long nor too short. If it is too long, the overhead incurred by the squeezing can reduce the I/O performance. If it is too short, `blkback` will not free enough pages to reduce the memory pressure. This commit sets the value as `10 milliseconds` by default because it is a short time in terms of I/O while it is a long time in terms of memory operations. Also, as the original shrinking mechanism runs at least every 100 milliseconds, this could be a somewhat reasonable choice. 
I also tested other durations (refer to the below section for more details) and confirmed that 10 milliseconds is the one that works best with the test. That said, the proper duration depends on actual configurations and workloads. That's why this commit allows users to set the duration as a module parameter. Memory Pressure Test To show how well this commit fixes the memory pressure situation, I configured a test environment on a xen-running virtualization system. On the `blkfront` running guest instances, I attach a large number of network-backed volume devices and induce I/O to those. Meanwhile, I measure the number of pages that are swapped in (pswpin) and out (pswpout) on the `blkback` running guest. The test ran twice, once for the `blkback` before this commit and once for that after this commit. As shown below, this commit has dramatically reduced the memory pressure: pswpin pswpout before 76,672 185,799 after 212 3,325 Optimal Aggressive Shrinking Duration - To find the best squeezing duration, I repeated the test with three different durations (1ms, 10ms, and 100ms). The results are as below: duration pswpin pswpout 1 852 6,424 10 212 3,325 100 203 3,340 As expected, the memory pressure has decreased as the duration is increased, but the reduction stopped from the `10ms`. Based on these results, I chose the default duration as 10ms. Performance Overhead Test = This commit could incur I/O performance degradation under severe memory pressure because the squeezing will require more page allocations per I/O. To show the overhead, I artificially made a worst-case squeezing situation and measured the I/O performance of a `blkfront` running guest. For the artificial squeezing, I set the `blkback.max_buffer_pages` using the `/sys/module/xen_blkback/parameters/max_buffer_pages` file. In this test, I set the value to `1024` and `0`. The `1024` is the default value. Setting the value as `0` is the same as a situation doing the squeezing always (worst-case). 
If the underlying block device is slow enough, the squeezing overhead could be hidden. For that reason, I use a fast block device, namely the rbd[1]: # xl block-attach guest phy:/dev/ram0 xvdb w For the I/O performance measurement, I run a simple `dd` command 5 times directly to the device as below and collect the 'MB/s' results. $ for i in {1..5}; do dd if=/dev/zero of=/dev/xvdb \ bs=4k count=$((256*512)); sync; done The results are as below. 'max_pgs' represents the value of the `blkback.max_buffer_pages` parameter. max_pgs Min Max Median Avg Stddev 0 417 423 420 419.4 2.5099801 1024 414 425 416 417.8 4.4384682 No difference proven at 95.0% confidence In short, even worst case squeezing
[Xen-devel] [PATCH v11 2/6] xenbus/backend: Protect xenbus callback with lock
From: SeongJae Park 'reclaim_memory' callback can race with a driver code as this callback will be called from any memory pressure detected context. To deal with the case, this commit adds a spinlock in the 'xenbus_device'. Whenever 'reclaim_memory' callback is called, the lock of the device which passed to the callback as its argument is locked. Thus, drivers registering their 'reclaim_memory' callback should protect the data that might race with the callback with the lock by themselves. Signed-off-by: SeongJae Park --- drivers/xen/xenbus/xenbus_probe.c | 1 + drivers/xen/xenbus/xenbus_probe_backend.c | 10 -- include/xen/xenbus.h | 2 ++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 5b471889d723..b86393f172e6 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -472,6 +472,7 @@ int xenbus_probe_node(struct xen_bus_type *bus, goto fail; dev_set_name(>dev, "%s", devname); + spin_lock_init(>reclaim_lock); /* Register with generic device framework. 
*/ err = device_register(>dev); diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c index 7e78ebef7c54..516aa64b9967 100644 --- a/drivers/xen/xenbus/xenbus_probe_backend.c +++ b/drivers/xen/xenbus/xenbus_probe_backend.c @@ -251,12 +251,18 @@ static int backend_probe_and_watch(struct notifier_block *notifier, static int backend_reclaim_memory(struct device *dev, void *data) { const struct xenbus_driver *drv; + struct xenbus_device *xdev; + unsigned long flags; if (!dev->driver) return 0; drv = to_xenbus_driver(dev->driver); - if (drv && drv->reclaim_memory) - drv->reclaim_memory(to_xenbus_device(dev)); + if (drv && drv->reclaim_memory) { + xdev = to_xenbus_device(dev); + spin_trylock_irqsave(>reclaim_lock, flags); + drv->reclaim_memory(xdev); + spin_unlock_irqrestore(>reclaim_lock, flags); + } return 0; } diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index c861cfb6f720..d9468313061d 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -76,6 +76,8 @@ struct xenbus_device { enum xenbus_state state; struct completion down; struct work_struct work; + /* 'reclaim_memory' callback is called while this lock is acquired */ + spinlock_t reclaim_lock; }; static inline struct xenbus_device *to_xenbus_device(struct device *dev) -- 2.17.1 ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [PATCH v11 0/6] xenbus/backend: Add a memory pressure handler callback
Granting pages consumes backend system memory. In systems configured with insufficient spare memory for those pages, it can cause a memory pressure situation. However, finding the optimal amount of the spare memory is challenging for large systems having dynamic resource utilization patterns. Also, such a static configuration might lack flexibility. To mitigate such problems, this patchset adds a memory reclaim callback to 'xenbus_driver' (patch 1) and then introduces a lock for race condition avoidance (patch 2). Those two patches could be merged into one patch if necessary. The third patch applies the callback mechanism to mitigate the problem in 'xen-blkback' (patch 3), but it lacks use of the race condition mitigation. The following change (patch 4) applies the race protection mechanism to the blkback. Patch 3 and patch 4 have been separated only for review convenience. I highly recommend merging those into one patch, as the version with only patch 3 applied might confuse bisecting. The fifth and sixth patches are trivial cleanups; those fix nits we found during the development of this patchset. Note that patches 1, 3, 5, and 6 are the same as in the previous version. I made the changes in this version as different commits (only the second and fourth patches) to make review more comfortable. In particular, the third and fourth patches should be merged into one patch, as the third one alone might make bisecting confusing. The next version of this patchset will also merge those. Base Version This patch is based on v5.4. 
A complete tree is also available at my public git repo: https://github.com/sjp38/linux/tree/patches/blkback/buffer_squeeze/v11 Patch History - Changes from v10 (https://lore.kernel.org/xen-devel/20191216124527.30306-1-sjp...@amazon.com/) - Fix race condition (reported by SeongJae, suggested by Juergen) Changes from v9 (https://lore.kernel.org/xen-devel/20191213153546.17425-1-sjp...@amazon.de/) - Add 'Reviewed-by' and 'Acked-by' from Roger Pau Monné - Update the commit message for overhead test of the 2nd path Changes from v8 (https://lore.kernel.org/xen-devel/20191213130211.24011-1-sjp...@amazon.de/) - Drop 'Reviewed-by: Juergen' from the second patch (suggested by Roger Pau Monné) - Update contact of the new module param to SeongJae Park (suggested by Roger Pau Monné) - Wordsmith the description of the parameter (suggested by Roger Pau Monné) - Fix dumb bugs (suggested by Roger Pau Monné) - Move module param definition to xenbus.c and reduce the number of lines for this change (suggested by Roger Pau Monné) - Add a comment for the new callback, reclaim_memory, as other callbacks also have - Add another trivial cleanup of xenbus.c file (4th patch) Changes from v7 (https://lore.kernel.org/xen-devel/20191211181016.14366-1-sjp...@amazon.de/) - Update sysfs-driver-xen-blkback for new parameter (suggested by Roger Pau Monné) - Use per-xen_blkif buffer_squeeze_end instead of global variable (suggested by Roger Pau Monné) Changes from v6 (https://lore.kernel.org/linux-block/20191211042428.5961-1-sjp...@amazon.de/) - Remove more unnecessary prefixes (suggested by Roger Pau Monné) - Constify a variable (suggested by Roger Pau Monné) - Rename 'reclaim' into 'reclaim_memory' (suggested by Roger Pau Monné) - More wordsmith of the commit message (suggested by Roger Pau Monné) Changes from v5 (https://lore.kernel.org/linux-block/20191210080628.5264-1-sjp...@amazon.de/) - Wordsmith the commit messages (suggested by Roger Pau Monné) - Change the reclaim callback return type 
(suggested by Roger Pau Monné) - Change the type of the blkback squeeze duration variable (suggested by Roger Pau Monné) - Add a patch for removal of unnecessary static variable name prefixes (suggested by Roger Pau Monné) - Fix checkpatch.pl warnings Changes from v4 (https://lore.kernel.org/xen-devel/20191209194305.20828-1-sjp...@amazon.com/) - Remove domain id parameter from the callback (suggested by Juergen Gross) - Rename xen-blkback module parameter (suggested by Stefan Nuernburger) Changes from v3 (https://lore.kernel.org/xen-devel/20191209085839.21215-1-sjp...@amazon.com/) - Add general callback in xen_driver and use it (suggested by Juergen Gross) Changes from v2 (https://lore.kernel.org/linux-block/af195033-23d5-38ed-b73b-f6e2e3b34...@amazon.com) - Rename the module parameter and variables for brevity (aggressive shrinking -> squeezing) Changes from v1 (https://lore.kernel.org/xen-devel/20191204113419.2298-1-sjp...@amazon.com/) - Adjust the description to not use the term, `arbitrarily` (suggested by Paul Durrant) - Specify time unit of the duration in the parameter description, (suggested by Maximilian Heyne) - Change default aggressive shrinking duration from 1ms to 10ms - Merge two patches into one single patch SeongJae Park (6): xenbus/backend: Add memory pressure handler callback xenbus/backend: Protect xenbus callback with
[Xen-devel] [PATCH v11 1/6] xenbus/backend: Add memory pressure handler callback
From: SeongJae Park Granting pages consumes backend system memory. In systems configured with insufficient spare memory for those pages, it can cause a memory pressure situation. However, finding the optimal amount of the spare memory is challenging for large systems having dynamic resource utilization patterns. Also, such a static configuration might lack flexibility. To mitigate such problems, this commit adds a memory reclaim callback to 'xenbus_driver'. If a memory pressure is detected, 'xenbus' requests every backend driver to voluntarily release its memory. Note that it would be possible to improve the callback facility for more sophisticated handlings of general pressures. For example, it would be possible to monitor the memory consumption of each device and issue the release requests to only devices which are causing the pressure. Also, the callback could be extended to handle not only memory, but general resources. Nevertheless, this version of the implementation defers such sophisticated goals as a future work. Reviewed-by: Juergen Gross Reviewed-by: Roger Pau Monné Signed-off-by: SeongJae Park --- drivers/xen/xenbus/xenbus_probe_backend.c | 32 +++ include/xen/xenbus.h | 1 + 2 files changed, 33 insertions(+) diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c index b0bed4faf44c..7e78ebef7c54 100644 --- a/drivers/xen/xenbus/xenbus_probe_backend.c +++ b/drivers/xen/xenbus/xenbus_probe_backend.c @@ -248,6 +248,35 @@ static int backend_probe_and_watch(struct notifier_block *notifier, return NOTIFY_DONE; } +static int backend_reclaim_memory(struct device *dev, void *data) +{ + const struct xenbus_driver *drv; + + if (!dev->driver) + return 0; + drv = to_xenbus_driver(dev->driver); + if (drv && drv->reclaim_memory) + drv->reclaim_memory(to_xenbus_device(dev)); + return 0; +} + +/* + * Returns 0 always because we are using shrinker to only detect memory + * pressure. 
+ */ +static unsigned long backend_shrink_memory_count(struct shrinker *shrinker, + struct shrink_control *sc) +{ + bus_for_each_dev(_backend.bus, NULL, NULL, + backend_reclaim_memory); + return 0; +} + +static struct shrinker backend_memory_shrinker = { + .count_objects = backend_shrink_memory_count, + .seeks = DEFAULT_SEEKS, +}; + static int __init xenbus_probe_backend_init(void) { static struct notifier_block xenstore_notifier = { @@ -264,6 +293,9 @@ static int __init xenbus_probe_backend_init(void) register_xenstore_notifier(_notifier); + if (register_shrinker(_memory_shrinker)) + pr_warn("shrinker registration failed\n"); + return 0; } subsys_initcall(xenbus_probe_backend_init); diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index 869c816d5f8c..c861cfb6f720 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -104,6 +104,7 @@ struct xenbus_driver { struct device_driver driver; int (*read_otherend_details)(struct xenbus_device *dev); int (*is_ready)(struct xenbus_device *dev); + void (*reclaim_memory)(struct xenbus_device *dev); }; static inline struct xenbus_driver *to_xenbus_driver(struct device_driver *drv) -- 2.17.1 ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [XEN PATCH v1 1/1] x86/vm_event: add fast single step
Andrew, Tamas thank you very much. I will improve the patch. December 17, 2019 3:13:42 PM UTC, Andrew Cooper пишет: >On 17/12/2019 15:10, Tamas K Lengyel wrote: >> On Tue, Dec 17, 2019 at 8:08 AM Tamas K Lengyel >wrote: >>> On Tue, Dec 17, 2019 at 7:48 AM Andrew Cooper > wrote: On 17/12/2019 14:40, Sergey Kovalev wrote: > On break point event eight context switches occures. > > With fast single step it is possible to shorten path for two >context > switches > and gain 35% spead-up. > > Was tested on Debian branch of Xen 4.12. See at: > >https://github.com/skvl/xen/tree/debian/knorrie/4.12/fast-singlestep > > Rebased on master: > https://github.com/skvl/xen/tree/fast-singlestep > > Signed-off-by: Sergey Kovalev 35% looks like a good number, but what is "fast single step"? All >this appears to be is plumbing for to cause an altp2m switch on single >step. >>> Yes, a better explanation would be much needed here and I'm not 100% >>> sure it correctly implements what I think it tries to. >>> >>> This is my interpretation of what the idea is: when using DRAKVUF >(or >>> another system using altp2m with shadow pages similar to what I >>> describe in >https://xenproject.org/2016/04/13/stealthy-monitoring-with-xen-altp2m), >>> after a breakpoint is hit the system switches to the default >>> unrestricted altp2m view with singlestep enabled. When the >singlestep >>> traps to Xen another vm_event is sent to the monitor agent, which >then >>> normally disables singlestepping and switches the altp2m view back >to >>> the restricted view. This patch looks like its short-circuiting that >>> last part so that it doesn't need to send the vm_event out for the >>> singlestep event and should switch back to the restricted view in >Xen >>> automatically. It's a nice optimization. But what seems to be >missing >>> is the altp2m switch itself. >> Never mind, p2m_altp2m_check does the altp2m switch as well, so this >> patch implements what I described above. 
Please update the patch >> message to be more descriptive (you can copy my description from >> above). > >Also please read CODING_STYLE in the root of the xen repository. The >important ones you need to fix are spaces in "if ( ... )" statements, >and binary operators on the end of the first line rather than the >beginning of the continuation. > >~Andrew -- Простите за краткость, создано в K-9 Mail. ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH 1/2] Tidy up whitespace and formatting in file to be consistent.
On Wed, Dec 18, 2019 at 02:44:51AM +1100, Steven Haigh wrote: > Ok, if its going to be 4 spaces for each file, I can batch convert & tidy > stuff up... > > The file I changed had both types, so I went with my own preference :) > > If it might be a better approach, I'll sort out the majority of scripts in > that directory - and do no function changes and post a series that does > nothing but cleanup - then do the brctl / ip changes on top of that in a > different patch. > Like Juergen said on IRC, if you think that patch should be backported (either by upstream or downstream maintainers), it will make people's life easier if that goes in first. Wei. ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [PATCH] xsm: hide detailed Xen version from unprivileged guests
Hide the following information that can help identify the running Xen binary version: XENVER_extraversion XENVER_compile_info XENVER_capabilities XENVER_changeset XENVER_commandline XENVER_build_id Return a more customer friendly empty string instead of "<denied>" which would be shown in tools like dmidecode. But allow guests to see this information in Debug builds of Xen. Signed-off-by: Sergey Dyasli --- CC: Andrew Cooper CC: George Dunlap CC: Ian Jackson CC: Jan Beulich CC: Julien Grall CC: Konrad Rzeszutek Wilk CC: Stefano Stabellini CC: Wei Liu CC: Daniel De Graaf --- xen/common/version.c| 2 +- xen/include/xsm/dummy.h | 15 ++- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/xen/common/version.c b/xen/common/version.c index 937eb1281c..cc621ab76a 100644 --- a/xen/common/version.c +++ b/xen/common/version.c @@ -67,7 +67,7 @@ const char *xen_banner(void) const char *xen_deny(void) { -return "<denied>"; +return ""; } static const void *build_id_p __read_mostly; diff --git a/xen/include/xsm/dummy.h b/xen/include/xsm/dummy.h index b8e185e6fa..4a1a1bf2bd 100644 --- a/xen/include/xsm/dummy.h +++ b/xen/include/xsm/dummy.h @@ -750,16 +750,21 @@ static XSM_INLINE int xsm_xen_version (XSM_DEFAULT_ARG uint32_t op) case XENVER_get_features: /* These sub-ops ignore the permission checks and return data. */ return 0; -case XENVER_extraversion: -case XENVER_compile_info: -case XENVER_capabilities: -case XENVER_changeset: case XENVER_pagesize: case XENVER_guest_handle: /* These MUST always be accessible to any guest by default. */ return xsm_default_action(XSM_HOOK, current->domain, NULL); + +case XENVER_extraversion: +case XENVER_compile_info: +case XENVER_capabilities: +case XENVER_changeset: +case XENVER_commandline: +case XENVER_build_id: default: -return xsm_default_action(XSM_PRIV, current->domain, NULL); +/* Hide information from guests only in Release builds. */ +return xsm_default_action(debug_build() ? 
XSM_HOOK : XSM_PRIV, + current->domain, NULL); } } -- 2.17.1 ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH 1/2] Tidy up whitespace and formatting in file to be consistent.
Ok, if its going to be 4 spaces for each file, I can batch convert & tidy stuff up... The file I changed had both types, so I went with my own preference :) If it might be a better approach, I'll sort out the majority of scripts in that directory - and do no function changes and post a series that does nothing but cleanup - then do the brctl / ip changes on top of that in a different patch. I might as well do them all - and it makes sense to do nothing but cleanup, then functional changes based on the cleaned up code. Steven Haigh net...@crc.id.au https://www.crc.id.au On Tue, Dec 17, 2019 at 14:13, Wei Liu wrote: On Fri, Dec 13, 2019 at 03:08:34PM +1100, Steven Haigh wrote: Signed-off-by: Steven Haigh Acked-by: Wei Liu I will need to add tools/hotplug to the subject line and the following commit message: Use 4 spaces for indentation throughout the file. No functional change. Wei. ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH net-next 2/3] xen-netback: switch state to InitWait at the end of netback_probe()...
On Tue, Dec 17, 2019 at 01:32:17PM +, Paul Durrant wrote: > ...as the comment above the function states. > > The switch to Initialising at the start of the function is somewhat bogus > as the toolstack will have set that initial state anyway. To behave > correctly, a backend should switch to InitWait once it has set up all > xenstore values that may be required by an initialising frontend. This > patch calls backend_switch_state() to make the transition at the > appropriate point. > > NOTE: backend_switch_state() ignores errors from xenbus_switch_state() > and so this patch removes an error path from netback_probe(). This > means a failure to change state at this stage (in the absence of > other failures) will leave the device instantiated. This is highly > unlikely to happen as a failure to change state would indicate a > failure to write to xenstore, and that will trigger other error > paths. Also, a 'stuck' device can still be cleaned up using 'unbind' > in any case. > > Signed-off-by: Paul Durrant Acked-by: Wei Liu ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH net-next 1/3] xen-netback: move netback_probe() and netback_remove() to the end...
On Tue, Dec 17, 2019 at 01:32:16PM +, Paul Durrant wrote: > ...of xenbus.c > > This is a cosmetic function re-ordering to reduce churn in a subsequent > patch. Some style fix-up was done to make checkpatch.pl happier. > > No functional change. > > Signed-off-by: Paul Durrant Acked-by: Wei Liu ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH net-next 3/3] xen-netback: remove 'hotplug-status' once it has served its purpose
On Tue, Dec 17, 2019 at 01:32:18PM +, Paul Durrant wrote: > Removing the 'hotplug-status' node in netback_remove() is wrong; the script > may not have completed. Only remove the node once the watch has fired and > has been unregistered. > > Signed-off-by: Paul Durrant Acked-by: Wei Liu ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH V4 1/4] x86/mm: Add array_index_nospec to guest provided index values
On Tue, Dec 17, 2019 at 8:12 AM Alexandru Stefan ISAILA wrote: > > This patch aims to sanitize indexes, potentially guest provided > values, for altp2m_eptp[] and altp2m_p2m[] arrays. > > Signed-off-by: Alexandru Isaila LGTM, thanks! Acked-by: Tamas K Lengyel ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [RFC PATCH 3/3] xen/netback: Fix grant copy across page boundary with KASAN
> -Original Message- > From: Xen-devel On Behalf Of > Sergey Dyasli > Sent: 17 December 2019 14:08 > To: xen-de...@lists.xen.org; kasan-...@googlegroups.com; linux- > ker...@vger.kernel.org > Cc: Juergen Gross ; Sergey Dyasli > ; Stefano Stabellini ; > George Dunlap ; Ross Lagerwall > ; Alexander Potapenko ; > Andrey Ryabinin ; Boris Ostrovsky > ; Dmitry Vyukov > Subject: [Xen-devel] [RFC PATCH 3/3] xen/netback: Fix grant copy across > page boundary with KASAN > > From: Ross Lagerwall > > When KASAN (or SLUB_DEBUG) is turned on, the normal expectation that > allocations are aligned to the next power of 2 of the size does not > hold. Therefore, handle grant copies that cross page boundaries. > > Signed-off-by: Ross Lagerwall > Signed-off-by: Sergey Dyasli Would have been nice to cc netback maintainers... > --- > drivers/net/xen-netback/common.h | 2 +- > drivers/net/xen-netback/netback.c | 55 --- > 2 files changed, 45 insertions(+), 12 deletions(-) > > diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen- > netback/common.h > index 05847eb91a1b..e57684415edd 100644 > --- a/drivers/net/xen-netback/common.h > +++ b/drivers/net/xen-netback/common.h > @@ -155,7 +155,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */ > struct pending_tx_info pending_tx_info[MAX_PENDING_REQS]; > grant_handle_t grant_tx_handle[MAX_PENDING_REQS]; > > - struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS]; > + struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS * 2]; > struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS]; > struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS]; > /* passed to gnttab_[un]map_refs with pages under (un)mapping */ > diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen- > netback/netback.c > index 0020b2e8c279..1541b6e0cc62 100644 > --- a/drivers/net/xen-netback/netback.c > +++ b/drivers/net/xen-netback/netback.c > @@ -320,6 +320,7 @@ static int xenvif_count_requests(struct xenvif_queue > *queue, > > struct xenvif_tx_cb { > u16 
pending_idx; > + u8 copies; > }; I know we're a way off the limit (48 bytes) but I wonder if we ought to have a compile time check here that we're not overflowing skb->cb. > > #define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb) > @@ -439,6 +440,7 @@ static int xenvif_tx_check_gop(struct xenvif_queue > *queue, > { > struct gnttab_map_grant_ref *gop_map = *gopp_map; > u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx; > + u8 copies = XENVIF_TX_CB(skb)->copies; > /* This always points to the shinfo of the skb being checked, which >* could be either the first or the one on the frag_list >*/ > @@ -450,23 +452,27 @@ static int xenvif_tx_check_gop(struct xenvif_queue > *queue, > int nr_frags = shinfo->nr_frags; > const bool sharedslot = nr_frags && > frag_get_pending_idx(>frags[0]) == > pending_idx; > - int i, err; > + int i, err = 0; > > - /* Check status of header. */ > - err = (*gopp_copy)->status; > - if (unlikely(err)) { > - if (net_ratelimit()) > - netdev_dbg(queue->vif->dev, > + while (copies) { > + /* Check status of header. */ > + int newerr = (*gopp_copy)->status; > + if (unlikely(newerr)) { > + if (net_ratelimit()) > + netdev_dbg(queue->vif->dev, > "Grant copy of header failed! status: %d > pending_idx: %u ref: %u\n", > (*gopp_copy)->status, > pending_idx, > (*gopp_copy)->source.u.ref); > - /* The first frag might still have this slot mapped */ > - if (!sharedslot) > - xenvif_idx_release(queue, pending_idx, > -XEN_NETIF_RSP_ERROR); > + /* The first frag might still have this slot mapped */ > + if (!sharedslot && !err) > + xenvif_idx_release(queue, pending_idx, > +XEN_NETIF_RSP_ERROR); Can't this be done after the loop, if there is an accumulated err? I think it would make the code slightly neater. 
> + err = newerr; > + } > + (*gopp_copy)++; > + copies--; > } > - (*gopp_copy)++; > > check_frags: > for (i = 0; i < nr_frags; i++, gop_map++) { > @@ -910,6 +916,7 @@ static void xenvif_tx_build_gops(struct xenvif_queue > *queue, > xenvif_tx_err(queue, , extra_count, idx); > break; > } > + XENVIF_TX_CB(skb)->copies = 0; > > skb_shinfo(skb)->nr_frags = ret; > if (data_len < txreq.size) > @@ -933,6 +940,7 @@ static void
[Xen-devel] [PATCH V4 4/4] x86/mm: Make use of the default access param from xc_altp2m_create_view
At this moment the default_access param from xc_altp2m_create_view is not used. This patch assigns default_access to p2m->default_access at the time of initializing a new altp2m view. Signed-off-by: Alexandru Isaila --- CC: Jan Beulich CC: Andrew Cooper CC: Wei Liu CC: "Roger Pau Monné" CC: George Dunlap CC: Ian Jackson CC: Julien Grall CC: Konrad Rzeszutek Wilk CC: Stefano Stabellini CC: Razvan Cojocaru CC: Tamas K Lengyel CC: Petre Pircalabu CC: George Dunlap --- Changes since V3: - Change type of hvmmem_default_access to xenmem_access_t - Fix style issues - Release lock before return. --- xen/arch/x86/hvm/hvm.c | 3 ++- xen/arch/x86/mm/mem_access.c| 6 +++--- xen/arch/x86/mm/p2m.c | 27 ++- xen/include/asm-x86/p2m.h | 3 ++- xen/include/public/hvm/hvm_op.h | 2 -- xen/include/xen/mem_access.h| 4 6 files changed, 33 insertions(+), 12 deletions(-) diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c index a129049d6b..d4b19d2412 100644 --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -4687,7 +4687,8 @@ static int do_altp2m_op( } case HVMOP_altp2m_create_p2m: -if ( !(rc = p2m_init_next_altp2m(d, )) ) +if ( !(rc = p2m_init_next_altp2m(d, , + a.u.view.hvmmem_default_access)) ) rc = __copy_to_guest(arg, , 1) ? 
-EFAULT : 0; break; diff --git a/xen/arch/x86/mm/mem_access.c b/xen/arch/x86/mm/mem_access.c index 70f3528bb1..288c865ffa 100644 --- a/xen/arch/x86/mm/mem_access.c +++ b/xen/arch/x86/mm/mem_access.c @@ -314,9 +314,9 @@ static int set_mem_access(struct domain *d, struct p2m_domain *p2m, return rc; } -static bool xenmem_access_to_p2m_access(struct p2m_domain *p2m, -xenmem_access_t xaccess, -p2m_access_t *paccess) +bool xenmem_access_to_p2m_access(struct p2m_domain *p2m, + xenmem_access_t xaccess, + p2m_access_t *paccess) { static const p2m_access_t memaccess[] = { #define ACCESS(ac) [XENMEM_access_##ac] = p2m_access_##ac diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c index d381f6877f..d67326f8b7 100644 --- a/xen/arch/x86/mm/p2m.c +++ b/xen/arch/x86/mm/p2m.c @@ -25,6 +25,7 @@ #include /* copy_from_guest() */ #include +#include #include #include #include @@ -2533,7 +2534,8 @@ void p2m_flush_altp2m(struct domain *d) altp2m_list_unlock(d); } -static int p2m_activate_altp2m(struct domain *d, unsigned int idx) +static int p2m_activate_altp2m(struct domain *d, unsigned int idx, + p2m_access_t hvmmem_default_access) { struct p2m_domain *hostp2m, *p2m; int rc; @@ -2559,7 +2561,7 @@ static int p2m_activate_altp2m(struct domain *d, unsigned int idx) goto out; } -p2m->default_access = hostp2m->default_access; +p2m->default_access = hvmmem_default_access; p2m->domain = hostp2m->domain; p2m->global_logdirty = hostp2m->global_logdirty; p2m->min_remapped_gfn = gfn_x(INVALID_GFN); @@ -2576,6 +2578,7 @@ static int p2m_activate_altp2m(struct domain *d, unsigned int idx) int p2m_init_altp2m_by_id(struct domain *d, unsigned int idx) { int rc = -EINVAL; +struct p2m_domain *hostp2m = p2m_get_hostp2m(d); if ( idx >= MAX_ALTP2M ) return rc; @@ -2583,16 +2586,22 @@ int p2m_init_altp2m_by_id(struct domain *d, unsigned int idx) altp2m_list_lock(d); if ( d->arch.altp2m_eptp[idx] == mfn_x(INVALID_MFN) ) -rc = p2m_activate_altp2m(d, idx); +rc = p2m_activate_altp2m(d, idx, 
hostp2m->default_access); altp2m_list_unlock(d); return rc; } -int p2m_init_next_altp2m(struct domain *d, uint16_t *idx) +int p2m_init_next_altp2m(struct domain *d, uint16_t *idx, + xenmem_access_t hvmmem_default_access) { int rc = -EINVAL; unsigned int i; +p2m_access_t a; +struct p2m_domain *p2m; + +if ( hvmmem_default_access > XENMEM_access_default ) +return rc; altp2m_list_lock(d); @@ -2601,7 +2610,15 @@ int p2m_init_next_altp2m(struct domain *d, uint16_t *idx) if ( d->arch.altp2m_eptp[i] != mfn_x(INVALID_MFN) ) continue; -rc = p2m_activate_altp2m(d, i); +p2m = d->arch.altp2m_p2m[i]; + +if ( !xenmem_access_to_p2m_access(p2m, hvmmem_default_access, ) ) +{ +altp2m_list_unlock(d); +return -EINVAL; +} + +rc = p2m_activate_altp2m(d, i, a); if ( !rc ) *idx = i; diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h index 94285db1b4..ac2d2787f4 100644 --- a/xen/include/asm-x86/p2m.h +++ b/xen/include/asm-x86/p2m.h @@ -884,7 +884,8 @@ bool p2m_altp2m_get_or_propagate(struct p2m_domain *ap2m, unsigned long gfn_l, int
Re: [Xen-devel] [XEN PATCH v1 1/1] x86/vm_event: add fast single step
On 17/12/2019 15:10, Tamas K Lengyel wrote: > On Tue, Dec 17, 2019 at 8:08 AM Tamas K Lengyel wrote: >> On Tue, Dec 17, 2019 at 7:48 AM Andrew Cooper >> wrote: >>> On 17/12/2019 14:40, Sergey Kovalev wrote: On break point event eight context switches occures. With fast single step it is possible to shorten path for two context switches and gain 35% spead-up. Was tested on Debian branch of Xen 4.12. See at: https://github.com/skvl/xen/tree/debian/knorrie/4.12/fast-singlestep Rebased on master: https://github.com/skvl/xen/tree/fast-singlestep Signed-off-by: Sergey Kovalev >>> 35% looks like a good number, but what is "fast single step"? All this >>> appears to be is plumbing for to cause an altp2m switch on single step. >> Yes, a better explanation would be much needed here and I'm not 100% >> sure it correctly implements what I think it tries to. >> >> This is my interpretation of what the idea is: when using DRAKVUF (or >> another system using altp2m with shadow pages similar to what I >> describe in >> https://xenproject.org/2016/04/13/stealthy-monitoring-with-xen-altp2m), >> after a breakpoint is hit the system switches to the default >> unrestricted altp2m view with singlestep enabled. When the singlestep >> traps to Xen another vm_event is sent to the monitor agent, which then >> normally disables singlestepping and switches the altp2m view back to >> the restricted view. This patch looks like its short-circuiting that >> last part so that it doesn't need to send the vm_event out for the >> singlestep event and should switch back to the restricted view in Xen >> automatically. It's a nice optimization. But what seems to be missing >> is the altp2m switch itself. > Never mind, p2m_altp2m_check does the altp2m switch as well, so this > patch implements what I described above. Please update the patch > message to be more descriptive (you can copy my description from > above). Also please read CODING_STYLE in the root of the xen repository. 
The important ones you need to fix are spaces in "if ( ... )" statements, and binary operators on the end of the first line rather than the beginning of the continuation. ~Andrew ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [PATCH V4 3/4] x86/mm: Pull out the p2m specifics from p2m_init_altp2m_ept
Signed-off-by: Alexandru Isaila --- CC: Jun Nakajima CC: Kevin Tian CC: George Dunlap CC: Jan Beulich CC: Andrew Cooper CC: Wei Liu CC: "Roger Pau Monné" --- xen/arch/x86/mm/p2m-ept.c | 6 -- xen/arch/x86/mm/p2m.c | 6 ++ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c index e088a63f56..362f7079ab 100644 --- a/xen/arch/x86/mm/p2m-ept.c +++ b/xen/arch/x86/mm/p2m-ept.c @@ -1358,13 +1358,7 @@ void p2m_init_altp2m_ept(struct domain *d, unsigned int i) struct p2m_domain *hostp2m = p2m_get_hostp2m(d); struct ept_data *ept; -p2m->default_access = hostp2m->default_access; -p2m->domain = hostp2m->domain; - -p2m->global_logdirty = hostp2m->global_logdirty; p2m->ept.ad = hostp2m->ept.ad; -p2m->min_remapped_gfn = gfn_x(INVALID_GFN); -p2m->max_mapped_pfn = p2m->max_remapped_gfn = 0; ept = >ept; ept->mfn = pagetable_get_pfn(p2m_get_pagetable(p2m)); d->arch.altp2m_eptp[array_index_nospec(i, MAX_EPTP)] = ept->eptp; diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c index 253cab3458..d381f6877f 100644 --- a/xen/arch/x86/mm/p2m.c +++ b/xen/arch/x86/mm/p2m.c @@ -2559,6 +2559,12 @@ static int p2m_activate_altp2m(struct domain *d, unsigned int idx) goto out; } +p2m->default_access = hostp2m->default_access; +p2m->domain = hostp2m->domain; +p2m->global_logdirty = hostp2m->global_logdirty; +p2m->min_remapped_gfn = gfn_x(INVALID_GFN); +p2m->max_mapped_pfn = p2m->max_remapped_gfn = 0; + p2m_init_altp2m_ept(d, idx); out: -- 2.17.1 ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [PATCH V4 2/4] x86/altp2m: Add hypercall to set a range of sve bits
By default the sve bits are not set. This patch adds a new hypercall, xc_altp2m_set_supress_ve_multi(), to set a range of sve bits. The core function, p2m_set_suppress_ve_multi(), does not break in case of an error and it is doing a best effort for setting the bits in the given range. A check for continuation is made in order to have preemption on big ranges. The gfn of the first error is stored in xen_hvm_altp2m_suppress_ve_multi.first_error and the error code is stored in xen_hvm_altp2m_suppress_ve_multi.first_error_code. If no error occurred the values will be 0. Signed-off-by: Alexandru Isaila --- CC: Ian Jackson CC: Wei Liu CC: Andrew Cooper CC: George Dunlap CC: Jan Beulich CC: Julien Grall CC: Konrad Rzeszutek Wilk CC: Stefano Stabellini CC: "Roger Pau Monné" CC: George Dunlap CC: Razvan Cojocaru CC: Tamas K Lengyel CC: Petre Pircalabu --- Changes since V3: - Update commit message - Check rc and __copy_to_guest() in the same if - Fix style issue - Fix comment typo - Init p2m with host_p2m - Use array_index_nospec() in altp2m_p2m[] and altp2m_eptp[] - Drop opaque - Use pad2 to return first error code - Update first_gfn - Stop the range loop at cpuid->extd.maxphysaddr.
--- tools/libxc/include/xenctrl.h | 4 +++ tools/libxc/xc_altp2m.c | 33 + xen/arch/x86/hvm/hvm.c | 15 xen/arch/x86/mm/p2m.c | 64 + xen/include/public/hvm/hvm_op.h | 13 +++ xen/include/xen/mem_access.h| 3 ++ 6 files changed, 132 insertions(+) diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h index f4431687b3..2ace8ea80e 100644 --- a/tools/libxc/include/xenctrl.h +++ b/tools/libxc/include/xenctrl.h @@ -1923,6 +1923,10 @@ int xc_altp2m_switch_to_view(xc_interface *handle, uint32_t domid, uint16_t view_id); int xc_altp2m_set_suppress_ve(xc_interface *handle, uint32_t domid, uint16_t view_id, xen_pfn_t gfn, bool sve); +int xc_altp2m_set_supress_ve_multi(xc_interface *handle, uint32_t domid, + uint16_t view_id, xen_pfn_t first_gfn, + xen_pfn_t last_gfn, bool sve, + xen_pfn_t *error_gfn, uint32_t *error_code); int xc_altp2m_get_suppress_ve(xc_interface *handle, uint32_t domid, uint16_t view_id, xen_pfn_t gfn, bool *sve); int xc_altp2m_set_mem_access(xc_interface *handle, uint32_t domid, diff --git a/tools/libxc/xc_altp2m.c b/tools/libxc/xc_altp2m.c index 09dad0355e..9f7e8315b3 100644 --- a/tools/libxc/xc_altp2m.c +++ b/tools/libxc/xc_altp2m.c @@ -234,6 +234,39 @@ int xc_altp2m_set_suppress_ve(xc_interface *handle, uint32_t domid, return rc; } +int xc_altp2m_set_supress_ve_multi(xc_interface *handle, uint32_t domid, + uint16_t view_id, xen_pfn_t first_gfn, + xen_pfn_t last_gfn, bool sve, + xen_pfn_t *error_gfn, uint32_t *error_code) +{ +int rc; +DECLARE_HYPERCALL_BUFFER(xen_hvm_altp2m_op_t, arg); + +arg = xc_hypercall_buffer_alloc(handle, arg, sizeof(*arg)); +if ( arg == NULL ) +return -1; + +arg->version = HVMOP_ALTP2M_INTERFACE_VERSION; +arg->cmd = HVMOP_altp2m_set_suppress_ve_multi; +arg->domain = domid; +arg->u.suppress_ve_multi.view = view_id; +arg->u.suppress_ve_multi.first_gfn = first_gfn; +arg->u.suppress_ve_multi.last_gfn = last_gfn; +arg->u.suppress_ve_multi.suppress_ve = sve; + +rc = xencall2(handle->xcall, __HYPERVISOR_hvm_op, 
HVMOP_altp2m, + HYPERCALL_BUFFER_AS_ARG(arg)); + +if ( arg->u.suppress_ve_multi.first_error ) +{ +*error_gfn = arg->u.suppress_ve_multi.first_error; +*error_code = arg->u.suppress_ve_multi.first_error_code; +} + +xc_hypercall_buffer_free(handle, arg); +return rc; +} + int xc_altp2m_set_mem_access(xc_interface *handle, uint32_t domid, uint16_t view_id, xen_pfn_t gfn, xenmem_access_t access) diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c index 47573f71b8..a129049d6b 100644 --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -4553,6 +4553,7 @@ static int do_altp2m_op( case HVMOP_altp2m_destroy_p2m: case HVMOP_altp2m_switch_p2m: case HVMOP_altp2m_set_suppress_ve: +case HVMOP_altp2m_set_suppress_ve_multi: case HVMOP_altp2m_get_suppress_ve: case HVMOP_altp2m_set_mem_access: case HVMOP_altp2m_set_mem_access_multi: @@ -4711,6 +4712,20 @@ static int do_altp2m_op( } break; +case HVMOP_altp2m_set_suppress_ve_multi: +if ( a.u.suppress_ve_multi.pad1 || + a.u.suppress_ve_multi.first_error_code || +
[Xen-devel] [PATCH V4 1/4] x86/mm: Add array_index_nospec to guest provided index values
This patch aims to sanitize indexes, potentially guest provided values, for altp2m_eptp[] and altp2m_p2m[] arrays. Signed-off-by: Alexandru Isaila --- CC: Razvan Cojocaru CC: Tamas K Lengyel CC: Petre Pircalabu CC: George Dunlap CC: Jan Beulich CC: Andrew Cooper CC: Wei Liu CC: "Roger Pau Monné" CC: Jun Nakajima CC: Kevin Tian --- xen/arch/x86/mm/mem_access.c | 15 +-- xen/arch/x86/mm/p2m-ept.c| 5 +++-- xen/arch/x86/mm/p2m.c| 27 +-- 3 files changed, 29 insertions(+), 18 deletions(-) diff --git a/xen/arch/x86/mm/mem_access.c b/xen/arch/x86/mm/mem_access.c index 320b9fe621..70f3528bb1 100644 --- a/xen/arch/x86/mm/mem_access.c +++ b/xen/arch/x86/mm/mem_access.c @@ -367,10 +367,11 @@ long p2m_set_mem_access(struct domain *d, gfn_t gfn, uint32_t nr, if ( altp2m_idx ) { if ( altp2m_idx >= MAX_ALTP2M || - d->arch.altp2m_eptp[altp2m_idx] == mfn_x(INVALID_MFN) ) + d->arch.altp2m_eptp[array_index_nospec(altp2m_idx, MAX_EPTP)] == + mfn_x(INVALID_MFN) ) return -EINVAL; -ap2m = d->arch.altp2m_p2m[altp2m_idx]; +ap2m = d->arch.altp2m_p2m[array_index_nospec(altp2m_idx, MAX_ALTP2M)]; } #else ASSERT(!altp2m_idx); @@ -426,10 +427,11 @@ long p2m_set_mem_access_multi(struct domain *d, if ( altp2m_idx ) { if ( altp2m_idx >= MAX_ALTP2M || - d->arch.altp2m_eptp[altp2m_idx] == mfn_x(INVALID_MFN) ) + d->arch.altp2m_eptp[array_index_nospec(altp2m_idx, MAX_EPTP)] == + mfn_x(INVALID_MFN) ) return -EINVAL; -ap2m = d->arch.altp2m_p2m[altp2m_idx]; +ap2m = d->arch.altp2m_p2m[array_index_nospec(altp2m_idx, MAX_ALTP2M)]; } #else ASSERT(!altp2m_idx); @@ -492,10 +494,11 @@ int p2m_get_mem_access(struct domain *d, gfn_t gfn, xenmem_access_t *access, else if ( altp2m_idx ) /* altp2m view 0 is treated as the hostp2m */ { if ( altp2m_idx >= MAX_ALTP2M || - d->arch.altp2m_eptp[altp2m_idx] == mfn_x(INVALID_MFN) ) + d->arch.altp2m_eptp[array_index_nospec(altp2m_idx, MAX_EPTP)] == + mfn_x(INVALID_MFN) ) return -EINVAL; -p2m = d->arch.altp2m_p2m[altp2m_idx]; +p2m = 
d->arch.altp2m_p2m[array_index_nospec(altp2m_idx, MAX_ALTP2M)]; } #else ASSERT(!altp2m_idx); diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c index b5517769c9..e088a63f56 100644 --- a/xen/arch/x86/mm/p2m-ept.c +++ b/xen/arch/x86/mm/p2m-ept.c @@ -1353,7 +1353,8 @@ void setup_ept_dump(void) void p2m_init_altp2m_ept(struct domain *d, unsigned int i) { -struct p2m_domain *p2m = d->arch.altp2m_p2m[i]; +struct p2m_domain *p2m = + d->arch.altp2m_p2m[array_index_nospec(i, MAX_ALTP2M)]; struct p2m_domain *hostp2m = p2m_get_hostp2m(d); struct ept_data *ept; @@ -1366,7 +1367,7 @@ void p2m_init_altp2m_ept(struct domain *d, unsigned int i) p2m->max_mapped_pfn = p2m->max_remapped_gfn = 0; ept = >ept; ept->mfn = pagetable_get_pfn(p2m_get_pagetable(p2m)); -d->arch.altp2m_eptp[i] = ept->eptp; +d->arch.altp2m_eptp[array_index_nospec(i, MAX_EPTP)] = ept->eptp; } unsigned int p2m_find_altp2m_by_eptp(struct domain *d, uint64_t eptp) diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c index ba126f790a..7e7f4f1a7c 100644 --- a/xen/arch/x86/mm/p2m.c +++ b/xen/arch/x86/mm/p2m.c @@ -2499,7 +2499,7 @@ static void p2m_reset_altp2m(struct domain *d, unsigned int idx, struct p2m_domain *p2m; ASSERT(idx < MAX_ALTP2M); -p2m = d->arch.altp2m_p2m[idx]; +p2m = d->arch.altp2m_p2m[array_index_nospec(idx, MAX_ALTP2M)]; p2m_lock(p2m); @@ -2540,7 +2540,7 @@ static int p2m_activate_altp2m(struct domain *d, unsigned int idx) ASSERT(idx < MAX_ALTP2M); -p2m = d->arch.altp2m_p2m[idx]; +p2m = d->arch.altp2m_p2m[array_index_nospec(idx, MAX_ALTP2M)]; hostp2m = p2m_get_hostp2m(d); p2m_lock(p2m); @@ -2622,9 +2622,10 @@ int p2m_destroy_altp2m_by_id(struct domain *d, unsigned int idx) rc = -EBUSY; altp2m_list_lock(d); -if ( d->arch.altp2m_eptp[idx] != mfn_x(INVALID_MFN) ) +if ( d->arch.altp2m_eptp[array_index_nospec(idx, MAX_EPTP)] != + mfn_x(INVALID_MFN) ) { -p2m = d->arch.altp2m_p2m[idx]; +p2m = d->arch.altp2m_p2m[array_index_nospec(idx, MAX_ALTP2M)]; if ( !_atomic_read(p2m->active_vcpus) 
) { @@ -2686,11 +2687,13 @@ int p2m_change_altp2m_gfn(struct domain *d, unsigned int idx, mfn_t mfn; int rc = -EINVAL; -if ( idx >= MAX_ALTP2M || d->arch.altp2m_eptp[idx] == mfn_x(INVALID_MFN) ) +if ( idx >= MAX_ALTP2M || + d->arch.altp2m_eptp[array_index_nospec(idx, MAX_EPTP)] == + mfn_x(INVALID_MFN) ) return rc; hp2m = p2m_get_hostp2m(d); -ap2m = d->arch.altp2m_p2m[idx]; +ap2m = d->arch.altp2m_p2m[array_index_nospec(idx,
Re: [Xen-devel] [XEN PATCH v1 1/1] x86/vm_event: add fast single step
On Tue, Dec 17, 2019 at 8:08 AM Tamas K Lengyel wrote: > > On Tue, Dec 17, 2019 at 7:48 AM Andrew Cooper > wrote: > > > > On 17/12/2019 14:40, Sergey Kovalev wrote: > > > On break point event eight context switches occures. > > > > > > With fast single step it is possible to shorten path for two context > > > switches > > > and gain 35% spead-up. > > > > > > Was tested on Debian branch of Xen 4.12. See at: > > > https://github.com/skvl/xen/tree/debian/knorrie/4.12/fast-singlestep > > > > > > Rebased on master: > > > https://github.com/skvl/xen/tree/fast-singlestep > > > > > > Signed-off-by: Sergey Kovalev > > > > 35% looks like a good number, but what is "fast single step"? All this > > appears to be is plumbing for to cause an altp2m switch on single step. > > Yes, a better explanation would be much needed here and I'm not 100% > sure it correctly implements what I think it tries to. > > This is my interpretation of what the idea is: when using DRAKVUF (or > another system using altp2m with shadow pages similar to what I > describe in > https://xenproject.org/2016/04/13/stealthy-monitoring-with-xen-altp2m), > after a breakpoint is hit the system switches to the default > unrestricted altp2m view with singlestep enabled. When the singlestep > traps to Xen another vm_event is sent to the monitor agent, which then > normally disables singlestepping and switches the altp2m view back to > the restricted view. This patch looks like its short-circuiting that > last part so that it doesn't need to send the vm_event out for the > singlestep event and should switch back to the restricted view in Xen > automatically. It's a nice optimization. But what seems to be missing > is the altp2m switch itself. Never mind, p2m_altp2m_check does the altp2m switch as well, so this patch implements what I described above. Please update the patch message to be more descriptive (you can copy my description from above). Thanks! 
Tamas ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH 2/6] x86/suspend: Don't bother saving %cr3, %ss or flags
On Tue, Dec 17, 2019 at 12:26:24PM +, Andrew Cooper wrote: > On 17/12/2019 12:18, Roger Pau Monné wrote: > > On Tue, Dec 17, 2019 at 12:06:01PM +, Andrew Cooper wrote: > >> On 17/12/2019 11:52, Roger Pau Monné wrote: > >>> On Fri, Dec 13, 2019 at 07:04:32PM +, Andrew Cooper wrote: > The trampoline has already set up the idle pagetables (which are the > correct > ones to use), and sanitised the flags state. > >>> I wonder why do we have wakeup.S and wakeup_prot.S, it would be easier > >>> to follow if it all was in the same file IMO. > >> wakeup.S is the 16bit entry point, and lives in the trampoline below 1M. > >> > >> wakeup_prot.S is a bit of logic which lives in the main hypervisor. > >> > >> The naming could probably do with some improvement, but they can't > >> feasibly be part of the same file. > > Hm, I'm not sure I follow. Isn't this trampoline copied by Xen in a > > suitable position below the 1M boundary, and hence could use symbols > > in order to figure out which part to copy? > > > > Ie: both the low and the high part could live in the same file as long > > as Xen knows how to differentiate those and which chunk needs > > positioning below 1M? > > There is one trampoline.S (and trampoline.o) which gathers together > various files (including wakeup.S) to construct the trampoline. Oh, I see it's all included to make a single unit, and the symbols used to mark the start and end of the trampoline chunk are defined outside of the included file. > It is not something which can be constructed simply by putting code/data > in the requisite sections. There are two main entrypoints, one with a > 4k alignment requirement, one with 16 byte alignment, and we split the > trampoline into two parts - one which is BSP-only and is several pages > in size, and one which is post-boot which is only a single page. 
Given the size of s3_resume I would guess there's space in that single page to fit it, but since it doesn't need to live below the 1M boundary it could be seen as a waste. Anyway, leaving it as-is is fine since placing it in wakeup.S would be a waste of space or require some restructuring of how the trampoline code is assembled. Thanks, Roger. ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [XEN PATCH v1 1/1] x86/vm_event: add fast single step
On Tue, Dec 17, 2019 at 7:48 AM Andrew Cooper wrote: > > On 17/12/2019 14:40, Sergey Kovalev wrote: > > On break point event eight context switches occures. > > > > With fast single step it is possible to shorten path for two context > > switches > > and gain 35% spead-up. > > > > Was tested on Debian branch of Xen 4.12. See at: > > https://github.com/skvl/xen/tree/debian/knorrie/4.12/fast-singlestep > > > > Rebased on master: > > https://github.com/skvl/xen/tree/fast-singlestep > > > > Signed-off-by: Sergey Kovalev > > 35% looks like a good number, but what is "fast single step"? All this > appears to be is plumbing for to cause an altp2m switch on single step. Yes, a better explanation would be much needed here and I'm not 100% sure it correctly implements what I think it tries to. This is my interpretation of what the idea is: when using DRAKVUF (or another system using altp2m with shadow pages similar to what I describe in https://xenproject.org/2016/04/13/stealthy-monitoring-with-xen-altp2m), after a breakpoint is hit the system switches to the default unrestricted altp2m view with singlestep enabled. When the singlestep traps to Xen another vm_event is sent to the monitor agent, which then normally disables singlestepping and switches the altp2m view back to the restricted view. This patch looks like it's short-circuiting that last part so that it doesn't need to send the vm_event out for the singlestep event and should switch back to the restricted view in Xen automatically. It's a nice optimization. But what seems to be missing is the altp2m switch itself. Tamas ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [XEN PATCH v1 1/1] x86/vm_event: add fast single step
On 17.12.2019 17:48, Andrew Cooper wrote: On 17/12/2019 14:40, Sergey Kovalev wrote: On break point event eight context switches occures. With fast single step it is possible to shorten path for two context switches and gain 35% spead-up. Was tested on Debian branch of Xen 4.12. See at: https://github.com/skvl/xen/tree/debian/knorrie/4.12/fast-singlestep Rebased on master: https://github.com/skvl/xen/tree/fast-singlestep Signed-off-by: Sergey Kovalev 35% looks like a good number, but what is "fast single step"? All this appears to be is plumbing for to cause an altp2m switch on single step. ~Andrew You are right. I should have quoted "fast single step". Original INT#3 path is like this (in PlantUML): @startuml VM->Xen : EXIT_REASON_EXCEPTION_NMI Xen->LibVMI: request(VM_EVENT_REASON_SOFTWARE_BREAKPOINT) LibVMI->Xen: response(singlestep | altp2m) Xen->VM: VM->Xen: EXIT_REASON_MONITOR_TRAP_FLAG Xen->LibVMI: request(VM_EVENT_REASON_SINGLESTEP) LibVMI->Xen: response(altp2m) Xen->VM: @enduml With fast single step it looks like this: @startuml VM->Xen : EXIT_REASON_EXCEPTION_NMI Xen->LibVMI: request(VM_EVENT_REASON_SOFTWARE_BREAKPOINT) LibVMI->Xen: response(fast singlestep | altp2m) Xen->VM: VM->Xen: EXIT_REASON_MONITOR_TRAP_FLAG Xen->Xen: fast singlestep Xen->VM: @enduml So we just store the altp2m index and switch to it on MTF. ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH v3 17/22] golang/xenlight: implement array C to Go marshaling
On Tue, Dec 17, 2019 at 6:16 AM George Dunlap wrote: > > On 12/10/19 3:47 PM, Nick Rosbrook wrote: > > From: Nick Rosbrook > > > > Signed-off-by: Nick Rosbrook > > --- > > tools/golang/xenlight/gengotypes.py | 39 +++- > > tools/golang/xenlight/helpers.gen.go | 300 +++ > > 2 files changed, 338 insertions(+), 1 deletion(-) > > > > diff --git a/tools/golang/xenlight/gengotypes.py > > b/tools/golang/xenlight/gengotypes.py > > index b68c1aa66b..ee9aaf9eff 100644 > > --- a/tools/golang/xenlight/gengotypes.py > > +++ b/tools/golang/xenlight/gengotypes.py > > @@ -252,7 +252,7 @@ def xenlight_golang_define_from_C(ty = None): > > for f in ty.fields: > > if f.type.typename is not None: > > if isinstance(f.type, idl.Array): > > -# TODO > > +body += xenlight_golang_array_from_C(f) > > continue > > > > body += xenlight_golang_convert_from_C(f) > > @@ -399,6 +399,43 @@ def xenlight_golang_union_from_C(ty = None, union_name > > = '', struct_name = ''): > > > > return (s,extras) > > > > +def xenlight_golang_array_from_C(ty = None): > > +""" > > +Convert C array to Go slice using the method > > +described here: > > + > > +https://github.com/golang/go/wiki/cgo#turning-c-arrays-into-go-slices > > +""" > > +s = '' > > + > > +gotypename = xenlight_golang_fmt_name(ty.type.elem_type.typename) > > +goname = xenlight_golang_fmt_name(ty.name) > > +ctypename = ty.type.elem_type.typename > > +cname = ty.name > > +cslice = 'c{}'.format(goname) > > +clenvar= ty.type.lenvar.name > > +golenvar = xenlight_golang_fmt_name(clenvar,exported=False) > > + > > +s += '{} := int(xc.{})\n'.format(golenvar, clenvar) > > +s += '{} := '.format(cslice) > > +s > > +='(*[1<<28]C.{})(unsafe.Pointer(xc.{}))[:{}:{}]\n'.format(ctypename, cname, > > +golenvar, > > golenvar) > > +s += 'x.{} = make([]{}, {})\n'.format(goname, gotypename, golenvar) > > +s += 'for i, v := range {} {{\n'.format(cslice) > > + > > +is_enum = isinstance(ty.type.elem_type,idl.Enumeration) > > +if gotypename in go_builtin_types or is_enum: > > 
+s += 'x.{}[i] = {}(v)\n'.format(goname, gotypename) > > +else: > > +s += 'var e {}\n'.format(gotypename) > > +s += 'if err := e.fromC(); err != nil {\n' > > +s += 'return err }\n' > > +s += 'x.{}[i] = e\n'.format(goname) > > + > > +s += '}\n' > > + > > +return s > > + > > def xenlight_golang_fmt_name(name, exported = True): > > """ > > Take a given type name and return an > > diff --git a/tools/golang/xenlight/helpers.gen.go > > b/tools/golang/xenlight/helpers.gen.go > > index e6eee234c0..2f917cac58 100644 > > --- a/tools/golang/xenlight/helpers.gen.go > > +++ b/tools/golang/xenlight/helpers.gen.go > > @@ -263,6 +263,16 @@ func (x *SchedParams) fromC(xc *C.libxl_sched_params) > > error { > > > > func (x *VcpuSchedParams) fromC(xc *C.libxl_vcpu_sched_params) error { > > x.Sched = Scheduler(xc.sched) > > + numVcpus := int(xc.num_vcpus) > > + cVcpus := (*[1 << > > 28]C.libxl_sched_params)(unsafe.Pointer(xc.vcpus))[:numVcpus:numVcpus] > > + x.Vcpus = make([]SchedParams, numVcpus) > > + for i, v := range cVcpus { > > + var e SchedParams > > + if err := e.fromC(); err != nil { > > + return err > > + } > > + x.Vcpus[i] = e > > Along the same lines, any reason not to do the following? > > if err := x.Vcpus[i].fromC(); err != nil { > return err > } Nope, no problem with that. Thanks, -NR ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel