On 08.05.19 18:40, Julien Grall wrote:
This patch is quite hard to read because you are reworking the code and at the
same time implementing the new VCPUOP. How about moving the rework into a
separate patch? The implementation can then be folded into the previous patch as
suggested by George.
OK.
diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index 6dc633e..8e24e63 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -275,32 +275,55 @@ static void ctxt_switch_to(struct vcpu *n)
}
/* Update per-VCPU guest runstate shared memory area (if registered). */
-static void update_runstate_area(struct vcpu *v)
+void update_runstate_area(struct vcpu *v)
Why do you export update_runstate_area? The function does not seem to be called
outside.
Ouch, this is left over from one of the previous versions.
{
- void __user *guest_handle = NULL;
+ if ( !guest_handle_is_null(runstate_guest(v)) )
+ {
+ void __user *guest_handle = NULL;
+ if ( VM_ASSIST(v->domain, runstate_update_flag) )
+ {
+ guest_handle = &v->runstate_guest.p->state_entry_time + 1;
+ guest_handle--;
+ v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
+ __raw_copy_to_guest(guest_handle,
+ (void *)(&v->runstate.state_entry_time + 1) -
1,
+ 1);
+ smp_wmb();
+ }
- if ( guest_handle_is_null(runstate_guest(v)) )
- return;
+ __copy_to_guest(runstate_guest(v), &v->runstate, 1);
- if ( VM_ASSIST(v->domain, runstate_update_flag) )
- {
- guest_handle = &v->runstate_guest.p->state_entry_time + 1;
- guest_handle--;
- v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
- __raw_copy_to_guest(guest_handle,
- (void *)(&v->runstate.state_entry_time + 1) - 1,
1);
- smp_wmb();
+ if ( guest_handle )
+ {
+ v->runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+ smp_wmb();
+ __raw_copy_to_guest(guest_handle,
+ (void *)(&v->runstate.state_entry_time + 1) -
1,
+ 1);
+ }
}
- __copy_to_guest(runstate_guest(v), &v->runstate, 1);
-
- if ( guest_handle )
+ spin_lock(&v->mapped_runstate_lock);
+ if ( v->mapped_runstate )
The code looks a bit odd to me: you seem to allow a guest to provide two runstate
areas, one using a guest virtual address and the other using a guest physical address.
It would be best if we prevented a guest from mixing and matching them.
At first I tried implementing it that way, but the locking and decision
code became really ugly and complex while trying to cover 'guest misbehavior'
scenarios.
IOW, if the guest provides a physical address first, then *all* subsequent calls
should use a physical address. Alternatively this could be a per-vCPU decision.
I guess we should agree on what to implement first.
{
- v->runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
- smp_wmb();
- __raw_copy_to_guest(guest_handle,
- (void *)(&v->runstate.state_entry_time + 1) - 1,
1);
+ if ( VM_ASSIST(v->domain, runstate_update_flag) )
+ {
+ v->mapped_runstate->state_entry_time |= XEN_RUNSTATE_UPDATE;
+ smp_wmb();
+ v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
+ }
+
+ memcpy(v->mapped_runstate, &v->runstate, sizeof(v->runstate));
+
+ if ( VM_ASSIST(v->domain, runstate_update_flag) )
+ {
+ v->mapped_runstate->state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+ smp_wmb();
+ v->runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+ }
}
+ spin_unlock(&v->mapped_runstate_lock);
+
NIT: The newline is not necessary here.
OK.
}
static void schedule_tail(struct vcpu *prev)
@@ -998,6 +1021,7 @@ long do_arm_vcpu_op(int cmd, unsigned int vcpuid,
XEN_GUEST_HANDLE_PARAM(void) a
{
case VCPUOP_register_vcpu_info:
case VCPUOP_register_runstate_memory_area:
+ case VCPUOP_register_runstate_phys_memory_area:
return do_vcpu_op(cmd, vcpuid, arg);
default:
return -EINVAL;
[...]
diff --git a/xen/common/domain.c b/xen/common/domain.c
index ae22049..6df76c6 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -149,6 +149,7 @@ struct vcpu *vcpu_create(
v->dirty_cpu = VCPU_CPU_CLEAN;
spin_lock_init(&v->virq_lock);
+ spin_lock_init(&v->mapped_runstate_lock);
tasklet_init(&v->continue_hypercall_tasklet, NULL, 0);
@@ -699,6 +700,69 @@ int rcu_lock_live_remote_domain_by_id(domid_t dom, struct
domain **d)
return 0;
}
+static void _unmap_runstate_area(struct vcpu *v)
A better name would be unmap_runstate_area_locked(), so you avoid the reserved
name and make the intended use clear.
OK.
+{
+ mfn_t mfn;
+
+ if ( !v->mapped_runstate )
+ return;
+
+ mfn = _mfn(virt_to_mfn(runstate_guest(v).p));
As pointed out by Jan in the previous version:
The pointer is the result of __map_domain_page_global(). So I don't think
you can legitimately use virt_to_mfn() on it, at
least not on x86; domain_page_map_to_mfn() is what you
want to use here.
Yep.
+
+ unmap_domain_page_global((void *)
+ ((unsigned long)v->mapped_runstate &
+ PAGE_MASK));
+
+ v->mapped_runstate = NULL;
+ put_page_and_type(mfn_to_page(mfn));
+}
We seem to have this pattern in a few places now (see unmap_guest_page). It
would be good to introduce helpers that can be used everywhere (probably lifted
from common/event_fifo.c).
I'll check.
+
+static int map_runstate_area(struct vcpu *v,
+ struct vcpu_register_runstate_memory_area *area)
+{
+ unsigned long offset = area->addr.p & ~PAGE_MASK;
+ gfn_t gfn = gaddr_to_gfn(area->addr.p);
+ struct domain *d = v->domain;
+ void *mapping;
+ struct page_info *page;
+ size_t size = sizeof (struct vcpu_runstate_info );
space is not necessary before ).
But is the variable really necessary?
Well, I think it could be dropped.
+
+ if ( offset > (PAGE_SIZE - size) )
+ return -EINVAL;
+
+ page = get_page_from_gfn(d, gfn_x(gfn), NULL, P2M_ALLOC);
+ if ( !page )
+ return -EINVAL;
+
+ if ( !get_page_type(page, PGT_writable_page) )
+ {
+ put_page(page);
+ return -EINVAL;
+ }
+
+ mapping = __map_domain_page_global(page);
+
+ if ( mapping == NULL )
+ {
+ put_page_and_type(page);
+ return -ENOMEM;
+ }
+
+ spin_lock(&v->mapped_runstate_lock);
+ _unmap_runstate_area(v);
+ v->mapped_runstate = mapping + offset;
+ spin_unlock(&v->mapped_runstate_lock);
+
+ return 0;
+}
+
+static void unmap_runstate_area(struct vcpu *v)
+{
+ spin_lock(&v->mapped_runstate_lock);
+ _unmap_runstate_area(v);
+ spin_unlock(&v->mapped_runstate_lock);
+}
+
int domain_kill(struct domain *d)
{
int rc = 0;
@@ -737,7 +801,11 @@ int domain_kill(struct domain *d)
if ( cpupool_move_domain(d, cpupool0) )
return -ERESTART;
for_each_vcpu ( d, v )
+ {
+ set_xen_guest_handle(runstate_guest(v), NULL);
+ unmap_runstate_area(v);
unmap_vcpu_info(v);
+ }
d->is_dying = DOMDYING_dead;
/* Mem event cleanup has to go here because the rings
* have to be put before we call put_domain. */
@@ -1192,6 +1260,7 @@ int domain_soft_reset(struct domain *d)
for_each_vcpu ( d, v )
{
set_xen_guest_handle(runstate_guest(v), NULL);
+ unmap_runstate_area(v);
unmap_vcpu_info(v);
}
@@ -1536,8 +1605,17 @@ long do_vcpu_op(int cmd, unsigned int vcpuid,
XEN_GUEST_HANDLE_PARAM(void) arg)
}
case VCPUOP_register_runstate_phys_memory_area:
- rc = -EOPNOTSUPP;
+ {
+ struct vcpu_register_runstate_memory_area area;
+
+ rc = -EFAULT;
+ if ( copy_from_guest(&area, arg, 1) )
+ break;
+
+ rc = map_runstate_area(v, &area);
+
break;
+ }
#ifdef VCPU_TRAP_NMI
case VCPUOP_send_nmi:
diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
index 312fec8..3fb6ea2 100644
--- a/xen/include/asm-arm/domain.h
+++ b/xen/include/asm-arm/domain.h
@@ -217,6 +217,8 @@ void vcpu_show_execution_state(struct vcpu *);
void vcpu_show_registers(const struct vcpu *);
void vcpu_switch_to_aarch64_mode(struct vcpu *);
+void update_runstate_area(struct vcpu *);
+
/*
* Due to the restriction of GICv3, the number of vCPUs in AFF0 is
* limited to 16, thus only the first 4 bits of AFF0 are legal. We will
diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
index d1bfc82..ecddcfe 100644
--- a/xen/include/xen/domain.h
+++ b/xen/include/xen/domain.h
@@ -118,4 +118,6 @@ struct vnuma_info {
void vnuma_destroy(struct vnuma_info *vnuma);
+struct vcpu_register_runstate_memory_area;
+
#endif /* __XEN_DOMAIN_H__ */
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 748bb0f..2afe31c 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -163,15 +163,23 @@ struct vcpu
void *sched_priv; /* scheduler-specific data */
struct vcpu_runstate_info runstate;
+
+ spinlock_t mapped_runstate_lock;
+
#ifndef CONFIG_COMPAT
# define runstate_guest(v) ((v)->runstate_guest)
XEN_GUEST_HANDLE(vcpu_runstate_info_t) runstate_guest; /* guest address */
+ vcpu_runstate_info_t *mapped_runstate;
#else
# define runstate_guest(v) ((v)->runstate_guest.native)
union {
XEN_GUEST_HANDLE(vcpu_runstate_info_t) native;
XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t) compat;
} runstate_guest; /* guest address */
+ union {
+ vcpu_runstate_info_t* native;
+ vcpu_runstate_info_compat_t* compat;
+ } mapped_runstate; /* guest address */
> The combination of mapped_runstate and runstate_guest is a bit confusing. I
think you want to rework the interface to show that only one is possible at a
time and make clear which one is used by whom. Maybe:
As I said before, IMO coupling those interfaces makes the code complicated and
ugly.
union
{
/* Legacy interface to be used when the guest provides a virtual address */
union {
XEN_GUEST_HANDLE(vcpu_runstate_info_t) native;
...
} virt;
/* Interface used when the guest provides a physical address */
union {
} phys;
} runstate_guest.>
runstate_guest_type /* could be a bool or enum */
Jan what do you think?
Cheers,
--
Sincerely,
Andrii Anisov.
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel