Re: [RFC PATCH v2 03/19] vbus: add connection-client helper infrastructure
On Thu, Apr 09, 2009 at 12:30:57PM -0400, Gregory Haskins wrote: +static unsigned long +task_memctx_copy_to(struct vbus_memctx *ctx, void *dst, const void *src, + unsigned long n) +{ + struct task_memctx *tm = to_task_memctx(ctx); + struct task_struct *p = tm-task; + + while (n) { + unsigned long offset = ((unsigned long)dst)%PAGE_SIZE; + unsigned long len = PAGE_SIZE - offset; + int ret; + struct page *pg; + void *maddr; + + if (len n) + len = n; + + down_read(p-mm-mmap_sem); + ret = get_user_pages(p, p-mm, + (unsigned long)dst, 1, 1, 0, pg, NULL); + + if (ret != 1) { + up_read(p-mm-mmap_sem); + break; + } + + maddr = kmap_atomic(pg, KM_USER0); + memcpy(maddr + offset, src, len); + kunmap_atomic(maddr, KM_USER0); + set_page_dirty_lock(pg); + put_page(pg); + up_read(p-mm-mmap_sem); + + src += len; + dst += len; + n -= len; + } + + return n; +} BTW, why did you decide to use get_user_pages? Would switch_mm + copy_to_user work as well avoiding page walk if all pages are present? Also - if we just had vmexit because a process executed io (or hypercall), can't we just do copy_to_user there? Avi, I think at some point you said that we can? -- MST -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC PATCH v2 03/19] vbus: add connection-client helper infrastructure
Michael S. Tsirkin wrote: On Thu, Apr 09, 2009 at 12:30:57PM -0400, Gregory Haskins wrote: +static unsigned long +task_memctx_copy_to(struct vbus_memctx *ctx, void *dst, const void *src, +unsigned long n) +{ +struct task_memctx *tm = to_task_memctx(ctx); +struct task_struct *p = tm-task; + +while (n) { +unsigned long offset = ((unsigned long)dst)%PAGE_SIZE; +unsigned long len = PAGE_SIZE - offset; +int ret; +struct page *pg; +void *maddr; + +if (len n) +len = n; + +down_read(p-mm-mmap_sem); +ret = get_user_pages(p, p-mm, + (unsigned long)dst, 1, 1, 0, pg, NULL); + +if (ret != 1) { +up_read(p-mm-mmap_sem); +break; +} + +maddr = kmap_atomic(pg, KM_USER0); +memcpy(maddr + offset, src, len); +kunmap_atomic(maddr, KM_USER0); +set_page_dirty_lock(pg); +put_page(pg); +up_read(p-mm-mmap_sem); + +src += len; +dst += len; +n -= len; +} + +return n; +} BTW, why did you decide to use get_user_pages? Would switch_mm + copy_to_user work as well avoiding page walk if all pages are present? Well, basic c_t_u() won't work because its likely not current if you are updating the ring from some other task, but I think you have already figured that out based on the switch_mm suggestion. The simple truth is I was not familiar with switch_mm at the time I wrote this (nor am I now). If this is a superior method that allows you to acquire c_t_u(some_other_ctx) like behavior, I see no problem in changing. I will look into this, and thanks for the suggestion! Also - if we just had vmexit because a process executed io (or hypercall), can't we just do copy_to_user there? Avi, I think at some point you said that we can? Right, and yes that will work I believe. We could always do a if (p == current) check to test for this. To date, I don't typically do anything mem-ops related directly in vcpu context so this wasn't an issue...but that doesn't mean someone wont try in the future. Therefore, I agree we should strive to optimize it if we can. Thanks Michael, -Greg signature.asc Description: OpenPGP digital signature
Re: [RFC PATCH v2 03/19] vbus: add connection-client helper infrastructure
Michael S. Tsirkin wrote: Also - if we just had vmexit because a process executed io (or hypercall), can't we just do copy_to_user there? Avi, I think at some point you said that we can? You can do copy_to_user() whereever it is legal in Linux. Almost all of kvm runs in process context, preemptible, and with interrupts enabled. -- I have a truly marvellous patch that fixes the bug which this signature is too narrow to contain. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC PATCH v2 03/19] vbus: add connection-client helper infrastructure
Gregory Haskins wrote: BTW, why did you decide to use get_user_pages? Would switch_mm + copy_to_user work as well avoiding page walk if all pages are present? Well, basic c_t_u() won't work because its likely not current if you are updating the ring from some other task, but I think you have already figured that out based on the switch_mm suggestion. The simple truth is I was not familiar with switch_mm at the time I wrote this (nor am I now). If this is a superior method that allows you to acquire c_t_u(some_other_ctx) like behavior, I see no problem in changing. I will look into this, and thanks for the suggestion! copy_to_user() is significantly faster than get_user_pages() + kmap() + memcmp() (or their variants). -- I have a truly marvellous patch that fixes the bug which this signature is too narrow to contain. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC PATCH v2 03/19] vbus: add connection-client helper infrastructure
Avi Kivity wrote: Gregory Haskins wrote: BTW, why did you decide to use get_user_pages? Would switch_mm + copy_to_user work as well avoiding page walk if all pages are present? Well, basic c_t_u() won't work because its likely not current if you are updating the ring from some other task, but I think you have already figured that out based on the switch_mm suggestion. The simple truth is I was not familiar with switch_mm at the time I wrote this (nor am I now). If this is a superior method that allows you to acquire c_t_u(some_other_ctx) like behavior, I see no problem in changing. I will look into this, and thanks for the suggestion! copy_to_user() is significantly faster than get_user_pages() + kmap() + memcmp() (or their variants). Oh, I don't doubt that (in fact, I was pretty sure that was the case based on some of the optimizations I could see in studying the c_t_u() path). I just didn't realize there were other ways to do it if its a non current task. ;) I guess the enigma for me right now is what cost does switch_mm have? (Thats not a slam against the suggested approach...I really do not know and am curious). As an aside, note that we seem to be reviewing v2, where v3 is really the last set I pushed. I think this patch is more or less the same across both iterations, but FYI that I would recommend looking at v3 instead. -Greg signature.asc Description: OpenPGP digital signature
Re: [RFC PATCH v2 03/19] vbus: add connection-client helper infrastructure
Gregory Haskins wrote: Oh, I don't doubt that (in fact, I was pretty sure that was the case based on some of the optimizations I could see in studying the c_t_u() path). I just didn't realize there were other ways to do it if its a non current task. ;) I guess the enigma for me right now is what cost does switch_mm have? (Thats not a slam against the suggested approach...I really do not know and am curious). switch_mm() is probably very cheap (reloads cr3), but it does dirty the current cpu's tlb. When the kernel needs to flush a process' tlb, it will have to IPI that cpu in addition to all others. This takes place, for example, after munmap() or after a page is swapped out (though significant batching is done there). It's still plenty cheaper in my estimation. -- I have a truly marvellous patch that fixes the bug which this signature is too narrow to contain. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC PATCH v2 03/19] vbus: add connection-client helper infrastructure
We expect to have various types of connection-clients (e.g. userspace, kvm, etc), each of which is likely to have common access patterns and marshalling duties. Therefore we create a client API to simplify client development by helping with mundane tasks such as handle-2-pointer translation, etc. Special thanks to Pat Mullaney for suggesting the optimization to pass a cookie object down during DEVICESHM operations to save lookup overhead on the event channel. Signed-off-by: Gregory Haskins ghask...@novell.com --- include/linux/vbus_client.h | 115 + kernel/vbus/Makefile|2 kernel/vbus/client.c| 543 +++ 3 files changed, 659 insertions(+), 1 deletions(-) create mode 100644 include/linux/vbus_client.h create mode 100644 kernel/vbus/client.c diff --git a/include/linux/vbus_client.h b/include/linux/vbus_client.h new file mode 100644 index 000..62dab78 --- /dev/null +++ b/include/linux/vbus_client.h @@ -0,0 +1,115 @@ +/* + * Copyright 2009 Novell. All Rights Reserved. + * + * Virtual-Bus - Client interface + * + * We expect to have various types of connection-clients (e.g. userspace, + * kvm, etc). Each client will be connecting from some environment outside + * of the kernel, and therefore will not have direct access to the API as + * presented in ./linux/vbus.h. There will undoubtedly be some parameter + * marshalling that must occur, as well as common patterns for the handling + * of those marshalled parameters (e.g. translating a handle into a pointer, + * etc). + * + * Therefore this client API is provided to simplify the development + * of any clients. Of course, a client is free to bypass this API entirely + * and communicate with the direct VBUS API if desired. + * + * Author: + * Gregory Haskins ghask...@novell.com + * + * This file is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef _LINUX_VBUS_CLIENT_H +#define _LINUX_VBUS_CLIENT_H + +#include linux/types.h +#include linux/compiler.h + +struct vbus_deviceopen { + __u32 devid; + __u32 version; /* device ABI version */ + __u64 handle; /* return value for devh */ +}; + +struct vbus_devicecall { + __u64 devh; /* device-handle (returned from DEVICEOPEN */ + __u32 func; + __u32 len; + __u32 flags; + __u64 datap; +}; + +struct vbus_deviceshm { + __u64 devh; /* device-handle (returned from DEVICEOPEN */ + __u32 id; + __u32 len; + __u32 flags; + struct { + __u32 offset; + __u32 prio; + __u64 cookie; /* token to pass back when signaling client */ + } signal; + __u64 datap; + __u64 handle; /* return value for signaling from client to kernel */ +}; + +#ifdef __KERNEL__ + +#include linux/ioq.h +#include linux/module.h +#include asm/atomic.h + +struct vbus_client; + +struct vbus_client_ops { + int (*deviceopen)(struct vbus_client *client, struct vbus_memctx *ctx, + __u32 devid, __u32 version, __u64 *devh); + int (*deviceclose)(struct vbus_client *client, __u64 devh); + int (*devicecall)(struct vbus_client *client, + __u64 devh, __u32 func, + void *data, __u32 len, __u32 flags); + int (*deviceshm)(struct vbus_client *client, +__u64 devh, __u32 id, +struct vbus_shm *shm, struct shm_signal *signal, +__u32 flags, __u64 *handle); + int (*shmsignal)(struct vbus_client *client, __u64 handle); + void (*release)(struct vbus_client *client); +}; + +struct vbus_client { + atomic_t refs; + struct vbus_client_ops *ops; +}; + +static inline void vbus_client_get(struct vbus_client *client) +{ + atomic_inc(client-refs); +} + +static inline void vbus_client_put(struct vbus_client *client) +{ + if (atomic_dec_and_test(client-refs)) + client-ops-release(client); +} + +struct vbus_client *vbus_client_attach(struct vbus *bus); + +extern struct vbus_memctx *current_memctx; +struct vbus_memctx *task_memctx_alloc(struct task_struct *task); + +#endif /* __KERNEL__ */ + +#endif /* _LINUX_VBUS_CLIENT_H */ diff --git a/kernel/vbus/Makefile b/kernel/vbus/Makefile index 367f65b..4d440e5 100644 --- a/kernel/vbus/Makefile +++ b/kernel/vbus/Makefile @@