Re: [RFC PATCH v2 03/19] vbus: add connection-client helper infrastructure

2009-06-04 Thread Michael S. Tsirkin
On Thu, Apr 09, 2009 at 12:30:57PM -0400, Gregory Haskins wrote:
 +static unsigned long
 +task_memctx_copy_to(struct vbus_memctx *ctx, void *dst, const void *src,
 + unsigned long n)
 +{
 + struct task_memctx *tm = to_task_memctx(ctx);
 + struct task_struct *p = tm-task;
 +
 + while (n) {
 + unsigned long offset = ((unsigned long)dst)%PAGE_SIZE;
 + unsigned long len = PAGE_SIZE - offset;
 + int ret;
 + struct page *pg;
 + void *maddr;
 +
 + if (len  n)
 + len = n;
 +
 + down_read(p-mm-mmap_sem);
 + ret = get_user_pages(p, p-mm,
 +  (unsigned long)dst, 1, 1, 0, pg, NULL);
 +
 + if (ret != 1) {
 + up_read(p-mm-mmap_sem);
 + break;
 + }
 +
 + maddr = kmap_atomic(pg, KM_USER0);
 + memcpy(maddr + offset, src, len);
 + kunmap_atomic(maddr, KM_USER0);
 + set_page_dirty_lock(pg);
 + put_page(pg);
 + up_read(p-mm-mmap_sem);
 +
 + src += len;
 + dst += len;
 + n -= len;
 + }
 +
 + return n;
 +}

BTW, why did you decide to use get_user_pages?
Would switch_mm + copy_to_user work as well
avoiding page walk if all pages are present?

Also - if we just had vmexit because a process executed
io (or hypercall), can't we just do copy_to_user there?
Avi, I think at some point you said that we can?


-- 
MST
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH v2 03/19] vbus: add connection-client helper infrastructure

2009-06-04 Thread Gregory Haskins
Michael S. Tsirkin wrote:
 On Thu, Apr 09, 2009 at 12:30:57PM -0400, Gregory Haskins wrote:
   
 +static unsigned long
 +task_memctx_copy_to(struct vbus_memctx *ctx, void *dst, const void *src,
 +unsigned long n)
 +{
 +struct task_memctx *tm = to_task_memctx(ctx);
 +struct task_struct *p = tm-task;
 +
 +while (n) {
 +unsigned long offset = ((unsigned long)dst)%PAGE_SIZE;
 +unsigned long len = PAGE_SIZE - offset;
 +int ret;
 +struct page *pg;
 +void *maddr;
 +
 +if (len  n)
 +len = n;
 +
 +down_read(p-mm-mmap_sem);
 +ret = get_user_pages(p, p-mm,
 + (unsigned long)dst, 1, 1, 0, pg, NULL);
 +
 +if (ret != 1) {
 +up_read(p-mm-mmap_sem);
 +break;
 +}
 +
 +maddr = kmap_atomic(pg, KM_USER0);
 +memcpy(maddr + offset, src, len);
 +kunmap_atomic(maddr, KM_USER0);
 +set_page_dirty_lock(pg);
 +put_page(pg);
 +up_read(p-mm-mmap_sem);
 +
 +src += len;
 +dst += len;
 +n -= len;
 +}
 +
 +return n;
 +}
 

 BTW, why did you decide to use get_user_pages?
 Would switch_mm + copy_to_user work as well
 avoiding page walk if all pages are present?
   

Well, basic c_t_u() won't work because its likely not current if you
are updating the ring from some other task, but I think you have already
figured that out based on the switch_mm suggestion.  The simple truth is
I was not familiar with switch_mm at the time I wrote this (nor am I
now).  If this is a superior method that allows you to acquire
c_t_u(some_other_ctx) like behavior, I see no problem in changing.  I
will look into this, and thanks for the suggestion!

 Also - if we just had vmexit because a process executed
 io (or hypercall), can't we just do copy_to_user there?
 Avi, I think at some point you said that we can?
   

Right, and yes that will work I believe.  We could always do a if (p ==
current) check to test for this.  To date, I don't typically do
anything mem-ops related directly in vcpu context so this wasn't an
issue...but that doesn't mean someone wont try in the future.  
Therefore, I agree we should strive to optimize it if we can.

   

Thanks Michael,
-Greg



signature.asc
Description: OpenPGP digital signature


Re: [RFC PATCH v2 03/19] vbus: add connection-client helper infrastructure

2009-06-04 Thread Avi Kivity

Michael S. Tsirkin wrote:

Also - if we just had vmexit because a process executed
io (or hypercall), can't we just do copy_to_user there?
Avi, I think at some point you said that we can?
  


You can do copy_to_user() whereever it is legal in Linux.  Almost all of 
kvm runs in process context, preemptible, and with interrupts enabled.


--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH v2 03/19] vbus: add connection-client helper infrastructure

2009-06-04 Thread Avi Kivity

Gregory Haskins wrote:

   


BTW, why did you decide to use get_user_pages?
Would switch_mm + copy_to_user work as well
avoiding page walk if all pages are present?
  



Well, basic c_t_u() won't work because its likely not current if you
are updating the ring from some other task, but I think you have already
figured that out based on the switch_mm suggestion.  The simple truth is
I was not familiar with switch_mm at the time I wrote this (nor am I
now).  If this is a superior method that allows you to acquire
c_t_u(some_other_ctx) like behavior, I see no problem in changing.  I
will look into this, and thanks for the suggestion!
  


copy_to_user() is significantly faster than get_user_pages() + kmap() + 
memcmp() (or their variants).


--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH v2 03/19] vbus: add connection-client helper infrastructure

2009-06-04 Thread Gregory Haskins
Avi Kivity wrote:
 Gregory Haskins wrote:

   
 BTW, why did you decide to use get_user_pages?
 Would switch_mm + copy_to_user work as well
 avoiding page walk if all pages are present?
   

 Well, basic c_t_u() won't work because its likely not current if you
 are updating the ring from some other task, but I think you have already
 figured that out based on the switch_mm suggestion.  The simple truth is
 I was not familiar with switch_mm at the time I wrote this (nor am I
 now).  If this is a superior method that allows you to acquire
 c_t_u(some_other_ctx) like behavior, I see no problem in changing.  I
 will look into this, and thanks for the suggestion!
   

 copy_to_user() is significantly faster than get_user_pages() + kmap()
 + memcmp() (or their variants).


Oh, I don't doubt that (in fact, I was pretty sure that was the case
based on some of the optimizations I could see in studying the c_t_u()
path).  I just didn't realize there were other ways to do it if its a
non current task. ;)

I guess the enigma for me right now is what cost does switch_mm have? 
(Thats not a slam against the suggested approach...I really do not know
and am curious).

As an aside, note that we seem to be reviewing v2, where v3 is really
the last set I pushed.  I think this patch is more or less the same
across both iterations, but FYI that I would recommend looking at v3
instead.

-Greg



signature.asc
Description: OpenPGP digital signature


Re: [RFC PATCH v2 03/19] vbus: add connection-client helper infrastructure

2009-06-04 Thread Avi Kivity

Gregory Haskins wrote:

Oh, I don't doubt that (in fact, I was pretty sure that was the case
based on some of the optimizations I could see in studying the c_t_u()
path).  I just didn't realize there were other ways to do it if its a
non current task. ;)

I guess the enigma for me right now is what cost does switch_mm have? 
(Thats not a slam against the suggested approach...I really do not know

and am curious).
  


switch_mm() is probably very cheap (reloads cr3), but it does dirty the 
current cpu's tlb.  When the kernel needs to flush a process' tlb, it 
will have to IPI that cpu in addition to all others.  This takes place, 
for example, after munmap() or after a page is swapped out (though 
significant batching is done there).


It's still plenty cheaper in my estimation.

--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC PATCH v2 03/19] vbus: add connection-client helper infrastructure

2009-04-09 Thread Gregory Haskins
We expect to have various types of connection-clients (e.g. userspace,
kvm, etc), each of which is likely to have common access patterns and
marshalling duties.  Therefore we create a client API to simplify
client development by helping with mundane tasks such as handle-2-pointer
translation, etc.

Special thanks to Pat Mullaney for suggesting the optimization to pass
a cookie object down during DEVICESHM operations to save lookup overhead
on the event channel.

Signed-off-by: Gregory Haskins ghask...@novell.com
---

 include/linux/vbus_client.h |  115 +
 kernel/vbus/Makefile|2 
 kernel/vbus/client.c|  543 +++
 3 files changed, 659 insertions(+), 1 deletions(-)
 create mode 100644 include/linux/vbus_client.h
 create mode 100644 kernel/vbus/client.c

diff --git a/include/linux/vbus_client.h b/include/linux/vbus_client.h
new file mode 100644
index 000..62dab78
--- /dev/null
+++ b/include/linux/vbus_client.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright 2009 Novell.  All Rights Reserved.
+ *
+ * Virtual-Bus - Client interface
+ *
+ * We expect to have various types of connection-clients (e.g. userspace,
+ * kvm, etc).  Each client will be connecting from some environment outside
+ * of the kernel, and therefore will not have direct access to the API as
+ * presented in ./linux/vbus.h.  There will undoubtedly be some parameter
+ * marshalling that must occur, as well as common patterns for the handling
+ * of those marshalled parameters (e.g. translating a handle into a pointer,
+ * etc).
+ *
+ * Therefore this client API is provided to simplify the development
+ * of any clients.  Of course, a client is free to bypass this API entirely
+ * and communicate with the direct VBUS API if desired.
+ *
+ * Author:
+ *  Gregory Haskins ghask...@novell.com
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _LINUX_VBUS_CLIENT_H
+#define _LINUX_VBUS_CLIENT_H
+
+#include linux/types.h
+#include linux/compiler.h
+
+struct vbus_deviceopen {
+   __u32 devid;
+   __u32 version; /* device ABI version */
+   __u64 handle; /* return value for devh */
+};
+
+struct vbus_devicecall {
+   __u64 devh;   /* device-handle (returned from DEVICEOPEN */
+   __u32 func;
+   __u32 len;
+   __u32 flags;
+   __u64 datap;
+};
+
+struct vbus_deviceshm {
+   __u64 devh;   /* device-handle (returned from DEVICEOPEN */
+   __u32 id;
+   __u32 len;
+   __u32 flags;
+   struct {
+   __u32 offset;
+   __u32 prio;
+   __u64 cookie; /* token to pass back when signaling client */
+   } signal;
+   __u64 datap;
+   __u64 handle; /* return value for signaling from client to kernel */
+};
+
+#ifdef __KERNEL__
+
+#include linux/ioq.h
+#include linux/module.h
+#include asm/atomic.h
+
+struct vbus_client;
+
+struct vbus_client_ops {
+   int (*deviceopen)(struct vbus_client *client,  struct vbus_memctx *ctx,
+ __u32 devid, __u32 version, __u64 *devh);
+   int (*deviceclose)(struct vbus_client *client, __u64 devh);
+   int (*devicecall)(struct vbus_client *client,
+ __u64 devh, __u32 func,
+ void *data, __u32 len, __u32 flags);
+   int (*deviceshm)(struct vbus_client *client,
+__u64 devh, __u32 id,
+struct vbus_shm *shm, struct shm_signal *signal,
+__u32 flags, __u64 *handle);
+   int (*shmsignal)(struct vbus_client *client, __u64 handle);
+   void (*release)(struct vbus_client *client);
+};
+
+struct vbus_client {
+   atomic_t refs;
+   struct vbus_client_ops *ops;
+};
+
+static inline void vbus_client_get(struct vbus_client *client)
+{
+   atomic_inc(client-refs);
+}
+
+static inline void vbus_client_put(struct vbus_client *client)
+{
+   if (atomic_dec_and_test(client-refs))
+   client-ops-release(client);
+}
+
+struct vbus_client *vbus_client_attach(struct vbus *bus);
+
+extern struct vbus_memctx *current_memctx;
+struct vbus_memctx *task_memctx_alloc(struct task_struct *task);
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_VBUS_CLIENT_H */
diff --git a/kernel/vbus/Makefile b/kernel/vbus/Makefile
index 367f65b..4d440e5 100644
--- a/kernel/vbus/Makefile
+++ b/kernel/vbus/Makefile
@@