Re: [PATCH 4/4] virtio_blk: use disk_name_format() to support mass of disks naming

2012-04-09 Thread Michael S. Tsirkin
On Mon, Apr 09, 2012 at 11:47:51AM +0800, Ren Mingxin wrote:
  On 04/04/2012 04:01 PM, Michael S. Tsirkin wrote:
 On Mon, Apr 02, 2012 at 12:00:45PM -0700, Tejun Heo wrote:
 On Mon, Apr 02, 2012 at 11:56:18AM -0700, James Bottomley wrote:
 So if we're agreed no other devices going forwards should ever use this
 interface, is there any point unifying the interface?  No matter how
 many caveats you hedge it round with, putting the API in a central place
 will be a bit like a honey trap for careless bears.   It might be safer
 just to leave it buried in the three current drivers.
 Yeah, that was my hope but I think it would be easier to enforce to
 have a common function which is clearly marked legacy so that new
 driver writers can go look for the naming code in the existing ones,
 find out they're all using the same function which is marked legacy
 and explains what to do for newer drivers.
 I think I'm not the only one to be confused about the
 preferred direction here.
 James, do you agree to the approach above?
 
 It would be nice to fix virtio block for 3.4, so
 how about this:
 - I'll just apply the original bugfix patch for 3.4 -
it only affects virtio
 
 Sorry, about only affects virtio, I'm not very clear here:
 1) Just duplicate the disk name format function in virtio_blk
 like the original patch: https://lkml.org/lkml/2012/3/28/45
 2) Move the disk name format function into block core like
 this patch series but only affects virtio(not affect mtip32xx).
 Do you mean the 2) one or something else?

I mean 1) - I'll apply the original patch.

 - Ren will repost the refactoring patch on top, and we can
keep up the discussion
 
 Ren if you agree, can you make this a two patch series please?
 
 
 Sure.
 
 -- 
 Thanks,
 Ren
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH 4/4] virtio_blk: use disk_name_format() to support mass of disks naming

2012-04-09 Thread Michael S. Tsirkin
On Mon, Apr 09, 2012 at 11:47:51AM +0800, Ren Mingxin wrote:
  On 04/04/2012 04:01 PM, Michael S. Tsirkin wrote:
 On Mon, Apr 02, 2012 at 12:00:45PM -0700, Tejun Heo wrote:
 On Mon, Apr 02, 2012 at 11:56:18AM -0700, James Bottomley wrote:
 So if we're agreed no other devices going forwards should ever use this
 interface, is there any point unifying the interface?  No matter how
 many caveats you hedge it round with, putting the API in a central place
 will be a bit like a honey trap for careless bears.   It might be safer
 just to leave it buried in the three current drivers.
 Yeah, that was my hope but I think it would be easier to enforce to
 have a common function which is clearly marked legacy so that new
 driver writers can go look for the naming code in the existing ones,
 find out they're all using the same function which is marked legacy
 and explains what to do for newer drivers.
 I think I'm not the only one to be confused about the
 preferred direction here.
 James, do you agree to the approach above?
 
 It would be nice to fix virtio block for 3.4, so
 how about this:
 - I'll just apply the original bugfix patch for 3.4 -
it only affects virtio
 
 Sorry, about only affects virtio, I'm not very clear here:
 1) Just duplicate the disk name format function in virtio_blk
 like the original patch: https://lkml.org/lkml/2012/3/28/45

So I'd like to apply this, and we can discuss the
deduplication for 3.5. Please post a version of this that
1. isn't line-wrapped and doesn't have damaged whitespace
   so I can run git am on it
2. lists the # of duspported disks correctly as 26^3+(26^2+26)
   in the description

Thanks!

 2) Move the disk name format function into block core like
 this patch series but only affects virtio(not affect mtip32xx).
 Do you mean the 2) one or something else?
 
 - Ren will repost the refactoring patch on top, and we can
keep up the discussion
 
 Ren if you agree, can you make this a two patch series please?
 
 
 Sure.
 
 -- 
 Thanks,
 Ren
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH] skbuff: struct ubuf_info callback type safety

2012-04-09 Thread Michael S. Tsirkin
The skb struct ubuf_info callback gets passed struct ubuf_info
itself, not the arg value as the field name and the function signature
seem to imply. Rename the arg field to ctx to match usage,
add documentation and change the callback argument type
to make usage clear and to have compiler check correctness.

Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 drivers/vhost/net.c|2 +-
 drivers/vhost/vhost.c  |5 ++---
 drivers/vhost/vhost.h  |2 +-
 include/linux/skbuff.h |7 ---
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index f0da2c3..1f21d2a 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -238,7 +238,7 @@ static void handle_tx(struct vhost_net *net)
 
vq-heads[vq-upend_idx].len = len;
ubuf-callback = vhost_zerocopy_callback;
-   ubuf-arg = vq-ubufs;
+   ubuf-ctx = vq-ubufs;
ubuf-desc = vq-upend_idx;
msg.msg_control = ubuf;
msg.msg_controllen = sizeof(ubuf);
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 947f00d..51e4c1e 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1598,10 +1598,9 @@ void vhost_ubuf_put_and_wait(struct vhost_ubuf_ref 
*ubufs)
kfree(ubufs);
 }
 
-void vhost_zerocopy_callback(void *arg)
+void vhost_zerocopy_callback(struct ubuf_info *ubuf)
 {
-   struct ubuf_info *ubuf = arg;
-   struct vhost_ubuf_ref *ubufs = ubuf-arg;
+   struct vhost_ubuf_ref *ubufs = ubuf-ctx;
struct vhost_virtqueue *vq = ubufs-vq;
 
/* set len = 1 to mark this desc buffers done DMA */
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 8dcf4cc..8de1fd5 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -188,7 +188,7 @@ bool vhost_enable_notify(struct vhost_dev *, struct 
vhost_virtqueue *);
 
 int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
unsigned int log_num, u64 len);
-void vhost_zerocopy_callback(void *arg);
+void vhost_zerocopy_callback(struct ubuf_info *);
 int vhost_zerocopy_signal_used(struct vhost_virtqueue *vq);
 
 #define vq_err(vq, fmt, ...) do {  \
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 3337027..4c3f138 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -238,11 +238,12 @@ enum {
 /*
  * The callback notifies userspace to release buffers when skb DMA is done in
  * lower device, the skb last reference should be 0 when calling this.
- * The desc is used to track userspace buffer index.
+ * The ctx field is used to track device context.
+ * The desc field is used to track userspace buffer index.
  */
 struct ubuf_info {
-   void (*callback)(void *);
-   void *arg;
+   void (*callback)(struct ubuf_info *);
+   void *ctx;
unsigned long desc;
 };
 
-- 
1.7.9.111.gf3fb0
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH 4/4] virtio_blk: use disk_name_format() to support mass of disks naming

2012-04-09 Thread Ren Mingxin

 On 04/04/2012 04:01 PM, Michael S. Tsirkin wrote:

On Mon, Apr 02, 2012 at 12:00:45PM -0700, Tejun Heo wrote:

On Mon, Apr 02, 2012 at 11:56:18AM -0700, James Bottomley wrote:

So if we're agreed no other devices going forwards should ever use this
interface, is there any point unifying the interface?  No matter how
many caveats you hedge it round with, putting the API in a central place
will be a bit like a honey trap for careless bears.   It might be safer
just to leave it buried in the three current drivers.

Yeah, that was my hope but I think it would be easier to enforce to
have a common function which is clearly marked legacy so that new
driver writers can go look for the naming code in the existing ones,
find out they're all using the same function which is marked legacy
and explains what to do for newer drivers.

I think I'm not the only one to be confused about the
preferred direction here.
James, do you agree to the approach above?

It would be nice to fix virtio block for 3.4, so
how about this:
- I'll just apply the original bugfix patch for 3.4 -
   it only affects virtio


Sorry, about only affects virtio, I'm not very clear here:
1) Just duplicate the disk name format function in virtio_blk
like the original patch: https://lkml.org/lkml/2012/3/28/45
2) Move the disk name format function into block core like
this patch series but only affects virtio(not affect mtip32xx).
Do you mean the 2) one or something else?


- Ren will repost the refactoring patch on top, and we can
   keep up the discussion

Ren if you agree, can you make this a two patch series please?



Sure.

--
Thanks,
Ren

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH 0/2] adding tracepoints to vhost

2012-04-09 Thread Jason Wang
To help in vhost analyzing, the following series adding basic tracepoints to
vhost. Operations of both virtqueues and vhost works were traced in current
implementation, net code were untouched. A top-like satistics displaying script
were introduced to help the troubleshooting.

TODO:
- net specific tracepoints?

---

Jason Wang (2):
  vhost: basic tracepoints
  tools: virtio: add a top-like utility for displaying vhost satistics


 drivers/vhost/trace.h   |  153 
 drivers/vhost/vhost.c   |   17 ++
 tools/virtio/vhost_stat |  360 +++
 3 files changed, 528 insertions(+), 2 deletions(-)
 create mode 100644 drivers/vhost/trace.h
 create mode 100755 tools/virtio/vhost_stat

-- 
Jason Wang
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH 1/2] vhost: basic tracepoints

2012-04-09 Thread Jason Wang
To help for the performance optimizations and debugging, this patch tracepoints
for vhost. Pay attention that the tracepoints are only for vhost, net code are
not touched.

Two kinds of activities were traced: virtio and vhost work.

Signed-off-by: Jason Wang jasow...@redhat.com
---
 drivers/vhost/trace.h |  153 +
 drivers/vhost/vhost.c |   17 +
 2 files changed, 168 insertions(+), 2 deletions(-)
 create mode 100644 drivers/vhost/trace.h

diff --git a/drivers/vhost/trace.h b/drivers/vhost/trace.h
new file mode 100644
index 000..0423899
--- /dev/null
+++ b/drivers/vhost/trace.h
@@ -0,0 +1,153 @@
+#if !defined(_TRACE_VHOST_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_VHOST_H
+
+#include linux/tracepoint.h
+#include vhost.h
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM vhost
+
+/*
+ * Tracepoint for updating used flag.
+ */
+TRACE_EVENT(vhost_virtio_update_used_flags,
+   TP_PROTO(struct vhost_virtqueue *vq),
+   TP_ARGS(vq),
+
+   TP_STRUCT__entry(
+   __field(struct vhost_virtqueue *, vq)
+   __field(u16, used_flags)
+   ),
+
+   TP_fast_assign(
+   __entry-vq = vq;
+   __entry-used_flags = vq-used_flags;
+   ),
+
+   TP_printk(vhost update used flag %x to vq %p notify %s,
+   __entry-used_flags, __entry-vq,
+   (__entry-used_flags  VRING_USED_F_NO_NOTIFY) ?
+   disabled : enabled)
+);
+
+/*
+ * Tracepoint for updating avail event.
+ */
+TRACE_EVENT(vhost_virtio_update_avail_event,
+   TP_PROTO(struct vhost_virtqueue *vq),
+   TP_ARGS(vq),
+
+   TP_STRUCT__entry(
+   __field(struct vhost_virtqueue *, vq)
+   __field(u16, avail_idx)
+   ),
+
+   TP_fast_assign(
+   __entry-vq = vq;
+   __entry-avail_idx = vq-avail_idx;
+   ),
+
+   TP_printk(vhost update avail idx %u(%u) for vq %p,
+ __entry-avail_idx, __entry-avail_idx %
+ __entry-vq-num, __entry-vq)
+);
+
+/*
+ * Tracepoint for processing descriptor.
+ */
+TRACE_EVENT(vhost_virtio_get_vq_desc,
+   TP_PROTO(struct vhost_virtqueue *vq, unsigned int index,
+unsigned out, unsigned int in),
+   TP_ARGS(vq, index, out, in),
+
+   TP_STRUCT__entry(
+   __field(struct vhost_virtqueue *, vq)
+   __field(unsigned int, head)
+   __field(unsigned int, out)
+   __field(unsigned int, in)
+   ),
+
+   TP_fast_assign(
+   __entry-vq = vq;
+   __entry-head = index;
+   __entry-out = out;
+   __entry-in = in;
+   ),
+
+   TP_printk(vhost get vq %p head %u out %u in %u,
+ __entry-vq, __entry-head, __entry-out, __entry-in)
+
+);
+
+/*
+ * Tracepoint for signal guest.
+ */
+TRACE_EVENT(vhost_virtio_signal,
+   TP_PROTO(struct vhost_virtqueue *vq),
+   TP_ARGS(vq),
+
+   TP_STRUCT__entry(
+   __field(struct vhost_virtqueue *, vq)
+   ),
+
+   TP_fast_assign(
+   __entry-vq = vq;
+   ),
+
+   TP_printk(vhost signal vq %p, __entry-vq)
+);
+
+DECLARE_EVENT_CLASS(vhost_work_template,
+   TP_PROTO(struct vhost_dev *dev, struct vhost_work *work),
+   TP_ARGS(dev, work),
+
+   TP_STRUCT__entry(
+   __field(struct vhost_dev *, dev)
+   __field(struct vhost_work *, work)
+   __field(void *, function)
+   ),
+
+   TP_fast_assign(
+   __entry-dev = dev;
+   __entry-work = work;
+   __entry-function = work-fn;
+   ),
+
+   TP_printk(%pf for work %p dev %p,
+   __entry-function, __entry-work, __entry-dev)
+);
+
+DEFINE_EVENT(vhost_work_template, vhost_work_queue_wakeup,
+   TP_PROTO(struct vhost_dev *dev, struct vhost_work *work),
+   TP_ARGS(dev, work));
+
+DEFINE_EVENT(vhost_work_template, vhost_work_queue_coalesce,
+   TP_PROTO(struct vhost_dev *dev, struct vhost_work *work),
+   TP_ARGS(dev, work));
+
+DEFINE_EVENT(vhost_work_template, vhost_poll_start,
+   TP_PROTO(struct vhost_dev *dev, struct vhost_work *work),
+   TP_ARGS(dev, work));
+
+DEFINE_EVENT(vhost_work_template, vhost_poll_stop,
+   TP_PROTO(struct vhost_dev *dev, struct vhost_work *work),
+   TP_ARGS(dev, work));
+
+DEFINE_EVENT(vhost_work_template, vhost_work_execute_start,
+   TP_PROTO(struct vhost_dev *dev, struct vhost_work *work),
+   TP_ARGS(dev, work));
+
+DEFINE_EVENT(vhost_work_template, vhost_work_execute_end,
+   TP_PROTO(struct vhost_dev *dev, struct vhost_work *work),
+   TP_ARGS(dev, work));
+
+#endif /* _TRACE_VHOST_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../drivers/vhost
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+
+/* This part must be outside protection */
+#include trace/define_trace.h
+
diff --git 

[PATCH 2/2] tools: virtio: add a top-like utility for displaying vhost satistics

2012-04-09 Thread Jason Wang
This patch adds simple python to display vhost satistics of vhost, the codes
were based on kvm_stat script from qemu. As work function has been recored,
filters could be used to distinguish which kinds of work are being executed or
queued:

vhost statistics

 vhost_virtio_get_vq_desc   1460997  219682
 vhost_work_execute_start101248   12842
 vhost_work_execute_end  101247   12842
 vhost_work_queue_wakeup 101263   12841
 vhost_virtio_signal  684528659
 vhost_work_queue_wakeup(rx_net)  517976584
 vhost_work_execute_start(rx_net) 517956584
 vhost_work_queue_coalesce357376571
 vhost_work_queue_coalesce(rx_net)357096566
 vhost_virtio_update_avail_event  495126271
 vhost_work_execute_start(tx_kick)494296254
 vhost_work_queue_wakeup(tx_kick) 494426252
 vhost_work_queue_coalesce(tx_kick)  28   5
 vhost_work_execute_start(rx_kick)   22   3
 vhost_work_queue_wakeup(rx_kick)22   3
 vhost_poll_start 4   0

Signed-off-by: Jason Wang jasow...@redhat.com
---
 tools/virtio/vhost_stat |  360 +++
 1 files changed, 360 insertions(+), 0 deletions(-)
 create mode 100755 tools/virtio/vhost_stat

diff --git a/tools/virtio/vhost_stat b/tools/virtio/vhost_stat
new file mode 100755
index 000..b730f3b
--- /dev/null
+++ b/tools/virtio/vhost_stat
@@ -0,0 +1,360 @@
+#!/usr/bin/python
+#
+# top-like utility for displaying vhost statistics
+#
+# Copyright 2012 Red Hat, Inc.
+#
+# Modified from kvm_stat from qemu
+#
+# This work is licensed under the terms of the GNU GPL, version 2.  See
+# the COPYING file in the top-level directory.
+
+import curses
+import sys, os, time, optparse
+
+work_types = {
+handle_rx_kick : rx_kick,
+handle_tx_kick : tx_kick,
+handle_rx_net : rx_net,
+handle_tx_net : tx_net,
+vhost_attach_cgroups_work: cg_attach
+}
+
+addr = {}
+
+kallsyms = file(/proc/kallsyms).readlines()
+for kallsym in kallsyms:
+entry = kallsym.split()
+if entry[2] in work_types.keys():
+addr[0x%s % entry[0]] = work_types[entry[2]]
+
+filters = {
+'vhost_work_queue_wakeup': ('function', addr),
+'vhost_work_queue_coalesce' : ('function', addr),
+'vhost_work_execute_start' : ('function', addr),
+'vhost_poll_start' : ('function', addr),
+'vhost_poll_stop' : ('function', addr),
+}
+
+def invert(d):
+return dict((x[1], x[0]) for x in d.iteritems())
+
+for f in filters:
+filters[f] = (filters[f][0], invert(filters[f][1]))
+
+import ctypes, struct, array
+
+libc = ctypes.CDLL('libc.so.6')
+syscall = libc.syscall
+class perf_event_attr(ctypes.Structure):
+_fields_ = [('type', ctypes.c_uint32),
+('size', ctypes.c_uint32),
+('config', ctypes.c_uint64),
+('sample_freq', ctypes.c_uint64),
+('sample_type', ctypes.c_uint64),
+('read_format', ctypes.c_uint64),
+('flags', ctypes.c_uint64),
+('wakeup_events', ctypes.c_uint32),
+('bp_type', ctypes.c_uint32),
+('bp_addr', ctypes.c_uint64),
+('bp_len', ctypes.c_uint64),
+]
+def _perf_event_open(attr, pid, cpu, group_fd, flags):
+return syscall(298, ctypes.pointer(attr), ctypes.c_int(pid),
+   ctypes.c_int(cpu), ctypes.c_int(group_fd),
+   ctypes.c_long(flags))
+
+PERF_TYPE_HARDWARE  = 0
+PERF_TYPE_SOFTWARE  = 1
+PERF_TYPE_TRACEPOINT= 2
+PERF_TYPE_HW_CACHE  = 3
+PERF_TYPE_RAW   = 4
+PERF_TYPE_BREAKPOINT= 5
+
+PERF_SAMPLE_IP  = 1  0
+PERF_SAMPLE_TID = 1  1
+PERF_SAMPLE_TIME= 1  2
+PERF_SAMPLE_ADDR= 1  3
+PERF_SAMPLE_READ= 1  4
+PERF_SAMPLE_CALLCHAIN   = 1  5
+PERF_SAMPLE_ID  = 1  6
+PERF_SAMPLE_CPU = 1  7
+PERF_SAMPLE_PERIOD  = 1  8
+PERF_SAMPLE_STREAM_ID   = 1  9
+PERF_SAMPLE_RAW = 1  10
+
+PERF_FORMAT_TOTAL_TIME_ENABLED  = 1  0
+PERF_FORMAT_TOTAL_TIME_RUNNING  = 1  1
+PERF_FORMAT_ID  = 1  2
+PERF_FORMAT_GROUP   = 1  3
+
+import re
+
+sys_tracing = '/sys/kernel/debug/tracing'
+
+class Group(object):
+def __init__(self, cpu):
+self.events = []
+self.group_leader = None
+self.cpu = cpu
+def add_event(self, name, event_set, tracepoint, filter = None):
+self.events.append(Event(group = self,
+ name = name, event_set = event_set,
+ tracepoint = tracepoint, filter = filter))
+if len(self.events) == 1:
+