Self-follow-up: the ceph version in the cluster I'm working on is 0.80.11, so quite old.
Adding:
admin socket = /var/run/ceph/$cluster-$type.$id.$pid.$cctid.asok
log file = /var/log/ceph/
to /etc/ceph.conf, and then in my case tweaking apparmor (for testing,
simply disabling it):
service apparmor teardown
service apparmor stop
Then shutting down a qemu VM:
virsh shutdown $instance
Then restarting libvirt-bin:
service libvirt-bin restart
Then starting the VM again:
virsh start $instance
That allowed me to get at the perf dump data, which seems to contain
basically what I need for the moment:
{ "librbd--compute/a43efe1b-461a-4b54-923e-09c2e95da1ba_disk": { "rd": 0,
"rd_bytes": 0,
"rd_latency": { "avgcount": 0,
"sum": 0.000000000},
"wr": 0,
"wr_bytes": 0,
"wr_latency": { "avgcount": 0,
"sum": 0.000000000},
"discard": 0,
"discard_bytes": 0,
"discard_latency": { "avgcount": 0,
"sum": 0.000000000},
"flush": 9,
"aio_rd": 4596,
"aio_rd_bytes": 88915968,
"aio_rd_latency": { "avgcount": 4596,
"sum": 7.335787000},
"aio_wr": 114,
"aio_wr_bytes": 1438720,
"aio_wr_latency": { "avgcount": 114,
"sum": 0.011218000},
"aio_discard": 0,
"aio_discard_bytes": 0,
"aio_discard_latency": { "avgcount": 0,
"sum": 0.000000000},
"aio_flush": 0,
"aio_flush_latency": { "avgcount": 0,
"sum": 0.000000000},
"snap_create": 0,
"snap_remove": 0,
"snap_rollback": 0,
"notify": 0,
"resize": 0},
"objectcacher-librbd--compute/a43efe1b-461a-4b54-923e-09c2e95da1ba_disk": {
"cache_ops_hit": 114,
"cache_ops_miss": 4458,
"cache_bytes_hit": 24985600,
"cache_bytes_miss": 88279552,
"data_read": 88764416,
"data_written": 1438720,
"data_flushed": 1438720,
"data_overwritten_while_flushing": 0,
"write_ops_blocked": 0,
"write_bytes_blocked": 0,
"write_time_blocked": 0.000000000},
"objecter": { "op_active": 0,
"op_laggy": 0,
"op_send": 4553,
"op_send_bytes": 0,
"op_resend": 0,
"op_ack": 4552,
"op_commit": 89,
"op": 4553,
"op_r": 4464,
"op_w": 88,
"op_rmw": 1,
"op_pg": 0,
"osdop_stat": 2,
"osdop_create": 0,
"osdop_read": 4458,
"osdop_write": 88,
"osdop_writefull": 0,
"osdop_append": 0,
"osdop_zero": 0,
"osdop_truncate": 0,
"osdop_delete": 0,
"osdop_mapext": 0,
"osdop_sparse_read": 0,
"osdop_clonerange": 0,
"osdop_getxattr": 0,
"osdop_setxattr": 0,
"osdop_cmpxattr": 0,
"osdop_rmxattr": 0,
"osdop_resetxattrs": 0,
"osdop_tmap_up": 0,
"osdop_tmap_put": 0,
"osdop_tmap_get": 0,
"osdop_call": 9,
"osdop_watch": 1,
"osdop_notify": 0,
"osdop_src_cmpxattr": 0,
"osdop_pgls": 0,
"osdop_pgls_filter": 0,
"osdop_other": 88,
"linger_active": 1,
"linger_send": 1,
"linger_resend": 0,
"poolop_active": 0,
"poolop_send": 0,
"poolop_resend": 0,
"poolstat_active": 0,
"poolstat_send": 0,
"poolstat_resend": 0,
"statfs_active": 0,
"statfs_send": 0,
"statfs_resend": 0,
"command_active": 0,
"command_send": 0,
"command_resend": 0,
"map_epoch": 0,
"map_full": 0,
"map_inc": 0,
"osd_sessions": 7140,
"osd_session_open": 119,
"osd_session_close": 0,
"osd_laggy": 1},
"throttle-msgr_dispatch_throttler-radosclient": { "val": 0,
"max": 104857600,
"get": 4643,
"get_sum": 89851514,
"get_or_fail_fail": 0,
"get_or_fail_success": 0,
"take": 0,
"take_sum": 0,
"put": 4643,
"put_sum": 89851514,
"wait": { "avgcount": 0,
"sum": 0.000000000}},
"throttle-objecter_bytes": { "val": 0,
"max": 104857600,
"get": 4553,
"get_sum": 89718272,
"get_or_fail_fail": 0,
"get_or_fail_success": 4553,
"take": 0,
"take_sum": 0,
"put": 4546,
"put_sum": 89718272,
"wait": { "avgcount": 0,
"sum": 0.000000000}},
"throttle-objecter_ops": { "val": 0,
"max": 1024,
"get": 4553,
"get_sum": 4553,
"get_or_fail_fail": 0,
"get_or_fail_success": 4553,
"take": 0,
"take_sum": 0,
"put": 4553,
"put_sum": 4553,
"wait": { "avgcount": 0,
"sum": 0.000000000}}}
Am I missing something here?
One thing I need to figure out is how to fix apparmor to allow this
in enforcing mode.
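My untested guess is that adding something like the following to
/etc/apparmor.d/abstractions/libvirt-qemu (or a local override of it),
and then reloading the profiles, would be enough:

# untested: let qemu create/write the ceph admin socket and client log
/var/run/ceph/** rw,
/var/log/ceph/** rw,

If anyone already has this working in enforcing mode I'd be happy to
hear how.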
Best,
Martin
On Thu, Mar 15, 2018 at 10:53:51AM +0100, Martin Millnert wrote:
> Dear fellow cephalopods,
>
> does anyone have any pointers on how to instrument librbd as-driven-by
> qemu IO performance from a hypervisor?
>
> Are there less intrusive ways than perf or equivalent? Can librbd be
> told to dump statistics somewhere (per volume) - clientside?
>
> This would come in real handy whilst debugging potential performance
> issues troubling me.
>
> Ideally I'd like to get per-volume metrics out that I can submit to
> InfluxDB for presentation in Grafana. But I'll take anything.
>
> Best,
> Martin
