Hi there, maybe you could be so kind as to help me with the following issue:
We are running Ceph FS, but there is a recurring problem with the MDS. Sometimes the following error occurs: "mds0: Client 701782 failing to respond to capability release". Listing the session information shows that "num_caps" on that client is much higher than on the other clients (see also the attachment).
The problem is that the load on one of the servers rises to a really high value (80 to 100), regardless of which client is complaining.
I guess my problem is also that I don't really understand the meaning of those "capabilities".
Here are the relevant facts (let me know if you need more):
* CEPH-FS-Client, MDS, MON, OSD all on same server
* Kernel-Client (Kernel: 3.14.16-031416-generic)
* MDS config
o only raised "mds cache size = 5000000" (because previously there
was the error "failing to respond to cache pressure")
Best regards
Mathias
##### CEPH FS ERROR
09:33:30 PROD root@ceph01:~# ceph -s
cluster xxxxxxxxxxxxxxxxxxxxxxx
health HEALTH_WARN
mds0: Client 701782 failing to respond to capability release
monmap e1: 3 mons at
{ceph01=xx.xx.xx.114:6789/0,ceph02=xx.xx.xx.115:6789/0,ceph03=xx.xx.xx.116:6789/0}
election epoch 106, quorum 0,1,2 ceph01,ceph02,ceph03
mdsmap e260: 1/1/1 up {0=ceph01=up:active}, 2 up:standby
.....
-> Load rises immediately
09:33:32 PROD root@ceph01:~# ceph daemon mds.ceph01 session ls
[
{
"id": 701782,
"num_leases": 16,
"num_caps": 221397,
"state": "open",
"replay_requests": 0,
"reconnecting": false,
"inst": "client.701782 xx.xx.xx.114:0\/1344307356",
"client_metadata": {}
},
{
"id": 692103,
"num_leases": 1,
"num_caps": 50115,
"state": "open",
"replay_requests": 0,
"reconnecting": false,
"inst": "client.692103 xx.xx.xx.117:0\/3600471798",
"client_metadata": {}
},
{
"id": 691995,
"num_leases": 2,
"num_caps": 53227,
"state": "open",
"replay_requests": 0,
"reconnecting": false,
"inst": "client.691995 xx.xx.xx.115:0\/1220606159",
"client_metadata": {}
},
{
"id": 692058,
"num_leases": 8,
"num_caps": 49722,
"state": "open",
"replay_requests": 0,
"reconnecting": false,
"inst": "client.692058 xx.xx.xx.116:0\/4048537076",
"client_metadata": {}
}
]
09:38:18 PROD root@ceph01:~# ceph daemon mds.ceph01 perf dump
{
"mds": {
"request": 1387754,
"reply": 1387696,
"reply_latency": {
"avgcount": 1387696,
"sum": 6439.991891758
},
"forward": 0,
"dir_fetch": 57946,
"dir_commit": 35053,
"dir_split": 0,
"inode_max": 5000000,
"inodes": 1116643,
"inodes_top": 837156,
"inodes_bottom": 279487,
"inodes_pin_tail": 0,
"inodes_pinned": 292936,
"inodes_expired": 0,
"inodes_with_caps": 269668,
"caps": 374718,
"subtrees": 2,
"traverse": 2591500,
"traverse_hit": 2492810,
"traverse_forward": 0,
"traverse_discover": 0,
"traverse_dir_fetch": 19330,
"traverse_remote_ino": 0,
"traverse_lock": 2350,
"load_cent": 138774897,
"q": 0,
"exported": 0,
"exported_inodes": 0,
"imported": 0,
"imported_inodes": 0
},
"mds_cache": {
"num_strays": 56,
"num_strays_purging": 0,
"num_strays_delayed": 0,
"strays_created": 2835,
"strays_purged": 2802,
"num_recovering_processing": 0,
"num_recovering_enqueued": 0,
"num_recovering_prioritized": 0,
"recovery_started": 0,
"recovery_completed": 0
},
"mds_log": {
"evadd": 376174,
"evex": 377829,
"evtrm": 377829,
"ev": 13815,
"evexg": 0,
"evexd": 1024,
"segadd": 738,
"segex": 738,
"segtrm": 738,
"seg": 31,
"segexg": 0,
"segexd": 1,
"expos": 6882857746,
"wrpos": 6991387600,
"rdpos": 4859818564,
"jlat": 0
},
"mds_mem": {
"ino": 1112733,
"ino+": 1115537,
"ino-": 2804,
"dir": 66813,
"dir+": 67017,
"dir-": 204,
"dn": 1116643,
"dn+": 1121224,
"dn-": 4581,
"cap": 374718,
"cap+": 1005845,
"cap-": 631127,
"rss": 6992420,
"heap": 49060,
"malloc": 18446744073708021059,
"buf": 0
},
"mds_server": {
"handle_client_request": 1387754,
"handle_slave_request": 0,
"handle_client_session": 80950,
"dispatch_client_request": 2526245,
"dispatch_server_request": 0
},
"objecter": {
"op_active": 0,
"op_laggy": 0,
"op_send": 567467,
"op_send_bytes": 0,
"op_resend": 0,
"op_ack": 283387,
"op_commit": 284080,
"op": 567467,
"op_r": 283387,
"op_w": 284080,
"op_rmw": 0,
"op_pg": 0,
"osdop_stat": 24703,
"osdop_create": 40923,
"osdop_read": 24,
"osdop_write": 186035,
"osdop_writefull": 17341,
"osdop_append": 0,
"osdop_zero": 1,
"osdop_truncate": 0,
"osdop_delete": 4721,
"osdop_mapext": 0,
"osdop_sparse_read": 0,
"osdop_clonerange": 0,
"osdop_getxattr": 283361,
"osdop_setxattr": 40923,
"osdop_cmpxattr": 0,
"osdop_rmxattr": 0,
"osdop_resetxattrs": 0,
"osdop_tmap_up": 0,
"osdop_tmap_put": 0,
"osdop_tmap_get": 0,
"osdop_call": 0,
"osdop_watch": 0,
"osdop_notify": 0,
"osdop_src_cmpxattr": 0,
"osdop_pgls": 0,
"osdop_pgls_filter": 0,
"osdop_other": 221016,
"linger_active": 0,
"linger_send": 0,
"linger_resend": 0,
"linger_ping": 0,
"poolop_active": 0,
"poolop_send": 0,
"poolop_resend": 0,
"poolstat_active": 0,
"poolstat_send": 0,
"poolstat_resend": 0,
"statfs_active": 0,
"statfs_send": 0,
"statfs_resend": 0,
"command_active": 0,
"command_send": 0,
"command_resend": 0,
"map_epoch": 1025,
"map_full": 0,
"map_inc": 2,
"osd_sessions": 210,
"osd_session_open": 3589,
"osd_session_close": 3569,
"osd_laggy": 0
},
"throttle-msgr_dispatch_throttler-mds": {
"val": 0,
"max": 104857600,
"get": 6198403,
"get_sum": 1661221790,
"get_or_fail_fail": 0,
"get_or_fail_success": 0,
"take": 0,
"take_sum": 0,
"put": 6198403,
"put_sum": 1661221790,
"wait": {
"avgcount": 0,
"sum": 0.000000000
}
},
"throttle-objecter_bytes": {
"val": 0,
"max": 104857600,
"get": 0,
"get_sum": 0,
"get_or_fail_fail": 0,
"get_or_fail_success": 0,
"take": 567467,
"take_sum": 2263639983,
"put": 562730,
"put_sum": 2263639983,
"wait": {
"avgcount": 0,
"sum": 0.000000000
}
},
"throttle-objecter_ops": {
"val": 0,
"max": 1024,
"get": 0,
"get_sum": 0,
"get_or_fail_fail": 0,
"get_or_fail_success": 0,
"take": 567467,
"take_sum": 567467,
"put": 567467,
"put_sum": 567467,
"wait": {
"avgcount": 0,
"sum": 0.000000000
}
}
}
smime.p7s
Description: S/MIME Cryptographic Signature
_______________________________________________ ceph-users mailing list [email protected] http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com
