Hi,

I'd like to use rsyslog as central log collection solution.
Manwhile I've compiled and installed as rsyslog 5.6.4 with a stock
configuration as drop in replacement to local ksyslogd
But I see a high quick cpu usage bursts by rsyslog every few seconds
while it barely does anything.
Most of the cpu time is spent in user mode.
This happends on ec2 large instance with Centos-5.5.

Below are the configs and sample strace/lsof of the rsyslogd process.

Any idea why this happens?

[root@www1a ~]# cat /etc/rsyslog.conf | grep -v -e '^$' -e '^#'
$ActionFileDefaultTemplate RSYSLOG_TraditionalFileFormat
$ModLoad imklog
$ModLoad imuxsock
*.info;mail.none;authpriv.none;cron.none                /var/log/messages
authpriv.*                                              /var/log/secure
mail.*                                                  -/var/log/maillog
cron.*                                                  /var/log/cron
*.emerg                                                 *
uucp,news.crit                                          /var/log/spooler
local7.*                                                /var/log/boot.log
[root@www1a ~]# 
[root@www1a ~]# lsof -p $(pgrep rsyslogd)
COMMAND    PID USER   FD   TYPE             DEVICE     SIZE       NODE NAME
rsyslogd 29994 root  cwd    DIR                8,1     4096          2 /
rsyslogd 29994 root  rtd    DIR                8,1     4096          2 /
rsyslogd 29994 root  txt    REG                8,1  1961412    2378118
/usr/local/rsyslog/5.6.4/sbin/rsyslogd
rsyslogd 29994 root  mem    REG                8,1   137256    1720349
/lib64/ld-2.5.so
rsyslogd 29994 root  mem    REG                8,1    83344    1543492
/usr/lib64/libz.so.1.2.3
rsyslogd 29994 root  mem    REG                8,1   142696    1720364
/lib64/libpthread-2.5.so
rsyslogd 29994 root  mem    REG                8,1    20424    1720336
/lib64/libdl-2.5.so
rsyslogd 29994 root  mem    REG                8,1    50288    1720370
/lib64/librt-2.5.so
rsyslogd 29994 root  mem    REG                8,1  1712536    1720326
/lib64/libc-2.5.so
rsyslogd 29994 root  mem    REG                8,1    85956    2378093
/usr/local/rsyslog/5.6.4/lib/rsyslog/lmnet.so
rsyslogd 29994 root  mem    REG                8,1    90745    2378128
/usr/local/rsyslog/5.6.4/lib/rsyslog/imklog.so
rsyslogd 29994 root  mem    REG                8,1    53471    2378110
/usr/local/rsyslog/5.6.4/lib/rsyslog/imuxsock.so
rsyslogd 29994 root    0r   REG                0,3        0 4026531849
/proc/kmsg
rsyslogd 29994 root    1w   REG                8,1     5019    1048938
/var/log/messages
rsyslogd 29994 root    2w   REG                8,1 18336275    1048939
/var/log/secure
rsyslogd 29994 root    3u  unix 0xffff8800241a2a80            80601917
/dev/log
rsyslogd 29994 root    4w   REG                8,1   214769    1048943
/var/log/cron
[root@www1a ~]#
[root@www1a ~]# strace -c -r -T -q -f -p $(pgrep rsyslogd)
% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ----------------
   nan    0.000000           0         4           write
   nan    0.000000           0         4           select
   nan    0.000000           0         4           recvfrom
   nan    0.000000           0         4           gettimeofday
   nan    0.000000           0        20         4 futex
------ ----------- ----------- --------- --------- ----------------
100.00    0.000000                    36         4 total
[root@www1a ~]#                               
[root@www1a ~]# strace -v -r -T -q -f -p $(pgrep rsyslogd)
[pid 29995]      0.000000 futex(0x6a5d5c, FUTEX_WAIT, 1167, NULL
<unfinished ...>
[pid 29994]      0.005107 select(1, NULL, NULL, NULL, {85286, 180000}
<unfinished ...>
[pid 29997]      0.000049 read(0,  <unfinished ...>
[pid 29998]      0.000021 select(4, [3], NULL, NULL, NULL) = 1 (in [3])
<4.329724>
[pid 29998]      4.329868 recvfrom(3, "<86>Mar 24 14:01:07 sshd[4487]:
"..., 2048, 0, NULL, NULL) = 92 <0.000015>
[pid 29998]      0.000097 gettimeofday({1300975267, 753128}, NULL) = 0
<0.000010>
[pid 29998]      0.000079 futex(0x6a5d5c, FUTEX_WAKE_OP, 1, 1, 0x6a5d58,
{FUTEX_OP_SET, 0, FUTEX_OP_CMP_GT, 1}) = 1 <0.000014>
[pid 29998]      0.000052 select(4, [3], NULL, NULL, NULL <unfinished ...>
[pid 29995]      0.000021 <... futex resumed> ) = 0 <4.335124>
[pid 29995]      0.000077 futex(0x6a1b30, FUTEX_WAKE, 1) = 0 <0.000057>
[pid 29995]      0.000203 write(2, "Mar 24 14:01:07 www1a
sshd[4487]"..., 95 <unfinished ...>
[pid 29998]      0.024438 <... select resumed> ) = 1 (in [3]) <0.024706>
[pid 29995]      0.000024 <... write resumed> ) = 95 <0.024442>
[pid 29998]      0.000247 recvfrom(3, "<86>Mar 24 14:01:07 sshd[4487]:
"..., 2048, 0, NULL, NULL) = 95 <0.000014>
[pid 29998]      0.000066 gettimeofday({1300975267, 778334}, NULL) = 0
<0.000010>
[pid 29998]      0.000078 select(4, [3], NULL, NULL, NULL <unfinished ...>
[pid 29995]      0.005155 write(2, "Mar 24 14:01:07 www1a
sshd[4487]"..., 98) = 98 <0.000345>
[pid 29995]      0.000489 futex(0x6a5d5c, FUTEX_WAIT, 1169, NULL
<unfinished ...>
[pid 29998]      0.107333 <... select resumed> ) = 1 (in [3]) <0.112941>
[pid 29998]      0.000053 recvfrom(3, "<86>Mar 24 14:01:07 sshd[4487]:
"..., 2048, 0, NULL, NULL) = 84 <0.000016>
[pid 29998]      0.000068 gettimeofday({1300975267, 891512}, NULL) = 0
<0.000010>
[pid 29998]      0.000079 futex(0x6a5d5c, FUTEX_WAKE_OP, 1, 1, 0x6a5d58,
{FUTEX_OP_SET, 0, FUTEX_OP_CMP_GT, 1}) = 1 <0.000014>
[pid 29998]      0.000050 select(4, [3], NULL, NULL, NULL <unfinished ...>
[pid 29995]      0.001586 <... futex resumed> ) = 0 <0.109150>
[pid 29995]      0.000071 futex(0x6a1b30, FUTEX_WAKE, 1) = 0 <0.000046>
[pid 29995]      0.000169 write(2, "Mar 24 14:01:07 www1a
sshd[4487]"..., 87) = 87 <0.000065>
[pid 29995]      0.000194 futex(0x6a5d5c, FUTEX_WAIT, 1171, NULL
<unfinished ...>
[pid 29998]      1.420908 <... select resumed> ) = 1 (in [3]) <1.422893>
[pid 29998]      0.000049 recvfrom(3, "<86>Mar 24 14:01:09 sshd[4517]:
"..., 2048, 0, NULL, NULL) = 64 <0.000014>
[pid 29998]      0.000503 gettimeofday({1300975269, 315123}, NULL) = 0
<0.000042>
[pid 29998]      0.000121 futex(0x6a5d5c, FUTEX_WAKE_OP, 1, 1, 0x6a5d58,
{FUTEX_OP_SET, 0, FUTEX_OP_CMP_GT, 1} <unfinished ...>
[pid 29995]      0.000490 <... futex resumed> ) = 0 <1.422056>
[pid 29998]      0.000010 <... futex resumed> ) = 1 <0.000487>
[pid 29995]      0.000122 futex(0x6a1b30, FUTEX_WAKE, 1 <unfinished ...>
[pid 29998]      0.000019 select(4, [3], NULL, NULL, NULL <unfinished ...>
[pid 29995]      0.000067 <... futex resumed> ) = 0 <0.000072>
[pid 29995]      0.000127 write(2, "Mar 24 14:01:09 www1a
sshd[4517]"..., 67) = 67 <0.000105>
[pid 29995]      0.001215 futex(0x6a5d5c, FUTEX_WAIT, 1173, NULL
<unfinished ...>
[pid 29998]      4.916991 <... select resumed> ) = 1 (in [3]) <4.918369>
[pid 29998]      0.000050 recvfrom(3, "<86>Mar 24 14:01:14 sshd[4518]:
"..., 2048, 0, NULL, NULL) = 92 <0.000013>
[pid 29998]      0.000060 gettimeofday({1300975274, 234392}, NULL) = 0
<0.000009>
[pid 29998]      0.000079 futex(0x6a5d5c, FUTEX_WAKE_OP, 1, 1, 0x6a5d58,
{FUTEX_OP_SET, 0, FUTEX_OP_CMP_GT, 1}) = 1 <0.000014>
[pid 29998]      0.000050 select(4, [3], NULL, NULL, NULL <unfinished ...>
[pid 29995]      0.002861 <... futex resumed> ) = 0 <4.920060>
[pid 29995]      0.000177 futex(0x6a1b30, FUTEX_WAKE, 1) = 0 <0.000059>
[pid 29995]      0.000218 write(2, "Mar 24 14:01:14 www1a
sshd[4518]"..., 95) = 95 <0.000076>
[pid 29995]      0.000378 futex(0x6a5d5c, FUTEX_WAIT, 1175, NULL
<unfinished ...>
[pid 29998]      0.067782 <... select resumed> ) = 1 (in [3]) <0.071383>
[pid 29998]      0.000054 recvfrom(3, "<86>Mar 24 14:01:14 sshd[4518]:
"..., 2048, 0, NULL, NULL) = 95 <0.000014>
[pid 29998]      0.000069 gettimeofday({1300975274, 306060}, NULL) = 0
<0.000011>
[pid 29998]      0.000080 futex(0x6a5d5c, FUTEX_WAKE_OP, 1, 1, 0x6a5d58,
{FUTEX_OP_SET, 0, FUTEX_OP_CMP_GT, 1}) = 1 <0.000014>
[pid 29998]      0.000050 select(4, [3], NULL, NULL, NULL <unfinished ...>
[pid 29995]      0.001890 <... futex resumed> ) = 0 <0.069908>
[pid 29995]      0.000070 futex(0x6a1b30, FUTEX_WAKE, 1) = 0 <0.000044>
[pid 29995]      0.000170 write(2, "Mar 24 14:01:14 www1a
sshd[4518]"..., 98) = 98 <0.000069>
[pid 29995]      0.000285 futex(0x6a5d5c, FUTEX_WAIT, 1177, NULL
<unfinished ...>
[pid 29998]      0.098664 <... select resumed> ) = 1 (in [3]) <0.101048>
[pid 29998]      0.001766 recvfrom(3, "<86>Mar 24 14:01:14 sshd[4518]:
"..., 2048, 0, NULL, NULL) = 84 <0.000047>
[pid 29998]      0.000125 gettimeofday({1300975274, 409161}, NULL) = 0
<0.000039>
[pid 29998]      0.000119 futex(0x6a5d5c, FUTEX_WAKE_OP, 1, 1, 0x6a5d58,
{FUTEX_OP_SET, 0, FUTEX_OP_CMP_GT, 1} <unfinished ...>
[pid 29995]      0.000062 <... futex resumed> ) = 0 <0.100721>
[pid 29998]      0.000009 <... futex resumed> ) = 1 <0.000058>
[pid 29995]      0.000071 futex(0x6a1b30, FUTEX_WAKE, 1 <unfinished ...>
[pid 29998]      0.000019 select(4, [3], NULL, NULL, NULL <unfinished ...>
[pid 29995]      0.000065 <... futex resumed> ) = 0 <0.000070>
[pid 29995]      0.000118 write(2, "Mar 24 14:01:14 www1a
sshd[4518]"..., 87) = 87 <0.000069>
[pid 29995]      0.000200 futex(0x6a5d5c, FUTEX_WAIT, 1179, NULL
<unfinished ...>
[root@www1a ~]#     


PS. One difference I noticed between stock rsyslog and syslog-ng configs
is that rsyslog holds just one fd open to /dev/log
and select's it, while syslog-ng holds about 15 different fd's to
/dev/log and epoll's them.
Any particular reason for such a drastic difference?

Thanks
Alex
                             
_______________________________________________
rsyslog mailing list
http://lists.adiscon.net/mailman/listinfo/rsyslog
http://www.rsyslog.com

Reply via email to