Hi all,

I am trying to debug a very strange bug where the apache child takes some
time to compute the (html) output, but eventually writes it to the fd.
However, looking at the http stream with wireshark shows that the
client/server connection never sees that output, it continues to send
keepalive msgs back and forth.

Apache parent/child communication seems to be broken... but not entirely:
the child poll()s the fd and gets a new request to process from the parent.

Any hints to narrow this down? Is there a timeout I should be looking for?
Known parent/child communication issues?

(mod_php is also involved, but I think this strace mostly absolves it from
guilt)

chdir("/")                              = 0
pwrite(17, "SESSION|O:8:\"stdClass\":9:{s:7:\"f"..., 83538, 0) = 83538
close(17)                               = 0
write(14, "\1\0\0\0\1", 5)              = 5
shutdown(14, 2 /* send and receive */)  = 0
close(14)                               = 0
close(18)                               = 0
futex(0x7f3e4273f088, FUTEX_WAIT, 2, NULL) = 0
futex(0x7f3e4273f088, FUTEX_WAKE, 1)    = 0
futex(0x7f3e4273f088, FUTEX_WAIT, 2, NULL) = 0
futex(0x7f3e4273f088, FUTEX_WAKE, 1)    = 0
futex(0x7f3e4273f088, FUTEX_WAIT, 2, NULL) = 0
futex(0x7f3e4273f088, FUTEX_WAIT, 2, NULL) = 0
futex(0x7f3e4273f088, FUTEX_WAKE, 1)    = 0
umask(022)                              = 0
setitimer(ITIMER_PROF, {it_interval={0, 0}, it_value={0, 0}}, NULL) = 0
setitimer(ITIMER_PROF, {it_interval={0, 0}, it_value={300, 0}}, NULL) = 0
brk(0x7f3e7036c000)                     = 0x7f3e7036c000
brk(0x7f3e6f9ec000)                     = 0x7f3e6f9ec000
setitimer(ITIMER_PROF, {it_interval={0, 0}, it_value={0, 0}}, NULL) = 0

# here is the writev() to the fd to the parent - >20K which is about what
we expect
# this never appears in the network
writev(12, [{"HTTP/1.1 200 OK\r\nDate: Thu, 17 O"..., 551},
{"\37\213\10\0\0\0\0\0\0\3", 10},
{"\355}\353v\3346\362\347\347\370)\220\316f,\317\364E\27\313\311\337\2664+\313v\3429\226\255\261"...,
8096},
{"\365\f[[!\3123\333Z}\220e\330\5\367k6\263=\201h\324\357cW\307\7\324\367\206
\201"..., 5976}, {"\213\217\210y`\222\2\0", 8}], 5) = 14641
gettimeofday({1382045277, 805849}, NULL) = 0
gettimeofday({1382045277, 806019}, NULL) = 0
gettimeofday({1382045277, 806134}, NULL) = 0
write(11, "174.61.34.0 - - [17/Oct/2013:16:"..., 274) = 274
gettimeofday({1382045277, 806555}, NULL) = 0
times({tms_utime=25626, tms_stime=5554, tms_cutime=0, tms_cstime=0}) =
1363113295
gettimeofday({1382045277, 807014}, NULL) = 0
poll([{fd=12, events=POLLIN}], 1, 1000) = 0 (Timeout)
gettimeofday({1382045278, 809332}, NULL) = 0
gettimeofday({1382045278, 809465}, NULL) = 0
shutdown(12, 1 /* send */)              = 0

# done - and now polling for the next req
poll([{fd=12, events=POLLIN}], 1, 2000) = 0 (Timeout)
close(12)                               = 0
read(7, 0x7fffc30697df, 1)              = -1 EAGAIN (Resource temporarily
unavailable)
gettimeofday({1382045280, 822223}, NULL) = 0
semop(9207812, {{0, -1, SEM_UNDO}}, 1)  = 0
epoll_wait(9, {{EPOLLIN, {u32=1752040504, u64=139906016740408}}}, 2, 10000)
= 1
accept(4, {sa_family=AF_INET6, sin6_port=htons(55341), inet_pton(AF_INET6,
"::ffff:24.190.105.161", &sin6_addr), sin6_flowinfo=0, sin6_scope_id=0},
[28]) = 12
fcntl(12, F_GETFD)                      = 0
fcntl(12, F_SETFD, FD_CLOEXEC)          = 0
semop(9207812, {{0, 1, SEM_UNDO}}, 1)   = 0
gettimeofday({1382045287, 803986}, NULL) = 0
getsockname(12, {sa_family=AF_INET6, sin6_port=htons(80),
inet_pton(AF_INET6, "::ffff:204.13.103.195", &sin6_addr), sin6_flowinfo=0,
sin6_scope_id=0}, [28]) = 0
fcntl(12, F_GETFL)                      = 0x2 (flags O_RDWR)
fcntl(12, F_SETFL, O_RDWR|O_NONBLOCK)   = 0
gettimeofday({1382045287, 804351}, NULL) = 0
gettimeofday({1382045287, 804470}, NULL) = 0
gettimeofday({1382045287, 804538}, NULL) = 0
read(12, "GET /theme/javascript.php/tri/13"..., 8000) = 443

Naturally, this repros only in a busy production environment.

thanks,



m
-- 
 [email protected]
 -  ask interesting questions
 - don't get distracted with shiny stuff  - working code first
 ~ http://docs.moodle.org/en/User:Martin_Langhoff

Reply via email to