Hi Eric, Thank you for following up on this despite your having more important matters on your shoulders :\
Le jeu. 17 févr. 2022 21h02 +0000, Eric Wong a écrit : > > Looks like it is waiting on sysread() in > > https://public-inbox.org/public-inbox.git/tree/t/lei-sigpipe.t#n35 > > OK, that makes more sense... My tracing with print statements shows that sysread is passed and that it is $tp->join which hangs: [nixbld@localhost:/build]$ cat stderr.log > lei-sigpipe: begin > lei-sigpipe: start_script: done > lei-sigpipe: close w: done > lei-sigpipe: sysread > lei-sigpipe: sysread: done > lei-sigpipe: close r: done [nixbld@localhost:/build]$ cat start_script.log > start_script: begin > start_script: fork > start_script: pid!=0 > start_script: pid==0 > start_script: pid!=0: tail > start_script: pid==0: if sub > start_script: pid==0: exec lei ps auxwwf | grep nixbld > nixbld1 491489 0.1 0.0 5312 4036 ? Ss 02:12 0:01 \_ > bash -e /nix/store/qf3mzpvsmkrw963xchbivcci06078n13-builder.sh > nixbld1 494307 0.0 0.1 16672 12616 ? S 02:12 0:00 > \_ perl -MExtUtils::Command::MM -MTest::Harness -e undef > *Test::Harness::Switches; test_harness(0, 'blib/lib', 'blib/arch') > t/lei-sigpipe.t > nixbld1 494311 0.2 0.5 49320 40068 ? S 02:12 0:02 | > \_ t/lei-sigpipe.t > nixbld1 494445 0.0 0.0 0 0 ? Z 02:12 0:00 | > \_ [/nix/store/176g] <defunct> > nixbld1 494447 0.0 0.1 14388 10176 ? S 02:12 0:00 | > \_ /nix/store/6wcdxh3h5jp9qgrccq3n71wnalca5mf5-perl-5.34.0/bin/perl -w > -I/nix/store/6wcdxh3h5jp9qgrccq3n71wnalca5mf5-perl-5.34.0/lib/perl5/site_perl > -I/nix/store/jibsc747pfzmyxp5lsqgzyvrs2lnkws6-perl5.34.0-Mail-IMAPClient-3.42/lib/perl5/site_perl > -I/nix/st blib/script/lei q -q -t z:1.. > nixbld1 494427 0.1 0.5 50664 41892 ? S 02:12 0:00 > \_ lei-daemon > /build/pi-lei-sigpipe-2750-0bv8/lei-daemon/xdg_run/lei/5.seq.sock > nixbld1 494433 0.0 0.4 50260 39580 ? S 02:12 0:00 > \_ lei/store /build/pi-lei-sigpipe-2750-0bv8/lei-daemon/.local/share The defunct process with truncated path "/nix/store/176g" is points to /nix/store/176gh50y24c0lx2bnnmsvf9wazb73php-coreutils-9.0 It appears only when I export TAIL=${coreutils}/bin/tail which does not workaround the hang. This deadlock appears only when the test is run by nix, it doesn't when I run it manually from a terminal by entering the sandbox: [nixbld@localhost:/]$ (cd /build/public-inbox-1.7.0; export PERL_INLINE_DIRECTORY=$PWD/inline-c; rm -rf $PERL_INLINE_DIRECTORY; mkdir $PERL_INLINE_DIRECTORY; prove -bvw t/lei-sigpipe.t ) > t/lei-sigpipe.t .. > ok 1 - lei import $TMPDIR/big.eml > ok 2 - read one byte > ok 3 - signaled > ok 4 - got SIGPIPE > ok 5 - read one byte > ok 6 - signaled -f mboxcl2 > ok 7 - got SIGPIPE -f mboxcl2 > ok 8 - read one byte > ok 9 - signaled -f text > ok 10 - got SIGPIPE -f text > ok 11 - lei daemon-pid (daemon-pid after t/lei-sigpipe.t:58) > ok 12 - daemon running after t/lei-sigpipe.t:58 > ok 13 - lei daemon-kill (daemon-kill after t/lei-sigpipe.t:58) > ok 14 - t/lei-sigpipe.t:58 daemon stopped > ok 15 - t/lei-sigpipe.t:58 daemon XDG_RUNTIME_DIR/lei/errors.log empty > 1..15 > ok > All tests successful. > Files=1, Tests=15, 8 wallclock secs ( 0.06 usr 0.08 sys + 3.55 cusr 2.81 > csys = 6.50 CPU) > Result: PASS
>From d919b3441bdf61e8dfae0c08005b8a77662ddb91 Mon Sep 17 00:00:00 2001 From: Julien Moutinho <[email protected]> Date: Sun, 20 Feb 2022 01:33:04 +0100 Subject: [PATCH] trace t/lei-sigpipe.t --- lib/PublicInbox/TestCommon.pm | 10 ++++++++++ t/lei-sigpipe.t | 22 +++++++++++++++------- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm index ca732811..d8416f90 100644 --- a/lib/PublicInbox/TestCommon.pm +++ b/lib/PublicInbox/TestCommon.pm @@ -439,6 +439,9 @@ sub start_script { my $run_mode = $ENV{TEST_RUN_MODE} // $opt->{run_mode} // 2; my $sub = $run_mode == 0 ? undef : key2sub($key); my $tail; + open my $errfh, '+>>', "/build/start_script.log" or xbail $!; + $errfh->autoflush(1); + print $errfh "\nstart_script: begin\n"; if ($tail_cmd) { my @paths; for (@argv) { @@ -459,8 +462,10 @@ sub start_script { } $tail = tail_f(@paths); } + print $errfh "\nstart_script: fork\n"; my $pid = fork // die "fork: $!\n"; if ($pid == 0) { + print $errfh "\nstart_script: pid==0\n"; eval { PublicInbox::DS->Reset }; # pretend to be systemd (cf. sd_listen_fds(3)) # 3 == SD_LISTEN_FDS_START @@ -484,18 +489,23 @@ sub start_script { } if ($opt->{-C}) { chdir($opt->{-C}) or die "chdir: $!" } $0 = join(' ', @$cmd); + print $errfh "\nstart_script: pid==0: if sub\n"; if ($sub) { + print $errfh "\nstart_script: pid==0: sub\n"; eval { PublicInbox::DS->Reset }; _run_sub($sub, $key, \@argv); POSIX::_exit($? >> 8); } else { + print $errfh "\nstart_script: pid==0: exec $key\n"; exec(key2script($key), @argv); die "FAIL: ",join(' ', $key, @argv), ": $!\n"; } } + print $errfh "\nstart_script: pid!=0\n"; require PublicInbox::AutoReap; my $td = PublicInbox::AutoReap->new($pid); $td->{-extra} = $tail; + print $errfh "\nstart_script: pid!=0: tail\n"; $td; } diff --git a/t/lei-sigpipe.t b/t/lei-sigpipe.t index 6b2772a6..e5e701dd 100644 --- a/t/lei-sigpipe.t +++ b/t/lei-sigpipe.t @@ -6,7 +6,7 @@ use v5.10.1; use PublicInbox::TestCommon; use POSIX qw(WTERMSIG WIFSIGNALED SIGPIPE); test_lei(sub { - my $f = "$ENV{HOME}/big.eml"; + my $f = "/build/big.eml"; my $imported; for my $out ([], [qw(-f mboxcl2)], [qw(-f text)]) { pipe(my ($r, $w)) or BAIL_OUT $!; @@ -27,25 +27,33 @@ EOM } lei_ok(qw(import), $f) if $imported++ == 0; - open my $errfh, '+>>', "$ENV{HOME}/stderr.log" or xbail $!; + open my $errfh, '+>>', "/build/stderr.log" or xbail $!; + $errfh->autoflush(1); + print $errfh "\nlei-sigpipe: begin\n"; my $opt = { run_mode => 0, 2 => $errfh, 1 => $w }; my $cmd = [qw(lei q -q -t), @$out, 'z:1..']; my $tp = start_script($cmd, undef, $opt); + print $errfh "\nlei-sigpipe: start_script: done\n"; close $w; + print $errfh "\nlei-sigpipe: close w: done\n"; vec(my $rvec = '', fileno($r), 1) = 1; - if (!select($rvec, undef, undef, 30)) { + if (!select($rvec, undef, undef, 3)) { seek($errfh, 0, 0) or xbail $!; my $s = do { local $/; <$errfh> }; - xbail "lei q had no output after 30s, stderr=$s"; + xbail "lei q had no output after 3s, stderr=$s"; } + print $errfh "\nlei-sigpipe: sysread\n"; is(sysread($r, my $buf, 1), 1, 'read one byte'); + print $errfh "\nlei-sigpipe: sysread: done\n"; close $r; # trigger SIGPIPE + print $errfh "\nlei-sigpipe: close r: done\n"; $tp->join; + print $errfh "\nlei-sigpipe: join: done\n"; ok(WIFSIGNALED($?), "signaled @$out"); is(WTERMSIG($?), SIGPIPE, "got SIGPIPE @$out"); - seek($errfh, 0, 0) or xbail $!; - my $s = do { local $/; <$errfh> }; - is($s, '', "quiet after sigpipe @$out"); + #seek($errfh, 0, 0) or xbail $!; + #my $s = do { local $/; <$errfh> }; + #is($s, '', "quiet after sigpipe @$out"); } }); -- 2.34.1
