Jakub Kicinski wrote:
> ksft runner sends 2 SIGTERMs in a row if a test runs out of time.
> Handle this in a similar way we handle SIGINT - cleanup and stop
> running further tests.
>
> Because we get 2 signals we need a bit of logic to ignore
> the subsequent one, they come immediately one after the other
> (due to commit 9616cb34b08e ("kselftest/runner.sh: Propagate SIGTERM
> to runner child")).
>
> This change makes sure we run cleanup (scheduled defer()s)
> and also print a stack trace on SIGTERM, which doesn't happen
> by default. Tests occasionally hang in NIPA and it's impossible
> to tell what they are waiting for or doing.
>
> Signed-off-by: Jakub Kicinski <[email protected]>
> ---
> CC: [email protected]
> CC: [email protected]
> CC: [email protected]
> CC: [email protected]
> ---
> tools/testing/selftests/net/lib/py/ksft.py | 27 +++++++++++++++++++++-
> 1 file changed, 26 insertions(+), 1 deletion(-)
>
> diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py
> index 3cfad0fd4570..73710634d457 100644
> --- a/tools/testing/selftests/net/lib/py/ksft.py
> +++ b/tools/testing/selftests/net/lib/py/ksft.py
> @@ -3,6 +3,7 @@
> import builtins
> import functools
> import inspect
> +import signal
> import sys
> import time
> import traceback
> @@ -26,6 +27,10 @@ KSFT_DISRUPTIVE = True
> pass
>
>
> +class KsftTerminate(KeyboardInterrupt):
> + pass
> +
> +
> def ksft_pr(*objs, **kwargs):
> print("#", *objs, **kwargs)
>
> @@ -193,6 +198,19 @@ KSFT_DISRUPTIVE = True
> return env
>
>
> +term_cnt = 0
> +
It is a bit ugly to initialize this here, at module scope. Also, it is
already initialized below, in ksft_run(), before the handler is installed.
> +def _ksft_intr(signum, frame):
> + # ksft runner.sh sends 2 SIGTERMs in a row on a timeout
> + # if we don't ignore the second one it will stop us from handling cleanup
> + global term_cnt
> + term_cnt += 1
> + if term_cnt == 1:
> + raise KsftTerminate()
> + else:
> + ksft_pr(f"Ignoring SIGTERM (cnt: {term_cnt}), already exiting...")
> +
> +
> def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
> cases = cases or []
>
> @@ -205,6 +223,10 @@ KSFT_DISRUPTIVE = True
> cases.append(value)
> break
>
> + global term_cnt
> + term_cnt = 0
> + prev_sigterm = signal.signal(signal.SIGTERM, _ksft_intr)
> +
> totals = {"pass": 0, "fail": 0, "skip": 0, "xfail": 0}
>
> print("TAP version 13")
> @@ -229,11 +251,12 @@ KSFT_DISRUPTIVE = True
> cnt_key = 'xfail'
> except BaseException as e:
> stop |= isinstance(e, KeyboardInterrupt)
> + stop |= isinstance(e, KsftTerminate)
> tb = traceback.format_exc()
> for line in tb.strip().split('\n'):
> ksft_pr("Exception|", line)
> if stop:
> - ksft_pr("Stopping tests due to KeyboardInterrupt.")
> + ksft_pr(f"Stopping tests due to {type(e).__name__}.")
> KSFT_RESULT = False
> cnt_key = 'fail'
>
> @@ -248,6 +271,8 @@ KSFT_DISRUPTIVE = True
> if stop:
> break
>
> + signal.signal(signal.SIGTERM, prev_sigterm)
> +
Why is prev_sigterm saved and then reinstalled as the SIGTERM handler here?
> print(
> f"# Totals: pass:{totals['pass']} fail:{totals['fail']} xfail:{totals['xfail']} xpass:0 skip:{totals['skip']} error:0"
> )
> --
> 2.49.0
>