The branch, master has been updated via 85478847a1f WHATSNEW: Add CTDB changes for 4.12 via aa2977e1519 ctdb-mutex: Change default re-check time for fcntl helper to 5s via 14b1dffc27d ctdb-tests: Add some tests to check recovery from recovery lock issues via 64501f51931 ctdb-tests: Put recovery lock for local daemons into a subdirectory via 93fc31858f9 ctdb-tests: Add local_daemons.sh option for recovery lock recheck interval from 13658324a3a CVE-2019-19344 kcc dns scavenging: Fix use after free in dns_tombstone_records_zone
https://git.samba.org/?p=samba.git;a=shortlog;h=master - Log ----------------------------------------------------------------- commit 85478847a1f6bf8027a1a91df23ae746042620db Author: Martin Schwenke <mar...@meltin.net> Date: Fri Dec 20 18:16:13 2019 +1100 WHATSNEW: Add CTDB changes for 4.12 Signed-off-by: Martin Schwenke <mar...@meltin.net> Reviewed-by: Amitay Isaacs <ami...@gmail.com> Autobuild-User(master): Amitay Isaacs <ami...@samba.org> Autobuild-Date(master): Tue Jan 21 13:05:00 UTC 2020 on sn-devel-184 commit aa2977e1519b76b2c70871032bbc5ab85f8a0c45 Author: Martin Schwenke <mar...@meltin.net> Date: Fri Jan 10 14:25:39 2020 +1100 ctdb-mutex: Change default re-check time for fcntl helper to 5s Testing against a commonly used cluster filesystem has shown no performance impact, as expected. Signed-off-by: Martin Schwenke <mar...@meltin.net> Reviewed-by: Amitay Isaacs <ami...@gmail.com> commit 14b1dffc27def76f1c69ff820f4e03dc50ddf4b6 Author: Martin Schwenke <mar...@meltin.net> Date: Fri Jan 10 15:45:48 2020 +1100 ctdb-tests: Add some tests to check recovery from recovery lock issues Signed-off-by: Martin Schwenke <mar...@meltin.net> Reviewed-by: Amitay Isaacs <ami...@gmail.com> commit 64501f519319f83fb6281da50c76275782ee1f6c Author: Martin Schwenke <mar...@meltin.net> Date: Fri Jan 10 14:04:14 2020 +1100 ctdb-tests: Put recovery lock for local daemons into a subdirectory This makes it more like the way it works with a cluster filesystem. It also allows the subdirectory to be manipulated in tests. 
Signed-off-by: Martin Schwenke <mar...@meltin.net> Reviewed-by: Amitay Isaacs <ami...@gmail.com> commit 93fc31858f91c1b4080a223fed905eaac66a90d2 Author: Martin Schwenke <mar...@meltin.net> Date: Fri Jan 17 15:30:01 2020 +1100 ctdb-tests: Add local_daemons.sh option for recovery lock recheck interval Signed-off-by: Martin Schwenke <mar...@meltin.net> Reviewed-by: Amitay Isaacs <ami...@gmail.com> ----------------------------------------------------------------------- Summary of changes: WHATSNEW.txt | 11 +++ ctdb/server/ctdb_mutex_fcntl_helper.c | 2 +- .../simple/cluster.015.reclock_remove_lock.sh | 90 ++++++++++++++++++++++ .../simple/cluster.016.reclock_move_lock_dir.sh | 74 ++++++++++++++++++ ctdb/tests/local_daemons.sh | 17 +++- 5 files changed, 190 insertions(+), 4 deletions(-) create mode 100755 ctdb/tests/INTEGRATION/simple/cluster.015.reclock_remove_lock.sh create mode 100755 ctdb/tests/INTEGRATION/simple/cluster.016.reclock_move_lock_dir.sh Changeset truncated at 500 lines: diff --git a/WHATSNEW.txt b/WHATSNEW.txt index 0faf69e030f..18c787d3cba 100644 --- a/WHATSNEW.txt +++ b/WHATSNEW.txt @@ -129,6 +129,17 @@ Heimdal-DC: removal of weak-crypto. Following removal of DES encryption types from Samba, the embedded Heimdal build has been updated to not compile weak crypto code (HEIM_WEAK_CRYPTO). +CTDB changes +------------ + +* The ctdb_mutex_fcntl_helper periodically re-checks the lock file + + The re-check period is specified using a 2nd argument to this + helper. The default re-check period is 5s. + + If the file no longer exists or the inode number changes then the + helper exits. This triggers an election. 
+ smb.conf changes ================ diff --git a/ctdb/server/ctdb_mutex_fcntl_helper.c b/ctdb/server/ctdb_mutex_fcntl_helper.c index 1448a9062a0..51c46ce733f 100644 --- a/ctdb/server/ctdb_mutex_fcntl_helper.c +++ b/ctdb/server/ctdb_mutex_fcntl_helper.c @@ -398,7 +398,7 @@ int main(int argc, char *argv[]) file = argv[1]; - recheck_time = 60; + recheck_time = 5; if (argc == 3) { recheck_time = smb_strtoul(argv[2], NULL, diff --git a/ctdb/tests/INTEGRATION/simple/cluster.015.reclock_remove_lock.sh b/ctdb/tests/INTEGRATION/simple/cluster.015.reclock_remove_lock.sh new file mode 100755 index 00000000000..d74bcf819b4 --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/cluster.015.reclock_remove_lock.sh @@ -0,0 +1,90 @@ +#!/bin/bash + +# Verify that the cluster recovers if the recovery lock is removed. + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_skip_on_cluster + +ctdb_test_init -r 5 + +generation_has_changed () +{ + local node="$1" + local generation_init="$2" + + # Leak this so it can be printed by test + generation_new="" + + ctdb_onnode "$node" status + # shellcheck disable=SC2154 + # $outfile set by ctdb_onnode() above + generation_new=$(sed -n -e 's/^Generation:\([0-9]*\)/\1/p' "$outfile") + + [ "$generation_new" != "$generation_init" ] +} + +select_test_node + +echo "Get recovery lock setting" +# shellcheck disable=SC2154 +# $test_node set by select_test_node() above +ctdb_onnode "$test_node" getreclock +# shellcheck disable=SC2154 +# $out set by ctdb_onnode() above +reclock_setting="$out" + +if [ -z "$reclock_setting" ] ; then + ctdb_test_skip "Recovery lock is not set" +fi + +t="${reclock_setting% 5}" +reclock="${t##* }" + +if [ ! 
-f "$reclock" ] ; then + ctdb_test_error "Recovery lock file \"${reclock}\" is missing" +fi + +echo "Recovery lock setting is \"${reclock_setting}\"" +echo "Recovery lock file is \"${reclock}\"" +echo + +echo "Get current recovery master" +ctdb_onnode "$test_node" recmaster +recmaster="$out" +echo "Recovery master is node ${recmaster}" +echo + +echo "Get initial generation" +ctdb_onnode "$test_node" status +# shellcheck disable=SC2154 +# $outfile set by ctdb_onnode() above +generation_init=$(sed -n -e 's/^Generation:\([0-9]*\)/\1/p' "$outfile") +echo "Initial generation is ${generation_init}" +echo + +echo "Remove recovery lock" +rm "$reclock" +echo + +# This will mean an election has taken place and a recovery has occurred +echo "Wait until generation changes" +wait_until 30 generation_has_changed "$test_node" "$generation_init" +echo +echo "Generation changed to ${generation_new}" +echo + +echo "Get current recovery master" +ctdb_onnode "$test_node" recmaster +recmaster_new="$out" + +if [ "$recmaster" != "$recmaster_new" ] ; then + ctdb_test_fail \ + "BAD: Recovery master has changed to node ${recmaster_new}" +fi +echo "GOOD: Recovery master is still node ${recmaster_new}" +echo + +cluster_is_healthy diff --git a/ctdb/tests/INTEGRATION/simple/cluster.016.reclock_move_lock_dir.sh b/ctdb/tests/INTEGRATION/simple/cluster.016.reclock_move_lock_dir.sh new file mode 100755 index 00000000000..cd193f05a49 --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/cluster.016.reclock_move_lock_dir.sh @@ -0,0 +1,74 @@ +#!/bin/bash + +# Verify that if the directory containing the recovery lock is moved +# then all nodes are banned (because they can't take the lock). +# Confirm that if the directory is moved back and the bans time out +# then the cluster returns to good health. + +# This simulates the cluster filesystem containing the recovery lock +# being unmounted and remounted. + +. 
"${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_skip_on_cluster + +ctdb_test_init -r 5 + +all_nodes_are_banned () +{ + node="$1" + + ctdb_onnode "$node" nodestatus + [ $? -eq 1 ] + + # shellcheck disable=SC2154 + # $out set by ctdb_onnode() above + [ "$out" = "Warning: All nodes are banned." ] +} + +select_test_node + +echo "Get recovery lock setting" +# shellcheck disable=SC2154 +# $test_node set by select_test_node() above +ctdb_onnode "$test_node" getreclock +# shellcheck disable=SC2154 +# $out set by ctdb_onnode() above +reclock_setting="$out" + +if [ -z "$reclock_setting" ] ; then + ctdb_test_skip "Recovery lock is not set" +fi + +t="${reclock_setting% 5}" +reclock="${t##* }" + +if [ ! -f "$reclock" ] ; then + ctdb_test_error "Recovery lock file \"${reclock}\" is missing" +fi + +echo "Recovery lock setting is \"${reclock_setting}\"" +echo "Recovery lock file is \"${reclock}\"" +echo + +echo "Set ban period to 30s" +ctdb_onnode all setvar RecoveryBanPeriod 30 +echo + +dir=$(dirname "$reclock") + +echo "Rename recovery lock directory" +mv "$dir" "${dir}.$$" +echo + +echo "Wait until all nodes are banned" +wait_until 60 all_nodes_are_banned "$test_node" +echo + +echo "Restore recovery lock directory" +mv "${dir}.$$" "$dir" +echo + +wait_until_ready 60 diff --git a/ctdb/tests/local_daemons.sh b/ctdb/tests/local_daemons.sh index 3f5729b5a24..e45a79c3e82 100755 --- a/ctdb/tests/local_daemons.sh +++ b/ctdb/tests/local_daemons.sh @@ -128,6 +128,7 @@ Options: -n <num> Number of nodes (default: 3) -P <file> Public addresses file (default: automatically generated) -R Use a command for the recovery lock (default: use a file) + -r <time> Like -R and set recheck interval to <time> (default: use a file) -S <library> Socket wrapper shared library to preload (default: none) -6 Generate IPv6 IPs for nodes, public addresses (default: IPv4) EOF @@ -142,18 +143,22 @@ local_daemons_setup () _num_nodes=3 _public_addresses_file="" _recovery_lock_use_command=false + 
_recovery_lock_recheck_interval="" _socket_wrapper="" _use_ipv6=false set -e - while getopts "FN:n:P:RS:6h?" _opt ; do + while getopts "FN:n:P:Rr:S:6h?" _opt ; do case "$_opt" in F) _disable_failover=true ;; N) _nodes_file="$OPTARG" ;; n) _num_nodes="$OPTARG" ;; P) _public_addresses_file="$OPTARG" ;; R) _recovery_lock_use_command=true ;; + r) _recovery_lock_use_command=true + _recovery_lock_recheck_interval="$OPTARG" + ;; S) _socket_wrapper="$OPTARG" ;; 6) _use_ipv6=true ;; \?|h) local_daemons_setup_usage ;; @@ -186,10 +191,16 @@ local_daemons_setup () $_use_ipv6 >"$_public_addresses_all" fi - _recovery_lock="${directory}/rec.lock" + _recovery_lock_dir="${directory}/shared/.ctdb" + mkdir -p "$_recovery_lock_dir" + _recovery_lock="${_recovery_lock_dir}/rec.lock" if $_recovery_lock_use_command ; then _helper="${CTDB_SCRIPTS_HELPER_BINDIR}/ctdb_mutex_fcntl_helper" - _recovery_lock="! ${_helper} ${_recovery_lock}" + _t="! ${_helper} ${_recovery_lock}" + if [ -n "$_recovery_lock_recheck_interval" ] ; then + _t="${_t} ${_recovery_lock_recheck_interval}" + fi + _recovery_lock="$_t" fi if [ -n "$_socket_wrapper" ] ; then -- Samba Shared Repository