When ovn is upgraded, ovn-controller is updated first on the compute nodes. Then ovn-northd and DB are upgraded. This patch tests whether the intermediate state (i.e. with ovn-controller being upgraded) works properly, running system tests from the base line (i.e. before the upgrade).
Flow tables might change between releases. Hence this patch must take that into account by updating the (old) system tests with any updated table numbers. In some cases, (new) ovn-controller might change flows in existing tables, causing some 'upgrade' tests to fail. Such tests can be skipped using the TAG_TEST_NOT_UPGRADABLE tag. This patch updates the CI to automatically run some upgrade tests weekly, on schedule. It also provides a 'make check-upgrade' target to run those tests locally. This patch depends on patch [1] on branch-25.09. [1] "tests: Add new TAG_TEST_NOT_UPGRADABLE to some tests." Reported-at: https://issues.redhat.com/browse/FDP-1240 Assisted-by: claude, with model: Claude Sonnet 4.5 Signed-off-by: Xavier Simonart <[email protected]> -v2: - Updated based on Ales' feedback: - Move upgrade test logic from complex sh to py script. - Create new yaml for upgrade tests. - Rebased. - Clone Base branch in different folder, to avoid messing up user development folder. - Run upgrade tests through make check-upgrade instead of shell script. - Create CI matrix dynamically so it is more clear which steps are run. - Updated testing.rst. -v3: - Updated based on Mark's feedback. - Avoid repetition of code, use contextmanager & dataclasses. - Do not use sparse locally as compilation might fail. - Upgrade more OVN/OVS binaries such as appctl. - A few other changes such as avoid regexp when possible. - Rebased - Removed testing in CI on pull/push; tests now only run on schedule. - Updated Documentation. 
--- .ci/ci.sh | 5 +- .ci/linux-build.sh | 35 +- .ci/ovn_upgrade_test.py | 104 ++++ .ci/ovn_upgrade_utils.py | 642 ++++++++++++++++++++++++ .github/workflows/ovn-upgrade-tests.yml | 86 ++++ Documentation/topics/testing.rst | 174 +++++++ Makefile.am | 3 + tests/automake.mk | 14 + 8 files changed, 1054 insertions(+), 9 deletions(-) create mode 100755 .ci/ovn_upgrade_test.py create mode 100755 .ci/ovn_upgrade_utils.py create mode 100644 .github/workflows/ovn-upgrade-tests.yml diff --git a/.ci/ci.sh b/.ci/ci.sh index 3640d3243..76c364868 100755 --- a/.ci/ci.sh +++ b/.ci/ci.sh @@ -54,6 +54,9 @@ function archive_logs() { cp -r $CONTAINER_WORKDIR/tests/system-*-testsuite.* \ $log_dir || true \ && \ + cp -r $CONTAINER_WORKDIR/tests/upgrade-testsuite.* \ + $log_dir || true \ + && \ chmod -R +r $log_dir \ && tar -czvf $CONTAINER_WORKSPACE/logs.tgz $log_dir @@ -102,7 +105,7 @@ function run_tests() { ARCH=$ARCH CC=$CC LIBS=$LIBS OPTS=$OPTS TESTSUITE=$TESTSUITE \ TEST_RANGE=$TEST_RANGE SANITIZERS=$SANITIZERS DPDK=$DPDK \ RECHECK=$RECHECK UNSTABLE=$UNSTABLE TIMEOUT=$TIMEOUT \ - ./.ci/linux-build.sh + BASE_VERSION=$BASE_VERSION ./.ci/linux-build.sh " } diff --git a/.ci/linux-build.sh b/.ci/linux-build.sh index 183833a16..d9b49b7b6 100755 --- a/.ci/linux-build.sh +++ b/.ci/linux-build.sh @@ -1,7 +1,12 @@ #!/bin/bash set -o errexit -set -x + +# Enable debug output for CI, optional for local +NO_DEBUG=${NO_DEBUG:-0} +if [ "$NO_DEBUG" = "0" ]; then + set -x +fi ARCH=${ARCH:-"x86_64"} USE_SPARSE=${USE_SPARSE:-"yes"} @@ -181,17 +186,23 @@ function run_system_tests() if ! sudo timeout -k 5m -v $TIMEOUT make $JOBS $type \ TESTSUITEFLAGS="$TEST_RANGE" RECHECK=$RECHECK \ - SKIP_UNSTABLE=$SKIP_UNSTABLE; then - # $log_file is necessary for debugging. - cat tests/$log_file + SKIP_UNSTABLE=$SKIP_UNSTABLE UPGRADE_TEST=$UPGRADE_TEST \ + BASE_VERSION=$BASE_VERSION; then + # Suppress output locally when NO_DEBUG not 0. 
+ if [ "$NO_DEBUG" = "0" ]; then + cat tests/$log_file + fi return 1 fi } function execute_system_tests() { - configure_ovn $OPTS - make $JOBS || { cat config.log; exit 1; } + # Upgrade tests build separately + if [ "$UPGRADE_TEST" != "yes" ]; then + configure_ovn $OPTS + make $JOBS || { cat config.log; exit 1; } + fi local stable_rc=0 local unstable_rc=0 @@ -201,8 +212,12 @@ function execute_system_tests() fi if [ "$UNSTABLE" ]; then - if ! SKIP_UNSTABLE=no TEST_RANGE="-k unstable" RECHECK=yes \ - run_system_tests $@; then + if [[ "$TEST_RANGE" == *"-d"* ]]; then + TEST_RANGE="-k unstable -d" + else + TEST_RANGE="-k unstable" + fi + if ! SKIP_UNSTABLE=no RECHECK=yes run_system_tests $@; then unstable_rc=1 fi fi @@ -238,6 +253,10 @@ if [ "$TESTSUITE" ]; then sudo bash -c "echo 2048 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages" execute_system_tests "check-system-dpdk" "system-dpdk-testsuite.log" ;; + + "upgrade-test") + execute_system_tests "check-upgrade" "system-kmod-testsuite.log" + ;; esac else configure_ovn $OPTS diff --git a/.ci/ovn_upgrade_test.py b/.ci/ovn_upgrade_test.py new file mode 100755 index 000000000..0f13611f5 --- /dev/null +++ b/.ci/ovn_upgrade_test.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python3 + +import atexit +import os +import signal +import sys +from pathlib import Path + + +from ovn_upgrade_utils import ( + log, + chdir, + run_command, + run_shell_command, + ovn_upgrade_save_current_binaries, + ovn_upgrade_extract_info, + run_upgrade_workflow, + remove_upgrade_test_directory, + UpgradeConfig +) + +DEFAULT_BASE_BRANCH = 'branch-24.03' + + +def run_tests(config): + log(f"Running system tests in upgrade scenario with flags " + f"{config.env.flags}") + + # Tests are run from the base-branch folder (when upgrading ocn-controller + # and not yet northd, new features do not work. Hence we cannot use new + # system-tests. We use the latest .ci/linux-build.sh i.e. from + # ovn_root_dir. 
+ with chdir(config.path.base_dir): + no_debug = "0" if config.is_ci else "1" + + cmd = f"""CC={config.env.cc} TESTSUITE=system-test UPGRADE_TEST=yes + TEST_RANGE="{config.env.flags}" UNSTABLE={config.env.unstable} + NO_DEBUG={no_debug} + . {config.path.ovn_root_dir}/.ci/linux-build.sh""" + + success = run_shell_command(cmd) + return success + + +def main(): + test_success = False + + def cleanup(): + flags = os.environ.get('TESTSUITEFLAGS', '') + if '-d' in flags or not test_success: + log(f"Keeping {config.path.upgrade_dir} for debugging") + else: + remove_upgrade_test_directory(config) + + atexit.register(cleanup) + signal.signal(signal.SIGINT, lambda s, f: sys.exit(1)) + signal.signal(signal.SIGTERM, lambda s, f: sys.exit(1)) + + config = UpgradeConfig.get(Path.cwd(), DEFAULT_BASE_BRANCH) + + log("=" * 70) + log(f"OVN Upgrade Test - Base: {config.base_version}, " + f"Flags: {config.env.flags}") + log("=" * 70) + + if run_command("sudo -v").returncode: + log("sudo access required") + return 1 + + if not remove_upgrade_test_directory(config): + return 1 + + config.path.upgrade_dir.mkdir(parents=True, exist_ok=True) + config.path.base_dir.mkdir(parents=True, exist_ok=True) + config.path.binaries_dir.mkdir(parents=True, exist_ok=True) + + if not ovn_upgrade_save_current_binaries(config): + return 1 + + if not ovn_upgrade_extract_info(config): + return 1 + + if not run_upgrade_workflow(config): + if config.is_ci: + print(config.file.git_log.read_text(encoding='utf-8')) + else: + log(f"Check: {config.file.git_log}") + return 1 + + test_success = run_tests(config) + + log("=" * 70) + if test_success: + log("UPGRADE TESTS PASSED") + else: + log("UPGRADE TESTS FAILED") + log(f"Check: {config.file.test_log}") + log("=" * 70) + + return 0 if test_success else 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/.ci/ovn_upgrade_utils.py b/.ci/ovn_upgrade_utils.py new file mode 100755 index 000000000..f5ae787cb --- /dev/null +++ b/.ci/ovn_upgrade_utils.py @@ 
-0,0 +1,642 @@ +#!/usr/bin/env python3 + +import os +import re +import shutil +import subprocess +from datetime import datetime +from pathlib import Path +from dataclasses import dataclass +import contextlib +import shlex +import sys + +UPGRADE_DIR = 'tests/upgrade-testsuite.dir' +SYSTEM_TESTS_LOGS = 'tests/system-kmod-testsuite.log' +SYSTEM_TESTS_DIR = 'tests/system-kmod-testsuite.dir' +BASE_REPO_DIR = 'base-repo' +BINARIES_DIR = 'ovn-upgrade-binaries' +BUILD_LOG = 'build-base.log' +GIT_LOG = 'git.log' +NEW_EGRESS = 'ovn-upgrade-new-log-egress.txt' +M4_DEFINES = 'ovn-upgrade-oftable-m4-defines.txt' +OFCTL_DEFINES = 'ovn-upgrade-ofctl-defines.h' + + [email protected] +def chdir(target_dir): + original_dir = Path.cwd() + try: + os.chdir(target_dir) + yield + finally: + os.chdir(original_dir) + + +@dataclass +class PathConfig: + ovn_root_dir: Path # Path from which make check-upgrade is run + upgrade_dir: Path # Path where all upgrade-tests related files are stored + base_dir: Path # Path for base branch i.e. from which we upgrade + binaries_dir: Path # Path for binaries from dst branch + test_dir: Path # Path for system tests run by upgrade tests. 
+ + +@dataclass +class FileConfig: + git_log: Path + test_log: Path + build_log: Path + new_egress: Path + m4_defines: Path + ofctl_defines: Path + + +@dataclass +class EnvConfig: + cc: str + flags: str + jobs: str + opts: str + unstable: str + use_sparse: str + + +@dataclass +class UpgradeConfig: + path: PathConfig + env: EnvConfig + file: FileConfig + base_version: str + is_ci: bool + + @classmethod + def get(cls, ovn_root_dir, default_base_version): + upgrade_dir = ovn_root_dir / UPGRADE_DIR + base_dir = upgrade_dir / BASE_REPO_DIR + base_version = os.environ.get('BASE_VERSION', default_base_version) + is_ci = not sys.stdout.isatty() + + path_obj = PathConfig( + ovn_root_dir=ovn_root_dir, + binaries_dir=upgrade_dir / BINARIES_DIR, + base_dir=base_dir, + upgrade_dir=upgrade_dir, + test_dir=base_dir / SYSTEM_TESTS_DIR, + ) + + file_obj = FileConfig( + test_log=base_dir / SYSTEM_TESTS_LOGS, + build_log=upgrade_dir / BUILD_LOG, + git_log=upgrade_dir / GIT_LOG, + new_egress=upgrade_dir / NEW_EGRESS, + m4_defines=upgrade_dir / M4_DEFINES, + ofctl_defines=upgrade_dir / OFCTL_DEFINES + ) + + env_obj = EnvConfig( + cc=os.environ.get('CC', 'gcc'), + flags=os.environ.get('TESTSUITEFLAGS', ''), + jobs=os.environ.get('JOBS', ''), + opts=os.environ.get('OPTS', ''), + unstable=os.environ.get('UNSTABLE', 'no'), + # Enable parse in CI. 
Disable for local run as might depend of + # content of /usr/local/include/openvswitch + use_sparse='yes' if (is_ci and shutil.which('sparse')) else 'no' + ) + + return cls(path=path_obj, env=env_obj, file=file_obj, + base_version=base_version, is_ci=is_ci) + + def get_ctx(self): + env = os.environ.copy() + env.update(CC=self.env.cc, OPTS=self.env.opts, + JOBS=self.env.jobs, USE_SPARSE=self.env.use_sparse) + return env + + +def log(message): + timestamp = datetime.now().strftime("%H:%M:%S") + print(f"[{timestamp}] {message}", flush=True) + + +def run_command(cmd_str, log_file=None): + cmd = shlex.split(cmd_str) + if log_file: + with open(log_file, 'a', encoding='utf-8') as f: + return subprocess.run(cmd, stdout=f, stderr=subprocess.STDOUT, + check=False) + else: + return subprocess.run(cmd, capture_output=True, text=True, check=False) + + +def run_shell_command(cmd, log_file=None, env_ctx=None): + if log_file: + with open(log_file, 'a', encoding='utf-8') as f: + result = subprocess.run(['bash', '-c', cmd], stdout=f, + stderr=subprocess.STDOUT, check=False, + env=env_ctx) + else: + result = subprocess.run(['bash', '-c', cmd], check=False, env=env_ctx) + return result.returncode == 0 + + +def extract_oftable_values(content): + log_egress = None + save_inport = None + for line in content: + if line.startswith("#define"): + _, var, val, *rest = line.strip().split(maxsplit=3) + if var == "OFTABLE_LOG_EGRESS_PIPELINE": + log_egress = int(val) + if var == "OFTABLE_SAVE_INPORT": + save_inport = int(val) + if log_egress and save_inport: + break + return log_egress, save_inport + + +def replace_block_in_file(target_file, src_file, line_prefix): + if not target_file.exists(): + return False + if not src_file.exists(): + # No src_file file means nothing to replace. 
+ return True + with open(target_file, encoding='utf-8') as f: + lines = f.readlines() + with open(src_file, encoding='utf-8') as f: + new_content = f.read() + + # Replace all lines starting with line_prefix with new_content. + output_lines = [] + inserted = False + + for line in lines: + if line.startswith(line_prefix): + if not inserted: + output_lines.append(new_content) + inserted = True + # Skip old lines with this prefix + continue + output_lines.append(line) + + with open(target_file, 'w', encoding='utf-8') as f: + f.writelines(output_lines) + + return True + + +def ovn_upgrade_build(config): + log(f"Rebuilding OVN with {config.env.cc}") + + build_script = f""" + set -e + make {config.env.jobs} + """ + return run_shell_command(build_script, config.file.build_log, + config.get_ctx()) + + +def ovs_ovn_upgrade_build(config): + log(f"Building OVS and OVN with {config.env.cc}") + build_script = """ + set -e + . .ci/linux-build.sh + """ + return run_shell_command(build_script, config.file.build_log, + config.get_ctx()) + + +def log_binary_version(binary_path, keywords): + result = run_command(f"{binary_path} --version") + if result.returncode == 0: + for line in result.stdout.splitlines(): + if any(kw in line for kw in keywords): + log(f" {line}") + + +def ovn_upgrade_save_current_binaries(config): + files = [ + 'controller/ovn-controller', + 'ovs/vswitchd/ovs-vswitchd', + 'ovs/ovsdb/ovsdb-server', + 'ovs/utilities/ovs-vsctl', + 'ovs/utilities/ovs-ofctl', + 'ovs/utilities/ovs-appctl', + 'ovs/utilities/ovs-dpctl', + 'ovs/vswitchd/vswitch.ovsschema' + ] + + log("Saving current version binaries") + + for file in files: + try: + shutil.copy(Path(file), config.path.binaries_dir) + except Exception as e: + log(f"Failed to save current binaries: failed to copy {file}: {e}") + return False + + log("Saved current versions:") + log_binary_version(config.path.binaries_dir / 'ovn-controller', + ['ovn-controller', 'SB DB Schema']) + log_binary_version(config.path.binaries_dir 
/ 'ovs-vswitchd', ['vSwitch']) + return True + + +def ovn_upgrade_extract_info(config): + lflow_h = Path('controller/lflow.h') + if not lflow_h.exists(): + log('controller/lflow.h not found') + return False + + # Get all ofctl defines from lflow.h. + with open(lflow_h, encoding='utf-8') as f: + oftable_defines = [ + line.strip() for line in f if line.startswith('#define OFTABLE_') + ] + + if not oftable_defines: + log("Failed to extract info: no #define OFTABLE_ found in lflow.h") + return False + + with open(config.file.ofctl_defines, 'w', encoding='utf-8') as of: + of.write('\n'.join(oftable_defines) + '\n') + log(f" Wrote {config.file.ofctl_defines}") + + # Get value of OFTABLE_LOG_EGRESS_PIPELINE. + new_log_egress, _ = extract_oftable_values(oftable_defines) + + if not new_log_egress: + log("Failed to extract info: could not extract " + "OFTABLE_LOG_EGRESS_PIPELINE value") + return False + + with open(config.file.new_egress, 'w', encoding='utf-8') as f: + f.write(str(new_log_egress) + '\n') + log(f" Wrote {config.file.new_egress}") + + # Get all m4_define([OFTABLE_ from ovn-macros.at. + macros_file = Path("tests/ovn-macros.at") + if macros_file.exists(): + with open(macros_file, encoding='utf-8') as f: + m4_defines = [ + line.strip() for line in f + if line.startswith('m4_define([OFTABLE_') + ] + + with open(config.file.m4_defines, 'w', encoding='utf-8') as of: + of.write('\n'.join(m4_defines) + '\n' if m4_defines else '') + log(f" Wrote {config.file.m4_defines}") + + return True + + +def ovn_upgrade_checkout_local(config, base_version): + base_dir = config.path.base_dir + git_log = config.file.git_log + log(f"Running locally. Cloning to {base_dir}") + + result = run_command(f"git clone --local --shared . 
{str(base_dir)} " + f" --branch {base_version}", git_log) + if result.returncode: + log(f"Failed to clone to {base_dir}") + return False + + with chdir(base_dir): + log(f"Checking out base version: {base_version} from {base_dir}") + result = run_command(f"git checkout {base_version}", git_log) + + if result.returncode: + log(f"Failed to checkout {base_version}") + return False + + return True + + +def ovn_upgrade_clone_github(config, base_version): + base_dir = config.path.base_dir + git_log = config.file.git_log + + result = run_command("git config --get remote.origin.url") + if result.returncode or not result.stdout.strip(): + log("Could not get origin URL from working directory") + return False + + origin_url = result.stdout.strip() + with chdir(base_dir): + log(f"Cloning {base_version} from {origin_url} ") + result = run_command(f"git clone {origin_url} {base_dir} " + f"--branch {base_version} --depth 1 " + "--no-tags", git_log) + + if (result.returncode and + origin_url != "https://github.com/ovn-org/ovn"): + log(f"Not found in {origin_url}, trying ovn-org...") + result = run_command( + "git clone https://github.com/ovn-org/ovn.git " + f"{base_dir} --branch {base_version} --depth 1 " + "--no-tags", git_log + ) + if result.returncode: + log(f"Failed to clone {base_version}") + log(result.stderr) + return False + + return True + + +def ovn_upgrade_checkout_base(config): + base_dir = config.path.base_dir + base_version = config.base_version + git_log = config.file.git_log + is_local = True + + if base_version.startswith('origin/'): + base_version = base_version.split('/', 1)[-1] + is_local = False + + success = False + if is_local: + success = ovn_upgrade_checkout_local(config, base_version) + + if not success: + # Branch not requested or found in local repo. 
+ # Get working directory's origin URL (the real remote, e.g., GitHub) + success = ovn_upgrade_clone_github(config, base_version) + + if not success: + log(f"Failed to fetch/checkout {base_version}") + return False + + # Now move to folder with the cloned version, where we will build + # the base. + with chdir(base_dir): + result = run_command(f"git checkout {base_version}", git_log) + + if result.returncode: + log(f"Failed to checkout {base_version}") + log(result.stderr) + return False + + log(f"Checked out {base_version}") + log("Updating OVS submodule...") + result = run_command("git submodule update --init --depth 1", git_log) + + if result.returncode: + log(f"Failed to update submodules: {result.stderr}") + return False + + return True + + +def ovn_upgrade_patch_for_ovn_debug(config): + return replace_block_in_file( + Path('controller/lflow.h'), + config.file.ofctl_defines, + '#define OFTABLE_') + + +def ovn_upgrade_save_ovn_debug(binaries_dir): + log("Saving hybrid ovn-debug...") + src = Path("utilities/ovn-debug") + dst = binaries_dir / "ovn-debug" + + try: + shutil.copy(src, dst) + except Exception as e: + log(f"Failed to save ovn-debug: {e}") + return False + + return True + + +def update_test(old_start, old_end, shift, test_file): + with open(test_file, encoding='utf-8') as f: + content = f.read() + + def replace_table(match): + table_num = int(match.group(1)) + if old_start <= table_num < old_end: + return f"table={table_num + shift}" + return match.group(0) + + # Replace all table=NUMBER patterns + updated_content = re.sub(r'table\s*=\s*(\d+)', replace_table, content) + + with open(test_file, 'w', encoding='utf-8') as f: + f.write(updated_content) + + +def ovn_upgrade_table_numbers_in_tests_patch(config): + lflow_h = Path('controller/lflow.h') + + if not config.file.new_egress.exists(): + log("No LOG_EGRESS") + return False + + if not lflow_h.exists(): + log("Controller/lflow.h not found") + return False + + with open(config.file.new_egress, 
encoding='utf-8') as f: + new_log_egress = int(f.read().strip()) + + # Get old values from base version's lflow.h + with open(lflow_h, encoding='utf-8') as f: + content = [ + line.strip() for line in f if line.startswith('#define OFTABLE_') + ] + + old_log_egress, old_save_inport = extract_oftable_values(content) + + if (not old_log_egress or not old_save_inport + or old_log_egress == new_log_egress): + log(f"No change in test files as old_log_egress={old_log_egress}, " + f"old_save_inport={old_save_inport} and " + f"new_log_egress={new_log_egress}") + # No change needed is success. + return True + + shift = new_log_egress - old_log_egress + + log(f"Updating hardcoded table numbers in tests (shift: +{shift} for " + f"tables {old_log_egress}-{old_save_inport - 1})") + + # Update test files + for test_file in ['tests/system-ovn.at', 'tests/system-ovn-kmod.at', + 'tests/system-ovn-netlink.at']: + if Path(test_file).exists(): + log(f"Updating {test_file}") + update_test(old_log_egress, old_save_inport, shift, test_file) + return True + + +def ovn_upgrade_schema_in_macros_patch(): + schema_filter = '/OVN_Southbound database lacks/d' + ovn_pattern = r'/has no network name\*/d' + + macros_file = Path('tests/ovn-macros.at') + if macros_file.exists(): + with open(macros_file, encoding='utf-8') as f: + content = f.read() + + if schema_filter not in content: + if re.search(ovn_pattern, content): + content = re.sub(f'({ovn_pattern})', + rf'\1\n{schema_filter}', content, count=1) + with open(macros_file, 'w', encoding='utf-8') as f: + f.write(content) + log("Added schema warning filter to ovn-macros.at") + else: + log("Could not find pattern in ovn-macros.at") + else: + log("Schema already updated in macro") + else: + log("tests/ovn-macros.at not found") + return False + + kmod_file = Path('tests/system-kmod-macros.at') + if kmod_file.exists(): + with open(kmod_file, encoding='utf-8') as f: + content = f.read() + + if schema_filter not in content: + ovs_pattern = 
r'\[OVS_VSWITCHD_STOP\(\[\$1\]\)' + + if re.search(ovs_pattern, content): + content = re.sub( + ovs_pattern, + rf'[OVS_VSWITCHD_STOP([dnl\n$1";{schema_filter}"])', + content, count=1) + with open(kmod_file, 'w', encoding='utf-8') as f: + f.write(content) + log("Added schema warning filter to system-kmod-macros.at") + else: + log("Could not find pattern in system-kmod-macros.at") + return False + + return True + + +def ovn_upgrade_oftable_ovn_macro_patch(config): + return replace_block_in_file( + Path('tests/ovn-macros.at'), + config.file.m4_defines, + 'm4_define([OFTABLE_') + + +def ovn_upgrade_apply_tests_patches(config): + log("Applying schema filter and table number patches...") + if not ovn_upgrade_table_numbers_in_tests_patch(config): + return False + if not ovn_upgrade_schema_in_macros_patch(): + return False + if not ovn_upgrade_oftable_ovn_macro_patch(config): + return False + return True + + +def ovn_upgrade_restore_binaries(config): + log("Replacing binaries with current versions") + + binaries = [ + ('ovn-controller', 'controller/ovn-controller'), + ('ovn-debug', 'utilities/ovn-debug'), + ('ovs-vswitchd', 'ovs/vswitchd/ovs-vswitchd'), + ('ovsdb-server', 'ovs/ovsdb/ovsdb-server'), + ('ovs-vsctl', 'ovs/utilities/ovs-vsctl'), + ('ovs-ofctl', 'ovs/utilities/ovs-ofctl'), + ('ovs-appctl', 'ovs/utilities/ovs-appctl'), + ('ovs-dpctl', 'ovs/utilities/ovs-dpctl'), + ('vswitch.ovsschema', 'ovs/vswitchd/vswitch.ovsschema'), + ] + + for src_name, dest_path in binaries: + src = config.path.binaries_dir / src_name + dest = Path(dest_path) + try: + dest.parent.mkdir(parents=True, exist_ok=True) + shutil.copy(src, dest) + except Exception as e: + log(f"Failed to copy {src_name} to {dest}: {e}") + return False + + log("Current versions (from current patch):") + log_binary_version("controller/ovn-controller", + ['ovn-controller', 'SB DB Schema']) + log_binary_version("ovs/vswitchd/ovs-vswitchd", ['vSwitch']) + + log("Base versions (for compatibility testing):") + 
log_binary_version("northd/ovn-northd", ['ovn-northd']) + log_binary_version("utilities/ovn-nbctl", ['ovn-nbctl']) + + return True + + +def run_upgrade_workflow(config): + base_dir = config.path.base_dir + git_log = config.file.git_log + build_log = config.file.build_log + binaries_dir = config.path.binaries_dir + + if not ovn_upgrade_checkout_base(config): + log("Upgrade_workflow failed: failed to checkout base version") + return False + + with chdir(base_dir): + if not ovn_upgrade_apply_tests_patches(config): + log("Upgrade_workflow failed: failed to apply test patches") + return False + + log("Patching lflow.h with current OFTABLE defines...") + ovn_upgrade_patch_for_ovn_debug(config) + + # Build base version with patched lflow.h + log(f"Building base version (with patched lflow.h) from {Path.cwd()}") + if not ovs_ovn_upgrade_build(config): + log("Upgrade_workflow failed: failed to build base version") + log(f"See config.log and {build_log}") + return False + + # Refresh sudo timestamp after long build + run_command("sudo -v") + + if not ovn_upgrade_save_ovn_debug(binaries_dir): + log("Upgrade_workflow failed: failed to save ovn_debug") + return False + + # Rebuild with original lflow.h + log("Restoring lflow.h to original...") + run_command("git checkout controller/lflow.h", git_log) + + log("Rebuilding base version (clean lflow.h)...") + if not ovn_upgrade_build(config): + log("Upgrade_workflow failed: failed to rebuild base version") + log(f"See {build_log}") + return False + + if not ovn_upgrade_restore_binaries(config): + return False + + return True + + +def remove_upgrade_test_directory(config): + upgrade_dir = config.path.upgrade_dir + test_dir = config.path.test_dir + test_log = config.file.test_log + + if not upgrade_dir.exists(): + return True + + log(f"Removing old {upgrade_dir}...") + + run_command(f"sudo rm -rf {test_dir}") + run_command(f"sudo rm -f {test_log}") + + try: + shutil.rmtree(upgrade_dir) + return True + except OSError as e: + 
log(f"Failed to remove {upgrade_dir}: {e}") + return False diff --git a/.github/workflows/ovn-upgrade-tests.yml b/.github/workflows/ovn-upgrade-tests.yml new file mode 100644 index 000000000..33f4caf42 --- /dev/null +++ b/.github/workflows/ovn-upgrade-tests.yml @@ -0,0 +1,86 @@ +name: OVN Upgrade Tests + +on: + schedule: + # Run Tuesday at midnight + - cron: '0 0 * * 2' + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }} + cancel-in-progress: true + +jobs: + upgrade-tests: + name: upgrade-test ${{ matrix.cfg.base_version }} ${{ matrix.cfg.test_range }} + if: (github.repository_owner == 'ovn-org' && github.event_name == 'schedule' && github.ref_name == 'main') || github.event_name == 'workflow_dispatch' + runs-on: ubuntu-24.04 + timeout-minutes: 120 + + strategy: + fail-fast: false + matrix: + cfg: + - { base_version: "origin/branch-24.03", test_range: "-100"} + - { base_version: "origin/branch-24.03", test_range: "101-", unstable: unstable} + - { base_version: "origin/branch-25.09", test_range: "-100"} + - { base_version: "origin/branch-25.09", test_range: "101-200"} + - { base_version: "origin/branch-25.09", test_range: "201-", unstable: unstable} + - { base_version: "origin/branch-24.09", test_range: "-100"} + - { base_version: "origin/branch-24.09", test_range: "101-200"} + - { base_version: "origin/branch-24.09", test_range: "201-", unstable: unstable} + - { base_version: "origin/branch-25.03", test_range: "-100"} + - { base_version: "origin/branch-25.03", test_range: "101-200"} + - { base_version: "origin/branch-25.03", test_range: "201-", unstable: unstable} + + env: + CC: gcc + BASE_VERSION: ${{ matrix.cfg.base_version }} + TEST_RANGE: ${{ matrix.cfg.test_range }} + UNSTABLE: ${{ matrix.cfg.unstable }} + TESTSUITE: "upgrade-test" + + steps: + - name: system-level-dependencies + run: | + sudo apt update + sudo apt -y install linux-modules-extra-$(uname -r) + + - name: checkout + uses: 
actions/checkout@v4 + with: + submodules: recursive + + - name: Fix /etc/hosts file + run: | + . .ci/linux-util.sh + fix_etc_hosts + + - name: Disable apparmor + run: | + . .ci/linux-util.sh + disable_apparmor + + - name: Download container + run: sudo podman pull ghcr.io/ovn-org/ovn-tests:ubuntu + + - name: Tag image + run: sudo podman tag ghcr.io/ovn-org/ovn-tests:ubuntu ovn-org/ovn-tests + + # Artifact names cannot contain characters such as '/' + - name: Artifact name + id: artifact + run: | + RAW_NAME='${{ matrix.cfg.base_version }}' + BRANCH_NAME="${RAW_NAME#origin/}" + echo "name=logs-upgrade-test-${BRANCH_NAME}-${{ matrix.cfg.test_range }}" >> $GITHUB_OUTPUT + + - name: build + run: sudo -E ./.ci/ci.sh --archive-logs --timeout=2h + + - name: upload logs on failure + if: failure() || cancelled() + uses: actions/upload-artifact@v4 + with: + name: ${{ steps.artifact.outputs.name }} + path: logs.tgz diff --git a/Documentation/topics/testing.rst b/Documentation/topics/testing.rst index cc928ef64..579422ca0 100644 --- a/Documentation/topics/testing.rst +++ b/Documentation/topics/testing.rst @@ -293,3 +293,177 @@ of these cached objects, be sure to rebuild the test. The cached objects are stored under the relevant folder in ``tests/perf-testsuite.dir/cached``. + +OVN Upgrade Testing +~~~~~~~~~~~~~~~~~~~ + +Overview +++++++++ + +OVN upgrade tests validate that the system continues to function correctly +during rolling upgrades, specifically testing the intermediate state where +ovn-controller is upgraded before ovn-northd and the databases. + +The upgrade tests run the system test suite from an older OVN version using +binaries (ovn-controller, ovs-vswitchd, etc.) from the current development +version, ensuring backward compatibility. + +Running Upgrade Tests Locally ++++++++++++++++++++++++++++++ + +Basic usage:: + + $ make check-upgrade + +This will test upgrades from branch-24.03 (the default base version). 
+ +Specify a different base version:: + + $ make check-upgrade BASE_VERSION=branch-24.09 + +Run a specific range of tests:: + + $ make check-upgrade BASE_VERSION=branch-25.03 TESTSUITEFLAGS="1-100" + +Run only unstable tests:: + + $ make check-upgrade UNSTABLE=1 TESTSUITEFLAGS="-k unstable" + +Environment Variables ++++++++++++++++++++++ + +*BASE_VERSION* + Git branch to use as the base version (default: ``branch-24.03``) + + - branch-24.03: the local repo will be used as the source repo. + - origin/branch-24.03: the local repo origin is used as the source repo. + - If branch is not found in local repo, it will be searched in its origin + (e.g. private github repo or ovn-org repo). If not found in private + github repo, it will be searched in ovn-org repo. + +*TESTSUITEFLAGS* + Test range to run, using autotest syntax (default: ``1-``, meaning all tests) + + - ``1-100`` - Run tests 1 through 100 + - ``50-`` - Run tests 50 and above + - ``-k unstable`` - Run tests with 'unstable' keyword + + Additional flags to pass to the testsuite. Use ``-d`` to keep test + directories on success for debugging. + +*UNSTABLE* + Set to ``1`` to run unstable tests (default: disabled) + +How Upgrade Tests Work +++++++++++++++++++++++ + +The upgrade test workflow: + +1. *Save Current Binaries* + + The test framework saves binaries from your current working tree: + + - ``ovn-controller`` + - ``ovs-vswitchd``, ``ovsdb-server`` + - ``ovs-vsctl``, ``ovs-ofctl``, ``ovs-appctl``, ``ovs-dpctl`` + - Flow table definitions from ``controller/lflow.h`` + +2. *Clone and Checkout Base Version* + + Creates ``tests/upgrade-testsuite.dir/base-repo/`` and checks out the + specified base version. + +3. *Patch Old Tests* + + - Updates hardcoded flow table numbers if tables were renumbered + - Adds schema compatibility filters to suppress expected warnings + - Replaces OFTABLE_* m4 macros with current values + +4. 
*Build Base Version* + + Builds the base version twice: + + - With patched ``lflow.h`` to create hybrid ``ovn-debug`` tool + - With original ``lflow.h`` for proper ``ovn-northd`` and ``ovn-nbctl`` + +5. *Swap Binaries* + + Replaces the base version's binaries with current versions: + + - Base version: ``ovn-northd``, ``ovn-nbctl`` (test infrastructure) + - Current version: ``ovn-controller``, ``ovs-vswitchd``, ``ovsdb-server`` + +6. *Run Tests* + + Executes the system test suite from the base version with the mixed + binary set. + +Interpreting Test Failures +++++++++++++++++++++++++++ + +Test failures during upgrade testing can indicate: + +*Backward Compatibility Issues* + The new ovn-controller is incompatible with the old northd/databases. + This is a critical issue that must be fixed before release. + +*Flow Generation Changes* + If flow table contents changed intentionally, the (old) test may need the + ``TAG_TEST_NOT_UPGRADABLE`` tag. + +Debugging Failed Tests +++++++++++++++++++++++ + +On failure, the test directory is preserved in ``tests/upgrade-testsuite.dir/``. + +Check the logs:: + + $ tests/upgrade-testsuite.dir/git.log # Git operations + $ tests/upgrade-testsuite.dir/build-base.log # Build output + $ tests/upgrade-testsuite.dir/base-repo/tests/system-kmod-testsuite.log + +Keep test directory for debugging:: + + $ make check-upgrade TESTSUITEFLAGS="-d" + +Marking Tests as Non-Upgradable ++++++++++++++++++++++++++++++++ + +Some tests cannot run in upgrade scenarios: tests for features not yet +fully present in the base version. + +Mark these tests with the ``TAG_TEST_NOT_UPGRADABLE`` keyword:: + + AT_SETUP([test that checks flow details]) + AT_KEYWORDS([TAG_TEST_NOT_UPGRADABLE]) + # ... test code ... + AT_CLEANUP + +These tests will be skipped during upgrade testing but run normally otherwise. 
+ +CI Integration +++++++++++++++ + +Upgrade tests run automatically in GitHub Actions: + +*On Schedule (Weekly)* + - Tests all supported versions (24.03, 24.09, 25.03, 25.09) + +Implementation Details +++++++++++++++++++++++ + +Tests are run locally through the ``check-upgrade`` Makefile target. +The flow for make check-upgrade is: + +- Makefile +- .ci/ovn_upgrade_test.py: run_upgrade_workflow, run_tests +- .ci/linux-build.sh (TESTSUITE=system-test) +- execute_system_tests "check-kernel" "system-kmod-testsuite.log" +- run_system_tests check-kernel + +Through the CI the flow is: + +- ci.sh: run_in_container ./.ci/linux-build.sh (TESTSUITE=upgrade-test) +- execute_system_tests "check-upgrade" "system-kmod-testsuite.log" +- run_system_tests check-upgrade +- Back to the make check-upgrade flow described above. diff --git a/Makefile.am b/Makefile.am index 78aa587e2..50c0fbcd2 100644 --- a/Makefile.am +++ b/Makefile.am @@ -89,6 +89,8 @@ EXTRA_DIST = \ .ci/ci.sh \ .ci/linux-build.sh \ .ci/linux-util.sh \ + .ci/ovn_upgrade_test.py \ + .ci/ovn_upgrade_utils.py \ .ci/osx-build.sh \ .ci/osx-prepare.sh \ .ci/ovn-kubernetes/prepare.sh \ @@ -97,6 +99,7 @@ EXTRA_DIST = \ .github/workflows/test.yml \ .github/workflows/ovn-kubernetes.yml \ .github/workflows/ovn-fake-multinode-tests.yml \ + .github/workflows/ovn-upgrade-tests.yml \ .readthedocs.yaml \ boot.sh \ $(MAN_FRAGMENTS) \ diff --git a/tests/automake.mk b/tests/automake.mk index c8047371b..2dfc0bfa7 100644 --- a/tests/automake.mk +++ b/tests/automake.mk @@ -386,3 +386,17 @@ clean-pki: rm -f tests/pki/stamp rm -rf tests/pki endif + +# Upgrade test support +# Run via: make check-upgrade BASE_VERSION=branch-24.03 TESTSUITEFLAGS="1-100" +BASE_VERSION ?= branch-24.03 + +check-upgrade: all + @mkdir -p upgrade-testsuite.dir + @echo "Running upgrade tests from $(BASE_VERSION)..." 
+ @echo "CC=$(CC) OPTS=$(OPTS) TESTSUITEFLAGS=$(TESTSUITEFLAGS) UNSTABLE=$(UNSTABLE)" + @BASE_VERSION="$(BASE_VERSION)" \ + TESTSUITEFLAGS="$(TESTSUITEFLAGS)" \ + UNSTABLE="$(UNSTABLE)" \ + PYTHONPATH="$(srcdir)/.ci:$$PYTHONPATH" \ + $(PYTHON3) "$(srcdir)/.ci/ovn_upgrade_test.py" -- 2.52.0 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
