Hi Xavier, thanks for v3! I only have a few minor comments, all of which are small and can probably be fixed by a maintainer when merging. With the items below fixed:
Acked-by: Mark Michelson <[email protected]> On Fri, Jan 30, 2026 at 6:07 AM Xavier Simonart <[email protected]> wrote: > > When ovn is upgraded, ovn-controller is updated first on the compute > nodes. Then ovn-northd and DB are upgraded. > This patch tests whether the intermediate state (i.e. with > ovn-controller being upgraded) works properly, running system tests > from the base line (i.e. before the upgrade). > > Flow tables might change between releases. > Hence this patch must take that into account by updating the (old) > system tests with any updated table numbers. > In some cases, (new) ovn-controller might change flows in existing > tables, causing some 'upgrade' tests to fail. > Such tests can be skipped using the TAG_TEST_NOT_UPGRADABLE tag. > > This patch upgrades the ci to run automatically some upgrade tests > weekly, on schedule. It also provides a shell script to run those tests > locally. > > This patch depends on patch [1] on branch-25.09. > > [1] "tests: Add new TAG_TEST_NOT_UPGRADABLE to some tests." > > Reported-at: https://issues.redhat.com/browse/FDP-1240 > Assisted-by: claude, with model: Claude Sonnet 4.5 > Signed-off-by: Xavier Simonart <[email protected]> > > -v2: - Updated based on Ales' feedback: > - Move upgrade test logic from complex sh to py script. > - Create new yaml for upgrade tests. > - Rebased. > - Clone Base branch in different folder, to avoid messing up > user develoment folder. > - Run upgrade tests through make check-upgrade instead of > shell script. > - Create CI matrix dynamically so it is more clear which > steps are run. > - Updated testing.rst. > -v3: - Updated based on Mark's feedback. > - Avoid repetition of code, use contextmanager & dataclasses. > - Do not use sparse locally as compilation might fail. > - Upgrade more OVN/OVS binaries such as appctl. > - A few other changes such as avoid regexp when possible. > - Rebased > - Removed tested in ci on pull/push and only run on schedule. > - Updated Documentation. 
> --- > .ci/ci.sh | 5 +- > .ci/linux-build.sh | 35 +- > .ci/ovn_upgrade_test.py | 104 ++++ > .ci/ovn_upgrade_utils.py | 642 ++++++++++++++++++++++++ > .github/workflows/ovn-upgrade-tests.yml | 86 ++++ > Documentation/topics/testing.rst | 174 +++++++ > Makefile.am | 3 + > tests/automake.mk | 14 + > 8 files changed, 1054 insertions(+), 9 deletions(-) > create mode 100755 .ci/ovn_upgrade_test.py > create mode 100755 .ci/ovn_upgrade_utils.py > create mode 100644 .github/workflows/ovn-upgrade-tests.yml > > diff --git a/.ci/ci.sh b/.ci/ci.sh > index 3640d3243..76c364868 100755 > --- a/.ci/ci.sh > +++ b/.ci/ci.sh > @@ -54,6 +54,9 @@ function archive_logs() { > cp -r $CONTAINER_WORKDIR/tests/system-*-testsuite.* \ > $log_dir || true \ > && \ > + cp -r $CONTAINER_WORKDIR/tests/upgrade-testsuite.* \ > + $log_dir || true \ > + && \ > chmod -R +r $log_dir \ > && > tar -czvf $CONTAINER_WORKSPACE/logs.tgz $log_dir > @@ -102,7 +105,7 @@ function run_tests() { > ARCH=$ARCH CC=$CC LIBS=$LIBS OPTS=$OPTS TESTSUITE=$TESTSUITE \ > TEST_RANGE=$TEST_RANGE SANITIZERS=$SANITIZERS DPDK=$DPDK \ > RECHECK=$RECHECK UNSTABLE=$UNSTABLE TIMEOUT=$TIMEOUT \ > - ./.ci/linux-build.sh > + BASE_VERSION=$BASE_VERSION ./.ci/linux-build.sh > " > } > > diff --git a/.ci/linux-build.sh b/.ci/linux-build.sh > index 183833a16..d9b49b7b6 100755 > --- a/.ci/linux-build.sh > +++ b/.ci/linux-build.sh > @@ -1,7 +1,12 @@ > #!/bin/bash > > set -o errexit > -set -x > + > +# Enable debug output for CI, optional for local > +NO_DEBUG=${NO_DEBUG:-0} > +if [ "$NO_DEBUG" = "0" ]; then > + set -x > +fi > > ARCH=${ARCH:-"x86_64"} > USE_SPARSE=${USE_SPARSE:-"yes"} > @@ -181,17 +186,23 @@ function run_system_tests() > > if ! sudo timeout -k 5m -v $TIMEOUT make $JOBS $type \ > TESTSUITEFLAGS="$TEST_RANGE" RECHECK=$RECHECK \ > - SKIP_UNSTABLE=$SKIP_UNSTABLE; then > - # $log_file is necessary for debugging. 
> - cat tests/$log_file > + SKIP_UNSTABLE=$SKIP_UNSTABLE UPGRADE_TEST=$UPGRADE_TEST \ > + BASE_VERSION=$BASE_VERSION; then > + # Suppress output locally when NO_DEBUG not 0. > + if [ "$NO_DEBUG" = "0" ]; then > + cat tests/$log_file > + fi > return 1 > fi > } > > function execute_system_tests() > { > - configure_ovn $OPTS > - make $JOBS || { cat config.log; exit 1; } > + # Upgrade tests build separately > + if [ "$UPGRADE_TEST" != "yes" ]; then > + configure_ovn $OPTS > + make $JOBS || { cat config.log; exit 1; } > + fi > > local stable_rc=0 > local unstable_rc=0 > @@ -201,8 +212,12 @@ function execute_system_tests() > fi > > if [ "$UNSTABLE" ]; then > - if ! SKIP_UNSTABLE=no TEST_RANGE="-k unstable" RECHECK=yes \ > - run_system_tests $@; then > + if [[ "$TEST_RANGE" == *"-d"* ]]; then > + TEST_RANGE="-k unstable -d" > + else > + TEST_RANGE="-k unstable" > + fi > + if ! SKIP_UNSTABLE=no RECHECK=yes run_system_tests $@; then > unstable_rc=1 > fi > fi > @@ -238,6 +253,10 @@ if [ "$TESTSUITE" ]; then > sudo bash -c "echo 2048 > > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages" > execute_system_tests "check-system-dpdk" "system-dpdk-testsuite.log" > ;; > + > + "upgrade-test") > + execute_system_tests "check-upgrade" "system-kmod-testsuite.log" > + ;; > esac > else > configure_ovn $OPTS > diff --git a/.ci/ovn_upgrade_test.py b/.ci/ovn_upgrade_test.py > new file mode 100755 > index 000000000..0f13611f5 > --- /dev/null > +++ b/.ci/ovn_upgrade_test.py > @@ -0,0 +1,104 @@ > +#!/usr/bin/env python3 > + > +import atexit > +import os > +import signal > +import sys > +from pathlib import Path > + > + > +from ovn_upgrade_utils import ( > + log, > + chdir, > + run_command, > + run_shell_command, > + ovn_upgrade_save_current_binaries, > + ovn_upgrade_extract_info, > + run_upgrade_workflow, > + remove_upgrade_test_directory, > + UpgradeConfig > +) > + > +DEFAULT_BASE_BRANCH = 'branch-24.03' > + > + > +def run_tests(config): > + log(f"Running system tests in upgrade scenario 
with flags " > + f"{config.env.flags}") > + > + # Tests are run from the base-branch folder (when upgrading > ocn-controller s/ocn-controller/ovn-controller/ > + # and not yet northd, new features do not work. Hence we cannot use new > + # system-tests. We use the latest .ci/linux-build.sh i.e. from > + # ovn_root_dir. > + with chdir(config.path.base_dir): > + no_debug = "0" if config.is_ci else "1" > + > + cmd = f"""CC={config.env.cc} TESTSUITE=system-test UPGRADE_TEST=yes > + TEST_RANGE="{config.env.flags}" UNSTABLE={config.env.unstable} > + NO_DEBUG={no_debug} > + . {config.path.ovn_root_dir}/.ci/linux-build.sh""" > + > + success = run_shell_command(cmd) > + return success No need for the "success" variable. We can just return run_shell_command(cmd). > + > + > +def main(): > + test_success = False > + > + def cleanup(): > + flags = os.environ.get('TESTSUITEFLAGS', '') > + if '-d' in flags or not test_success: I may be overly nitpicking here, but the debug flag can be either "-d" or "--debug". 
> + log(f"Keeping {config.path.upgrade_dir} for debugging") > + else: > + remove_upgrade_test_directory(config) > + > + atexit.register(cleanup) > + signal.signal(signal.SIGINT, lambda s, f: sys.exit(1)) > + signal.signal(signal.SIGTERM, lambda s, f: sys.exit(1)) > + > + config = UpgradeConfig.get(Path.cwd(), DEFAULT_BASE_BRANCH) > + > + log("=" * 70) > + log(f"OVN Upgrade Test - Base: {config.base_version}, " > + f"Flags: {config.env.flags}") > + log("=" * 70) > + > + if run_command("sudo -v").returncode: > + log("sudo access required") > + return 1 > + > + if not remove_upgrade_test_directory(config): > + return 1 > + > + config.path.upgrade_dir.mkdir(parents=True, exist_ok=True) > + config.path.base_dir.mkdir(parents=True, exist_ok=True) > + config.path.binaries_dir.mkdir(parents=True, exist_ok=True) > + > + if not ovn_upgrade_save_current_binaries(config): > + return 1 > + > + if not ovn_upgrade_extract_info(config): > + return 1 > + > + if not run_upgrade_workflow(config): > + if config.is_ci: > + print(config.file.git_log.read_text(encoding='utf-8')) > + else: > + log(f"Check: {config.file.git_log}") > + return 1 > + > + test_success = run_tests(config) > + > + log("=" * 70) > + if test_success: > + log("UPGRADE TESTS PASSED") > + else: > + log("UPGRADE TESTS FAILED") > + log(f"Check: {config.file.test_log}") > + log("=" * 70) > + > + return 0 if test_success else 1 > + > + > +if __name__ == "__main__": > + sys.exit(main()) > diff --git a/.ci/ovn_upgrade_utils.py b/.ci/ovn_upgrade_utils.py > new file mode 100755 > index 000000000..f5ae787cb > --- /dev/null > +++ b/.ci/ovn_upgrade_utils.py > @@ -0,0 +1,642 @@ > +#!/usr/bin/env python3 > + > +import os > +import re > +import shutil > +import subprocess > +from datetime import datetime > +from pathlib import Path > +from dataclasses import dataclass > +import contextlib > +import shlex > +import sys > + > +UPGRADE_DIR = 'tests/upgrade-testsuite.dir' > +SYSTEM_TESTS_LOGS = 'tests/system-kmod-testsuite.log' > 
+SYSTEM_TESTS_DIR = 'tests/system-kmod-testsuite.dir' > +BASE_REPO_DIR = 'base-repo' > +BINARIES_DIR = 'ovn-upgrade-binaries' > +BUILD_LOG = 'build-base.log' > +GIT_LOG = 'git.log' > +NEW_EGRESS = 'ovn-upgrade-new-log-egress.txt' > +M4_DEFINES = 'ovn-upgrade-oftable-m4-defines.txt' > +OFCTL_DEFINES = 'ovn-upgrade-ofctl-defines.h' > + > + > [email protected] > +def chdir(target_dir): > + original_dir = Path.cwd() > + try: > + os.chdir(target_dir) > + yield > + finally: > + os.chdir(original_dir) > + > + > +@dataclass > +class PathConfig: > + ovn_root_dir: Path # Path from which make check-upgrade is run > + upgrade_dir: Path # Path where all upgrade-tests related files are > stored > + base_dir: Path # Path for base branch i.e. from which we upgrade > + binaries_dir: Path # Path for binaries from dst branch > + test_dir: Path # Path for system tests run by upgrade tests. > + > + > +@dataclass > +class FileConfig: > + git_log: Path > + test_log: Path > + build_log: Path > + new_egress: Path > + m4_defines: Path > + ofctl_defines: Path > + > + > +@dataclass > +class EnvConfig: > + cc: str > + flags: str > + jobs: str > + opts: str > + unstable: str > + use_sparse: str > + > + > +@dataclass > +class UpgradeConfig: > + path: PathConfig > + env: EnvConfig > + file: FileConfig > + base_version: str > + is_ci: bool > + > + @classmethod > + def get(cls, ovn_root_dir, default_base_version): > + upgrade_dir = ovn_root_dir / UPGRADE_DIR > + base_dir = upgrade_dir / BASE_REPO_DIR > + base_version = os.environ.get('BASE_VERSION', default_base_version) > + is_ci = not sys.stdout.isatty() > + > + path_obj = PathConfig( > + ovn_root_dir=ovn_root_dir, > + binaries_dir=upgrade_dir / BINARIES_DIR, > + base_dir=base_dir, > + upgrade_dir=upgrade_dir, > + test_dir=base_dir / SYSTEM_TESTS_DIR, > + ) > + > + file_obj = FileConfig( > + test_log=base_dir / SYSTEM_TESTS_LOGS, > + build_log=upgrade_dir / BUILD_LOG, > + git_log=upgrade_dir / GIT_LOG, > + new_egress=upgrade_dir / NEW_EGRESS, > + 
m4_defines=upgrade_dir / M4_DEFINES, > + ofctl_defines=upgrade_dir / OFCTL_DEFINES > + ) > + > + env_obj = EnvConfig( > + cc=os.environ.get('CC', 'gcc'), > + flags=os.environ.get('TESTSUITEFLAGS', ''), > + jobs=os.environ.get('JOBS', ''), > + opts=os.environ.get('OPTS', ''), > + unstable=os.environ.get('UNSTABLE', 'no'), > + # Enable parse in CI. Disable for local run as might depend of > + # content of /usr/local/include/openvswitch > + use_sparse='yes' if (is_ci and shutil.which('sparse')) else 'no' > + ) > + > + return cls(path=path_obj, env=env_obj, file=file_obj, > + base_version=base_version, is_ci=is_ci) > + > + def get_ctx(self): > + env = os.environ.copy() > + env.update(CC=self.env.cc, OPTS=self.env.opts, > + JOBS=self.env.jobs, USE_SPARSE=self.env.use_sparse) > + return env > + > + > +def log(message): > + timestamp = datetime.now().strftime("%H:%M:%S") > + print(f"[{timestamp}] {message}", flush=True) > + > + > +def run_command(cmd_str, log_file=None): > + cmd = shlex.split(cmd_str) > + if log_file: > + with open(log_file, 'a', encoding='utf-8') as f: > + return subprocess.run(cmd, stdout=f, stderr=subprocess.STDOUT, > + check=False) > + else: > + return subprocess.run(cmd, capture_output=True, text=True, > check=False) > + > + > +def run_shell_command(cmd, log_file=None, env_ctx=None): > + if log_file: > + with open(log_file, 'a', encoding='utf-8') as f: > + result = subprocess.run(['bash', '-c', cmd], stdout=f, > + stderr=subprocess.STDOUT, check=False, > + env=env_ctx) > + else: > + result = subprocess.run(['bash', '-c', cmd], check=False, > env=env_ctx) > + return result.returncode == 0 > + > + > +def extract_oftable_values(content): > + log_egress = None > + save_inport = None > + for line in content: > + if line.startswith("#define"): > + _, var, val, *rest = line.strip().split(maxsplit=3) > + if var == "OFTABLE_LOG_EGRESS_PIPELINE": > + log_egress = int(val) > + if var == "OFTABLE_SAVE_INPORT": > + save_inport = int(val) > + if log_egress and 
save_inport: > + break > + return log_egress, save_inport > + > + > +def replace_block_in_file(target_file, src_file, line_prefix): > + if not target_file.exists(): > + return False > + if not src_file.exists(): > + # No src_file file means nothing to replace. > + return True > + with open(target_file, encoding='utf-8') as f: > + lines = f.readlines() > + with open(src_file, encoding='utf-8') as f: > + new_content = f.read() > + > + # Replace all lines starting with line_prefix with new_content. > + output_lines = [] > + inserted = False > + > + for line in lines: > + if line.startswith(line_prefix): > + if not inserted: > + output_lines.append(new_content) > + inserted = True > + # Skip old lines with this prefix > + continue > + output_lines.append(line) > + > + with open(target_file, 'w', encoding='utf-8') as f: > + f.writelines(output_lines) > + > + return True > + > + > +def ovn_upgrade_build(config): > + log(f"Rebuilding OVN with {config.env.cc}") > + > + build_script = f""" > + set -e > + make {config.env.jobs} > + """ > + return run_shell_command(build_script, config.file.build_log, > + config.get_ctx()) > + > + > +def ovs_ovn_upgrade_build(config): > + log(f"Building OVS and OVN with {config.env.cc}") > + build_script = """ > + set -e > + . 
.ci/linux-build.sh > + """ > + return run_shell_command(build_script, config.file.build_log, > + config.get_ctx()) > + > + > +def log_binary_version(binary_path, keywords): > + result = run_command(f"{binary_path} --version") > + if result.returncode == 0: > + for line in result.stdout.splitlines(): > + if any(kw in line for kw in keywords): > + log(f" {line}") > + > + > +def ovn_upgrade_save_current_binaries(config): > + files = [ > + 'controller/ovn-controller', > + 'ovs/vswitchd/ovs-vswitchd', > + 'ovs/ovsdb/ovsdb-server', > + 'ovs/utilities/ovs-vsctl', > + 'ovs/utilities/ovs-ofctl', > + 'ovs/utilities/ovs-appctl', > + 'ovs/utilities/ovs-dpctl', > + 'ovs/vswitchd/vswitch.ovsschema' > + ] > + > + log("Saving current version binaries") > + > + for file in files: > + try: > + shutil.copy(Path(file), config.path.binaries_dir) > + except Exception as e: > + log(f"Failed to save current binaries: failed to copy {file}: > {e}") > + return False > + > + log("Saved current versions:") > + log_binary_version(config.path.binaries_dir / 'ovn-controller', > + ['ovn-controller', 'SB DB Schema']) > + log_binary_version(config.path.binaries_dir / 'ovs-vswitchd', > ['vSwitch']) > + return True > + > + > +def ovn_upgrade_extract_info(config): > + lflow_h = Path('controller/lflow.h') > + if not lflow_h.exists(): > + log('controller/lflow.h not found') > + return False > + > + # Get all ofctl defines from lflow.h. > + with open(lflow_h, encoding='utf-8') as f: > + oftable_defines = [ > + line.strip() for line in f if line.startswith('#define OFTABLE_') > + ] > + > + if not oftable_defines: > + log("Failed to extract info: no #define OFTABLE_ found in lflow.h") > + return False > + > + with open(config.file.ofctl_defines, 'w', encoding='utf-8') as of: > + of.write('\n'.join(oftable_defines) + '\n') > + log(f" Wrote {config.file.ofctl_defines}") > + > + # Get value of OFTABLE_LOG_EGRESS_PIPELINE. 
> + new_log_egress, _ = extract_oftable_values(oftable_defines) > + > + if not new_log_egress: > + log("Failed to extract info: could not extract " > + "OFTABLE_LOG_EGRESS_PIPELINE value") > + return False > + > + with open(config.file.new_egress, 'w', encoding='utf-8') as f: > + f.write(str(new_log_egress) + '\n') > + log(f" Wrote {config.file.new_egress}") > + > + # Get all m4_define([OFTABLE_ from ovn-macros.at. > + macros_file = Path("tests/ovn-macros.at") > + if macros_file.exists(): > + with open(macros_file, encoding='utf-8') as f: > + m4_defines = [ > + line.strip() for line in f > + if line.startswith('m4_define([OFTABLE_') > + ] > + > + with open(config.file.m4_defines, 'w', encoding='utf-8') as of: > + of.write('\n'.join(m4_defines) + '\n' if m4_defines else '') > + log(f" Wrote {config.file.m4_defines}") > + > + return True > + > + > +def ovn_upgrade_checkout_local(config, base_version): > + base_dir = config.path.base_dir > + git_log = config.file.git_log > + log(f"Running locally. Cloning to {base_dir}") > + > + result = run_command(f"git clone --local --shared . 
{str(base_dir)} " > + f" --branch {base_version}", git_log) > + if result.returncode: > + log(f"Failed to clone to {base_dir}") > + return False > + > + with chdir(base_dir): > + log(f"Checking out base version: {base_version} from {base_dir}") > + result = run_command(f"git checkout {base_version}", git_log) > + > + if result.returncode: > + log(f"Failed to checkout {base_version}") > + return False > + > + return True > + > + > +def ovn_upgrade_clone_github(config, base_version): > + base_dir = config.path.base_dir > + git_log = config.file.git_log > + > + result = run_command("git config --get remote.origin.url") > + if result.returncode or not result.stdout.strip(): > + log("Could not get origin URL from working directory") > + return False > + > + origin_url = result.stdout.strip() > + with chdir(base_dir): > + log(f"Cloning {base_version} from {origin_url} ") > + result = run_command(f"git clone {origin_url} {base_dir} " > + f"--branch {base_version} --depth 1 " > + "--no-tags", git_log) > + > + if (result.returncode and > + origin_url != "https://github.com/ovn-org/ovn"): > + log(f"Not found in {origin_url}, trying ovn-org...") > + result = run_command( > + "git clone https://github.com/ovn-org/ovn.git " > + f"{base_dir} --branch {base_version} --depth 1 " > + "--no-tags", git_log > + ) > + if result.returncode: > + log(f"Failed to clone {base_version}") > + log(result.stderr) > + return False > + > + return True > + > + > +def ovn_upgrade_checkout_base(config): > + base_dir = config.path.base_dir > + base_version = config.base_version > + git_log = config.file.git_log > + is_local = True > + > + if base_version.startswith('origin/'): > + base_version = base_version.split('/', 1)[-1] > + is_local = False > + > + success = False > + if is_local: > + success = ovn_upgrade_checkout_local(config, base_version) > + > + if not success: > + # Branch not requested or found in local repo. 
> + # Get working directory's origin URL (the real remote, e.g., GitHub) > + success = ovn_upgrade_clone_github(config, base_version) > + > + if not success: > + log(f"Failed to fetch/checkout {base_version}") > + return False > + > + # Now move to folder with the cloned version, where we will build > + # the base. > + with chdir(base_dir): > + result = run_command(f"git checkout {base_version}", git_log) > + > + if result.returncode: > + log(f"Failed to checkout {base_version}") > + log(result.stderr) > + return False > + > + log(f"Checked out {base_version}") > + log("Updating OVS submodule...") > + result = run_command("git submodule update --init --depth 1", > git_log) > + > + if result.returncode: > + log(f"Failed to update submodules: {result.stderr}") > + return False > + > + return True > + > + > +def ovn_upgrade_patch_for_ovn_debug(config): > + return replace_block_in_file( > + Path('controller/lflow.h'), > + config.file.ofctl_defines, > + '#define OFTABLE_') > + > + > +def ovn_upgrade_save_ovn_debug(binaries_dir): > + log("Saving hybrid ovn-debug...") > + src = Path("utilities/ovn-debug") > + dst = binaries_dir / "ovn-debug" > + > + try: > + shutil.copy(src, dst) > + except Exception as e: > + log(f"Failed to save ovn-debug: {e}") > + return False > + > + return True > + > + > +def update_test(old_start, old_end, shift, test_file): > + with open(test_file, encoding='utf-8') as f: > + content = f.read() > + > + def replace_table(match): > + table_num = int(match.group(1)) > + if old_start <= table_num < old_end: > + return f"table={table_num + shift}" > + return match.group(0) > + > + # Replace all table=NUMBER patterns > + updated_content = re.sub(r'table\s*=\s*(\d+)', replace_table, content) > + > + with open(test_file, 'w', encoding='utf-8') as f: > + f.write(updated_content) > + > + > +def ovn_upgrade_table_numbers_in_tests_patch(config): > + lflow_h = Path('controller/lflow.h') > + > + if not config.file.new_egress.exists(): > + log("No LOG_EGRESS") > 
+ return False > + > + if not lflow_h.exists(): > + log("Controller/lflow.h not found") > + return False > + > + with open(config.file.new_egress, encoding='utf-8') as f: > + new_log_egress = int(f.read().strip()) > + > + # Get old values from base version's lflow.h > + with open(lflow_h, encoding='utf-8') as f: > + content = [ > + line.strip() for line in f if line.startswith('#define OFTABLE_') > + ] > + > + old_log_egress, old_save_inport = extract_oftable_values(content) > + > + if (not old_log_egress or not old_save_inport > + or old_log_egress == new_log_egress): > + log(f"No change in test files as old_log_egress={old_log_egress}, " > + f"old_save_inport={old_save_inport} and " > + f"new_log_egress={new_log_egress}") > + # No change needed is success. > + return True > + > + shift = new_log_egress - old_log_egress > + > + log(f"Updating hardcoded table numbers in tests (shift: +{shift} for " > + f"tables {old_log_egress}-{old_save_inport - 1})") > + > + # Update test files > + for test_file in ['tests/system-ovn.at', 'tests/system-ovn-kmod.at', > + 'tests/system-ovn-netlink.at']: > + if Path(test_file).exists(): > + log(f"Updating {test_file}") > + update_test(old_log_egress, old_save_inport, shift, test_file) > + return True > + > + > +def ovn_upgrade_schema_in_macros_patch(): > + schema_filter = '/OVN_Southbound database lacks/d' > + ovn_pattern = r'/has no network name\*/d' > + > + macros_file = Path('tests/ovn-macros.at') > + if macros_file.exists(): > + with open(macros_file, encoding='utf-8') as f: > + content = f.read() > + > + if schema_filter not in content: > + if re.search(ovn_pattern, content): > + content = re.sub(f'({ovn_pattern})', > + rf'\1\n{schema_filter}', content, count=1) > + with open(macros_file, 'w', encoding='utf-8') as f: > + f.write(content) > + log("Added schema warning filter to ovn-macros.at") > + else: > + log("Could not find pattern in ovn-macros.at") > + else: > + log("Schema already updated in macro") > + else: > + 
log("tests/ovn-macros.at not found") > + return False > + > + kmod_file = Path('tests/system-kmod-macros.at') > + if kmod_file.exists(): > + with open(kmod_file, encoding='utf-8') as f: > + content = f.read() > + > + if schema_filter not in content: > + ovs_pattern = r'\[OVS_VSWITCHD_STOP\(\[\$1\]\)' > + > + if re.search(ovs_pattern, content): > + content = re.sub( > + ovs_pattern, > + rf'[OVS_VSWITCHD_STOP([dnl\n$1";{schema_filter}"])', > + content, count=1) > + with open(kmod_file, 'w', encoding='utf-8') as f: > + f.write(content) > + log("Added schema warning filter to system-kmod-macros.at") > + else: > + log("Could not find pattern in system-kmod-macros.at") > + return False > + > + return True > + > + > +def ovn_upgrade_oftable_ovn_macro_patch(config): > + return replace_block_in_file( > + Path('tests/ovn-macros.at'), > + config.file.m4_defines, > + 'm4_define([OFTABLE_') > + > + > +def ovn_upgrade_apply_tests_patches(config): > + log("Applying schema filter and table number patches...") > + if not ovn_upgrade_table_numbers_in_tests_patch(config): > + return False > + if not ovn_upgrade_schema_in_macros_patch(): > + return False > + if not ovn_upgrade_oftable_ovn_macro_patch(config): > + return False > + return True > + > + > +def ovn_upgrade_restore_binaries(config): > + log("Replacing binaries with current versions") > + > + binaries = [ > + ('ovn-controller', 'controller/ovn-controller'), > + ('ovn-debug', 'utilities/ovn-debug'), > + ('ovs-vswitchd', 'ovs/vswitchd/ovs-vswitchd'), > + ('ovsdb-server', 'ovs/ovsdb/ovsdb-server'), > + ('ovs-vsctl', 'ovs/utilities/ovs-vsctl'), > + ('ovs-ofctl', 'ovs/utilities/ovs-ofctl'), > + ('ovs-appctl', 'ovs/utilities/ovs-appctl'), > + ('ovs-dpctl', 'ovs/utilities/ovs-dpctl'), > + ('vswitch.ovsschema', 'ovs/vswitchd/vswitch.ovsschema'), > + ] > + > + for src_name, dest_path in binaries: > + src = config.path.binaries_dir / src_name > + dest = Path(dest_path) > + try: > + dest.parent.mkdir(parents=True, exist_ok=True) > + 
shutil.copy(src, dest) > + except Exception as e: > + log(f"Failed to copy {src_name} to {dest}: {e}") > + return False > + > + log("Current versions (from current patch):") > + log_binary_version("controller/ovn-controller", > + ['ovn-controller', 'SB DB Schema']) > + log_binary_version("ovs/vswitchd/ovs-vswitchd", ['vSwitch']) > + > + log("Base versions (for compatibility testing):") > + log_binary_version("northd/ovn-northd", ['ovn-northd']) > + log_binary_version("utilities/ovn-nbctl", ['ovn-nbctl']) > + > + return True > + > + > +def run_upgrade_workflow(config): > + base_dir = config.path.base_dir > + git_log = config.file.git_log > + build_log = config.file.build_log > + binaries_dir = config.path.binaries_dir > + > + if not ovn_upgrade_checkout_base(config): > + log("Upgrade_workflow failed: failed to checkout base version") > + return False > + > + with chdir(base_dir): > + if not ovn_upgrade_apply_tests_patches(config): > + log("Upgrade_workflow failed: failed to apply test patches") > + return False > + > + log("Patching lflow.h with current OFTABLE defines...") > + ovn_upgrade_patch_for_ovn_debug(config) > + > + # Build base version with patched lflow.h > + log(f"Building base version (with patched lflow.h) from > {Path.cwd()}") > + if not ovs_ovn_upgrade_build(config): > + log("Upgrade_workflow failed: failed to build base version") > + log(f"See config.log and {build_log}") > + return False > + > + # Refresh sudo timestamp after long build > + run_command("sudo -v") > + > + if not ovn_upgrade_save_ovn_debug(binaries_dir): > + log("Upgrade_workflow failed: failed to save ovn_debug") > + return False > + > + # Rebuild with original lflow.h > + log("Restoring lflow.h to original...") > + run_command("git checkout controller/lflow.h", git_log) > + > + log("Rebuilding base version (clean lflow.h)...") > + if not ovn_upgrade_build(config): > + log("Upgrade_workflow failed: failed to rebuild base version") > + log(f"See {build_log}") > + return False > + > + 
if not ovn_upgrade_restore_binaries(config): > + return False > + > + return True > + > + > +def remove_upgrade_test_directory(config): > + upgrade_dir = config.path.upgrade_dir > + test_dir = config.path.test_dir > + test_log = config.file.test_log > + > + if not upgrade_dir.exists(): > + return True > + > + log(f"Removing old {upgrade_dir}...") > + > + run_command(f"sudo rm -rf {test_dir}") > + run_command(f"sudo rm -f {test_log}") > + > + try: > + shutil.rmtree(upgrade_dir) > + return True > + except OSError as e: > + log(f"Failed to remove {upgrade_dir}: {e}") > + return False > diff --git a/.github/workflows/ovn-upgrade-tests.yml > b/.github/workflows/ovn-upgrade-tests.yml > new file mode 100644 > index 000000000..33f4caf42 > --- /dev/null > +++ b/.github/workflows/ovn-upgrade-tests.yml > @@ -0,0 +1,86 @@ > +name: OVN Upgrade Tests > + > +on: > + schedule: > + # Run Tuesday at midnight > + - cron: '0 0 * * 2' > + workflow_dispatch: > + > +concurrency: > + group: ${{ github.workflow }}-${{ github.event.pull_request.number || > github.run_id }} > + cancel-in-progress: true > + > +jobs: > + upgrade-tests: > + name: upgrade-test ${{ matrix.cfg.base_version }} ${{ > matrix.cfg.test_range }} > + if: (github.repository_owner == 'ovn-org' && github.event_name == > 'schedule' && github.ref_name == 'main') || github.event_name == > 'workflow_dispatch' > + runs-on: ubuntu-24.04 > + timeout-minutes: 120 > + > + strategy: > + fail-fast: false > + matrix: > + cfg: > + - { base_version: "origin/branch-24.03", test_range: "-100"} > + - { base_version: "origin/branch-24.03", test_range: "101-", > unstable: unstable} > + - { base_version: "origin/branch-25.09", test_range: "-100"} > + - { base_version: "origin/branch-25.09", test_range: "101-200"} > + - { base_version: "origin/branch-25.09", test_range: "201-", > unstable: unstable} > + - { base_version: "origin/branch-24.09", test_range: "-100"} > + - { base_version: "origin/branch-24.09", test_range: "101-200"} > + - { 
base_version: "origin/branch-24.09", test_range: "201-", > unstable: unstable} > + - { base_version: "origin/branch-25.03", test_range: "-100"} > + - { base_version: "origin/branch-25.03", test_range: "101-200"} > + - { base_version: "origin/branch-25.03", test_range: "201-", > unstable: unstable} > + > + env: > + CC: gcc > + BASE_VERSION: ${{ matrix.cfg.base_version }} > + TEST_RANGE: ${{ matrix.cfg.test_range }} > + UNSTABLE: ${{ matrix.cfg.unstable }} > + TESTSUITE: "upgrade-test" > + > + steps: > + - name: system-level-dependencies > + run: | > + sudo apt update > + sudo apt -y install linux-modules-extra-$(uname -r) > + > + - name: checkout > + uses: actions/checkout@v4 > + with: > + submodules: recursive > + > + - name: Fix /etc/hosts file > + run: | > + . .ci/linux-util.sh > + fix_etc_hosts > + > + - name: Disable apparmor > + run: | > + . .ci/linux-util.sh > + disable_apparmor > + > + - name: Download container > + run: sudo podman pull ghcr.io/ovn-org/ovn-tests:ubuntu > + > + - name: Tag image > + run: sudo podman tag ghcr.io/ovn-org/ovn-tests:ubuntu ovn-org/ovn-tests > + > + # Artifact names cannot contain characters such as '/' > + - name: Artifact name > + id: artifact > + run: | > + RAW_NAME='${{ matrix.cfg.base_version }}' > + BRANCH_NAME="${RAW_NAME#origin/}" > + echo "name=logs-upgrade-test-${BRANCH_NAME}-${{ > matrix.cfg.test_range }}" >> $GITHUB_OUTPUT > + > + - name: build > + run: sudo -E ./.ci/ci.sh --archive-logs --timeout=2h > + > + - name: upload logs on failure > + if: failure() || cancelled() > + uses: actions/upload-artifact@v4 > + with: > + name: ${{ steps.artifact.outputs.name }} > + path: logs.tgz > diff --git a/Documentation/topics/testing.rst > b/Documentation/topics/testing.rst > index cc928ef64..579422ca0 100644 > --- a/Documentation/topics/testing.rst > +++ b/Documentation/topics/testing.rst > @@ -293,3 +293,177 @@ of these cached objects, be sure to rebuild the test. 
> > The cached objects are stored under the relevant folder in > ``tests/perf-testsuite.dir/cached``. > + > +OVN Upgrade Testing > +~~~~~~~~~~~~~~~~~~~ > + > +Overview > +++++++++ > + > +OVN upgrade tests validate that the system continues to function correctly > +during rolling upgrades, specifically testing the intermediate state where > +ovn-controller is upgraded before ovn-northd and the databases. > + > +The upgrade tests run the system test suite from an older OVN version using > +binaries (ovn-controller, ovs-vswitchd, etc.) from the current development > +version, ensuring backward compatibility. > + > +Running Upgrade Tests Locally > ++++++++++++++++++++++++++++++ > + > +Basic usage:: > + > + $ make check-upgrade > + > +This will test upgrades from branch-24.03 (the default base version). > + > +Specify a different base version:: > + > + $ make check-upgrade BASE_VERSION=branch-24.09 > + > +Run a specific range of tests:: > + > + $ make check-upgrade BASE_VERSION=branch-25.03 TESTSUITEFLAGS="1-100" > + > +Run only unstable tests:: > + > + $ make check-upgrade UNSTABLE=1 TESTSUITEFLAGS="-k unstable" > + > +Environment Variables > ++++++++++++++++++++++ > + > +*BASE_VERSION* > + Git branch to use as the base version (default: ``branch-24.03``) > + > + - branch-24.03: the local repo will be used as the source repo. > + - origin/branch-24.03: the local repo origin is used as the source repo. > + - If branch is not found in local repo, it will be searched in its origin > + (e.g. private github repo or ovn_org repo). If not found in private > + github repo, it will be searched in ovn_org repo. > + > +*TESTSUITEFLAGS* > + Test range to run, using autotest syntax (default: ``1-``, meaning all > tests) > + > + - ``1-100`` - Run tests 1 through 100 > + - ``50-`` - Run tests 50 and above > + - ``-k unstable`` - Run tests with 'unstable' keyword > + > + Additional flags to pass to the testsuite. Use ``-d`` to keep test > + directories on success for debugging. 
> + > +*UNSTABLE* > + Set to ``1`` to run unstable tests (default: disabled) > + > +How Upgrade Tests Work > +++++++++++++++++++++++ > + > +The upgrade test workflow: > + > +1. *Save Current Binaries* > + > + The test framework saves binaries from your current working tree: > + > + - ``ovn-controller`` > + - ``ovs-vswitchd``, ``ovsdb-server`` > + - ``ovs-vsctl``, ``ovs-ofctl``, ``ovs-appctl``, ``ovs-dpctl`` > + - Flow table definitions from ``controller/lflow.h`` > + > +2. *Clone and Checkout Base Version* > + > + Creates ``upgrade-testsuite.dir/ovn-upgrade-base/`` and checks out the > + specified base version. > + > +3. *Patch Old Tests* > + > + - Updates hardcoded flow table numbers if tables were renumbered > + - Adds schema compatibility filters to suppress expected warnings > + - Replaces OFTABLE_* m4 macros with current values > + > +4. *Build Base Version* > + > + Builds the base version twice: > + > + - With patched ``lflow.h`` to create hybrid ``ovn-debug`` tool > + - With original ``lflow.h`` for proper ``ovn-northd`` and ``ovn-nbctl`` > + > +5. *Swap Binaries* > + > + Replaces the base version's binaries with current versions: > + > + - Base version: ``ovn-northd``, ``ovn-nbctl`` (test infrastructure) > + - Current version: ``ovn-controller``, ``ovs-vswitchd``, ``ovsdb-server`` > + > +6. *Run Tests* > + > + Executes the system test suite from the base version with the mixed > + binary set. > + > +Interpreting Test Failures > +++++++++++++++++++++++++++ > + > +Test failures during upgrade testing can indicate: > + > +*Backward Compatibility Issues* > + The new ovn-controller is incompatible with the old northd/databases. > + This is a critical issue that must be fixed before release. > + > +*Flow Generation Changes* > + If flow table contents changed intentionally, the (old) test may need the > + ``TAG_TEST_NOT_UPGRADABLE`` tag. 
> + > +Debugging Failed Tests > +++++++++++++++++++++++ > + > +On failure, the test directory is preserved in ``upgrade-testsuite.dir/``. > + > +Check the logs:: > + > + $ upgrade-testsuite.dir/git.log # Git operations > + $ upgrade-testsuite.dir/build-base.log # Build output > + $ upgrade-testsuite.dir/ovn-upgrade-base/tests/system-kmod-testsuite.log > + > +Keep test directory for debugging:: > + > + $ make check-upgrade TESTSUITEFLAGS="-d" > + > +Marking Tests as Non-Upgradable > ++++++++++++++++++++++++++++++++ > + > +Some tests cannot run in upgrade scenarios: tests for features not yet > +fully present in the base version. > + > +Mark these tests with the ``TAG_TEST_NOT_UPGRADABLE`` keyword:: > + > + AT_SETUP([test that checks flow details]) > + AT_KEYWORDS([TAG_TEST_NOT_UPGRADABLE]) > + # ... test code ... > + AT_CLEANUP > + > +These tests will be skipped during upgrade testing but run normally > otherwise. > + > +CI Integration > +++++++++++++++ > + > +Upgrade tests run automatically in GitHub Actions: > + > +*On Schedule (Weekly)* > + - Tests all supported versions (24.03, 24.09, 25.03, 25.09) > + > +Implementation Details > +++++++++++++++++++++++ > + > +Tests are run locally through the ``check-upgrade`` Makefile target. > +The flow for ``make check-upgrade`` is: > + > +- Makefile > +- .ci/ovn_upgrade_test.py: run_upgrade_workflow, run_tests > +- .ci/linux-build.sh (TESTSUITE=system-test) > +- execute_system_tests "check-kernel" "system-kmod-testsuite.log" > +- run_system_tests check-kernel > + > +Through the CI, the flow is: > + > +- .ci/ci.sh: run_in_container ./.ci/linux-build.sh (TESTSUITE=upgrade-test) > +- execute_system_tests "check-upgrade" "system-kmod-testsuite.log" > +- run_system_tests check-upgrade > +- Back to the ``make check-upgrade`` flow described above. 
> diff --git a/Makefile.am b/Makefile.am > index 78aa587e2..50c0fbcd2 100644 > --- a/Makefile.am > +++ b/Makefile.am > @@ -89,6 +89,8 @@ EXTRA_DIST = \ > .ci/ci.sh \ > .ci/linux-build.sh \ > .ci/linux-util.sh \ > + .ci/ovn_upgrade_test.py \ > + .ci/ovn_upgrade_utils.py \ > .ci/osx-build.sh \ > .ci/osx-prepare.sh \ > .ci/ovn-kubernetes/prepare.sh \ > @@ -97,6 +99,7 @@ EXTRA_DIST = \ > .github/workflows/test.yml \ > .github/workflows/ovn-kubernetes.yml \ > .github/workflows/ovn-fake-multinode-tests.yml \ > + .github/workflows/ovn-upgrade-tests.yml \ > .readthedocs.yaml \ > boot.sh \ > $(MAN_FRAGMENTS) \ > diff --git a/tests/automake.mk b/tests/automake.mk > index c8047371b..2dfc0bfa7 100644 > --- a/tests/automake.mk > +++ b/tests/automake.mk > @@ -386,3 +386,17 @@ clean-pki: > rm -f tests/pki/stamp > rm -rf tests/pki > endif > + > +# Upgrade test support > +# Run via: make check-upgrade BASE_VERSION=branch-24.03 > TESTSUITEFLAGS="1-100" > +BASE_VERSION ?= branch-24.03 > + > +check-upgrade: all > + @mkdir -p upgrade-testsuite.dir > + @echo "Running upgrade tests from $(BASE_VERSION)..." > + @echo "CC=$(CC) OPTS=$(OPTS) TESTSUITEFLAGS=$(TESTSUITEFLAGS) > UNSTABLE=$(UNSTABLE)" > + @BASE_VERSION="$(BASE_VERSION)" \ > + TESTSUITEFLAGS="$(TESTSUITEFLAGS)" \ > + UNSTABLE="$(UNSTABLE)" \ > + PYTHONPATH="$(srcdir)/.ci:$$PYTHONPATH" \ > + $(PYTHON3) "$(srcdir)/.ci/ovn_upgrade_test.py" > -- > 2.52.0 > _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
