Module: Mesa
Branch: main
Commit: 654f7f783f9e46bf2af345d3044839b46983e9f9
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=654f7f783f9e46bf2af345d3044839b46983e9f9
Author: Guilherme Gallo <guilherme.ga...@collabora.com>
Date:   Wed Oct 25 23:12:16 2023 -0300

ci/lava: Make SSH definition wrap the UART one

Simplify the UART and SSH job definition modules so they share common
building blocks:

- generate_lava_yaml_payload is now a LAVAJobDefinition method, so the
  Strategy pattern between the two modules is dropped
- if SSH is supported and UART is not enforced, default to SSH
- when SSH is enabled, wrap the last deploy action to run the SSH
  server and rewrite the test actions, which should not change based
  on the boot method
- create a constants module to load environment variables

Signed-off-by: Guilherme Gallo <guilherme.ga...@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25912>

---
 .gitlab-ci/lava/utils/constants.py           |  14 +++
 .gitlab-ci/lava/utils/lava_job_definition.py | 123 +++++++++++++++++++--------
 .gitlab-ci/lava/utils/ssh_job_definition.py  |  71 +++++++---------
 .gitlab-ci/lava/utils/uart_job_definition.py |  81 +++---------------
 4 files changed, 146 insertions(+), 143 deletions(-)

diff --git a/.gitlab-ci/lava/utils/constants.py b/.gitlab-ci/lava/utils/constants.py
new file mode 100644
index 00000000000..2a9d1547037
--- /dev/null
+++ b/.gitlab-ci/lava/utils/constants.py
@@ -0,0 +1,14 @@
+from os import getenv
+
+# How many attempts should be made when a timeout happens during LAVA device boot.
+NUMBER_OF_ATTEMPTS_LAVA_BOOT = int(getenv("LAVA_NUMBER_OF_ATTEMPTS_LAVA_BOOT", 3))
+
+
+# Supports any integer in [0, 100].
+# The scheduler takes the job priority into account when ordering the queue
+# to decide which job should run next.
+JOB_PRIORITY = int(getenv("JOB_PRIORITY", 75))
+
+# Use UART instead of the default SSH mechanism to follow logs.
+# Caution: this can leave some devices in Mesa CI silent.
+FORCE_UART = bool(getenv("LAVA_FORCE_UART", False))

diff --git a/.gitlab-ci/lava/utils/lava_job_definition.py b/.gitlab-ci/lava/utils/lava_job_definition.py
index 7e4e4c1556c..e50514c71e8 100644
--- a/.gitlab-ci/lava/utils/lava_job_definition.py
+++ b/.gitlab-ci/lava/utils/lava_job_definition.py
@@ -1,36 +1,27 @@
-import re
 from io import StringIO
-from os import getenv
 from typing import TYPE_CHECKING, Any
 
 from ruamel.yaml import YAML
-from ruamel.yaml.scalarstring import LiteralScalarString
 
 from lava.utils.lava_farm import LavaFarm, get_lava_farm
+from lava.utils.ssh_job_definition import (
+    generate_docker_test,
+    generate_dut_test,
+    wrap_boot_action,
+    wrap_final_deploy_action,
+)
+from lava.utils.uart_job_definition import (
+    fastboot_boot_action,
+    fastboot_deploy_actions,
+    tftp_boot_action,
+    tftp_deploy_actions,
+    uart_test_actions,
+)
 
 if TYPE_CHECKING:
     from lava.lava_job_submitter import LAVAJobSubmitter
 
-# How many attempts should be made when a timeout happens during LAVA device boot.
-NUMBER_OF_ATTEMPTS_LAVA_BOOT = int(getenv("LAVA_NUMBER_OF_ATTEMPTS_LAVA_BOOT", 3))
-
-# Supports any integer in [0, 100].
-# The scheduler takes the job priority into account when ordering the queue
-# to decide which job should run next.
-JOB_PRIORITY = int(getenv("JOB_PRIORITY", 75))
-
-
-def to_yaml_block(steps_array: list[str], escape_vars=[]) -> LiteralScalarString:
-    def escape_envvar(match):
-        return "\\" + match.group(0)
-
-    filtered_array = [s for s in steps_array if s.strip() and not s.startswith("#")]
-    final_str = "\n".join(filtered_array)
-
-    for escape_var in escape_vars:
-        # Find env vars and add '\\' before them
-        final_str = re.sub(rf"\${escape_var}*", escape_envvar, final_str)
-    return LiteralScalarString(final_str)
+from .constants import FORCE_UART, JOB_PRIORITY, NUMBER_OF_ATTEMPTS_LAVA_BOOT
 
 
 class LAVAJobDefinition:
@@ -43,9 +34,7 @@ class LAVAJobDefinition:
         self.job_submitter: "LAVAJobSubmitter" = job_submitter
 
     def has_ssh_support(self) -> bool:
-        force_uart = bool(getenv("LAVA_FORCE_UART", False))
-
-        if force_uart:
+        if FORCE_UART:
             return False
 
         # Only Collabora's farm supports running docker containers as LAVA actions,
@@ -59,22 +48,65 @@ class LAVAJobDefinition:
 
     def generate_lava_yaml_payload(self) -> dict[str, Any]:
         """
-        Bridge function to use the supported job definition depending on some Mesa
-        CI job characteristics.
+        Generates a YAML payload for submitting a LAVA job, based on the provided arguments.
+
+        Args:
+            None
 
-        The strategy here is to use LAVA with a containerized SSH session to follow
-        the job output, avoiding dumping data to the UART, which proves to be
-        error-prone on some devices.
+        Returns:
+            a dictionary containing the values generated by the `generate_metadata` function and the
+            actions for the LAVA job submission.
         """
-        from lava.utils.ssh_job_definition import generate_lava_yaml_payload as ssh_lava_yaml
-        from lava.utils.uart_job_definition import generate_lava_yaml_payload as uart_lava_yaml
+        args = self.job_submitter
+        values = self.generate_metadata()
+        nfsrootfs = {
+            "url": f"{args.rootfs_url_prefix}/lava-rootfs.tar.zst",
+            "compression": "zstd",
+        }
+
+        init_stage1_steps = self.init_stage1_steps()
+        artifact_download_steps = self.artifact_download_steps()
+
+        deploy_actions = []
+        boot_action = []
+        test_actions = uart_test_actions(args, init_stage1_steps, artifact_download_steps)
+
+        if args.boot_method == "fastboot":
+            deploy_actions = fastboot_deploy_actions(self, nfsrootfs)
+            boot_action = fastboot_boot_action(args)
+        else:  # tftp
+            deploy_actions = tftp_deploy_actions(self, nfsrootfs)
+            boot_action = tftp_boot_action(args)
 
         if self.has_ssh_support():
-            return ssh_lava_yaml(self)
+            wrap_final_deploy_action(deploy_actions[-1])
+            # SSH jobs use namespaces to differentiate between the DUT and the
+            # docker container. Every LAVA action needs an explicit namespace when
+            # we are not using the default one.
+            for deploy_action in deploy_actions:
+                deploy_action["namespace"] = "dut"
+            wrap_boot_action(boot_action)
+            test_actions = (
+                generate_dut_test(args, init_stage1_steps),
+                generate_docker_test(args, artifact_download_steps),
+            )
+
+        values["actions"] = [
+            *[{"deploy": d} for d in deploy_actions],
+            {"boot": boot_action},
+            *[{"test": t} for t in test_actions],
+        ]
 
-        return uart_lava_yaml(self)
+        return values
 
     def generate_lava_job_definition(self) -> str:
+        """
+        Generates a LAVA job definition in YAML format and returns it as a string.
+
+        Returns:
+            a string representation of the job definition generated by analysing job submitter
+            arguments and environment variables
+        """
         job_stream = StringIO()
         yaml = YAML()
         yaml.width = 4096
@@ -157,3 +189,24 @@ class LAVAJobDefinition:
         ]
 
         return download_steps
+
+    def init_stage1_steps(self) -> list[str]:
+        run_steps = []
+        # job execution script:
+        #   - inline .gitlab-ci/common/init-stage1.sh
+        #   - fetch and unpack per-pipeline build artifacts from build job
+        #   - fetch and unpack per-job environment from lava-submit.sh
+        #   - exec .gitlab-ci/common/init-stage2.sh
+
+        with open(self.job_submitter.first_stage_init, "r") as init_sh:
+            run_steps += [x.rstrip() for x in init_sh if not x.startswith("#") and x.rstrip()]
+        # We cannot distribute the Adreno 660 shader firmware inside rootfs,
+        # since the license isn't bundled inside the repository
+        if self.job_submitter.device_type == "sm8350-hdk":
+            run_steps.append(
+                "curl -L --retry 4 -f --retry-all-errors --retry-delay 60 "
+                + "https://github.com/allahjasif1990/hdk888-firmware/raw/main/a660_zap.mbn "
+                + '-o "/lib/firmware/qcom/sm8350/a660_zap.mbn"'
+            )
+
+        return run_steps

diff --git a/.gitlab-ci/lava/utils/ssh_job_definition.py b/.gitlab-ci/lava/utils/ssh_job_definition.py
index 7f030805cb0..e3bfad1ba96 100644
--- a/.gitlab-ci/lava/utils/ssh_job_definition.py
+++ b/.gitlab-ci/lava/utils/ssh_job_definition.py
@@ -28,14 +28,15 @@
 script after sourcing "dut-env-vars.sh" again for the second SSH test case.
 """
 
-from pathlib import Path
-from typing import TYPE_CHECKING, Any
+import re
+from typing import TYPE_CHECKING, Any, Iterable
 
-from .lava_job_definition import NUMBER_OF_ATTEMPTS_LAVA_BOOT, to_yaml_block
+from ruamel.yaml.scalarstring import LiteralScalarString
+
+from .constants import NUMBER_OF_ATTEMPTS_LAVA_BOOT
 
 if TYPE_CHECKING:
     from ..lava_job_submitter import LAVAJobSubmitter
-    from .lava_job_definition import LAVAJobDefinition
 
 # Very early SSH server setup. Uses /dut_ready file to flag it is done.
 SSH_SERVER_COMMANDS = {
@@ -78,12 +79,23 @@ lava_ssh_test_case() {
 ]
 
 
-def generate_dut_test(args: "LAVAJobSubmitter") -> dict[str, Any]:
+def to_yaml_block(steps_array: Iterable[str], escape_vars=[]) -> LiteralScalarString:
+    def escape_envvar(match):
+        return "\\" + match.group(0)
+
+    filtered_array = [s for s in steps_array if s.strip() and not s.startswith("#")]
+    final_str = "\n".join(filtered_array)
+
+    for escape_var in escape_vars:
+        # Find env vars and add '\\' before them
+        final_str = re.sub(rf"\${escape_var}*", escape_envvar, final_str)
+    return LiteralScalarString(final_str)
+
+
+def generate_dut_test(args: "LAVAJobSubmitter", first_stage_steps: list[str]) -> dict[str, Any]:
     # Commands executed on DUT.
     # Trying to execute the minimal number of commands, because the console data is
     # retrieved via UART, which is hang-prone in some devices.
-
-    first_stage_steps: list[str] = Path(args.first_stage_init).read_text().splitlines()
     return {
         "namespace": "dut",
         "definitions": [
@@ -108,8 +120,9 @@
     }
 
 
-def generate_docker_test(job_definition: "LAVAJobDefinition") -> dict[str, Any]:
-    args = job_definition.job_submitter
+def generate_docker_test(
+    args: "LAVAJobSubmitter", artifact_download_steps: list[str]
+) -> dict[str, Any]:
     # This is a growing list of commands that will be executed by the docker
     # guest, which will be the SSH client.
     docker_commands = []
@@ -148,7 +161,7 @@ def generate_docker_test(job_definition: "LAVAJobDefinition") -> dict[str, Any]:
             (
                 "lava_ssh_test_case 'artifact_download' 'bash --' << EOF",
                 "source /dut-env-vars.sh",
-                *job_definition.artifact_download_steps(),
+                *artifact_download_steps,
                 "EOF",
             )
         ),
@@ -163,44 +176,22 @@ def generate_docker_test(job_definition: "LAVAJobDefinition") -> dict[str, Any]:
     return init_stages_test
 
 
-def generate_lava_yaml_payload(job_definition: "LAVAJobDefinition") -> dict[str, Any]:
-    values = job_definition.generate_metadata()
-    job_submitter = job_definition.job_submitter
-
-    # URLs to our kernel rootfs to boot from, both generated by the base
-    # container build
-    deploy = {
+def wrap_final_deploy_action(final_deploy_action: dict):
+    wrap = {
         "namespace": "dut",
         "failure_retry": NUMBER_OF_ATTEMPTS_LAVA_BOOT,
         "timeout": {"minutes": 10},
         "timeouts": {"http-download": {"minutes": 2}},
-        "to": "tftp",
-        "os": "oe",
-        "kernel": {"url": f"{job_submitter.kernel_url_prefix}/{job_submitter.kernel_image_name}"},
-        "nfsrootfs": {
-            "url": f"{job_submitter.rootfs_url_prefix}/lava-rootfs.tar.zst",
-            "compression": "zstd",
-        },
     }
-    job_definition.attach_kernel_and_dtb(deploy)
 
-    # always boot over NFS
-    boot = {
+    final_deploy_action.update(wrap)
+
+
+def wrap_boot_action(boot_action: dict):
+    wrap = {
         "namespace": "dut",
         "failure_retry": NUMBER_OF_ATTEMPTS_LAVA_BOOT,
-        "method": job_submitter.boot_method,
-        "commands": "nfs",
-        "prompts": ["lava-shell:"],
         **SSH_SERVER_COMMANDS,
     }
 
-    # only declaring each job as a single 'test' since LAVA's test parsing is
-    # not useful to us
-    values["actions"] = [
-        {"deploy": deploy},
-        {"boot": boot},
-        {"test": generate_dut_test(job_submitter)},
-        {"test": generate_docker_test(job_definition)},
-    ]
-
-    return values
+    boot_action.update(wrap)

diff --git a/.gitlab-ci/lava/utils/uart_job_definition.py b/.gitlab-ci/lava/utils/uart_job_definition.py
index c1bfaea5840..11a3f3cd429 100644
--- a/.gitlab-ci/lava/utils/uart_job_definition.py
+++ b/.gitlab-ci/lava/utils/uart_job_definition.py
@@ -4,7 +4,7 @@ if TYPE_CHECKING:
     from ..lava_job_submitter import LAVAJobSubmitter
     from .lava_job_definition import LAVAJobDefinition
 
-from .lava_job_definition import NUMBER_OF_ATTEMPTS_LAVA_BOOT
+from .constants import NUMBER_OF_ATTEMPTS_LAVA_BOOT
 
 # Use the same image that is being used for the hardware enablement and health-checks.
 # They are pretty small (<100MB) and have all the tools we need to run LAVA, so it is a safe choice.
@@ -14,7 +14,9 @@ from .lava_job_definition import NUMBER_OF_ATTEMPTS_LAVA_BOOT
 
 DOCKER_IMAGE = "registry.gitlab.collabora.com/lava/health-check-docker"
 
 
-def fastboot_deploy_actions(job_definition: "LAVAJobDefinition", nfsrootfs) -> list[dict[str, Any]]:
+def fastboot_deploy_actions(
+    job_definition: "LAVAJobDefinition", nfsrootfs
+) -> tuple[dict[str, Any], ...]:
     args = job_definition.job_submitter
     fastboot_deploy_nfs = {
         "timeout": {"minutes": 10},
@@ -59,10 +61,10 @@ def fastboot_deploy_actions(job_definition: "LAVAJobDefinition", nfsrootfs) -> list[dict[str, Any]]:
     # container build
     job_definition.attach_kernel_and_dtb(fastboot_deploy_prepare["images"])
 
-    return [{"deploy": d} for d in (fastboot_deploy_nfs, fastboot_deploy_prepare, fastboot_deploy)]
+    return (fastboot_deploy_nfs, fastboot_deploy_prepare, fastboot_deploy)
 
 
-def tftp_deploy_actions(job_definition: "LAVAJobDefinition", nfsrootfs) -> list[dict[str, Any]]:
+def tftp_deploy_actions(job_definition: "LAVAJobDefinition", nfsrootfs) -> tuple[dict[str, Any]]:
     args = job_definition.job_submitter
     tftp_deploy = {
         "timeout": {"minutes": 5},
@@ -75,35 +77,14 @@ def tftp_deploy_actions(job_definition: "LAVAJobDefinition", nfsrootfs) -> list[dict[str, Any]]:
     }
     job_definition.attach_kernel_and_dtb(tftp_deploy)
 
-    return [{"deploy": d} for d in [tftp_deploy]]
+    return (tftp_deploy,)
 
 
-def init_stage1_steps(args: "LAVAJobSubmitter") -> list[str]:
-    run_steps = []
-    # job execution script:
-    #   - inline .gitlab-ci/common/init-stage1.sh
-    #   - fetch and unpack per-pipeline build artifacts from build job
-    #   - fetch and unpack per-job environment from lava-submit.sh
-    #   - exec .gitlab-ci/common/init-stage2.sh
-
-    with open(args.first_stage_init, "r") as init_sh:
-        run_steps += [x.rstrip() for x in init_sh if not x.startswith("#") and x.rstrip()]
-    # We cannot distribute the Adreno 660 shader firmware inside rootfs,
-    # since the license isn't bundled inside the repository
-    if args.device_type == "sm8350-hdk":
-        run_steps.append(
-            "curl -L --retry 4 -f --retry-all-errors --retry-delay 60 "
-            + "https://github.com/allahjasif1990/hdk888-firmware/raw/main/a660_zap.mbn "
-            + '-o "/lib/firmware/qcom/sm8350/a660_zap.mbn"'
-        )
-
-    return run_steps
-
-
-def test_actions(job_definition: "LAVAJobDefinition") -> list[dict[str, Any]]:
+def uart_test_actions(
+    args: "LAVAJobSubmitter", init_stage1_steps: list[str], artifact_download_steps: list[str]
+) -> tuple[dict[str, Any]]:
     # skeleton test definition: only declaring each job as a single 'test'
     # since LAVA's test parsing is not useful to us
-    args = job_definition.job_submitter
     run_steps = []
     test = {
         "timeout": {"minutes": args.job_timeout_min},
@@ -128,8 +109,8 @@ def test_actions(job_definition: "LAVAJobDefinition") -> list[dict[str, Any]]:
         ],
     }
 
-    run_steps += init_stage1_steps(args)
-    run_steps += job_definition.artifact_download_steps()
+    run_steps += init_stage1_steps
+    run_steps += artifact_download_steps
 
     run_steps += [
         f"mkdir -p {args.ci_project_dir}",
@@ -143,7 +124,7 @@ def test_actions(job_definition: "LAVAJobDefinition") -> list[dict[str, Any]]:
         f"lava-test-case '{args.project_name}_{args.mesa_job_name}' --shell /init-stage2.sh",
     ]
 
-    return [{"test": t} for t in [test]]
+    return (test,)
 
 
 def tftp_boot_action(args: "LAVAJobSubmitter") -> dict[str, Any]:
@@ -168,39 +149,3 @@ def fastboot_boot_action(args: "LAVAJobSubmitter") -> dict[str, Any]:
     }
 
     return fastboot_boot
-
-
-def generate_lava_yaml_payload(job_definition: "LAVAJobDefinition") -> dict[str, Any]:
-    """
-    Generates a YAML payload for submitting a LAVA job, based on the provided arguments.
-
-    Args:
-        args ("LAVAJobSubmitter"): The `args` parameter is an instance of the `LAVAJobSubmitter`
-        class. It contains various properties and methods that are used to configure and submit a
-        LAVA job.
-
-    Returns:
-        a dictionary containing the values generated by the `generate_metadata` function and the
-        actions for the LAVA job submission.
-    """
-    job_submitter = job_definition.job_submitter
-    values = job_definition.generate_metadata()
-    nfsrootfs = {
-        "url": f"{job_submitter.rootfs_url_prefix}/lava-rootfs.tar.zst",
-        "compression": "zstd",
-    }
-
-    if job_submitter.boot_method == "fastboot":
-        values["actions"] = [
-            *fastboot_deploy_actions(job_definition, nfsrootfs),
-            {"boot": fastboot_boot_action(job_submitter)},
-        ]
-    else:  # tftp
-        values["actions"] = [
-            *tftp_deploy_actions(job_definition, nfsrootfs),
-            {"boot": tftp_boot_action(job_submitter)},
-        ]
-
-    values["actions"].extend(test_actions(job_definition))
-
-    return values
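
For readers following the new control flow: the SSH path now reuses the UART
building blocks and only overlays SSH-specific keys on top of them. Below is a
minimal, self-contained sketch of that wrapping pattern. The deploy/boot dicts
are trimmed stand-ins (the kernel URL and boot method are invented for the
example), and SSH_SERVER_COMMANDS is stubbed; only the update()/namespace
mechanics mirror the patch.

    # Sketch of the SSH-wraps-UART pattern from this commit (simplified).
    NUMBER_OF_ATTEMPTS_LAVA_BOOT = 3
    SSH_SERVER_COMMANDS = {"commands": ["start-ssh-server"]}  # stub, not the real commands

    def wrap_final_deploy_action(final_deploy_action: dict) -> None:
        # Overlay SSH-specific keys onto the last deploy action, as in the patch.
        final_deploy_action.update({
            "namespace": "dut",
            "failure_retry": NUMBER_OF_ATTEMPTS_LAVA_BOOT,
            "timeout": {"minutes": 10},
            "timeouts": {"http-download": {"minutes": 2}},
        })

    def wrap_boot_action(boot_action: dict) -> None:
        # Keep the UART boot keys; add the namespace and the SSH server startup.
        boot_action.update({
            "namespace": "dut",
            "failure_retry": NUMBER_OF_ATTEMPTS_LAVA_BOOT,
            **SSH_SERVER_COMMANDS,
        })

    # UART-style building blocks (hypothetical, trimmed values).
    deploy_actions = [{"to": "tftp", "kernel": {"url": "https://example.invalid/Image"}}]
    boot_action = {"method": "u-boot", "commands": "nfs"}

    # SSH is supported and UART is not enforced: wrap instead of rebuilding.
    wrap_final_deploy_action(deploy_actions[-1])
    for deploy_action in deploy_actions:
        deploy_action["namespace"] = "dut"  # every action gets an explicit namespace
    wrap_boot_action(boot_action)

    actions = [*[{"deploy": d} for d in deploy_actions], {"boot": boot_action}]
    assert all(a[next(iter(a))]["namespace"] == "dut" for a in actions)

The point of this shape: the UART definitions stay the single source of truth
for the deploy/boot payloads, and the SSH variant decorates them instead of
maintaining a parallel implementation.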
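Similarly, since to_yaml_block moved into ssh_job_definition.py, a quick usage
sketch may help. The function body below is copied from the patch, while the
input steps are invented for the example: it drops comments and blank lines,
joins the steps into a YAML literal block, and backslash-escapes the named
environment variables so they are expanded by the DUT shell, not by LAVA.

    import re

    from ruamel.yaml.scalarstring import LiteralScalarString

    def to_yaml_block(steps_array, escape_vars=[]) -> LiteralScalarString:
        # Body as in the patch: filter, join, then escape '$VAR' references.
        def escape_envvar(match):
            return "\\" + match.group(0)

        filtered_array = [s for s in steps_array if s.strip() and not s.startswith("#")]
        final_str = "\n".join(filtered_array)

        for escape_var in escape_vars:
            final_str = re.sub(rf"\${escape_var}*", escape_envvar, final_str)
        return LiteralScalarString(final_str)

    block = to_yaml_block(
        ["# dropped comment", "echo $CI_JOB_ID", "", "uname -a"],
        escape_vars=["CI_JOB_ID"],
    )
    print(block)
    # echo \$CI_JOB_ID
    # uname -a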