Module: Mesa
Branch: main
Commit: 654f7f783f9e46bf2af345d3044839b46983e9f9
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=654f7f783f9e46bf2af345d3044839b46983e9f9
Author: Guilherme Gallo <guilherme.ga...@collabora.com>
Date:   Wed Oct 25 23:12:16 2023 -0300

ci/lava: Make SSH definition wrap the UART one

Simplify the UART and SSH job definition modules so they share common
building blocks:

- generate_lava_yaml_payload is now a LAVAJobDefinition method, so the
  Strategy pattern between the two modules is dropped
- if SSH is supported and UART is not enforced, default to SSH
- when SSH is enabled, wrap the last deploy action to run the SSH
  server and rewrite the test actions, which should not change based
  on the boot method
- create a constants module to load environment variables

Signed-off-by: Guilherme Gallo <guilherme.ga...@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25912>

---
 .gitlab-ci/lava/utils/constants.py           |  14 +++
 .gitlab-ci/lava/utils/lava_job_definition.py | 123 +++++++++++++++++++--------
 .gitlab-ci/lava/utils/ssh_job_definition.py  |  71 +++++++---------
 .gitlab-ci/lava/utils/uart_job_definition.py |  81 +++---------------
 4 files changed, 146 insertions(+), 143 deletions(-)

diff --git a/.gitlab-ci/lava/utils/constants.py b/.gitlab-ci/lava/utils/constants.py
new file mode 100644
index 00000000000..2a9d1547037
--- /dev/null
+++ b/.gitlab-ci/lava/utils/constants.py
@@ -0,0 +1,14 @@
+from os import getenv
+
+# How many attempts should be made when a timeout happens during LAVA device boot.
+NUMBER_OF_ATTEMPTS_LAVA_BOOT = int(getenv("LAVA_NUMBER_OF_ATTEMPTS_LAVA_BOOT", 3))
+
+
+# Supports any integer in [0, 100].
+# The scheduler takes the job priority into account when ordering the queue
+# to decide which job should run next.
+JOB_PRIORITY = int(getenv("JOB_PRIORITY", 75))
+
+# Use UART instead of the default SSH mechanism to follow logs.
+# Caution: this can leave some devices in Mesa CI silent.
+FORCE_UART = bool(getenv("LAVA_FORCE_UART", False))

diff --git a/.gitlab-ci/lava/utils/lava_job_definition.py b/.gitlab-ci/lava/utils/lava_job_definition.py
index 7e4e4c1556c..e50514c71e8 100644
--- a/.gitlab-ci/lava/utils/lava_job_definition.py
+++ b/.gitlab-ci/lava/utils/lava_job_definition.py
@@ -1,36 +1,27 @@
-import re
 from io import StringIO
-from os import getenv
 from typing import TYPE_CHECKING, Any
 
 from ruamel.yaml import YAML
-from ruamel.yaml.scalarstring import LiteralScalarString
 
 from lava.utils.lava_farm import LavaFarm, get_lava_farm
+from lava.utils.ssh_job_definition import (
+    generate_docker_test,
+    generate_dut_test,
+    wrap_boot_action,
+    wrap_final_deploy_action,
+)
+from lava.utils.uart_job_definition import (
+    fastboot_boot_action,
+    fastboot_deploy_actions,
+    tftp_boot_action,
+    tftp_deploy_actions,
+    uart_test_actions,
+)
 
 if TYPE_CHECKING:
     from lava.lava_job_submitter import LAVAJobSubmitter
 
-# How many attempts should be made when a timeout happens during LAVA device boot.
-NUMBER_OF_ATTEMPTS_LAVA_BOOT = int(getenv("LAVA_NUMBER_OF_ATTEMPTS_LAVA_BOOT", 3))
-
-# Supports any integer in [0, 100].
-# The scheduler takes the job priority into account when ordering the queue
-# to decide which job should run next.
-JOB_PRIORITY = int(getenv("JOB_PRIORITY", 75))
-
-
-def to_yaml_block(steps_array: list[str], escape_vars=[]) -> LiteralScalarString:
-    def escape_envvar(match):
-        return "\\" + match.group(0)
-
-    filtered_array = [s for s in steps_array if s.strip() and not s.startswith("#")]
-    final_str = "\n".join(filtered_array)
-
-    for escape_var in escape_vars:
-        # Find env vars and add '\\' before them
-        final_str = re.sub(rf"\${escape_var}*", escape_envvar, final_str)
-    return LiteralScalarString(final_str)
+from .constants import FORCE_UART, JOB_PRIORITY, NUMBER_OF_ATTEMPTS_LAVA_BOOT
 
 
 class LAVAJobDefinition:
@@ -43,9 +34,7 @@ class LAVAJobDefinition:
         self.job_submitter: "LAVAJobSubmitter" = job_submitter
 
     def has_ssh_support(self) -> bool:
-        force_uart = bool(getenv("LAVA_FORCE_UART", False))
-
-        if force_uart:
+        if FORCE_UART:
             return False
 
         # Only Collabora's farm supports running docker containers as LAVA actions,
@@ -59,22 +48,65 @@ class LAVAJobDefinition:
 
     def generate_lava_yaml_payload(self) -> dict[str, Any]:
         """
-        Bridge function to use the supported job definition depending on some Mesa
-        CI job characteristics.
+        Generates a YAML payload for submitting a LAVA job, based on the provided arguments.
+
+        Args:
+            None
 
-        The strategy here is to use LAVA with a containerized SSH session to follow
-        the job output, avoiding dumping data to the UART, which proves to be
-        error-prone on some devices.
+        Returns:
+            a dictionary containing the values generated by the `generate_metadata` function and the
+            actions for the LAVA job submission.
         """
-        from lava.utils.ssh_job_definition import generate_lava_yaml_payload as ssh_lava_yaml
-        from lava.utils.uart_job_definition import generate_lava_yaml_payload as uart_lava_yaml
+        args = self.job_submitter
+        values = self.generate_metadata()
+        nfsrootfs = {
+            "url": f"{args.rootfs_url_prefix}/lava-rootfs.tar.zst",
+            "compression": "zstd",
+        }
+
+        init_stage1_steps = self.init_stage1_steps()
+        artifact_download_steps = self.artifact_download_steps()
+
+        deploy_actions = []
+        boot_action = []
+        test_actions = uart_test_actions(args, init_stage1_steps, artifact_download_steps)
+
+        if args.boot_method == "fastboot":
+            deploy_actions = fastboot_deploy_actions(self, nfsrootfs)
+            boot_action = fastboot_boot_action(args)
+        else:  # tftp
+            deploy_actions = tftp_deploy_actions(self, nfsrootfs)
+            boot_action = tftp_boot_action(args)
 
         if self.has_ssh_support():
-            return ssh_lava_yaml(self)
+            wrap_final_deploy_action(deploy_actions[-1])
+            # SSH jobs use namespaces to differentiate between the DUT and the
+            # docker container. Every LAVA action needs an explicit namespace when
+            # we are not using the default one.
+            for deploy_action in deploy_actions:
+                deploy_action["namespace"] = "dut"
+            wrap_boot_action(boot_action)
+            test_actions = (
+                generate_dut_test(args, init_stage1_steps),
+                generate_docker_test(args, artifact_download_steps),
+            )
+
+        values["actions"] = [
+            *[{"deploy": d} for d in deploy_actions],
+            {"boot": boot_action},
+            *[{"test": t} for t in test_actions],
+        ]
 
-        return uart_lava_yaml(self)
+        return values
 
     def generate_lava_job_definition(self) -> str:
+        """
+        Generates a LAVA job definition in YAML format and returns it as a string.
+
+        Returns:
+            a string representation of the job definition generated by analysing job submitter
+            arguments and environment variables
+        """
         job_stream = StringIO()
         yaml = YAML()
         yaml.width = 4096
@@ -157,3 +189,24 @@ class LAVAJobDefinition:
         ]
 
         return download_steps
+
+    def init_stage1_steps(self) -> list[str]:
+        run_steps = []
+        # job execution script:
+        #   - inline .gitlab-ci/common/init-stage1.sh
+        #   - fetch and unpack per-pipeline build artifacts from build job
+        #   - fetch and unpack per-job environment from lava-submit.sh
+        #   - exec .gitlab-ci/common/init-stage2.sh
+
+        with open(self.job_submitter.first_stage_init, "r") as init_sh:
+            run_steps += [x.rstrip() for x in init_sh if not x.startswith("#") and x.rstrip()]
+        # We cannot distribute the Adreno 660 shader firmware inside rootfs,
+        # since the license isn't bundled inside the repository
+        if self.job_submitter.device_type == "sm8350-hdk":
+            run_steps.append(
+                "curl -L --retry 4 -f --retry-all-errors --retry-delay 60 "
+                + "https://github.com/allahjasif1990/hdk888-firmware/raw/main/a660_zap.mbn "
+                + '-o "/lib/firmware/qcom/sm8350/a660_zap.mbn"'
+            )
+
+        return run_steps

diff --git a/.gitlab-ci/lava/utils/ssh_job_definition.py b/.gitlab-ci/lava/utils/ssh_job_definition.py
index 7f030805cb0..e3bfad1ba96 100644
--- a/.gitlab-ci/lava/utils/ssh_job_definition.py
+++ b/.gitlab-ci/lava/utils/ssh_job_definition.py
@@ -28,14 +28,15 @@
 script after sourcing "dut-env-vars.sh" again for the second SSH test case.
 """
 
-from pathlib import Path
-from typing import TYPE_CHECKING, Any
+import re
+from typing import TYPE_CHECKING, Any, Iterable
 
-from .lava_job_definition import NUMBER_OF_ATTEMPTS_LAVA_BOOT, to_yaml_block
+from ruamel.yaml.scalarstring import LiteralScalarString
+
+from .constants import NUMBER_OF_ATTEMPTS_LAVA_BOOT
 
 if TYPE_CHECKING:
     from ..lava_job_submitter import LAVAJobSubmitter
-    from .lava_job_definition import LAVAJobDefinition
 
 # Very early SSH server setup. Uses /dut_ready file to flag it is done.
 SSH_SERVER_COMMANDS = {
@@ -78,12 +79,23 @@ lava_ssh_test_case() {
 ]
 
 
-def generate_dut_test(args: "LAVAJobSubmitter") -> dict[str, Any]:
+def to_yaml_block(steps_array: Iterable[str], escape_vars=[]) -> LiteralScalarString:
+    def escape_envvar(match):
+        return "\\" + match.group(0)
+
+    filtered_array = [s for s in steps_array if s.strip() and not s.startswith("#")]
+    final_str = "\n".join(filtered_array)
+
+    for escape_var in escape_vars:
+        # Find env vars and add '\\' before them
+        final_str = re.sub(rf"\${escape_var}*", escape_envvar, final_str)
+    return LiteralScalarString(final_str)
+
+
+def generate_dut_test(args: "LAVAJobSubmitter", first_stage_steps: list[str]) -> dict[str, Any]:
     # Commands executed on DUT.
     # Trying to execute the minimal number of commands, because the console data is
     # retrieved via UART, which is hang-prone in some devices.
-
-    first_stage_steps: list[str] = Path(args.first_stage_init).read_text().splitlines()
     return {
         "namespace": "dut",
         "definitions": [
@@ -108,8 +120,9 @@
     }
 
 
-def generate_docker_test(job_definition: "LAVAJobDefinition") -> dict[str, Any]:
-    args = job_definition.job_submitter
+def generate_docker_test(
+    args: "LAVAJobSubmitter", artifact_download_steps: list[str]
+) -> dict[str, Any]:
     # This is a growing list of commands that will be executed by the docker
     # guest, which will be the SSH client.
     docker_commands = []
@@ -148,7 +161,7 @@ def generate_docker_test(job_definition: "LAVAJobDefinition") -> dict[str, Any]:
             (
                 "lava_ssh_test_case 'artifact_download' 'bash --' << EOF",
                 "source /dut-env-vars.sh",
-                *job_definition.artifact_download_steps(),
+                *artifact_download_steps,
                 "EOF",
             )
         ),
@@ -163,44 +176,22 @@ def generate_docker_test(job_definition: "LAVAJobDefinition") -> dict[str, Any]:
     return init_stages_test
 
 
-def generate_lava_yaml_payload(job_definition: "LAVAJobDefinition") -> dict[str, Any]:
-    values = job_definition.generate_metadata()
-    job_submitter = job_definition.job_submitter
-
-    # URLs to our kernel rootfs to boot from, both generated by the base
-    # container build
-    deploy = {
+def wrap_final_deploy_action(final_deploy_action: dict):
+    wrap = {
         "namespace": "dut",
         "failure_retry": NUMBER_OF_ATTEMPTS_LAVA_BOOT,
         "timeout": {"minutes": 10},
         "timeouts": {"http-download": {"minutes": 2}},
-        "to": "tftp",
-        "os": "oe",
-        "kernel": {"url": f"{job_submitter.kernel_url_prefix}/{job_submitter.kernel_image_name}"},
-        "nfsrootfs": {
-            "url": f"{job_submitter.rootfs_url_prefix}/lava-rootfs.tar.zst",
-            "compression": "zstd",
-        },
     }
-    job_definition.attach_kernel_and_dtb(deploy)
 
-    # always boot over NFS
-    boot = {
+    final_deploy_action.update(wrap)
+
+
+def wrap_boot_action(boot_action: dict):
+    wrap = {
         "namespace": "dut",
         "failure_retry": NUMBER_OF_ATTEMPTS_LAVA_BOOT,
-        "method": job_submitter.boot_method,
-        "commands": "nfs",
-        "prompts": ["lava-shell:"],
         **SSH_SERVER_COMMANDS,
     }
 
-    # only declaring each job as a single 'test' since LAVA's test parsing is
-    # not useful to us
-    values["actions"] = [
-        {"deploy": deploy},
-        {"boot": boot},
-        {"test": generate_dut_test(job_submitter)},
-        {"test": generate_docker_test(job_definition)},
-    ]
-
-    return values
+    boot_action.update(wrap)

diff --git a/.gitlab-ci/lava/utils/uart_job_definition.py b/.gitlab-ci/lava/utils/uart_job_definition.py
index c1bfaea5840..11a3f3cd429 100644
--- a/.gitlab-ci/lava/utils/uart_job_definition.py
+++ b/.gitlab-ci/lava/utils/uart_job_definition.py
@@ -4,7 +4,7 @@ if TYPE_CHECKING:
     from ..lava_job_submitter import LAVAJobSubmitter
     from .lava_job_definition import LAVAJobDefinition
 
-from .lava_job_definition import NUMBER_OF_ATTEMPTS_LAVA_BOOT
+from .constants import NUMBER_OF_ATTEMPTS_LAVA_BOOT
 
 # Use the same image that is being used for the hardware enablement and health-checks.
 # They are pretty small (<100MB) and have all the tools we need to run LAVA, so it is a safe choice.
@@ -14,7 +14,9 @@ from .lava_job_definition import NUMBER_OF_ATTEMPTS_LAVA_BOOT
 
 DOCKER_IMAGE = "registry.gitlab.collabora.com/lava/health-check-docker"
 
 
-def fastboot_deploy_actions(job_definition: "LAVAJobDefinition", nfsrootfs) -> list[dict[str, Any]]:
+def fastboot_deploy_actions(
+    job_definition: "LAVAJobDefinition", nfsrootfs
+) -> tuple[dict[str, Any], ...]:
     args = job_definition.job_submitter
     fastboot_deploy_nfs = {
         "timeout": {"minutes": 10},
@@ -59,10 +61,10 @@ def fastboot_deploy_actions(job_definition: "LAVAJobDefinition", nfsrootfs) -> list[dict[str, Any]]:
     # container build
     job_definition.attach_kernel_and_dtb(fastboot_deploy_prepare["images"])
 
-    return [{"deploy": d} for d in (fastboot_deploy_nfs, fastboot_deploy_prepare, fastboot_deploy)]
+    return (fastboot_deploy_nfs, fastboot_deploy_prepare, fastboot_deploy)
 
 
-def tftp_deploy_actions(job_definition: "LAVAJobDefinition", nfsrootfs) -> list[dict[str, Any]]:
+def tftp_deploy_actions(job_definition: "LAVAJobDefinition", nfsrootfs) -> tuple[dict[str, Any]]:
     args = job_definition.job_submitter
     tftp_deploy = {
         "timeout": {"minutes": 5},
@@ -75,35 +77,14 @@ def tftp_deploy_actions(job_definition: "LAVAJobDefinition", nfsrootfs) -> list[dict[str, Any]]:
     }
     job_definition.attach_kernel_and_dtb(tftp_deploy)
 
-    return [{"deploy": d} for d in [tftp_deploy]]
+    return (tftp_deploy,)
 
 
-def init_stage1_steps(args: "LAVAJobSubmitter") -> list[str]:
-    run_steps = []
-    # job execution script:
-    #   - inline .gitlab-ci/common/init-stage1.sh
-    #   - fetch and unpack per-pipeline build artifacts from build job
-    #   - fetch and unpack per-job environment from lava-submit.sh
-    #   - exec .gitlab-ci/common/init-stage2.sh
-
-    with open(args.first_stage_init, "r") as init_sh:
-        run_steps += [x.rstrip() for x in init_sh if not x.startswith("#") and x.rstrip()]
-    # We cannot distribute the Adreno 660 shader firmware inside rootfs,
-    # since the license isn't bundled inside the repository
-    if args.device_type == "sm8350-hdk":
-        run_steps.append(
-            "curl -L --retry 4 -f --retry-all-errors --retry-delay 60 "
-            + "https://github.com/allahjasif1990/hdk888-firmware/raw/main/a660_zap.mbn "
-            + '-o "/lib/firmware/qcom/sm8350/a660_zap.mbn"'
-        )
-
-    return run_steps
-
-
-def test_actions(job_definition: "LAVAJobDefinition") -> list[dict[str, Any]]:
+def uart_test_actions(
+    args: "LAVAJobSubmitter", init_stage1_steps: list[str], artifact_download_steps: list[str]
+) -> tuple[dict[str, Any]]:
     # skeleton test definition: only declaring each job as a single 'test'
     # since LAVA's test parsing is not useful to us
-    args = job_definition.job_submitter
     run_steps = []
     test = {
         "timeout": {"minutes": args.job_timeout_min},
@@ -128,8 +109,8 @@ def test_actions(job_definition: "LAVAJobDefinition") -> list[dict[str, Any]]:
         ],
     }
 
-    run_steps += init_stage1_steps(args)
-    run_steps += job_definition.artifact_download_steps()
+    run_steps += init_stage1_steps
+    run_steps += artifact_download_steps
 
     run_steps += [
         f"mkdir -p {args.ci_project_dir}",
@@ -143,7 +124,7 @@ def test_actions(job_definition: "LAVAJobDefinition") -> list[dict[str, Any]]:
         f"lava-test-case '{args.project_name}_{args.mesa_job_name}' --shell /init-stage2.sh",
     ]
 
-    return [{"test": t} for t in [test]]
+    return (test,)
 
 
 def tftp_boot_action(args: "LAVAJobSubmitter") -> dict[str, Any]:
@@ -168,39 +149,3 @@ def fastboot_boot_action(args: "LAVAJobSubmitter") -> dict[str, Any]:
     }
 
     return fastboot_boot
-
-
-def generate_lava_yaml_payload(job_definition: "LAVAJobDefinition") -> dict[str, Any]:
-    """
-    Generates a YAML payload for submitting a LAVA job, based on the provided arguments.
-
-    Args:
-        args ("LAVAJobSubmitter"): The `args` parameter is an instance of the `LAVAJobSubmitter`
-        class. It contains various properties and methods that are used to configure and submit a
-        LAVA job.
-
-    Returns:
-        a dictionary containing the values generated by the `generate_metadata` function and the
-        actions for the LAVA job submission.
-    """
-    job_submitter = job_definition.job_submitter
-    values = job_definition.generate_metadata()
-    nfsrootfs = {
-        "url": f"{job_submitter.rootfs_url_prefix}/lava-rootfs.tar.zst",
-        "compression": "zstd",
-    }
-
-    if job_submitter.boot_method == "fastboot":
-        values["actions"] = [
-            *fastboot_deploy_actions(job_definition, nfsrootfs),
-            {"boot": fastboot_boot_action(job_submitter)},
-        ]
-    else:  # tftp
-        values["actions"] = [
-            *tftp_deploy_actions(job_definition, nfsrootfs),
-            {"boot": tftp_boot_action(job_submitter)},
-        ]
-
-    values["actions"].extend(test_actions(job_definition))
-
-    return values
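
For readers following the new control flow: the SSH path now reuses the UART
building blocks and only overlays SSH-specific keys on top of them. Below is a
minimal, self-contained sketch of that wrapping pattern. The deploy/boot dicts
are trimmed stand-ins (the kernel URL and boot method are invented for the
example), and SSH_SERVER_COMMANDS is stubbed; only the update()/namespace
mechanics mirror the patch.

    # Sketch of the SSH-wraps-UART pattern from this commit (simplified).
    NUMBER_OF_ATTEMPTS_LAVA_BOOT = 3
    SSH_SERVER_COMMANDS = {"commands": ["start-ssh-server"]}  # stub, not the real commands

    def wrap_final_deploy_action(final_deploy_action: dict) -> None:
        # Overlay SSH-specific keys onto the last deploy action, as in the patch.
        final_deploy_action.update({
            "namespace": "dut",
            "failure_retry": NUMBER_OF_ATTEMPTS_LAVA_BOOT,
            "timeout": {"minutes": 10},
            "timeouts": {"http-download": {"minutes": 2}},
        })

    def wrap_boot_action(boot_action: dict) -> None:
        # Keep the UART boot keys; add the namespace and the SSH server startup.
        boot_action.update({
            "namespace": "dut",
            "failure_retry": NUMBER_OF_ATTEMPTS_LAVA_BOOT,
            **SSH_SERVER_COMMANDS,
        })

    # UART-style building blocks (hypothetical, trimmed values).
    deploy_actions = [{"to": "tftp", "kernel": {"url": "https://example.invalid/Image"}}]
    boot_action = {"method": "u-boot", "commands": "nfs"}

    # SSH is supported and UART is not enforced: wrap instead of rebuilding.
    wrap_final_deploy_action(deploy_actions[-1])
    for deploy_action in deploy_actions:
        deploy_action["namespace"] = "dut"  # every action gets an explicit namespace
    wrap_boot_action(boot_action)

    actions = [*[{"deploy": d} for d in deploy_actions], {"boot": boot_action}]
    assert all(a[next(iter(a))]["namespace"] == "dut" for a in actions)

The point of this shape: the UART definitions stay the single source of truth
for the deploy/boot payloads, and the SSH variant decorates them instead of
maintaining a parallel implementation.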
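Similarly, since to_yaml_block moved into ssh_job_definition.py, a quick usage
sketch may help. The function body below is copied from the patch, while the
input steps are invented for the example: it drops comments and blank lines,
joins the steps into a YAML literal block, and backslash-escapes the named
environment variables so they are expanded by the DUT shell, not by LAVA.

    import re

    from ruamel.yaml.scalarstring import LiteralScalarString

    def to_yaml_block(steps_array, escape_vars=[]) -> LiteralScalarString:
        # Body as in the patch: filter, join, then escape '$VAR' references.
        def escape_envvar(match):
            return "\\" + match.group(0)

        filtered_array = [s for s in steps_array if s.strip() and not s.startswith("#")]
        final_str = "\n".join(filtered_array)

        for escape_var in escape_vars:
            final_str = re.sub(rf"\${escape_var}*", escape_envvar, final_str)
        return LiteralScalarString(final_str)

    block = to_yaml_block(
        ["# dropped comment", "echo $CI_JOB_ID", "", "uname -a"],
        escape_vars=["CI_JOB_ID"],
    )
    print(block)
    # echo \$CI_JOB_ID
    # uname -a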