[buildstream] 01/01: Revert to expecting '.bst' on the command line to disambiguate elements and artifacts

tvb Sun, 10 Oct 2021 02:19:43 -0700

This is an automated email from the ASF dual-hosted git repository.

tvb pushed a commit to branch tristan/disambiguate-artifact-globs
in repository https://gitbox.apache.org/repos/asf/buildstream.git


commit 19a15be6ef41619ace5dec1b488ac6c8a5f36c7f
Author: Tristan van Berkom <[email protected]>
AuthorDate: Sun Oct 10 18:05:08 2021 +0900

    Revert to expecting '.bst' on the command line to disambiguate elements and 
artifacts
    
    Summary of changes:
    
      * _stream.py: Treat targets with `.bst` suffixes as elements and other
        targets as artifacts, and improve error reporting around this.
    
        Also use a new machine readable error string to denote when we fail
        to create a directory when creating a workspace.
    
      * _loader/loader.py: Remove handling of LoadErrorReason.LOADING_DIRECTORY
    
        This additional handling was only to suggest that maybe the user
        meant to specify "<directory_name>.bst" in the case a directory is
        encountered, but now we will bail out earlier if an element target
        is specified without a ".bst" suffix anyway.
    
        The only way to reach this error is to load a directory which itself
        already has a ".bst" suffix, in which case the additional suggestion
        is no longer useful.
    
      * tests/format/elementnames.py: Expect different error for target elements
    
      * tests/frontend/artifact_show.py: Removed test that is no longer 
relevant,
        it is now impossible to glob for both elements and artifacts with the
        same glob expression.
    
      * tests/frontend/show.py: Removed some parameters of the glob test which
        were expecting to get element results without specifying a ".bst" suffix
    
      * tests/frontend/workspace.py: Specify a newly added machine readable 
reason
        for the error of failing to create a directory while opening a workspace
    
      * tests/internals/loader.py: Trigger the LoadErrorReason.LOADING_DIRECTORY
        error by creating a directory named "element.bst", so that it gets by
        the initial element name suffix checks.
    
    Fixes #1411
---
 src/buildstream/_loader/loader.py |  12 ----
 src/buildstream/_stream.py        | 142 +++++++++++++++++++++-----------------
 tests/format/elementnames.py      |  28 ++++++--
 tests/frontend/artifact_show.py   |  24 -------
 tests/frontend/show.py            |  10 +--
 tests/frontend/workspace.py       |   2 +-
 tests/internals/loader.py         |   3 +-
 7 files changed, 106 insertions(+), 115 deletions(-)

diff --git a/src/buildstream/_loader/loader.py 
b/src/buildstream/_loader/loader.py
index ea73afd..71fff51 100644
--- a/src/buildstream/_loader/loader.py
+++ b/src/buildstream/_loader/loader.py
@@ -298,18 +298,6 @@ class Loader:
 
                 raise LoadError(message, LoadErrorReason.MISSING_FILE, 
detail=detail) from e
 
-            if e.reason == LoadErrorReason.LOADING_DIRECTORY:
-                # If a <directory>.bst file exists in the element path,
-                # let's suggest this as a plausible alternative.
-                message = str(e)
-                if provenance_node:
-                    message = "{}: 
{}".format(provenance_node.get_provenance(), message)
-                detail = None
-                if os.path.exists(os.path.join(self._basedir, filename + 
".bst")):
-                    element_name = filename + ".bst"
-                    detail = "Did you mean '{}'?\n".format(element_name)
-                raise LoadError(message, LoadErrorReason.LOADING_DIRECTORY, 
detail=detail) from e
-
             # Otherwise, we don't know the reason, so just raise
             raise
 
diff --git a/src/buildstream/_stream.py b/src/buildstream/_stream.py
index 6fcfd12..ad7d596 100644
--- a/src/buildstream/_stream.py
+++ b/src/buildstream/_stream.py
@@ -1040,7 +1040,11 @@ class Stream:
                 if todo_elements:
                     # This output should make creating the remaining 
workspaces as easy as possible.
                     todo_elements = "\nDid not try to create workspaces for " 
+ todo_elements
-                raise StreamError("Failed to create workspace directory: 
{}".format(e) + todo_elements) from e
+                raise StreamError(
+                    "Failed to create workspace directory: {}".format(e),
+                    reason="workspace-directory-failure",
+                    detail=todo_elements,
+                ) from e
 
             workspaces.create_workspace(target, directory, checkout=not 
no_checkout)
             self._context.messenger.info("Created a workspace for element: 
{}".format(target._get_full_name()))
@@ -1999,80 +2003,94 @@ class Stream:
     def _expand_and_classify_targets(
         self, targets: Iterable[str], valid_artifact_names: bool = False
     ) -> Tuple[List[str], List[str]]:
-        initial_targets = []
-        element_targets = []
-        artifact_names = []
-        globs = {}  # Count whether a glob matched elements and artifacts
+        #
+        # We use dicts here instead of sets, in order to deduplicate any 
possibly duplicate
+        # entries, while also retaining the original order of element 
specification/discovery,
+        # (which we cannot do with sets).
+        #
+        element_names = {}
+        artifact_names = {}
+        element_globs = {}
+        artifact_globs = {}
 
-        # First extract the globs
+        # First sort out globs and targets
         for target in targets:
             if any(c in "*?[" for c in target):
-                globs[target] = 0
+                if target.endswith(".bst"):
+                    element_globs[target]
+                else:
+                    artifact_globs[target]
+            elif target.endswith(".bst"):
+                element_names[target] = True
             else:
-                initial_targets.append(target)
+                artifact_names[target] = True
 
-        # Filter out any targets which are found to be artifact names
-        if valid_artifact_names:
-            for target in initial_targets:
-                try:
-                    verify_artifact_ref(target)
-                except ArtifactElementError:
-                    element_targets.append(target)
-                else:
-                    artifact_names.append(target)
-        else:
-            element_targets = initial_targets
+        # Bail out in commands which don't support artifacts if any of the 
targets
+        # or globs did not end with the expected '.bst' suffix.
+        #
+        if (artifact_names or artifact_globs) and not valid_artifact_names:
+            raise StreamError(
+                "Invalid element names or element glob patterns were 
specified: {}".format(
+                    ", ".join(list(artifact_names) + list(artifact_globs))
+                ),
+                reason="invalid-element-names",
+                detail="Element names and element glob expressions must end in 
'.bst'",
+            )
+
+        # Verify targets which were not classified as elements
+        for artifact_name in artifact_names.keys():
+            try:
+                verify_artifact_ref(artifact_name)
+            except ArtifactElementError as e:
+                raise StreamError(
+                    "Specified target does not appear to be an artifact or 
element name: ".format(artifact_name),
+                    reason="unrecognized-target-format",
+                    detail="Element names and element glob expressions must 
end in '.bst'",
+                ) from e
 
         # Expand globs for elements
-        if self._project:
+        if element_globs:
+
+            # Bail out if an element glob is specified without providing a 
project directory
+            if not self._project:
+                raise StreamError(
+                    "Element glob expressions were specified without any 
project directory: {}".format(
+                        ", ".join(element_globs)
+                    ),
+                    reason="glob-elements-without-project",
+                )
+
+            # Collect a list of `all_elements` in the project, stripping out 
the leading
+            # project directory and element path prefix, to produce only 
element names.
+            #
             all_elements = []
             element_path_length = len(self._project.element_path) + 1
             for dirpath, _, filenames in os.walk(self._project.element_path):
                 for filename in filenames:
                     if filename.endswith(".bst"):
-                        element_path = os.path.join(dirpath, filename)
-                        element_path = element_path[element_path_length:]  # 
Strip out the element_path
-                        all_elements.append(element_path)
-
-            for glob in globs:
-                matched = False
-                for element_path in utils.glob(all_elements, glob):
-                    element_targets.append(element_path)
-                    matched = True
-                if matched:
-                    globs[glob] = globs[glob] + 1
-
-        # Expand globs for artifact names
-        if valid_artifact_names:
-            for glob in globs:
-                matches = self._artifacts.list_artifacts(glob=glob)
-                if matches:
-                    artifact_names.extend(matches)
-                    globs[glob] = globs[glob] + 1
-
-        # Issue warnings and errors
-        unmatched = [glob for glob, glob_count in globs.items() if glob_count 
== 0]
-        doubly_matched = [glob for glob, glob_count in globs.items() if 
glob_count > 1]
-
-        # Warn the user if any of the provided globs did not match anything
-        if unmatched:
-            if valid_artifact_names:
-                message = "No elements or artifacts matched the following glob 
expression(s): {}".format(
-                    ", ".join(unmatched)
-                )
-            else:
-                message = "No elements matched the following glob 
expression(s): {}".format(", ".join(unmatched))
-            self._context.messenger.warn(message)
+                        element_name = os.path.join(dirpath, filename)
+                        element_name = element_name[element_path_length:]
+                        all_elements.append(element_name)
 
-        if doubly_matched:
-            raise StreamError(
-                "The provided glob expression(s) matched both element names 
and artifact names: {}".format(
-                    ", ".join(doubly_matched)
-                ),
-                reason="glob-elements-and-artifacts",
-            )
-
-        return element_targets, artifact_names
+            # Glob the elements and add the results to the set
+            #
+            for glob in element_globs.keys():
+                glob_results = list(utils.glob(all_elements, glob))
+                for element_name in glob_results:
+                    element_names[glob_results] = True
+                if not glob_results:
+                    self._context.messenger.warn("No elements matched the glob 
expression: {}".format(glob))
+
+        # Glob the artifact names and add the results to the set
+        #
+        for glob in artifact_globs.keys():
+            glob_results = self._artifacts.list_artifacts(glob=glob)
+            for artifact_name in glob_results:
+                artifact_names[glob_results] = True
+            if not glob_results:
+                self._context.messenger.warn("No artifact names matched the 
glob expression: {}".format(glob))
+
+        return list(element_names), list(artifact_names)
 
 
 # _handle_compression()
diff --git a/tests/format/elementnames.py b/tests/format/elementnames.py
index f059d54..2e34952 100644
--- a/tests/format/elementnames.py
+++ b/tests/format/elementnames.py
@@ -11,19 +11,33 @@ DATA_DIR = os.path.dirname(os.path.realpath(__file__))
 
 
 @pytest.mark.parametrize(
-    "target,reason,provenance",
+    "target,domain,reason,provenance",
     [
-        ("farm.pony", LoadErrorReason.BAD_ELEMENT_SUFFIX, None),
-        ('The "quoted" pony.bst', LoadErrorReason.BAD_CHARACTERS_IN_NAME, 
None),
-        ("bad-suffix-dep.bst", LoadErrorReason.BAD_ELEMENT_SUFFIX, 
"bad-suffix-dep.bst [line 3 column 2]"),
-        ("bad-chars-dep.bst", LoadErrorReason.BAD_CHARACTERS_IN_NAME, 
"bad-chars-dep.bst [line 3 column 2]"),
+        # When specifying a bad suffix on the command line we get a different 
error, we
+        # catch this error earlier on in the load sequence while sorting out 
element and
+        # artifact names and glob expressions.
+        #
+        ("farm.pony", ErrorDomain.STREAM, "invalid-element-names", None),
+        ('The "quoted" pony.bst', ErrorDomain.LOAD, 
LoadErrorReason.BAD_CHARACTERS_IN_NAME, None),
+        (
+            "bad-suffix-dep.bst",
+            ErrorDomain.LOAD,
+            LoadErrorReason.BAD_ELEMENT_SUFFIX,
+            "bad-suffix-dep.bst [line 3 column 2]",
+        ),
+        (
+            "bad-chars-dep.bst",
+            ErrorDomain.LOAD,
+            LoadErrorReason.BAD_CHARACTERS_IN_NAME,
+            "bad-chars-dep.bst [line 3 column 2]",
+        ),
     ],
     ids=["toplevel-bad-suffix", "toplevel-bad-chars", "dependency-bad-suffix", 
"dependency-bad-chars"],
 )
 @pytest.mark.datafiles(DATA_DIR)
-def test_invalid_element_names(cli, datafiles, target, reason, provenance):
+def test_invalid_element_names(cli, datafiles, target, domain, reason, 
provenance):
     project = os.path.join(str(datafiles), "elementnames")
     result = cli.run(project=project, silent=True, args=["show", target])
-    result.assert_main_error(ErrorDomain.LOAD, reason)
+    result.assert_main_error(domain, reason)
     if provenance:
         assert provenance in result.stderr
diff --git a/tests/frontend/artifact_show.py b/tests/frontend/artifact_show.py
index 652adfb..6810847 100644
--- a/tests/frontend/artifact_show.py
+++ b/tests/frontend/artifact_show.py
@@ -150,30 +150,6 @@ def test_artifact_show_glob(cli, tmpdir, datafiles, 
pattern, expected_prefixes):
         assert found, "Expected result {} not found".format(expected_prefix)
 
 
-# Test artifact show glob behaviors
[email protected](SIMPLE_DIR)
[email protected](
-    "pattern",
-    [
-        # Catch all glob will match everything, that is an error since the 
glob matches
-        # both elements and artifacts
-        #
-        "**",
-        # This glob is more selective but will also match both artifacts and 
elements
-        #
-        "**import-bin**",
-    ],
-)
-def test_artifact_show_doubly_matched_glob_error(cli, tmpdir, datafiles, 
pattern):
-    project = str(datafiles)
-
-    result = cli.run(project=project, args=["build", "target.bst"])
-    result.assert_success()
-
-    result = cli.run(project=project, args=["artifact", "show", pattern])
-    result.assert_main_error(ErrorDomain.STREAM, "glob-elements-and-artifacts")
-
-
 # Test artifact show artifact in remote
 @pytest.mark.datafiles(DATA_DIR)
 def test_artifact_show_element_available_remotely(cli, tmpdir, datafiles):
diff --git a/tests/frontend/show.py b/tests/frontend/show.py
index 456123b..9ed5b65 100644
--- a/tests/frontend/show.py
+++ b/tests/frontend/show.py
@@ -55,9 +55,6 @@ def test_show_fail(cli, datafiles):
 @pytest.mark.parametrize(
     "pattern,expected_elements",
     [
-        # Use catch all glob. This should report all elements.
-        #
-        ("**", ["import-bin.bst", "import-dev.bst", "compose-all.bst", 
"target.bst", "subdir/target.bst"]),
         # Only bst files, same as "**" for `bst show`
         #
         ("**.bst", ["import-bin.bst", "import-dev.bst", "compose-all.bst", 
"target.bst", "subdir/target.bst"]),
@@ -67,16 +64,13 @@ def test_show_fail(cli, datafiles):
         ("*.bst", ["import-bin.bst", "import-dev.bst", "compose-all.bst", 
"target.bst"]),
         # Report only targets in the subdirectory
         #
-        ("subdir/*", ["subdir/target.bst"]),
+        ("subdir/*.bst", ["subdir/target.bst"]),
         # Report both targets which end in "target.bst"
         #
         ("**target.bst", ["target.bst", "subdir/target.bst"]),
         # All elements starting with the prefix "import"
         #
-        ("import*", ["import-bin.bst", "import-dev.bst"]),
-        # Glob would match artifact refs, but `bst show` does not accept these 
as input.
-        #
-        ("test/**", []),
+        ("import*.bst", ["import-bin.bst", "import-dev.bst"]),
     ],
     ids=["**", "**.bst", "*.bst", "subdir/*", "**target.bst", "import*", 
"test/**"],
 )
diff --git a/tests/frontend/workspace.py b/tests/frontend/workspace.py
index 26c0a99..bd64a24 100644
--- a/tests/frontend/workspace.py
+++ b/tests/frontend/workspace.py
@@ -215,7 +215,7 @@ def test_open_multi_unwritable(cli, tmpdir, datafiles):
         # Using this finally to make sure we always put thing back how they 
should be.
         os.chmod(workspace_object.workspace_cmd, cwdstat.st_mode)
 
-    result.assert_main_error(ErrorDomain.STREAM, None)
+    result.assert_main_error(ErrorDomain.STREAM, "workspace-directory-failure")
     # Normally we avoid checking stderr in favour of using the mechine 
readable result.assert_main_error
     # But Tristan was very keen that the names of the elements left needing 
workspaces were present in the out put
     assert " ".join([element_name for element_name, workspace_dir_suffix in 
element_tuples[1:]]) in result.stderr
diff --git a/tests/internals/loader.py b/tests/internals/loader.py
index 2da0172..fd4a357 100644
--- a/tests/internals/loader.py
+++ b/tests/internals/loader.py
@@ -90,7 +90,8 @@ def test_invalid_key(datafiles):
 def test_invalid_directory_load(datafiles):
 
     basedir = str(datafiles)
+    os.makedirs(os.path.join(basedir, "element.bst"))
     with make_loader(basedir) as loader, pytest.raises(LoadError) as exc:
-        loader.load(["elements/"])
+        loader.load(["element.bst"])
 
     assert exc.value.reason == LoadErrorReason.LOADING_DIRECTORY

[buildstream] 01/01: Revert to expecting '.bst' on the command line to disambiguate elements and artifacts

Reply via email to