pitrou commented on code in PR #50056:
URL: https://github.com/apache/arrow/pull/50056#discussion_r3419256202
##########
dev/archery/archery/cli.py:
##########
@@ -305,6 +328,11 @@ def check_language(ctx, param, value):
click.option("--preserve", type=BOOL, default=False, show_default=True,
is_flag=True,
help="Preserve workspace for investigation."),
+ click.option("--preserve-dir", metavar="<path>",
+ type=click.Path(file_okay=False, resolve_path=True),
+ default=None,
+ help="Parent directory in which to create the preserved "
+ "workspace. Has no effect without --preserve."),
Review Comment:
Well, perhaps `--preserve-dir` should automatically imply `--preserve`? One
less option to type.
##########
dev/archery/archery/benchmark/runner.py:
##########
@@ -26,8 +27,22 @@
from ..lang.cpp import CppCMakeDefinition, CppConfiguration
from ..lang.java import JavaMavenDefinition, JavaConfiguration
from ..utils.cmake import CMakeBuild
+from ..utils.git import git
from ..utils.maven import MavenBuild
from ..utils.logger import logger
+from ..utils.source import ArrowSources
+
+
+def _rev_or_path_dirname(src, rev_or_path):
+ if rev_or_path == ArrowSources.WORKSPACE:
+ return rev_or_path
+ try:
+ sha = git.rev_parse(rev_or_path, git_dir=src.path)
+ if isinstance(sha, bytes):
+ sha = sha.decode("ascii")
+ return sha
+ except Exception:
Review Comment:
This is too broad, which kind of error are we trying to intercept here?
##########
dev/archery/archery/benchmark/runner.py:
##########
@@ -217,19 +236,25 @@ def from_rev_or_path(src, root, rev_or_path, cmake_conf,
**kwargs):
build = CMakeBuild.from_path(rev_or_path)
return CppBenchmarkRunner(build, **kwargs)
else:
- # Revisions can references remote via the `/` character, ensure
- # that the revision is path friendly
- path_rev = rev_or_path.replace("/", "_")
+ path_rev = _rev_or_path_dirname(src, rev_or_path)
root_rev = os.path.join(root, path_rev)
- os.mkdir(root_rev)
+ os.makedirs(root_rev, exist_ok=True)
clone_dir = os.path.join(root_rev, "arrow")
- # Possibly checkout the sources at given revision, no need to
- # perform cleanup on cloned repository as root_rev is reclaimed.
- src_rev, _ = src.at_revision(rev_or_path, clone_dir)
+ if os.path.isdir(clone_dir):
+ src_rev = ArrowSources(clone_dir)
Review Comment:
Add a comment explaining why `clone_dir` can be an existing local directory?
##########
dev/archery/archery/cli.py:
##########
@@ -465,6 +494,15 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output,
cmake_extras,
# when asked to JSON-serialize the results, so produce a JSON
# output even when none is requested.
json_out = json.dumps(runner_base, cls=JsonEncoder)
+ if runner_base.results_dir is not None:
+ results_path = os.path.join(runner_base.results_dir,
+ "benchmark.json")
+ with open(results_path, "w") as f:
+ f.write(json_out)
+ metadata_path = os.path.join(runner_base.results_dir,
+ "metadata.json")
Review Comment:
Is this purely for the user's convenience? What are you expecting to do with
this file?
##########
dev/archery/archery/benchmark/runner.py:
##########
@@ -217,19 +236,25 @@ def from_rev_or_path(src, root, rev_or_path, cmake_conf,
**kwargs):
build = CMakeBuild.from_path(rev_or_path)
return CppBenchmarkRunner(build, **kwargs)
else:
- # Revisions can references remote via the `/` character, ensure
- # that the revision is path friendly
- path_rev = rev_or_path.replace("/", "_")
+ path_rev = _rev_or_path_dirname(src, rev_or_path)
root_rev = os.path.join(root, path_rev)
- os.mkdir(root_rev)
+ os.makedirs(root_rev, exist_ok=True)
clone_dir = os.path.join(root_rev, "arrow")
- # Possibly checkout the sources at given revision, no need to
- # perform cleanup on cloned repository as root_rev is reclaimed.
- src_rev, _ = src.at_revision(rev_or_path, clone_dir)
+ if os.path.isdir(clone_dir):
+ src_rev = ArrowSources(clone_dir)
+ else:
+ src_rev, _ = src.at_revision(rev_or_path, clone_dir)
cmake_def = CppCMakeDefinition(src_rev.cpp, cmake_conf)
- build_dir = os.path.join(root_rev, "build")
- return CppBenchmarkRunner(cmake_def.build(build_dir), **kwargs)
+ run_root = os.path.join(root_rev, "build", "run")
+ os.makedirs(run_root, exist_ok=True)
+ run_id = f"{random.randrange(16**8):08x}"
Review Comment:
Hmm, so each invocation with `--preserve-dir` will create an additional
CMake build directory? Won't that blow up disk footprint for the preserve_dir ?
##########
dev/archery/archery/benchmark/runner.py:
##########
@@ -26,8 +27,22 @@
from ..lang.cpp import CppCMakeDefinition, CppConfiguration
from ..lang.java import JavaMavenDefinition, JavaConfiguration
from ..utils.cmake import CMakeBuild
+from ..utils.git import git
from ..utils.maven import MavenBuild
from ..utils.logger import logger
+from ..utils.source import ArrowSources
+
+
+def _rev_or_path_dirname(src, rev_or_path):
Review Comment:
Can you add a docstring explaining what this function does?
##########
dev/archery/archery/benchmark/runner.py:
##########
@@ -217,19 +236,25 @@ def from_rev_or_path(src, root, rev_or_path, cmake_conf,
**kwargs):
build = CMakeBuild.from_path(rev_or_path)
return CppBenchmarkRunner(build, **kwargs)
else:
- # Revisions can references remote via the `/` character, ensure
- # that the revision is path friendly
- path_rev = rev_or_path.replace("/", "_")
+ path_rev = _rev_or_path_dirname(src, rev_or_path)
root_rev = os.path.join(root, path_rev)
- os.mkdir(root_rev)
+ os.makedirs(root_rev, exist_ok=True)
clone_dir = os.path.join(root_rev, "arrow")
- # Possibly checkout the sources at given revision, no need to
- # perform cleanup on cloned repository as root_rev is reclaimed.
- src_rev, _ = src.at_revision(rev_or_path, clone_dir)
+ if os.path.isdir(clone_dir):
+ src_rev = ArrowSources(clone_dir)
+ else:
+ src_rev, _ = src.at_revision(rev_or_path, clone_dir)
cmake_def = CppCMakeDefinition(src_rev.cpp, cmake_conf)
- build_dir = os.path.join(root_rev, "build")
- return CppBenchmarkRunner(cmake_def.build(build_dir), **kwargs)
+ run_root = os.path.join(root_rev, "build", "run")
Review Comment:
Not sure why we're using `<root_rev>/build/run`. Does `<root_rev>/build`
contain something else than the `run` subdirectory?
##########
dev/archery/archery/cli.py:
##########
Review Comment:
I don't think it is.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]