This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 68b607631d [minor] Download clickbench file when missing (#9553)
68b607631d is described below
commit 68b607631dc930d7220b82356be30cc0e5b9cac2
Author: Daniël Heres <[email protected]>
AuthorDate: Tue Mar 17 19:01:00 2026 +0100
[minor] Download clickbench file when missing (#9553)
# Which issue does this PR close?
- Closes #NNN.
# Rationale for this change
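The ClickBench benchmark should download `hits_1.parquet` automatically when the file is not found, instead of panicking with manual download instructions.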
# What changes are included in this PR?
<!--
There is no need to duplicate the description in the issue here but it
is sometimes worth providing a summary of the individual changes in this
PR.
-->
# Are these changes tested?
<!--
We typically require tests for all PRs in order to:
1. Prevent the code from being accidentally broken by subsequent changes
2. Serve as another way to document the expected behavior of the code
If tests are not included in your PR, please explain why (for example,
are they covered by existing tests)?
-->
# Are there any user-facing changes?
<!--
If there are user-facing changes then we may require documentation to be
updated before approving the PR.
If there are any breaking changes to public APIs, please call them out.
-->
---
parquet/benches/arrow_reader_clickbench.rs | 41 +++++++++++++++++++-----------
1 file changed, 26 insertions(+), 15 deletions(-)
diff --git a/parquet/benches/arrow_reader_clickbench.rs b/parquet/benches/arrow_reader_clickbench.rs
index 5a6fb36d58..039829f1b9 100644
--- a/parquet/benches/arrow_reader_clickbench.rs
+++ b/parquet/benches/arrow_reader_clickbench.rs
@@ -598,27 +598,38 @@ impl Display for Query {
/// FULL path to the ClickBench hits_1.parquet file
static HITS_1_PATH: OnceLock<PathBuf> = OnceLock::new();
-/// Finds the paths to the ClickBench file, or panics with a useful message
-/// explaining how to download if it is not found
+/// Finds the paths to the ClickBench file, downloading it if not found
fn hits_1() -> &'static Path {
HITS_1_PATH.get_or_init(|| {
+ let current_dir = std::env::current_dir().expect("Failed to get
current directory");
+ println!(
+ "Looking for ClickBench files starting in current_dir and all
parent directories: {current_dir:?}"
+ );
- let current_dir = std::env::current_dir().expect("Failed to get current
directory");
- println!(
- "Looking for ClickBench files starting in current_dir and all parent
directories: {current_dir:?}"
-
- );
+ if let Some(hits_1_path) = find_file_if_exists(current_dir.clone(),
"hits_1.parquet") {
+ return hits_1_path;
+ }
- let Some(hits_1_path) = find_file_if_exists(current_dir.clone(),
"hits_1.parquet") else {
- eprintln!(
- "Could not find hits_1.parquet in directory or parents:
{current_dir:?}. Download it via",
+ // File not found, download it
+ let download_path = current_dir.join("hits_1.parquet");
+ let url =
"https://datasets.clickhouse.com/hits_compatible/athena_partitioned/hits_1.parquet";
+ println!("hits_1.parquet not found, downloading from {url}...");
+
+ let status = std::process::Command::new("wget")
+ .args(["--continue", "-O"])
+ .arg(&download_path)
+ .arg(url)
+ .status()
+ .expect("Failed to execute wget. Please install wget or download
manually.");
+
+ assert!(
+ status.success(),
+ "Failed to download hits_1.parquet. You can download it manually
via:\n\
+ wget --continue {url}"
);
- eprintln!();
- eprintln!("wget --continue
https://datasets.clickhouse.com/hits_compatible/athena_partitioned/hits_1.parquet");
- panic!("Stopping");
- };
- hits_1_path
+ println!("Downloaded hits_1.parquet to {download_path:?}");
+ download_path
})
}