alamb commented on code in PR #13672: URL: https://github.com/apache/datafusion/pull/13672#discussion_r1982195950
########## .github/workflows/extended.yml: ########## @@ -67,25 +67,45 @@ jobs: fetch-depth: 1 - name: Free Disk Space (Ubuntu) uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be - - name: Install Rust + - name: Setup Rust toolchain run: | - curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y - source $HOME/.cargo/env + rustup toolchain install stable Review Comment: I don't think it needs to install stable toolchain (maybe this is a merge mistake 🤔 ) ########## .github/workflows/extended.yml: ########## @@ -67,25 +67,45 @@ jobs: fetch-depth: 1 - name: Free Disk Space (Ubuntu) uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be - - name: Install Rust + - name: Setup Rust toolchain run: | - curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y - source $HOME/.cargo/env + rustup toolchain install stable rustup toolchain install - name: Install Protobuf Compiler run: sudo apt-get install -y protobuf-compiler + - name: Setup Minio - S3-compatible storage Review Comment: I am worried that an extended test may fail (as it is only run on commits to main). And I have spent a bunch of time recently keeping the extended tests gree. Maybe we can put this test in the normal rust.yaml that runs on each PR for a while until we are sure it is stable 🤔 ########## datafusion-cli/CONTRIBUTING.md: ########## @@ -0,0 +1,84 @@ +<!--- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + +# Development instructions + +## Running Tests + +Tests can be run using `cargo` + +```shell +cargo test +``` + +## Snapshot testing Review Comment: Could you please put these instructions in the existing https://datafusion.apache.org/contributor-guide/testing.html instead? And then perhaps you can add a link to that page in `datafusion-cli/CONTRIBUTING.md` (this is partly so when we start using insta for other tests, we can reuse the same instructions) ########## datafusion-cli/CONTRIBUTING.md: ########## @@ -0,0 +1,72 @@ +<!--- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + +# Development instructions + +## Running Tests + +Tests can be run using `cargo` + +```shell +cargo test +``` + +## Snapshot testing + +To test CLI output, [Insta](https://github.com/mitsuhiko/insta) is used for snapshot testing. Snapshots are generated +and compared on each test run. If the output changes, tests will fail. +To review the changes, you can use Insta CLI: + +```shell +cargo install cargo-insta +cargo insta review Review Comment: In general I worry about adding "yet another dependency / test tool" to DataFusion However, given how painful it is to make plan output changes (e.g. see @wiedld 's PR here https://github.com/apache/datafusion/pull/15007) I think it would be really nice to add insta ########## datafusion-cli/tests/cli_integration.rs: ########## @@ -17,42 +17,223 @@ use std::process::Command; -use assert_cmd::prelude::{CommandCargoExt, OutputAssertExt}; -use predicates::prelude::predicate; use rstest::rstest; +use aws_config::Region; +use aws_credential_types::Credentials; +use aws_sdk_s3::error::SdkError; +use aws_sdk_ssooidc::config::BehaviorVersion; +use insta::{glob, Settings}; +use insta_cmd::{assert_cmd_snapshot, get_cargo_bin}; +use std::{env, fs}; + +fn cli() -> Command { + Command::new(get_cargo_bin("datafusion-cli")) +} + +fn make_settings() -> Settings { + let mut settings = Settings::clone_current(); + settings.set_prepend_module_to_snapshot(false); + settings.add_filter(r"Elapsed .* seconds\.", "[ELAPSED]"); + settings.add_filter(r"DataFusion CLI v.*", "[CLI_VERSION]"); + settings +} + #[cfg(test)] #[ctor::ctor] fn init() { // Enable RUST_LOG logging configuration for tests let _ = env_logger::try_init(); } -// Disabled due to https://github.com/apache/datafusion/issues/10793 -#[cfg(not(target_family = "windows"))] #[rstest] -#[case::exec_from_commands( - ["--command", "select 1", "--format", "json", "-q"], - "[{\"Int64(1)\":1}]\n" -)] #[case::exec_multiple_statements( - ["--command", "select 1; select 2;", "--format", "json", "-q"], - "[{\"Int64(1)\":1}]\n[{\"Int64(2)\":2}]\n" + "statements", + ["--command", "select 1; select 2;", "-q"], )] #[case::exec_from_files( - ["--file", "tests/data/sql.txt", "--format", "json", "-q"], - "[{\"Int64(1)\":1}]\n" + "files", + ["--file", "tests/sql/select.sql", "-q"], )] #[case::set_batch_size( - ["--command", "show datafusion.execution.batch_size", "--format", "json", "-q", "-b", "1"], - "[{\"name\":\"datafusion.execution.batch_size\",\"value\":\"1\"}]\n" + "batch_size", + ["--command", "show datafusion.execution.batch_size", "-q", "-b", "1"], )] #[test] fn cli_quick_test<'a>( + #[case] snapshot_name: &'a str, #[case] args: impl IntoIterator<Item = &'a str>, - #[case] expected: &str, ) { - let mut cmd = Command::cargo_bin("datafusion-cli").unwrap(); + let mut settings = make_settings(); + settings.set_snapshot_suffix(snapshot_name); + let _bound = settings.bind_to_scope(); + + let mut cmd = cli(); cmd.args(args); - cmd.assert().stdout(predicate::eq(expected)); + + assert_cmd_snapshot!(cmd); +} + +#[rstest] +#[case("csv")] +#[case("tsv")] +#[case("table")] +#[case("json")] +#[case("nd-json")] +#[case("automatic")] +#[test] +fn test_cli_format<'a>(#[case] format: &'a str) { + let mut settings = make_settings(); + settings.set_snapshot_suffix(format); + let _bound = settings.bind_to_scope(); + + let mut cmd = cli(); + cmd.args(["--command", "select 1", "-q", "--format", format]); + + assert_cmd_snapshot!(cmd); +} + +async fn s3_client() -> aws_sdk_s3::Client { + let access_key_id = + env::var("AWS_ACCESS_KEY_ID").expect("AWS_ACCESS_KEY_ID is not set"); + let secret_access_key = + env::var("AWS_SECRET_ACCESS_KEY").expect("AWS_SECRET_ACCESS_KEY is not set"); + + let endpoint_url = env::var("AWS_ENDPOINT").expect("AWS_ENDPOINT is not set"); + let region = Region::new(env::var("AWS_REGION").unwrap_or("df-test".to_string())); + + let allow_non_test_credentials = env::var("ALLOW_NON_TEST_CREDENTIALS") + .map(|v| v == "1") + .unwrap_or(false); + + if allow_non_test_credentials + || !access_key_id.starts_with("TEST-") + || !secret_access_key.starts_with("TEST-") + { + panic!("Refusing with non-test credentials. Either set ALLOW_NON_TEST_CREDENTIALS=1 or add TEST- for AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY"); + } + + let creds = Credentials::new(access_key_id, secret_access_key, None, None, "test"); + let config = aws_sdk_s3::Config::builder() + .credentials_provider(creds) + .endpoint_url(endpoint_url) + .region(region) + .behavior_version(BehaviorVersion::v2024_03_28()) + .build(); + + aws_sdk_s3::Client::from_conf(config) +} + +async fn create_bucket(bucket_name: &str, client: &aws_sdk_s3::Client) { + match client.head_bucket().bucket(bucket_name).send().await { + Ok(_) => {} + Err(SdkError::ServiceError(err)) + if matches!( + err.err(), + aws_sdk_s3::operation::head_bucket::HeadBucketError::NotFound(_) + ) => + { + client + .create_bucket() + .bucket(bucket_name) + .send() + .await + .expect("Failed to create bucket"); + } + Err(e) => panic!("Failed to head bucket: {:?}", e), + } +} + +async fn move_file_to_bucket( + from_path: &str, + to_path: &str, + bucket_name: &str, + client: &aws_sdk_s3::Client, +) { + let body = + aws_sdk_s3::primitives::ByteStream::from_path(std::path::Path::new(from_path)) + .await + .expect("Failed to read file"); + + client + .put_object() + .bucket(bucket_name) + .key(to_path) + .body(body) + .send() + .await + .expect("Failed to put object"); +} + +#[tokio::test] +async fn test_cli() { + if env::var("TEST_STORAGE_INTEGRATION").is_err() { + eprintln!("Skipping external storages integration tests"); + return; + } + + let client = s3_client().await; + create_bucket("cli", &client).await; + move_file_to_bucket( + "../datafusion/core/tests/data/cars.csv", + "cars.csv", + "cli", + &client, + ) + .await; + + let settings = make_settings(); + let _bound = settings.bind_to_scope(); + + glob!("sql/*.sql", |path| { + let input = fs::read_to_string(path).unwrap(); + assert_cmd_snapshot!(cli().pass_stdin(input)) Review Comment: > It is absolutely possible! My idea was to make it closer to slt and separate the code from the data. FWIW I think .slt keeps the tests setup and expected output together (the slt driver is separate for sure, but most of the tests are self contained in the .slt file -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org