alamb commented on code in PR #4497:
URL: https://github.com/apache/arrow-datafusion/pull/4497#discussion_r1039899137


##########
datafusion/core/tests/sqllogictests/src/main.rs:
##########
@@ -70,56 +71,96 @@ pub async fn main() -> Result<()> {
 #[tokio::main]
 #[cfg(not(target_family = "windows"))]
 pub async fn main() -> Result<()> {
-    let paths = std::fs::read_dir(TEST_DIRECTORY).unwrap();
+    // Enable logging (e.g. set RUST_LOG=debug to see debug logs)
+    env_logger::init();
 
-    // run each file using its own new SessionContext
+    // run each file using its own new DB
     //
     // Note: can't use tester.run_parallel_async()
     // as that will reuse the same SessionContext
     //
     // We could run these tests in parallel eventually if we wanted.
 
-    for path in paths {
-        // TODO better error handling
-        let path = path.unwrap().path();
+    let files = get_test_files();
+    info!("Running test files {:?}", files);
 
-        run_file(&path).await?;
+    for path in files {
+        println!("Running: {}", path.display());
+
+        let file_name = 
path.file_name().unwrap().to_str().unwrap().to_string();
+
+        let ctx = context_for_test_file(&file_name).await;
+
+        let mut tester = sqllogictest::Runner::new(DataFusion { ctx, file_name 
});
+        tester.run_file_async(path).await?;
     }
 
     Ok(())
 }
 
-/// Run the tests in the specified `.slt` file
-async fn run_file(path: &Path) -> Result<()> {
-    println!("Running: {}", path.display());
-
-    let file_name = path.file_name().unwrap().to_str().unwrap().to_string();
-
-    let ctx = context_for_test_file(&file_name).await;
+/// Gets a list of test files to execute. If arguments were passed to
+/// the program, treat them as cargo test filters (substring match on filenames)
+fn get_test_files() -> Vec<PathBuf> {
+    info!("Test directory: {}", TEST_DIRECTORY);
+
+    let args: Vec<_> = std::env::args().collect();
+
+    // treat args after the first as filters to run (substring matching)
+    let filters = if !args.is_empty() {
+        args.iter()
+            .skip(1)
+            .map(|arg| arg.as_str())
+            .collect::<Vec<_>>()
+    } else {
+        vec![]
+    };
+
+    // default to all files in test directory filtering based on name
+    std::fs::read_dir(TEST_DIRECTORY)
+        .unwrap()
+        .map(|path| path.unwrap().path())
+        .filter(|path| check_test_file(&filters, path.as_path()))
+        .collect()
+}
 
-    let mut tester = sqllogictest::Runner::new(DataFusion { ctx, file_name });
-    tester.run_file_async(path).await?;
+/// Because this test can be run as a cargo test, commands like
+///
+/// ```shell
+/// cargo test foo
+/// ```
+///
+/// Will end up passing `foo` as a command line argument.
+///
+/// To be compatible with this, treat the command line arguments as
+/// filters that do a substring match on each input path.
+/// Returns true if this path should be run.
+fn check_test_file(filters: &[&str], path: &Path) -> bool {
+    if filters.is_empty() {
+        return true;
+    }
 
-    Ok(())
+    // otherwise check if any filter matches
+    let path_str = path.to_string_lossy();
+    filters.iter().any(|filter| path_str.contains(filter))
 }
 
 /// Create a SessionContext, configured for the specific test
 async fn context_for_test_file(file_name: &str) -> SessionContext {
     match file_name {
         "aggregate.slt" => {
-            println!("Registering aggregate tables");
+            info!("Registering aggregate tables");
             let ctx = SessionContext::new();
             setup::register_aggregate_tables(&ctx).await;
             ctx
         }
         "information_schema.slt" => {
-            println!("Enabling information schema");
+            info!("Enabling information schema");
             SessionContext::with_config(
                 SessionConfig::new().with_information_schema(true),
             )
         }
         _ => {
-            println!("Using default SessionContex");
+            info!("Using default SessionContex");

Review Comment:
   Good catch -- fixed in 
https://github.com/apache/arrow-datafusion/pull/4497/commits/3be240924fe3262cd4e29b6cdf3dcc450a097d5f



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to