This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 283c4b51d3 Document and `scratch` directory for sqllogictest and make 
test specific (#7312)
283c4b51d3 is described below

commit 283c4b51d3afab7abf63407e7d2f2a8cb60869a0
Author: Andrew Lamb <[email protected]>
AuthorDate: Thu Aug 24 06:25:03 2023 -0400

    Document and `scratch` directory for sqllogictest and make test specific 
(#7312)
    
    * Document and `scratch` directory for sqllogictest and make test specific
    
    * Fix name
    
    * Update test pats
    
    * Apply suggestions from code review
    
    Co-authored-by: Metehan Yıldırım 
<[email protected]>
    
    ---------
    
    Co-authored-by: Metehan Yıldırım 
<[email protected]>
---
 datafusion/sqllogictest/README.md                  | 30 ++++++++++++++----
 datafusion/sqllogictest/bin/sqllogictests.rs       | 17 +++++-----
 datafusion/sqllogictest/test_files/copy.slt        | 36 +++++++++++-----------
 .../sqllogictest/test_files/insert_to_external.slt | 24 +++++++--------
 4 files changed, 64 insertions(+), 43 deletions(-)

diff --git a/datafusion/sqllogictest/README.md 
b/datafusion/sqllogictest/README.md
index 1f69bb864e..3e94859d35 100644
--- a/datafusion/sqllogictest/README.md
+++ b/datafusion/sqllogictest/README.md
@@ -177,14 +177,32 @@ You can update the tests / generate expected output by 
passing the `--complete`
 cargo test --test sqllogictests -- ddl --complete
 ```
 
-#### sqllogictests
+#### Running tests: `scratchdir`
 
-sqllogictest is a program originally written for SQLite to verify the 
correctness of SQL queries against the SQLite
-engine. The program is engine-agnostic and can parse sqllogictest files 
(`.slt`), runs queries against an SQL engine and
-compare the output to the expected output.
+The DataFusion sqllogictest runner automatically creates a directory
+named `test_files/scratch/<filename>`, creating it if needed and
+clearing any file contents if it exists.
 
-Tests in the `.slt` file are a sequence of query record generally starting 
with `CREATE` statements to populate tables
-and then further queries to test the populated data (arrow-datafusion 
exception).
+For example, the `test_files/copy.slt` file should use scratch
+directory `test_files/scratch/copy`.
+
+Tests that need to write temporary files should write (only) to this
+directory to ensure they do not interfere with others concurrently
+running tests.
+
+#### `.slt` file format
+
+[`sqllogictest`] was originally written for SQLite to verify the
+correctness of SQL queries against the SQLite engine. The format is
+engine-agnostic: a runner parses sqllogictest files (`.slt`), runs
+queries against an SQL engine, and compares the output to the expected
+output.
+
+[`sqllogictest`]: https://www.sqlite.org/sqllogictest/doc/trunk/about.wiki
+
+Tests in the `.slt` file are a sequence of query records generally
+starting with `CREATE` statements to populate tables and then further
+queries to test the populated data.
 
 Each `.slt` file runs in its own, isolated `SessionContext`, to make the test 
setup explicit and so they can run in
 parallel. Thus it is important to keep the tests from having externally visible 
side effects (like writing to a global
diff --git a/datafusion/sqllogictest/bin/sqllogictests.rs 
b/datafusion/sqllogictest/bin/sqllogictests.rs
index fe38a9d8f8..618e3106c6 100644
--- a/datafusion/sqllogictest/bin/sqllogictests.rs
+++ b/datafusion/sqllogictest/bin/sqllogictests.rs
@@ -55,17 +55,21 @@ pub async fn main() -> Result<()> {
     run_tests().await
 }
 
-/// Sets up an empty directory at test_files/scratch
+/// Sets up an empty directory at test_files/scratch/<name>
 /// creating it if needed and clearing any file contents if it exists
 /// This allows tests for inserting to external tables or copy to
 /// to persist data to disk and have consistent state when running
 /// a new test
-fn setup_scratch_dir() -> Result<()> {
-    let path = std::path::Path::new("test_files/scratch");
+fn setup_scratch_dir(name: &Path) -> Result<()> {
+    // go from copy.slt --> copy
+    let file_stem = name.file_stem().expect("File should have a stem");
+    let path = PathBuf::from("test_files").join("scratch").join(file_stem);
+
+    info!("Creating scratch dir in {path:?}");
     if path.exists() {
-        fs::remove_dir_all(path)?;
+        fs::remove_dir_all(&path)?;
     }
-    fs::create_dir(path)?;
+    fs::create_dir_all(&path)?;
     Ok(())
 }
 
@@ -73,8 +77,6 @@ async fn run_tests() -> Result<()> {
     // Enable logging (e.g. set RUST_LOG=debug to see debug logs)
     env_logger::init();
 
-    setup_scratch_dir()?;
-
     let options = Options::new();
 
     // Run all tests in parallel, reporting failures at the end
@@ -135,6 +137,7 @@ async fn run_test_file(test_file: TestFile) -> Result<()> {
         info!("Skipping: {}", path.display());
         return Ok(());
     };
+    setup_scratch_dir(&relative_path)?;
     let mut runner = sqllogictest::Runner::new(|| async {
         Ok(DataFusion::new(
             test_ctx.session_ctx().clone(),
diff --git a/datafusion/sqllogictest/test_files/copy.slt 
b/datafusion/sqllogictest/test_files/copy.slt
index a44a662ada..d13caa47c1 100644
--- a/datafusion/sqllogictest/test_files/copy.slt
+++ b/datafusion/sqllogictest/test_files/copy.slt
@@ -21,19 +21,19 @@ create table source_table(col1 integer, col2 varchar) as 
values (1, 'Foo'), (2,
 
 # Copy to directory as multiple files
 query IT
-COPY source_table TO 'test_files/scratch/table' (format parquet, 
per_thread_output true);
+COPY source_table TO 'test_files/scratch/copy/table' (format parquet, 
per_thread_output true);
 ----
 2
 
 # Error case
 query error DataFusion error: Error during planning: Copy To format not 
explicitly set and unable to get file extension!
-EXPLAIN COPY source_table to 'test_files/scratch/table'
+EXPLAIN COPY source_table to 'test_files/scratch/copy/table'
 
 query TT
-EXPLAIN COPY source_table to 'test_files/scratch/table' (format parquet, 
per_thread_output true)
+EXPLAIN COPY source_table to 'test_files/scratch/copy/table' (format parquet, 
per_thread_output true)
 ----
 logical_plan
-CopyTo: format=parquet output_url=test_files/scratch/table 
per_thread_output=true options: (format parquet, per_thread_output true)
+CopyTo: format=parquet output_url=test_files/scratch/copy/table 
per_thread_output=true options: (format parquet, per_thread_output true)
 --TableScan: source_table projection=[col1, col2]
 physical_plan
 InsertExec: sink=ParquetSink(writer_mode=PutMultipart, file_groups=[])
@@ -41,13 +41,13 @@ InsertExec: sink=ParquetSink(writer_mode=PutMultipart, 
file_groups=[])
 
 # Copy more files to directory via query
 query IT
-COPY (select * from source_table UNION ALL select * from source_table) to 
'test_files/scratch/table' (format parquet, per_thread_output true);
+COPY (select * from source_table UNION ALL select * from source_table) to 
'test_files/scratch/copy/table' (format parquet, per_thread_output true);
 ----
 4
 
 # validate multiple parquet file output
 statement ok
-CREATE EXTERNAL TABLE validate_parquet STORED AS PARQUET LOCATION 
'test_files/scratch/table/';
+CREATE EXTERNAL TABLE validate_parquet STORED AS PARQUET LOCATION 
'test_files/scratch/copy/table/';
 
 query IT
 select * from validate_parquet;
@@ -61,13 +61,13 @@ select * from validate_parquet;
 
 # Copy from table to single file
 query IT
-COPY source_table to 'test_files/scratch/table.parquet';
+COPY source_table to 'test_files/scratch/copy/table.parquet';
 ----
 2
 
 # validate single parquet file output
 statement ok
-CREATE EXTERNAL TABLE validate_parquet_single STORED AS PARQUET LOCATION 
'test_files/scratch/table.parquet';
+CREATE EXTERNAL TABLE validate_parquet_single STORED AS PARQUET LOCATION 
'test_files/scratch/copy/table.parquet';
 
 query IT
 select * from validate_parquet_single;
@@ -77,13 +77,13 @@ select * from validate_parquet_single;
 
 # copy from table to folder of csv files
 query IT
-COPY source_table  to 'test_files/scratch/table_csv' (format csv, 
per_thread_output true);
+COPY source_table  to 'test_files/scratch/copy/table_csv' (format csv, 
per_thread_output true);
 ----
 2
 
 # validate folder of csv files
 statement ok
-CREATE EXTERNAL TABLE validate_csv STORED AS csv WITH HEADER ROW LOCATION 
'test_files/scratch/table_csv';
+CREATE EXTERNAL TABLE validate_csv STORED AS csv WITH HEADER ROW LOCATION 
'test_files/scratch/copy/table_csv';
 
 query IT
 select * from validate_csv;
@@ -93,13 +93,13 @@ select * from validate_csv;
 
 # Copy from table to single csv
 query IT
-COPY source_table  to 'test_files/scratch/table.csv';
+COPY source_table  to 'test_files/scratch/copy/table.csv';
 ----
 2
 
 # Validate single csv output
 statement ok
-CREATE EXTERNAL TABLE validate_single_csv STORED AS csv WITH HEADER ROW 
LOCATION 'test_files/scratch/table.csv';
+CREATE EXTERNAL TABLE validate_single_csv STORED AS csv WITH HEADER ROW 
LOCATION 'test_files/scratch/copy/table.csv';
 
 query IT
 select * from validate_single_csv;
@@ -109,13 +109,13 @@ select * from validate_single_csv;
 
 # Copy from table to folder of json
 query IT
-COPY source_table to 'test_files/scratch/table_json' (format json, 
per_thread_output true);
+COPY source_table to 'test_files/scratch/copy/table_json' (format json, 
per_thread_output true);
 ----
 2
 
 # Validate json output
 statement ok
-CREATE EXTERNAL TABLE validate_json STORED AS json LOCATION 
'test_files/scratch/table_json';
+CREATE EXTERNAL TABLE validate_json STORED AS json LOCATION 
'test_files/scratch/copy/table_json';
 
 query IT
 select * from validate_json;
@@ -125,13 +125,13 @@ select * from validate_json;
 
 # Copy from table to single json file
 query IT
-COPY source_table  to 'test_files/scratch/table.json';
+COPY source_table  to 'test_files/scratch/copy/table.json';
 ----
 2
 
 # Validate single JSON file`
 statement ok
-CREATE EXTERNAL TABLE validate_single_json STORED AS json LOCATION 
'test_files/scratch/table_json';
+CREATE EXTERNAL TABLE validate_single_json STORED AS json LOCATION 
'test_files/scratch/copy/table_json';
 
 query IT
 select * from validate_single_json;
@@ -141,13 +141,13 @@ select * from validate_single_json;
 
 # Copy from table with options
 query IT
-COPY source_table  to 'test_files/scratch/table.json' (row_group_size 55);
+COPY source_table  to 'test_files/scratch/copy/table.json' (row_group_size 55);
 ----
 2
 
 # Copy from table with options (and trailing comma)
 query IT
-COPY source_table  to 'test_files/scratch/table.json' (row_group_size 55, 
row_group_limit_bytes 9,);
+COPY source_table  to 'test_files/scratch/copy/table.json' (row_group_size 55, 
row_group_limit_bytes 9,);
 ----
 2
 
diff --git a/datafusion/sqllogictest/test_files/insert_to_external.slt 
b/datafusion/sqllogictest/test_files/insert_to_external.slt
index b2ee5468e7..a29c230a46 100644
--- a/datafusion/sqllogictest/test_files/insert_to_external.slt
+++ b/datafusion/sqllogictest/test_files/insert_to_external.slt
@@ -49,7 +49,7 @@ statement ok
 CREATE EXTERNAL TABLE
 single_file_test(a bigint, b bigint)
 STORED AS csv
-LOCATION 'test_files/scratch/single_csv_table.csv'
+LOCATION 'test_files/scratch/insert_to_external/single_csv_table.csv'
 OPTIONS(
 create_local_path 'true',
 single_file 'true',
@@ -70,7 +70,7 @@ statement ok
 CREATE EXTERNAL TABLE
 directory_test(a bigint, b bigint)
 STORED AS parquet
-LOCATION 'test_files/scratch/external_parquet_table_q0'
+LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q0'
 OPTIONS(
 create_local_path 'true',
 );
@@ -87,10 +87,10 @@ select * from directory_test;
 3 4
 
 statement ok
-CREATE EXTERNAL TABLE 
+CREATE EXTERNAL TABLE
 table_without_values(field1 BIGINT NULL, field2 BIGINT NULL)
 STORED AS parquet
-LOCATION 'test_files/scratch/external_parquet_table_q1'
+LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q1'
 OPTIONS (create_local_path 'true');
 
 query TT
@@ -153,10 +153,10 @@ drop table table_without_values;
 
 # test_insert_into_as_select_multi_partitioned
 statement ok
-CREATE EXTERNAL TABLE 
+CREATE EXTERNAL TABLE
 table_without_values(field1 BIGINT NULL, field2 BIGINT NULL)
 STORED AS parquet
-LOCATION 'test_files/scratch/external_parquet_table_q2'
+LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q2'
 OPTIONS (create_local_path 'true');
 
 query TT
@@ -197,10 +197,10 @@ drop table table_without_values;
 
 # test_insert_into_with_sort
 statement ok
-CREATE EXTERNAL TABLE 
+CREATE EXTERNAL TABLE
 table_without_values(c1 varchar NULL)
 STORED AS parquet
-LOCATION 'test_files/scratch/external_parquet_table_q3'
+LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q3'
 OPTIONS (create_local_path 'true');
 
 # verify that the sort order of the insert query is maintained into the
@@ -237,10 +237,10 @@ drop table table_without_values;
 
 # test insert with column names
 statement ok
-CREATE EXTERNAL TABLE 
+CREATE EXTERNAL TABLE
 table_without_values(id BIGINT, name varchar)
 STORED AS parquet
-LOCATION 'test_files/scratch/external_parquet_table_q4'
+LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q4'
 OPTIONS (create_local_path 'true');
 
 query IT
@@ -276,10 +276,10 @@ drop table table_without_values;
 
 # test insert with non-nullable column
 statement ok
-CREATE EXTERNAL TABLE 
+CREATE EXTERNAL TABLE
 table_without_values(field1 BIGINT NOT NULL, field2 BIGINT NULL)
 STORED AS parquet
-LOCATION 'test_files/scratch/external_parquet_table_q5'
+LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q5'
 OPTIONS (create_local_path 'true');
 
 query II

Reply via email to