(spark) branch master updated: [SPARK-48844][FOLLOWUP][TESTS] Cleanup duplicated data resource files in hive-thriftserver test

yao Thu, 25 Jul 2024 02:40:55 -0700

This is an automated email from the ASF dual-hosted git repository.

yao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/master by this push:
     new 5c19505c5f16 [SPARK-48844][FOLLOWUP][TESTS] Cleanup duplicated data 
resource files in hive-thriftserver test
5c19505c5f16 is described below

commit 5c19505c5f1670ac81cf7de93f734455dc8f7e9d
Author: Kent Yao <y...@apache.org>
AuthorDate: Thu Jul 25 17:39:11 2024 +0800

    [SPARK-48844][FOLLOWUP][TESTS] Cleanup duplicated data resource files in 
hive-thriftserver test
    
    ### What changes were proposed in this pull request?
    
    A follow-up to SPARK-48844 that cleans up duplicated data resource files in the hive-thriftserver tests.
    
    ### Why are the changes needed?
    
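    Code refactoring: the sql-on-files tests now create their own Parquet, ORC, CSV, and JSON tables under the warehouse directory instead of reading checked-in data files, so the copies under hive-thriftserver's test-data directory can be deleted.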
    
    ### Does this PR introduce _any_ user-facing change?
    
    no
    
    ### How was this patch tested?
    
    Updated the existing sql-on-files tests (inputs and regenerated golden result files).
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    no
    
    Closes #47480 from yaooqinn/SPARK-48844-F.
    
    Authored-by: Kent Yao <y...@apache.org>
    Signed-off-by: Kent Yao <y...@apache.org>
---
 .../analyzer-results/sql-on-files.sql.out          | 106 +++++++++++++++++----
 .../resources/sql-tests/inputs/sql-on-files.sql    |  19 +++-
 .../sql-tests/results/sql-on-files.sql.out         | 104 +++++++++++++++++---
 .../test-data/before_1582_date_v2_4.snappy.orc     | Bin 201 -> 0 bytes
 .../src/test/resources/test-data/cars.csv          |   7 --
 .../resources/test-data/dec-in-fixed-len.parquet   | Bin 460 -> 0 bytes
 .../test/resources/test-data/with-map-fields.json  |   5 -
 7 files changed, 193 insertions(+), 48 deletions(-)

diff --git 
a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-on-files.sql.out 
b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-on-files.sql.out
index 78e2a876da86..b098a9758fe4 100644
--- 
a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-on-files.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-on-files.sql.out
@@ -1,4 +1,19 @@
 -- Automatically generated by SQLQueryTestSuite
+-- !query
+CREATE DATABASE IF NOT EXISTS sql_on_files
+-- !query analysis
+CreateNamespace true
++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_on_files]
+
+
+-- !query
+CREATE TABLE sql_on_files.test_parquet USING PARQUET AS SELECT 1
+-- !query analysis
+CreateDataSourceTableAsSelectCommand 
`spark_catalog`.`sql_on_files`.`test_parquet`, ErrorIfExists, [1]
+   +- Project [1 AS 1#x]
+      +- OneRowRelation
+
+
 -- !query
 SELECT * FROM parquet.``
 -- !query analysis
@@ -33,12 +48,25 @@ org.apache.spark.sql.AnalysisException
 
 
 -- !query
-SELECT * FROM parquet.`src/test/resources/test-data/dec-in-fixed-len.parquet` 
LIMIT 1
+SELECT * FROM parquet.`${spark.sql.warehouse.dir}/sql_on_files.db/test_parquet`
+-- !query analysis
+Project [1#x]
++- Relation [1#x] parquet
+
+
+-- !query
+DROP TABLE sql_on_files.test_parquet
+-- !query analysis
+DropTable false, false
++- ResolvedIdentifier V2SessionCatalog(spark_catalog), 
sql_on_files.test_parquet
+
+
+-- !query
+CREATE TABLE sql_on_files.test_orc USING ORC AS SELECT 1
 -- !query analysis
-GlobalLimit 1
-+- LocalLimit 1
-   +- Project [fixed_len_dec#x]
-      +- Relation [fixed_len_dec#x] parquet
+CreateDataSourceTableAsSelectCommand 
`spark_catalog`.`sql_on_files`.`test_orc`, ErrorIfExists, [1]
+   +- Project [1 AS 1#x]
+      +- OneRowRelation
 
 
 -- !query
@@ -75,12 +103,25 @@ org.apache.spark.sql.AnalysisException
 
 
 -- !query
-SELECT * FROM 
orc.`src/test/resources/test-data/before_1582_date_v2_4.snappy.orc` LIMIT 1
+SELECT * FROM orc.`${spark.sql.warehouse.dir}/sql_on_files.db/test_orc`
+-- !query analysis
+Project [1#x]
++- Relation [1#x] orc
+
+
+-- !query
+DROP TABLE sql_on_files.test_orc
 -- !query analysis
-GlobalLimit 1
-+- LocalLimit 1
-   +- Project [dt#x]
-      +- Relation [dt#x] orc
+DropTable false, false
++- ResolvedIdentifier V2SessionCatalog(spark_catalog), sql_on_files.test_orc
+
+
+-- !query
+CREATE TABLE sql_on_files.test_csv USING CSV AS SELECT 1
+-- !query analysis
+CreateDataSourceTableAsSelectCommand 
`spark_catalog`.`sql_on_files`.`test_csv`, ErrorIfExists, [1]
+   +- Project [1 AS 1#x]
+      +- OneRowRelation
 
 
 -- !query
@@ -117,12 +158,25 @@ org.apache.spark.sql.AnalysisException
 
 
 -- !query
-SELECT * FROM csv.`src/test/resources/test-data/cars.csv` LIMIT 1
+SELECT * FROM csv.`${spark.sql.warehouse.dir}/sql_on_files.db/test_csv`
+-- !query analysis
+Project [_c0#x]
++- Relation [_c0#x] csv
+
+
+-- !query
+DROP TABLE sql_on_files.test_csv
+-- !query analysis
+DropTable false, false
++- ResolvedIdentifier V2SessionCatalog(spark_catalog), sql_on_files.test_csv
+
+
+-- !query
+CREATE TABLE sql_on_files.test_json USING JSON AS SELECT 1
 -- !query analysis
-GlobalLimit 1
-+- LocalLimit 1
-   +- Project [_c0#x, _c1#x, _c2#x, _c3#x, _c4#x]
-      +- Relation [_c0#x,_c1#x,_c2#x,_c3#x,_c4#x] csv
+CreateDataSourceTableAsSelectCommand 
`spark_catalog`.`sql_on_files`.`test_json`, ErrorIfExists, [1]
+   +- Project [1 AS 1#x]
+      +- OneRowRelation
 
 
 -- !query
@@ -159,9 +213,21 @@ org.apache.spark.sql.AnalysisException
 
 
 -- !query
-SELECT * FROM json.`src/test/resources/test-data/with-map-fields.json` LIMIT 1
+SELECT * FROM json.`${spark.sql.warehouse.dir}/sql_on_files.db/test_json`
+-- !query analysis
+Project [1#xL]
++- Relation [1#xL] json
+
+
+-- !query
+DROP TABLE sql_on_files.test_json
+-- !query analysis
+DropTable false, false
++- ResolvedIdentifier V2SessionCatalog(spark_catalog), sql_on_files.test_json
+
+
+-- !query
+DROP DATABASE sql_on_files
 -- !query analysis
-GlobalLimit 1
-+- LocalLimit 1
-   +- Project [id#xL, intervals#x]
-      +- Relation [id#xL,intervals#x] json
+DropNamespace false, false
++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_on_files]
diff --git a/sql/core/src/test/resources/sql-tests/inputs/sql-on-files.sql 
b/sql/core/src/test/resources/sql-tests/inputs/sql-on-files.sql
index aee8aaa4d195..8a00e4400e6b 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/sql-on-files.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/sql-on-files.sql
@@ -1,19 +1,30 @@
+CREATE DATABASE IF NOT EXISTS sql_on_files;
 -- Parquet
+CREATE TABLE sql_on_files.test_parquet USING PARQUET AS SELECT 1;
 SELECT * FROM parquet.``;
 SELECT * FROM parquet.`/file/not/found`;
-SELECT * FROM parquet.`src/test/resources/test-data/dec-in-fixed-len.parquet` 
LIMIT 1;
+SELECT * FROM 
parquet.`${spark.sql.warehouse.dir}/sql_on_files.db/test_parquet`;
+DROP TABLE sql_on_files.test_parquet;
 
 -- ORC
+CREATE TABLE sql_on_files.test_orc USING ORC AS SELECT 1;
 SELECT * FROM orc.``;
 SELECT * FROM orc.`/file/not/found`;
-SELECT * FROM 
orc.`src/test/resources/test-data/before_1582_date_v2_4.snappy.orc` LIMIT 1;
+SELECT * FROM orc.`${spark.sql.warehouse.dir}/sql_on_files.db/test_orc`;
+DROP TABLE sql_on_files.test_orc;
 
 -- CSV
+CREATE TABLE sql_on_files.test_csv USING CSV AS SELECT 1;
 SELECT * FROM csv.``;
 SELECT * FROM csv.`/file/not/found`;
-SELECT * FROM csv.`src/test/resources/test-data/cars.csv` LIMIT 1;
+SELECT * FROM csv.`${spark.sql.warehouse.dir}/sql_on_files.db/test_csv`;
+DROP TABLE sql_on_files.test_csv;
 
 -- JSON
+CREATE TABLE sql_on_files.test_json USING JSON AS SELECT 1;
 SELECT * FROM json.``;
 SELECT * FROM json.`/file/not/found`;
-SELECT * FROM json.`src/test/resources/test-data/with-map-fields.json` LIMIT 1;
+SELECT * FROM json.`${spark.sql.warehouse.dir}/sql_on_files.db/test_json`;
+DROP TABLE sql_on_files.test_json;
+
+DROP DATABASE sql_on_files;
diff --git a/sql/core/src/test/resources/sql-tests/results/sql-on-files.sql.out 
b/sql/core/src/test/resources/sql-tests/results/sql-on-files.sql.out
index 5c1e5697d029..fc8f44bc22fe 100644
--- a/sql/core/src/test/resources/sql-tests/results/sql-on-files.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/sql-on-files.sql.out
@@ -1,4 +1,20 @@
 -- Automatically generated by SQLQueryTestSuite
+-- !query
+CREATE DATABASE IF NOT EXISTS sql_on_files
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+CREATE TABLE sql_on_files.test_parquet USING PARQUET AS SELECT 1
+-- !query schema
+struct<>
+-- !query output
+
+
+
 -- !query
 SELECT * FROM parquet.``
 -- !query schema
@@ -37,11 +53,27 @@ org.apache.spark.sql.AnalysisException
 
 
 -- !query
-SELECT * FROM parquet.`src/test/resources/test-data/dec-in-fixed-len.parquet` 
LIMIT 1
+SELECT * FROM parquet.`${spark.sql.warehouse.dir}/sql_on_files.db/test_parquet`
 -- !query schema
-struct<fixed_len_dec:decimal(10,2)>
+struct<1:int>
 -- !query output
-0.00
+1
+
+
+-- !query
+DROP TABLE sql_on_files.test_parquet
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+CREATE TABLE sql_on_files.test_orc USING ORC AS SELECT 1
+-- !query schema
+struct<>
+-- !query output
+
 
 
 -- !query
@@ -82,11 +114,27 @@ org.apache.spark.sql.AnalysisException
 
 
 -- !query
-SELECT * FROM 
orc.`src/test/resources/test-data/before_1582_date_v2_4.snappy.orc` LIMIT 1
+SELECT * FROM orc.`${spark.sql.warehouse.dir}/sql_on_files.db/test_orc`
+-- !query schema
+struct<1:int>
+-- !query output
+1
+
+
+-- !query
+DROP TABLE sql_on_files.test_orc
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+CREATE TABLE sql_on_files.test_csv USING CSV AS SELECT 1
 -- !query schema
-struct<dt:date>
+struct<>
 -- !query output
-1200-01-01
+
 
 
 -- !query
@@ -127,11 +175,27 @@ org.apache.spark.sql.AnalysisException
 
 
 -- !query
-SELECT * FROM csv.`src/test/resources/test-data/cars.csv` LIMIT 1
+SELECT * FROM csv.`${spark.sql.warehouse.dir}/sql_on_files.db/test_csv`
+-- !query schema
+struct<_c0:string>
+-- !query output
+1
+
+
+-- !query
+DROP TABLE sql_on_files.test_csv
 -- !query schema
-struct<_c0:string,_c1:string,_c2:string,_c3:string,_c4:string>
+struct<>
 -- !query output
-year   make    model   comment blank
+
+
+
+-- !query
+CREATE TABLE sql_on_files.test_json USING JSON AS SELECT 1
+-- !query schema
+struct<>
+-- !query output
+
 
 
 -- !query
@@ -172,8 +236,24 @@ org.apache.spark.sql.AnalysisException
 
 
 -- !query
-SELECT * FROM json.`src/test/resources/test-data/with-map-fields.json` LIMIT 1
+SELECT * FROM json.`${spark.sql.warehouse.dir}/sql_on_files.db/test_json`
 -- !query schema
-struct<id:bigint,intervals:struct<a:struct<endTime:bigint,startTime:bigint>,b:struct<endTime:bigint,startTime:bigint>>>
+struct<1:bigint>
+-- !query output
+1
+
+
+-- !query
+DROP TABLE sql_on_files.test_json
+-- !query schema
+struct<>
 -- !query output
-1      
{"a":{"endTime":211,"startTime":111},"b":{"endTime":221,"startTime":121}}
+
+
+
+-- !query
+DROP DATABASE sql_on_files
+-- !query schema
+struct<>
+-- !query output
+
diff --git 
a/sql/hive-thriftserver/src/test/resources/test-data/before_1582_date_v2_4.snappy.orc
 
b/sql/hive-thriftserver/src/test/resources/test-data/before_1582_date_v2_4.snappy.orc
deleted file mode 100644
index ebe01743b2e2..000000000000
Binary files 
a/sql/hive-thriftserver/src/test/resources/test-data/before_1582_date_v2_4.snappy.orc
 and /dev/null differ
diff --git a/sql/hive-thriftserver/src/test/resources/test-data/cars.csv 
b/sql/hive-thriftserver/src/test/resources/test-data/cars.csv
deleted file mode 100644
index 40ded573ade5..000000000000
--- a/sql/hive-thriftserver/src/test/resources/test-data/cars.csv
+++ /dev/null
@@ -1,7 +0,0 @@
-
-year,make,model,comment,blank
-"2012","Tesla","S","No comment",
-
-1997,Ford,E350,"Go get one now they are going fast",
-2015,Chevy,Volt
-
diff --git 
a/sql/hive-thriftserver/src/test/resources/test-data/dec-in-fixed-len.parquet 
b/sql/hive-thriftserver/src/test/resources/test-data/dec-in-fixed-len.parquet
deleted file mode 100644
index 6ad37d563951..000000000000
Binary files 
a/sql/hive-thriftserver/src/test/resources/test-data/dec-in-fixed-len.parquet 
and /dev/null differ
diff --git 
a/sql/hive-thriftserver/src/test/resources/test-data/with-map-fields.json 
b/sql/hive-thriftserver/src/test/resources/test-data/with-map-fields.json
deleted file mode 100644
index 576fbb9b8758..000000000000
--- a/sql/hive-thriftserver/src/test/resources/test-data/with-map-fields.json
+++ /dev/null
@@ -1,5 +0,0 @@
-{ "id": 1, "intervals": { "a": { "startTime": 111, "endTime": 211 }, "b": { 
"startTime": 121, "endTime": 221 }}}
-{ "id": 2, "intervals": { "a": { "startTime": 112, "endTime": 212 }, "b": { 
"startTime": 122, "endTime": 222 }}}
-{ "id": 3, "intervals": { "a": { "startTime": 113, "endTime": 213 }, "b": { 
"startTime": 123, "endTime": 223 }}}
-{ "id": 4, "intervals": { }}
-{ "id": 5 }
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

(spark) branch master updated: [SPARK-48844][FOLLOWUP][TESTS] Cleanup duplicated data resource files in hive-thriftserver test

Reply via email to