This is an automated email from the ASF dual-hosted git repository.
yao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-kyuubi.git
The following commit(s) were added to refs/heads/master by this push:
new 121de7f [KYUUBI #1035] Add classification of SQL_Type: this PR is
just for DML
121de7f is described below
commit 121de7fbb09725487343d64e4af497e86b7d5ad4
Author: 张宇翔 <[email protected]>
AuthorDate: Mon Sep 6 16:32:29 2021 +0800
[KYUUBI #1035] Add classification of SQL_Type: this PR is just for DML
Add classification of SQL_Type: this PR is just for DML
### _Why are the changes needed?_
<!--
Please clarify why the changes are needed. For instance,
1. If you add a feature, you can talk about the use case of it.
2. If you fix a bug, you can clarify why it is a bug.
-->
### _How was this patch tested?_
- [ ] Add some test cases that check the changes thoroughly including
negative and positive cases if possible
- [ ] Add screenshots for manual tests if appropriate
- [ ] [Run
test](https://kyuubi.readthedocs.io/en/latest/develop_tools/testing.html#running-tests)
locally before making a pull request
Closes #1035 from zhang1002/branch-1.3_get-sql-type-for-dml.
Closes #1035
eb6a96d8 [张宇翔] Merge branch 'master' into branch-1.3_get-sql-type-for-dml
f0a7e566 [张宇翔] Merge remote-tracking branch 'upstream/master'
8e586df1 [张宇翔] Merge branch 'master' into branch-1.3_get-sql-type-for-dml
ba0cd0a3 [张宇翔] Merge branch 'master' into branch-1.3_get-sql-type-for-dml
112aa6b6 [张宇翔] Merge remote-tracking branch 'upstream/master'
18f35f2b [张宇翔] Add the classification of sql: DML
025bc3f8 [张宇翔] Add the classification of sql: DML
55ef6af7 [张宇翔] some modification
f1f8b355 [张宇翔] change other to undefined
1052ae45 [张宇翔] Change some code standards
5e21dc62 [张宇翔] Change some code standards
f531744d [张宇翔] Add dml test
3017b96c [张宇翔] Merge branch 'master' into branch-1.3_get-sql-type-for-ddl
c55652fd [张宇翔] Merge remote-tracking branch 'upstream/master'
c2572b98 [张宇翔] 1. Use RuleBuilder to develop this function 2. Use analyzed
logical plan to judge this sql's classification 3. Change the matching rule:
use map, the key is simpleClassName, the value is classification of this sql
93b5624a [张宇翔] Exclude license check for json
d8187ced [张宇翔] Exclude license check for json
e46bc86e [张宇翔] Add exception
3b358bf0 [张宇翔] Add licence
1125b600 [张宇翔] Merge branch 'master' into branch-1.3_get-sql-type-for-ddl
ef5e8c55 [张宇翔] Merge remote-tracking branch 'upstream/master'
ba8f99eb [张宇翔] Use extension to get simpleName
c0bdea7b [张宇翔] Merge branch 'master' into branch-1.3_get-sql-type-for-ddl
5a75384c [张宇翔] Merge remote-tracking branch 'upstream/master'
55b85849 [张宇翔] Update settings.md
ecbd8000 [张宇翔] Repair the scalastyle violations.
76edd20d [张宇翔] Repair the scalastyle violations.
d8e820ee [张宇翔] Merge branch 'master' into branch-1.3_get-sql-type-for-ddl
8da4f7ed [张宇翔] Merge remote-tracking branch 'upstream/master'
65a90958 [张宇翔] Classification for sqlType: DDL
a7ba1bfc [张宇翔] Merge remote-tracking branch 'upstream/master'
b662a989 [张宇翔] Merge remote-tracking branch 'upstream/master'
4c8f3b87 [张宇翔] Merge remote-tracking branch 'upstream/master'
8b686767 [张宇翔] Merge remote-tracking branch 'upstream/master'
cf99e309 [张宇翔] Merge remote-tracking branch 'upstream/master'
0afaa578 [张宇翔] Merge remote-tracking branch 'upstream/master'
b24fea07 [张宇翔] Merge remote-tracking branch 'upstream/master'
e517cfc5 [张宇翔] Merge remote-tracking branch 'upstream/master'
18aebe76 [张宇翔] Merge remote-tracking branch 'upstream/master'
f248bef7 [张宇翔] Merge remote-tracking branch 'upstream/master'
5ffb54f3 [张宇翔] Add kyuubi-spark-monitor module for nightly.yml
Authored-by: 张宇翔 <[email protected]>
Signed-off-by: Kent Yao <[email protected]>
---
.../main/resources/sql-classification-default.json | 6 +-
.../test/resources/sql-classification-default.json | 6 +-
.../apache/spark/sql/KyuubiExtensionSuite.scala | 252 ++++++++++++++++++++-
3 files changed, 253 insertions(+), 11 deletions(-)
diff --git
a/dev/kyuubi-extension-spark-3-1/src/main/resources/sql-classification-default.json
b/dev/kyuubi-extension-spark-3-1/src/main/resources/sql-classification-default.json
index e2abc2e..1280ad3 100644
---
a/dev/kyuubi-extension-spark-3-1/src/main/resources/sql-classification-default.json
+++
b/dev/kyuubi-extension-spark-3-1/src/main/resources/sql-classification-default.json
@@ -26,5 +26,9 @@
"DropTableCommand": "ddl",
"TruncateTableCommand": "ddl",
"AlterTableRecoverPartitionsCommand": "ddl",
- "SetCatalogAndNamespace": "ddl"
+ "SetCatalogAndNamespace": "ddl",
+ "InsertIntoHadoopFsRelationCommand": "dml",
+ "LoadDataCommand": "dml",
+ "InsertIntoDataSourceDirCommand": "dml",
+ "InsertIntoHiveDirCommand": "dml"
}
\ No newline at end of file
diff --git
a/dev/kyuubi-extension-spark-3-1/src/test/resources/sql-classification-default.json
b/dev/kyuubi-extension-spark-3-1/src/test/resources/sql-classification-default.json
index e2abc2e..1280ad3 100644
---
a/dev/kyuubi-extension-spark-3-1/src/test/resources/sql-classification-default.json
+++
b/dev/kyuubi-extension-spark-3-1/src/test/resources/sql-classification-default.json
@@ -26,5 +26,9 @@
"DropTableCommand": "ddl",
"TruncateTableCommand": "ddl",
"AlterTableRecoverPartitionsCommand": "ddl",
- "SetCatalogAndNamespace": "ddl"
+ "SetCatalogAndNamespace": "ddl",
+ "InsertIntoHadoopFsRelationCommand": "dml",
+ "LoadDataCommand": "dml",
+ "InsertIntoDataSourceDirCommand": "dml",
+ "InsertIntoHiveDirCommand": "dml"
}
\ No newline at end of file
diff --git
a/dev/kyuubi-extension-spark-3-1/src/test/scala/org/apache/spark/sql/KyuubiExtensionSuite.scala
b/dev/kyuubi-extension-spark-3-1/src/test/scala/org/apache/spark/sql/KyuubiExtensionSuite.scala
index 64529b8..db1db77 100644
---
a/dev/kyuubi-extension-spark-3-1/src/test/scala/org/apache/spark/sql/KyuubiExtensionSuite.scala
+++
b/dev/kyuubi-extension-spark-3-1/src/test/scala/org/apache/spark/sql/KyuubiExtensionSuite.scala
@@ -419,6 +419,36 @@ class KyuubiExtensionSuite extends QueryTest with
SQLTestUtils with AdaptiveSpar
}
}
+ test("Sql classification for ddl") {
+ withSQLConf(KyuubiSQLConf.SQL_CLASSIFICATION_ENABLED.key -> "true") {
+ withDatabase("inventory") {
+ val df = sql("CREATE DATABASE inventory;")
+ assert(df.sparkSession.conf.get("kyuubi.spark.sql.classification") ===
"ddl")
+ }
+ val df = sql("select timestamp'2021-06-01'")
+ assert(df.sparkSession.conf.get("kyuubi.spark.sql.classification") !==
"ddl")
+ }
+ }
+
+ test("Sql classification for dml") {
+ withSQLConf(KyuubiSQLConf.SQL_CLASSIFICATION_ENABLED.key -> "true") {
+ val df01 = sql("CREATE TABLE IF NOT EXISTS students " +
+ "(name VARCHAR(64), address VARCHAR(64)) " +
+ "USING PARQUET PARTITIONED BY (student_id INT);")
+ assert(df01.sparkSession.conf.get("kyuubi.spark.sql.classification") ===
"ddl")
+
+ val sql02 = "INSERT INTO students VALUES ('Amy Smith', '123 Park Ave,
San Jose', 111111);"
+ val df02 = sql(sql02)
+
+ // scalastyle:off println
+ println("the query execution is :" + spark.sessionState.executePlan(
+ spark.sessionState.sqlParser.parsePlan(sql02)).toString())
+ // scalastyle:on println
+
+ assert(df02.sparkSession.conf.get("kyuubi.spark.sql.classification") ===
"dml")
+ }
+ }
+
test("get simple name for DDL") {
import scala.collection.mutable.Set
@@ -646,14 +676,218 @@ class KyuubiExtensionSuite extends QueryTest with
SQLTestUtils with AdaptiveSpar
// scalastyle:on println
}
- test("Sql classification for ddl") {
- withSQLConf(KyuubiSQLConf.SQL_CLASSIFICATION_ENABLED.key -> "true") {
- withDatabase("inventory") {
- val df = sql("CREATE DATABASE inventory;")
- assert(df.sparkSession.conf.get("kyuubi.spark.sql.classification") ===
"ddl")
- }
- val df = sql("select timestamp'2021-06-01'")
- assert(df.sparkSession.conf.get("kyuubi.spark.sql.classification") !==
"ddl")
- }
+ test("get simple name for DML") {
+ import scala.collection.mutable.Set
+ val dmlSimpleName: Set[String] = Set()
+
+ var pre_sql = "CREATE TABLE IF NOT EXISTS students (name VARCHAR(64),
address VARCHAR(64)) " +
+ "USING PARQUET PARTITIONED BY (student_id INT);"
+ spark.sql(pre_sql)
+ pre_sql = "CREATE TABLE IF NOT EXISTS PERSONS (name VARCHAR(64), address
VARCHAR(64)) " +
+ "USING PARQUET PARTITIONED BY (ssn INT);"
+ spark.sql(pre_sql)
+ pre_sql = "INSERT INTO persons VALUES " +
+ "('Dora Williams', '134 Forest Ave, Menlo Park', 123456789), " +
+ "('Eddie Davis', '245 Market St, Milpitas', 345678901);"
+ spark.sql(pre_sql)
+ pre_sql = "CREATE TABLE IF NOT EXISTS visiting_students " +
+ "(name VARCHAR(64), address VARCHAR(64)) USING PARQUET PARTITIONED BY
(student_id INT);"
+ spark.sql(pre_sql)
+ pre_sql = "CREATE TABLE IF NOT EXISTS applicants " +
+ "(name VARCHAR(64), address VARCHAR(64), qualified BOOLEAN) " +
+ "USING PARQUET PARTITIONED BY (student_id INT);"
+ spark.sql(pre_sql)
+ pre_sql = "INSERT INTO applicants VALUES " +
+ "('Helen Davis', '469 Mission St, San Diego', true, 999999), " +
+ "('Ivy King', '367 Leigh Ave, Santa Clara', false, 101010), " +
+ "('Jason Wang', '908 Bird St, Saratoga', true, 121212);"
+ spark.sql(pre_sql)
+
+ val sql01 = "INSERT INTO students VALUES ('Amy Smith', '123 Park Ave, San
Jose', 111111);"
+ dmlSimpleName.add(
+ spark.sessionState.analyzer.execute(
+ spark.sessionState.sqlParser.parsePlan(sql01)
+ ).getClass.getSimpleName
+ )
+
+ val sql02 = "INSERT INTO students VALUES " +
+ "('Bob Brown', '456 Taylor St, Cupertino', 222222), " +
+ "('Cathy Johnson', '789 Race Ave, Palo Alto', 333333);"
+ dmlSimpleName.add(
+ spark.sessionState.analyzer.execute(
+ spark.sessionState.sqlParser.parsePlan(sql02)
+ ).getClass.getSimpleName
+ )
+
+ val sql03 = "INSERT INTO students PARTITION (student_id = 444444) " +
+ "SELECT name, address FROM persons WHERE name = \"Dora Williams\";"
+ dmlSimpleName.add(
+ spark.sessionState.analyzer.execute(
+ spark.sessionState.sqlParser.parsePlan(sql03)
+ ).getClass.getSimpleName
+ )
+
+ val sql04 = "INSERT INTO students TABLE visiting_students;"
+ dmlSimpleName.add(
+ spark.sessionState.analyzer.execute(
+ spark.sessionState.sqlParser.parsePlan(sql04)
+ ).getClass.getSimpleName
+ )
+
+ val sql05 = "INSERT INTO students FROM applicants " +
+ "SELECT name, address, student_id WHERE qualified = true;"
+ dmlSimpleName.add(
+ spark.sessionState.analyzer.execute(
+ spark.sessionState.sqlParser.parsePlan(sql05)
+ ).getClass.getSimpleName
+ )
+
+ val sql06 = "INSERT INTO students (address, name, student_id) " +
+ "VALUES ('Hangzhou, China', 'Kent Yao', 11215016);"
+ dmlSimpleName.add(
+ spark.sessionState.analyzer.execute(
+ spark.sessionState.sqlParser.parsePlan(sql06)
+ ).getClass.getSimpleName
+ )
+
+ val sql07 = "INSERT INTO students PARTITION (student_id = 11215017) " +
+ "(address, name) VALUES ('Hangzhou, China', 'Kent Yao Jr.');"
+ dmlSimpleName.add(
+ spark.sessionState.analyzer.execute(
+ spark.sessionState.sqlParser.parsePlan(sql07)
+ ).getClass.getSimpleName
+ )
+
+ val sql08 = "INSERT OVERWRITE students VALUES " +
+ "('Ashua Hill', '456 Erica Ct, Cupertino', 111111), " +
+ "('Brian Reed', '723 Kern Ave, Palo Alto', 222222);"
+ dmlSimpleName.add(
+ spark.sessionState.analyzer.execute(
+ spark.sessionState.sqlParser.parsePlan(sql08)
+ ).getClass.getSimpleName
+ )
+
+ val sql09 = "INSERT OVERWRITE students PARTITION (student_id = 222222) " +
+ "SELECT name, address FROM persons WHERE name = \"Dora Williams\";"
+ dmlSimpleName.add(
+ spark.sessionState.analyzer.execute(
+ spark.sessionState.sqlParser.parsePlan(sql09)
+ ).getClass.getSimpleName
+ )
+
+ val sql10 = "INSERT OVERWRITE students TABLE visiting_students;"
+ dmlSimpleName.add(
+ spark.sessionState.analyzer.execute(
+ spark.sessionState.sqlParser.parsePlan(sql10)
+ ).getClass.getSimpleName
+ )
+
+ val sql11 = "INSERT OVERWRITE students FROM applicants " +
+ "SELECT name, address, student_id WHERE qualified = true;"
+ dmlSimpleName.add(
+ spark.sessionState.analyzer.execute(
+ spark.sessionState.sqlParser.parsePlan(sql11)
+ ).getClass.getSimpleName
+ )
+
+ val sql12 = "INSERT OVERWRITE students (address, name, student_id) VALUES
" +
+ "('Hangzhou, China', 'Kent Yao', 11215016);"
+ dmlSimpleName.add(
+ spark.sessionState.analyzer.execute(
+ spark.sessionState.sqlParser.parsePlan(sql12)
+ ).getClass.getSimpleName
+ )
+
+ val sql13 = "INSERT OVERWRITE students PARTITION (student_id = 11215016) "
+
+ "(address, name) VALUES ('Hangzhou, China', 'Kent Yao Jr.');"
+ dmlSimpleName.add(
+ spark.sessionState.analyzer.execute(
+ spark.sessionState.sqlParser.parsePlan(sql13)
+ ).getClass.getSimpleName
+ )
+
+ val sql14 = "INSERT OVERWRITE DIRECTORY '/tmp/destination' " +
+ "USING parquet OPTIONS (col1 1, col2 2, col3 'test') " +
+ "SELECT * FROM students;"
+ dmlSimpleName.add(
+ spark.sessionState.analyzer.execute(
+ spark.sessionState.sqlParser.parsePlan(sql14)
+ ).getClass.getSimpleName
+ )
+
+ val sql15 = "INSERT OVERWRITE DIRECTORY " +
+ "USING parquet " +
+ "OPTIONS ('path' '/tmp/destination', col1 1, col2 2, col3 'test') " +
+ "SELECT * FROM students;"
+ dmlSimpleName.add(
+ spark.sessionState.analyzer.execute(
+ spark.sessionState.sqlParser.parsePlan(sql15)
+ ).getClass.getSimpleName
+ )
+
+ val sql016 = "INSERT OVERWRITE LOCAL DIRECTORY '/tmp/destination' " +
+ "STORED AS orc " +
+ "SELECT * FROM students;"
+ dmlSimpleName.add(
+ spark.sessionState.analyzer.execute(
+ spark.sessionState.sqlParser.parsePlan(sql016)
+ ).getClass.getSimpleName
+ )
+
+ val sql017 = "INSERT OVERWRITE LOCAL DIRECTORY '/tmp/destination' " +
+ "ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' " +
+ "SELECT * FROM students;"
+ dmlSimpleName.add(
+ spark.sessionState.analyzer.execute(
+ spark.sessionState.sqlParser.parsePlan(sql017)
+ ).getClass.getSimpleName
+ )
+
+ pre_sql = "CREATE TABLE IF NOT EXISTS students_test " +
+ "(name VARCHAR(64), address VARCHAR(64)) " +
+ "USING PARQUET PARTITIONED BY (student_id INT) " +
+ "LOCATION '/tmp/destination/';"
+ spark.sql(pre_sql)
+ pre_sql = "INSERT INTO students_test VALUES " +
+ "('Bob Brown', '456 Taylor St, Cupertino', 222222), " +
+ "('Cathy Johnson', '789 Race Ave, Palo Alto', 333333);"
+ spark.sql(pre_sql)
+ pre_sql = "CREATE TABLE IF NOT EXISTS test_load " +
+ "(name VARCHAR(64), address VARCHAR(64), student_id INT) " +
+ "USING HIVE;"
+ spark.sql(pre_sql)
+
+ val sql018 = "LOAD DATA LOCAL INPATH " +
+ "'/tmp/destination/students_test' OVERWRITE INTO TABLE test_load;"
+ dmlSimpleName.add(
+ spark.sessionState.analyzer.execute(
+ spark.sessionState.sqlParser.parsePlan(sql018)
+ ).getClass.getSimpleName
+ )
+
+ pre_sql = "CREATE TABLE IF NOT EXISTS test_partition " +
+ "(c1 INT, c2 INT, c3 INT) PARTITIONED BY (c2, c3) " +
+ "LOCATION '/tmp/destination/';"
+ spark.sql(pre_sql)
+ pre_sql = "INSERT INTO test_partition PARTITION (c2 = 2, c3 = 3) VALUES
(1);"
+ spark.sql(pre_sql)
+ pre_sql = "INSERT INTO test_partition PARTITION (c2 = 5, c3 = 6) VALUES
(4);"
+ spark.sql(pre_sql)
+ pre_sql = "INSERT INTO test_partition PARTITION (c2 = 8, c3 = 9) VALUES
(7);"
+ spark.sql(pre_sql)
+ pre_sql = "CREATE TABLE IF NOT EXISTS test_load_partition " +
+ "(c1 INT, c2 INT, c3 INT) USING HIVE PARTITIONED BY (c2, c3);"
+ spark.sql(pre_sql)
+
+ val sql019 = "LOAD DATA LOCAL INPATH
'/tmp/destination/test_partition/c2=2/c3=3' " +
+ "OVERWRITE INTO TABLE test_load_partition PARTITION (c2=2, c3=3);"
+ dmlSimpleName.add(
+ spark.sessionState.analyzer.execute(
+ spark.sessionState.sqlParser.parsePlan(sql019)
+ ).getClass.getSimpleName
+ )
+ // scalastyle:off println
+ println("dml simple name is :" + dmlSimpleName)
+ // scalastyle:on println
}
}