LuciferYang commented on code in PR #52757:
URL: https://github.com/apache/spark/pull/52757#discussion_r2473396667
##########
sql/connect/client/jdbc/src/main/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectDatabaseMetaData.scala:
##########
@@ -76,8 +77,11 @@ class SparkConnectDatabaseMetaData(conn: SparkConnectConnection) extends Databas
override def getIdentifierQuoteString: String = "`"
- override def getSQLKeywords: String =
- throw new SQLFeatureNotSupportedException
+ override def getSQLKeywords: String = {
+ conn.checkOpen()
+ conn.spark.sql("SELECT keyword FROM sql_keywords()").collect()
Review Comment:
For the same connection, should multiple calls to getSQLKeywords return the same
result? If so, can we cache the SQL keywords?
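For example, something along these lines might work (just a sketch; the field name is illustrative):
```
  // Computed at most once per connection; the connection state is still
  // checked on every call before the cached value is returned.
  private lazy val cachedSqlKeywords: String =
    conn.spark.sql("SELECT keyword FROM sql_keywords()").collect()
      .map(_.getString(0)).diff(SQL_2003_RESERVED_KEYWORDS).mkString(",")

  override def getSQLKeywords: String = {
    conn.checkOpen()
    cachedSqlKeywords
  }
```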
##########
sql/connect/client/jdbc/src/test/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectDatabaseMetaDataSuite.scala:
##########
@@ -190,4 +190,13 @@ class SparkConnectDatabaseMetaDataSuite extends ConnectFunSuite with RemoteSpark
assert(metadata.supportsSharding === false)
}
}
+
+ test("SparkConnectDatabaseMetaData getSQLKeywords") {
+ withConnection { conn =>
+ val metadata = conn.getMetaData
+ // scalastyle:off line.size.limit
+ assert(metadata.getSQLKeywords === "ADD,AFTER,AGGREGATE,ALWAYS,ANALYZE,ANTI,ANY_VALUE,ARCHIVE,ASC,BINDING,BUCKET,BUCKETS,BYTE,CACHE,CASCADE,CATALOG,CATALOGS,CHANGE,CLEAR,CLUSTER,CLUSTERED,CODEGEN,COLLATION,COLLECTION,COLUMNS,COMMENT,COMPACT,COMPACTIONS,COMPENSATION,COMPUTE,CONCATENATE,CONTAINS,CONTINUE,COST,DATA,DATABASE,DATABASES,DATEADD,DATEDIFF,DATE_ADD,DATE_DIFF,DAYOFYEAR,DAYS,DBPROPERTIES,DEFINED,DEFINER,DELAY,DELIMITED,DESC,DFS,DIRECTORIES,DIRECTORY,DISTRIBUTE,DIV,DO,ELSEIF,ENFORCED,ESCAPED,EVOLUTION,EXCHANGE,EXCLUDE,EXIT,EXPLAIN,EXPORT,EXTEND,EXTENDED,FIELDS,FILEFORMAT,FIRST,FLOW,FOLLOWING,FORMAT,FORMATTED,FOUND,FUNCTIONS,GENERATED,GEOGRAPHY,GEOMETRY,HANDLER,HOURS,IDENTIFIER,IF,IGNORE,ILIKE,IMMEDIATE,INCLUDE,INCREMENT,INDEX,INDEXES,INPATH,INPUT,INPUTFORMAT,INVOKER,ITEMS,ITERATE,JSON,KEY,KEYS,LAST,LAZY,LEAVE,LEVEL,LIMIT,LINES,LIST,LOAD,LOCATION,LOCK,LOCKS,LOGICAL,LONG,LOOP,MACRO,MAP,MATCHED,MATERIALIZED,MICROSECOND,MICROSECONDS,MILLISECOND,MILLISECONDS,MINUS,MINUTES,MONTHS,MSCK,NAME,NAMESPACE,NAMESPACES,NANOSECOND,NANOSECONDS,NORELY,NULLS,OFFSET,OPTION,OPTIONS,OUTPUTFORMAT,OVERWRITE,PARTITIONED,PARTITIONS,PERCENT,PIVOT,PLACING,PRECEDING,PRINCIPALS,PROCEDURES,PROPERTIES,PURGE,QUARTER,QUERY,RECORDREADER,RECORDWRITER,RECOVER,RECURSION,REDUCE,REFRESH,RELY,RENAME,REPAIR,REPEAT,REPEATABLE,REPLACE,RESET,RESPECT,RESTRICT,ROLE,ROLES,SCHEMA,SCHEMAS,SECONDS,SECURITY,SEMI,SEPARATED,SERDE,SERDEPROPERTIES,SETS,SHORT,SHOW,SINGLE,SKEWED,SORT,SORTED,SOURCE,STATISTICS,STORED,STRATIFY,STREAM,STREAMING,STRING,STRUCT,SUBSTR,SYNC,SYSTEM_TIME,SYSTEM_VERSION,TABLES,TARGET,TBLPROPERTIES,TERMINATED,TIMEDIFF,TIMESTAMPADD,TIMESTAMPDIFF,TIMESTAMP_LTZ,TIMESTAMP_NTZ,TINYINT,TOUCH,TRANSACTION,TRANSACTIONS,TRANSFORM,TRUNCATE,TRY_CAST,TYPE,UNARCHIVE,UNBOUNDED,UNCACHE,UNLOCK,UNPIVOT,UNSET,UNTIL,USE,VAR,VARIABLE,VARIANT,VERSION,VIEW,VIEWS,VOID,WATERMARK,WEEK,WEEKS,WHILE,X,YEARS,ZONE")
Review Comment:
When keywords are added or removed, will this test case need to be updated in
lockstep? Is there a more robust way to test this?
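For example, the assertion could check structural properties plus a couple of stable keywords instead of the full literal (rough sketch; it assumes SQL_2003_RESERVED_KEYWORDS is visible to the test, which may require adjusting its visibility):
```
  test("SparkConnectDatabaseMetaData getSQLKeywords") {
    withConnection { conn =>
      val metadata = conn.getMetaData
      val keywords = metadata.getSQLKeywords.split(",")
      // Structural checks instead of a hard-coded keyword list.
      assert(keywords.nonEmpty)
      assert(keywords.forall(k => k.nonEmpty && k == k.toUpperCase))
      // SQL:2003 reserved keywords must have been filtered out.
      assert(!keywords.exists(SparkConnectDatabaseMetaData.SQL_2003_RESERVED_KEYWORDS.toSet))
      // Spot-check a few Spark-specific keywords that are unlikely to disappear.
      assert(keywords.contains("ANALYZE"))
      assert(keywords.contains("TBLPROPERTIES"))
    }
  }
```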
##########
sql/connect/client/jdbc/src/main/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectDatabaseMetaData.scala:
##########
@@ -76,8 +77,11 @@ class SparkConnectDatabaseMetaData(conn: SparkConnectConnection) extends Databas
override def getIdentifierQuoteString: String = "`"
- override def getSQLKeywords: String =
- throw new SQLFeatureNotSupportedException
+ override def getSQLKeywords: String = {
+ conn.checkOpen()
+ conn.spark.sql("SELECT keyword FROM sql_keywords()").collect()
+ .map(_.getString(0)).diff(SQL_2003_RESERVED_KEYWORDS).mkString(",")
Review Comment:
```
private lazy val sqlKeywords: String = {
val sql2003Set = SQL_2003_RESERVED_KEYWORDS.toSet
conn.spark.sql("SELECT keyword FROM sql_keywords()").collect()
.map(_.getString(0))
.filterNot(sql2003Set.contains)
.mkString(",")
}
```
Would this be more efficient?
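Either way the keyword list is only a few hundred entries, so the runtime difference should be negligible; the pre-built Set mainly makes the membership check explicit, and the lazy val would also address the caching question above.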
##########
sql/connect/client/jdbc/src/main/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectDatabaseMetaData.scala:
##########
@@ -494,3 +498,61 @@ class SparkConnectDatabaseMetaData(conn: SparkConnectConnection) extends Databas
override def isWrapperFor(iface: Class[_]): Boolean = iface.isInstance(this)
}
+
+object SparkConnectDatabaseMetaData {
+
+ // SQL:2003 reserved keywords refers to PostgreSQL 9.1 docs:
+ // https://www.postgresql.org/docs/9.1/sql-keywords-appendix.html
Review Comment:
Why does this reference the PostgreSQL 9.1 docs rather than a newer version?
##########
sql/connect/client/jdbc/src/main/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectDatabaseMetaData.scala:
##########
@@ -76,8 +77,11 @@ class SparkConnectDatabaseMetaData(conn: SparkConnectConnection) extends Databas
override def getIdentifierQuoteString: String = "`"
- override def getSQLKeywords: String =
- throw new SQLFeatureNotSupportedException
+ override def getSQLKeywords: String = {
+ conn.checkOpen()
+ conn.spark.sql("SELECT keyword FROM sql_keywords()").collect()
+ .map(_.getString(0)).diff(SQL_2003_RESERVED_KEYWORDS).mkString(",")
Review Comment:
Additionally, do we need to standardize the case (uppercase/lowercase) here?
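For example (sketch only; assumes java.util.Locale is imported, and sql_keywords() already appears to return upper-case names today, so this would mostly be defensive):
```
  conn.spark.sql("SELECT keyword FROM sql_keywords()").collect()
    .map(_.getString(0).toUpperCase(Locale.ROOT))
    .diff(SQL_2003_RESERVED_KEYWORDS)
    .mkString(",")
```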