Github user hvanhovell commented on a diff in the pull request:
https://github.com/apache/spark/pull/11557#discussion_r57149320
--- Diff:
sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/ng/SqlBase.g4
---
@@ -0,0 +1,742 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This file is an adaptation of Presto's
presto-parser/src/main/antlr4/com/facebook/presto/sql/parser/SqlBase.g4 grammar.
+ */
+
+grammar SqlBase;
+
+tokens {
+ DELIMITER
+}
+
+singleStatement
+ : statement EOF
+ ;
+
+singleExpression
+ : namedExpression EOF
+ ;
+
+singleTableIdentifier
+ : tableIdentifier EOF
+ ;
+
+singleDataType
+ : dataType EOF
+ ;
+
+statement
+ : query
#statementDefault
+ | USE db=identifier #use
+ | createTable ('(' colTypeList ')')? tableProvider tableProperties
#createTableUsing
+ | createTable tableProvider tableProperties? AS? query
#createTableUsingAsSelect
+ | DROP TABLE (IF EXISTS)? qualifiedName
#dropTable
+ | DELETE FROM qualifiedName (WHERE booleanExpression)?
#delete
+ | ALTER TABLE from=qualifiedName RENAME TO to=qualifiedName
#renameTable
+ | ALTER TABLE tableName=qualifiedName
+ RENAME COLUMN from=identifier TO to=identifier
#renameColumn
+ | ALTER TABLE tableName=qualifiedName
+ ADD COLUMN column=colType
#addColumn
+ | CREATE (OR REPLACE)? VIEW qualifiedName AS query
#createView
+ | DROP VIEW (IF EXISTS)? qualifiedName
#dropView
+ | CALL qualifiedName '(' (callArgument (',' callArgument)*)? ')'
#call
+ | EXPLAIN explainOption* statement
#explain
+ | SHOW TABLES ((FROM | IN) db=identifier)?
+ (LIKE (qualifiedName | pattern=STRING))?
#showTables
+ | SHOW SCHEMAS ((FROM | IN) identifier)?
#showSchemas
+ | SHOW CATALOGS
#showCatalogs
+ | SHOW COLUMNS (FROM | IN) qualifiedName
#showColumns
+ | SHOW FUNCTIONS (LIKE? (qualifiedName | pattern=STRING))?
#showFunctions
+ | (DESC | DESCRIBE) FUNCTION EXTENDED? qualifiedName
#describeFunction
+ | (DESC | DESCRIBE) option=(EXTENDED | FORMATTED)?
+ tableIdentifier partitionSpec? describeColName?
#describeTable
+ | SHOW SESSION
#showSession
+ | SET SESSION qualifiedName EQ expression
#setSession
+ | RESET SESSION qualifiedName
#resetSession
+ | START TRANSACTION (transactionMode (',' transactionMode)*)?
#startTransaction
+ | COMMIT WORK?
#commit
+ | ROLLBACK WORK?
#rollback
+ | SHOW PARTITIONS (FROM | IN) qualifiedName
+ (WHERE booleanExpression)?
+ (ORDER BY sortItem (',' sortItem)*)?
+ (LIMIT limit=(INTEGER_VALUE | ALL))?
#showPartitions
+ | REFRESH TABLE tableIdentifier
#refreshTable
+ | CACHE LAZY? TABLE identifier (AS? query)?
#cacheTable
+ | UNCACHE TABLE identifier
#uncacheTable
+ | CLEAR CACHE
#clearCache
+ | SET .*?
#setConfiguration
+ ;
+
+createTable
+ : CREATE TEMPORARY? TABLE (IF NOT EXISTS)? tableIdentifier
+ ;
+
+query
+ : ctes? queryNoWith
+ ;
+
+insertInto
+ : INSERT OVERWRITE TABLE tableIdentifier partitionSpec? (IF NOT
EXISTS)?
+ | INSERT INTO TABLE? tableIdentifier partitionSpec?
+ ;
+
+partitionSpec
+ : PARTITION '(' partitionVal (',' partitionVal)* ')'
+ ;
+
+partitionVal
+ : identifier (EQ constant)?
+ ;
+
+describeColName
+ : identifier ('.' (identifier | STRING))*
+ ;
+
+ctes
+ : WITH namedQuery (',' namedQuery)*
+ ;
+
+namedQuery
+ : name=identifier AS? '(' queryNoWith ')'
+ ;
+
+tableProvider
+ : USING qualifiedName
+ ;
+
+tableProperties
+ :(OPTIONS | WITH) '(' tableProperty (',' tableProperty)* ')'
+ ;
+
+tableProperty
+ : key=tablePropertyKey (EQ? value=STRING)?
+ ;
+
+tablePropertyKey
+ : looseIdentifier ('.' looseIdentifier)*
+ | STRING
+ ;
+
+queryNoWith
+ : insertInto? queryTerm queryOrganization
#singleInsertQuery
+ | fromClause multiInsertQueryBody+
#multiInsertQuery
+ ;
+
+queryOrganization
+ : (ORDER BY order+=sortItem (',' order+=sortItem)*)?
+ (CLUSTER BY clusterBy+=expression (',' clusterBy+=expression)*)?
+ (DISTRIBUTE BY distributeBy+=expression (','
distributeBy+=expression)*)?
+ (SORT BY sort+=sortItem (',' sort+=sortItem)*)?
+ windows?
+ (LIMIT limit=expression)?
+ ;
+
+multiInsertQueryBody
+ : insertInto?
+ querySpecification
+ queryOrganization
+ ;
+
+queryTerm
+ : queryPrimary
#queryTermDefault
+ | left=queryTerm operator=(INTERSECT | UNION | EXCEPT) setQuantifier?
right=queryTerm #setOperation
+ ;
+
+queryPrimary
+ : querySpecification
#queryPrimaryDefault
+ | TABLE tableIdentifier
#table
+ | inlineTable
#inlineTableDefault1
+ | '(' queryNoWith ')'
#subquery
+ ;
+
+sortItem
+ : expression ordering=(ASC | DESC)?
+ ;
+
+querySpecification
+ : (((SELECT kind=TRANSFORM | kind=MAP | kind=REDUCE)) '('
namedExpression (',' namedExpression)* ')'
+ inRowFormat=rowFormat?
+ USING script=STRING
+ (AS (columnAliasList | colTypeList | ('(' (columnAliasList |
colTypeList) ')')))?
+ outRowFormat=rowFormat?
+ (RECORDREADER outRecordReader=STRING)?
+ fromClause?
+ (WHERE where=booleanExpression)?)
+ | (kind=SELECT setQuantifier? namedExpression (',' namedExpression)*
+ fromClause?
+ lateralView*
+ (WHERE where=booleanExpression)?
+ aggregation?
+ (HAVING having=booleanExpression)?
+ windows?)
+ ;
+
+fromClause
+ : FROM relation (',' relation)* lateralView*
+ ;
+
+aggregation
+ : GROUP BY groupingExpressions+=expression (','
groupingExpressions+=expression)* (
+ WITH kind=ROLLUP
+ | WITH kind=CUBE
+ | kind=GROUPING SETS '(' groupingSet (',' groupingSet)* ')')?
+ ;
+
+groupingSet
+ : '(' (expression (',' expression)*)? ')'
+ | expression
+ ;
+
+lateralView
+ : LATERAL VIEW (OUTER)? qualifiedName '(' (expression (','
expression)*)? ')' tblName=identifier (AS? colName+=identifier (','
colName+=identifier)*)
+ ;
+
+setQuantifier
+ : DISTINCT
+ | ALL
+ ;
+
+relation
+ : left=relation
+ ( CROSS JOIN right=sampledRelation
+ | joinType JOIN rightRelation=relation joinCriteria?
+ | NATURAL joinType JOIN right=sampledRelation
+ ) #joinRelation
+ | sampledRelation #relationDefault
+ ;
+
+joinType
+ : INNER?
+ | LEFT OUTER?
+ | LEFT SEMI
+ | RIGHT OUTER?
+ | FULL OUTER?
+ ;
+
+joinCriteria
+ : ON booleanExpression
+ | USING '(' identifier (',' identifier)* ')'
+ ;
+
+sampledRelation
+ : relationPrimary (
+ TABLESAMPLE '('
+ ( (percentage=(INTEGER_VALUE | DECIMAL_VALUE)
sampleType=PERCENTLIT)
+ | (expression sampleType=ROWS)
+ | (sampleType=BUCKET numerator=INTEGER_VALUE OUT OF
denominator=INTEGER_VALUE (ON identifier)?))
+ ')'
+ )?
+ ;
+
+columnAliases
+ : '(' columnAliasList ')'
+ ;
+
+columnAliasList
+ : identifier (',' identifier)*
+ ;
+
+relationPrimary
+ : tableIdentifier (AS? identifier)?
#tableName
+ | '(' queryNoWith ')' (AS? identifier)?
#aliasedQuery
+ | '(' relation ')' (AS? identifier)?
#aliasedRelation
+ | inlineTable
#inlineTableDefault2
+ ;
+
+inlineTable
+ : VALUES expression (',' expression)* (AS? identifier columnAliases?)?
+ ;
+
+rowFormat
+ : rowFormatSerde
+ | rowFormatDelimited
+ ;
+
+rowFormatSerde
+ : ROW FORMAT SERDE name=STRING (WITH SERDEPROPERTIES
props=tableProperties)?
+ ;
+
+rowFormatDelimited
+ : ROW FORMAT DELIMITED
+ (FIELDS TERMINATED BY fieldsTerminatedBy=STRING)?
+ (COLLECTION ITEMS TERMINATED BY collectionItemsTerminatedBy=STRING)?
+ (MAP KEYS TERMINATED BY keysTerminatedBy=STRING)?
+ (ESCAPED BY escapedBy=STRING)?
+ (LINES SEPARATED BY linesSeparatedBy=STRING)?
+ ;
+
+tableIdentifier
+ : (db=identifier '.')? table=identifier
+ ;
+
+namedExpression
+ : expression (AS? (identifier | columnAliases))?
+ ;
+
+expression
+ : booleanExpression
+ ;
+
+booleanExpression
+ : predicated
#booleanDefault
+ | NOT booleanExpression
#logicalNot
+ | left=booleanExpression operator=AND right=booleanExpression
#logicalBinary
+ | left=booleanExpression operator=OR right=booleanExpression
#logicalBinary
+ | EXISTS '(' query ')' #exists
+ ;
+
+// workaround for:
+// https://github.com/antlr/antlr4/issues/780
+// https://github.com/antlr/antlr4/issues/781
+predicated
+ : valueExpression predicate[$valueExpression.ctx]?
+ ;
+
+predicate[ParserRuleContext value]
+ : comparisonOperator right=valueExpression
#comparison
+ | NOT? BETWEEN lower=valueExpression AND upper=valueExpression
#between
+ | NOT? IN '(' expression (',' expression)* ')'
#inList
+ | NOT? IN '(' query ')'
#inSubquery
+ | NOT? like=(RLIKE | LIKE) pattern=valueExpression
#like
+ | IS NOT? NULL
#nullPredicate
+ ;
+
+valueExpression
+ : primaryExpression
#valueExpressionDefault
+ | operator=(MINUS | PLUS | TILDE) valueExpression
#arithmeticUnary
+ | left=valueExpression operator=(ASTERISK | SLASH | PERCENT | DIV)
right=valueExpression #arithmeticBinary
+ | left=valueExpression operator=(PLUS | MINUS) right=valueExpression
#arithmeticBinary
+ | left=valueExpression operator=AMPERSAND right=valueExpression
#arithmeticBinary
+ | left=valueExpression operator=HAT right=valueExpression
#arithmeticBinary
+ | left=valueExpression operator=PIPE right=valueExpression
#arithmeticBinary
+ ;
+
+primaryExpression
+ : constant
#constantDefault
+ | ASTERISK
#star
+ | qualifiedName '.' ASTERISK
#star
+ | '(' expression (',' expression)+ ')'
#rowConstructor
+ | qualifiedName '(' (ASTERISK) ')' (OVER windowSpec)?
#functionCall
+ | qualifiedName '(' (setQuantifier? expression (',' expression)*)? ')'
(OVER windowSpec)? #functionCall
+ | '(' query ')'
#subqueryExpression
+ | CASE valueExpression whenClause+ (ELSE elseExpression=expression)?
END #simpleCase
+ | CASE whenClause+ (ELSE elseExpression=expression)? END
#searchedCase
+ | CAST '(' expression AS dataType ')'
#cast
+ | value=primaryExpression '[' index=valueExpression ']'
#subscript
+ | identifier
#columnReference
+ | base=primaryExpression '.' fieldName=identifier
#dereference
+ | '(' expression ')'
#parenthesizedExpression
+ ;
+
+constant
+ : NULL
#nullLiteral
+ | interval
#intervalLiteral
+ | identifier STRING
#typeConstructor
+ | number
#numericLiteral
+ | booleanValue
#booleanLiteral
+ | STRING+
#stringLiteral
+ ;
+
+comparisonOperator
+ : EQ | NEQ | NEQJ | LT | LTE | GT | GTE | NSEQ
+ ;
+
+booleanValue
+ : TRUE | FALSE
+ ;
+
+interval
+ : INTERVAL intervalField*
+ ;
+
+intervalField
+ : value=intervalValue unit=identifier (TO to=identifier)?
+ ;
+
+intervalValue
+ : (PLUS | MINUS)? (INTEGER_VALUE | DECIMAL_VALUE)
+ | STRING
+ ;
+
+dataType
+ : complex=ARRAY '<' dataType '>'
#complexDataType
+ | complex=MAP '<' dataType ',' dataType '>'
#complexDataType
+ | complex=STRUCT ('<' colTypeList? '>' | NEQ)
#complexDataType
+ | identifier ('(' INTEGER_VALUE (',' INTEGER_VALUE)* ')')?
#primitiveDataType
+ ;
+
+colTypeList
+ : colType (',' colType)*
+ ;
+
+colType
+ : identifier ':'? dataType (COMMENT STRING)?
+ ;
+
+whenClause
+ : WHEN condition=expression THEN result=expression
+ ;
+
+windows
+ : WINDOW namedWindow (',' namedWindow)*
+ ;
+
+namedWindow
+ : identifier AS windowSpec
+ ;
+
+windowSpec
+ : name=identifier #windowRef
+ | '('
+ (PARTITION BY partition+=expression (',' partition+=expression)*)?
+ (ORDER BY sortItem (',' sortItem)* windowFrame?)?
+ ')' #windowDef
+ ;
+
+windowFrame
+ : frameType=RANGE start=frameBound
+ | frameType=ROWS start=frameBound
+ | frameType=RANGE BETWEEN start=frameBound AND end=frameBound
+ | frameType=ROWS BETWEEN start=frameBound AND end=frameBound
+ ;
+
+frameBound
+ : UNBOUNDED boundType=(PRECEDING | FOLLOWING)
+ | boundType=CURRENT ROW
+ | expression boundType=(PRECEDING | FOLLOWING)
+ ;
+
+
+explainOption
+ : LOGICAL | FORMATTED | EXTENDED
+ ;
+
+transactionMode
+ : ISOLATION LEVEL levelOfIsolation #isolationLevel
+ | READ accessMode=(ONLY | WRITE) #transactionAccessMode
+ ;
+
+levelOfIsolation
+ : READ UNCOMMITTED #readUncommitted
+ | READ COMMITTED #readCommitted
+ | REPEATABLE READ #repeatableRead
+ | SERIALIZABLE #serializable
+ ;
+
+callArgument
+ : expression #positionalArgument
+ | identifier '=>' expression #namedArgument
+ ;
+
+qualifiedName
+ : identifier ('.' identifier)*
+ ;
+
+// Identifier that also allows the use of a number of SQL keywords (mainly
for backwards compatibility).
+looseIdentifier
+ : identifier
+ | FROM
+ | TO
+ | TABLE
+ | WITH
+ ;
+
+identifier
+ : IDENTIFIER #unquotedIdentifier
+ | quotedIdentifier #quotedIdentifierAlternative
+ | nonReserved #unquotedIdentifier
+ ;
+
+quotedIdentifier
+ : BACKQUOTED_IDENTIFIER
+ ;
+
+number
+ : DECIMAL_VALUE #decimalLiteral
+ | SCIENTIFIC_DECIMAL_VALUE #scientificDecimalLiteral
+ | INTEGER_VALUE #integerLiteral
+ | BIGINT_LITERAL #bigIntLiteral
+ | SMALLINT_LITERAL #smallIntLiteral
+ | TINYINT_LITERAL #tinyIntLiteral
+ | DOUBLE_LITERAL #doubleLiteral
+ ;
+
+nonReserved
+ : SHOW | TABLES | COLUMNS | COLUMN | PARTITIONS | FUNCTIONS | SCHEMAS
| CATALOGS | SESSION
+ | ADD
+ | OVER | PARTITION | RANGE | ROWS | PRECEDING | FOLLOWING | CURRENT |
ROW | MAP | ARRAY | STRUCT
+ | LATERAL | WINDOW | REDUCE | TRANSFORM | USING | SERDE |
SERDEPROPERTIES | RECORDREADER
+ | DELIMITED | FIELDS | TERMINATED | COLLECTION | ITEMS | KEYS |
ESCAPED | LINES | SEPARATED
+ | EXTENDED | REFRESH | CLEAR | CACHE | UNCACHE | LAZY | TEMPORARY |
OPTIONS
+ | GROUPING | CUBE | ROLLUP
+ | EXPLAIN | FORMAT | LOGICAL | FORMATTED
+ | TABLESAMPLE | USE | TO | BUCKET | PERCENTLIT | OUT | OF
+ | SET | RESET
+ | VIEW | REPLACE
+ | IF
+ | NO | DATA
+ | START | TRANSACTION | COMMIT | ROLLBACK | WORK | ISOLATION | LEVEL
+ | SERIALIZABLE | REPEATABLE | COMMITTED | UNCOMMITTED | READ | WRITE |
ONLY
+ | CALL
+ | SORT | CLUSTER | DISTRIBUTE
+ ;
+
+SELECT: 'SELECT';
--- End diff --
We could. However, it currently works better if they are in the same file
while we are making modifications (which we currently are).
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes to enable it, or if the feature is enabled but not working,
please contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]