http://git-wip-us.apache.org/repos/asf/spark/blob/a9b93e07/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 new file mode 100644 index 0000000..3b9f82a --- /dev/null +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -0,0 +1,943 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * This file is an adaptation of Presto's presto-parser/src/main/antlr4/com/facebook/presto/sql/parser/SqlBase.g4 grammar. + */ + +grammar SqlBase; + +tokens { + DELIMITER +} + +singleStatement + : statement EOF + ; + +singleExpression + : namedExpression EOF + ; + +singleTableIdentifier + : tableIdentifier EOF + ; + +singleDataType + : dataType EOF + ; + +statement + : query #statementDefault + | USE db=identifier #use + | CREATE DATABASE (IF NOT EXISTS)? identifier + (COMMENT comment=STRING)? locationSpec? + (WITH DBPROPERTIES tablePropertyList)? #createDatabase + | ALTER DATABASE identifier SET DBPROPERTIES tablePropertyList #setDatabaseProperties + | DROP DATABASE (IF EXISTS)? identifier (RESTRICT | CASCADE)? #dropDatabase + | createTableHeader ('(' colTypeList ')')? tableProvider + (OPTIONS tablePropertyList)? #createTableUsing + | createTableHeader tableProvider + (OPTIONS tablePropertyList)? AS? query #createTableUsing + | createTableHeader ('(' columns=colTypeList ')')? + (COMMENT STRING)? + (PARTITIONED BY '(' partitionColumns=colTypeList ')')? + bucketSpec? skewSpec? + rowFormat? createFileFormat? locationSpec? + (TBLPROPERTIES tablePropertyList)? + (AS? query)? #createTable + | ANALYZE TABLE tableIdentifier partitionSpec? COMPUTE STATISTICS + (identifier | FOR COLUMNS identifierSeq?)? #analyze + | ALTER TABLE from=tableIdentifier RENAME TO to=tableIdentifier #renameTable + | ALTER TABLE tableIdentifier + SET TBLPROPERTIES tablePropertyList #setTableProperties + | ALTER TABLE tableIdentifier + UNSET TBLPROPERTIES (IF EXISTS)? tablePropertyList #unsetTableProperties + | ALTER TABLE tableIdentifier (partitionSpec)? + SET SERDE STRING (WITH SERDEPROPERTIES tablePropertyList)? #setTableSerDe + | ALTER TABLE tableIdentifier (partitionSpec)? + SET SERDEPROPERTIES tablePropertyList #setTableSerDe + | ALTER TABLE tableIdentifier bucketSpec #bucketTable + | ALTER TABLE tableIdentifier NOT CLUSTERED #unclusterTable + | ALTER TABLE tableIdentifier NOT SORTED #unsortTable + | ALTER TABLE tableIdentifier skewSpec #skewTable + | ALTER TABLE tableIdentifier NOT SKEWED #unskewTable + | ALTER TABLE tableIdentifier NOT STORED AS DIRECTORIES #unstoreTable + | ALTER TABLE tableIdentifier + SET SKEWED LOCATION skewedLocationList #setTableSkewLocations + | ALTER TABLE tableIdentifier ADD (IF NOT EXISTS)? + partitionSpecLocation+ #addTablePartition + | ALTER TABLE tableIdentifier + from=partitionSpec RENAME TO to=partitionSpec #renameTablePartition + | ALTER TABLE from=tableIdentifier + EXCHANGE partitionSpec WITH TABLE to=tableIdentifier #exchangeTablePartition + | ALTER TABLE tableIdentifier + DROP (IF EXISTS)? partitionSpec (',' partitionSpec)* PURGE? #dropTablePartitions + | ALTER TABLE tableIdentifier ARCHIVE partitionSpec #archiveTablePartition + | ALTER TABLE tableIdentifier UNARCHIVE partitionSpec #unarchiveTablePartition + | ALTER TABLE tableIdentifier partitionSpec? + SET FILEFORMAT fileFormat #setTableFileFormat + | ALTER TABLE tableIdentifier partitionSpec? SET locationSpec #setTableLocation + | ALTER TABLE tableIdentifier TOUCH partitionSpec? #touchTable + | ALTER TABLE tableIdentifier partitionSpec? COMPACT STRING #compactTable + | ALTER TABLE tableIdentifier partitionSpec? CONCATENATE #concatenateTable + | ALTER TABLE tableIdentifier partitionSpec? + CHANGE COLUMN? oldName=identifier colType + (FIRST | AFTER after=identifier)? (CASCADE | RESTRICT)? #changeColumn + | ALTER TABLE tableIdentifier partitionSpec? + ADD COLUMNS '(' colTypeList ')' (CASCADE | RESTRICT)? #addColumns + | ALTER TABLE tableIdentifier partitionSpec? + REPLACE COLUMNS '(' colTypeList ')' (CASCADE | RESTRICT)? #replaceColumns + | DROP TABLE (IF EXISTS)? tableIdentifier PURGE? + (FOR METADATA? REPLICATION '(' STRING ')')? #dropTable + | CREATE (OR REPLACE)? VIEW (IF NOT EXISTS)? tableIdentifier + identifierCommentList? (COMMENT STRING)? + (PARTITIONED ON identifierList)? + (TBLPROPERTIES tablePropertyList)? AS query #createView + | ALTER VIEW tableIdentifier AS? query #alterViewQuery + | CREATE TEMPORARY? FUNCTION qualifiedName AS className=STRING + (USING resource (',' resource)*)? #createFunction + | DROP TEMPORARY? FUNCTION (IF EXISTS)? qualifiedName #dropFunction + | EXPLAIN explainOption* statement #explain + | SHOW TABLES ((FROM | IN) db=identifier)? + (LIKE (qualifiedName | pattern=STRING))? #showTables + | SHOW FUNCTIONS (LIKE? (qualifiedName | pattern=STRING))? #showFunctions + | (DESC | DESCRIBE) FUNCTION EXTENDED? qualifiedName #describeFunction + | (DESC | DESCRIBE) option=(EXTENDED | FORMATTED)? + tableIdentifier partitionSpec? describeColName? #describeTable + | (DESC | DESCRIBE) DATABASE EXTENDED? identifier #describeDatabase + | REFRESH TABLE tableIdentifier #refreshTable + | CACHE LAZY? TABLE identifier (AS? query)? #cacheTable + | UNCACHE TABLE identifier #uncacheTable + | CLEAR CACHE #clearCache + | ADD identifier .*? #addResource + | SET ROLE .*? #failNativeCommand + | SET .*? #setConfiguration + | kws=unsupportedHiveNativeCommands .*? #failNativeCommand + | hiveNativeCommands #executeNativeCommand + ; + +hiveNativeCommands + : createTableHeader LIKE tableIdentifier + rowFormat? createFileFormat? locationSpec? + (TBLPROPERTIES tablePropertyList)? + | DELETE FROM tableIdentifier (WHERE booleanExpression)? + | TRUNCATE TABLE tableIdentifier partitionSpec? + (COLUMNS identifierList)? + | ALTER VIEW from=tableIdentifier AS? RENAME TO to=tableIdentifier + | ALTER VIEW from=tableIdentifier AS? + SET TBLPROPERTIES tablePropertyList + | ALTER VIEW from=tableIdentifier AS? + UNSET TBLPROPERTIES (IF EXISTS)? tablePropertyList + | ALTER VIEW from=tableIdentifier AS? + ADD (IF NOT EXISTS)? partitionSpecLocation+ + | ALTER VIEW from=tableIdentifier AS? + DROP (IF EXISTS)? partitionSpec (',' partitionSpec)* PURGE? + | DROP VIEW (IF EXISTS)? qualifiedName + | SHOW COLUMNS (FROM | IN) tableIdentifier ((FROM|IN) identifier)? + | START TRANSACTION (transactionMode (',' transactionMode)*)? + | COMMIT WORK? + | ROLLBACK WORK? + | SHOW PARTITIONS tableIdentifier partitionSpec? + | DFS .*? + | (CREATE | ALTER | DROP | SHOW | DESC | DESCRIBE | LOCK | UNLOCK | MSCK | LOAD) .*? + ; + +unsupportedHiveNativeCommands + : kw1=CREATE kw2=ROLE + | kw1=DROP kw2=ROLE + | kw1=GRANT kw2=ROLE? + | kw1=REVOKE kw2=ROLE? + | kw1=SHOW kw2=GRANT + | kw1=SHOW kw2=ROLE kw3=GRANT? + | kw1=SHOW kw2=PRINCIPALS + | kw1=SHOW kw2=ROLES + | kw1=SHOW kw2=CURRENT kw3=ROLES + | kw1=EXPORT kw2=TABLE + | kw1=IMPORT kw2=TABLE + | kw1=SHOW kw2=COMPACTIONS + | kw1=SHOW kw2=CREATE kw3=TABLE + | kw1=SHOW kw2=TRANSACTIONS + | kw1=SHOW kw2=INDEXES + | kw1=SHOW kw2=LOCKS + ; + +createTableHeader + : CREATE TEMPORARY? EXTERNAL? TABLE (IF NOT EXISTS)? tableIdentifier + ; + +bucketSpec + : CLUSTERED BY identifierList + (SORTED BY orderedIdentifierList)? + INTO INTEGER_VALUE BUCKETS + ; + +skewSpec + : SKEWED BY identifierList + ON (constantList | nestedConstantList) + (STORED AS DIRECTORIES)? + ; + +locationSpec + : LOCATION STRING + ; + +query + : ctes? queryNoWith + ; + +insertInto + : INSERT OVERWRITE TABLE tableIdentifier partitionSpec? (IF NOT EXISTS)? + | INSERT INTO TABLE? tableIdentifier partitionSpec? + ; + +partitionSpecLocation + : partitionSpec locationSpec? + ; + +partitionSpec + : PARTITION '(' partitionVal (',' partitionVal)* ')' + ; + +partitionVal + : identifier (EQ constant)? + ; + +describeColName + : identifier ('.' (identifier | STRING))* + ; + +ctes + : WITH namedQuery (',' namedQuery)* + ; + +namedQuery + : name=identifier AS? '(' queryNoWith ')' + ; + +tableProvider + : USING qualifiedName + ; + +tablePropertyList + : '(' tableProperty (',' tableProperty)* ')' + ; + +tableProperty + : key=tablePropertyKey (EQ? value=STRING)? + ; + +tablePropertyKey + : looseIdentifier ('.' looseIdentifier)* + | STRING + ; + +constantList + : '(' constant (',' constant)* ')' + ; + +nestedConstantList + : '(' constantList (',' constantList)* ')' + ; + +skewedLocation + : (constant | constantList) EQ STRING + ; + +skewedLocationList + : '(' skewedLocation (',' skewedLocation)* ')' + ; + +createFileFormat + : STORED AS fileFormat + | STORED BY storageHandler + ; + +fileFormat + : INPUTFORMAT inFmt=STRING OUTPUTFORMAT outFmt=STRING (SERDE serdeCls=STRING)? + (INPUTDRIVER inDriver=STRING OUTPUTDRIVER outDriver=STRING)? #tableFileFormat + | identifier #genericFileFormat + ; + +storageHandler + : STRING (WITH SERDEPROPERTIES tablePropertyList)? + ; + +resource + : identifier STRING + ; + +queryNoWith + : insertInto? queryTerm queryOrganization #singleInsertQuery + | fromClause multiInsertQueryBody+ #multiInsertQuery + ; + +queryOrganization + : (ORDER BY order+=sortItem (',' order+=sortItem)*)? + (CLUSTER BY clusterBy+=expression (',' clusterBy+=expression)*)? + (DISTRIBUTE BY distributeBy+=expression (',' distributeBy+=expression)*)? + (SORT BY sort+=sortItem (',' sort+=sortItem)*)? + windows? + (LIMIT limit=expression)? + ; + +multiInsertQueryBody + : insertInto? + querySpecification + queryOrganization + ; + +queryTerm + : queryPrimary #queryTermDefault + | left=queryTerm operator=(INTERSECT | UNION | EXCEPT) setQuantifier? right=queryTerm #setOperation + ; + +queryPrimary + : querySpecification #queryPrimaryDefault + | TABLE tableIdentifier #table + | inlineTable #inlineTableDefault1 + | '(' queryNoWith ')' #subquery + ; + +sortItem + : expression ordering=(ASC | DESC)? + ; + +querySpecification + : (((SELECT kind=TRANSFORM '(' namedExpressionSeq ')' + | kind=MAP namedExpressionSeq + | kind=REDUCE namedExpressionSeq)) + inRowFormat=rowFormat? + (RECORDWRITER recordWriter=STRING)? + USING script=STRING + (AS (identifierSeq | colTypeList | ('(' (identifierSeq | colTypeList) ')')))? + outRowFormat=rowFormat? + (RECORDREADER recordReader=STRING)? + fromClause? + (WHERE where=booleanExpression)?) + | ((kind=SELECT setQuantifier? namedExpressionSeq fromClause? + | fromClause (kind=SELECT setQuantifier? namedExpressionSeq)?) + lateralView* + (WHERE where=booleanExpression)? + aggregation? + (HAVING having=booleanExpression)? + windows?) + ; + +fromClause + : FROM relation (',' relation)* lateralView* + ; + +aggregation + : GROUP BY groupingExpressions+=expression (',' groupingExpressions+=expression)* ( + WITH kind=ROLLUP + | WITH kind=CUBE + | kind=GROUPING SETS '(' groupingSet (',' groupingSet)* ')')? + ; + +groupingSet + : '(' (expression (',' expression)*)? ')' + | expression + ; + +lateralView + : LATERAL VIEW (OUTER)? qualifiedName '(' (expression (',' expression)*)? ')' tblName=identifier (AS? colName+=identifier (',' colName+=identifier)*)? + ; + +setQuantifier + : DISTINCT + | ALL + ; + +relation + : left=relation + ((CROSS | joinType) JOIN right=relation joinCriteria? + | NATURAL joinType JOIN right=relation + ) #joinRelation + | relationPrimary #relationDefault + ; + +joinType + : INNER? + | LEFT OUTER? + | LEFT SEMI + | RIGHT OUTER? + | FULL OUTER? + ; + +joinCriteria + : ON booleanExpression + | USING '(' identifier (',' identifier)* ')' + ; + +sample + : TABLESAMPLE '(' + ( (percentage=(INTEGER_VALUE | DECIMAL_VALUE) sampleType=PERCENTLIT) + | (expression sampleType=ROWS) + | (sampleType=BUCKET numerator=INTEGER_VALUE OUT OF denominator=INTEGER_VALUE (ON identifier)?)) + ')' + ; + +identifierList + : '(' identifierSeq ')' + ; + +identifierSeq + : identifier (',' identifier)* + ; + +orderedIdentifierList + : '(' orderedIdentifier (',' orderedIdentifier)* ')' + ; + +orderedIdentifier + : identifier ordering=(ASC | DESC)? + ; + +identifierCommentList + : '(' identifierComment (',' identifierComment)* ')' + ; + +identifierComment + : identifier (COMMENT STRING)? + ; + +relationPrimary + : tableIdentifier sample? (AS? identifier)? #tableName + | '(' queryNoWith ')' sample? (AS? identifier)? #aliasedQuery + | '(' relation ')' sample? (AS? identifier)? #aliasedRelation + | inlineTable #inlineTableDefault2 + ; + +inlineTable + : VALUES expression (',' expression)* (AS? identifier identifierList?)? + ; + +rowFormat + : ROW FORMAT SERDE name=STRING (WITH SERDEPROPERTIES props=tablePropertyList)? #rowFormatSerde + | ROW FORMAT DELIMITED + (FIELDS TERMINATED BY fieldsTerminatedBy=STRING (ESCAPED BY escapedBy=STRING)?)? + (COLLECTION ITEMS TERMINATED BY collectionItemsTerminatedBy=STRING)? + (MAP KEYS TERMINATED BY keysTerminatedBy=STRING)? + (LINES TERMINATED BY linesSeparatedBy=STRING)? + (NULL DEFINED AS nullDefinedAs=STRING)? #rowFormatDelimited + ; + +tableIdentifier + : (db=identifier '.')? table=identifier + ; + +namedExpression + : expression (AS? (identifier | identifierList))? + ; + +namedExpressionSeq + : namedExpression (',' namedExpression)* + ; + +expression + : booleanExpression + ; + +booleanExpression + : predicated #booleanDefault + | NOT booleanExpression #logicalNot + | left=booleanExpression operator=AND right=booleanExpression #logicalBinary + | left=booleanExpression operator=OR right=booleanExpression #logicalBinary + | EXISTS '(' query ')' #exists + ; + +// workaround for: +// https://github.com/antlr/antlr4/issues/780 +// https://github.com/antlr/antlr4/issues/781 +predicated + : valueExpression predicate[$valueExpression.ctx]? + ; + +predicate[ParserRuleContext value] + : NOT? BETWEEN lower=valueExpression AND upper=valueExpression #between + | NOT? IN '(' expression (',' expression)* ')' #inList + | NOT? IN '(' query ')' #inSubquery + | NOT? like=(RLIKE | LIKE) pattern=valueExpression #like + | IS NOT? NULL #nullPredicate + ; + +valueExpression + : primaryExpression #valueExpressionDefault + | operator=(MINUS | PLUS | TILDE) valueExpression #arithmeticUnary + | left=valueExpression operator=(ASTERISK | SLASH | PERCENT | DIV) right=valueExpression #arithmeticBinary + | left=valueExpression operator=(PLUS | MINUS) right=valueExpression #arithmeticBinary + | left=valueExpression operator=AMPERSAND right=valueExpression #arithmeticBinary + | left=valueExpression operator=HAT right=valueExpression #arithmeticBinary + | left=valueExpression operator=PIPE right=valueExpression #arithmeticBinary + | left=valueExpression comparisonOperator right=valueExpression #comparison + ; + +primaryExpression + : constant #constantDefault + | ASTERISK #star + | qualifiedName '.' ASTERISK #star + | '(' expression (',' expression)+ ')' #rowConstructor + | qualifiedName '(' (setQuantifier? expression (',' expression)*)? ')' (OVER windowSpec)? #functionCall + | '(' query ')' #subqueryExpression + | CASE valueExpression whenClause+ (ELSE elseExpression=expression)? END #simpleCase + | CASE whenClause+ (ELSE elseExpression=expression)? END #searchedCase + | CAST '(' expression AS dataType ')' #cast + | value=primaryExpression '[' index=valueExpression ']' #subscript + | identifier #columnReference + | base=primaryExpression '.' fieldName=identifier #dereference + | '(' expression ')' #parenthesizedExpression + ; + +constant + : NULL #nullLiteral + | interval #intervalLiteral + | identifier STRING #typeConstructor + | number #numericLiteral + | booleanValue #booleanLiteral + | STRING+ #stringLiteral + ; + +comparisonOperator + : EQ | NEQ | NEQJ | LT | LTE | GT | GTE | NSEQ + ; + +booleanValue + : TRUE | FALSE + ; + +interval + : INTERVAL intervalField* + ; + +intervalField + : value=intervalValue unit=identifier (TO to=identifier)? + ; + +intervalValue + : (PLUS | MINUS)? (INTEGER_VALUE | DECIMAL_VALUE) + | STRING + ; + +dataType + : complex=ARRAY '<' dataType '>' #complexDataType + | complex=MAP '<' dataType ',' dataType '>' #complexDataType + | complex=STRUCT ('<' colTypeList? '>' | NEQ) #complexDataType + | identifier ('(' INTEGER_VALUE (',' INTEGER_VALUE)* ')')? #primitiveDataType + ; + +colTypeList + : colType (',' colType)* + ; + +colType + : identifier ':'? dataType (COMMENT STRING)? + ; + +whenClause + : WHEN condition=expression THEN result=expression + ; + +windows + : WINDOW namedWindow (',' namedWindow)* + ; + +namedWindow + : identifier AS windowSpec + ; + +windowSpec + : name=identifier #windowRef + | '(' + ( CLUSTER BY partition+=expression (',' partition+=expression)* + | ((PARTITION | DISTRIBUTE) BY partition+=expression (',' partition+=expression)*)? + ((ORDER | SORT) BY sortItem (',' sortItem)*)?) + windowFrame? + ')' #windowDef + ; + +windowFrame + : frameType=RANGE start=frameBound + | frameType=ROWS start=frameBound + | frameType=RANGE BETWEEN start=frameBound AND end=frameBound + | frameType=ROWS BETWEEN start=frameBound AND end=frameBound + ; + +frameBound + : UNBOUNDED boundType=(PRECEDING | FOLLOWING) + | boundType=CURRENT ROW + | expression boundType=(PRECEDING | FOLLOWING) + ; + + +explainOption + : LOGICAL | FORMATTED | EXTENDED + ; + +transactionMode + : ISOLATION LEVEL SNAPSHOT #isolationLevel + | READ accessMode=(ONLY | WRITE) #transactionAccessMode + ; + +qualifiedName + : identifier ('.' identifier)* + ; + +// Identifier that also allows the use of a number of SQL keywords (mainly for backwards compatibility). +looseIdentifier + : identifier + | FROM + | TO + | TABLE + | WITH + ; + +identifier + : IDENTIFIER #unquotedIdentifier + | quotedIdentifier #quotedIdentifierAlternative + | nonReserved #unquotedIdentifier + ; + +quotedIdentifier + : BACKQUOTED_IDENTIFIER + ; + +number + : DECIMAL_VALUE #decimalLiteral + | SCIENTIFIC_DECIMAL_VALUE #scientificDecimalLiteral + | INTEGER_VALUE #integerLiteral + | BIGINT_LITERAL #bigIntLiteral + | SMALLINT_LITERAL #smallIntLiteral + | TINYINT_LITERAL #tinyIntLiteral + | DOUBLE_LITERAL #doubleLiteral + ; + +nonReserved + : SHOW | TABLES | COLUMNS | COLUMN | PARTITIONS | FUNCTIONS + | ADD + | OVER | PARTITION | RANGE | ROWS | PRECEDING | FOLLOWING | CURRENT | ROW | MAP | ARRAY | STRUCT + | LATERAL | WINDOW | REDUCE | TRANSFORM | USING | SERDE | SERDEPROPERTIES | RECORDREADER + | DELIMITED | FIELDS | TERMINATED | COLLECTION | ITEMS | KEYS | ESCAPED | LINES | SEPARATED + | EXTENDED | REFRESH | CLEAR | CACHE | UNCACHE | LAZY | TEMPORARY | OPTIONS + | GROUPING | CUBE | ROLLUP + | EXPLAIN | FORMAT | LOGICAL | FORMATTED + | TABLESAMPLE | USE | TO | BUCKET | PERCENTLIT | OUT | OF + | SET + | VIEW | REPLACE + | IF + | NO | DATA + | START | TRANSACTION | COMMIT | ROLLBACK | WORK | ISOLATION | LEVEL + | SNAPSHOT | READ | WRITE | ONLY + | SORT | CLUSTER | DISTRIBUTE UNSET | TBLPROPERTIES | SKEWED | STORED | DIRECTORIES | LOCATION + | EXCHANGE | ARCHIVE | UNARCHIVE | FILEFORMAT | TOUCH | COMPACT | CONCATENATE | CHANGE | FIRST + | AFTER | CASCADE | RESTRICT | BUCKETS | CLUSTERED | SORTED | PURGE | INPUTFORMAT | OUTPUTFORMAT + | INPUTDRIVER | OUTPUTDRIVER | DBPROPERTIES | DFS | TRUNCATE | METADATA | REPLICATION | COMPUTE + | STATISTICS | ANALYZE | PARTITIONED | EXTERNAL | DEFINED | RECORDWRITER + | REVOKE | GRANT | LOCK | UNLOCK | MSCK | EXPORT | IMPORT | LOAD | VALUES | COMMENT | ROLE + | ROLES | COMPACTIONS | PRINCIPALS | TRANSACTIONS | INDEXES | LOCKS | OPTION + ; + +SELECT: 'SELECT'; +FROM: 'FROM'; +ADD: 'ADD'; +AS: 'AS'; +ALL: 'ALL'; +DISTINCT: 'DISTINCT'; +WHERE: 'WHERE'; +GROUP: 'GROUP'; +BY: 'BY'; +GROUPING: 'GROUPING'; +SETS: 'SETS'; +CUBE: 'CUBE'; +ROLLUP: 'ROLLUP'; +ORDER: 'ORDER'; +HAVING: 'HAVING'; +LIMIT: 'LIMIT'; +AT: 'AT'; +OR: 'OR'; +AND: 'AND'; +IN: 'IN'; +NOT: 'NOT' | '!'; +NO: 'NO'; +EXISTS: 'EXISTS'; +BETWEEN: 'BETWEEN'; +LIKE: 'LIKE'; +RLIKE: 'RLIKE' | 'REGEXP'; +IS: 'IS'; +NULL: 'NULL'; +TRUE: 'TRUE'; +FALSE: 'FALSE'; +NULLS: 'NULLS'; +ASC: 'ASC'; +DESC: 'DESC'; +FOR: 'FOR'; +INTERVAL: 'INTERVAL'; +CASE: 'CASE'; +WHEN: 'WHEN'; +THEN: 'THEN'; +ELSE: 'ELSE'; +END: 'END'; +JOIN: 'JOIN'; +CROSS: 'CROSS'; +OUTER: 'OUTER'; +INNER: 'INNER'; +LEFT: 'LEFT'; +SEMI: 'SEMI'; +RIGHT: 'RIGHT'; +FULL: 'FULL'; +NATURAL: 'NATURAL'; +ON: 'ON'; +LATERAL: 'LATERAL'; +WINDOW: 'WINDOW'; +OVER: 'OVER'; +PARTITION: 'PARTITION'; +RANGE: 'RANGE'; +ROWS: 'ROWS'; +UNBOUNDED: 'UNBOUNDED'; +PRECEDING: 'PRECEDING'; +FOLLOWING: 'FOLLOWING'; +CURRENT: 'CURRENT'; +ROW: 'ROW'; +WITH: 'WITH'; +VALUES: 'VALUES'; +CREATE: 'CREATE'; +TABLE: 'TABLE'; +VIEW: 'VIEW'; +REPLACE: 'REPLACE'; +INSERT: 'INSERT'; +DELETE: 'DELETE'; +INTO: 'INTO'; +DESCRIBE: 'DESCRIBE'; +EXPLAIN: 'EXPLAIN'; +FORMAT: 'FORMAT'; +LOGICAL: 'LOGICAL'; +CAST: 'CAST'; +SHOW: 'SHOW'; +TABLES: 'TABLES'; +COLUMNS: 'COLUMNS'; +COLUMN: 'COLUMN'; +USE: 'USE'; +PARTITIONS: 'PARTITIONS'; +FUNCTIONS: 'FUNCTIONS'; +DROP: 'DROP'; +UNION: 'UNION'; +EXCEPT: 'EXCEPT'; +INTERSECT: 'INTERSECT'; +TO: 'TO'; +TABLESAMPLE: 'TABLESAMPLE'; +STRATIFY: 'STRATIFY'; +ALTER: 'ALTER'; +RENAME: 'RENAME'; +ARRAY: 'ARRAY'; +MAP: 'MAP'; +STRUCT: 'STRUCT'; +COMMENT: 'COMMENT'; +SET: 'SET'; +DATA: 'DATA'; +START: 'START'; +TRANSACTION: 'TRANSACTION'; +COMMIT: 'COMMIT'; +ROLLBACK: 'ROLLBACK'; +WORK: 'WORK'; +ISOLATION: 'ISOLATION'; +LEVEL: 'LEVEL'; +SNAPSHOT: 'SNAPSHOT'; +READ: 'READ'; +WRITE: 'WRITE'; +ONLY: 'ONLY'; + +IF: 'IF'; + +EQ : '=' | '=='; +NSEQ: '<=>'; +NEQ : '<>'; +NEQJ: '!='; +LT : '<'; +LTE : '<='; +GT : '>'; +GTE : '>='; + +PLUS: '+'; +MINUS: '-'; +ASTERISK: '*'; +SLASH: '/'; +PERCENT: '%'; +DIV: 'DIV'; +TILDE: '~'; +AMPERSAND: '&'; +PIPE: '|'; +HAT: '^'; + +PERCENTLIT: 'PERCENT'; +BUCKET: 'BUCKET'; +OUT: 'OUT'; +OF: 'OF'; + +SORT: 'SORT'; +CLUSTER: 'CLUSTER'; +DISTRIBUTE: 'DISTRIBUTE'; +OVERWRITE: 'OVERWRITE'; +TRANSFORM: 'TRANSFORM'; +REDUCE: 'REDUCE'; +USING: 'USING'; +SERDE: 'SERDE'; +SERDEPROPERTIES: 'SERDEPROPERTIES'; +RECORDREADER: 'RECORDREADER'; +RECORDWRITER: 'RECORDWRITER'; +DELIMITED: 'DELIMITED'; +FIELDS: 'FIELDS'; +TERMINATED: 'TERMINATED'; +COLLECTION: 'COLLECTION'; +ITEMS: 'ITEMS'; +KEYS: 'KEYS'; +ESCAPED: 'ESCAPED'; +LINES: 'LINES'; +SEPARATED: 'SEPARATED'; +FUNCTION: 'FUNCTION'; +EXTENDED: 'EXTENDED'; +REFRESH: 'REFRESH'; +CLEAR: 'CLEAR'; +CACHE: 'CACHE'; +UNCACHE: 'UNCACHE'; +LAZY: 'LAZY'; +FORMATTED: 'FORMATTED'; +TEMPORARY: 'TEMPORARY' | 'TEMP'; +OPTIONS: 'OPTIONS'; +UNSET: 'UNSET'; +TBLPROPERTIES: 'TBLPROPERTIES'; +DBPROPERTIES: 'DBPROPERTIES'; +BUCKETS: 'BUCKETS'; +SKEWED: 'SKEWED'; +STORED: 'STORED'; +DIRECTORIES: 'DIRECTORIES'; +LOCATION: 'LOCATION'; +EXCHANGE: 'EXCHANGE'; +ARCHIVE: 'ARCHIVE'; +UNARCHIVE: 'UNARCHIVE'; +FILEFORMAT: 'FILEFORMAT'; +TOUCH: 'TOUCH'; +COMPACT: 'COMPACT'; +CONCATENATE: 'CONCATENATE'; +CHANGE: 'CHANGE'; +FIRST: 'FIRST'; +AFTER: 'AFTER'; +CASCADE: 'CASCADE'; +RESTRICT: 'RESTRICT'; +CLUSTERED: 'CLUSTERED'; +SORTED: 'SORTED'; +PURGE: 'PURGE'; +INPUTFORMAT: 'INPUTFORMAT'; +OUTPUTFORMAT: 'OUTPUTFORMAT'; +INPUTDRIVER: 'INPUTDRIVER'; +OUTPUTDRIVER: 'OUTPUTDRIVER'; +DATABASE: 'DATABASE' | 'SCHEMA'; +DFS: 'DFS'; +TRUNCATE: 'TRUNCATE'; +METADATA: 'METADATA'; +REPLICATION: 'REPLICATION'; +ANALYZE: 'ANALYZE'; +COMPUTE: 'COMPUTE'; +STATISTICS: 'STATISTICS'; +PARTITIONED: 'PARTITIONED'; +EXTERNAL: 'EXTERNAL'; +DEFINED: 'DEFINED'; +REVOKE: 'REVOKE'; +GRANT: 'GRANT'; +LOCK: 'LOCK'; +UNLOCK: 'UNLOCK'; +MSCK: 'MSCK'; +EXPORT: 'EXPORT'; +IMPORT: 'IMPORT'; +LOAD: 'LOAD'; +ROLE: 'ROLE'; +ROLES: 'ROLES'; +COMPACTIONS: 'COMPACTIONS'; +PRINCIPALS: 'PRINCIPALS'; +TRANSACTIONS: 'TRANSACTIONS'; +INDEXES: 'INDEXES'; +LOCKS: 'LOCKS'; +OPTION: 'OPTION'; + +STRING + : '\'' ( ~('\''|'\\') | ('\\' .) )* '\'' + | '\"' ( ~('\"'|'\\') | ('\\' .) )* '\"' + ; + +BIGINT_LITERAL + : DIGIT+ 'L' + ; + +SMALLINT_LITERAL + : DIGIT+ 'S' + ; + +TINYINT_LITERAL + : DIGIT+ 'Y' + ; + +INTEGER_VALUE + : DIGIT+ + ; + +DECIMAL_VALUE + : DIGIT+ '.' DIGIT* + | '.' DIGIT+ + ; + +SCIENTIFIC_DECIMAL_VALUE + : DIGIT+ ('.' DIGIT*)? EXPONENT + | '.' DIGIT+ EXPONENT + ; + +DOUBLE_LITERAL + : + (INTEGER_VALUE | DECIMAL_VALUE | SCIENTIFIC_DECIMAL_VALUE) 'D' + ; + +IDENTIFIER + : (LETTER | DIGIT | '_')+ + ; + +BACKQUOTED_IDENTIFIER + : '`' ( ~'`' | '``' )* '`' + ; + +fragment EXPONENT + : 'E' [+-]? DIGIT+ + ; + +fragment DIGIT + : [0-9] + ; + +fragment LETTER + : [A-Z] + ; + +SIMPLE_COMMENT + : '--' ~[\r\n]* '\r'? '\n'? -> channel(HIDDEN) + ; + +BRACKETED_COMMENT + : '/*' .*? '*/' -> channel(HIDDEN) + ; + +WS + : [ \r\n\t]+ -> channel(HIDDEN) + ; + +// Catch-all for anything we can't recognize. +// We use this to be able to ignore and recover all the text +// when splitting statements with DelimiterLexer +UNRECOGNIZED + : . + ;
http://git-wip-us.apache.org/repos/asf/spark/blob/a9b93e07/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/ng/SqlBase.g4 ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/ng/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/ng/SqlBase.g4 deleted file mode 100644 index 4e77b6d..0000000 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/ng/SqlBase.g4 +++ /dev/null @@ -1,941 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * This file is an adaptation of Presto's presto-parser/src/main/antlr4/com/facebook/presto/sql/parser/SqlBase.g4 grammar. - */ - -grammar SqlBase; - -tokens { - DELIMITER -} - -singleStatement - : statement EOF - ; - -singleExpression - : namedExpression EOF - ; - -singleTableIdentifier - : tableIdentifier EOF - ; - -singleDataType - : dataType EOF - ; - -statement - : query #statementDefault - | USE db=identifier #use - | CREATE DATABASE (IF NOT EXISTS)? identifier - (COMMENT comment=STRING)? locationSpec? - (WITH DBPROPERTIES tablePropertyList)? #createDatabase - | ALTER DATABASE identifier SET DBPROPERTIES tablePropertyList #setDatabaseProperties - | DROP DATABASE (IF EXISTS)? identifier (RESTRICT | CASCADE)? #dropDatabase - | createTableHeader ('(' colTypeList ')')? tableProvider - (OPTIONS tablePropertyList)? #createTableUsing - | createTableHeader tableProvider - (OPTIONS tablePropertyList)? AS? query #createTableUsing - | createTableHeader ('(' colTypeList ')')? (COMMENT STRING)? - (PARTITIONED BY identifierList)? bucketSpec? skewSpec? - rowFormat? createFileFormat? locationSpec? - (TBLPROPERTIES tablePropertyList)? - (AS? query)? #createTable - | ANALYZE TABLE tableIdentifier partitionSpec? COMPUTE STATISTICS - (identifier | FOR COLUMNS identifierSeq?) #analyze - | ALTER TABLE from=tableIdentifier RENAME TO to=tableIdentifier #renameTable - | ALTER TABLE tableIdentifier - SET TBLPROPERTIES tablePropertyList #setTableProperties - | ALTER TABLE tableIdentifier - UNSET TBLPROPERTIES (IF EXISTS)? tablePropertyList #unsetTableProperties - | ALTER TABLE tableIdentifier (partitionSpec)? - SET SERDE STRING (WITH SERDEPROPERTIES tablePropertyList)? #setTableSerDe - | ALTER TABLE tableIdentifier (partitionSpec)? - SET SERDEPROPERTIES tablePropertyList #setTableSerDe - | ALTER TABLE tableIdentifier bucketSpec #bucketTable - | ALTER TABLE tableIdentifier NOT CLUSTERED #unclusterTable - | ALTER TABLE tableIdentifier NOT SORTED #unsortTable - | ALTER TABLE tableIdentifier skewSpec #skewTable - | ALTER TABLE tableIdentifier NOT SKEWED #unskewTable - | ALTER TABLE tableIdentifier NOT STORED AS DIRECTORIES #unstoreTable - | ALTER TABLE tableIdentifier - SET SKEWED LOCATION skewedLocationList #setTableSkewLocations - | ALTER TABLE tableIdentifier ADD (IF NOT EXISTS)? - partitionSpecLocation+ #addTablePartition - | ALTER TABLE tableIdentifier - from=partitionSpec RENAME TO to=partitionSpec #renameTablePartition - | ALTER TABLE from=tableIdentifier - EXCHANGE partitionSpec WITH TABLE to=tableIdentifier #exchangeTablePartition - | ALTER TABLE tableIdentifier - DROP (IF EXISTS)? partitionSpec (',' partitionSpec)* PURGE? #dropTablePartitions - | ALTER TABLE tableIdentifier ARCHIVE partitionSpec #archiveTablePartition - | ALTER TABLE tableIdentifier UNARCHIVE partitionSpec #unarchiveTablePartition - | ALTER TABLE tableIdentifier partitionSpec? - SET FILEFORMAT fileFormat #setTableFileFormat - | ALTER TABLE tableIdentifier partitionSpec? SET locationSpec #setTableLocation - | ALTER TABLE tableIdentifier TOUCH partitionSpec? #touchTable - | ALTER TABLE tableIdentifier partitionSpec? COMPACT STRING #compactTable - | ALTER TABLE tableIdentifier partitionSpec? CONCATENATE #concatenateTable - | ALTER TABLE tableIdentifier partitionSpec? - CHANGE COLUMN? oldName=identifier colType - (FIRST | AFTER after=identifier)? (CASCADE | RESTRICT)? #changeColumn - | ALTER TABLE tableIdentifier partitionSpec? - ADD COLUMNS '(' colTypeList ')' (CASCADE | RESTRICT)? #addColumns - | ALTER TABLE tableIdentifier partitionSpec? - REPLACE COLUMNS '(' colTypeList ')' (CASCADE | RESTRICT)? #replaceColumns - | DROP TABLE (IF EXISTS)? tableIdentifier PURGE? - (FOR METADATA? REPLICATION '(' STRING ')')? #dropTable - | CREATE (OR REPLACE)? VIEW (IF NOT EXISTS)? tableIdentifier - identifierCommentList? (COMMENT STRING)? - (PARTITIONED ON identifierList)? - (TBLPROPERTIES tablePropertyList)? AS query #createView - | ALTER VIEW tableIdentifier AS? query #alterViewQuery - | CREATE TEMPORARY? FUNCTION qualifiedName AS className=STRING - (USING resource (',' resource)*)? #createFunction - | DROP TEMPORARY? FUNCTION (IF EXISTS)? qualifiedName #dropFunction - | EXPLAIN explainOption* statement #explain - | SHOW TABLES ((FROM | IN) db=identifier)? - (LIKE (qualifiedName | pattern=STRING))? #showTables - | SHOW FUNCTIONS (LIKE? (qualifiedName | pattern=STRING))? #showFunctions - | (DESC | DESCRIBE) FUNCTION EXTENDED? qualifiedName #describeFunction - | (DESC | DESCRIBE) option=(EXTENDED | FORMATTED)? - tableIdentifier partitionSpec? describeColName? #describeTable - | (DESC | DESCRIBE) DATABASE EXTENDED? identifier #describeDatabase - | REFRESH TABLE tableIdentifier #refreshTable - | CACHE LAZY? TABLE identifier (AS? query)? #cacheTable - | UNCACHE TABLE identifier #uncacheTable - | CLEAR CACHE #clearCache - | ADD identifier .*? #addResource - | SET ROLE .*? #failNativeCommand - | SET .*? #setConfiguration - | kws=unsupportedHiveNativeCommands .*? #failNativeCommand - | hiveNativeCommands #executeNativeCommand - ; - -hiveNativeCommands - : createTableHeader LIKE tableIdentifier - rowFormat? createFileFormat? locationSpec? - (TBLPROPERTIES tablePropertyList)? - | DELETE FROM tableIdentifier (WHERE booleanExpression)? - | TRUNCATE TABLE tableIdentifier partitionSpec? - (COLUMNS identifierList)? - | ALTER VIEW from=tableIdentifier AS? RENAME TO to=tableIdentifier - | ALTER VIEW from=tableIdentifier AS? - SET TBLPROPERTIES tablePropertyList - | ALTER VIEW from=tableIdentifier AS? - UNSET TBLPROPERTIES (IF EXISTS)? tablePropertyList - | ALTER VIEW from=tableIdentifier AS? - ADD (IF NOT EXISTS)? partitionSpecLocation+ - | ALTER VIEW from=tableIdentifier AS? - DROP (IF EXISTS)? partitionSpec (',' partitionSpec)* PURGE? - | DROP VIEW (IF EXISTS)? qualifiedName - | SHOW COLUMNS (FROM | IN) tableIdentifier ((FROM|IN) identifier)? - | START TRANSACTION (transactionMode (',' transactionMode)*)? - | COMMIT WORK? - | ROLLBACK WORK? - | SHOW PARTITIONS tableIdentifier partitionSpec? - | DFS .*? - | (CREATE | ALTER | DROP | SHOW | DESC | DESCRIBE | LOCK | UNLOCK | MSCK | LOAD) .*? - ; - -unsupportedHiveNativeCommands - : kw1=CREATE kw2=ROLE - | kw1=DROP kw2=ROLE - | kw1=GRANT kw2=ROLE? - | kw1=REVOKE kw2=ROLE? - | kw1=SHOW kw2=GRANT - | kw1=SHOW kw2=ROLE kw3=GRANT? - | kw1=SHOW kw2=PRINCIPALS - | kw1=SHOW kw2=ROLES - | kw1=SHOW kw2=CURRENT kw3=ROLES - | kw1=EXPORT kw2=TABLE - | kw1=IMPORT kw2=TABLE - | kw1=SHOW kw2=COMPACTIONS - | kw1=SHOW kw2=CREATE kw3=TABLE - | kw1=SHOW kw2=TRANSACTIONS - | kw1=SHOW kw2=INDEXES - | kw1=SHOW kw2=LOCKS - ; - -createTableHeader - : CREATE TEMPORARY? EXTERNAL? TABLE (IF NOT EXISTS)? tableIdentifier - ; - -bucketSpec - : CLUSTERED BY identifierList - (SORTED BY orderedIdentifierList)? - INTO INTEGER_VALUE BUCKETS - ; - -skewSpec - : SKEWED BY identifierList - ON (constantList | nestedConstantList) - (STORED AS DIRECTORIES)? - ; - -locationSpec - : LOCATION STRING - ; - -query - : ctes? queryNoWith - ; - -insertInto - : INSERT OVERWRITE TABLE tableIdentifier partitionSpec? (IF NOT EXISTS)? - | INSERT INTO TABLE? tableIdentifier partitionSpec? - ; - -partitionSpecLocation - : partitionSpec locationSpec? - ; - -partitionSpec - : PARTITION '(' partitionVal (',' partitionVal)* ')' - ; - -partitionVal - : identifier (EQ constant)? - ; - -describeColName - : identifier ('.' (identifier | STRING))* - ; - -ctes - : WITH namedQuery (',' namedQuery)* - ; - -namedQuery - : name=identifier AS? '(' queryNoWith ')' - ; - -tableProvider - : USING qualifiedName - ; - -tablePropertyList - : '(' tableProperty (',' tableProperty)* ')' - ; - -tableProperty - : key=tablePropertyKey (EQ? value=STRING)? - ; - -tablePropertyKey - : looseIdentifier ('.' looseIdentifier)* - | STRING - ; - -constantList - : '(' constant (',' constant)* ')' - ; - -nestedConstantList - : '(' constantList (',' constantList)* ')' - ; - -skewedLocation - : (constant | constantList) EQ STRING - ; - -skewedLocationList - : '(' skewedLocation (',' skewedLocation)* ')' - ; - -createFileFormat - : STORED AS fileFormat - | STORED BY storageHandler - ; - -fileFormat - : INPUTFORMAT inFmt=STRING OUTPUTFORMAT outFmt=STRING (SERDE serdeCls=STRING)? - (INPUTDRIVER inDriver=STRING OUTPUTDRIVER outDriver=STRING)? #tableFileFormat - | identifier #genericFileFormat - ; - -storageHandler - : STRING (WITH SERDEPROPERTIES tablePropertyList)? - ; - -resource - : identifier STRING - ; - -queryNoWith - : insertInto? queryTerm queryOrganization #singleInsertQuery - | fromClause multiInsertQueryBody+ #multiInsertQuery - ; - -queryOrganization - : (ORDER BY order+=sortItem (',' order+=sortItem)*)? - (CLUSTER BY clusterBy+=expression (',' clusterBy+=expression)*)? - (DISTRIBUTE BY distributeBy+=expression (',' distributeBy+=expression)*)? - (SORT BY sort+=sortItem (',' sort+=sortItem)*)? - windows? - (LIMIT limit=expression)? - ; - -multiInsertQueryBody - : insertInto? - querySpecification - queryOrganization - ; - -queryTerm - : queryPrimary #queryTermDefault - | left=queryTerm operator=(INTERSECT | UNION | EXCEPT) setQuantifier? right=queryTerm #setOperation - ; - -queryPrimary - : querySpecification #queryPrimaryDefault - | TABLE tableIdentifier #table - | inlineTable #inlineTableDefault1 - | '(' queryNoWith ')' #subquery - ; - -sortItem - : expression ordering=(ASC | DESC)? - ; - -querySpecification - : (((SELECT kind=TRANSFORM '(' namedExpressionSeq ')' - | kind=MAP namedExpressionSeq - | kind=REDUCE namedExpressionSeq)) - inRowFormat=rowFormat? - (RECORDWRITER recordWriter=STRING)? - USING script=STRING - (AS (identifierSeq | colTypeList | ('(' (identifierSeq | colTypeList) ')')))? - outRowFormat=rowFormat? - (RECORDREADER recordReader=STRING)? - fromClause? - (WHERE where=booleanExpression)?) - | ((kind=SELECT setQuantifier? namedExpressionSeq fromClause? - | fromClause (kind=SELECT setQuantifier? namedExpressionSeq)?) - lateralView* - (WHERE where=booleanExpression)? - aggregation? - (HAVING having=booleanExpression)? - windows?) - ; - -fromClause - : FROM relation (',' relation)* lateralView* - ; - -aggregation - : GROUP BY groupingExpressions+=expression (',' groupingExpressions+=expression)* ( - WITH kind=ROLLUP - | WITH kind=CUBE - | kind=GROUPING SETS '(' groupingSet (',' groupingSet)* ')')? - ; - -groupingSet - : '(' (expression (',' expression)*)? ')' - | expression - ; - -lateralView - : LATERAL VIEW (OUTER)? qualifiedName '(' (expression (',' expression)*)? ')' tblName=identifier (AS? colName+=identifier (',' colName+=identifier)*)? - ; - -setQuantifier - : DISTINCT - | ALL - ; - -relation - : left=relation - ((CROSS | joinType) JOIN right=relation joinCriteria? - | NATURAL joinType JOIN right=relation - ) #joinRelation - | relationPrimary #relationDefault - ; - -joinType - : INNER? - | LEFT OUTER? - | LEFT SEMI - | RIGHT OUTER? - | FULL OUTER? - ; - -joinCriteria - : ON booleanExpression - | USING '(' identifier (',' identifier)* ')' - ; - -sample - : TABLESAMPLE '(' - ( (percentage=(INTEGER_VALUE | DECIMAL_VALUE) sampleType=PERCENTLIT) - | (expression sampleType=ROWS) - | (sampleType=BUCKET numerator=INTEGER_VALUE OUT OF denominator=INTEGER_VALUE (ON identifier)?)) - ')' - ; - -identifierList - : '(' identifierSeq ')' - ; - -identifierSeq - : identifier (',' identifier)* - ; - -orderedIdentifierList - : '(' orderedIdentifier (',' orderedIdentifier)* ')' - ; - -orderedIdentifier - : identifier ordering=(ASC | DESC)? - ; - -identifierCommentList - : '(' identifierComment (',' identifierComment)* ')' - ; - -identifierComment - : identifier (COMMENT STRING)? - ; - -relationPrimary - : tableIdentifier sample? (AS? identifier)? #tableName - | '(' queryNoWith ')' sample? (AS? identifier)? #aliasedQuery - | '(' relation ')' sample? (AS? identifier)? #aliasedRelation - | inlineTable #inlineTableDefault2 - ; - -inlineTable - : VALUES expression (',' expression)* (AS? identifier identifierList?)? - ; - -rowFormat - : ROW FORMAT SERDE name=STRING (WITH SERDEPROPERTIES props=tablePropertyList)? #rowFormatSerde - | ROW FORMAT DELIMITED - (FIELDS TERMINATED BY fieldsTerminatedBy=STRING (ESCAPED BY escapedBy=STRING)?)? - (COLLECTION ITEMS TERMINATED BY collectionItemsTerminatedBy=STRING)? - (MAP KEYS TERMINATED BY keysTerminatedBy=STRING)? - (LINES TERMINATED BY linesSeparatedBy=STRING)? - (NULL DEFINED AS nullDefinedAs=STRING)? #rowFormatDelimited - ; - -tableIdentifier - : (db=identifier '.')? table=identifier - ; - -namedExpression - : expression (AS? (identifier | identifierList))? - ; - -namedExpressionSeq - : namedExpression (',' namedExpression)* - ; - -expression - : booleanExpression - ; - -booleanExpression - : predicated #booleanDefault - | NOT booleanExpression #logicalNot - | left=booleanExpression operator=AND right=booleanExpression #logicalBinary - | left=booleanExpression operator=OR right=booleanExpression #logicalBinary - | EXISTS '(' query ')' #exists - ; - -// workaround for: -// https://github.com/antlr/antlr4/issues/780 -// https://github.com/antlr/antlr4/issues/781 -predicated - : valueExpression predicate[$valueExpression.ctx]? - ; - -predicate[ParserRuleContext value] - : NOT? BETWEEN lower=valueExpression AND upper=valueExpression #between - | NOT? IN '(' expression (',' expression)* ')' #inList - | NOT? IN '(' query ')' #inSubquery - | NOT? like=(RLIKE | LIKE) pattern=valueExpression #like - | IS NOT? NULL #nullPredicate - ; - -valueExpression - : primaryExpression #valueExpressionDefault - | operator=(MINUS | PLUS | TILDE) valueExpression #arithmeticUnary - | left=valueExpression operator=(ASTERISK | SLASH | PERCENT | DIV) right=valueExpression #arithmeticBinary - | left=valueExpression operator=(PLUS | MINUS) right=valueExpression #arithmeticBinary - | left=valueExpression operator=AMPERSAND right=valueExpression #arithmeticBinary - | left=valueExpression operator=HAT right=valueExpression #arithmeticBinary - | left=valueExpression operator=PIPE right=valueExpression #arithmeticBinary - | left=valueExpression comparisonOperator right=valueExpression #comparison - ; - -primaryExpression - : constant #constantDefault - | ASTERISK #star - | qualifiedName '.' ASTERISK #star - | '(' expression (',' expression)+ ')' #rowConstructor - | qualifiedName '(' (setQuantifier? expression (',' expression)*)? ')' (OVER windowSpec)? #functionCall - | '(' query ')' #subqueryExpression - | CASE valueExpression whenClause+ (ELSE elseExpression=expression)? END #simpleCase - | CASE whenClause+ (ELSE elseExpression=expression)? END #searchedCase - | CAST '(' expression AS dataType ')' #cast - | value=primaryExpression '[' index=valueExpression ']' #subscript - | identifier #columnReference - | base=primaryExpression '.' fieldName=identifier #dereference - | '(' expression ')' #parenthesizedExpression - ; - -constant - : NULL #nullLiteral - | interval #intervalLiteral - | identifier STRING #typeConstructor - | number #numericLiteral - | booleanValue #booleanLiteral - | STRING+ #stringLiteral - ; - -comparisonOperator - : EQ | NEQ | NEQJ | LT | LTE | GT | GTE | NSEQ - ; - -booleanValue - : TRUE | FALSE - ; - -interval - : INTERVAL intervalField* - ; - -intervalField - : value=intervalValue unit=identifier (TO to=identifier)? - ; - -intervalValue - : (PLUS | MINUS)? (INTEGER_VALUE | DECIMAL_VALUE) - | STRING - ; - -dataType - : complex=ARRAY '<' dataType '>' #complexDataType - | complex=MAP '<' dataType ',' dataType '>' #complexDataType - | complex=STRUCT ('<' colTypeList? '>' | NEQ) #complexDataType - | identifier ('(' INTEGER_VALUE (',' INTEGER_VALUE)* ')')? #primitiveDataType - ; - -colTypeList - : colType (',' colType)* - ; - -colType - : identifier ':'? dataType (COMMENT STRING)? - ; - -whenClause - : WHEN condition=expression THEN result=expression - ; - -windows - : WINDOW namedWindow (',' namedWindow)* - ; - -namedWindow - : identifier AS windowSpec - ; - -windowSpec - : name=identifier #windowRef - | '(' - ( CLUSTER BY partition+=expression (',' partition+=expression)* - | ((PARTITION | DISTRIBUTE) BY partition+=expression (',' partition+=expression)*)? - ((ORDER | SORT) BY sortItem (',' sortItem)*)?) - windowFrame? - ')' #windowDef - ; - -windowFrame - : frameType=RANGE start=frameBound - | frameType=ROWS start=frameBound - | frameType=RANGE BETWEEN start=frameBound AND end=frameBound - | frameType=ROWS BETWEEN start=frameBound AND end=frameBound - ; - -frameBound - : UNBOUNDED boundType=(PRECEDING | FOLLOWING) - | boundType=CURRENT ROW - | expression boundType=(PRECEDING | FOLLOWING) - ; - - -explainOption - : LOGICAL | FORMATTED | EXTENDED - ; - -transactionMode - : ISOLATION LEVEL SNAPSHOT #isolationLevel - | READ accessMode=(ONLY | WRITE) #transactionAccessMode - ; - -qualifiedName - : identifier ('.' identifier)* - ; - -// Identifier that also allows the use of a number of SQL keywords (mainly for backwards compatibility). -looseIdentifier - : identifier - | FROM - | TO - | TABLE - | WITH - ; - -identifier - : IDENTIFIER #unquotedIdentifier - | quotedIdentifier #quotedIdentifierAlternative - | nonReserved #unquotedIdentifier - ; - -quotedIdentifier - : BACKQUOTED_IDENTIFIER - ; - -number - : DECIMAL_VALUE #decimalLiteral - | SCIENTIFIC_DECIMAL_VALUE #scientificDecimalLiteral - | INTEGER_VALUE #integerLiteral - | BIGINT_LITERAL #bigIntLiteral - | SMALLINT_LITERAL #smallIntLiteral - | TINYINT_LITERAL #tinyIntLiteral - | DOUBLE_LITERAL #doubleLiteral - ; - -nonReserved - : SHOW | TABLES | COLUMNS | COLUMN | PARTITIONS | FUNCTIONS - | ADD - | OVER | PARTITION | RANGE | ROWS | PRECEDING | FOLLOWING | CURRENT | ROW | MAP | ARRAY | STRUCT - | LATERAL | WINDOW | REDUCE | TRANSFORM | USING | SERDE | SERDEPROPERTIES | RECORDREADER - | DELIMITED | FIELDS | TERMINATED | COLLECTION | ITEMS | KEYS | ESCAPED | LINES | SEPARATED - | EXTENDED | REFRESH | CLEAR | CACHE | UNCACHE | LAZY | TEMPORARY | OPTIONS - | GROUPING | CUBE | ROLLUP - | EXPLAIN | FORMAT | LOGICAL | FORMATTED - | TABLESAMPLE | USE | TO | BUCKET | PERCENTLIT | OUT | OF - | SET - | VIEW | REPLACE - | IF - | NO | DATA - | START | TRANSACTION | COMMIT | ROLLBACK | WORK | ISOLATION | LEVEL - | SNAPSHOT | READ | WRITE | ONLY - | SORT | CLUSTER | DISTRIBUTE UNSET | TBLPROPERTIES | SKEWED | STORED | DIRECTORIES | LOCATION - | EXCHANGE | ARCHIVE | UNARCHIVE | FILEFORMAT | TOUCH | COMPACT | CONCATENATE | CHANGE | FIRST - | AFTER | CASCADE | RESTRICT | BUCKETS | CLUSTERED | SORTED | PURGE | INPUTFORMAT | OUTPUTFORMAT - | INPUTDRIVER | OUTPUTDRIVER | DBPROPERTIES | DFS | TRUNCATE | METADATA | REPLICATION | COMPUTE - | STATISTICS | ANALYZE | PARTITIONED | EXTERNAL | DEFINED | RECORDWRITER - | REVOKE | GRANT | LOCK | UNLOCK | MSCK | EXPORT | IMPORT | LOAD | VALUES | COMMENT | ROLE - | ROLES | COMPACTIONS | PRINCIPALS | TRANSACTIONS | INDEXES | LOCKS | OPTION - ; - -SELECT: 'SELECT'; -FROM: 'FROM'; -ADD: 'ADD'; -AS: 'AS'; -ALL: 'ALL'; -DISTINCT: 'DISTINCT'; -WHERE: 'WHERE'; -GROUP: 'GROUP'; -BY: 'BY'; -GROUPING: 'GROUPING'; -SETS: 'SETS'; -CUBE: 'CUBE'; -ROLLUP: 'ROLLUP'; -ORDER: 'ORDER'; -HAVING: 'HAVING'; -LIMIT: 'LIMIT'; -AT: 'AT'; -OR: 'OR'; -AND: 'AND'; -IN: 'IN'; -NOT: 'NOT' | '!'; -NO: 'NO'; -EXISTS: 'EXISTS'; -BETWEEN: 'BETWEEN'; -LIKE: 'LIKE'; -RLIKE: 'RLIKE' | 'REGEXP'; -IS: 'IS'; -NULL: 'NULL'; -TRUE: 'TRUE'; -FALSE: 'FALSE'; -NULLS: 'NULLS'; -ASC: 'ASC'; -DESC: 'DESC'; -FOR: 'FOR'; -INTERVAL: 'INTERVAL'; -CASE: 'CASE'; -WHEN: 'WHEN'; -THEN: 'THEN'; -ELSE: 'ELSE'; -END: 'END'; -JOIN: 'JOIN'; -CROSS: 'CROSS'; -OUTER: 'OUTER'; -INNER: 'INNER'; -LEFT: 'LEFT'; -SEMI: 'SEMI'; -RIGHT: 'RIGHT'; -FULL: 'FULL'; -NATURAL: 'NATURAL'; -ON: 'ON'; -LATERAL: 'LATERAL'; -WINDOW: 'WINDOW'; -OVER: 'OVER'; -PARTITION: 'PARTITION'; -RANGE: 'RANGE'; -ROWS: 'ROWS'; -UNBOUNDED: 'UNBOUNDED'; -PRECEDING: 'PRECEDING'; -FOLLOWING: 'FOLLOWING'; -CURRENT: 'CURRENT'; -ROW: 'ROW'; -WITH: 'WITH'; -VALUES: 'VALUES'; -CREATE: 'CREATE'; -TABLE: 'TABLE'; -VIEW: 'VIEW'; -REPLACE: 'REPLACE'; -INSERT: 'INSERT'; -DELETE: 'DELETE'; -INTO: 'INTO'; -DESCRIBE: 'DESCRIBE'; -EXPLAIN: 'EXPLAIN'; -FORMAT: 'FORMAT'; -LOGICAL: 'LOGICAL'; -CAST: 'CAST'; -SHOW: 'SHOW'; -TABLES: 'TABLES'; -COLUMNS: 'COLUMNS'; -COLUMN: 'COLUMN'; -USE: 'USE'; -PARTITIONS: 'PARTITIONS'; -FUNCTIONS: 'FUNCTIONS'; -DROP: 'DROP'; -UNION: 'UNION'; -EXCEPT: 'EXCEPT'; -INTERSECT: 'INTERSECT'; -TO: 'TO'; -TABLESAMPLE: 'TABLESAMPLE'; -STRATIFY: 'STRATIFY'; -ALTER: 'ALTER'; -RENAME: 'RENAME'; -ARRAY: 'ARRAY'; -MAP: 'MAP'; -STRUCT: 'STRUCT'; -COMMENT: 'COMMENT'; -SET: 'SET'; -DATA: 'DATA'; -START: 'START'; -TRANSACTION: 'TRANSACTION'; -COMMIT: 'COMMIT'; -ROLLBACK: 'ROLLBACK'; -WORK: 'WORK'; -ISOLATION: 'ISOLATION'; -LEVEL: 'LEVEL'; -SNAPSHOT: 'SNAPSHOT'; -READ: 'READ'; -WRITE: 'WRITE'; -ONLY: 'ONLY'; - -IF: 'IF'; - -EQ : '=' | '=='; -NSEQ: '<=>'; -NEQ : '<>'; -NEQJ: '!='; -LT : '<'; -LTE : '<='; -GT : '>'; -GTE : '>='; - -PLUS: '+'; -MINUS: '-'; -ASTERISK: '*'; -SLASH: '/'; -PERCENT: '%'; -DIV: 'DIV'; -TILDE: '~'; -AMPERSAND: '&'; -PIPE: '|'; -HAT: '^'; - -PERCENTLIT: 'PERCENT'; -BUCKET: 'BUCKET'; -OUT: 'OUT'; -OF: 'OF'; - -SORT: 'SORT'; -CLUSTER: 'CLUSTER'; -DISTRIBUTE: 'DISTRIBUTE'; -OVERWRITE: 'OVERWRITE'; -TRANSFORM: 'TRANSFORM'; -REDUCE: 'REDUCE'; -USING: 'USING'; -SERDE: 'SERDE'; -SERDEPROPERTIES: 'SERDEPROPERTIES'; -RECORDREADER: 'RECORDREADER'; -RECORDWRITER: 'RECORDWRITER'; -DELIMITED: 'DELIMITED'; -FIELDS: 'FIELDS'; -TERMINATED: 'TERMINATED'; -COLLECTION: 'COLLECTION'; -ITEMS: 'ITEMS'; -KEYS: 'KEYS'; -ESCAPED: 'ESCAPED'; -LINES: 'LINES'; -SEPARATED: 'SEPARATED'; -FUNCTION: 'FUNCTION'; -EXTENDED: 'EXTENDED'; -REFRESH: 'REFRESH'; -CLEAR: 'CLEAR'; -CACHE: 'CACHE'; -UNCACHE: 'UNCACHE'; -LAZY: 'LAZY'; -FORMATTED: 'FORMATTED'; -TEMPORARY: 'TEMPORARY' | 'TEMP'; -OPTIONS: 'OPTIONS'; -UNSET: 'UNSET'; -TBLPROPERTIES: 'TBLPROPERTIES'; -DBPROPERTIES: 'DBPROPERTIES'; -BUCKETS: 'BUCKETS'; -SKEWED: 'SKEWED'; -STORED: 'STORED'; -DIRECTORIES: 'DIRECTORIES'; -LOCATION: 'LOCATION'; -EXCHANGE: 'EXCHANGE'; -ARCHIVE: 'ARCHIVE'; -UNARCHIVE: 'UNARCHIVE'; -FILEFORMAT: 'FILEFORMAT'; -TOUCH: 'TOUCH'; -COMPACT: 'COMPACT'; -CONCATENATE: 'CONCATENATE'; -CHANGE: 'CHANGE'; -FIRST: 'FIRST'; -AFTER: 'AFTER'; -CASCADE: 'CASCADE'; -RESTRICT: 'RESTRICT'; -CLUSTERED: 'CLUSTERED'; -SORTED: 'SORTED'; -PURGE: 'PURGE'; -INPUTFORMAT: 'INPUTFORMAT'; -OUTPUTFORMAT: 'OUTPUTFORMAT'; -INPUTDRIVER: 'INPUTDRIVER'; -OUTPUTDRIVER: 'OUTPUTDRIVER'; -DATABASE: 'DATABASE' | 'SCHEMA'; -DFS: 'DFS'; -TRUNCATE: 'TRUNCATE'; -METADATA: 'METADATA'; -REPLICATION: 'REPLICATION'; -ANALYZE: 'ANALYZE'; -COMPUTE: 'COMPUTE'; -STATISTICS: 'STATISTICS'; -PARTITIONED: 'PARTITIONED'; -EXTERNAL: 'EXTERNAL'; -DEFINED: 'DEFINED'; -REVOKE: 'REVOKE'; -GRANT: 'GRANT'; -LOCK: 'LOCK'; -UNLOCK: 'UNLOCK'; -MSCK: 'MSCK'; -EXPORT: 'EXPORT'; -IMPORT: 'IMPORT'; -LOAD: 'LOAD'; -ROLE: 'ROLE'; -ROLES: 'ROLES'; -COMPACTIONS: 'COMPACTIONS'; -PRINCIPALS: 'PRINCIPALS'; -TRANSACTIONS: 'TRANSACTIONS'; -INDEXES: 'INDEXES'; -LOCKS: 'LOCKS'; -OPTION: 'OPTION'; - -STRING - : '\'' ( ~('\''|'\\') | ('\\' .) )* '\'' - | '\"' ( ~('\"'|'\\') | ('\\' .) )* '\"' - ; - -BIGINT_LITERAL - : DIGIT+ 'L' - ; - -SMALLINT_LITERAL - : DIGIT+ 'S' - ; - -TINYINT_LITERAL - : DIGIT+ 'Y' - ; - -INTEGER_VALUE - : DIGIT+ - ; - -DECIMAL_VALUE - : DIGIT+ '.' DIGIT* - | '.' DIGIT+ - ; - -SCIENTIFIC_DECIMAL_VALUE - : DIGIT+ ('.' DIGIT*)? EXPONENT - | '.' DIGIT+ EXPONENT - ; - -DOUBLE_LITERAL - : - (INTEGER_VALUE | DECIMAL_VALUE | SCIENTIFIC_DECIMAL_VALUE) 'D' - ; - -IDENTIFIER - : (LETTER | DIGIT | '_')+ - ; - -BACKQUOTED_IDENTIFIER - : '`' ( ~'`' | '``' )* '`' - ; - -fragment EXPONENT - : 'E' [+-]? DIGIT+ - ; - -fragment DIGIT - : [0-9] - ; - -fragment LETTER - : [A-Z] - ; - -SIMPLE_COMMENT - : '--' ~[\r\n]* '\r'? '\n'? -> channel(HIDDEN) - ; - -BRACKETED_COMMENT - : '/*' .*? '*/' -> channel(HIDDEN) - ; - -WS - : [ \r\n\t]+ -> channel(HIDDEN) - ; - -// Catch-all for anything we can't recognize. -// We use this to be able to ignore and recover all the text -// when splitting statements with DelimiterLexer -UNRECOGNIZED - : . - ; http://git-wip-us.apache.org/repos/asf/spark/blob/a9b93e07/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ASTNode.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ASTNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ASTNode.scala deleted file mode 100644 index 28f7b10..0000000 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ASTNode.scala +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.sql.catalyst.parser - -import org.antlr.runtime.{Token, TokenRewriteStream} - -import org.apache.spark.sql.catalyst.trees.{Origin, TreeNode} - -case class ASTNode( - token: Token, - startIndex: Int, - stopIndex: Int, - children: List[ASTNode], - stream: TokenRewriteStream) extends TreeNode[ASTNode] { - /** Cache the number of children. */ - val numChildren: Int = children.size - - /** tuple used in pattern matching. */ - val pattern: Some[(String, List[ASTNode])] = Some((token.getText, children)) - - /** Line in which the ASTNode starts. */ - lazy val line: Int = { - val line = token.getLine - if (line == 0) { - if (children.nonEmpty) children.head.line - else 0 - } else { - line - } - } - - /** Position of the Character at which ASTNode starts. */ - lazy val positionInLine: Int = { - val line = token.getCharPositionInLine - if (line == -1) { - if (children.nonEmpty) children.head.positionInLine - else 0 - } else { - line - } - } - - /** Origin of the ASTNode. */ - override val origin: Origin = Origin(Some(line), Some(positionInLine)) - - /** Source text. */ - lazy val source: String = stream.toOriginalString(startIndex, stopIndex) - - /** Get the source text that remains after this token. */ - lazy val remainder: String = { - stream.fill() - stream.toOriginalString(stopIndex + 1, stream.size() - 1).trim() - } - - def text: String = token.getText - - def tokenType: Int = token.getType - - /** - * Checks if this node is equal to another node. - * - * Right now this function only checks the name, type, text and children of the node - * for equality. - */ - def treeEquals(other: ASTNode): Boolean = { - def check(f: ASTNode => Any): Boolean = { - val l = f(this) - val r = f(other) - (l == null && r == null) || l.equals(r) - } - if (other == null) { - false - } else if (!check(_.token.getType) - || !check(_.token.getText) - || !check(_.numChildren)) { - false - } else { - children.zip(other.children).forall { - case (l, r) => l treeEquals r - } - } - } - - override def simpleString: String = s"$text $line, $startIndex, $stopIndex, $positionInLine " -} http://git-wip-us.apache.org/repos/asf/spark/blob/a9b93e07/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AbstractSparkSQLParser.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AbstractSparkSQLParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AbstractSparkSQLParser.scala deleted file mode 100644 index 7b456a6..0000000 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AbstractSparkSQLParser.scala +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.catalyst.parser - -import scala.language.implicitConversions -import scala.util.parsing.combinator.lexical.StdLexical -import scala.util.parsing.combinator.syntactical.StandardTokenParsers -import scala.util.parsing.combinator.PackratParsers -import scala.util.parsing.input.CharArrayReader.EofCh - -import org.apache.spark.sql.catalyst.plans.logical._ - -private[sql] abstract class AbstractSparkSQLParser - extends StandardTokenParsers with PackratParsers with ParserInterface { - - def parsePlan(input: String): LogicalPlan = synchronized { - // Initialize the Keywords. - initLexical - phrase(start)(new lexical.Scanner(input)) match { - case Success(plan, _) => plan - case failureOrError => sys.error(failureOrError.toString) - } - } - /* One time initialization of lexical.This avoid reinitialization of lexical in parse method */ - protected lazy val initLexical: Unit = lexical.initialize(reservedWords) - - protected case class Keyword(str: String) { - def normalize: String = lexical.normalizeKeyword(str) - def parser: Parser[String] = normalize - } - - protected implicit def asParser(k: Keyword): Parser[String] = k.parser - - // By default, use Reflection to find the reserved words defined in the sub class. - // NOTICE, Since the Keyword properties defined by sub class, we couldn't call this - // method during the parent class instantiation, because the sub class instance - // isn't created yet. - protected lazy val reservedWords: Seq[String] = - this - .getClass - .getMethods - .filter(_.getReturnType == classOf[Keyword]) - .map(_.invoke(this).asInstanceOf[Keyword].normalize) - - // Set the keywords as empty by default, will change that later. - override val lexical = new SqlLexical - - protected def start: Parser[LogicalPlan] - - // Returns the whole input string - protected lazy val wholeInput: Parser[String] = new Parser[String] { - def apply(in: Input): ParseResult[String] = - Success(in.source.toString, in.drop(in.source.length())) - } - - // Returns the rest of the input string that are not parsed yet - protected lazy val restInput: Parser[String] = new Parser[String] { - def apply(in: Input): ParseResult[String] = - Success( - in.source.subSequence(in.offset, in.source.length()).toString, - in.drop(in.source.length())) - } -} - -class SqlLexical extends StdLexical { - case class DecimalLit(chars: String) extends Token { - override def toString: String = chars - } - - /* This is a work around to support the lazy setting */ - def initialize(keywords: Seq[String]): Unit = { - reserved.clear() - reserved ++= keywords - } - - /* Normal the keyword string */ - def normalizeKeyword(str: String): String = str.toLowerCase - - delimiters += ( - "@", "*", "+", "-", "<", "=", "<>", "!=", "<=", ">=", ">", "/", "(", ")", - ",", ";", "%", "{", "}", ":", "[", "]", ".", "&", "|", "^", "~", "<=>" - ) - - protected override def processIdent(name: String) = { - val token = normalizeKeyword(name) - if (reserved contains token) Keyword(token) else Identifier(name) - } - - override lazy val token: Parser[Token] = - ( rep1(digit) ~ scientificNotation ^^ { case i ~ s => DecimalLit(i.mkString + s) } - | '.' ~> (rep1(digit) ~ scientificNotation) ^^ - { case i ~ s => DecimalLit("0." + i.mkString + s) } - | rep1(digit) ~ ('.' ~> digit.*) ~ scientificNotation ^^ - { case i1 ~ i2 ~ s => DecimalLit(i1.mkString + "." + i2.mkString + s) } - | digit.* ~ identChar ~ (identChar | digit).* ^^ - { case first ~ middle ~ rest => processIdent((first ++ (middle :: rest)).mkString) } - | rep1(digit) ~ ('.' ~> digit.*).? ^^ { - case i ~ None => NumericLit(i.mkString) - case i ~ Some(d) => DecimalLit(i.mkString + "." + d.mkString) - } - | '\'' ~> chrExcept('\'', '\n', EofCh).* <~ '\'' ^^ - { case chars => StringLit(chars mkString "") } - | '"' ~> chrExcept('"', '\n', EofCh).* <~ '"' ^^ - { case chars => StringLit(chars mkString "") } - | '`' ~> chrExcept('`', '\n', EofCh).* <~ '`' ^^ - { case chars => Identifier(chars mkString "") } - | EofCh ^^^ EOF - | '\'' ~> failure("unclosed string literal") - | '"' ~> failure("unclosed string literal") - | delim - | failure("illegal character") - ) - - override def identChar: Parser[Elem] = letter | elem('_') - - private lazy val scientificNotation: Parser[String] = - (elem('e') | elem('E')) ~> (elem('+') | elem('-')).? ~ rep1(digit) ^^ { - case s ~ rest => "e" + s.mkString + rest.mkString - } - - override def whitespace: Parser[Any] = - ( whitespaceChar - | '/' ~ '*' ~ comment - | '/' ~ '/' ~ chrExcept(EofCh, '\n').* - | '#' ~ chrExcept(EofCh, '\n').* - | '-' ~ '-' ~ chrExcept(EofCh, '\n').* - | '/' ~ '*' ~ failure("unclosed comment") - ).* -} - --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
