http://git-wip-us.apache.org/repos/asf/spark/blob/ea489f14/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala new file mode 100644 index 0000000..42bdf25 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala @@ -0,0 +1,961 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.catalyst + +import java.sql.Date + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.analysis._ +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.aggregate.Count +import org.apache.spark.sql.catalyst.plans._ +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.trees.CurrentOrigin +import org.apache.spark.sql.catalyst.parser._ +import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.CalendarInterval +import org.apache.spark.util.random.RandomSampler + +/** + * This class translates a HQL String to a Catalyst [[LogicalPlan]] or [[Expression]]. + */ +private[sql] class CatalystQl(val conf: ParserConf = SimpleParserConf()) { + object Token { + def unapply(node: ASTNode): Some[(String, List[ASTNode])] = { + CurrentOrigin.setPosition(node.line, node.positionInLine) + node.pattern + } + } + + + /** + * Returns the AST for the given SQL string. + */ + protected def getAst(sql: String): ASTNode = ParseDriver.parse(sql, conf) + + /** Creates LogicalPlan for a given HiveQL string. 
*/ + def createPlan(sql: String): LogicalPlan = { + try { + createPlan(sql, ParseDriver.parse(sql, conf)) + } catch { + case e: MatchError => throw e + case e: AnalysisException => throw e + case e: Exception => + throw new AnalysisException(e.getMessage) + case e: NotImplementedError => + throw new AnalysisException( + s""" + |Unsupported language features in query: $sql + |${getAst(sql).treeString} + |$e + |${e.getStackTrace.head} + """.stripMargin) + } + } + + protected def createPlan(sql: String, tree: ASTNode): LogicalPlan = nodeToPlan(tree) + + def parseDdl(ddl: String): Seq[Attribute] = { + val tree = getAst(ddl) + assert(tree.text == "TOK_CREATETABLE", "Only CREATE TABLE supported.") + val tableOps = tree.children + val colList = tableOps + .find(_.text == "TOK_TABCOLLIST") + .getOrElse(sys.error("No columnList!")) + + colList.children.map(nodeToAttribute) + } + + protected def getClauses( + clauseNames: Seq[String], + nodeList: Seq[ASTNode]): Seq[Option[ASTNode]] = { + var remainingNodes = nodeList + val clauses = clauseNames.map { clauseName => + val (matches, nonMatches) = remainingNodes.partition(_.text.toUpperCase == clauseName) + remainingNodes = nonMatches ++ (if (matches.nonEmpty) matches.tail else Nil) + matches.headOption + } + + if (remainingNodes.nonEmpty) { + sys.error( + s"""Unhandled clauses: ${remainingNodes.map(_.treeString).mkString("\n")}. + |You are likely trying to use an unsupported Hive feature."""".stripMargin) + } + clauses + } + + protected def getClause(clauseName: String, nodeList: Seq[ASTNode]): ASTNode = + getClauseOption(clauseName, nodeList).getOrElse(sys.error( + s"Expected clause $clauseName missing from ${nodeList.map(_.treeString).mkString("\n")}")) + + protected def getClauseOption(clauseName: String, nodeList: Seq[ASTNode]): Option[ASTNode] = { + nodeList.filter { case ast: ASTNode => ast.text == clauseName } match { + case Seq(oneMatch) => Some(oneMatch) + case Seq() => None + case _ => sys.error(s"Found multiple instances of clause $clauseName") + } + } + + protected def nodeToAttribute(node: ASTNode): Attribute = node match { + case Token("TOK_TABCOL", Token(colName, Nil) :: dataType :: Nil) => + AttributeReference(colName, nodeToDataType(dataType), nullable = true)() + case _ => + noParseRule("Attribute", node) + } + + protected def nodeToDataType(node: ASTNode): DataType = node match { + case Token("TOK_DECIMAL", precision :: scale :: Nil) => + DecimalType(precision.text.toInt, scale.text.toInt) + case Token("TOK_DECIMAL", precision :: Nil) => + DecimalType(precision.text.toInt, 0) + case Token("TOK_DECIMAL", Nil) => DecimalType.USER_DEFAULT + case Token("TOK_BIGINT", Nil) => LongType + case Token("TOK_INT", Nil) => IntegerType + case Token("TOK_TINYINT", Nil) => ByteType + case Token("TOK_SMALLINT", Nil) => ShortType + case Token("TOK_BOOLEAN", Nil) => BooleanType + case Token("TOK_STRING", Nil) => StringType + case Token("TOK_VARCHAR", Token(_, Nil) :: Nil) => StringType + case Token("TOK_FLOAT", Nil) => FloatType + case Token("TOK_DOUBLE", Nil) => DoubleType + case Token("TOK_DATE", Nil) => DateType + case Token("TOK_TIMESTAMP", Nil) => TimestampType + case Token("TOK_BINARY", Nil) => BinaryType + case Token("TOK_LIST", elementType :: Nil) => ArrayType(nodeToDataType(elementType)) + case Token("TOK_STRUCT", Token("TOK_TABCOLLIST", fields) :: Nil) => + StructType(fields.map(nodeToStructField)) + case Token("TOK_MAP", keyType :: valueType :: Nil) => + MapType(nodeToDataType(keyType), nodeToDataType(valueType)) + case _ => + 
noParseRule("DataType", node) + } + + protected def nodeToStructField(node: ASTNode): StructField = node match { + case Token("TOK_TABCOL", Token(fieldName, Nil) :: dataType :: Nil) => + StructField(fieldName, nodeToDataType(dataType), nullable = true) + case Token("TOK_TABCOL", Token(fieldName, Nil) :: dataType :: _ /* comment */:: Nil) => + StructField(fieldName, nodeToDataType(dataType), nullable = true) + case _ => + noParseRule("StructField", node) + } + + protected def extractTableIdent(tableNameParts: ASTNode): TableIdentifier = { + tableNameParts.children.map { + case Token(part, Nil) => cleanIdentifier(part) + } match { + case Seq(tableOnly) => TableIdentifier(tableOnly) + case Seq(databaseName, table) => TableIdentifier(table, Some(databaseName)) + case other => sys.error("Hive only supports tables names like 'tableName' " + + s"or 'databaseName.tableName', found '$other'") + } + } + + /** + * SELECT MAX(value) FROM src GROUP BY k1, k2, k3 GROUPING SETS((k1, k2), (k2)) + * is equivalent to + * SELECT MAX(value) FROM src GROUP BY k1, k2 UNION SELECT MAX(value) FROM src GROUP BY k2 + * Check the following link for details. + * +https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C+Grouping+and+Rollup + * + * The bitmask denotes the grouping expressions validity for a grouping set, + * the bitmask also be called as grouping id (`GROUPING__ID`, the virtual column in Hive) + * e.g. In superset (k1, k2, k3), (bit 0: k1, bit 1: k2, and bit 2: k3), the grouping id of + * GROUPING SETS (k1, k2) and (k2) should be 3 and 2 respectively. + */ + protected def extractGroupingSet(children: Seq[ASTNode]): (Seq[Expression], Seq[Int]) = { + val (keyASTs, setASTs) = children.partition { + case Token("TOK_GROUPING_SETS_EXPRESSION", _) => false // grouping sets + case _ => true // grouping keys + } + + val keys = keyASTs.map(nodeToExpr) + val keyMap = keyASTs.zipWithIndex.toMap + + val bitmasks: Seq[Int] = setASTs.map { + case Token("TOK_GROUPING_SETS_EXPRESSION", null) => 0 + case Token("TOK_GROUPING_SETS_EXPRESSION", columns) => + columns.foldLeft(0)((bitmap, col) => { + val keyIndex = keyMap.find(_._1.treeEquals(col)).map(_._2) + bitmap | 1 << keyIndex.getOrElse( + throw new AnalysisException(s"${col.treeString} doesn't show up in the GROUP BY list")) + }) + case _ => sys.error("Expect GROUPING SETS clause") + } + + (keys, bitmasks) + } + + protected def nodeToPlan(node: ASTNode): LogicalPlan = node match { + case Token("TOK_QUERY", queryArgs @ Token("TOK_CTE" | "TOK_FROM" | "TOK_INSERT", _) :: _) => + val (fromClause: Option[ASTNode], insertClauses, cteRelations) = + queryArgs match { + case Token("TOK_CTE", ctes) :: Token("TOK_FROM", from) :: inserts => + val cteRelations = ctes.map { node => + val relation = nodeToRelation(node).asInstanceOf[Subquery] + relation.alias -> relation + } + (Some(from.head), inserts, Some(cteRelations.toMap)) + case Token("TOK_FROM", from) :: inserts => + (Some(from.head), inserts, None) + case Token("TOK_INSERT", _) :: Nil => + (None, queryArgs, None) + } + + // Return one query for each insert clause. 
+ val queries = insertClauses.map { + case Token("TOK_INSERT", singleInsert) => + val ( + intoClause :: + destClause :: + selectClause :: + selectDistinctClause :: + whereClause :: + groupByClause :: + rollupGroupByClause :: + cubeGroupByClause :: + groupingSetsClause :: + orderByClause :: + havingClause :: + sortByClause :: + clusterByClause :: + distributeByClause :: + limitClause :: + lateralViewClause :: + windowClause :: Nil) = { + getClauses( + Seq( + "TOK_INSERT_INTO", + "TOK_DESTINATION", + "TOK_SELECT", + "TOK_SELECTDI", + "TOK_WHERE", + "TOK_GROUPBY", + "TOK_ROLLUP_GROUPBY", + "TOK_CUBE_GROUPBY", + "TOK_GROUPING_SETS", + "TOK_ORDERBY", + "TOK_HAVING", + "TOK_SORTBY", + "TOK_CLUSTERBY", + "TOK_DISTRIBUTEBY", + "TOK_LIMIT", + "TOK_LATERAL_VIEW", + "WINDOW"), + singleInsert) + } + + val relations = fromClause match { + case Some(f) => nodeToRelation(f) + case None => OneRowRelation + } + + val withWhere = whereClause.map { whereNode => + val Seq(whereExpr) = whereNode.children + Filter(nodeToExpr(whereExpr), relations) + }.getOrElse(relations) + + val select = (selectClause orElse selectDistinctClause) + .getOrElse(sys.error("No select clause.")) + + val transformation = nodeToTransformation(select.children.head, withWhere) + + val withLateralView = lateralViewClause.map { lv => + nodeToGenerate(lv.children.head, outer = false, withWhere) + }.getOrElse(withWhere) + + // The projection of the query can either be a normal projection, an aggregation + // (if there is a group by) or a script transformation. + val withProject: LogicalPlan = transformation.getOrElse { + val selectExpressions = + select.children.flatMap(selExprNodeToExpr).map(UnresolvedAlias(_)) + Seq( + groupByClause.map(e => e match { + case Token("TOK_GROUPBY", children) => + // Not a transformation so must be either project or aggregation. + Aggregate(children.map(nodeToExpr), selectExpressions, withLateralView) + case _ => sys.error("Expect GROUP BY") + }), + groupingSetsClause.map(e => e match { + case Token("TOK_GROUPING_SETS", children) => + val(groupByExprs, masks) = extractGroupingSet(children) + GroupingSets(masks, groupByExprs, withLateralView, selectExpressions) + case _ => sys.error("Expect GROUPING SETS") + }), + rollupGroupByClause.map(e => e match { + case Token("TOK_ROLLUP_GROUPBY", children) => + Aggregate( + Seq(Rollup(children.map(nodeToExpr))), + selectExpressions, + withLateralView) + case _ => sys.error("Expect WITH ROLLUP") + }), + cubeGroupByClause.map(e => e match { + case Token("TOK_CUBE_GROUPBY", children) => + Aggregate( + Seq(Cube(children.map(nodeToExpr))), + selectExpressions, + withLateralView) + case _ => sys.error("Expect WITH CUBE") + }), + Some(Project(selectExpressions, withLateralView))).flatten.head + } + + // Handle HAVING clause. + val withHaving = havingClause.map { h => + val havingExpr = h.children match { case Seq(hexpr) => nodeToExpr(hexpr) } + // Note that we added a cast to boolean. If the expression itself is already boolean, + // the optimizer will get rid of the unnecessary cast. + Filter(Cast(havingExpr, BooleanType), withProject) + }.getOrElse(withProject) + + // Handle SELECT DISTINCT + val withDistinct = + if (selectDistinctClause.isDefined) Distinct(withHaving) else withHaving + + // Handle ORDER BY, SORT BY, DISTRIBUTE BY, and CLUSTER BY clause. 
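+ // In Hive, CLUSTER BY exprs is shorthand for DISTRIBUTE BY exprs SORT BY exprs (ascending), + // which is why the CLUSTER BY case below repartitions and then sorts on the same expressions.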
+ val withSort = + (orderByClause, sortByClause, distributeByClause, clusterByClause) match { + case (Some(totalOrdering), None, None, None) => + Sort(totalOrdering.children.map(nodeToSortOrder), global = true, withDistinct) + case (None, Some(perPartitionOrdering), None, None) => + Sort( + perPartitionOrdering.children.map(nodeToSortOrder), + global = false, withDistinct) + case (None, None, Some(partitionExprs), None) => + RepartitionByExpression( + partitionExprs.children.map(nodeToExpr), withDistinct) + case (None, Some(perPartitionOrdering), Some(partitionExprs), None) => + Sort( + perPartitionOrdering.children.map(nodeToSortOrder), global = false, + RepartitionByExpression( + partitionExprs.children.map(nodeToExpr), + withDistinct)) + case (None, None, None, Some(clusterExprs)) => + Sort( + clusterExprs.children.map(nodeToExpr).map(SortOrder(_, Ascending)), + global = false, + RepartitionByExpression( + clusterExprs.children.map(nodeToExpr), + withDistinct)) + case (None, None, None, None) => withDistinct + case _ => sys.error("Unsupported set of ordering / distribution clauses.") + } + + val withLimit = + limitClause.map(l => nodeToExpr(l.children.head)) + .map(Limit(_, withSort)) + .getOrElse(withSort) + + // Collect all window specifications defined in the WINDOW clause. + val windowDefinitions = windowClause.map(_.children.collect { + case Token("TOK_WINDOWDEF", + Token(windowName, Nil) :: Token("TOK_WINDOWSPEC", spec) :: Nil) => + windowName -> nodesToWindowSpecification(spec) + }.toMap) + // Handle cases like + // window w1 as (partition by p_mfgr order by p_name + // range between 2 preceding and 2 following), + // w2 as w1 + val resolvedCrossReference = windowDefinitions.map { + windowDefMap => windowDefMap.map { + case (windowName, WindowSpecReference(other)) => + (windowName, windowDefMap(other).asInstanceOf[WindowSpecDefinition]) + case o => o.asInstanceOf[(String, WindowSpecDefinition)] + } + } + + val withWindowDefinitions = + resolvedCrossReference.map(WithWindowDefinition(_, withLimit)).getOrElse(withLimit) + + // TOK_INSERT_INTO means to add files to the table. + // TOK_DESTINATION means to overwrite the table. + val resultDestination = + (intoClause orElse destClause).getOrElse(sys.error("No destination found.")) + val overwrite = intoClause.isEmpty + nodeToDest( + resultDestination, + withWindowDefinitions, + overwrite) + } + + // If there are multiple INSERTs, just UNION them together into one query. + val query = queries.reduceLeft(Union) + + // Return a With plan if there is a CTE. + cteRelations.map(With(query, _)).getOrElse(query) + + // HIVE-9039 renamed TOK_UNION => TOK_UNIONALL while adding TOK_UNIONDISTINCT. + case Token("TOK_UNIONALL", left :: right :: Nil) => + Union(nodeToPlan(left), nodeToPlan(right)) + + case _ => + noParseRule("Plan", node) + } + + val allJoinTokens = "(TOK_.*JOIN)".r + val lateralViewToken = "TOK_LATERAL_VIEW(.*)".r + protected def nodeToRelation(node: ASTNode): LogicalPlan = { + node match { + case Token("TOK_SUBQUERY", query :: Token(alias, Nil) :: Nil) => + Subquery(cleanIdentifier(alias), nodeToPlan(query)) + + case Token(lateralViewToken(isOuter), selectClause :: relationClause :: Nil) => + nodeToGenerate( + selectClause, + outer = isOuter.nonEmpty, + nodeToRelation(relationClause)) + + /* All relations, possibly with aliases or sampling clauses. */ + case Token("TOK_TABREF", clauses) => + // If the last clause is not a TOK_* token, it is the table's alias.
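+ // e.g. for "FROM page_view pv" the TOK_TABREF children are [TOK_TABNAME, pv].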
+ val (nonAliasClauses, aliasClause) = + if (clauses.last.text.startsWith("TOK")) { + (clauses, None) + } else { + (clauses.dropRight(1), Some(clauses.last)) + } + + val (Some(tableNameParts) :: + splitSampleClause :: + bucketSampleClause :: Nil) = { + getClauses(Seq("TOK_TABNAME", "TOK_TABLESPLITSAMPLE", "TOK_TABLEBUCKETSAMPLE"), + nonAliasClauses) + } + + val tableIdent = extractTableIdent(tableNameParts) + val alias = aliasClause.map { case Token(a, Nil) => cleanIdentifier(a) } + val relation = UnresolvedRelation(tableIdent, alias) + + // Apply sampling if requested. + (bucketSampleClause orElse splitSampleClause).map { + case Token("TOK_TABLESPLITSAMPLE", + Token("TOK_ROWCOUNT", Nil) :: Token(count, Nil) :: Nil) => + Limit(Literal(count.toInt), relation) + case Token("TOK_TABLESPLITSAMPLE", + Token("TOK_PERCENT", Nil) :: Token(fraction, Nil) :: Nil) => + // The range of fraction accepted by Sample is [0, 1]. Because Hive's block sampling + // function takes X PERCENT as the input and the range of X is [0, 100], we need to + // adjust the fraction. + require( + fraction.toDouble >= (0.0 - RandomSampler.roundingEpsilon) + && fraction.toDouble <= (100.0 + RandomSampler.roundingEpsilon), + s"Sampling fraction ($fraction) must be on interval [0, 100]") + Sample(0.0, fraction.toDouble / 100, withReplacement = false, + (math.random * 1000).toInt, + relation) + case Token("TOK_TABLEBUCKETSAMPLE", + Token(numerator, Nil) :: + Token(denominator, Nil) :: Nil) => + val fraction = numerator.toDouble / denominator.toDouble + Sample(0.0, fraction, withReplacement = false, (math.random * 1000).toInt, relation) + case a => + noParseRule("Sampling", a) + }.getOrElse(relation) + + case Token(allJoinTokens(joinToken), relation1 :: relation2 :: other) => + if (!(other.size <= 1)) { + sys.error(s"Unsupported join operation: $other") + } + + val joinType = joinToken match { + case "TOK_JOIN" => Inner + case "TOK_CROSSJOIN" => Inner + case "TOK_RIGHTOUTERJOIN" => RightOuter + case "TOK_LEFTOUTERJOIN" => LeftOuter + case "TOK_FULLOUTERJOIN" => FullOuter + case "TOK_LEFTSEMIJOIN" => LeftSemi + case "TOK_UNIQUEJOIN" => noParseRule("Unique Join", node) + case "TOK_ANTIJOIN" => noParseRule("Anti Join", node) + } + Join(nodeToRelation(relation1), + nodeToRelation(relation2), + joinType, + other.headOption.map(nodeToExpr)) + + case _ => + noParseRule("Relation", node) + } + } + + protected def nodeToSortOrder(node: ASTNode): SortOrder = node match { + case Token("TOK_TABSORTCOLNAMEASC", sortExpr :: Nil) => + SortOrder(nodeToExpr(sortExpr), Ascending) + case Token("TOK_TABSORTCOLNAMEDESC", sortExpr :: Nil) => + SortOrder(nodeToExpr(sortExpr), Descending) + case _ => + noParseRule("SortOrder", node) + } + + val destinationToken = "TOK_DESTINATION|TOK_INSERT_INTO".r + protected def nodeToDest( + node: ASTNode, + query: LogicalPlan, + overwrite: Boolean): LogicalPlan = node match { + case Token(destinationToken(), + Token("TOK_DIR", + Token("TOK_TMP_FILE", Nil) :: Nil) :: Nil) => + query + + case Token(destinationToken(), + Token("TOK_TAB", + tableArgs) :: Nil) => + val Some(tableNameParts) :: partitionClause :: Nil = + getClauses(Seq("TOK_TABNAME", "TOK_PARTSPEC"), tableArgs) + + val tableIdent = extractTableIdent(tableNameParts) + + val partitionKeys = partitionClause.map(_.children.map { + // Parse partitions. We also make keys case insensitive. 
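+ // e.g. PARTITION (Ds='2008-04-08', Hr) becomes Map("ds" -> Some("2008-04-08"), "hr" -> None).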
+ case Token("TOK_PARTVAL", Token(key, Nil) :: Token(value, Nil) :: Nil) => + cleanIdentifier(key.toLowerCase) -> Some(unquoteString(value)) + case Token("TOK_PARTVAL", Token(key, Nil) :: Nil) => + cleanIdentifier(key.toLowerCase) -> None + }.toMap).getOrElse(Map.empty) + + InsertIntoTable( + UnresolvedRelation(tableIdent, None), partitionKeys, query, overwrite, ifNotExists = false) + + case Token(destinationToken(), + Token("TOK_TAB", + tableArgs) :: + Token("TOK_IFNOTEXISTS", + ifNotExists) :: Nil) => + val Some(tableNameParts) :: partitionClause :: Nil = + getClauses(Seq("TOK_TABNAME", "TOK_PARTSPEC"), tableArgs) + + val tableIdent = extractTableIdent(tableNameParts) + + val partitionKeys = partitionClause.map(_.children.map { + // Parse partitions. We also make keys case insensitive. + case Token("TOK_PARTVAL", Token(key, Nil) :: Token(value, Nil) :: Nil) => + cleanIdentifier(key.toLowerCase) -> Some(unquoteString(value)) + case Token("TOK_PARTVAL", Token(key, Nil) :: Nil) => + cleanIdentifier(key.toLowerCase) -> None + }.toMap).getOrElse(Map.empty) + + InsertIntoTable( + UnresolvedRelation(tableIdent, None), partitionKeys, query, overwrite, ifNotExists = true) + + case _ => + noParseRule("Destination", node) + } + + protected def selExprNodeToExpr(node: ASTNode): Option[Expression] = node match { + case Token("TOK_SELEXPR", e :: Nil) => + Some(nodeToExpr(e)) + + case Token("TOK_SELEXPR", e :: Token(alias, Nil) :: Nil) => + Some(Alias(nodeToExpr(e), cleanIdentifier(alias))()) + + case Token("TOK_SELEXPR", e :: aliasChildren) => + val aliasNames = aliasChildren.collect { + case Token(name, Nil) => cleanIdentifier(name) + } + Some(MultiAlias(nodeToExpr(e), aliasNames)) + + /* Hints are ignored */ + case Token("TOK_HINTLIST", _) => None + + case _ => + noParseRule("Select", node) + } + + protected val escapedIdentifier = "`([^`]+)`".r + protected val doubleQuotedString = "\"([^\"]+)\"".r + protected val singleQuotedString = "'([^']+)'".r + + protected def unquoteString(str: String) = str match { + case singleQuotedString(s) => s + case doubleQuotedString(s) => s + case other => other + } + + /** Strips backticks from ident if present */ + protected def cleanIdentifier(ident: String): String = ident match { + case escapedIdentifier(i) => i + case plainIdent => plainIdent + } + + val numericAstTypes = Seq( + SparkSqlParser.Number, + SparkSqlParser.TinyintLiteral, + SparkSqlParser.SmallintLiteral, + SparkSqlParser.BigintLiteral, + SparkSqlParser.DecimalLiteral) + + /* Case insensitive matches */ + val COUNT = "(?i)COUNT".r + val SUM = "(?i)SUM".r + val AND = "(?i)AND".r + val OR = "(?i)OR".r + val NOT = "(?i)NOT".r + val TRUE = "(?i)TRUE".r + val FALSE = "(?i)FALSE".r + val LIKE = "(?i)LIKE".r + val RLIKE = "(?i)RLIKE".r + val REGEXP = "(?i)REGEXP".r + val IN = "(?i)IN".r + val DIV = "(?i)DIV".r + val BETWEEN = "(?i)BETWEEN".r + val WHEN = "(?i)WHEN".r + val CASE = "(?i)CASE".r + + protected def nodeToExpr(node: ASTNode): Expression = node match { + /* Attribute References */ + case Token("TOK_TABLE_OR_COL", Token(name, Nil) :: Nil) => + UnresolvedAttribute.quoted(cleanIdentifier(name)) + case Token(".", qualifier :: Token(attr, Nil) :: Nil) => + nodeToExpr(qualifier) match { + case UnresolvedAttribute(nameParts) => + UnresolvedAttribute(nameParts :+ cleanIdentifier(attr)) + case other => UnresolvedExtractValue(other, Literal(attr)) + } + + /* Stars (*) */ + case Token("TOK_ALLCOLREF", Nil) => UnresolvedStar(None) + // The format of dbName.tableName.* cannot be parsed by HiveParser. 
TOK_TABNAME will only + // have a single child, which is the table name. + case Token("TOK_ALLCOLREF", Token("TOK_TABNAME", Token(name, Nil) :: Nil) :: Nil) => + UnresolvedStar(Some(UnresolvedAttribute.parseAttributeName(name))) + + /* Aggregate Functions */ + case Token("TOK_FUNCTIONDI", Token(COUNT(), Nil) :: args) => + Count(args.map(nodeToExpr)).toAggregateExpression(isDistinct = true) + case Token("TOK_FUNCTIONSTAR", Token(COUNT(), Nil) :: Nil) => + Count(Literal(1)).toAggregateExpression() + + /* Casts */ + case Token("TOK_FUNCTION", Token("TOK_STRING", Nil) :: arg :: Nil) => + Cast(nodeToExpr(arg), StringType) + case Token("TOK_FUNCTION", Token("TOK_VARCHAR", _) :: arg :: Nil) => + Cast(nodeToExpr(arg), StringType) + case Token("TOK_FUNCTION", Token("TOK_CHAR", _) :: arg :: Nil) => + Cast(nodeToExpr(arg), StringType) + case Token("TOK_FUNCTION", Token("TOK_INT", Nil) :: arg :: Nil) => + Cast(nodeToExpr(arg), IntegerType) + case Token("TOK_FUNCTION", Token("TOK_BIGINT", Nil) :: arg :: Nil) => + Cast(nodeToExpr(arg), LongType) + case Token("TOK_FUNCTION", Token("TOK_FLOAT", Nil) :: arg :: Nil) => + Cast(nodeToExpr(arg), FloatType) + case Token("TOK_FUNCTION", Token("TOK_DOUBLE", Nil) :: arg :: Nil) => + Cast(nodeToExpr(arg), DoubleType) + case Token("TOK_FUNCTION", Token("TOK_SMALLINT", Nil) :: arg :: Nil) => + Cast(nodeToExpr(arg), ShortType) + case Token("TOK_FUNCTION", Token("TOK_TINYINT", Nil) :: arg :: Nil) => + Cast(nodeToExpr(arg), ByteType) + case Token("TOK_FUNCTION", Token("TOK_BINARY", Nil) :: arg :: Nil) => + Cast(nodeToExpr(arg), BinaryType) + case Token("TOK_FUNCTION", Token("TOK_BOOLEAN", Nil) :: arg :: Nil) => + Cast(nodeToExpr(arg), BooleanType) + case Token("TOK_FUNCTION", Token("TOK_DECIMAL", precision :: scale :: Nil) :: arg :: Nil) => + Cast(nodeToExpr(arg), DecimalType(precision.text.toInt, scale.text.toInt)) + case Token("TOK_FUNCTION", Token("TOK_DECIMAL", precision :: Nil) :: arg :: Nil) => + Cast(nodeToExpr(arg), DecimalType(precision.text.toInt, 0)) + case Token("TOK_FUNCTION", Token("TOK_DECIMAL", Nil) :: arg :: Nil) => + Cast(nodeToExpr(arg), DecimalType.USER_DEFAULT) + case Token("TOK_FUNCTION", Token("TOK_TIMESTAMP", Nil) :: arg :: Nil) => + Cast(nodeToExpr(arg), TimestampType) + case Token("TOK_FUNCTION", Token("TOK_DATE", Nil) :: arg :: Nil) => + Cast(nodeToExpr(arg), DateType) + + /* Arithmetic */ + case Token("+", child :: Nil) => nodeToExpr(child) + case Token("-", child :: Nil) => UnaryMinus(nodeToExpr(child)) + case Token("~", child :: Nil) => BitwiseNot(nodeToExpr(child)) + case Token("+", left :: right:: Nil) => Add(nodeToExpr(left), nodeToExpr(right)) + case Token("-", left :: right:: Nil) => Subtract(nodeToExpr(left), nodeToExpr(right)) + case Token("*", left :: right:: Nil) => Multiply(nodeToExpr(left), nodeToExpr(right)) + case Token("/", left :: right:: Nil) => Divide(nodeToExpr(left), nodeToExpr(right)) + case Token(DIV(), left :: right:: Nil) => + Cast(Divide(nodeToExpr(left), nodeToExpr(right)), LongType) + case Token("%", left :: right:: Nil) => Remainder(nodeToExpr(left), nodeToExpr(right)) + case Token("&", left :: right:: Nil) => BitwiseAnd(nodeToExpr(left), nodeToExpr(right)) + case Token("|", left :: right:: Nil) => BitwiseOr(nodeToExpr(left), nodeToExpr(right)) + case Token("^", left :: right:: Nil) => BitwiseXor(nodeToExpr(left), nodeToExpr(right)) + + /* Comparisons */ + case Token("=", left :: right:: Nil) => EqualTo(nodeToExpr(left), nodeToExpr(right)) + case Token("==", left :: right:: Nil) => EqualTo(nodeToExpr(left),
nodeToExpr(right)) + case Token("<=>", left :: right:: Nil) => EqualNullSafe(nodeToExpr(left), nodeToExpr(right)) + case Token("!=", left :: right:: Nil) => Not(EqualTo(nodeToExpr(left), nodeToExpr(right))) + case Token("<>", left :: right:: Nil) => Not(EqualTo(nodeToExpr(left), nodeToExpr(right))) + case Token(">", left :: right:: Nil) => GreaterThan(nodeToExpr(left), nodeToExpr(right)) + case Token(">=", left :: right:: Nil) => GreaterThanOrEqual(nodeToExpr(left), nodeToExpr(right)) + case Token("<", left :: right:: Nil) => LessThan(nodeToExpr(left), nodeToExpr(right)) + case Token("<=", left :: right:: Nil) => LessThanOrEqual(nodeToExpr(left), nodeToExpr(right)) + case Token(LIKE(), left :: right:: Nil) => Like(nodeToExpr(left), nodeToExpr(right)) + case Token(RLIKE(), left :: right:: Nil) => RLike(nodeToExpr(left), nodeToExpr(right)) + case Token(REGEXP(), left :: right:: Nil) => RLike(nodeToExpr(left), nodeToExpr(right)) + case Token("TOK_FUNCTION", Token("TOK_ISNOTNULL", Nil) :: child :: Nil) => + IsNotNull(nodeToExpr(child)) + case Token("TOK_FUNCTION", Token("TOK_ISNULL", Nil) :: child :: Nil) => + IsNull(nodeToExpr(child)) + case Token("TOK_FUNCTION", Token(IN(), Nil) :: value :: list) => + In(nodeToExpr(value), list.map(nodeToExpr)) + case Token("TOK_FUNCTION", + Token(BETWEEN(), Nil) :: + kw :: + target :: + minValue :: + maxValue :: Nil) => + + val targetExpression = nodeToExpr(target) + val betweenExpr = + And( + GreaterThanOrEqual(targetExpression, nodeToExpr(minValue)), + LessThanOrEqual(targetExpression, nodeToExpr(maxValue))) + kw match { + case Token("KW_FALSE", Nil) => betweenExpr + case Token("KW_TRUE", Nil) => Not(betweenExpr) + } + + /* Boolean Logic */ + case Token(AND(), left :: right:: Nil) => And(nodeToExpr(left), nodeToExpr(right)) + case Token(OR(), left :: right:: Nil) => Or(nodeToExpr(left), nodeToExpr(right)) + case Token(NOT(), child :: Nil) => Not(nodeToExpr(child)) + case Token("!", child :: Nil) => Not(nodeToExpr(child)) + + /* Case statements */ + case Token("TOK_FUNCTION", Token(WHEN(), Nil) :: branches) => + CaseWhen(branches.map(nodeToExpr)) + case Token("TOK_FUNCTION", Token(CASE(), Nil) :: branches) => + val keyExpr = nodeToExpr(branches.head) + CaseKeyWhen(keyExpr, branches.drop(1).map(nodeToExpr)) + + /* Complex datatype manipulation */ + case Token("[", child :: ordinal :: Nil) => + UnresolvedExtractValue(nodeToExpr(child), nodeToExpr(ordinal)) + + /* Window Functions */ + case Token(text, args :+ Token("TOK_WINDOWSPEC", spec)) => + val function = nodeToExpr(node.copy(children = node.children.init)) + nodesToWindowSpecification(spec) match { + case reference: WindowSpecReference => + UnresolvedWindowExpression(function, reference) + case definition: WindowSpecDefinition => + WindowExpression(function, definition) + } + + /* UDFs - Must be last otherwise will preempt built in functions */ + case Token("TOK_FUNCTION", Token(name, Nil) :: args) => + UnresolvedFunction(name, args.map(nodeToExpr), isDistinct = false) + // Aggregate function with DISTINCT keyword. 
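+ // e.g. SELECT sum(DISTINCT key) FROM src arrives as TOK_FUNCTIONDI(sum, key) and becomes + // UnresolvedFunction("sum", ..., isDistinct = true).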
+ case Token("TOK_FUNCTIONDI", Token(name, Nil) :: args) => + UnresolvedFunction(name, args.map(nodeToExpr), isDistinct = true) + case Token("TOK_FUNCTIONSTAR", Token(name, Nil) :: args) => + UnresolvedFunction(name, UnresolvedStar(None) :: Nil, isDistinct = false) + + /* Literals */ + case Token("TOK_NULL", Nil) => Literal.create(null, NullType) + case Token(TRUE(), Nil) => Literal.create(true, BooleanType) + case Token(FALSE(), Nil) => Literal.create(false, BooleanType) + case Token("TOK_STRINGLITERALSEQUENCE", strings) => + Literal(strings.map(s => ParseUtils.unescapeSQLString(s.text)).mkString) + + // This code is adapted from + // /ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java#L223 + case ast: ASTNode if numericAstTypes contains ast.tokenType => + var v: Literal = null + try { + if (ast.text.endsWith("L")) { + // Literal bigint. + v = Literal.create(ast.text.substring(0, ast.text.length() - 1).toLong, LongType) + } else if (ast.text.endsWith("S")) { + // Literal smallint. + v = Literal.create(ast.text.substring(0, ast.text.length() - 1).toShort, ShortType) + } else if (ast.text.endsWith("Y")) { + // Literal tinyint. + v = Literal.create(ast.text.substring(0, ast.text.length() - 1).toByte, ByteType) + } else if (ast.text.endsWith("BD") || ast.text.endsWith("D")) { + // Literal decimal + val strVal = ast.text.stripSuffix("D").stripSuffix("B") + v = Literal(Decimal(strVal)) + } else { + v = Literal.create(ast.text.toDouble, DoubleType) + v = Literal.create(ast.text.toLong, LongType) + v = Literal.create(ast.text.toInt, IntegerType) + } + } catch { + case nfe: NumberFormatException => // Do nothing + } + + if (v == null) { + sys.error(s"Failed to parse number '${ast.text}'.") + } else { + v + } + + case ast: ASTNode if ast.tokenType == SparkSqlParser.StringLiteral => + Literal(ParseUtils.unescapeSQLString(ast.text)) + + case ast: ASTNode if ast.tokenType == SparkSqlParser.TOK_DATELITERAL => + Literal(Date.valueOf(ast.text.substring(1, ast.text.length - 1))) + + case ast: ASTNode if ast.tokenType == SparkSqlParser.TOK_CHARSETLITERAL => + Literal(ParseUtils.charSetString(ast.children.head.text, ast.children(1).text)) + + case ast: ASTNode if ast.tokenType == SparkSqlParser.TOK_INTERVAL_YEAR_MONTH_LITERAL => + Literal(CalendarInterval.fromYearMonthString(ast.text)) + + case ast: ASTNode if ast.tokenType == SparkSqlParser.TOK_INTERVAL_DAY_TIME_LITERAL => + Literal(CalendarInterval.fromDayTimeString(ast.text)) + + case ast: ASTNode if ast.tokenType == SparkSqlParser.TOK_INTERVAL_YEAR_LITERAL => + Literal(CalendarInterval.fromSingleUnitString("year", ast.text)) + + case ast: ASTNode if ast.tokenType == SparkSqlParser.TOK_INTERVAL_MONTH_LITERAL => + Literal(CalendarInterval.fromSingleUnitString("month", ast.text)) + + case ast: ASTNode if ast.tokenType == SparkSqlParser.TOK_INTERVAL_DAY_LITERAL => + Literal(CalendarInterval.fromSingleUnitString("day", ast.text)) + + case ast: ASTNode if ast.tokenType == SparkSqlParser.TOK_INTERVAL_HOUR_LITERAL => + Literal(CalendarInterval.fromSingleUnitString("hour", ast.text)) + + case ast: ASTNode if ast.tokenType == SparkSqlParser.TOK_INTERVAL_MINUTE_LITERAL => + Literal(CalendarInterval.fromSingleUnitString("minute", ast.text)) + + case ast: ASTNode if ast.tokenType == SparkSqlParser.TOK_INTERVAL_SECOND_LITERAL => + Literal(CalendarInterval.fromSingleUnitString("second", ast.text)) + + case _ => + noParseRule("Expression", node) + } + + /* Case insensitive matches for Window Specification */ + val PRECEDING = "(?i)preceding".r + val 
FOLLOWING = "(?i)following".r + val CURRENT = "(?i)current".r + protected def nodesToWindowSpecification(nodes: Seq[ASTNode]): WindowSpec = nodes match { + case Token(windowName, Nil) :: Nil => + // Refer to a window spec defined in the window clause. + WindowSpecReference(windowName) + case Nil => + // OVER() + WindowSpecDefinition( + partitionSpec = Nil, + orderSpec = Nil, + frameSpecification = UnspecifiedFrame) + case spec => + val (partitionClause :: rowFrame :: rangeFrame :: Nil) = + getClauses( + Seq( + "TOK_PARTITIONINGSPEC", + "TOK_WINDOWRANGE", + "TOK_WINDOWVALUES"), + spec) + + // Handle Partition By and Order By. + val (partitionSpec, orderSpec) = partitionClause.map { partitionAndOrdering => + val (partitionByClause :: orderByClause :: sortByClause :: clusterByClause :: Nil) = + getClauses( + Seq("TOK_DISTRIBUTEBY", "TOK_ORDERBY", "TOK_SORTBY", "TOK_CLUSTERBY"), + partitionAndOrdering.children) + + (partitionByClause, orderByClause.orElse(sortByClause), clusterByClause) match { + case (Some(partitionByExpr), Some(orderByExpr), None) => + (partitionByExpr.children.map(nodeToExpr), + orderByExpr.children.map(nodeToSortOrder)) + case (Some(partitionByExpr), None, None) => + (partitionByExpr.children.map(nodeToExpr), Nil) + case (None, Some(orderByExpr), None) => + (Nil, orderByExpr.children.map(nodeToSortOrder)) + case (None, None, Some(clusterByExpr)) => + val expressions = clusterByExpr.children.map(nodeToExpr) + (expressions, expressions.map(SortOrder(_, Ascending))) + case _ => + noParseRule("Partition & Ordering", partitionAndOrdering) + } + }.getOrElse { + (Nil, Nil) + } + + // Handle Window Frame + val windowFrame = + if (rowFrame.isEmpty && rangeFrame.isEmpty) { + UnspecifiedFrame + } else { + val frameType = rowFrame.map(_ => RowFrame).getOrElse(RangeFrame) + def nodeToBoundary(node: ASTNode): FrameBoundary = node match { + case Token(PRECEDING(), Token(count, Nil) :: Nil) => + if (count.toLowerCase() == "unbounded") { + UnboundedPreceding + } else { + ValuePreceding(count.toInt) + } + case Token(FOLLOWING(), Token(count, Nil) :: Nil) => + if (count.toLowerCase() == "unbounded") { + UnboundedFollowing + } else { + ValueFollowing(count.toInt) + } + case Token(CURRENT(), Nil) => CurrentRow + case _ => + noParseRule("Window Frame Boundary", node) + } + + rowFrame.orElse(rangeFrame).map { frame => + frame.children match { + case precedingNode :: followingNode :: Nil => + SpecifiedWindowFrame( + frameType, + nodeToBoundary(precedingNode), + nodeToBoundary(followingNode)) + case precedingNode :: Nil => + SpecifiedWindowFrame(frameType, nodeToBoundary(precedingNode), CurrentRow) + case _ => + noParseRule("Window Frame", frame) + } + }.getOrElse(sys.error(s"If you see this, please file a bug report with your query.")) + } + + WindowSpecDefinition(partitionSpec, orderSpec, windowFrame) + } + + protected def nodeToTransformation( + node: ASTNode, + child: LogicalPlan): Option[ScriptTransformation] = None + + val explode = "(?i)explode".r + val jsonTuple = "(?i)json_tuple".r + protected def nodeToGenerate(node: ASTNode, outer: Boolean, child: LogicalPlan): Generate = { + val Token("TOK_SELECT", Token("TOK_SELEXPR", clauses) :: Nil) = node + + val alias = getClause("TOK_TABALIAS", clauses).children.head.text + + val generator = clauses.head match { + case Token("TOK_FUNCTION", Token(explode(), Nil) :: childNode :: Nil) => + Explode(nodeToExpr(childNode)) + case Token("TOK_FUNCTION", Token(jsonTuple(), Nil) :: children) => + JsonTuple(children.map(nodeToExpr)) + case other => + 
nodeToGenerator(other) + } + + val attributes = clauses.collect { + case Token(a, Nil) => UnresolvedAttribute(a.toLowerCase) + } + + Generate(generator, join = true, outer = outer, Some(alias.toLowerCase), attributes, child) + } + + protected def nodeToGenerator(node: ASTNode): Generator = noParseRule("Generator", node) + + protected def noParseRule(msg: String, node: ASTNode): Nothing = throw new NotImplementedError( + s"[$msg]: No parse rules for ASTNode type: ${node.tokenType}, tree:\n${node.treeString}") +}
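A quick way to see what the new CatalystQl produces is to call it directly from a REPL. The sketch below is illustrative only: it assumes the default SimpleParserConf shown above, and the resulting plan is unresolved because no analyzer has run.

    import org.apache.spark.sql.catalyst.CatalystQl

    val parser = new CatalystQl()
    // The GROUP BY query becomes an Aggregate over an UnresolvedRelation("src");
    // names and functions stay unresolved until analysis runs.
    val plan = parser.createPlan("SELECT key, count(1) FROM src GROUP BY key")
    println(plan.treeString)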
http://git-wip-us.apache.org/repos/asf/spark/blob/ea489f14/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ASTNode.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ASTNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ASTNode.scala new file mode 100644 index 0000000..ec5e710 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ASTNode.scala @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.catalyst.parser + +import org.antlr.runtime.{Token, TokenRewriteStream} + +import org.apache.spark.sql.catalyst.trees.{Origin, TreeNode} + +case class ASTNode( + token: Token, + startIndex: Int, + stopIndex: Int, + children: List[ASTNode], + stream: TokenRewriteStream) extends TreeNode[ASTNode] { + /** Cache the number of children. */ + val numChildren = children.size + + /** tuple used in pattern matching. */ + val pattern = Some((token.getText, children)) + + /** Line in which the ASTNode starts. */ + lazy val line: Int = { + val line = token.getLine + if (line == 0) { + if (children.nonEmpty) children.head.line + else 0 + } else { + line + } + } + + /** Position of the Character at which ASTNode starts. */ + lazy val positionInLine: Int = { + val line = token.getCharPositionInLine + if (line == -1) { + if (children.nonEmpty) children.head.positionInLine + else 0 + } else { + line + } + } + + /** Origin of the ASTNode. */ + override val origin = Origin(Some(line), Some(positionInLine)) + + /** Source text. */ + lazy val source = stream.toString(startIndex, stopIndex) + + def text: String = token.getText + + def tokenType: Int = token.getType + + /** + * Checks if this node is equal to another node. + * + * Right now this function only checks the name, type, text and children of the node + * for equality. 
+ */ + def treeEquals(other: ASTNode): Boolean = { + def check(f: ASTNode => Any): Boolean = { + val l = f(this) + val r = f(other) + (l == null && r == null) || l.equals(r) + } + if (other == null) { + false + } else if (!check(_.token.getType) + || !check(_.token.getText) + || !check(_.numChildren)) { + false + } else { + children.zip(other.children).forall { + case (l, r) => l treeEquals r + } + } + } + + override def simpleString: String = s"$text $line, $startIndex, $stopIndex, $positionInLine " +} http://git-wip-us.apache.org/repos/asf/spark/blob/ea489f14/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala new file mode 100644 index 0000000..0e93af8 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala @@ -0,0 +1,156 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.catalyst.parser + +import org.antlr.runtime._ +import org.antlr.runtime.tree.CommonTree + +import org.apache.spark.Logging +import org.apache.spark.sql.AnalysisException + +/** + * The ParseDriver takes a SQL command and turns this into an AST. + * + * This is based on Hive's org.apache.hadoop.hive.ql.parse.ParseDriver + */ +object ParseDriver extends Logging { + def parse(command: String, conf: ParserConf): ASTNode = { + logInfo(s"Parsing command: $command") + + // Setup error collection. + val reporter = new ParseErrorReporter() + + // Create lexer. + val lexer = new SparkSqlLexer(new ANTLRNoCaseStringStream(command)) + val tokens = new TokenRewriteStream(lexer) + lexer.configure(conf, reporter) + + // Create the parser. + val parser = new SparkSqlParser(tokens) + parser.configure(conf, reporter) + + try { + val result = parser.statement() + + // Check errors. + reporter.checkForErrors() + + // Return the AST node from the result. + logInfo(s"Parse completed.") + + // Find the non null token tree in the result. + def nonNullToken(tree: CommonTree): CommonTree = { + if (tree.token != null || tree.getChildCount == 0) tree + else nonNullToken(tree.getChild(0).asInstanceOf[CommonTree]) + } + val tree = nonNullToken(result.getTree) + + // Make sure all boundaries are set. + tree.setUnknownTokenBoundaries() + + // Construct the immutable AST. 
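+ // The mutable ANTLR CommonTree is copied bottom-up into the immutable ASTNode case class, + // keeping a reference to the token stream so that `source` can recover the original text.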
+ def createASTNode(tree: CommonTree): ASTNode = { + val children = (0 until tree.getChildCount).map { i => + createASTNode(tree.getChild(i).asInstanceOf[CommonTree]) + }.toList + ASTNode(tree.token, tree.getTokenStartIndex, tree.getTokenStopIndex, children, tokens) + } + createASTNode(tree) + } + catch { + case e: RecognitionException => + logInfo(s"Parse failed.") + reporter.throwError(e) + } + } +} + +/** + * This string stream provides the lexer with upper case characters only. This greatly simplifies + * lexing the stream, while still preserving the original command. + * + * This is based on Hive's org.apache.hadoop.hive.ql.parse.ParseDriver.ANTLRNoCaseStringStream. + * + * The comment below (taken from the original class) describes the rationale for doing this: + * + * This class provides an implementation for a case insensitive token checker for the lexical + * analysis part of antlr. By converting the token stream into upper case at the time when lexical + * rules are checked, this class ensures that the lexical rules need to just match the token with + * upper case letters as opposed to a combination of upper case and lower case characters. This is + * purely used for matching lexical rules. The actual token text is stored in the same way as the + * user input without actually converting it into upper case. The token values are generated by + * the consume() function of the super class ANTLRStringStream. The LA() function is the lookahead + * function and is purely used for matching lexical rules. This also means that the grammar will + * only accept capitalized tokens in case it is run from other tools like antlrworks which do not + * have the ANTLRNoCaseStringStream implementation. + */ + +private[parser] class ANTLRNoCaseStringStream(input: String) extends ANTLRStringStream(input) { + override def LA(i: Int): Int = { + val la = super.LA(i) + if (la == 0 || la == CharStream.EOF) la + else Character.toUpperCase(la) + } +} + +/** + * Utility used by the Parser and the Lexer for error collection and reporting. + */ +private[parser] class ParseErrorReporter { + val errors = scala.collection.mutable.Buffer.empty[ParseError] + + def report(br: BaseRecognizer, re: RecognitionException, tokenNames: Array[String]): Unit = { + errors += ParseError(br, re, tokenNames) + } + + def checkForErrors(): Unit = { + if (errors.nonEmpty) { + val first = errors.head + val e = first.re + throwError(e.line, e.charPositionInLine, first.buildMessage().toString, errors.tail) + } + } + + def throwError(e: RecognitionException): Nothing = { + throwError(e.line, e.charPositionInLine, e.toString, errors) + } + + private def throwError( + line: Int, + startPosition: Int, + msg: String, + errors: Seq[ParseError]): Nothing = { + val b = new StringBuilder + b.append(msg).append("\n") + errors.foreach(error => error.buildMessage(b).append("\n")) + throw new AnalysisException(b.toString, Option(line), Option(startPosition)) + } +} + +/** + * Error collected during the parsing process.
+ * + * This is based on Hive's org.apache.hadoop.hive.ql.parse.ParseError + */ +private[parser] case class ParseError( + br: BaseRecognizer, + re: RecognitionException, + tokenNames: Array[String]) { + def buildMessage(s: StringBuilder = new StringBuilder): StringBuilder = { + s.append(br.getErrorHeader(re)).append(" ").append(br.getErrorMessage(re, tokenNames)) + } +} http://git-wip-us.apache.org/repos/asf/spark/blob/ea489f14/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserConf.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserConf.scala new file mode 100644 index 0000000..ce449b1 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserConf.scala @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.catalyst.parser + +trait ParserConf { + def supportQuotedId: Boolean + def supportSQL11ReservedKeywords: Boolean +} + +case class SimpleParserConf( + supportQuotedId: Boolean = true, + supportSQL11ReservedKeywords: Boolean = false) extends ParserConf http://git-wip-us.apache.org/repos/asf/spark/blob/ea489f14/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala index b58a373..26c00dc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala @@ -25,6 +25,7 @@ import scala.collection.JavaConverters._ import org.apache.parquet.hadoop.ParquetOutputCommitter import org.apache.spark.sql.catalyst.CatalystConf +import org.apache.spark.sql.catalyst.parser.ParserConf //////////////////////////////////////////////////////////////////////////////////////////////////// // This file defines the configuration options for Spark SQL. @@ -451,6 +452,19 @@ private[spark] object SQLConf { doc = "When true, we could use `datasource`.`path` as table in SQL query" ) + val PARSER_SUPPORT_QUOTEDID = booleanConf("spark.sql.parser.supportQuotedIdentifiers", + defaultValue = Some(true), + isPublic = false, + doc = "Whether to use quoted identifier.\n false: default(past) behavior. 
Implies only" + + "alphaNumeric and underscore are valid characters in identifiers.\n" + + " true: implies column names can contain any character.") + + val PARSER_SUPPORT_SQL11_RESERVED_KEYWORDS = booleanConf( + "spark.sql.parser.supportSQL11ReservedKeywords", + defaultValue = Some(false), + isPublic = false, + doc = "This flag should be set to true to enable support for SQL2011 reserved keywords.") + object Deprecated { val MAPRED_REDUCE_TASKS = "mapred.reduce.tasks" val EXTERNAL_SORT = "spark.sql.planner.externalSort" @@ -471,7 +485,7 @@ private[spark] object SQLConf { * * SQLConf is thread-safe (internally synchronized, so safe to be used in multiple threads). */ -private[sql] class SQLConf extends Serializable with CatalystConf { +private[sql] class SQLConf extends Serializable with CatalystConf with ParserConf { import SQLConf._ /** Only low degree of contention is expected for conf, thus NOT using ConcurrentHashMap. */ @@ -569,6 +583,10 @@ private[sql] class SQLConf extends Serializable with CatalystConf { private[spark] def runSQLOnFile: Boolean = getConf(RUN_SQL_ON_FILES) + def supportQuotedId: Boolean = getConf(PARSER_SUPPORT_QUOTEDID) + + def supportSQL11ReservedKeywords: Boolean = getConf(PARSER_SUPPORT_SQL11_RESERVED_KEYWORDS) + /** ********************** SQLConf functionality methods ************ */ /** Set Spark SQL configuration properties. */ http://git-wip-us.apache.org/repos/asf/spark/blob/ea489f14/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkQl.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkQl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkQl.scala new file mode 100644 index 0000000..a322688 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkQl.scala @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.execution + +import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation +import org.apache.spark.sql.catalyst.parser.{ASTNode, ParserConf, SimpleParserConf} +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, OneRowRelation} +import org.apache.spark.sql.catalyst.{CatalystQl, TableIdentifier} + +private[sql] class SparkQl(conf: ParserConf = SimpleParserConf()) extends CatalystQl(conf) { + /** Check if a command should not be explained. */ + protected def isNoExplainCommand(command: String): Boolean = "TOK_DESCTABLE" == command + + protected override def nodeToPlan(node: ASTNode): LogicalPlan = { + node match { + // Just fake explain for any of the native commands. 
+ case Token("TOK_EXPLAIN", explainArgs) if isNoExplainCommand(explainArgs.head.text) => + ExplainCommand(OneRowRelation) + + case Token("TOK_EXPLAIN", explainArgs) if "TOK_CREATETABLE" == explainArgs.head.text => + val Some(crtTbl) :: _ :: extended :: Nil = + getClauses(Seq("TOK_CREATETABLE", "FORMATTED", "EXTENDED"), explainArgs) + ExplainCommand(nodeToPlan(crtTbl), extended = extended.isDefined) + + case Token("TOK_EXPLAIN", explainArgs) => + // Ignore FORMATTED if present. + val Some(query) :: _ :: extended :: Nil = + getClauses(Seq("TOK_QUERY", "FORMATTED", "EXTENDED"), explainArgs) + ExplainCommand(nodeToPlan(query), extended = extended.isDefined) + + case Token("TOK_DESCTABLE", describeArgs) => + // Reference: https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL + val Some(tableType) :: formatted :: extended :: pretty :: Nil = + getClauses(Seq("TOK_TABTYPE", "FORMATTED", "EXTENDED", "PRETTY"), describeArgs) + if (formatted.isDefined || pretty.isDefined) { + // FORMATTED and PRETTY are not supported and this statement will be treated as + // a Hive native command. + nodeToDescribeFallback(node) + } else { + tableType match { + case Token("TOK_TABTYPE", Token("TOK_TABNAME", nameParts :: Nil) :: Nil) => + nameParts match { + case Token(".", dbName :: tableName :: Nil) => + // It is describing a table with the format like "describe db.table". + // TODO: Actually, a user may mean tableName.columnName. Need to resolve this + // issue. + val tableIdent = extractTableIdent(nameParts) + datasources.DescribeCommand( + UnresolvedRelation(tableIdent, None), isExtended = extended.isDefined) + case Token(".", dbName :: tableName :: colName :: Nil) => + // It is describing a column with the format like "describe db.table column". + nodeToDescribeFallback(node) + case tableName => + // It is describing a table with the format like "describe table". + datasources.DescribeCommand( + UnresolvedRelation(TableIdentifier(tableName.text), None), + isExtended = extended.isDefined) + } + // All other cases. 
+ case _ => nodeToDescribeFallback(node) + } + } + + case _ => + super.nodeToPlan(node) + } + } + + protected def nodeToDescribeFallback(node: ASTNode): LogicalPlan = noParseRule("Describe", node) +} http://git-wip-us.apache.org/repos/asf/spark/blob/ea489f14/sql/hive/pom.xml ---------------------------------------------------------------------- diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index ffabb92..cd0c2ae 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -262,26 +262,6 @@ </executions> </plugin> - - <plugin> - <groupId>org.antlr</groupId> - <artifactId>antlr3-maven-plugin</artifactId> - <executions> - <execution> - <goals> - <goal>antlr</goal> - </goals> - </execution> - </executions> - <configuration> - <sourceDirectory>${basedir}/src/main/antlr3</sourceDirectory> - <includes> - <include>**/SparkSqlLexer.g</include> - <include>**/SparkSqlParser.g</include> - </includes> - </configuration> - </plugin> - </plugins> </build> </project> http://git-wip-us.apache.org/repos/asf/spark/blob/ea489f14/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/FromClauseParser.g ---------------------------------------------------------------------- diff --git a/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/FromClauseParser.g b/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/FromClauseParser.g deleted file mode 100644 index e4a80f0..0000000 --- a/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/FromClauseParser.g +++ /dev/null @@ -1,330 +0,0 @@ -/** - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ -parser grammar FromClauseParser; - -options -{ -output=AST; -ASTLabelType=CommonTree; -backtrack=false; -k=3; -} - -@members { - @Override - public Object recoverFromMismatchedSet(IntStream input, - RecognitionException re, BitSet follow) throws RecognitionException { - throw re; - } - @Override - public void displayRecognitionError(String[] tokenNames, - RecognitionException e) { - gParent.errors.add(new ParseError(gParent, e, tokenNames)); - } - protected boolean useSQL11ReservedKeywordsForIdentifier() { - return gParent.useSQL11ReservedKeywordsForIdentifier(); - } -} - -@rulecatch { -catch (RecognitionException e) { - throw e; -} -} - -//----------------------------------------------------------------------------------- - -tableAllColumns - : STAR - -> ^(TOK_ALLCOLREF) - | tableName DOT STAR - -> ^(TOK_ALLCOLREF tableName) - ; - -// (table|column) -tableOrColumn -@init { gParent.pushMsg("table or column identifier", state); } -@after { gParent.popMsg(state); } - : - identifier -> ^(TOK_TABLE_OR_COL identifier) - ; - -expressionList -@init { gParent.pushMsg("expression list", state); } -@after { gParent.popMsg(state); } - : - expression (COMMA expression)* -> ^(TOK_EXPLIST expression+) - ; - -aliasList -@init { gParent.pushMsg("alias list", state); } -@after { gParent.popMsg(state); } - : - identifier (COMMA identifier)* -> ^(TOK_ALIASLIST identifier+) - ; - -//----------------------- Rules for parsing fromClause ------------------------------ -// from [col1, col2, col3] table1, [col4, col5] table2 -fromClause -@init { gParent.pushMsg("from clause", state); } -@after { gParent.popMsg(state); } - : - KW_FROM joinSource -> ^(TOK_FROM joinSource) - ; - -joinSource -@init { gParent.pushMsg("join source", state); } -@after { gParent.popMsg(state); } - : fromSource ( joinToken^ fromSource ( KW_ON! expression {$joinToken.start.getType() != COMMA}? )? )* - | uniqueJoinToken^ uniqueJoinSource (COMMA! uniqueJoinSource)+ - ; - -uniqueJoinSource -@init { gParent.pushMsg("unique join source", state); } -@after { gParent.popMsg(state); } - : KW_PRESERVE? fromSource uniqueJoinExpr - ; - -uniqueJoinExpr -@init { gParent.pushMsg("unique join expression list", state); } -@after { gParent.popMsg(state); } - : LPAREN e1+=expression (COMMA e1+=expression)* RPAREN - -> ^(TOK_EXPLIST $e1*) - ; - -uniqueJoinToken -@init { gParent.pushMsg("unique join", state); } -@after { gParent.popMsg(state); } - : KW_UNIQUEJOIN -> TOK_UNIQUEJOIN; - -joinToken -@init { gParent.pushMsg("join type specifier", state); } -@after { gParent.popMsg(state); } - : - KW_JOIN -> TOK_JOIN - | KW_INNER KW_JOIN -> TOK_JOIN - | COMMA -> TOK_JOIN - | KW_CROSS KW_JOIN -> TOK_CROSSJOIN - | KW_LEFT (KW_OUTER)? KW_JOIN -> TOK_LEFTOUTERJOIN - | KW_RIGHT (KW_OUTER)? KW_JOIN -> TOK_RIGHTOUTERJOIN - | KW_FULL (KW_OUTER)? KW_JOIN -> TOK_FULLOUTERJOIN - | KW_LEFT KW_SEMI KW_JOIN -> TOK_LEFTSEMIJOIN - | KW_ANTI KW_JOIN -> TOK_ANTIJOIN - ; - -lateralView -@init {gParent.pushMsg("lateral view", state); } -@after {gParent.popMsg(state); } - : - (KW_LATERAL KW_VIEW KW_OUTER) => KW_LATERAL KW_VIEW KW_OUTER function tableAlias (KW_AS identifier ((COMMA)=> COMMA identifier)*)? - -> ^(TOK_LATERAL_VIEW_OUTER ^(TOK_SELECT ^(TOK_SELEXPR function identifier* tableAlias))) - | - KW_LATERAL KW_VIEW function tableAlias (KW_AS identifier ((COMMA)=> COMMA identifier)*)? 
- -> ^(TOK_LATERAL_VIEW ^(TOK_SELECT ^(TOK_SELEXPR function identifier* tableAlias))) - ; - -tableAlias -@init {gParent.pushMsg("table alias", state); } -@after {gParent.popMsg(state); } - : - identifier -> ^(TOK_TABALIAS identifier) - ; - -fromSource -@init { gParent.pushMsg("from source", state); } -@after { gParent.popMsg(state); } - : - (LPAREN KW_VALUES) => fromSource0 - | (LPAREN) => LPAREN joinSource RPAREN -> joinSource - | fromSource0 - ; - - -fromSource0 -@init { gParent.pushMsg("from source 0", state); } -@after { gParent.popMsg(state); } - : - ((Identifier LPAREN)=> partitionedTableFunction | tableSource | subQuerySource | virtualTableSource) (lateralView^)* - ; - -tableBucketSample -@init { gParent.pushMsg("table bucket sample specification", state); } -@after { gParent.popMsg(state); } - : - KW_TABLESAMPLE LPAREN KW_BUCKET (numerator=Number) KW_OUT KW_OF (denominator=Number) (KW_ON expr+=expression (COMMA expr+=expression)*)? RPAREN -> ^(TOK_TABLEBUCKETSAMPLE $numerator $denominator $expr*) - ; - -splitSample -@init { gParent.pushMsg("table split sample specification", state); } -@after { gParent.popMsg(state); } - : - KW_TABLESAMPLE LPAREN (numerator=Number) (percent=KW_PERCENT|KW_ROWS) RPAREN - -> {percent != null}? ^(TOK_TABLESPLITSAMPLE TOK_PERCENT $numerator) - -> ^(TOK_TABLESPLITSAMPLE TOK_ROWCOUNT $numerator) - | - KW_TABLESAMPLE LPAREN (numerator=ByteLengthLiteral) RPAREN - -> ^(TOK_TABLESPLITSAMPLE TOK_LENGTH $numerator) - ; - -tableSample -@init { gParent.pushMsg("table sample specification", state); } -@after { gParent.popMsg(state); } - : - tableBucketSample | - splitSample - ; - -tableSource -@init { gParent.pushMsg("table source", state); } -@after { gParent.popMsg(state); } - : tabname=tableName - ((tableProperties) => props=tableProperties)? - ((tableSample) => ts=tableSample)? - ((KW_AS) => (KW_AS alias=Identifier) - | - (Identifier) => (alias=Identifier))? - -> ^(TOK_TABREF $tabname $props? $ts? $alias?) - ; - -tableName -@init { gParent.pushMsg("table name", state); } -@after { gParent.popMsg(state); } - : - db=identifier DOT tab=identifier - -> ^(TOK_TABNAME $db $tab) - | - tab=identifier - -> ^(TOK_TABNAME $tab) - ; - -viewName -@init { gParent.pushMsg("view name", state); } -@after { gParent.popMsg(state); } - : - (db=identifier DOT)? view=identifier - -> ^(TOK_TABNAME $db? $view) - ; - -subQuerySource -@init { gParent.pushMsg("subquery source", state); } -@after { gParent.popMsg(state); } - : - LPAREN queryStatementExpression[false] RPAREN KW_AS? identifier -> ^(TOK_SUBQUERY queryStatementExpression identifier) - ; - -//---------------------- Rules for parsing PTF clauses ----------------------------- -partitioningSpec -@init { gParent.pushMsg("partitioningSpec clause", state); } -@after { gParent.popMsg(state); } - : - partitionByClause orderByClause? -> ^(TOK_PARTITIONINGSPEC partitionByClause orderByClause?) | - orderByClause -> ^(TOK_PARTITIONINGSPEC orderByClause) | - distributeByClause sortByClause? -> ^(TOK_PARTITIONINGSPEC distributeByClause sortByClause?) 
|
- sortByClause -> ^(TOK_PARTITIONINGSPEC sortByClause) |
- clusterByClause -> ^(TOK_PARTITIONINGSPEC clusterByClause)
- ;
-
-partitionTableFunctionSource
-@init { gParent.pushMsg("partitionTableFunctionSource clause", state); }
-@after { gParent.popMsg(state); }
- :
- subQuerySource |
- tableSource |
- partitionedTableFunction
- ;
-
-partitionedTableFunction
-@init { gParent.pushMsg("ptf clause", state); }
-@after { gParent.popMsg(state); }
- :
- name=Identifier LPAREN KW_ON
- ((partitionTableFunctionSource) => (ptfsrc=partitionTableFunctionSource spec=partitioningSpec?))
- ((Identifier LPAREN expression RPAREN ) => Identifier LPAREN expression RPAREN ( COMMA Identifier LPAREN expression RPAREN)*)?
- ((RPAREN) => (RPAREN)) ((Identifier) => alias=Identifier)?
- -> ^(TOK_PTBLFUNCTION $name $alias? $ptfsrc $spec? expression*)
- ;
-
-//----------------------- Rules for parsing whereClause -----------------------------
-// where a=b and ...
-whereClause
-@init { gParent.pushMsg("where clause", state); }
-@after { gParent.popMsg(state); }
- :
- KW_WHERE searchCondition -> ^(TOK_WHERE searchCondition)
- ;
-
-searchCondition
-@init { gParent.pushMsg("search condition", state); }
-@after { gParent.popMsg(state); }
- :
- expression
- ;
-
-//-----------------------------------------------------------------------------------
-
-//-------- Row Constructor ----------------------------------------------------------
-//in support of SELECT * FROM (VALUES(1,2,3),(4,5,6),...) as FOO(a,b,c) and
-// INSERT INTO <table> (col1,col2,...) VALUES(...),(...),...
-// INSERT INTO <table> (col1,col2,...) SELECT * FROM (VALUES(1,2,3),(4,5,6),...) as Foo(a,b,c)
-valueRowConstructor
-@init { gParent.pushMsg("value row constructor", state); }
-@after { gParent.popMsg(state); }
- :
- LPAREN precedenceUnaryPrefixExpression (COMMA precedenceUnaryPrefixExpression)* RPAREN -> ^(TOK_VALUE_ROW precedenceUnaryPrefixExpression+)
- ;
-
-valuesTableConstructor
-@init { gParent.pushMsg("values table constructor", state); }
-@after { gParent.popMsg(state); }
- :
- valueRowConstructor (COMMA valueRowConstructor)* -> ^(TOK_VALUES_TABLE valueRowConstructor+)
- ;
-
-/*
-VALUES(1),(2) means 2 rows, 1 column each.
-VALUES(1,2),(3,4) means 2 rows, 2 columns each.
-VALUES(1,2,3) means 1 row, 3 columns
-*/
-valuesClause
-@init { gParent.pushMsg("values clause", state); }
-@after { gParent.popMsg(state); }
- :
- KW_VALUES valuesTableConstructor -> valuesTableConstructor
- ;
-
-/*
-This represents a clause like this:
-(VALUES(1,2),(2,3)) as VirtTable(col1,col2)
-*/
-virtualTableSource
-@init { gParent.pushMsg("virtual table source", state); }
-@after { gParent.popMsg(state); }
- :
- LPAREN valuesClause RPAREN tableNameColList -> ^(TOK_VIRTUAL_TABLE tableNameColList valuesClause)
- ;
-/*
-e.g. as VirtTable(col1,col2)
-Note that we only want literals as column names
-*/
-tableNameColList
-@init { gParent.pushMsg("from source", state); }
-@after { gParent.popMsg(state); }
- :
- KW_AS? identifier LPAREN identifier (COMMA identifier)* RPAREN -> ^(TOK_VIRTUAL_TABREF ^(TOK_TABNAME identifier) ^(TOK_COL_NAME identifier+))
- ;
-
-//-----------------------------------------------------------------------------------
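The TOK_EXPLAIN and TOK_DESCTABLE cases earlier in this commit destructure the result of getClauses positionally: one Option per requested clause name, returned in request order. The standalone Scala sketch below reimplements that contract over plain strings so the `val Some(query) :: _ :: extended :: Nil` pattern can be exercised in isolation; it is a toy under those assumptions, not Spark's CatalystQl.getClauses, and all names in it are illustrative.

// Toy reimplementation of the positional clause-extraction contract used by
// the TOK_EXPLAIN / TOK_DESCTABLE cases above; operates on plain strings.
object ClauseExtractionSketch {
  // One Option per requested clause name, in request order; at most one
  // matching node is consumed per name.
  def getClauses(names: List[String], nodes: List[String]): List[Option[String]] = {
    var remaining = nodes
    names.map { name =>
      val (matches, rest) = remaining.partition(_.equalsIgnoreCase(name))
      remaining = rest ++ matches.drop(1)
      matches.headOption
    }
  }

  def main(args: Array[String]): Unit = {
    // Models "EXPLAIN EXTENDED <query>": FORMATTED absent, EXTENDED present,
    // so the positional destructuring yields Some/None slots as expected.
    val Some(query) :: formatted :: extended :: Nil =
      getClauses(List("TOK_QUERY", "FORMATTED", "EXTENDED"), List("TOK_QUERY", "EXTENDED"))
    assert(query == "TOK_QUERY" && formatted.isEmpty && extended.isDefined)
  }
}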
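The joinToken alternatives in the deleted FromClauseParser.g above map each surface join form onto a single AST token, with a comma in the FROM list treated as an inner join; the joinSource predicate then refuses an ON clause after a comma join. A compact Scala restatement of that mapping (object and value names here are illustrative only):

// Restates the joinToken rewrite rules from the grammar above as a lookup
// table: each surface form maps to one AST token name.
object JoinTokenSketch {
  val joinToken: Map[String, String] = Map(
    "JOIN" -> "TOK_JOIN",
    "INNER JOIN" -> "TOK_JOIN",
    "," -> "TOK_JOIN", // FROM a, b is parsed as a plain inner join...
    "CROSS JOIN" -> "TOK_CROSSJOIN",
    "LEFT JOIN" -> "TOK_LEFTOUTERJOIN",
    "LEFT OUTER JOIN" -> "TOK_LEFTOUTERJOIN",
    "RIGHT JOIN" -> "TOK_RIGHTOUTERJOIN",
    "RIGHT OUTER JOIN" -> "TOK_RIGHTOUTERJOIN",
    "FULL JOIN" -> "TOK_FULLOUTERJOIN",
    "FULL OUTER JOIN" -> "TOK_FULLOUTERJOIN",
    "LEFT SEMI JOIN" -> "TOK_LEFTSEMIJOIN",
    "ANTI JOIN" -> "TOK_ANTIJOIN")

  def main(args: Array[String]): Unit = {
    // ...but the predicate {$joinToken.start.getType() != COMMA}? in joinSource
    // only accepts an ON condition after a keyword join, never after a comma.
    assert(joinToken(",") == joinToken("JOIN"))
  }
}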
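The valuesClause comments above pin down the row/column shape of a VALUES list. The sketch below models only that shape rule; the helper names are hypothetical, and the same-arity check is an assumption about what an analyzer would typically enforce, not code from this patch.

// Models the shape rules documented in the grammar comments:
// VALUES(1),(2)     => 2 rows, 1 column each
// VALUES(1,2),(3,4) => 2 rows, 2 columns each
// VALUES(1,2,3)     => 1 row, 3 columns
object ValuesShapeSketch {
  type Row = Seq[Any]

  // Hypothetical check: every VALUES row must have the same arity, and that
  // common arity is the virtual table's column count.
  def columnCount(rows: Seq[Row]): Int = {
    require(rows.nonEmpty, "VALUES requires at least one row")
    val arities = rows.map(_.length).distinct
    require(arities.size == 1, s"Ragged VALUES rows, arities: $arities")
    arities.head
  }

  def main(args: Array[String]): Unit = {
    assert(columnCount(Seq(Seq(1), Seq(2))) == 1)       // VALUES(1),(2)
    assert(columnCount(Seq(Seq(1, 2), Seq(3, 4))) == 2) // VALUES(1,2),(3,4)
    assert(columnCount(Seq(Seq(1, 2, 3))) == 3)         // VALUES(1,2,3)
  }
}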
