Github user hvanhovell commented on a diff in the pull request:

    https://github.com/apache/spark/pull/11557#discussion_r73677314
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ng/AstBuilder.scala ---
    @@ -0,0 +1,1452 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.spark.sql.catalyst.parser.ng
    +
    +import java.sql.{Date, Timestamp}
    +
    +import scala.collection.JavaConverters._
    +import scala.collection.mutable.ArrayBuffer
    +
    +import org.antlr.v4.runtime.{ParserRuleContext, Token}
    +import org.antlr.v4.runtime.tree.{ParseTree, TerminalNode}
    +
    +import org.apache.spark.internal.Logging
    +import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier}
    +import org.apache.spark.sql.catalyst.analysis._
    +import org.apache.spark.sql.catalyst.expressions._
    +import org.apache.spark.sql.catalyst.parser.ng.SqlBaseParser._
    +import org.apache.spark.sql.catalyst.plans._
    +import org.apache.spark.sql.catalyst.plans.logical._
    +import org.apache.spark.sql.types._
    +import org.apache.spark.unsafe.types.CalendarInterval
    +import org.apache.spark.util.random.RandomSampler
    +
    +/**
    + * The AstBuilder converts an ANTLR4 ParseTree into a catalyst Expression, LogicalPlan or
    + * TableIdentifier.
    + */
    +class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
    +  import ParserUtils._
    +
    +  protected def typedVisit[T](ctx: ParseTree): T = {
    +    ctx.accept(this).asInstanceOf[T]
    +  }
    +
    +  override def visitSingleStatement(ctx: SingleStatementContext): LogicalPlan = withOrigin(ctx) {
    +    visit(ctx.statement).asInstanceOf[LogicalPlan]
    +  }
    +
    +  override def visitSingleExpression(ctx: SingleExpressionContext): Expression = withOrigin(ctx) {
    +    visitNamedExpression(ctx.namedExpression)
    +  }
    +
    +  override def visitSingleTableIdentifier(
    +      ctx: SingleTableIdentifierContext): TableIdentifier = withOrigin(ctx) {
    +    visitTableIdentifier(ctx.tableIdentifier)
    +  }
    +
    +  override def visitSingleDataType(ctx: SingleDataTypeContext): DataType = withOrigin(ctx) {
    +    visit(ctx.dataType).asInstanceOf[DataType]
    +  }
    +
    +  /* ********************************************************************************************
    +   * Plan parsing
    +   * ******************************************************************************************** */
    +  protected def plan(tree: ParserRuleContext): LogicalPlan = typedVisit(tree)
    +
    +  /**
    +   * Make sure we do not try to create a plan for a native command.
    +   */
    +  override def visitExecuteNativeCommand(ctx: ExecuteNativeCommandContext): LogicalPlan = null
    +
    +  /**
    +   * Create a plan for a SHOW FUNCTIONS command.
    +   */
    +  override def visitShowFunctions(ctx: ShowFunctionsContext): LogicalPlan = withOrigin(ctx) {
    +    import ctx._
    +    if (qualifiedName != null) {
    +      val names = qualifiedName().identifier().asScala.map(_.getText).toList
    +      names match {
    +        case db :: name :: Nil =>
    +          ShowFunctions(Some(db), Some(name))
    +        case name :: Nil =>
    +          ShowFunctions(None, Some(name))
    +        case _ =>
    +          throw new ParseException("SHOW FUNCTIONS unsupported name", ctx)
    +      }
    +    } else if (pattern != null) {
    +      ShowFunctions(None, Some(string(pattern)))
    +    } else {
    +      ShowFunctions(None, None)
    +    }
    +  }
    +
    +  /**
    +   * Create a plan for a DESCRIBE FUNCTION command.
    +   */
    +  override def visitDescribeFunction(ctx: DescribeFunctionContext): LogicalPlan = withOrigin(ctx) {
    +    val functionName = ctx.qualifiedName().identifier().asScala.map(_.getText).mkString(".")
    +    DescribeFunction(functionName, ctx.EXTENDED != null)
    +  }
    +
    +  /**
    +   * Create a top-level plan with Common Table Expressions.
    +   */
    +  override def visitQuery(ctx: QueryContext): LogicalPlan = withOrigin(ctx) {
    +    val query = plan(ctx.queryNoWith)
    +
    +    // Apply CTEs
    +    query.optional(ctx.ctes) {
    +      val ctes = ctx.ctes.namedQuery.asScala.map {
    +        case nCtx =>
    +          val namedQuery = visitNamedQuery(nCtx)
    +          (namedQuery.alias, namedQuery)
    +      }
    +
    +      // Check for duplicate names.
    +      ctes.groupBy(_._1).filter(_._2.size > 1).foreach {
    +        case (name, _) =>
    +          throw new ParseException(
    +            s"Name '$name' is used for multiple common table expressions", 
ctx)
    +      }
    +
    +      With(query, ctes.toMap)
    +    }
    +  }
    +
    +  /**
    +   * Create a named logical plan.
    +   *
    +   * This is only used for Common Table Expressions.
    +   */
    +  override def visitNamedQuery(ctx: NamedQueryContext): SubqueryAlias = withOrigin(ctx) {
    +    SubqueryAlias(ctx.name.getText, plan(ctx.queryNoWith))
    +  }
    +
    +  /**
    +   * Create a logical plan which allows for multiple inserts using one 'from' statement. These
    +   * queries have the following SQL form:
    +   * {{{
    +   *   [WITH cte...]?
    +   *   FROM src
    +   *   [INSERT INTO tbl1 SELECT *]+
    +   * }}}
    +   * For example:
    +   * {{{
    +   *   FROM db.tbl1 A
    +   *   INSERT INTO dbo.tbl1 SELECT * WHERE A.value = 10 LIMIT 5
    +   *   INSERT INTO dbo.tbl2 SELECT * WHERE A.value = 12
    +   * }}}
    +   * This (Hive) feature cannot be combined with set-operators.
    +   */
    +  override def visitMultiInsertQuery(ctx: MultiInsertQueryContext): LogicalPlan = withOrigin(ctx) {
    +    val from = visitFromClause(ctx.fromClause)
    +
    +    // Build the insert clauses.
    +    val inserts = ctx.multiInsertQueryBody.asScala.map {
    +      body =>
    +        assert(body.querySpecification.fromClause == null,
    +          "Multi-Insert queries cannot have a FROM clause in their 
individual SELECT statements",
    +          body)
    +
    +        withQuerySpecification(body.querySpecification, from).
    +          // Add organization statements.
    +          optionalMap(body.queryOrganization)(withQueryResultClauses).
    +          // Add insert.
    +          optionalMap(body.insertInto())(withInsertInto)
    +    }
    +
    +    // If there are multiple INSERTS just UNION them together into one query.
    +    inserts match {
    +      case Seq(query) => query
    +      case queries => Union(queries)
    +    }
    +  }
    +
    +  /**
    +   * Create a logical plan for a regular (single-insert) query.
    +   */
    +  override def visitSingleInsertQuery(
    +      ctx: SingleInsertQueryContext): LogicalPlan = withOrigin(ctx) {
    +    plan(ctx.queryTerm).
    +      // Add organization statements.
    +      optionalMap(ctx.queryOrganization)(withQueryResultClauses).
    +      // Add insert.
    +      optionalMap(ctx.insertInto())(withInsertInto)
    +  }
    +
    +  /**
    +   * Add an INSERT INTO [TABLE]/INSERT OVERWRITE TABLE operation to the logical plan.
    +   */
    +  private def withInsertInto(
    +      ctx: InsertIntoContext,
    +      query: LogicalPlan): LogicalPlan = withOrigin(ctx) {
    +    val tableIdent = visitTableIdentifier(ctx.tableIdentifier)
    +    val partitionKeys = Option(ctx.partitionSpec).map(visitPartitionSpec).getOrElse(Map.empty)
    +
    +    InsertIntoTable(
    +      UnresolvedRelation(tableIdent, None),
    +      partitionKeys,
    +      query,
    +      ctx.OVERWRITE != null,
    +      ctx.EXISTS != null)
    +  }
    +
    +  /**
    +   * Create a partition specification map.
    +   */
    +  override def visitPartitionSpec(
    +      ctx: PartitionSpecContext): Map[String, Option[String]] = withOrigin(ctx) {
    +    ctx.partitionVal.asScala.map { pVal =>
    +      val name = pVal.identifier.getText.toLowerCase
    +      val value = Option(pVal.constant).map(visitStringConstant)
    +      name -> value
    +    }.toMap
    +  }
    +
    +  /**
    +   * Create a partition specification map without optional values.
    +   */
    +  protected def visitNonOptionalPartitionSpec(
    +      ctx: PartitionSpecContext): Map[String, String] = withOrigin(ctx) {
    +    visitPartitionSpec(ctx).mapValues(_.orNull).map(identity)
    +  }
    +
    +  /**
    +   * Convert a constant of any type into a string. This is typically used in DDL commands, and its
    +   * main purpose is to prevent slight differences due to back to back conversions i.e.:
    +   * String -> Literal -> String.
    +   */
    +  protected def visitStringConstant(ctx: ConstantContext): String = withOrigin(ctx) {
    +    ctx match {
    +      case s: StringLiteralContext => createString(s)
    +      case o => o.getText
    +    }
    +  }
    +
    +  /**
    +   * Add ORDER BY/SORT BY/CLUSTER BY/DISTRIBUTE BY/LIMIT/WINDOWS clauses to the logical plan. These
    +   * clauses determine the shape (ordering/partitioning/rows) of the query result.
    +   */
    +  private def withQueryResultClauses(
    +      ctx: QueryOrganizationContext,
    +      query: LogicalPlan): LogicalPlan = withOrigin(ctx) {
    +    import ctx._
    +
    +    // Handle ORDER BY, SORT BY, DISTRIBUTE BY, and CLUSTER BY clause.
    +    val withOrder = if (
    +      !order.isEmpty && sort.isEmpty && distributeBy.isEmpty && clusterBy.isEmpty) {
    +      // ORDER BY ...
    +      Sort(order.asScala.map(visitSortItem), global = true, query)
    +    } else if (order.isEmpty && !sort.isEmpty && distributeBy.isEmpty && clusterBy.isEmpty) {
    +      // SORT BY ...
    +      Sort(sort.asScala.map(visitSortItem), global = false, query)
    +    } else if (order.isEmpty && sort.isEmpty && !distributeBy.isEmpty && clusterBy.isEmpty) {
    +      // DISTRIBUTE BY ...
    +      RepartitionByExpression(expressionList(distributeBy), query)
    +    } else if (order.isEmpty && !sort.isEmpty && !distributeBy.isEmpty && clusterBy.isEmpty) {
    +      // SORT BY ... DISTRIBUTE BY ...
    +      Sort(
    +        sort.asScala.map(visitSortItem),
    +        global = false,
    +        RepartitionByExpression(expressionList(distributeBy), query))
    +    } else if (order.isEmpty && sort.isEmpty && distributeBy.isEmpty && !clusterBy.isEmpty) {
    +      // CLUSTER BY ...
    +      val expressions = expressionList(clusterBy)
    +      Sort(
    +        expressions.map(SortOrder(_, Ascending)),
    +        global = false,
    +        RepartitionByExpression(expressions, query))
    +    } else if (order.isEmpty && sort.isEmpty && distributeBy.isEmpty && clusterBy.isEmpty) {
    +      // [EMPTY]
    +      query
    +    } else {
    +      throw new ParseException(
    +        "Combination of ORDER BY/SORT BY/DISTRIBUTE BY/CLUSTER BY is not 
supported", ctx)
    +    }
    +
    +    // WINDOWS
    +    val withWindow = withOrder.optionalMap(windows)(withWindows)
    +
    +    // LIMIT
    +    withWindow.optional(limit) {
    +      Limit(typedVisit(limit), withWindow)
    +    }
    +  }
    +
    +  /**
    +   * Create a logical plan using a query specification.
    +   */
    +  override def visitQuerySpecification(
    +      ctx: QuerySpecificationContext): LogicalPlan = withOrigin(ctx) {
    +    val from = OneRowRelation.optional(ctx.fromClause) {
    +      visitFromClause(ctx.fromClause)
    +    }
    +    withQuerySpecification(ctx, from)
    +  }
    +
    +  /**
    +   * Add a query specification to a logical plan. The query specification is the core of the logical
    +   * plan, this is where sourcing (FROM clause), transforming (SELECT TRANSFORM/MAP/REDUCE),
    +   * projection (SELECT), aggregation (GROUP BY ... HAVING ...) and filtering (WHERE) takes place.
    +   *
    +   * Note that query hints are ignored (both by the parser and the builder).
    +   */
    +  private def withQuerySpecification(
    +      ctx: QuerySpecificationContext,
    +      relation: LogicalPlan): LogicalPlan = withOrigin(ctx) {
    +    import ctx._
    +
    +    // WHERE
    +    def filter(ctx: BooleanExpressionContext, plan: LogicalPlan): LogicalPlan = {
    +      Filter(expression(ctx), plan)
    +    }
    +
    +    // Expressions.
    +    val expressions = Option(namedExpressionSeq).toSeq
    +      .flatMap(_.namedExpression.asScala)
    +      .map(typedVisit[Expression])
    +
    +    // Create either a transform or a regular query.
    +    val specType = Option(kind).map(_.getType).getOrElse(SqlBaseParser.SELECT)
    +    specType match {
    +      case SqlBaseParser.MAP | SqlBaseParser.REDUCE | SqlBaseParser.TRANSFORM =>
    +        // Transform
    +
    +        // Add where.
    +        val withFilter = relation.optionalMap(where)(filter)
    +
    +        // Create the attributes.
    +        val (attributes, schemaLess) = if (colTypeList != null) {
    +          // Typed return columns.
    +          (createStructType(colTypeList).toAttributes, false)
    +        } else if (identifierSeq != null) {
    +          // Untyped return columns.
    +          val attrs = visitIdentifierSeq(identifierSeq).map { name =>
    +            AttributeReference(name, StringType, nullable = true)()
    +          }
    +          (attrs, false)
    +        } else {
    +          (Seq(AttributeReference("key", StringType)(),
    +            AttributeReference("value", StringType)()), true)
    +        }
    +
    +        // Create the transform.
    +        ScriptTransformation(
    +          expressions,
    +          string(script),
    +          attributes,
    +          withFilter,
    +          withScriptIOSchema(inRowFormat, recordWriter, outRowFormat, recordReader, schemaLess))
    +
    +      case SqlBaseParser.SELECT =>
    +        // Regular select
    +
    +        // Add lateral views.
    +        val withLateralView = ctx.lateralView.asScala.foldLeft(relation)(withGenerate)
    +
    +        // Add where.
    +        val withFilter = withLateralView.optionalMap(where)(filter)
    +
    +        // Add aggregation or a project.
    +        val namedExpressions = expressions.map {
    +          case e: NamedExpression => e
    +          case e: Expression => UnresolvedAlias(e)
    +        }
    +        val withProject = if (aggregation != null) {
    +          withAggregation(aggregation, namedExpressions, withFilter)
    +        } else if (namedExpressions.nonEmpty) {
    +          Project(namedExpressions, withFilter)
    +        } else {
    +          withFilter
    +        }
    +
    +        // Having
    +        val withHaving = withProject.optional(having) {
    +          // Note that we added a cast to boolean. If the expression itself is already boolean,
    +          // the optimizer will get rid of the unnecessary cast.
    +          Filter(Cast(expression(having), BooleanType), withProject)
    +        }
    +
    +        // Distinct
    +        val withDistinct = if (setQuantifier() != null && setQuantifier().DISTINCT() != null) {
    +          Distinct(withHaving)
    +        } else {
    +          withHaving
    +        }
    +
    +        // Window
    +        withDistinct.optionalMap(windows)(withWindows)
    +    }
    +  }
    +
    +  /**
    +   * Create a (Hive based) [[ScriptInputOutputSchema]].
    +   */
    +  protected def withScriptIOSchema(
    +      inRowFormat: RowFormatContext,
    +      recordWriter: Token,
    +      outRowFormat: RowFormatContext,
    +      recordReader: Token,
    +      schemaLess: Boolean): ScriptInputOutputSchema = null
    +
    +  /**
    +   * Create a logical plan for a given 'FROM' clause. Note that we support multiple (comma
    +   * separated) relations here, these get converted into a single plan by condition-less inner join.
    +   */
    +  override def visitFromClause(ctx: FromClauseContext): LogicalPlan = withOrigin(ctx) {
    +    val from = ctx.relation.asScala.map(plan).reduceLeft(Join(_, _, Inner, None))
    +    ctx.lateralView.asScala.foldLeft(from)(withGenerate)
    +  }
    +
    +  /**
    +   * Connect two queries by a Set operator.
    +   *
    +   * Supported Set operators are:
    +   * - UNION [DISTINCT]
    +   * - UNION ALL
    +   * - EXCEPT [DISTINCT]
    +   * - INTERSECT [DISTINCT]
    +   */
    +  override def visitSetOperation(ctx: SetOperationContext): LogicalPlan = withOrigin(ctx) {
    +    val left = plan(ctx.left)
    +    val right = plan(ctx.right)
    +    val all = Option(ctx.setQuantifier()).exists(_.ALL != null)
    +    ctx.operator.getType match {
    +      case SqlBaseParser.UNION if all =>
    +        Union(left, right)
    +      case SqlBaseParser.UNION =>
    +        Distinct(Union(left, right))
    +      case SqlBaseParser.INTERSECT if all =>
    +        throw new ParseException("INTERSECT ALL is not supported.", ctx)
    +      case SqlBaseParser.INTERSECT =>
    +        Intersect(left, right)
    +      case SqlBaseParser.EXCEPT if all =>
    +        throw new ParseException("EXCEPT ALL is not supported.", ctx)
    +      case SqlBaseParser.EXCEPT =>
    +        Except(left, right)
    +    }
    +  }
    +
    +  /**
    +   * Add a [[WithWindowDefinition]] operator to a logical plan.
    +   */
    +  private def withWindows(
    +      ctx: WindowsContext,
    +      query: LogicalPlan): LogicalPlan = withOrigin(ctx) {
    +    // Collect all window specifications defined in the WINDOW clause.
    +    val baseWindowMap = ctx.namedWindow.asScala.map {
    +      wCtx =>
    +        (wCtx.identifier.getText, typedVisit[WindowSpec](wCtx.windowSpec))
    +    }.toMap
    +
    +    // Handle cases like
    +    // window w1 as (partition by p_mfgr order by p_name
    +    //               range between 2 preceding and 2 following),
    +    //        w2 as w1
    +    val windowMapView = baseWindowMap.mapValues {
    +      case WindowSpecReference(name) =>
    +        baseWindowMap.get(name) match {
    +          case Some(spec: WindowSpecDefinition) =>
    +            spec
    +          case Some(ref) =>
    +            throw new ParseException(s"Window reference '$name' is not a 
window specification", ctx)
    +          case None =>
    +            throw new ParseException(s"Cannot resolve window reference 
'$name'", ctx)
    +        }
    +      case spec: WindowSpecDefinition => spec
    +    }
    +
    +    // Note that mapValues creates a view instead of materialized map. We force materialization by
    +    // mapping over identity.
    +    WithWindowDefinition(windowMapView.map(identity), query)
    +  }
    +
    +  /**
    +   * Add an [[Aggregate]] to a logical plan.
    +   */
    +  private def withAggregation(
    +      ctx: AggregationContext,
    +      selectExpressions: Seq[NamedExpression],
    +      query: LogicalPlan): LogicalPlan = withOrigin(ctx) {
    +    import ctx._
    +    val groupByExpressions = expressionList(groupingExpressions)
    +
    +    if (GROUPING != null) {
    +      // GROUP BY .... GROUPING SETS (...)
    +      val expressionMap = groupByExpressions.zipWithIndex.toMap
    +      val numExpressions = expressionMap.size
    +      val mask = (1 << numExpressions) - 1
    +      val masks = ctx.groupingSet.asScala.map {
    +        _.expression.asScala.foldLeft(mask) {
    +          case (bitmap, eCtx) =>
    +            // Find the index of the expression.
    +            val e = typedVisit[Expression](eCtx)
    +            val index = expressionMap.find(_._1.semanticEquals(e)).map(_._2).getOrElse(
    +              throw new ParseException(
    +                s"$e doesn't show up in the GROUP BY list", ctx))
    +            // 0 means that the column at the given index is a grouping column, 1 means it is not,
    +            // so we unset the bit in bitmap.
    +            bitmap & ~(1 << (numExpressions - 1 - index))
    +        }
    +      }
    +      GroupingSets(masks, groupByExpressions, query, selectExpressions)
    +    } else {
    +      // GROUP BY .... (WITH CUBE | WITH ROLLUP)?
    +      val mappedGroupByExpressions = if (CUBE != null) {
    +        Seq(Cube(groupByExpressions))
    +      } else if (ROLLUP != null) {
    +        Seq(Rollup(groupByExpressions))
    +      } else {
    +        groupByExpressions
    +      }
    +      Aggregate(mappedGroupByExpressions, selectExpressions, query)
    +    }
    +  }
    +
    +  /**
    +   * Add a [[Generate]] (Lateral View) to a logical plan.
    +   */
    +  private def withGenerate(
    +      query: LogicalPlan,
    +      ctx: LateralViewContext): LogicalPlan = withOrigin(ctx) {
    +    val expressions = expressionList(ctx.expression)
    +
    +    // Create the generator.
    +    val generator = ctx.qualifiedName.getText.toLowerCase match {
    +      case "explode" if expressions.size == 1 =>
    +        Explode(expressions.head)
    +      case "json_tuple" =>
    +        JsonTuple(expressions)
    +      case other =>
    +        withGenerator(other, expressions, ctx)
    +    }
    +
    +    Generate(
    +      generator,
    +      join = true,
    +      outer = ctx.OUTER != null,
    +      Some(ctx.tblName.getText.toLowerCase),
    +      ctx.colName.asScala.map(_.getText).map(UnresolvedAttribute.apply),
    +      query)
    +  }
    +
    +  /**
    +   * Create a [[Generator]]. Override this method in order to support custom Generators.
    +   */
    +  protected def withGenerator(
    +      name: String,
    +      expressions: Seq[Expression],
    +      ctx: LateralViewContext): Generator = {
    +    throw new ParseException(s"Generator function '$name' is not 
supported", ctx)
    +  }
    +
    +  /**
    +   * Create a join between two or more logical plans.
    +   */
    +  override def visitJoinRelation(ctx: JoinRelationContext): LogicalPlan = withOrigin(ctx) {
    +    /** Build a join between two plans. */
    +    def join(ctx: JoinRelationContext, left: LogicalPlan, right: LogicalPlan): Join = {
    +      val baseJoinType = ctx.joinType match {
    +        case null => Inner
    +        case jt if jt.FULL != null => FullOuter
    +        case jt if jt.SEMI != null => LeftSemi
    +        case jt if jt.LEFT != null => LeftOuter
    +        case jt if jt.RIGHT != null => RightOuter
    +        case _ => Inner
    +      }
    +
    +      // Resolve the join type and join condition
    +      val (joinType, condition) = Option(ctx.joinCriteria) match {
    +        case Some(c) if c.USING != null =>
    +          val columns = c.identifier.asScala.map { column =>
    +            UnresolvedAttribute.quoted(column.getText)
    +          }
    +          (UsingJoin(baseJoinType, columns), None)
    +        case Some(c) if c.booleanExpression != null =>
    +          (baseJoinType, Option(expression(c.booleanExpression)))
    +        case None if ctx.NATURAL != null =>
    +          (NaturalJoin(baseJoinType), None)
    +        case None =>
    +          (baseJoinType, None)
    +      }
    +      Join(left, right, joinType, condition)
    +    }
    +
    +    // Handle all consecutive join clauses. ANTLR produces a right nested tree in which the
    +    // first join clause is at the top. However fields of previously referenced tables can be used
    +    // in following join clauses. The tree needs to be reversed in order to make this work.
    +    var result = plan(ctx.left)
    +    var current = ctx
    +    while (current != null) {
    +      current.right match {
    +        case right: JoinRelationContext =>
    +          result = join(current, result, plan(right.left))
    +          current = right
    +        case right =>
    +          result = join(current, result, plan(right))
    +          current = null
    +      }
    +    }
    +    result
    +  }
    +
    +  /**
    +   * Add a [[Sample]] to a logical plan.
    +   *
    +   * This currently supports the following sampling methods:
    +   * - TABLESAMPLE(x ROWS): Sample the table down to the given number of rows.
    +   * - TABLESAMPLE(x PERCENT): Sample the table down to the given percentage. Note that percentages
    +   * are defined as a number between 0 and 100.
    +   * - TABLESAMPLE(BUCKET x OUT OF y): Sample the table down to a 'x' divided by 'y' fraction.
    +   */
    +  private def withSample(ctx: SampleContext, query: LogicalPlan): LogicalPlan = withOrigin(ctx) {
    +    // Create a sampled plan if we need one.
    +    def sample(fraction: Double): Sample = {
    +      // The range of fraction accepted by Sample is [0, 1]. Because Hive's block sampling
    +      // function takes X PERCENT as the input and the range of X is [0, 100], we need to
    +      // adjust the fraction.
    +      val eps = RandomSampler.roundingEpsilon
    +      assert(fraction >= 0.0 - eps && fraction <= 1.0 + eps,
    +        s"Sampling fraction ($fraction) must be on interval [0, 1]",
    +        ctx)
    +      Sample(0.0, fraction, withReplacement = false, (math.random * 1000).toInt, query)(true)
    +    }
    +
    +    ctx.sampleType.getType match {
    +      case SqlBaseParser.ROWS =>
    +        Limit(expression(ctx.expression), query)
    +
    +      case SqlBaseParser.PERCENTLIT =>
    +        val fraction = ctx.percentage.getText.toDouble
    +        sample(fraction / 100.0d)
    +
    +      case SqlBaseParser.BUCKET if ctx.ON != null =>
    +        throw new ParseException("TABLESAMPLE(BUCKET x OUT OF y ON id) is 
not supported", ctx)
    +
    +      case SqlBaseParser.BUCKET =>
    +        sample(ctx.numerator.getText.toDouble / ctx.denominator.getText.toDouble)
    +    }
    +  }
    +
    +  /**
    +   * Create a logical plan for a sub-query.
    +   */
    +  override def visitSubquery(ctx: SubqueryContext): LogicalPlan = withOrigin(ctx) {
    +    plan(ctx.queryNoWith)
    +  }
    +
    +  /**
    +   * Create an un-aliased table reference. This is typically used for top-level table references,
    +   * for example:
    +   * {{{
    +   *   INSERT INTO db.tbl2
    +   *   TABLE db.tbl1
    +   * }}}
    +   */
    +  override def visitTable(ctx: TableContext): LogicalPlan = withOrigin(ctx) {
    +    UnresolvedRelation(visitTableIdentifier(ctx.tableIdentifier), None)
    +  }
    +
    +  /**
    +   * Create an aliased table reference. This is typically used in FROM clauses.
    +   */
    +  override def visitTableName(ctx: TableNameContext): LogicalPlan = withOrigin(ctx) {
    +    val table = UnresolvedRelation(
    +      visitTableIdentifier(ctx.tableIdentifier),
    +      Option(ctx.identifier).map(_.getText))
    +    table.optionalMap(ctx.sample)(withSample)
    +  }
    +
    +  /**
    +   * Create an inline table (a virtual table in Hive parlance).
    +   */
    +  override def visitInlineTable(ctx: InlineTableContext): LogicalPlan = withOrigin(ctx) {
    +    // Get the backing expressions.
    +    val expressions = ctx.expression.asScala.map { eCtx =>
    +      val e = expression(eCtx)
    +      assert(e.foldable, "All expressions in an inline table must be constants.", eCtx)
    +      e
    +    }
    +
    +    // Validate and evaluate the rows.
    +    val (structType, structConstructor) = expressions.head.dataType match {
    --- End diff --
    
    As discussed offline: the current approach is to use the schema of the first row and to
    force the other rows to comply with that schema. This is semantically very different from
    the same query written as a chain of UNION ALL statements, e.g.:
    `SELECT 1 AS id, 'a' AS name UNION ALL SELECT 2, 'b' UNION ALL SELECT 2, 'c'`
    
    Fixing this is not the highest priority. The fix should probably use `UNION ALL`s instead
    of a `LocalRelation` (the Optimizer should eventually be able to rewrite this back into a
    LocalRelation).

