[GitHub] spark pull request: [SPARK-12575][SQL] Grammar parity with existin...

cloud-fan Thu, 14 Jan 2016 16:22:14 -0800

Github user cloud-fan commented on a diff in the pull request:

    https://github.com/apache/spark/pull/10745#discussion_r49806594
  
    --- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala ---
    @@ -787,76 +783,92 @@ 
https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
         case Token("TOK_STRINGLITERALSEQUENCE", strings) =>
           Literal(strings.map(s => 
ParseUtils.unescapeSQLString(s.text)).mkString)
     
    -    // This code is adapted from
    -    // 
/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java#L223
    -    case ast: ASTNode if numericAstTypes contains ast.tokenType =>
    -      var v: Literal = null
    -      try {
    -        if (ast.text.endsWith("L")) {
    -          // Literal bigint.
    -          v = Literal.create(ast.text.substring(0, ast.text.length() - 
1).toLong, LongType)
    -        } else if (ast.text.endsWith("S")) {
    -          // Literal smallint.
    -          v = Literal.create(ast.text.substring(0, ast.text.length() - 
1).toShort, ShortType)
    -        } else if (ast.text.endsWith("Y")) {
    -          // Literal tinyint.
    -          v = Literal.create(ast.text.substring(0, ast.text.length() - 
1).toByte, ByteType)
    -        } else if (ast.text.endsWith("BD") || ast.text.endsWith("D")) {
    -          // Literal decimal
    -          val strVal = ast.text.stripSuffix("D").stripSuffix("B")
    -          v = Literal(Decimal(strVal))
    -        } else {
    -          v = Literal.create(ast.text.toDouble, DoubleType)
    -          v = Literal.create(ast.text.toLong, LongType)
    -          v = Literal.create(ast.text.toInt, IntegerType)
    -        }
    -      } catch {
    -        case nfe: NumberFormatException => // Do nothing
    -      }
    -
    -      if (v == null) {
    -        sys.error(s"Failed to parse number '${ast.text}'.")
    -      } else {
    -        v
    -      }
    -
    -    case ast: ASTNode if ast.tokenType == SparkSqlParser.StringLiteral =>
    -      Literal(ParseUtils.unescapeSQLString(ast.text))
    -
    -    case ast: ASTNode if ast.tokenType == SparkSqlParser.TOK_DATELITERAL =>
    -      Literal(Date.valueOf(ast.text.substring(1, ast.text.length - 1)))
    -
    -    case ast: ASTNode if ast.tokenType == 
SparkSqlParser.TOK_CHARSETLITERAL =>
    -      Literal(ParseUtils.charSetString(ast.children.head.text, 
ast.children(1).text))
    -
    -    case ast: ASTNode if ast.tokenType == 
SparkSqlParser.TOK_INTERVAL_YEAR_MONTH_LITERAL =>
    -      Literal(CalendarInterval.fromYearMonthString(ast.text))
    +    case ast if ast.tokenType == SparkSqlParser.TinyintLiteral =>
    +      Literal.create(ast.text.substring(0, ast.text.length() - 1).toByte, 
ByteType)
     
    -    case ast: ASTNode if ast.tokenType == 
SparkSqlParser.TOK_INTERVAL_DAY_TIME_LITERAL =>
    -      Literal(CalendarInterval.fromDayTimeString(ast.text))
    +    case ast if ast.tokenType == SparkSqlParser.SmallintLiteral =>
    +      Literal.create(ast.text.substring(0, ast.text.length() - 1).toShort, 
ShortType)
     
    -    case ast: ASTNode if ast.tokenType == 
SparkSqlParser.TOK_INTERVAL_YEAR_LITERAL =>
    -      Literal(CalendarInterval.fromSingleUnitString("year", ast.text))
    +    case ast if ast.tokenType == SparkSqlParser.BigintLiteral =>
    +      Literal.create(ast.text.substring(0, ast.text.length() - 1).toLong, 
LongType)
     
    -    case ast: ASTNode if ast.tokenType == 
SparkSqlParser.TOK_INTERVAL_MONTH_LITERAL =>
    -      Literal(CalendarInterval.fromSingleUnitString("month", ast.text))
    +    case ast if ast.tokenType == SparkSqlParser.DecimalLiteral =>
    +      Literal(Decimal(ast.text.substring(0, ast.text.length() - 2)))
     
    -    case ast: ASTNode if ast.tokenType == 
SparkSqlParser.TOK_INTERVAL_DAY_LITERAL =>
    -      Literal(CalendarInterval.fromSingleUnitString("day", ast.text))
    -
    -    case ast: ASTNode if ast.tokenType == 
SparkSqlParser.TOK_INTERVAL_HOUR_LITERAL =>
    -      Literal(CalendarInterval.fromSingleUnitString("hour", ast.text))
    +    case ast if ast.tokenType == SparkSqlParser.Number =>
    +      val text = ast.text
    +      text match {
    +        case INTEGRAL() =>
    +          BigDecimal(text) match {
    +            case v if v.isValidInt =>
    +              Literal(v.intValue())
    +            case v if v.isValidLong =>
    +              Literal(v.longValue())
    +            case v => Literal(v.underlying())
    +          }
    +        case DECIMAL(_*) =>
    +          // Hive and the current the old Spark SQL Parser have a 
different way of dealing with
    +          // decimal numbers, the SQL Parser would use a decimal whereas 
the Hive would use a
    +          // Double.
    +           if (convertDecimalLiteralToBigDecimal) {
    +             Literal(BigDecimal(text).underlying())
    +          } else {
    +             Literal(text.toDouble)
    +          }
    +        case SCIENTIFICDECIMAL(_*) =>
    +          Literal(text.toDouble)
    +        case _ =>
    +          noParseRule("Numeric", ast)
    +      }
    +    case ast if ast.tokenType == SparkSqlParser.StringLiteral =>
    +      Literal(ParseUtils.unescapeSQLString(ast.text))
     
    -    case ast: ASTNode if ast.tokenType == 
SparkSqlParser.TOK_INTERVAL_MINUTE_LITERAL =>
    -      Literal(CalendarInterval.fromSingleUnitString("minute", ast.text))
    +    case ast if ast.tokenType == SparkSqlParser.TOK_DATELITERAL =>
    +      Literal(Date.valueOf(ast.text.substring(1, ast.text.length - 1)))
     
    -    case ast: ASTNode if ast.tokenType == 
SparkSqlParser.TOK_INTERVAL_SECOND_LITERAL =>
    -      Literal(CalendarInterval.fromSingleUnitString("second", ast.text))
    +    case ast if ast.tokenType == SparkSqlParser.TOK_CHARSETLITERAL =>
    +      
Literal(ParseUtils.charSetString(unquoteString(ast.children.head.text), 
ast.children(1).text))
    +
    +    case ast if ast.tokenType == 
SparkSqlParser.TOK_INTERVAL_YEAR_MONTH_LITERAL =>
    +      Literal(CalendarInterval.fromYearMonthString(ast.children.head.text))
    +
    +    case ast if ast.tokenType == 
SparkSqlParser.TOK_INTERVAL_DAY_TIME_LITERAL =>
    +      Literal(CalendarInterval.fromDayTimeString(ast.children.head.text))
    +
    +    case Token("TOK_INTERVAL", elements) =>
    +      val (interval, updated) = elements.foldLeft((new CalendarInterval(0, 
0), false)) {
    --- End diff --
    
    How about we check `elements.isEmpty` first and throw an exception if needed, 
and then `foldLeft`? Then we don't need this `updated` variable.



---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[GitHub] spark pull request: [SPARK-12575][SQL] Grammar parity with existin...

Reply via email to