Github user yhuai commented on a diff in the pull request:
https://github.com/apache/spark/pull/3431#discussion_r22547365
--- Diff: sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala ---
@@ -83,10 +99,104 @@ private[sql] class DDLParser extends
StandardTokenParsers with PackratParsers wi
protected lazy val className: Parser[String] = repsep(ident, ".") ^^ {
case s => s.mkString(".")}
protected lazy val pair: Parser[(String, String)] = ident ~ stringLit ^^
{ case k ~ v => (k,v) }
+
+ protected lazy val column: Parser[StructField] =
+ ( ident ~ ident ^^ { case name ~ typ =>
+ StructField(name, metastoreTypes.toDataType(typ))
+ }
+ |
+ ident ~ (DECIMAL ~ "(" ~> numericLit) ~ ("," ~> numericLit <~ ")") ^^ {
+ case name ~ precision ~ scale =>
+ StructField(name, DecimalType(precision.toInt, scale.toInt))
+ }
+ )
+}
+
+/**
+ * :: DeveloperApi ::
+ * Provides a parser for data types.
+ */
+@DeveloperApi
+private[sql] class MetastoreTypes extends RegexParsers {
+ protected lazy val primitiveType: Parser[DataType] =
+ "string" ^^^ StringType |
+ "float" ^^^ FloatType |
+ "int" ^^^ IntegerType |
+ "tinyint" ^^^ ByteType |
+ "smallint" ^^^ ShortType |
+ "double" ^^^ DoubleType |
+ "bigint" ^^^ LongType |
+ "binary" ^^^ BinaryType |
+ "boolean" ^^^ BooleanType |
+ fixedDecimalType | // decimal with precision/scale
+ "decimal" ^^^ DecimalType.Unlimited | // decimal with no precision/scale
+ "date" ^^^ DateType |
+ "timestamp" ^^^ TimestampType |
+ "varchar\\((\\d+)\\)".r ^^^ StringType
+
+ protected lazy val fixedDecimalType: Parser[DataType] =
+ ("decimal" ~> "(" ~> "\\d+".r) ~ ("," ~> "\\d+".r <~ ")") ^^ {
+ case precision ~ scale =>
+ DecimalType(precision.toInt, scale.toInt)
+ }
+
+ protected lazy val arrayType: Parser[DataType] =
+ "array" ~> "<" ~> dataType <~ ">" ^^ {
+ case tpe => ArrayType(tpe)
+ }
+
+ protected lazy val mapType: Parser[DataType] =
+ "map" ~> "<" ~> dataType ~ "," ~ dataType <~ ">" ^^ {
+ case t1 ~ _ ~ t2 => MapType(t1, t2)
+ }
+
+ protected lazy val structField: Parser[StructField] =
+ "[a-zA-Z0-9_]*".r ~ ":" ~ dataType ^^ {
+ case name ~ _ ~ tpe => StructField(name, tpe, nullable = true)
+ }
+
+ protected lazy val structType: Parser[DataType] =
+ "struct" ~> "<" ~> repsep(structField,",") <~ ">" ^^ {
+ case fields => new StructType(fields)
+ }
+
+ private[sql] lazy val dataType: Parser[DataType] =
+ arrayType |
+ mapType |
+ structType |
+ primitiveType
+
+ def toDataType(metastoreType: String): DataType = parseAll(dataType, metastoreType) match {
+ case Success(result, _) => result
+ case failure: NoSuccess => sys.error(s"Unsupported dataType: $metastoreType")
+ }
+
+ def toMetastoreType(dt: DataType): String = dt match {
--- End diff --
It seems we do not need it here, since we only need to parse strings.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]