Github user marmbrus commented on a diff in the pull request: https://github.com/apache/spark/pull/1346#discussion_r14904015 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/dataTypes.scala --- @@ -197,47 +213,145 @@ object FractionalType { } } abstract class FractionalType extends NumericType { - val fractional: Fractional[JvmType] + private[sql] val fractional: Fractional[JvmType] } case object DecimalType extends FractionalType { - type JvmType = BigDecimal - @transient lazy val tag = typeTag[JvmType] - val numeric = implicitly[Numeric[BigDecimal]] - val fractional = implicitly[Fractional[BigDecimal]] - val ordering = implicitly[Ordering[JvmType]] + private[sql] type JvmType = BigDecimal + @transient private[sql] lazy val tag = typeTag[JvmType] + private[sql] val numeric = implicitly[Numeric[BigDecimal]] + private[sql] val fractional = implicitly[Fractional[BigDecimal]] + private[sql] val ordering = implicitly[Ordering[JvmType]] + def simpleString: String = "decimal" } case object DoubleType extends FractionalType { - type JvmType = Double - @transient lazy val tag = typeTag[JvmType] - val numeric = implicitly[Numeric[Double]] - val fractional = implicitly[Fractional[Double]] - val ordering = implicitly[Ordering[JvmType]] + private[sql] type JvmType = Double + @transient private[sql] lazy val tag = typeTag[JvmType] + private[sql] val numeric = implicitly[Numeric[Double]] + private[sql] val fractional = implicitly[Fractional[Double]] + private[sql] val ordering = implicitly[Ordering[JvmType]] + def simpleString: String = "double" } case object FloatType extends FractionalType { - type JvmType = Float - @transient lazy val tag = typeTag[JvmType] - val numeric = implicitly[Numeric[Float]] - val fractional = implicitly[Fractional[Float]] - val ordering = implicitly[Ordering[JvmType]] + private[sql] type JvmType = Float + @transient private[sql] lazy val tag = typeTag[JvmType] + private[sql] val numeric = implicitly[Numeric[Float]] + 
private[sql] val fractional = implicitly[Fractional[Float]] + private[sql] val ordering = implicitly[Ordering[JvmType]] + def simpleString: String = "float" } -case class ArrayType(elementType: DataType) extends DataType +object ArrayType { + def apply(elementType: DataType): ArrayType = ArrayType(elementType, false) +} -case class StructField(name: String, dataType: DataType, nullable: Boolean) +case class ArrayType(elementType: DataType, containsNull: Boolean) extends DataType { + private[sql] def buildFormattedString(prefix: String, builder: StringBuilder): Unit = { + builder.append( + s"${prefix}-- element: ${elementType.simpleString} (containsNull = ${containsNull})\n") + elementType match { + case array: ArrayType => + array.buildFormattedString(s"$prefix |", builder) + case struct: StructType => + struct.buildFormattedString(s"$prefix |", builder) + case map: MapType => + map.buildFormattedString(s"$prefix |", builder) + case _ => + } + } + + def simpleString: String = "array" +} + +case class StructField(name: String, dataType: DataType, nullable: Boolean) { + + private[sql] def buildFormattedString(prefix: String, builder: StringBuilder): Unit = { + builder.append(s"${prefix}-- ${name}: ${dataType.simpleString} (nullable = ${nullable})\n") + dataType match { + case array: ArrayType => + array.buildFormattedString(s"$prefix |", builder) + case struct: StructType => + struct.buildFormattedString(s"$prefix |", builder) + case map: MapType => + map.buildFormattedString(s"$prefix |", builder) + case _ => + } + } +} object StructType { - def fromAttributes(attributes: Seq[Attribute]): StructType = { + def fromAttributes(attributes: Seq[Attribute]): StructType = StructType(attributes.map(a => StructField(a.name, a.dataType, a.nullable))) - } - // def apply(fields: Seq[StructField]) = new StructType(fields.toIndexedSeq) + private def validateFields(fields: Seq[StructField]): Boolean = + fields.map(field => field.name).distinct.size == fields.size + + def apply[A 
<: String: ClassTag, B <: DataType: ClassTag](fields: (A, B)*): StructType = + StructType(fields.map(field => StructField(field._1, field._2, true))) + + def apply[A <: String: ClassTag, B <: DataType: ClassTag, C <: Boolean: ClassTag]( + fields: (A, B, C)*): StructType = + StructType(fields.map(field => StructField(field._1, field._2, field._3))) } case class StructType(fields: Seq[StructField]) extends DataType { + require(StructType.validateFields(fields), "Found fields with the same name.") + + def apply(name: String): StructField = { + fields.find(f => f.name == name).orNull + } + + def apply(names: String*): StructType = { + val nameSet = names.toSet + StructType(fields.filter(f => nameSet.contains(f.name))) + } + def toAttributes = fields.map(f => AttributeReference(f.name, f.dataType, f.nullable)()) + + def schemaString: String = { + val builder = new StringBuilder + builder.append("root\n") + val prefix = " |" + fields.foreach(field => field.buildFormattedString(prefix, builder)) + + builder.toString() + } + + def printSchema(): Unit = println(schemaString) + + private[sql] def buildFormattedString(prefix: String, builder: StringBuilder): Unit = { + fields.foreach(field => field.buildFormattedString(prefix, builder)) + } + + def simpleString: String = "struct" } -case class MapType(keyType: DataType, valueType: DataType) extends DataType +case class MapType(keyType: DataType, valueType: DataType) extends DataType { + private[sql] def buildFormattedString(prefix: String, builder: StringBuilder): Unit = { + builder.append(s"${prefix}-- key: ${keyType.simpleString}\n") + keyType match { --- End diff -- This matching code is duplicated like 4 times AFAICT. Perhaps it could just be a protected function in DataType.
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and would like it enabled, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---