Github user HyukjinKwon commented on a diff in the pull request:
https://github.com/apache/spark/pull/13988#discussion_r94755096
--- Diff:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala
---
@@ -214,127 +234,47 @@ private[csv] object CSVInferSchema {
case _ => None
}
-}
-
-private[csv] object CSVTypeCast {
- // A `ValueConverter` is responsible for converting the given value to a
desired type.
- private type ValueConverter = String => Any
/**
- * Create converters which cast each given string datum to each
specified type in given schema.
- * Currently, we do not support complex types (`ArrayType`, `MapType`,
`StructType`).
- *
- * For string types, this is simply the datum.
- * For other types, this is converted into the value according to the
type.
- * For other nullable types, returns null if it is null or equals to the
value specified
- * in `nullValue` option.
- *
- * @param schema schema that contains data types to cast the given value
into.
- * @param options CSV options.
+ * Generates a header from the given row which is null-safe and
duplicate-safe.
*/
- def makeConverters(
- schema: StructType,
- options: CSVOptions = CSVOptions()): Array[ValueConverter] = {
- schema.map(f => makeConverter(f.name, f.dataType, f.nullable,
options)).toArray
- }
-
- /**
- * Create a converter which converts the string value to a value
according to a desired type.
- */
- def makeConverter(
- name: String,
- dataType: DataType,
- nullable: Boolean = true,
- options: CSVOptions = CSVOptions()): ValueConverter = dataType
match {
- case _: ByteType => (d: String) =>
- nullSafeDatum(d, name, nullable, options)(_.toByte)
-
- case _: ShortType => (d: String) =>
- nullSafeDatum(d, name, nullable, options)(_.toShort)
-
- case _: IntegerType => (d: String) =>
- nullSafeDatum(d, name, nullable, options)(_.toInt)
-
- case _: LongType => (d: String) =>
- nullSafeDatum(d, name, nullable, options)(_.toLong)
-
- case _: FloatType => (d: String) =>
- nullSafeDatum(d, name, nullable, options) {
- case options.nanValue => Float.NaN
- case options.negativeInf => Float.NegativeInfinity
- case options.positiveInf => Float.PositiveInfinity
- case datum =>
- Try(datum.toFloat)
-
.getOrElse(NumberFormat.getInstance(Locale.US).parse(datum).floatValue())
- }
-
- case _: DoubleType => (d: String) =>
- nullSafeDatum(d, name, nullable, options) {
- case options.nanValue => Double.NaN
- case options.negativeInf => Double.NegativeInfinity
- case options.positiveInf => Double.PositiveInfinity
- case datum =>
- Try(datum.toDouble)
-
.getOrElse(NumberFormat.getInstance(Locale.US).parse(datum).doubleValue())
+ private def makeSafeHeader(
--- End diff --
This method, `makeSafeHeader`, was simply moved here from `CSVFileFormat`.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]