Github user gengliangwang commented on a diff in the pull request:
https://github.com/apache/spark/pull/21004#discussion_r180744104
--- Diff:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala
---
@@ -126,35 +126,35 @@ abstract class PartitioningAwareFileIndex(
val caseInsensitiveOptions = CaseInsensitiveMap(parameters)
val timeZoneId =
caseInsensitiveOptions.get(DateTimeUtils.TIMEZONE_OPTION)
.getOrElse(sparkSession.sessionState.conf.sessionLocalTimeZone)
-
- userPartitionSchema match {
+ val inferredPartitionSpec = PartitioningUtils.parsePartitions(
+ leafDirs,
+ typeInference =
sparkSession.sessionState.conf.partitionColumnTypeInferenceEnabled,
+ basePaths = basePaths,
+ timeZoneId = timeZoneId)
+ userSpecifiedSchema match {
case Some(userProvidedSchema) if userProvidedSchema.nonEmpty =>
- val spec = PartitioningUtils.parsePartitions(
- leafDirs,
- typeInference = false,
- basePaths = basePaths,
- timeZoneId = timeZoneId)
+ val userPartitionSchema =
+
combineInferredAndUserSpecifiedPartitionSchema(inferredPartitionSpec)
- // Without auto inference, all of value in the `row` should be
null or in StringType,
// we need to cast into the data type that user specified.
def castPartitionValuesToUserSchema(row: InternalRow) = {
InternalRow((0 until row.numFields).map { i =>
+ val expr =
inferredPartitionSpec.partitionColumns.fields(i).dataType match {
+ case StringType => Literal.create(row.getUTF8String(i),
StringType)
--- End diff --
`row.get(i, StringType)` throws an exception
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]