Github user liancheng commented on a diff in the pull request: https://github.com/apache/spark/pull/13769#discussion_r67637303 --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala --- @@ -43,8 +43,128 @@ import org.apache.spark.unsafe.types.UTF8String * Replaces generic operations with specific variants that are designed to work with Spark * SQL Data Sources. */ -private[sql] object DataSourceAnalysis extends Rule[LogicalPlan] { +private[sql] case class DataSourceAnalysis(conf: CatalystConf) extends Rule[LogicalPlan] { + + def resolver: Resolver = { + if (conf.caseSensitiveAnalysis) { + caseSensitiveResolution + } else { + caseInsensitiveResolution + } + } + + // The access modifier is used to expose this method to tests. + private[sql] def convertStaticPartitions( + sourceAttributes: Seq[Attribute], + providedPartitions: Map[String, Option[String]], + targetAttributes: Seq[Attribute], + targetPartitionSchema: StructType): Seq[NamedExpression] = { + + assert(providedPartitions.exists(_._2.isDefined)) + + val staticPartitions = providedPartitions.flatMap { + case (partKey, Some(partValue)) => (partKey, partValue) :: Nil + case (_, None) => Nil + } + + // The sum of the number of static partition columns and columns provided in the SELECT + // clause needs to match the number of columns of the target table. + if (staticPartitions.size + sourceAttributes.size != targetAttributes.size) { + throw new AnalysisException( + s"The data to be inserted needs to have the same number of " + + s"columns as the target table: target table has ${targetAttributes.size} " + + s"column(s) but the inserted data has ${sourceAttributes.size + staticPartitions.size} " + + s"column(s), which contain ${staticPartitions.size} partition column(s) having " + + s"assigned constant values.") + } + + if (providedPartitions.size != targetPartitionSchema.fields.size) { + throw new AnalysisException( + s"The data to be inserted needs to have the same number of " + + s"partition columns as the target table: target table " + + s"has ${targetPartitionSchema.fields.size} partition column(s) but the inserted " + + s"data has ${providedPartitions.size} partition columns specified.") + } + + staticPartitions.foreach { + case (partKey, partValue) => + if (!targetPartitionSchema.fields.exists(field => resolver(field.name, partKey))) { + throw new AnalysisException( + s"$partKey is not a partition column. Partition columns are " + + s"${targetPartitionSchema.fields.map(_.name).mkString("[", ",", "]")}") + } + } + + val partitionList = targetPartitionSchema.fields.map { field => + val potentialSpecs = staticPartitions.filter { + case (partKey, partValue) => resolver(field.name, partKey) + } + if (potentialSpecs.size == 0) { + None + } else if (potentialSpecs.size == 1) { + val partValue = potentialSpecs.head._2 + Some(Alias(Cast(Literal(partValue), field.dataType), "_staticPart")()) + } else { + throw new AnalysisException( + s"Partition column ${field.name} have multiple values specified, " + + s"${potentialSpecs.mkString("[", ", ", "]")}. Please only specify a single value.") + } + } + + partitionList.sliding(2).foreach { v => --- End diff -- We can use the following check instead: ```scala partitionList.dropWhile(_.isDefined).collectFirst { case Some(_) => throw new AnalysisException("...") } ```
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org