dtenedor commented on code in PR #40652:
URL: https://github.com/apache/spark/pull/40652#discussion_r1159046300
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveDefaultColumns.scala:
##########
@@ -271,32 +271,34 @@ case class ResolveDefaultColumns(catalog: SessionCatalog)
extends Rule[LogicalPl
/**
* Updates an inline table to generate missing default column values.
*/
- private def addMissingDefaultValuesForInsertFromInlineTable(
+ def addMissingDefaultValuesForInsertFromInlineTable(
node: LogicalPlan,
insertTableSchemaWithoutPartitionColumns: StructType,
numUserSpecifiedColumns: Int): LogicalPlan = {
val schema = insertTableSchemaWithoutPartitionColumns
- val newDefaultExpressions: Seq[Expression] =
- getDefaultExpressionsForInsert(schema, numUserSpecifiedColumns)
- val newNames: Seq[String] = if (numUserSpecifiedColumns > 0) {
- schema.fields.drop(numUserSpecifiedColumns).map(_.name)
- } else {
- schema.fields.map(_.name)
- }
+ val newDefaultExpressions: Seq[UnresolvedAttribute] =
+ getNewDefaultExpressionsForInsert(schema, numUserSpecifiedColumns,
node.output.size)
+ val newNames: Seq[String] = schema.fields.map(_.name)
node match {
case _ if newDefaultExpressions.isEmpty => node
case table: UnresolvedInlineTable =>
table.copy(
- names = table.names ++ newNames,
+ names = newNames,
rows = table.rows.map { row => row ++ newDefaultExpressions })
case local: LocalRelation =>
- // Note that we have consumed a LocalRelation but return an
UnresolvedInlineTable, because
- // addMissingDefaultValuesForInsertFromProject must replace unresolved
DEFAULT references.
- UnresolvedInlineTable(
- local.output.map(_.name) ++ newNames,
- local.data.map { row =>
- val colTypes = StructType(local.output.map(col =>
StructField(col.name, col.dataType)))
- row.toSeq(colTypes).map(Literal(_)) ++ newDefaultExpressions
+ val newDefaultExpressionsRow = new GenericInternalRow(
+ schema.fields.drop(local.output.size).map {
Review Comment:
Good question: researching this, I find this code path only runs when there
is a user-specified column list with fewer columns than the target table;
otherwise, the above 'newDefaultExpressions' is empty and we match the first
case in this list instead. So that should never happen. I added this
information to a comment here, and added a unit test case to cover it.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]