eatoncys commented on issue #23010: [SPARK-26012][SQL]Null and '' values should not cause dynamic partition failure of string types URL: https://github.com/apache/spark/pull/23010#issuecomment-465891736 @cloud-fan @maropu I have added an analyzer rule like the one below, and it works correctly for SQL, but I don't know how to apply this rule directly in `writeAndRead`. Could you give some suggestions? Thanks. case class UpdateEmptyValueOfPartitionToNull(conf: SQLConf) extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperatorsUp { case i: InsertIntoHadoopFsRelationCommand => val partitionSet = AttributeSet(i.partitionColumns) val projectList: Seq[NamedExpression] = i.query.output.map { case p if partitionSet.contains(p) && p.dataType == StringType && p.nullable => Alias(Empty2Null(p), p.name)() case attr => attr } val actualQuery = Project(projectList, i.query) val partitionColumns = i.partitionColumns.map { col => actualQuery.output.find(a => conf.resolver(a.name, col.name)).getOrElse(col) } i.copy(partitionColumns = partitionColumns, query = actualQuery) } } case class Empty2Null(child: Expression) extends UnaryExpression with String2StringExpression { override def convert(v: UTF8String): UTF8String = if (v.numBytes() == 0) null else v override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { nullSafeCodeGen(ctx, ev, c => s"""if ($c.numBytes() == 0) { | ${ev.isNull} = true; | ${ev.value} = null; |} else { | ${ev.value} = $c; |}""".stripMargin) } }
---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected] With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
