sandip-db commented on code in PR #44571:
URL: https://github.com/apache/spark/pull/44571#discussion_r1442378103
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlInferSchema.scala:
##########
@@ -175,31 +174,26 @@ class XmlInferSchema(options: XmlOptions, caseSensitive:
Boolean)
parser.nextEvent()
parser.peek match {
Review Comment:
This case (`case c: Characters if c.isWhiteSpace`) can be combined with the
next one, with the minor change suggested below.
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlInferSchema.scala:
##########
@@ -175,31 +174,26 @@ class XmlInferSchema(options: XmlOptions, caseSensitive:
Boolean)
parser.nextEvent()
parser.peek match {
case _: StartElement => inferObject(parser)
- case _: EndElement if data.isEmpty => NullType
- case _: EndElement if options.nullValue == "" => NullType
- case _: EndElement => StringType
+ case _: EndElement if data.trim.isEmpty =>
Review Comment:
Won't `data.trim.isEmpty` always be true here, since `c.isWhiteSpace` is
true?
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlInferSchema.scala:
##########
@@ -175,31 +174,26 @@ class XmlInferSchema(options: XmlOptions, caseSensitive:
Boolean)
parser.nextEvent()
parser.peek match {
case _: StartElement => inferObject(parser)
- case _: EndElement if data.isEmpty => NullType
- case _: EndElement if options.nullValue == "" => NullType
- case _: EndElement => StringType
+ case _: EndElement if data.trim.isEmpty =>
+ StaxXmlParserUtils.consumeNextEndElement(parser)
+ NullType
+ case _: EndElement if options.nullValue == "" =>
Review Comment:
Given that `c.isWhiteSpace` is true, the previous `EndElement` case will
always match, so this case will never be reached.
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlInferSchema.scala:
##########
@@ -175,31 +174,26 @@ class XmlInferSchema(options: XmlOptions, caseSensitive:
Boolean)
parser.nextEvent()
parser.peek match {
case _: StartElement => inferObject(parser)
- case _: EndElement if data.isEmpty => NullType
- case _: EndElement if options.nullValue == "" => NullType
- case _: EndElement => StringType
+ case _: EndElement if data.trim.isEmpty =>
+ StaxXmlParserUtils.consumeNextEndElement(parser)
+ NullType
+ case _: EndElement if options.nullValue == "" =>
+ StaxXmlParserUtils.consumeNextEndElement(parser)
+ NullType
+ case _: EndElement =>
+ StaxXmlParserUtils.consumeNextEndElement(parser)
+ StringType
case _ => inferField(parser)
}
case c: Characters if !c.isWhiteSpace =>
- val characterType = inferFrom(c.getData)
- parser.nextEvent()
- parser.peek match {
- case _: StartElement =>
- // Some more elements follow;
- // This is a mix of values and other elements
- val innerType = inferObject(parser).asInstanceOf[StructType]
- addOrUpdateValueTagType(innerType, characterType)
- case _ =>
- val fieldType = inferField(parser)
- fieldType match {
- case st: StructType => addOrUpdateValueTagType(st, characterType)
- case _: NullType => characterType
- case _: DataType =>
- // The field type couldn't be an array type
- new StructType()
- .add(options.valueTag, addOrUpdateType(Some(characterType),
fieldType))
-
- }
+ val structType = inferObject(parser).asInstanceOf[StructType]
+ structType match {
+ case simpleType
+ if structType.fields.length == 1
+ && isPrimitiveType(structType.fields.head.dataType)
+ && isValueTagField(structType.fields.head) =>
+ simpleType.fields.head.dataType
+ case _ => structType
Review Comment:
Remove the case (`case c: Characters if c.isWhiteSpace`) and add the
following case here:
```suggestion
case simpleType if structType.isEmpty => NullType
case _ => structType
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]