shujingyang-db commented on code in PR #45411:
URL: https://github.com/apache/spark/pull/45411#discussion_r1520394446
##########
sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/TestXmlData.scala:
##########
@@ -68,4 +68,444 @@ private[xml] trait TestXmlData {
f(dir)
fs.setVerifyChecksum(true)
}
+
+ def primitiveFieldValueTypeConflict: Seq[String] =
+ """<ROW>
+ | <num_num_1>11</num_num_1>
+ | <num_num_2/>
+ | <num_num_3>1.1</num_num_3>
+ | <num_bool>true</num_bool>
+ | <num_str>13.1</num_str>
+ | <str_bool>str1</str_bool>
+ |</ROW>
+ |""".stripMargin ::
+ """
+ |<ROW>
+ | <num_num_1/>
+ | <num_num_2>21474836470.9</num_num_2>
+ | <num_num_3/>
+ | <num_bool>12</num_bool>
+ | <num_str/>
+ | <str_bool>true</str_bool>
+ |</ROW>""".stripMargin ::
+ """
+ |<ROW>
+ | <num_num_1>21474836470</num_num_1>
+ | <num_num_2>92233720368547758070</num_num_2>
+ | <num_num_3>100</num_num_3>
+ | <num_bool>false</num_bool>
+ | <num_str>str1</num_str>
+ | <str_bool>false</str_bool>
+ |</ROW>""".stripMargin ::
+ """
+ |<ROW>
+ | <num_num_1>21474836570</num_num_1>
+ | <num_num_2>1.1</num_num_2>
+ | <num_num_3>21474836470</num_num_3>
+ | <num_bool/>
+ | <num_str>92233720368547758070</num_str>
+ | <str_bool/>
+ |</ROW>""".stripMargin :: Nil
+
+ def xmlNullStruct: Seq[String] =
+ """<ROW>
+ | <nullstr></nullstr>
+ | <ip>27.31.100.29</ip>
+ | <headers>
+ | <Host>1.abc.com</Host>
+ | <Charset>UTF-8</Charset>
+ | </headers>
+ |</ROW>""".stripMargin ::
+ """<ROW>
+ | <nullstr></nullstr>
+ | <ip>27.31.100.29</ip>
+ | <headers/>
+ |</ROW>""".stripMargin ::
+ """<ROW>
+ | <nullstr></nullstr>
+ | <ip>27.31.100.29</ip>
+ | <headers></headers>
+ |</ROW>""".stripMargin ::
+ """<ROW>
+ | <nullstr/>
+ | <ip>27.31.100.29</ip>
+ | <headers/>
+ |</ROW>""".stripMargin :: Nil
+
+ def complexFieldValueTypeConflict: Seq[String] =
+ """<ROW>
+ <num_struct>11</num_struct>
+ <str_array>1</str_array>
+ <str_array>2</str_array>
+ <str_array>3</str_array>
+ <array></array>
+ <struct_array></struct_array>
+ <struct></struct>
+ </ROW>""" ::
+ """<ROW>
+ <num_struct>
+ <field>false</field>
+ </num_struct>
+ <str_array/>
+ <array/>
+ <struct_array></struct_array>
+ <struct/>
+ </ROW>""" ::
+ """<ROW>
+ <num_struct/>
+ <str_array>str</str_array>
+ <array>4</array>
+ <array>5</array>
+ <array>6</array>
+ <struct_array>7</struct_array>
+ <struct_array>8</struct_array>
+ <struct_array>9</struct_array>
+ <struct>
+ <field/>
+ </struct>
+ </ROW>""" ::
+ """<ROW>
+ <num_struct></num_struct>
+ <str_array>str1</str_array>
+ <str_array>str2</str_array>
+ <str_array>33</str_array>
+ <array>7</array>
+ <struct_array>
+ <field>true</field>
+ </struct_array>
+ <struct>
+ <field>str</field>
+ </struct>
+ </ROW>""" :: Nil
+
+ def arrayElementTypeConflict: Seq[String] =
+ """
+ |<ROW>
+ | <array1>
+ | <element>1</element>
+ | <element>1.1</element>
+ | <element>true</element>
+ | <element/>
+ | <element>
+ | <array/>
+ | </element>
+ | <element>
+ | <object/>
+ | </element>
+ | </array1>
+ | <array1>
+ | <element>
+ | <array>
+ | <element>2</element>
+ | <element>3</element>
+ | <element>4</element>
+ | </array>
+ | </element>
+ | <element>
+ | <object>
+ | <field>str</field>
+ | </object>
+ | </element>
+ | </array1>
+ | <array2>
+ | <field>214748364700</field>
+ | </array2>
+ | <array2>
+ | <field>1</field>
+ | </array2>
+ |</ROW>
+ |""".stripMargin ::
+ """
+ |<ROW>
+ | <array3>
+ | <field>str</field>
+ | </array3>
+ | <array3>
+ | <field>1</field>
+ | </array3>
+ |</ROW>
+ |""".stripMargin ::
+ """
+ |<ROW>
+ | <array3>1</array3>
+ | <array3>2</array3>
+ | <array3>3</array3>
+ |</ROW>
+ |""".stripMargin :: Nil
+
+ def missingFields: Seq[String] =
+ """
+ <ROW><a>true</a></ROW>
+ """ ::
+ """
+ <ROW><b>21474836470</b></ROW>
+ """ ::
+ """
+ <ROW><c>33</c><c>44</c></ROW>
+ """ ::
+ """
+ <ROW><d><field>true</field></d></ROW>
+ """ ::
+ """
+ <ROW><e>str</e></ROW>
+ """ :: Nil
+
+ // XML doesn't support array of arrays
+ // It only supports array of structs
+ def complexFieldAndType1: Seq[String] =
+ """
+ |<ROW>
+ | <struct>
+ | <field1>true</field1>
+ | <field2>92233720368547758070</field2>
+ | </struct>
+ | <structWithArrayFields>
+ | <field1>4</field1>
+ | <field1>5</field1>
+ | <field1>6</field1>
+ | <field2>str1</field2>
+ | <field2>str2</field2>
+ | </structWithArrayFields>
+ | <arrayOfString>str1</arrayOfString>
+ | <arrayOfString>str2</arrayOfString>
+ | <arrayOfInteger>1</arrayOfInteger>
+ | <arrayOfInteger>2147483647</arrayOfInteger>
+ | <arrayOfInteger>-2147483648</arrayOfInteger>
+ | <arrayOfLong>21474836470</arrayOfLong>
+ | <arrayOfLong>9223372036854775807</arrayOfLong>
+ | <arrayOfLong>-9223372036854775808</arrayOfLong>
+ | <arrayOfBigInteger>922337203685477580700</arrayOfBigInteger>
+ | <arrayOfBigInteger>-922337203685477580800</arrayOfBigInteger>
+ | <arrayOfDouble>1.2</arrayOfDouble>
+ | <arrayOfDouble>1.7976931348623157</arrayOfDouble>
+ | <arrayOfDouble>4.9E-324</arrayOfDouble>
+ | <arrayOfDouble>2.2250738585072014E-308</arrayOfDouble>
+ | <arrayOfBoolean>true</arrayOfBoolean>
+ | <arrayOfBoolean>false</arrayOfBoolean>
+ | <arrayOfBoolean>true</arrayOfBoolean>
+ | <arrayOfNull></arrayOfNull>
+ | <arrayOfNull></arrayOfNull>
+ | <arrayOfStruct>
+ | <field1>true</field1>
+ | <field2>str1</field2>
+ | </arrayOfStruct>
+ | <arrayOfStruct>
+ | <field1>false</field1>
+ | </arrayOfStruct>
+ | <arrayOfStruct>
+ | <field3/>
+ | </arrayOfStruct>
+ |<arrayOfArray1>
+ | <item>1</item><item>2</item><item>3</item>
+ |</arrayOfArray1>
+ |<arrayOfArray1>
+ | <item>str1</item><item>str2</item>
+ |</arrayOfArray1>
+ |<arrayOfArray2>
+ | <item>1</item><item>2</item><item>3</item>
+ |</arrayOfArray2>
+ |<arrayOfArray2>
+ | <item>1.1</item><item>2.1</item><item>3.1</item>
+ |</arrayOfArray2>
+ |</ROW>
+ |
+ |""".stripMargin :: Nil
+
+ def complexFieldAndType2: Seq[String] =
+ """
+ |<ROW>
+ | <arrayOfStruct>
+ | <item>
+ | <field1>true</field1>
+ | <field2>str1</field2>
+ | </item>
+ | <item>
+ | <field1>false</field1>
+ | </item>
+ | <item>
+ | <field3/>
+ | </item>
+ | </arrayOfStruct>
+ | <complexArrayOfStruct>
+ | <item>
+ | <field1>
+ | <item>
+ | <inner1>str1</inner1>
+ | </item>
+ | <item>
+ | <inner2><item>str2</item><item>str22</item></inner2>
+ | </item>
+ | </field1>
+ | <field2>
+ | <array><item>1</item><item>2</item></array>
+ | <array><item>3</item><item>4</item></array>
+ | </field2>
+ | </item>
+ | <item>
+ | <field1>
+ | <item>
+ | <inner2>
+ | <item>str3</item><item>str33</item>
+ | </inner2>
+ | </item>
+ | <item>
+ | <inner1>str4</inner1>
+ | </item>
+ | </field1>
+ | <field2>
+ | <array>
+ | <item>5</item><item>6</item>
+ | </array>
+ | <array>
+ | <item>7</item><item>8</item>
+ | </array>
+ | </field2>
+ | </item>
+ | </complexArrayOfStruct>
+ | <arrayOfArray1>
+ | <array>
+ | <item>5</item>
+ | </array>
+ |</arrayOfArray1>
+ |<arrayOfArray1>
+ | <array>
+ | <item>6</item><item>7</item>
+ | </array>
+ | <array>
+ | <item>8</item>
+ | </array>
+ |</arrayOfArray1>
+ | <arrayOfArray2>
+ | <array>
+ | <item>
+ | <inner1>str1</inner1>
+ | </item>
+ | </array>
+ |</arrayOfArray2>
+ |<arrayOfArray2>
+ | <array/>
+ | <array>
+ | <item>
+ | <inner2>str3</inner2>
+ | <inner2>str33</inner2>
+ | </item>
+ | <item>
+ | <inner2>str4</inner2>
+ | <inner1>str11</inner1>
+ | </item>
+ | </array>
+ |</arrayOfArray2>
+ |<arrayOfArray2>
+ | <array>
+ | <item>
+ | <inner3>
+ | <inner4>2</inner4>
+ | <inner4>3</inner4>
+ | </inner3>
+ | <inner3/>
+ | </item>
+ | </array>
+ |</arrayOfArray2>
+ |</ROW>
+ |""".stripMargin :: Nil
+
+ def nullsInArrays: Seq[String] =
Review Comment:
I plan to add the corresponding tests in a follow-up PR, so I will remove
this code from this PR for clarity.
##########
sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/TestXmlData.scala:
##########
@@ -68,4 +68,444 @@ private[xml] trait TestXmlData {
f(dir)
fs.setVerifyChecksum(true)
}
+
+ def primitiveFieldValueTypeConflict: Seq[String] =
+ """<ROW>
+ | <num_num_1>11</num_num_1>
+ | <num_num_2/>
+ | <num_num_3>1.1</num_num_3>
+ | <num_bool>true</num_bool>
+ | <num_str>13.1</num_str>
+ | <str_bool>str1</str_bool>
+ |</ROW>
+ |""".stripMargin ::
+ """
+ |<ROW>
+ | <num_num_1/>
+ | <num_num_2>21474836470.9</num_num_2>
+ | <num_num_3/>
+ | <num_bool>12</num_bool>
+ | <num_str/>
+ | <str_bool>true</str_bool>
+ |</ROW>""".stripMargin ::
+ """
+ |<ROW>
+ | <num_num_1>21474836470</num_num_1>
+ | <num_num_2>92233720368547758070</num_num_2>
+ | <num_num_3>100</num_num_3>
+ | <num_bool>false</num_bool>
+ | <num_str>str1</num_str>
+ | <str_bool>false</str_bool>
+ |</ROW>""".stripMargin ::
+ """
+ |<ROW>
+ | <num_num_1>21474836570</num_num_1>
+ | <num_num_2>1.1</num_num_2>
+ | <num_num_3>21474836470</num_num_3>
+ | <num_bool/>
+ | <num_str>92233720368547758070</num_str>
+ | <str_bool/>
+ |</ROW>""".stripMargin :: Nil
+
+ def xmlNullStruct: Seq[String] =
+ """<ROW>
+ | <nullstr></nullstr>
+ | <ip>27.31.100.29</ip>
+ | <headers>
+ | <Host>1.abc.com</Host>
+ | <Charset>UTF-8</Charset>
+ | </headers>
+ |</ROW>""".stripMargin ::
+ """<ROW>
+ | <nullstr></nullstr>
+ | <ip>27.31.100.29</ip>
+ | <headers/>
+ |</ROW>""".stripMargin ::
+ """<ROW>
+ | <nullstr></nullstr>
+ | <ip>27.31.100.29</ip>
+ | <headers></headers>
+ |</ROW>""".stripMargin ::
+ """<ROW>
+ | <nullstr/>
+ | <ip>27.31.100.29</ip>
+ | <headers/>
+ |</ROW>""".stripMargin :: Nil
+
+ def complexFieldValueTypeConflict: Seq[String] =
+ """<ROW>
+ <num_struct>11</num_struct>
+ <str_array>1</str_array>
+ <str_array>2</str_array>
+ <str_array>3</str_array>
+ <array></array>
+ <struct_array></struct_array>
+ <struct></struct>
+ </ROW>""" ::
+ """<ROW>
+ <num_struct>
+ <field>false</field>
+ </num_struct>
+ <str_array/>
+ <array/>
+ <struct_array></struct_array>
+ <struct/>
+ </ROW>""" ::
+ """<ROW>
+ <num_struct/>
+ <str_array>str</str_array>
+ <array>4</array>
+ <array>5</array>
+ <array>6</array>
+ <struct_array>7</struct_array>
+ <struct_array>8</struct_array>
+ <struct_array>9</struct_array>
+ <struct>
+ <field/>
+ </struct>
+ </ROW>""" ::
+ """<ROW>
+ <num_struct></num_struct>
+ <str_array>str1</str_array>
+ <str_array>str2</str_array>
+ <str_array>33</str_array>
+ <array>7</array>
+ <struct_array>
+ <field>true</field>
+ </struct_array>
+ <struct>
+ <field>str</field>
+ </struct>
+ </ROW>""" :: Nil
+
+ def arrayElementTypeConflict: Seq[String] =
+ """
+ |<ROW>
+ | <array1>
+ | <element>1</element>
+ | <element>1.1</element>
+ | <element>true</element>
+ | <element/>
+ | <element>
+ | <array/>
+ | </element>
+ | <element>
+ | <object/>
+ | </element>
+ | </array1>
+ | <array1>
+ | <element>
+ | <array>
+ | <element>2</element>
+ | <element>3</element>
+ | <element>4</element>
+ | </array>
+ | </element>
+ | <element>
+ | <object>
+ | <field>str</field>
+ | </object>
+ | </element>
+ | </array1>
+ | <array2>
+ | <field>214748364700</field>
+ | </array2>
+ | <array2>
+ | <field>1</field>
+ | </array2>
+ |</ROW>
+ |""".stripMargin ::
+ """
+ |<ROW>
+ | <array3>
+ | <field>str</field>
+ | </array3>
+ | <array3>
+ | <field>1</field>
+ | </array3>
+ |</ROW>
+ |""".stripMargin ::
+ """
+ |<ROW>
+ | <array3>1</array3>
+ | <array3>2</array3>
+ | <array3>3</array3>
+ |</ROW>
+ |""".stripMargin :: Nil
+
+ def missingFields: Seq[String] =
+ """
+ <ROW><a>true</a></ROW>
+ """ ::
+ """
+ <ROW><b>21474836470</b></ROW>
+ """ ::
+ """
+ <ROW><c>33</c><c>44</c></ROW>
+ """ ::
+ """
+ <ROW><d><field>true</field></d></ROW>
+ """ ::
+ """
+ <ROW><e>str</e></ROW>
+ """ :: Nil
+
+ // XML doesn't support array of arrays
+ // It only supports array of structs
+ def complexFieldAndType1: Seq[String] =
+ """
+ |<ROW>
+ | <struct>
+ | <field1>true</field1>
+ | <field2>92233720368547758070</field2>
+ | </struct>
+ | <structWithArrayFields>
+ | <field1>4</field1>
+ | <field1>5</field1>
+ | <field1>6</field1>
+ | <field2>str1</field2>
+ | <field2>str2</field2>
+ | </structWithArrayFields>
+ | <arrayOfString>str1</arrayOfString>
+ | <arrayOfString>str2</arrayOfString>
+ | <arrayOfInteger>1</arrayOfInteger>
+ | <arrayOfInteger>2147483647</arrayOfInteger>
+ | <arrayOfInteger>-2147483648</arrayOfInteger>
+ | <arrayOfLong>21474836470</arrayOfLong>
+ | <arrayOfLong>9223372036854775807</arrayOfLong>
+ | <arrayOfLong>-9223372036854775808</arrayOfLong>
+ | <arrayOfBigInteger>922337203685477580700</arrayOfBigInteger>
+ | <arrayOfBigInteger>-922337203685477580800</arrayOfBigInteger>
+ | <arrayOfDouble>1.2</arrayOfDouble>
+ | <arrayOfDouble>1.7976931348623157</arrayOfDouble>
+ | <arrayOfDouble>4.9E-324</arrayOfDouble>
+ | <arrayOfDouble>2.2250738585072014E-308</arrayOfDouble>
+ | <arrayOfBoolean>true</arrayOfBoolean>
+ | <arrayOfBoolean>false</arrayOfBoolean>
+ | <arrayOfBoolean>true</arrayOfBoolean>
+ | <arrayOfNull></arrayOfNull>
+ | <arrayOfNull></arrayOfNull>
+ | <arrayOfStruct>
+ | <field1>true</field1>
+ | <field2>str1</field2>
+ | </arrayOfStruct>
+ | <arrayOfStruct>
+ | <field1>false</field1>
+ | </arrayOfStruct>
+ | <arrayOfStruct>
+ | <field3/>
+ | </arrayOfStruct>
+ |<arrayOfArray1>
+ | <item>1</item><item>2</item><item>3</item>
+ |</arrayOfArray1>
+ |<arrayOfArray1>
+ | <item>str1</item><item>str2</item>
+ |</arrayOfArray1>
+ |<arrayOfArray2>
+ | <item>1</item><item>2</item><item>3</item>
+ |</arrayOfArray2>
+ |<arrayOfArray2>
+ | <item>1.1</item><item>2.1</item><item>3.1</item>
+ |</arrayOfArray2>
+ |</ROW>
+ |
+ |""".stripMargin :: Nil
+
+ def complexFieldAndType2: Seq[String] =
+ """
+ |<ROW>
+ | <arrayOfStruct>
+ | <item>
+ | <field1>true</field1>
+ | <field2>str1</field2>
+ | </item>
+ | <item>
+ | <field1>false</field1>
+ | </item>
+ | <item>
+ | <field3/>
+ | </item>
+ | </arrayOfStruct>
+ | <complexArrayOfStruct>
+ | <item>
+ | <field1>
+ | <item>
+ | <inner1>str1</inner1>
+ | </item>
+ | <item>
+ | <inner2><item>str2</item><item>str22</item></inner2>
+ | </item>
+ | </field1>
+ | <field2>
+ | <array><item>1</item><item>2</item></array>
+ | <array><item>3</item><item>4</item></array>
+ | </field2>
+ | </item>
+ | <item>
+ | <field1>
+ | <item>
+ | <inner2>
+ | <item>str3</item><item>str33</item>
+ | </inner2>
+ | </item>
+ | <item>
+ | <inner1>str4</inner1>
+ | </item>
+ | </field1>
+ | <field2>
+ | <array>
+ | <item>5</item><item>6</item>
+ | </array>
+ | <array>
+ | <item>7</item><item>8</item>
+ | </array>
+ | </field2>
+ | </item>
+ | </complexArrayOfStruct>
+ | <arrayOfArray1>
+ | <array>
+ | <item>5</item>
+ | </array>
+ |</arrayOfArray1>
+ |<arrayOfArray1>
+ | <array>
+ | <item>6</item><item>7</item>
+ | </array>
+ | <array>
+ | <item>8</item>
+ | </array>
+ |</arrayOfArray1>
+ | <arrayOfArray2>
+ | <array>
+ | <item>
+ | <inner1>str1</inner1>
+ | </item>
+ | </array>
+ |</arrayOfArray2>
+ |<arrayOfArray2>
+ | <array/>
+ | <array>
+ | <item>
+ | <inner2>str3</inner2>
+ | <inner2>str33</inner2>
+ | </item>
+ | <item>
+ | <inner2>str4</inner2>
+ | <inner1>str11</inner1>
+ | </item>
+ | </array>
+ |</arrayOfArray2>
+ |<arrayOfArray2>
+ | <array>
+ | <item>
+ | <inner3>
+ | <inner4>2</inner4>
+ | <inner4>3</inner4>
+ | </inner3>
+ | <inner3/>
+ | </item>
+ | </array>
+ |</arrayOfArray2>
+ |</ROW>
+ |""".stripMargin :: Nil
+
+ def nullsInArrays: Seq[String] =
+ """<ROW>
+ <field1>
+ <array>
+ <item/>
+ </array>
+ <array>
+ <array>
+ <array>
+ <item>Test</item>
+ </array>
+ </array>
+ </array>
+ </field1>
+ </ROW>""" ::
+ """
+ <ROW>
+ <field2>
+ <item/>
+ <array>
+ <item>
+ <Test>1</Test>
+ </item>
+ </array>
+ </field2>
+ </ROW>""" ::
+ """
+ <ROW>
+ <field3>
+ <array>
+ <item/>
+ </array>
+ <array>
+ <item>
+ <Test>2</Test>
+ </item>
+ </array>
+ </field3>
+ </ROW>""" ::
+ """
+ <ROW>
+ <field4>
+ <array>
+ <item/>
+ <array>
+ <item>1</item>
+ <item>2</item>
+ <item>3</item>
+ </array>
+ </array>
+ </field4>
+ </ROW>""" :: Nil
+
+ def corruptRecords: Seq[String] =
Review Comment:
I plan to add the corresponding tests in a follow-up PR, so I will remove
this code from this PR for clarity.
##########
sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/TestXmlData.scala:
##########
@@ -68,4 +68,444 @@ private[xml] trait TestXmlData {
f(dir)
fs.setVerifyChecksum(true)
}
+
+ def primitiveFieldValueTypeConflict: Seq[String] =
+ """<ROW>
+ | <num_num_1>11</num_num_1>
+ | <num_num_2/>
+ | <num_num_3>1.1</num_num_3>
+ | <num_bool>true</num_bool>
+ | <num_str>13.1</num_str>
+ | <str_bool>str1</str_bool>
+ |</ROW>
+ |""".stripMargin ::
+ """
+ |<ROW>
+ | <num_num_1/>
+ | <num_num_2>21474836470.9</num_num_2>
+ | <num_num_3/>
+ | <num_bool>12</num_bool>
+ | <num_str/>
+ | <str_bool>true</str_bool>
+ |</ROW>""".stripMargin ::
+ """
+ |<ROW>
+ | <num_num_1>21474836470</num_num_1>
+ | <num_num_2>92233720368547758070</num_num_2>
+ | <num_num_3>100</num_num_3>
+ | <num_bool>false</num_bool>
+ | <num_str>str1</num_str>
+ | <str_bool>false</str_bool>
+ |</ROW>""".stripMargin ::
+ """
+ |<ROW>
+ | <num_num_1>21474836570</num_num_1>
+ | <num_num_2>1.1</num_num_2>
+ | <num_num_3>21474836470</num_num_3>
+ | <num_bool/>
+ | <num_str>92233720368547758070</num_str>
+ | <str_bool/>
+ |</ROW>""".stripMargin :: Nil
+
+ def xmlNullStruct: Seq[String] =
+ """<ROW>
+ | <nullstr></nullstr>
+ | <ip>27.31.100.29</ip>
+ | <headers>
+ | <Host>1.abc.com</Host>
+ | <Charset>UTF-8</Charset>
+ | </headers>
+ |</ROW>""".stripMargin ::
+ """<ROW>
+ | <nullstr></nullstr>
+ | <ip>27.31.100.29</ip>
+ | <headers/>
+ |</ROW>""".stripMargin ::
+ """<ROW>
+ | <nullstr></nullstr>
+ | <ip>27.31.100.29</ip>
+ | <headers></headers>
+ |</ROW>""".stripMargin ::
+ """<ROW>
+ | <nullstr/>
+ | <ip>27.31.100.29</ip>
+ | <headers/>
+ |</ROW>""".stripMargin :: Nil
+
+ def complexFieldValueTypeConflict: Seq[String] =
+ """<ROW>
+ <num_struct>11</num_struct>
+ <str_array>1</str_array>
+ <str_array>2</str_array>
+ <str_array>3</str_array>
+ <array></array>
+ <struct_array></struct_array>
+ <struct></struct>
+ </ROW>""" ::
+ """<ROW>
+ <num_struct>
+ <field>false</field>
+ </num_struct>
+ <str_array/>
+ <array/>
+ <struct_array></struct_array>
+ <struct/>
+ </ROW>""" ::
+ """<ROW>
+ <num_struct/>
+ <str_array>str</str_array>
+ <array>4</array>
+ <array>5</array>
+ <array>6</array>
+ <struct_array>7</struct_array>
+ <struct_array>8</struct_array>
+ <struct_array>9</struct_array>
+ <struct>
+ <field/>
+ </struct>
+ </ROW>""" ::
+ """<ROW>
+ <num_struct></num_struct>
+ <str_array>str1</str_array>
+ <str_array>str2</str_array>
+ <str_array>33</str_array>
+ <array>7</array>
+ <struct_array>
+ <field>true</field>
+ </struct_array>
+ <struct>
+ <field>str</field>
+ </struct>
+ </ROW>""" :: Nil
+
+ def arrayElementTypeConflict: Seq[String] =
+ """
+ |<ROW>
+ | <array1>
+ | <element>1</element>
+ | <element>1.1</element>
+ | <element>true</element>
+ | <element/>
+ | <element>
+ | <array/>
+ | </element>
+ | <element>
+ | <object/>
+ | </element>
+ | </array1>
+ | <array1>
+ | <element>
+ | <array>
+ | <element>2</element>
+ | <element>3</element>
+ | <element>4</element>
+ | </array>
+ | </element>
+ | <element>
+ | <object>
+ | <field>str</field>
+ | </object>
+ | </element>
+ | </array1>
+ | <array2>
+ | <field>214748364700</field>
+ | </array2>
+ | <array2>
+ | <field>1</field>
+ | </array2>
+ |</ROW>
+ |""".stripMargin ::
+ """
+ |<ROW>
+ | <array3>
+ | <field>str</field>
+ | </array3>
+ | <array3>
+ | <field>1</field>
+ | </array3>
+ |</ROW>
+ |""".stripMargin ::
+ """
+ |<ROW>
+ | <array3>1</array3>
+ | <array3>2</array3>
+ | <array3>3</array3>
+ |</ROW>
+ |""".stripMargin :: Nil
+
+ def missingFields: Seq[String] =
+ """
+ <ROW><a>true</a></ROW>
+ """ ::
+ """
+ <ROW><b>21474836470</b></ROW>
+ """ ::
+ """
+ <ROW><c>33</c><c>44</c></ROW>
+ """ ::
+ """
+ <ROW><d><field>true</field></d></ROW>
+ """ ::
+ """
+ <ROW><e>str</e></ROW>
+ """ :: Nil
+
+ // XML doesn't support array of arrays
+ // It only supports array of structs
+ def complexFieldAndType1: Seq[String] =
+ """
+ |<ROW>
+ | <struct>
+ | <field1>true</field1>
+ | <field2>92233720368547758070</field2>
+ | </struct>
+ | <structWithArrayFields>
+ | <field1>4</field1>
+ | <field1>5</field1>
+ | <field1>6</field1>
+ | <field2>str1</field2>
+ | <field2>str2</field2>
+ | </structWithArrayFields>
+ | <arrayOfString>str1</arrayOfString>
+ | <arrayOfString>str2</arrayOfString>
+ | <arrayOfInteger>1</arrayOfInteger>
+ | <arrayOfInteger>2147483647</arrayOfInteger>
+ | <arrayOfInteger>-2147483648</arrayOfInteger>
+ | <arrayOfLong>21474836470</arrayOfLong>
+ | <arrayOfLong>9223372036854775807</arrayOfLong>
+ | <arrayOfLong>-9223372036854775808</arrayOfLong>
+ | <arrayOfBigInteger>922337203685477580700</arrayOfBigInteger>
+ | <arrayOfBigInteger>-922337203685477580800</arrayOfBigInteger>
+ | <arrayOfDouble>1.2</arrayOfDouble>
+ | <arrayOfDouble>1.7976931348623157</arrayOfDouble>
+ | <arrayOfDouble>4.9E-324</arrayOfDouble>
+ | <arrayOfDouble>2.2250738585072014E-308</arrayOfDouble>
+ | <arrayOfBoolean>true</arrayOfBoolean>
+ | <arrayOfBoolean>false</arrayOfBoolean>
+ | <arrayOfBoolean>true</arrayOfBoolean>
+ | <arrayOfNull></arrayOfNull>
+ | <arrayOfNull></arrayOfNull>
+ | <arrayOfStruct>
+ | <field1>true</field1>
+ | <field2>str1</field2>
+ | </arrayOfStruct>
+ | <arrayOfStruct>
+ | <field1>false</field1>
+ | </arrayOfStruct>
+ | <arrayOfStruct>
+ | <field3/>
+ | </arrayOfStruct>
+ |<arrayOfArray1>
+ | <item>1</item><item>2</item><item>3</item>
+ |</arrayOfArray1>
+ |<arrayOfArray1>
+ | <item>str1</item><item>str2</item>
+ |</arrayOfArray1>
+ |<arrayOfArray2>
+ | <item>1</item><item>2</item><item>3</item>
+ |</arrayOfArray2>
+ |<arrayOfArray2>
+ | <item>1.1</item><item>2.1</item><item>3.1</item>
+ |</arrayOfArray2>
+ |</ROW>
+ |
+ |""".stripMargin :: Nil
+
+ def complexFieldAndType2: Seq[String] =
+ """
+ |<ROW>
+ | <arrayOfStruct>
+ | <item>
+ | <field1>true</field1>
+ | <field2>str1</field2>
+ | </item>
+ | <item>
+ | <field1>false</field1>
+ | </item>
+ | <item>
+ | <field3/>
+ | </item>
+ | </arrayOfStruct>
+ | <complexArrayOfStruct>
+ | <item>
+ | <field1>
+ | <item>
+ | <inner1>str1</inner1>
+ | </item>
+ | <item>
+ | <inner2><item>str2</item><item>str22</item></inner2>
+ | </item>
+ | </field1>
+ | <field2>
+ | <array><item>1</item><item>2</item></array>
+ | <array><item>3</item><item>4</item></array>
+ | </field2>
+ | </item>
+ | <item>
+ | <field1>
+ | <item>
+ | <inner2>
+ | <item>str3</item><item>str33</item>
+ | </inner2>
+ | </item>
+ | <item>
+ | <inner1>str4</inner1>
+ | </item>
+ | </field1>
+ | <field2>
+ | <array>
+ | <item>5</item><item>6</item>
+ | </array>
+ | <array>
+ | <item>7</item><item>8</item>
+ | </array>
+ | </field2>
+ | </item>
+ | </complexArrayOfStruct>
+ | <arrayOfArray1>
+ | <array>
+ | <item>5</item>
+ | </array>
+ |</arrayOfArray1>
+ |<arrayOfArray1>
+ | <array>
+ | <item>6</item><item>7</item>
+ | </array>
+ | <array>
+ | <item>8</item>
+ | </array>
+ |</arrayOfArray1>
+ | <arrayOfArray2>
+ | <array>
+ | <item>
+ | <inner1>str1</inner1>
+ | </item>
+ | </array>
+ |</arrayOfArray2>
+ |<arrayOfArray2>
+ | <array/>
+ | <array>
+ | <item>
+ | <inner2>str3</inner2>
+ | <inner2>str33</inner2>
+ | </item>
+ | <item>
+ | <inner2>str4</inner2>
+ | <inner1>str11</inner1>
+ | </item>
+ | </array>
+ |</arrayOfArray2>
+ |<arrayOfArray2>
+ | <array>
+ | <item>
+ | <inner3>
+ | <inner4>2</inner4>
+ | <inner4>3</inner4>
+ | </inner3>
+ | <inner3/>
+ | </item>
+ | </array>
+ |</arrayOfArray2>
+ |</ROW>
+ |""".stripMargin :: Nil
+
+ def nullsInArrays: Seq[String] =
+ """<ROW>
+ <field1>
+ <array>
+ <item/>
+ </array>
+ <array>
+ <array>
+ <array>
+ <item>Test</item>
+ </array>
+ </array>
+ </array>
+ </field1>
+ </ROW>""" ::
+ """
+ <ROW>
+ <field2>
+ <item/>
+ <array>
+ <item>
+ <Test>1</Test>
+ </item>
+ </array>
+ </field2>
+ </ROW>""" ::
+ """
+ <ROW>
+ <field3>
+ <array>
+ <item/>
+ </array>
+ <array>
+ <item>
+ <Test>2</Test>
+ </item>
+ </array>
+ </field3>
+ </ROW>""" ::
+ """
+ <ROW>
+ <field4>
+ <array>
+ <item/>
+ <array>
+ <item>1</item>
+ <item>2</item>
+ <item>3</item>
+ </array>
+ </array>
+ </field4>
+ </ROW>""" :: Nil
+
+ def corruptRecords: Seq[String] =
+ """<ROW>""" ::
+ """""" ::
+ """<ROW>
+ | <a>1</a>
+ | <b>2</b>
+ |</ROW>""".stripMargin ::
+ """
+ |<ROW>
+ | <a>str_a_4</a>
+ | <b>str_b_4</b>
+ | <c>str_c_4</c>
+ |</ROW>
+ |""".stripMargin ::
+ """
+ |</ROW>
+ |""".stripMargin :: Nil
+
+ def emptyRecords: Seq[String] =
+ """<ROW>
+ <a><struct></struct></a>
+ </ROW>""" ::
+ """<ROW>
+ <a>
+ <struct><b><c/></b></struct>
+ </a>
+ </ROW>""" ::
+ """<ROW>
+ <b>
+ <item>
+ <c><struct></struct></c>
+ </item>
+ <item/>
+ </b>
+ </ROW>""" :: Nil
+
+ def arrayAndStructRecords: Seq[String] =
Review Comment:
I plan to add the corresponding tests in a follow-up PR, so I will remove
this code from this PR for clarity.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]