This is an automated email from the ASF dual-hosted git repository.

cgivre pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git


The following commit(s) were added to refs/heads/master by this push:
     new e59a39e  DRILL-7831: Drill Fails To Read XML File with Self Closing Tags
e59a39e is described below

commit e59a39ea77c07a38433e35098178bbd60b4d00e2
Author: Charles Givre <[email protected]>
AuthorDate: Wed Jan 6 23:03:23 2021 -0500

    DRILL-7831: Drill Fails To Read XML File with Self Closing Tags
---
 .../org/apache/drill/exec/store/xml/XMLReader.java | 30 ++++++++++++---
 .../org/apache/drill/exec/store/xml/XMLUtils.java  | 23 +++++------
 .../apache/drill/exec/store/xml/TestXMLReader.java | 45 ++++++++++++++++++++++
 .../apache/drill/exec/store/xml/TestXMLUtils.java  | 44 +++++++++++++++++++++
 .../resources/xml/very-nested-with-attributes.xml  | 38 ++++++++++++++++++
 .../format-xml/src/test/resources/xml/weather.xml  | 40 +++++++++++++++++++
 6 files changed, 201 insertions(+), 19 deletions(-)

diff --git a/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLReader.java b/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLReader.java
index 8fc0343..e51ded6 100644
--- a/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLReader.java
+++ b/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLReader.java
@@ -249,7 +249,9 @@ public class XMLReader {
           }
 
           Iterator<Attribute> attributes = startElement.getAttributes();
-          writeAttributes(attributePrefix, attributes);
+          if (attributes != null && attributes.hasNext()) {
+            writeAttributes(attributePrefix, attributes);
+          }
         }
         break;
 
@@ -257,6 +259,9 @@ public class XMLReader {
        * This case processes character elements.
        */
       case XMLStreamConstants.CHARACTERS:
+        /*
+         * This is the case for comments or other characters after a closing tag
+         */
         if (currentState == xmlState.ROW_ENDED) {
           break;
         }
@@ -278,16 +283,18 @@ public class XMLReader {
           }
         }
 
+        // Get the field value
         fieldValue = currentEvent.asCharacters().getData().trim();
         changeState(xmlState.GETTING_DATA);
         break;
 
       case XMLStreamConstants.END_ELEMENT:
         currentNestingLevel--;
-        // End the row
+
         if (currentNestingLevel < dataLevel - 1) {
           break;
         } else if 
(currentEvent.asEndElement().getName().toString().compareTo(rootDataFieldName) 
== 0) {
+          // End the row
           currentTupleWriter = endRow();
 
           // Clear stacks
@@ -298,15 +305,27 @@ public class XMLReader {
         } else if (currentState == xmlState.FIELD_ENDED && currentNestingLevel 
>= dataLevel) {
           // Case to end nested maps
           // Pop tupleWriter off stack
-          currentTupleWriter = rowWriterStack.pop();
-          attributePrefix = XMLUtils.removeField(attributePrefix);
+          if (rowWriterStack.size() > 0) {
+            currentTupleWriter = rowWriterStack.pop();
+          }
+          // Pop field name
+          if (fieldNameStack.size() > 0) {
+            fieldNameStack.pop();
+          }
+
+          attributePrefix = XMLUtils.removeField(attributePrefix,fieldName);
 
         } else if (currentState != xmlState.ROW_ENDED){
           writeFieldData(fieldName, fieldValue, currentTupleWriter);
           // Clear out field name and value
+          attributePrefix = XMLUtils.removeField(attributePrefix, fieldName);
+
+          // Pop field name
+          if (fieldNameStack.size() > 0) {
+            fieldNameStack.pop();
+          }
           fieldName = null;
           fieldValue = null;
-          attributePrefix = XMLUtils.removeField(attributePrefix);
         }
         break;
     }
@@ -436,5 +455,4 @@ public class XMLReader {
       writeAttributeData(key, currentAttribute.getValue(), attributeWriter);
     }
   }
-
 }
diff --git a/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLUtils.java b/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLUtils.java
index 28132a3..f11b483 100644
--- a/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLUtils.java
+++ b/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLUtils.java
@@ -76,21 +76,18 @@ public class XMLUtils {
    * @param fieldName The nested field name
    * @return The field name
    */
-  public static String removeField(String fieldName) {
-    if (Strings.isNullOrEmpty(fieldName)) {
-      return fieldName;
+  public static String removeField(String prefix, String fieldName) {
+    if (fieldName == null) {
+      return "";
     }
 
-    String[] components = fieldName.split("_");
-    StringBuilder newField = new StringBuilder();
-    for (int i = 0; i < components.length - 1; i++) {
-      if (i > 0) {
-        newField.append("_").append(components[i]);
-      } else {
-        newField = new StringBuilder(components[i]);
-      }
+    int index = prefix.lastIndexOf(fieldName);
+    if (index == 0) {
+      return "";
+    } else if (index < 0) {
+      return prefix;
     }
-    return newField.toString();
-  }
 
+    return prefix.substring(0, index-1);
+  }
 }
diff --git 
a/contrib/format-xml/src/test/java/org/apache/drill/exec/store/xml/TestXMLReader.java
 
b/contrib/format-xml/src/test/java/org/apache/drill/exec/store/xml/TestXMLReader.java
index 522f89c..e32a173 100644
--- 
a/contrib/format-xml/src/test/java/org/apache/drill/exec/store/xml/TestXMLReader.java
+++ 
b/contrib/format-xml/src/test/java/org/apache/drill/exec/store/xml/TestXMLReader.java
@@ -83,6 +83,51 @@ public class TestXMLReader extends ClusterTest {
     new RowSetComparison(expected).verifyAndClearAll(results);
   }
 
+  @Test
+  public void testSelfClosingTags() throws Exception {
+    String sql = "SELECT * FROM cp.`xml/weather.xml`";
+    RowSet results = client.queryBuilder().sql(sql).rowSet();
+    assertEquals(1, results.rowCount());
+
+      TupleMetadata expectedSchema = new SchemaBuilder()
+        .addMap("attributes")
+          .addNullable("forecast_information_city_data", MinorType.VARCHAR)
+          .addNullable("forecast_information_postal_code_data", 
MinorType.VARCHAR)
+          .addNullable("forecast_information_latitude_e6_data", 
MinorType.VARCHAR)
+          .addNullable("forecast_information_longitude_e6_data", 
MinorType.VARCHAR)
+          .addNullable("forecast_information_forecast_date_data", 
MinorType.VARCHAR)
+          .addNullable("forecast_information_current_date_time_data", 
MinorType.VARCHAR)
+          .addNullable("forecast_information_unit_system_data", 
MinorType.VARCHAR)
+          .addNullable("current_conditions_condition_data", MinorType.VARCHAR)
+          .addNullable("current_conditions_temp_f_data", MinorType.VARCHAR)
+          .addNullable("current_conditions_temp_c_data", MinorType.VARCHAR)
+          .addNullable("current_conditions_humidity_data", MinorType.VARCHAR)
+          .addNullable("current_conditions_icon_data", MinorType.VARCHAR)
+          .addNullable("current_conditions_wind_condition_data", 
MinorType.VARCHAR)
+        .resumeSchema()
+        .addNullable("city", MinorType.VARCHAR)
+        .addNullable("postal_code", MinorType.VARCHAR)
+        .addNullable("latitude_e6", MinorType.VARCHAR)
+        .addNullable("longitude_e6", MinorType.VARCHAR)
+        .addNullable("forecast_date", MinorType.VARCHAR)
+        .addNullable("current_date_time", MinorType.VARCHAR)
+        .addNullable("unit_system", MinorType.VARCHAR)
+        .addNullable("condition", MinorType.VARCHAR)
+        .addNullable("temp_f", MinorType.VARCHAR)
+        .addNullable("temp_c", MinorType.VARCHAR)
+        .addNullable("humidity", MinorType.VARCHAR)
+        .addNullable("icon", MinorType.VARCHAR)
+        .addNullable("wind_condition", MinorType.VARCHAR)
+        .build();
+
+    RowSet expected = client.rowSetBuilder(expectedSchema)
+      .addRow((Object)strArray("Seattle, WA", "Seattle WA", "", "", 
"2011-09-29", "2011-09-29 17:53:00 +0000", "US", "Clear", "62", "17", 
"Humidity: 62%", "/ig/images/weather" +
+        "/sunny.gif", "Wind: N at 4 mph"), null, null, null, null, null, null, 
null, null, null, null, null, null, null)
+      .build();
+
+    new RowSetComparison(expected).verifyAndClearAll(results);
+  }
+
   /**
    * This unit test tests a simple XML file with no nesting or attributes, but with explicitly selected fields.
    * @throws Exception Throw exception if anything goes wrong
diff --git a/contrib/format-xml/src/test/java/org/apache/drill/exec/store/xml/TestXMLUtils.java b/contrib/format-xml/src/test/java/org/apache/drill/exec/store/xml/TestXMLUtils.java
new file mode 100644
index 0000000..06d70ee
--- /dev/null
+++ b/contrib/format-xml/src/test/java/org/apache/drill/exec/store/xml/TestXMLUtils.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.drill.exec.store.xml;
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+public class TestXMLUtils {
+
+  @Test
+  public void testRemoveField() {
+    String test1 = "field1_field2_field3";
+    assertEquals(XMLUtils.removeField(test1, "field3"), "field1_field2");
+
+    // Test with underscores
+    String test2 = "field_1_field_2_field_3";
+    assertEquals(XMLUtils.removeField(test2, "field_3"), "field_1_field_2");
+
+    // Test with missing field
+    String test3 = "field_1_field_2_field_3";
+    assertEquals(XMLUtils.removeField(test3, "field_4"), 
"field_1_field_2_field_3");
+
+    // Test with empty string
+    String test4 = "";
+    assertEquals(XMLUtils.removeField(test4, "field_4"), "");
+  }
+}
diff --git a/contrib/format-xml/src/test/resources/xml/very-nested-with-attributes.xml b/contrib/format-xml/src/test/resources/xml/very-nested-with-attributes.xml
new file mode 100644
index 0000000..655e5d5
--- /dev/null
+++ b/contrib/format-xml/src/test/resources/xml/very-nested-with-attributes.xml
@@ -0,0 +1,38 @@
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+-->
+<book>
+  <field1 f1="a1" f2="a2">
+    <key1>value1</key1>
+    <key2>value2</key2>
+  </field1>
+  <field2>
+    <key3 f3="a3" f4="a4">k1</key3>
+    <nestedField1>
+      <nk1>nk_value1</nk1>
+      <nk2>nk_value2</nk2>
+      <nk3>nk_value3</nk3>
+      <nestedField2>
+        <nk1 f5="a5">nk2_value1</nk1>
+        <nk2>nk2_value2</nk2>
+        <nk3>nk2_value3</nk3>
+      </nestedField2>
+    </nestedField1>
+  </field2>
+</book>
\ No newline at end of file
diff --git a/contrib/format-xml/src/test/resources/xml/weather.xml b/contrib/format-xml/src/test/resources/xml/weather.xml
new file mode 100644
index 0000000..9ab3c67
--- /dev/null
+++ b/contrib/format-xml/src/test/resources/xml/weather.xml
@@ -0,0 +1,40 @@
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+-->
+<xml_api_reply version="1">
+  <weather module_id="0" tab_id="0" mobile_row="0" mobile_zipped="1" row="0" 
section="0">
+    <forecast_information>
+      <city data="Seattle, WA"/>
+      <postal_code data="Seattle WA"/>
+      <latitude_e6 data=""/>
+      <longitude_e6 data=""/>
+      <forecast_date data="2011-09-29"/>
+      <current_date_time data="2011-09-29 17:53:00 +0000"/>
+      <unit_system data="US"/>
+    </forecast_information>
+    <current_conditions>
+      <condition data="Clear"/>
+      <temp_f data="62"/>
+      <temp_c data="17"/>
+      <humidity data="Humidity: 62%"/>
+      <icon data="/ig/images/weather/sunny.gif"/>
+      <wind_condition data="Wind: N at 4 mph"/>
+    </current_conditions>
+  </weather>
+</xml_api_reply>

Reply via email to