This is an automated email from the ASF dual-hosted git repository.
cgivre pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git
The following commit(s) were added to refs/heads/master by this push:
new e59a39e DRILL-7831: Drill Fails To Read XML File with Self Closing Tags
e59a39e is described below
commit e59a39ea77c07a38433e35098178bbd60b4d00e2
Author: Charles Givre <[email protected]>
AuthorDate: Wed Jan 6 23:03:23 2021 -0500
DRILL-7831: Drill Fails To Read XML File with Self Closing Tags
---
.../org/apache/drill/exec/store/xml/XMLReader.java | 30 ++++++++++++---
.../org/apache/drill/exec/store/xml/XMLUtils.java | 23 +++++------
.../apache/drill/exec/store/xml/TestXMLReader.java | 45 ++++++++++++++++++++++
.../apache/drill/exec/store/xml/TestXMLUtils.java | 44 +++++++++++++++++++++
.../resources/xml/very-nested-with-attributes.xml | 38 ++++++++++++++++++
.../format-xml/src/test/resources/xml/weather.xml | 40 +++++++++++++++++++
6 files changed, 201 insertions(+), 19 deletions(-)
diff --git
a/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLReader.java
b/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLReader.java
index 8fc0343..e51ded6 100644
---
a/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLReader.java
+++
b/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLReader.java
@@ -249,7 +249,9 @@ public class XMLReader {
}
Iterator<Attribute> attributes = startElement.getAttributes();
- writeAttributes(attributePrefix, attributes);
+ if (attributes != null && attributes.hasNext()) {
+ writeAttributes(attributePrefix, attributes);
+ }
}
break;
@@ -257,6 +259,9 @@ public class XMLReader {
* This case processes character elements.
*/
case XMLStreamConstants.CHARACTERS:
+ /*
+ * This is the case for comments or other characters after a closing tag
+ */
if (currentState == xmlState.ROW_ENDED) {
break;
}
@@ -278,16 +283,18 @@ public class XMLReader {
}
}
+ // Get the field value
fieldValue = currentEvent.asCharacters().getData().trim();
changeState(xmlState.GETTING_DATA);
break;
case XMLStreamConstants.END_ELEMENT:
currentNestingLevel--;
- // End the row
+
if (currentNestingLevel < dataLevel - 1) {
break;
} else if
(currentEvent.asEndElement().getName().toString().compareTo(rootDataFieldName)
== 0) {
+ // End the row
currentTupleWriter = endRow();
// Clear stacks
@@ -298,15 +305,27 @@ public class XMLReader {
} else if (currentState == xmlState.FIELD_ENDED && currentNestingLevel
>= dataLevel) {
// Case to end nested maps
// Pop tupleWriter off stack
- currentTupleWriter = rowWriterStack.pop();
- attributePrefix = XMLUtils.removeField(attributePrefix);
+ if (rowWriterStack.size() > 0) {
+ currentTupleWriter = rowWriterStack.pop();
+ }
+ // Pop field name
+ if (fieldNameStack.size() > 0) {
+ fieldNameStack.pop();
+ }
+
+ attributePrefix = XMLUtils.removeField(attributePrefix,fieldName);
} else if (currentState != xmlState.ROW_ENDED){
writeFieldData(fieldName, fieldValue, currentTupleWriter);
// Clear out field name and value
+ attributePrefix = XMLUtils.removeField(attributePrefix, fieldName);
+
+ // Pop field name
+ if (fieldNameStack.size() > 0) {
+ fieldNameStack.pop();
+ }
fieldName = null;
fieldValue = null;
- attributePrefix = XMLUtils.removeField(attributePrefix);
}
break;
}
@@ -436,5 +455,4 @@ public class XMLReader {
writeAttributeData(key, currentAttribute.getValue(), attributeWriter);
}
}
-
}
diff --git
a/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLUtils.java
b/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLUtils.java
index 28132a3..f11b483 100644
---
a/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLUtils.java
+++
b/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLUtils.java
@@ -76,21 +76,18 @@ public class XMLUtils {
* @param fieldName The nested field name
* @return The field name
*/
- public static String removeField(String fieldName) {
- if (Strings.isNullOrEmpty(fieldName)) {
- return fieldName;
+ public static String removeField(String prefix, String fieldName) {
+ if (fieldName == null) {
+ return "";
}
- String[] components = fieldName.split("_");
- StringBuilder newField = new StringBuilder();
- for (int i = 0; i < components.length - 1; i++) {
- if (i > 0) {
- newField.append("_").append(components[i]);
- } else {
- newField = new StringBuilder(components[i]);
- }
+ int index = prefix.lastIndexOf(fieldName);
+ if (index == 0) {
+ return "";
+ } else if (index < 0) {
+ return prefix;
}
- return newField.toString();
- }
+ return prefix.substring(0, index-1);
+ }
}
diff --git
a/contrib/format-xml/src/test/java/org/apache/drill/exec/store/xml/TestXMLReader.java
b/contrib/format-xml/src/test/java/org/apache/drill/exec/store/xml/TestXMLReader.java
index 522f89c..e32a173 100644
---
a/contrib/format-xml/src/test/java/org/apache/drill/exec/store/xml/TestXMLReader.java
+++
b/contrib/format-xml/src/test/java/org/apache/drill/exec/store/xml/TestXMLReader.java
@@ -83,6 +83,51 @@ public class TestXMLReader extends ClusterTest {
new RowSetComparison(expected).verifyAndClearAll(results);
}
+ @Test
+ public void testSelfClosingTags() throws Exception {
+ String sql = "SELECT * FROM cp.`xml/weather.xml`";
+ RowSet results = client.queryBuilder().sql(sql).rowSet();
+ assertEquals(1, results.rowCount());
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .addMap("attributes")
+ .addNullable("forecast_information_city_data", MinorType.VARCHAR)
+ .addNullable("forecast_information_postal_code_data",
MinorType.VARCHAR)
+ .addNullable("forecast_information_latitude_e6_data",
MinorType.VARCHAR)
+ .addNullable("forecast_information_longitude_e6_data",
MinorType.VARCHAR)
+ .addNullable("forecast_information_forecast_date_data",
MinorType.VARCHAR)
+ .addNullable("forecast_information_current_date_time_data",
MinorType.VARCHAR)
+ .addNullable("forecast_information_unit_system_data",
MinorType.VARCHAR)
+ .addNullable("current_conditions_condition_data", MinorType.VARCHAR)
+ .addNullable("current_conditions_temp_f_data", MinorType.VARCHAR)
+ .addNullable("current_conditions_temp_c_data", MinorType.VARCHAR)
+ .addNullable("current_conditions_humidity_data", MinorType.VARCHAR)
+ .addNullable("current_conditions_icon_data", MinorType.VARCHAR)
+ .addNullable("current_conditions_wind_condition_data",
MinorType.VARCHAR)
+ .resumeSchema()
+ .addNullable("city", MinorType.VARCHAR)
+ .addNullable("postal_code", MinorType.VARCHAR)
+ .addNullable("latitude_e6", MinorType.VARCHAR)
+ .addNullable("longitude_e6", MinorType.VARCHAR)
+ .addNullable("forecast_date", MinorType.VARCHAR)
+ .addNullable("current_date_time", MinorType.VARCHAR)
+ .addNullable("unit_system", MinorType.VARCHAR)
+ .addNullable("condition", MinorType.VARCHAR)
+ .addNullable("temp_f", MinorType.VARCHAR)
+ .addNullable("temp_c", MinorType.VARCHAR)
+ .addNullable("humidity", MinorType.VARCHAR)
+ .addNullable("icon", MinorType.VARCHAR)
+ .addNullable("wind_condition", MinorType.VARCHAR)
+ .build();
+
+ RowSet expected = client.rowSetBuilder(expectedSchema)
+ .addRow((Object)strArray("Seattle, WA", "Seattle WA", "", "",
"2011-09-29", "2011-09-29 17:53:00 +0000", "US", "Clear", "62", "17",
"Humidity: 62%", "/ig/images/weather" +
+ "/sunny.gif", "Wind: N at 4 mph"), null, null, null, null, null, null,
null, null, null, null, null, null, null)
+ .build();
+
+ new RowSetComparison(expected).verifyAndClearAll(results);
+ }
+
/**
* This unit test tests a simple XML file with no nesting or attributes, but with explicitly selected fields.
* @throws Exception Throw exception if anything goes wrong
diff --git
a/contrib/format-xml/src/test/java/org/apache/drill/exec/store/xml/TestXMLUtils.java
b/contrib/format-xml/src/test/java/org/apache/drill/exec/store/xml/TestXMLUtils.java
new file mode 100644
index 0000000..06d70ee
--- /dev/null
+++
b/contrib/format-xml/src/test/java/org/apache/drill/exec/store/xml/TestXMLUtils.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.drill.exec.store.xml;
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+public class TestXMLUtils {
+
+ @Test
+ public void testRemoveField() {
+ String test1 = "field1_field2_field3";
+ assertEquals(XMLUtils.removeField(test1, "field3"), "field1_field2");
+
+ // Test with underscores
+ String test2 = "field_1_field_2_field_3";
+ assertEquals(XMLUtils.removeField(test2, "field_3"), "field_1_field_2");
+
+ // Test with missing field
+ String test3 = "field_1_field_2_field_3";
+ assertEquals(XMLUtils.removeField(test3, "field_4"),
"field_1_field_2_field_3");
+
+ // Test with empty string
+ String test4 = "";
+ assertEquals(XMLUtils.removeField(test4, "field_4"), "");
+ }
+}
diff --git
a/contrib/format-xml/src/test/resources/xml/very-nested-with-attributes.xml
b/contrib/format-xml/src/test/resources/xml/very-nested-with-attributes.xml
new file mode 100644
index 0000000..655e5d5
--- /dev/null
+++ b/contrib/format-xml/src/test/resources/xml/very-nested-with-attributes.xml
@@ -0,0 +1,38 @@
+<!--
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+-->
+<book>
+ <field1 f1="a1" f2="a2">
+ <key1>value1</key1>
+ <key2>value2</key2>
+ </field1>
+ <field2>
+ <key3 f3="a3" f4="a4">k1</key3>
+ <nestedField1>
+ <nk1>nk_value1</nk1>
+ <nk2>nk_value2</nk2>
+ <nk3>nk_value3</nk3>
+ <nestedField2>
+ <nk1 f5="a5">nk2_value1</nk1>
+ <nk2>nk2_value2</nk2>
+ <nk3>nk2_value3</nk3>
+ </nestedField2>
+ </nestedField1>
+ </field2>
+</book>
\ No newline at end of file
diff --git a/contrib/format-xml/src/test/resources/xml/weather.xml
b/contrib/format-xml/src/test/resources/xml/weather.xml
new file mode 100644
index 0000000..9ab3c67
--- /dev/null
+++ b/contrib/format-xml/src/test/resources/xml/weather.xml
@@ -0,0 +1,40 @@
+<!--
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+-->
+<xml_api_reply version="1">
+ <weather module_id="0" tab_id="0" mobile_row="0" mobile_zipped="1" row="0"
section="0">
+ <forecast_information>
+ <city data="Seattle, WA"/>
+ <postal_code data="Seattle WA"/>
+ <latitude_e6 data=""/>
+ <longitude_e6 data=""/>
+ <forecast_date data="2011-09-29"/>
+ <current_date_time data="2011-09-29 17:53:00 +0000"/>
+ <unit_system data="US"/>
+ </forecast_information>
+ <current_conditions>
+ <condition data="Clear"/>
+ <temp_f data="62"/>
+ <temp_c data="17"/>
+ <humidity data="Humidity: 62%"/>
+ <icon data="/ig/images/weather/sunny.gif"/>
+ <wind_condition data="Wind: N at 4 mph"/>
+ </current_conditions>
+ </weather>
+</xml_api_reply>