This is an automated email from the ASF dual-hosted git repository.

cgivre pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git


The following commit(s) were added to refs/heads/master by this push:
     new 8f892b3  DRILL-7823 - Add XML Format Plugin
8f892b3 is described below

commit 8f892b3c9b04e5e0ff1973681ff862da857d22ef
Author: Charles Givre <[email protected]>
AuthorDate: Mon Dec 21 13:33:11 2020 -0500

    DRILL-7823 - Add XML Format Plugin
---
 contrib/format-xml/README.md                       | 136 +++++++
 contrib/format-xml/pom.xml                         |  86 +++++
 .../drill/exec/store/xml/XMLBatchReader.java       | 100 +++++
 .../drill/exec/store/xml/XMLFormatConfig.java      |  79 ++++
 .../drill/exec/store/xml/XMLFormatPlugin.java      |  93 +++++
 .../org/apache/drill/exec/store/xml/XMLMap.java    |  63 +++
 .../org/apache/drill/exec/store/xml/XMLReader.java | 416 ++++++++++++++++++++
 .../org/apache/drill/exec/store/xml/XMLUtils.java  |  92 +++++
 .../main/resources/bootstrap-format-plugins.json   |  26 ++
 .../src/main/resources/drill-module.conf           |  25 ++
 .../apache/drill/exec/store/xml/TestXMLReader.java | 428 +++++++++++++++++++++
 .../src/test/resources/xml/attributes.xml          |  42 ++
 .../src/test/resources/xml/deep-nested.xml         |  60 +++
 .../src/test/resources/xml/deep-nested2.xml        |  60 +++
 .../format-xml/src/test/resources/xml/nested.xml   |  63 +++
 .../test/resources/xml/really-simple-nested.xml    |  39 ++
 .../format-xml/src/test/resources/xml/simple.xml   |  42 ++
 .../src/test/resources/xml/simple_schema.xsd       |  43 +++
 .../src/test/resources/xml/very-nested.xml         |  38 ++
 .../native/client/src/protobuf/UserBitShared.pb.cc |  15 +-
 .../native/client/src/protobuf/UserBitShared.pb.h  |   5 +-
 contrib/pom.xml                                    |   1 +
 distribution/pom.xml                               |   5 +
 distribution/src/assemble/component.xml            |   1 +
 .../org/apache/drill/exec/proto/UserBitShared.java |  21 +-
 protocol/src/main/protobuf/UserBitShared.proto     |   1 +
 26 files changed, 1965 insertions(+), 15 deletions(-)

diff --git a/contrib/format-xml/README.md b/contrib/format-xml/README.md
new file mode 100644
index 0000000..dc245a3
--- /dev/null
+++ b/contrib/format-xml/README.md
@@ -0,0 +1,136 @@
+# XML Format Reader
+This plugin enables Drill to read XML files without defining any kind of 
schema.
+
+## Configuration
+Aside from the file extension, there is one configuration option:
+
+* `dataLevel`: XML data often contains a considerable amount of nesting which 
is not necessarily useful for data analysis. This parameter allows you to set 
the nesting level 
+  where the data actually starts.  The levels start at `1`.
+
+The default configuration is shown below:
+
+```json
+"xml": {
+  "type": "xml",
+  "extensions": [
+    "xml"
+  ],
+  "dataLevel": 2
+}
+```
+
+## Data Types
+All fields are read as strings.  Nested fields are read as maps.  Future 
functionality could include support for lists.
+
+### Attributes
+XML events can have attributes which can also be useful.
+```xml
+<book>
+  <author>O.-J. Dahl</author>
+  <title binding="hardcover" subcategory="non-fiction">Structured 
Programming</title>
+  <category>PROGRAMMING</category>
+  <year>1972</year>
+</book>
+```
+
+In the example above, the `title` field contains two attributes, the `binding` 
and `subcategory`.  In order to access these fields, Drill creates a map called 
`attributes` and 
+adds an entry for each attribute with the field name and then the attribute 
name.  Every XML file will have a field called `attributes` regardless of 
whether the data actually 
+has attributes or not.
+
+```xml
+<books>
+   <book>
+     <author>Mark Twain</author>
+     <title>The Adventures of Tom Sawyer</title>
+     <category>FICTION</category>
+     <year>1876</year>
+   </book>
+   <book>
+     <authors>
+         <author>Niklaus Wirth</author>
+         <author>Somebody else</author>
+     </authors>
+     <title binding="paperback">The Programming Language Pascal</title>
+     <category >PASCAL</category>
+     <year>1971</year>
+   </book>
+   <book>
+     <author>O.-J. Dahl</author>
+     <title binding="hardcover" subcategory="non-fiction">Structured 
Programming</title>
+     <category>PROGRAMMING</category>
+     <year>1972</year>
+   </book>
+ </books>
+```
+If you queried this data in Drill you'd get the table below:
+
+```sql
+SELECT * 
+FROM <path>.`attributes.xml`
+```
+
+```
+apache drill> select * from dfs.test.`attributes.xml`;
++-----------------------------------------------------------------+------------+---------------------------------+-------------+------+-----------------------------------------+
+|                           attributes                            |   author   
|              title              |  category   | year |                 
authors                 |
++-----------------------------------------------------------------+------------+---------------------------------+-------------+------+-----------------------------------------+
+| {}                                                              | Mark Twain 
| The Adventures of Tom Sawyer    | FICTION     | 1876 | {}                     
                 |
+| {"title_binding":"paperback"}                                   | null       
| The Programming Language Pascal | PASCAL      | 1971 | {"author":"Niklaus 
WirthSomebody else"} |
+| {"title_binding":"hardcover","title_subcategory":"non-fiction"} | O.-J. Dahl 
| Structured Programming          | PROGRAMMING | 1972 | {}                     
                 |
++-----------------------------------------------------------------+------------+---------------------------------+-------------+------+-----------------------------------------+
+```
+
+
+
+## Limitations: Schema Ambiguity
+XML is a challenging format to process as the structure does not give any 
hints about the schema.  For example, a JSON file might have the following 
record:
+
+```json
+"record" : {
+  "intField:" : 1,
+  "listField" : [1, 2],
+  "otherField" : {
+    "nestedField1" : "foo",
+    "nestedField2" : "bar"
+  }
+}
+```
+
+From this data, it is clear that `listField` is a `list` and `otherField` is a 
map.  This same data could be represented in XML as follows:
+
+```xml
+<record>
+  <intField>1</intField>
+  <listField>
+    <value>1</value>
+    <value>2</value>
+  </listField>
+  <otherField>
+    <nestedField1>foo</nestedField1>
+    <nestedField2>bar</nestedField2>
+  </otherField>
+</record>
+```
+It is no problem to parse this data. But consider what would happen if we 
encountered the following first:
+```xml
+<record>
+  <intField>1</intField>
+  <listField>
+    <value>2</value>
+  </listField>
+  <otherField>
+    <nestedField1>foo</nestedField1>
+    <nestedField2>bar</nestedField2>
+  </otherField>
+</record>
+```
+In this example, there is no way for Drill to know whether `listField` is a 
`list` or a `map` because it only has one entry. 
+
+## Future Functionality
+
+* **Build schema from XSD file or link**:  One of the major challenges of this 
reader is having to infer the schema of the data. XML files can provide a schema 
(XSD), although this is not
+ required.  In the future, if there is interest, we can extend this reader to 
use an XSD file to build the schema which will be used to parse the actual XML 
file. 
+  
+* **Infer Date Fields**: It may be possible to add the ability to infer date 
fields.
+
+* **List Support**:  Future functionality may include the ability to infer 
lists from data structures.  
\ No newline at end of file
diff --git a/contrib/format-xml/pom.xml b/contrib/format-xml/pom.xml
new file mode 100644
index 0000000..a000f8e
--- /dev/null
+++ b/contrib/format-xml/pom.xml
@@ -0,0 +1,86 @@
+<?xml version="1.0"?>
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0";
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance";
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 
http://maven.apache.org/xsd/maven-4.0.0.xsd";>
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <artifactId>drill-contrib-parent</artifactId>
+    <groupId>org.apache.drill.contrib</groupId>
+    <version>1.19.0-SNAPSHOT</version>
+  </parent>
+
+  <artifactId>drill-format-xml</artifactId>
+  <name>contrib/format-xml</name>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.drill.exec</groupId>
+      <artifactId>drill-java-exec</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+
+    <!-- Test dependencies -->
+    <dependency>
+      <groupId>org.apache.drill.exec</groupId>
+      <artifactId>drill-java-exec</artifactId>
+      <classifier>tests</classifier>
+      <version>${project.version}</version>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.drill</groupId>
+      <artifactId>drill-common</artifactId>
+      <classifier>tests</classifier>
+      <version>${project.version}</version>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+
+  <build>
+    <plugins>
+      <plugin>
+        <artifactId>maven-resources-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>copy-java-sources</id>
+            <phase>process-sources</phase>
+            <goals>
+              <goal>copy-resources</goal>
+            </goals>
+            <configuration>
+              
<outputDirectory>${basedir}/target/classes/org/apache/drill/exec/store/xml
+              </outputDirectory>
+              <resources>
+                <resource>
+                  
<directory>src/main/java/org/apache/drill/exec/store/xml</directory>
+                  <filtering>true</filtering>
+                </resource>
+              </resources>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+</project>
\ No newline at end of file
diff --git 
a/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLBatchReader.java
 
b/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLBatchReader.java
new file mode 100644
index 0000000..83f549f
--- /dev/null
+++ 
b/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLBatchReader.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.drill.exec.store.xml;
+
+import org.apache.drill.common.exceptions.CustomErrorContext;
+import org.apache.drill.common.exceptions.UserException;
+
+import org.apache.drill.exec.physical.impl.scan.file.FileScanFramework;
+import 
org.apache.drill.exec.physical.impl.scan.file.FileScanFramework.FileSchemaNegotiator;
+import org.apache.drill.exec.physical.impl.scan.framework.ManagedReader;
+import org.apache.drill.exec.physical.resultSet.ResultSetLoader;
+import org.apache.drill.exec.physical.resultSet.RowSetLoader;
+import org.apache.drill.exec.store.dfs.easy.EasySubScan;
+import org.apache.hadoop.mapred.FileSplit;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.InputStream;
+
+
+public class XMLBatchReader implements ManagedReader<FileSchemaNegotiator> {
+
+  private static final Logger logger = 
LoggerFactory.getLogger(XMLBatchReader.class);
+
+  private FileSplit split;
+  private RowSetLoader rootRowWriter;
+  private CustomErrorContext errorContext;
+
+  private XMLReader reader;
+  private final int maxRecords;
+  private final int dataLevel;
+
+
+  static class XMLReaderConfig {
+    final XMLFormatPlugin plugin;
+    final int dataLevel;
+
+    XMLReaderConfig(XMLFormatPlugin plugin) {
+      this.plugin = plugin;
+      dataLevel = plugin.getConfig().dataLevel;
+    }
+  }
+
+  public XMLBatchReader(XMLReaderConfig readerConfig, EasySubScan scan) {
+    this.maxRecords = scan.getMaxRecords();
+    this.dataLevel = readerConfig.dataLevel;
+  }
+
+  @Override
+  public boolean open(FileSchemaNegotiator negotiator) {
+    split = negotiator.split();
+    ResultSetLoader loader = negotiator.build();
+    errorContext = negotiator.parentErrorContext();
+    rootRowWriter = loader.writer();
+
+    openFile(negotiator);
+    return true;
+  }
+
+  @Override
+  public boolean next() {
+    return reader.next();
+  }
+
+  @Override
+  public void close() {
+    reader.close();
+  }
+
+  private void openFile(FileScanFramework.FileSchemaNegotiator negotiator) {
+    try {
+      InputStream fsStream = 
negotiator.fileSystem().openPossiblyCompressedStream(split.getPath());
+      reader = new XMLReader(fsStream, dataLevel, maxRecords);
+      reader.open(rootRowWriter, errorContext);
+    } catch (Exception e) {
+      throw UserException
+        .dataReadError(e)
+        .message("Failed to open input file: {}", 
split.getPath().toString())
+        .addContext(errorContext)
+        .addContext(e.getMessage())
+        .build(logger);
+    }
+  }
+}
diff --git 
a/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLFormatConfig.java
 
b/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLFormatConfig.java
new file mode 100644
index 0000000..0babf20
--- /dev/null
+++ 
b/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLFormatConfig.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.drill.exec.store.xml;
+
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonTypeName;
+import org.apache.drill.common.PlanStringBuilder;
+import org.apache.drill.common.logical.FormatPluginConfig;
+import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableList;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+@JsonTypeName(XMLFormatPlugin.DEFAULT_NAME)
+@JsonInclude(JsonInclude.Include.NON_DEFAULT)
+public class XMLFormatConfig implements FormatPluginConfig {
+
+  public final List<String> extensions;
+  public final int dataLevel;
+
+  public XMLFormatConfig(@JsonProperty("extensions") List<String> extensions,
+                         @JsonProperty("dataLevel") int dataLevel) {
+    this.extensions = extensions == null ? Collections.singletonList("xml") : 
ImmutableList.copyOf(extensions);
+    this.dataLevel = Math.max(dataLevel, 1);
+  }
+
+  @JsonInclude(JsonInclude.Include.NON_DEFAULT)
+  public List<String> getExtensions() {
+    return extensions;
+  }
+
+  @Override
+  public int hashCode() {
+    return Objects.hash(extensions, dataLevel);
+  }
+
+  public XMLBatchReader.XMLReaderConfig getReaderConfig(XMLFormatPlugin 
plugin) {
+    return new XMLBatchReader.XMLReaderConfig(plugin);
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj) {
+      return true;
+    }
+    if (obj == null || getClass() != obj.getClass()) {
+      return false;
+    }
+    XMLFormatConfig other = (XMLFormatConfig) obj;
+    return Objects.equals(extensions, other.extensions)
+      && Objects.equals(dataLevel, other.dataLevel);
+  }
+
+  @Override
+  public String toString() {
+    return new PlanStringBuilder(this)
+      .field("extensions", extensions)
+      .field("dataLevel", dataLevel)
+      .toString();
+  }
+}
\ No newline at end of file
diff --git 
a/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLFormatPlugin.java
 
b/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLFormatPlugin.java
new file mode 100644
index 0000000..7cf3932
--- /dev/null
+++ 
b/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLFormatPlugin.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.drill.exec.store.xml;
+
+import org.apache.drill.common.logical.StoragePluginConfig;
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.common.types.Types;
+import org.apache.drill.exec.physical.impl.scan.file.FileScanFramework;
+import 
org.apache.drill.exec.physical.impl.scan.file.FileScanFramework.FileScanBuilder;
+import org.apache.drill.exec.physical.impl.scan.framework.ManagedReader;
+import org.apache.drill.exec.proto.UserBitShared.CoreOperatorType;
+import org.apache.drill.exec.server.DrillbitContext;
+import org.apache.drill.exec.server.options.OptionManager;
+import org.apache.drill.exec.store.dfs.easy.EasyFormatPlugin;
+import org.apache.drill.exec.store.dfs.easy.EasySubScan;
+import org.apache.drill.shaded.guava.com.google.common.collect.Lists;
+import org.apache.hadoop.conf.Configuration;
+
+
+public class XMLFormatPlugin extends EasyFormatPlugin<XMLFormatConfig> {
+
+  public static final String DEFAULT_NAME = "xml";
+
+  public static class XMLReaderFactory extends 
FileScanFramework.FileReaderFactory {
+    private final XMLBatchReader.XMLReaderConfig readerConfig;
+    private final EasySubScan scan;
+
+    public XMLReaderFactory(XMLBatchReader.XMLReaderConfig config, EasySubScan 
scan) {
+      this.readerConfig = config;
+      this.scan = scan;
+    }
+
+    @Override
+    public ManagedReader<? extends FileScanFramework.FileSchemaNegotiator> 
newReader() {
+      return new XMLBatchReader(readerConfig, scan);
+    }
+  }
+
+  public XMLFormatPlugin(String name,
+                         DrillbitContext context,
+                         Configuration fsConf,
+                         StoragePluginConfig storageConfig,
+                         XMLFormatConfig formatConfig) {
+    super(name, easyConfig(fsConf, formatConfig), context, storageConfig, 
formatConfig);
+  }
+
+  private static EasyFormatConfig easyConfig(Configuration fsConf, 
XMLFormatConfig pluginConfig) {
+    EasyFormatConfig config = new EasyFormatConfig();
+    config.readable = true;
+    config.writable = false;
+    config.blockSplittable = false;
+    config.compressible = true;
+    config.supportsProjectPushdown = true;
+    config.extensions = Lists.newArrayList(pluginConfig.getExtensions());
+    config.fsConf = fsConf;
+    config.defaultName = DEFAULT_NAME;
+    config.readerOperatorType = CoreOperatorType.XML_SUB_SCAN_VALUE;
+    config.useEnhancedScan = true;
+    config.supportsLimitPushdown = true;
+    return config;
+  }
+
+  @Override
+  public ManagedReader<? extends FileScanFramework.FileSchemaNegotiator> 
newBatchReader(
+    EasySubScan scan, OptionManager options) {
+    return new XMLBatchReader(formatConfig.getReaderConfig(this), scan);
+  }
+
+  @Override
+  protected FileScanFramework.FileScanBuilder frameworkBuilder(OptionManager 
options, EasySubScan scan) {
+    FileScanBuilder builder = new FileScanBuilder();
+    builder.setReaderFactory(new XMLReaderFactory(new 
XMLBatchReader.XMLReaderConfig(this), scan));
+    initScanBuilder(builder, scan);
+    builder.nullType(Types.optional(MinorType.VARCHAR));
+    return builder;
+  }
+}
diff --git 
a/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLMap.java 
b/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLMap.java
new file mode 100644
index 0000000..557762c
--- /dev/null
+++ 
b/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLMap.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.drill.exec.store.xml;
+
+import org.apache.drill.common.PlanStringBuilder;
+import org.apache.drill.exec.vector.accessor.TupleWriter;
+
+import java.util.Objects;
+
+public class XMLMap {
+
+  private final String mapName;
+  private final TupleWriter mapWriter;
+
+  public XMLMap (String mapName, TupleWriter mapWriter) {
+    this.mapName = mapName;
+    this.mapWriter = mapWriter;
+  }
+
+  public TupleWriter getMapWriter() {
+    return mapWriter;
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj) {
+      return true;
+    }
+    if (obj == null || getClass() != obj.getClass()) {
+      return false;
+    }
+    XMLMap other = (XMLMap) obj;
+    return Objects.equals(mapName, other.mapName);
+  }
+
+  @Override
+  public int hashCode() {
+    return Objects.hash(mapName);
+  }
+
+  @Override
+  public String toString() {
+    return new PlanStringBuilder(this)
+      .field("Map Name", mapName)
+      .toString();
+  }
+}
diff --git 
a/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLReader.java
 
b/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLReader.java
new file mode 100644
index 0000000..7665e6a
--- /dev/null
+++ 
b/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLReader.java
@@ -0,0 +1,416 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.drill.exec.store.xml;
+
+import org.apache.drill.common.AutoCloseables;
+import org.apache.drill.common.exceptions.CustomErrorContext;
+import org.apache.drill.common.exceptions.UserException;
+import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.common.types.TypeProtos.DataMode;
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.physical.resultSet.RowSetLoader;
+import org.apache.drill.exec.record.metadata.ColumnMetadata;
+import org.apache.drill.exec.record.metadata.MetadataUtils;
+import org.apache.drill.exec.record.metadata.SchemaBuilder;
+import org.apache.drill.exec.vector.accessor.ScalarWriter;
+import org.apache.drill.exec.vector.accessor.TupleWriter;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.xml.stream.XMLEventReader;
+import javax.xml.stream.XMLInputFactory;
+import javax.xml.stream.XMLStreamConstants;
+import javax.xml.stream.XMLStreamException;
+import javax.xml.stream.events.Attribute;
+import javax.xml.stream.events.StartElement;
+import javax.xml.stream.events.XMLEvent;
+import java.io.InputStream;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Stack;
+
+public class XMLReader {
+  private static final Logger logger = 
LoggerFactory.getLogger(XMLReader.class);
+  private static final String ATTRIBUTE_MAP_NAME = "attributes";
+
+  private final Stack<String> fieldNameStack;
+  private final Stack<TupleWriter> rowWriterStack;
+  private final int dataLevel;
+  private final int maxRecords;
+  private final Map<String, XMLMap> nestedMapCollection;
+
+  private TupleWriter attributeWriter;
+  private CustomErrorContext errorContext;
+  private RowSetLoader rootRowWriter;
+  private int currentNestingLevel;
+  private XMLEvent currentEvent;
+  private String rootDataFieldName;
+  private String fieldName;
+  private xmlState currentState;
+  private TupleWriter currentTupleWriter;
+  private boolean rowStarted;
+  private String attributePrefix;
+  private String fieldValue;
+  private InputStream fsStream;
+  private XMLEventReader reader;
+
+  /**
+   * This field indicates the various states in which the reader operates. The 
names should be self explanatory,
+   * but they are used as the reader iterates over the XML tags to know what 
to do.
+   */
+  private enum xmlState {
+    ROW_STARTED,
+    POSSIBLE_MAP,
+    NESTED_MAP_STARTED,
+    GETTING_DATA,
+    WRITING_DATA,
+    FIELD_ENDED,
+    ROW_ENDED
+  }
+
+  public XMLReader(InputStream fsStream, int dataLevel, int maxRecords) throws 
XMLStreamException {
+    this.fsStream = fsStream;
+    XMLInputFactory inputFactory = XMLInputFactory.newInstance();
+    reader = inputFactory.createXMLEventReader(fsStream);
+    fieldNameStack = new Stack<>();
+    rowWriterStack = new Stack<>();
+    nestedMapCollection = new HashMap<>();
+    this.dataLevel = dataLevel;
+    this.maxRecords = maxRecords;
+
+  }
+
+  public void open(RowSetLoader rootRowWriter, CustomErrorContext errorContext 
) {
+    this.errorContext = errorContext;
+    this.rootRowWriter = rootRowWriter;
+    attributeWriter = getAttributeWriter();
+  }
+
+  public boolean next() {
+    while (!rootRowWriter.isFull()) {
+      try {
+        if (!processElements()) {
+          return false;
+        }
+      } catch (Exception e) {
+        throw UserException
+          .dataReadError(e)
+          .message("Error parsing file: " + e.getMessage())
+          .addContext(errorContext)
+          .build(logger);
+      }
+    }
+    return true;
+  }
+
+
+  public void close() {
+    if (fsStream != null) {
+      AutoCloseables.closeSilently(fsStream);
+      fsStream = null;
+    }
+
+    if (reader != null) {
+      try {
+        reader.close();
+      } catch (XMLStreamException e) {
+        logger.warn("Error when closing XML stream: {}", e.getMessage());
+      }
+      reader = null;
+    }
+  }
+
+  /**
+   * This function processes the XML elements.  This function stops reading 
when the
+   * limit (if any) which came from the query has been reached or the Iterator 
runs out of
+   * elements.
+   * @return True if there are more elements to parse, false if not
+   */
+  private boolean processElements() {
+    XMLEvent nextEvent;
+
+    if (!reader.hasNext()) {
+      // Stop reading if there are no more results
+      return false;
+    } else if (rootRowWriter.limitReached(maxRecords)) {
+      // Stop if the query limit has been reached
+      return false;
+    }
+
+    // Iterate over XML events
+    while (reader.hasNext()) {
+      // get the current event
+      try {
+        nextEvent = reader.nextEvent();
+
+        // If the next event is whitespace, newlines, or other cruft that we 
don't need
+        // ignore and move to the next event
+        if (XMLUtils.isEmptyWhiteSpace(nextEvent)) {
+          continue;
+        }
+
+        // Capture the previous and current event
+        XMLEvent lastEvent = currentEvent;
+        currentEvent = nextEvent;
+
+        // Process the event
+        processEvent(currentEvent, lastEvent);
+      } catch (XMLStreamException e) {
+        throw UserException
+          .dataReadError(e)
+          .message("Error parsing XML file: " + e.getMessage())
+          .addContext(errorContext)
+          .build(logger);
+      }
+    }
+    return true;
+  }
+
  /**
   * This function processes an actual XMLEvent. There are three possibilities:
   * 1.  The event is a start event
   * 2.  The event contains text
   * 3.  The event is a closing tag
   * There are other possible elements, but they are not relevant for our purposes.
   *
   * @param currentEvent The current event to be processed
   * @param lastEvent The previous event which was processed
   */
  private void processEvent(XMLEvent currentEvent,
                            XMLEvent lastEvent) {
    String mapName;
    switch (currentEvent.getEventType()) {

      /*
       * This case handles start elements.
       * Case 1:  The current nesting level is less than the data level.
       * In this case, increase the nesting level and stop processing.
       *
       * Case 2: The nesting level is higher than the data level.
       * In this case, a few things must happen.
       * 1.  We capture the field name
       * 2.  If the row has not started, we start the row
       * 3.  Set the possible map flag
       * 4.  Process attributes
       * 5.  Push both the field name and writer to the stacks
       */
      case XMLStreamConstants.START_ELEMENT:
        currentNestingLevel++;

        // Case 1: Current nesting level is less than the data level
        if (currentNestingLevel < dataLevel) {
          // Stop here if the current level of nesting has not reached the data.
          break;
        }

        StartElement startElement = currentEvent.asStartElement();
        // Get the field name
        fieldName = startElement.getName().getLocalPart();

        // The first tag seen at the data level is remembered as the record
        // delimiter; its closing tag marks the end of a row.
        if (rootDataFieldName == null && currentNestingLevel == dataLevel) {
          rootDataFieldName = fieldName;
          logger.debug("Root field name: {}", rootDataFieldName);
        }

        if (!rowStarted) {
          currentTupleWriter = startRow(rootRowWriter);
        } else {
          // Two consecutive START_ELEMENT events indicate the outer element
          // may be a nested map; CHARACTERS handling below confirms it.
          if (lastEvent != null &&
            lastEvent.getEventType() == XMLStreamConstants.START_ELEMENT) {
            /*
             * Check the flag in the next section.  If the next element is a
             * character AND the flag is set, start a map.  If not... ignore it all.
             */
            changeState(xmlState.POSSIBLE_MAP);

            rowWriterStack.push(currentTupleWriter);
          }

          fieldNameStack.push(fieldName);
          // Below the data level, attribute keys are namespaced by the
          // underscore-joined path of enclosing field names.
          if (currentNestingLevel > dataLevel) {
            attributePrefix = XMLUtils.addField(attributePrefix, fieldName);
          }

          Iterator<Attribute> attributes = startElement.getAttributes();
          writeAttributes(attributePrefix, attributes);
        }
        break;

      /*
       * This case processes character elements.
       */
      case XMLStreamConstants.CHARACTERS:
        // Characters that appear after a row has ended (e.g. between records)
        // are discarded.
        if (currentState == xmlState.ROW_ENDED) {
          break;
        }

        // Get the field value but ignore characters outside of rows
        if (rowStarted) {
          if (currentState == xmlState.POSSIBLE_MAP && currentNestingLevel > dataLevel + 1) {
            // The POSSIBLE_MAP flag is confirmed: open a map writer for the
            // enclosing element.
            changeState(xmlState.NESTED_MAP_STARTED);

            // Remove the current field name from the stack
            if (fieldNameStack.size() > 1) {
              fieldNameStack.pop();
            }
            // Get the map name and push to stack
            mapName = fieldNameStack.pop();
            currentTupleWriter = getMapWriter(mapName, currentTupleWriter);
          } else {
            changeState(xmlState.ROW_STARTED);
          }
        }

        fieldValue = currentEvent.asCharacters().getData().trim();
        changeState(xmlState.GETTING_DATA);
        break;

      case XMLStreamConstants.END_ELEMENT:
        currentNestingLevel--;

        if (currentNestingLevel < dataLevel - 1) {
          // Above the data level: nothing to close.
          break;
        } else if (currentEvent.asEndElement().getName().toString().compareTo(rootDataFieldName) == 0) {
          // Closing tag of the record element: end the row.
          currentTupleWriter = endRow();

          // Clear stacks
          rowWriterStack.clear();
          fieldNameStack.clear();
          attributePrefix = "";

        } else if (currentState == xmlState.FIELD_ENDED && currentNestingLevel >= dataLevel) {
          // Case to end nested maps
          // Pop tupleWriter off stack
          currentTupleWriter = rowWriterStack.pop();
          attributePrefix = XMLUtils.removeField(attributePrefix);

        } else if (currentState != xmlState.ROW_ENDED) {
          writeFieldData(fieldName, fieldValue, currentTupleWriter);
          // Clear out field name and value
          fieldName = null;
          fieldValue = null;
          attributePrefix = XMLUtils.removeField(attributePrefix);
        }
        break;
    }
  }
+
+  private TupleWriter startRow(RowSetLoader writer) {
+    if (currentNestingLevel == dataLevel) {
+      rootRowWriter.start();
+      rowStarted = true;
+      rowWriterStack.push(rootRowWriter);
+      changeState(xmlState.ROW_STARTED);
+      return rootRowWriter;
+    } else {
+      rowStarted = false;
+      return writer;
+    }
+  }
+
+  /**
+   * This method executes the steps to end a row from an XML dataset.
+   * @return the root row writer
+   */
+  private TupleWriter endRow() {
+    logger.debug("Ending row");
+    rootRowWriter.save();
+    rowStarted = false;
+    changeState(xmlState.ROW_ENDED);
+    return rootRowWriter;
+  }
+
+  /**
+   * Writes a field. If the field does not have a corresponding ScalarWriter, 
this method will
+   * create one.
+   * @param fieldName The field name
+   * @param fieldValue The field value to be written
+   * @param writer The TupleWriter which represents
+   */
+  private void writeFieldData(String fieldName, String fieldValue, TupleWriter 
writer) {
+    if (fieldName == null) {
+      return;
+    }
+
+    changeState(xmlState.WRITING_DATA);
+
+    // Find the TupleWriter object
+    int index = writer.tupleSchema().index(fieldName);
+    if (index == -1) {
+      ColumnMetadata colSchema = MetadataUtils.newScalar(fieldName, 
TypeProtos.MinorType.VARCHAR, TypeProtos.DataMode.OPTIONAL);
+      index = writer.addColumn(colSchema);
+    }
+    ScalarWriter colWriter = writer.scalar(index);
+    if (fieldValue != null && (currentState != xmlState.ROW_ENDED && 
currentState != xmlState.FIELD_ENDED)) {
+      colWriter.setString(fieldValue);
+      changeState(xmlState.FIELD_ENDED);
+    }
+  }
+
+  /**
+   * Returns a MapWriter for a given field.  If the writer does not exist, add 
one to the schema
+   * @param mapName The Map's name
+   * @param rowWriter The current TupleWriter
+   * @return A TupleWriter of the new map
+   */
+  private TupleWriter getMapWriter(String mapName, TupleWriter rowWriter) {
+    logger.debug("Adding map: {}", mapName);
+    int index = rowWriter.tupleSchema().index(mapName);
+    if (index == -1) {
+      // Check to see if the map already exists in the map collection
+      // This condition can occur in deeply nested data.
+      String tempFieldName = mapName + "-" + currentNestingLevel;
+      XMLMap mapObject = nestedMapCollection.get(tempFieldName);
+      if (mapObject != null) {
+        logger.debug("Found map {}", tempFieldName);
+        return mapObject.getMapWriter();
+      }
+
+      index = rowWriter.addColumn(SchemaBuilder.columnSchema(mapName, 
MinorType.MAP, DataMode.REQUIRED));
+      // Add map to map collection for future use
+      nestedMapCollection.put(tempFieldName, new XMLMap(mapName, 
rowWriter.tuple(index)));
+    }
+    return rowWriter.tuple(index);
+  }
+
+  private void changeState(xmlState newState) {
+    xmlState previousState = currentState;
+    currentState = newState;
+  }
+
+  private TupleWriter getAttributeWriter() {
+    int attributeIndex = 
rootRowWriter.addColumn(SchemaBuilder.columnSchema(ATTRIBUTE_MAP_NAME, 
MinorType.MAP, DataMode.REQUIRED));
+    return rootRowWriter.tuple(attributeIndex);
+  }
+
+  /**
+   * Helper function which writes attributes of an XML element.
+   * @param prefix The attribute prefix
+   * @param attributes An iterator of Attribute objects
+   */
+  private void writeAttributes(String prefix, Iterator<Attribute> attributes) {
+    while (attributes.hasNext()) {
+      Attribute currentAttribute = attributes.next();
+      String key = prefix + "_" + currentAttribute.getName().toString();
+      writeFieldData(key, currentAttribute.getValue(), attributeWriter);
+    }
+  }
+
+}
diff --git 
a/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLUtils.java
 
b/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLUtils.java
new file mode 100644
index 0000000..b7ea8e7
--- /dev/null
+++ 
b/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLUtils.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.drill.exec.store.xml;

import org.apache.drill.shaded.guava.com.google.common.base.Strings;

import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.events.XMLEvent;

import java.util.Arrays;
+
+public class XMLUtils {
+
+  /**
+   * Empty events are not helpful so this method checks to see if the event 
consists solely of whitespace
+   * or newline characters.  Unfortunately, newlines and other extraneous 
characters are treated as new elements, so
+   * this function wraps a lot of those checks in one function.
+   * @param event The input XMLEvent
+   * @return True if the XMLEvent is only whitespace, false if not.
+   */
+  public static boolean isEmptyWhiteSpace(XMLEvent event) {
+    if (event.getEventType() == XMLStreamConstants.COMMENT) {
+      return true;
+    } else if (event.getEventType() != XMLStreamConstants.CHARACTERS) {
+      return false;
+    }
+
+    String value = event.asCharacters().getData();
+    if (Strings.isNullOrEmpty(value.trim())) {
+      return true;
+    } else {
+      return event.asCharacters().isIgnorableWhiteSpace();
+    }
+  }
+
+  /**
+   * Identifies XML events that may be populated but are not useful for 
extracting data.
+   * @param event The XMLEvent in question
+   * @return True if the event is useful, false if not
+   */
+  public static boolean isNotCruft(XMLEvent event) {
+    int eventType = event.getEventType();
+    return eventType == XMLStreamConstants.CHARACTERS ||
+      eventType == XMLStreamConstants.START_ELEMENT ||
+      eventType == XMLStreamConstants.END_ELEMENT;
+  }
+
+  /**
+   * Generates a nested field name by combining a field prefix to the current 
field name.
+   * @param prefix The prefix to be added to the field name.
+   * @param field The field name
+   * @return the prefix, followed by an underscore and the fieldname.
+   */
+  public static String addField(String prefix, String field) {
+    if (Strings.isNullOrEmpty(prefix)) {
+      return field;
+    }
+    return prefix + "_" + field;
+  }
+
+  /**
+   * Returns the field name from nested field names
+   * @param fieldName The nested field name
+   * @return The field name
+   */
+  public static String removeField(String fieldName) {
+    String[] components = fieldName.split("_");
+    StringBuilder newField = new StringBuilder();
+    for (int i = 0; i < components.length - 1; i++) {
+      if (i > 0) {
+        newField.append("_").append(components[i]);
+      } else {
+        newField = new StringBuilder(components[i]);
+      }
+    }
+    return newField.toString();
+  }
+
+}
diff --git 
a/contrib/format-xml/src/main/resources/bootstrap-format-plugins.json 
b/contrib/format-xml/src/main/resources/bootstrap-format-plugins.json
new file mode 100644
index 0000000..ef5f59c
--- /dev/null
+++ b/contrib/format-xml/src/main/resources/bootstrap-format-plugins.json
@@ -0,0 +1,26 @@
+{
+  "storage":{
+    "dfs": {
+      "type": "file",
+      "formats": {
+        "xml": {
+          "type": "xml",
+          "extensions": [
+            "xml"
+          ]
+        }
+      }
+    },
+    "s3": {
+      "type": "file",
+      "formats": {
+        "xml": {
+          "type": "xml",
+          "extensions": [
+            "xml"
+          ]
+        }
+      }
+    }
+  }
+}
diff --git a/contrib/format-xml/src/main/resources/drill-module.conf 
b/contrib/format-xml/src/main/resources/drill-module.conf
new file mode 100644
index 0000000..04406a3
--- /dev/null
+++ b/contrib/format-xml/src/main/resources/drill-module.conf
@@ -0,0 +1,25 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+#  This file tells Drill to consider this module when class path scanning.
+#  This file can also include any supplementary configuration information.
+#  This file is in HOCON format, see 
https://github.com/typesafehub/config/blob/master/HOCON.md for more information.
+
+drill.classpath.scanning: {
+  packages += "org.apache.drill.exec.store.xml"
+}
diff --git 
a/contrib/format-xml/src/test/java/org/apache/drill/exec/store/xml/TestXMLReader.java
 
b/contrib/format-xml/src/test/java/org/apache/drill/exec/store/xml/TestXMLReader.java
new file mode 100644
index 0000000..b515dab
--- /dev/null
+++ 
b/contrib/format-xml/src/test/java/org/apache/drill/exec/store/xml/TestXMLReader.java
@@ -0,0 +1,428 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.drill.exec.store.xml;
+
+import org.apache.drill.categories.RowSetTests;
+import org.apache.drill.common.types.TypeProtos.DataMode;
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.physical.rowSet.RowSet;
+import org.apache.drill.exec.record.metadata.SchemaBuilder;
+import org.apache.drill.exec.record.metadata.TupleMetadata;
+import org.apache.drill.test.ClusterFixture;
+import org.apache.drill.test.ClusterTest;
+import org.apache.drill.test.rowSet.RowSetComparison;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import java.nio.file.Paths;
+
+import static org.apache.drill.test.QueryTestUtil.generateCompressedFile;
+import static org.apache.drill.test.rowSet.RowSetUtilities.mapArray;
+import static org.apache.drill.test.rowSet.RowSetUtilities.objArray;
+import static org.apache.drill.test.rowSet.RowSetUtilities.strArray;
+import static org.junit.Assert.assertEquals;
+
@Category(RowSetTests.class)
public class TestXMLReader extends ClusterTest {

  @BeforeClass
  public static void setup() throws Exception {
    ClusterTest.startCluster(ClusterFixture.builder(dirTestWatcher));

    // Register the XML format (record data at nesting level 2) on both the
    // classpath ("cp") and filesystem ("dfs") storage plugins.
    XMLFormatConfig formatConfig = new XMLFormatConfig(null, 2);
    cluster.defineFormat("cp", "xml", formatConfig);
    cluster.defineFormat("dfs", "xml", formatConfig);

    // Needed for compressed file unit test
    dirTestWatcher.copyResourceToRoot(Paths.get("xml/"));
  }

  /**
   * This unit test tests a simple XML file with no nesting or attributes
   * @throws Exception Throw exception if anything goes wrong
   */
  @Test
  public void testWildcard() throws Exception {
    String sql = "SELECT * FROM cp.`xml/simple.xml`";
    RowSet results = client.queryBuilder().sql(sql).rowSet();
    assertEquals(3, results.rowCount());

    TupleMetadata expectedSchema = new SchemaBuilder()
      .add("attributes", MinorType.MAP)
      .addNullable("groupID", MinorType.VARCHAR)
      .addNullable("artifactID", MinorType.VARCHAR)
      .addNullable("version", MinorType.VARCHAR)
      .addNullable("classifier", MinorType.VARCHAR)
      .addNullable("scope", MinorType.VARCHAR)
      .buildSchema();

    RowSet expected = client.rowSetBuilder(expectedSchema)
      .addRow(mapArray(), "org.apache.drill.exec", "drill-java-exec", "${project.version}", null, null)
      .addRow(mapArray(), "org.apache.drill.exec", "drill-java-exec", "${project.version}", "tests", "test")
      .addRow(mapArray(), "org.apache.drill", "drill-common", "${project.version}", "tests", "test")
      .build();

    new RowSetComparison(expected).verifyAndClearAll(results);
  }

  /**
   * This unit test tests a simple XML file with no nesting or attributes, but with explicitly selected fields.
   * @throws Exception Throw exception if anything goes wrong
   */
  @Test
  public void testExplicitWithSimpleXMLFile() throws Exception {
    String sql = "SELECT groupID, artifactID, version, classifier, scope FROM cp.`xml/simple.xml`";
    RowSet results = client.queryBuilder().sql(sql).rowSet();

    assertEquals(3, results.rowCount());

    TupleMetadata expectedSchema = new SchemaBuilder()
      .addNullable("groupID", MinorType.VARCHAR)
      .addNullable("artifactID", MinorType.VARCHAR)
      .addNullable("version", MinorType.VARCHAR)
      .addNullable("classifier", MinorType.VARCHAR)
      .addNullable("scope", MinorType.VARCHAR)
      .buildSchema();

    RowSet expected = client.rowSetBuilder(expectedSchema)
      .addRow("org.apache.drill.exec", "drill-java-exec", "${project.version}", null, null)
      .addRow("org.apache.drill.exec", "drill-java-exec", "${project.version}", "tests", "test")
      .addRow("org.apache.drill", "drill-common", "${project.version}", "tests", "test")
      .build();

    new RowSetComparison(expected).verifyAndClearAll(results);
  }

  /**
   * Verifies that a WHERE clause filters rows from a wildcard query.
   * @throws Exception Throw exception if anything goes wrong
   */
  @Test
  public void testWildcardWithFilter() throws Exception {
    String sql = "SELECT * FROM cp.`xml/simple.xml` WHERE scope='test'";
    RowSet results = client.queryBuilder().sql(sql).rowSet();
    assertEquals(2, results.rowCount());

    TupleMetadata expectedSchema = new SchemaBuilder()
      .add("attributes", MinorType.MAP)
      .addNullable("groupID", MinorType.VARCHAR)
      .addNullable("artifactID", MinorType.VARCHAR)
      .addNullable("version", MinorType.VARCHAR)
      .addNullable("classifier", MinorType.VARCHAR)
      .addNullable("scope", MinorType.VARCHAR)
      .buildSchema();

    RowSet expected = client.rowSetBuilder(expectedSchema)
      .addRow(mapArray(), "org.apache.drill.exec", "drill-java-exec", "${project.version}", "tests", "test")
      .addRow(mapArray(), "org.apache.drill", "drill-common", "${project.version}", "tests", "test")
      .build();

    new RowSetComparison(expected).verifyAndClearAll(results);
  }

  /**
   * Verifies that a single nested element is projected as a Drill MAP column.
   * @throws Exception Throw exception if anything goes wrong
   */
  @Test
  public void testWildcardWithSingleNestedDataField() throws Exception {
    String sql = "SELECT * FROM cp.`xml/really-simple-nested.xml`";
    RowSet results = client.queryBuilder().sql(sql).rowSet();
    assertEquals(3, results.rowCount());

    TupleMetadata expectedSchema = new SchemaBuilder()
      .add("attributes", MinorType.MAP, DataMode.REQUIRED)
      .addMap("field1")
        .addNullable("key1", MinorType.VARCHAR)
        .addNullable("key2", MinorType.VARCHAR)
      .resumeSchema()
      .buildSchema();

    RowSet expected = client.rowSetBuilder(expectedSchema)
      .addRow(mapArray(), strArray("value1", "value2"))
      .addRow(mapArray(), strArray("value3", "value4"))
      .addRow(mapArray(), strArray("value5", "value6"))
      .build();

    new RowSetComparison(expected).verifyAndClearAll(results);
  }

  /**
   * Verifies explicit projection of fields inside a nested map.
   * @throws Exception Throw exception if anything goes wrong
   */
  @Test
  public void testExplicitWithSingleNestedDataField() throws Exception {
    String sql = "SELECT t1.field1.key1 as key1, t1.field1.key2 as key2 FROM cp.`xml/really-simple-nested.xml` as t1";
    RowSet results = client.queryBuilder().sql(sql).rowSet();
    assertEquals(3, results.rowCount());

    TupleMetadata expectedSchema = new SchemaBuilder()
      .addNullable("key1", MinorType.VARCHAR)
      .addNullable("key2", MinorType.VARCHAR)
      .buildSchema();

    RowSet expected = client.rowSetBuilder(expectedSchema)
      .addRow("value1", "value2")
      .addRow("value3", "value4")
      .addRow("value5", "value6")
      .build();

    new RowSetComparison(expected).verifyAndClearAll(results);
  }

  /**
   * Verifies that the physical plan for an XML query survives a
   * serialize/deserialize round trip.
   * @throws Exception Throw exception if anything goes wrong
   */
  @Test
  public void testSerDe() throws Exception {
    String sql = "SELECT COUNT(*) FROM cp.`xml/simple.xml`";
    String plan = queryBuilder().sql(sql).explainJson();
    long cnt = queryBuilder().physical(plan).singletonLong();
    assertEquals("Counts should match", 3L, cnt);
  }

  /**
   * Verifies that a zip-compressed XML file is read transparently.
   * @throws Exception Throw exception if anything goes wrong
   */
  @Test
  public void testExplicitWithCompressedSimpleXMLFile() throws Exception {
    generateCompressedFile("xml/simple.xml", "zip", "xml/simple.xml.zip");

    String sql = "SELECT groupID, artifactID, version, classifier, scope FROM dfs.`xml/simple.xml.zip`";
    RowSet results = client.queryBuilder().sql(sql).rowSet();

    assertEquals(3, results.rowCount());

    TupleMetadata expectedSchema = new SchemaBuilder()
      .addNullable("groupID", MinorType.VARCHAR)
      .addNullable("artifactID", MinorType.VARCHAR)
      .addNullable("version", MinorType.VARCHAR)
      .addNullable("classifier", MinorType.VARCHAR)
      .addNullable("scope", MinorType.VARCHAR)
      .buildSchema();

    RowSet expected = client.rowSetBuilder(expectedSchema)
      .addRow("org.apache.drill.exec", "drill-java-exec", "${project.version}", null, null)
      .addRow("org.apache.drill.exec", "drill-java-exec", "${project.version}", "tests", "test")
      .addRow("org.apache.drill", "drill-common", "${project.version}", "tests", "test")
      .build();

    new RowSetComparison(expected).verifyAndClearAll(results);
  }

  /**
   * Verifies explicit projection of fields nested seven levels deep.
   * @throws Exception Throw exception if anything goes wrong
   */
  @Test
  public void testDeepNestedSpecificFields() throws Exception {
    String sql = "select xml.level2.level3.level4.level5.level6.level7.field1 as field1, xml.level2.level3.level4.level5.level6.level7.field2 as field2, xml.level2.level3.level4" +
      ".level5.level6.level7.field3 as field3 FROM cp.`xml/deep-nested.xml` as xml";
    RowSet results = client.queryBuilder().sql(sql).rowSet();

    assertEquals(2, results.rowCount());

    TupleMetadata expectedSchema = new SchemaBuilder()
      .addNullable("field1", MinorType.VARCHAR)
      .addNullable("field2", MinorType.VARCHAR)
      .addNullable("field3", MinorType.VARCHAR)
      .build();

    RowSet expected = client.rowSetBuilder(expectedSchema)
      .addRow("f1", "f2", "f3")
      .addRow("f4", "f5", "f6")
      .build();

    new RowSetComparison(expected).verifyAndClearAll(results);
  }

  /**
   * Verifies the wildcard projection of deeply (seven-level) nested maps.
   * @throws Exception Throw exception if anything goes wrong
   */
  @Test
  public void testDeepNesting() throws Exception {
    String sql = "SELECT * FROM cp.`xml/deep-nested.xml`";
    RowSet results = client.queryBuilder().sql(sql).rowSet();

    assertEquals(2, results.rowCount());

    TupleMetadata expectedSchema = new SchemaBuilder()
      .add("attributes", MinorType.MAP, DataMode.REQUIRED)
      .addMap("level2")
        .addNullable("field1-level2", MinorType.VARCHAR)
        .addMap("level3")
        .addNullable("field1-level3", MinorType.VARCHAR)
          .addMap("level4")
          .addNullable("field1-level4", MinorType.VARCHAR)
            .addMap("level5")
            .addNullable("field1-level5", MinorType.VARCHAR)
              .addMap("level6")
              .addNullable("field1-level6", MinorType.VARCHAR)
                .addMap("level7")
                .addNullable("field1", MinorType.VARCHAR)
                .addNullable("field2", MinorType.VARCHAR)
                .addNullable("field3", MinorType.VARCHAR)
              .resumeMap()  // End level 7
              .resumeMap()   // End level 6
            .resumeMap() // End level 5
          .resumeMap() // End level 4
        .resumeMap() // End level 3
      .resumeSchema()
      .build();

    RowSet expected = client.rowSetBuilder(expectedSchema)
      .addRow(mapArray(), objArray(
        objArray(
          "l2",
          objArray("l3",
            objArray("l4",
              objArray("l5",
                objArray("l6",
                  strArray("f1", "f2", "f3")
                )
              )
            )
          )
        )
      ))
      .addRow(mapArray(), objArray(
        objArray(
          null,
          objArray(null,
            objArray(null,
              objArray(null,
                objArray(null,
                  strArray("f4", "f5", "f6")
                )
              )
            )
          )
        )
      ))
      .build();

    new RowSetComparison(expected).verifyAndClearAll(results);
  }

  /**
   * Verifies that the dataLevel table-function option flattens the rows at a
   * deeper nesting level.
   * @throws Exception Throw exception if anything goes wrong
   */
  @Test
  public void testDataLevel() throws Exception {
    String sql = "SELECT * FROM table(cp.`xml/deep-nested2.xml` (type => 'xml', dataLevel => 8))";
    RowSet results = client.queryBuilder().sql(sql).rowSet();

    TupleMetadata expectedSchema = new SchemaBuilder()
      .add("attributes", MinorType.MAP, DataMode.REQUIRED)
      .addNullable("field1", MinorType.VARCHAR)
      .addNullable("field2", MinorType.VARCHAR)
      .addNullable("field3", MinorType.VARCHAR)
      .addNullable("field1-level6", MinorType.VARCHAR)
      .build();

    RowSet expected = client.rowSetBuilder(expectedSchema)
      .addRow(mapArray(), "f4", "f5", "f6", null)
      .addRow(mapArray(), "f1", "f2", "f3", "l6")
      .build();

    assertEquals(2, results.rowCount());
    new RowSetComparison(expected).verifyAndClearAll(results);
  }

  /**
   * Same as {@code testDataLevel} but with explicit field projection.
   * @throws Exception Throw exception if anything goes wrong
   */
  @Test
  public void testExplicitDataLevel() throws Exception {
    String sql = "SELECT field1, field2, field3 FROM table(cp.`xml/deep-nested2.xml` (type => 'xml', dataLevel => 8))";
    RowSet results = client.queryBuilder().sql(sql).rowSet();

    TupleMetadata expectedSchema = new SchemaBuilder()
      .addNullable("field1", MinorType.VARCHAR)
      .addNullable("field2", MinorType.VARCHAR)
      .addNullable("field3", MinorType.VARCHAR)
      .build();

    RowSet expected = client.rowSetBuilder(expectedSchema)
      .addRow("f4", "f5", "f6")
      .addRow("f1", "f2", "f3")
      .build();

    assertEquals(2, results.rowCount());
    new RowSetComparison(expected).verifyAndClearAll(results);
  }

  /**
   * Verifies a wildcard query over a file with both flat and nested maps.
   * @throws Exception Throw exception if anything goes wrong
   */
  @Test
  public void testComplexWildcardStar() throws Exception {
    String sql = "SELECT * FROM cp.`xml/nested.xml`";
    RowSet results = client.queryBuilder().sql(sql).rowSet();

    TupleMetadata expectedSchema = new SchemaBuilder()
      .add("attributes", MinorType.MAP, DataMode.REQUIRED)
      .addMap("field1")
        .addNullable("key1", MinorType.VARCHAR)
        .addNullable("key2", MinorType.VARCHAR)
      .resumeSchema()
      .addMap("field2")
        .addNullable("key3", MinorType.VARCHAR)
        .addMap("nestedField1")
          .addNullable("nk1", MinorType.VARCHAR)
          .addNullable("nk2", MinorType.VARCHAR)
          .addNullable("nk3", MinorType.VARCHAR)
        .resumeMap()
      .resumeSchema()
      .buildSchema();

    RowSet expected = client.rowSetBuilder(expectedSchema)
      .addRow(mapArray(), strArray("value1", "value2"), objArray("k1", strArray("nk_value1", "nk_value2", "nk_value3")))
      .addRow(mapArray(), strArray("value3", "value4"), objArray("k2", strArray("nk_value4", "nk_value5", "nk_value6")))
      .addRow(mapArray(), strArray("value5", "value6"), objArray("k3", strArray("nk_value7", "nk_value8", "nk_value9")))
      .build();

    assertEquals(3, results.rowCount());
    new RowSetComparison(expected).verifyAndClearAll(results);
  }

  /**
   * Verifies explicit projection of keys inside a nested map within a map.
   * @throws Exception Throw exception if anything goes wrong
   */
  @Test
  public void testComplexNestedExplicit() throws Exception {
    String sql = "SELECT xml.field2.nestedField1.nk1 as nk1, xml.field2.nestedField1.nk2 as nk2, xml.field2.nestedField1.nk3 as nk3 FROM cp.`xml/nested.xml` AS xml";
    RowSet results = client.queryBuilder().sql(sql).rowSet();

    TupleMetadata expectedSchema = new SchemaBuilder()
      .addNullable("nk1", MinorType.VARCHAR)
      .addNullable("nk2", MinorType.VARCHAR)
      .addNullable("nk3", MinorType.VARCHAR)
      .build();

    RowSet expected = client.rowSetBuilder(expectedSchema)
      .addRow("nk_value1", "nk_value2", "nk_value3")
      .addRow("nk_value4", "nk_value5", "nk_value6")
      .addRow("nk_value7", "nk_value8", "nk_value9")
      .build();

    assertEquals(3, results.rowCount());
    new RowSetComparison(expected).verifyAndClearAll(results);
  }

  /**
   * Verifies that XML attributes are collected into the "attributes" map with
   * underscore-joined keys (e.g. title_binding).
   * @throws Exception Throw exception if anything goes wrong
   */
  @Test
  public void testAttributes() throws Exception {
    String sql = "SELECT attributes FROM cp.`xml/attributes.xml`";
    RowSet results = client.queryBuilder().sql(sql).rowSet();

    TupleMetadata expectedSchema = new SchemaBuilder()
      .addMap("attributes")
        .addNullable("title_binding", MinorType.VARCHAR)
        .addNullable("title_subcategory", MinorType.VARCHAR)
      .resumeSchema()
      .build();

    RowSet expected = client.rowSetBuilder(expectedSchema)
      .addRow((Object) mapArray(null, null))
      .addRow((Object) strArray("paperback", null))
      .addRow((Object) strArray("hardcover", "non-fiction"))
      .build();

    assertEquals(3, results.rowCount());
    new RowSetComparison(expected).verifyAndClearAll(results);
  }

  /**
   * Verifies that a LIMIT clause is pushed down into the scan.
   * @throws Exception Throw exception if anything goes wrong
   */
  @Test
  public void testLimitPushdown() throws Exception {
    String sql = "SELECT * FROM cp.`xml/simple.xml` LIMIT 2";

    queryBuilder()
      .sql(sql)
      .planMatcher()
      .include("Limit", "maxRecords=2")
      .match();
  }
}
diff --git a/contrib/format-xml/src/test/resources/xml/attributes.xml 
b/contrib/format-xml/src/test/resources/xml/attributes.xml
new file mode 100644
index 0000000..a44eca0
--- /dev/null
+++ b/contrib/format-xml/src/test/resources/xml/attributes.xml
@@ -0,0 +1,42 @@
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+-->
+<books>
+  <book>
+    <author>Mark Twain</author>
+    <title>The Adventures of Tom Sawyer</title>
+    <category>FICTION</category>
+    <year>1876</year>
+  </book>
+  <book>
+    <authors>
+        <author>Niklaus Wirth</author>
+        <author>Somebody else</author>
+    </authors>
+    <title binding="paperback">The Programming Language Pascal</title>
+    <category >PASCAL</category>
+    <year>1971</year>
+  </book>
+  <book>
+    <author>O.-J. Dahl</author>
+    <title binding="hardcover" subcategory="non-fiction">Structured 
Programming</title>
+    <category>PROGRAMMING</category>
+    <year>1972</year>
+  </book>
+</books>
\ No newline at end of file
diff --git a/contrib/format-xml/src/test/resources/xml/deep-nested.xml 
b/contrib/format-xml/src/test/resources/xml/deep-nested.xml
new file mode 100644
index 0000000..2d28289
--- /dev/null
+++ b/contrib/format-xml/src/test/resources/xml/deep-nested.xml
@@ -0,0 +1,60 @@
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+-->
+<root>
+  <level1>
+    <level2>
+      <field1-level2>l2</field1-level2>
+      <level3>
+        <field1-level3>l3</field1-level3>
+        <level4>
+          <field1-level4>l4</field1-level4>
+          <level5>
+            <field1-level5>l5</field1-level5>
+            <level6>
+              <field1-level6>l6</field1-level6>
+              <level7>
+                <field1>f1</field1>
+                <field2>f2</field2>
+                <field3>f3</field3>
+              </level7>
+            </level6>
+          </level5>
+        </level4>
+      </level3>
+    </level2>
+  </level1>
+  <level1>
+    <level2>
+      <level3>
+        <level4>
+          <level5>
+            <level6>
+              <level7>
+                <field1>f4</field1>
+                <field2>f5</field2>
+                <field3>f6</field3>
+              </level7>
+            </level6>
+          </level5>
+        </level4>
+      </level3>
+    </level2>
+  </level1>
+</root>
\ No newline at end of file
diff --git a/contrib/format-xml/src/test/resources/xml/deep-nested2.xml b/contrib/format-xml/src/test/resources/xml/deep-nested2.xml
new file mode 100644
index 0000000..0a1b787
--- /dev/null
+++ b/contrib/format-xml/src/test/resources/xml/deep-nested2.xml
@@ -0,0 +1,60 @@
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+-->
+<root>
+  <level1>
+    <level2>
+      <level3>
+        <level4>
+          <level5>
+            <level6>
+              <level7>
+                <field1>f4</field1>
+                <field2>f5</field2>
+                <field3>f6</field3>
+              </level7>
+            </level6>
+          </level5>
+        </level4>
+      </level3>
+    </level2>
+  </level1>
+  <level1>
+    <level2>
+      <field1-level2>l2</field1-level2>
+      <level3>
+        <field1-level3>l3</field1-level3>
+        <level4>
+          <field1-level4>l4</field1-level4>
+          <level5>
+            <field1-level5>l5</field1-level5>
+            <level6>
+              <field1-level6>l6</field1-level6>
+              <level7>
+                <field1>f1</field1>
+                <field2>f2</field2>
+                <field3>f3</field3>
+              </level7>
+            </level6>
+          </level5>
+        </level4>
+      </level3>
+    </level2>
+  </level1>
+</root>
\ No newline at end of file
diff --git a/contrib/format-xml/src/test/resources/xml/nested.xml b/contrib/format-xml/src/test/resources/xml/nested.xml
new file mode 100644
index 0000000..da94687
--- /dev/null
+++ b/contrib/format-xml/src/test/resources/xml/nested.xml
@@ -0,0 +1,63 @@
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+-->
+<books>
+  <book>
+    <field1>
+      <key1>value1</key1>
+      <key2>value2</key2>
+    </field1>
+    <field2>
+      <key3>k1</key3>
+      <nestedField1>
+        <nk1>nk_value1</nk1>
+        <nk2>nk_value2</nk2>
+        <nk3>nk_value3</nk3>
+      </nestedField1>
+    </field2>
+  </book>
+  <book>
+    <field1>
+      <key1>value3</key1>
+      <key2>value4</key2>
+    </field1>
+    <field2>
+      <key3>k2</key3>
+      <nestedField1>
+        <nk1>nk_value4</nk1>
+        <nk2>nk_value5</nk2>
+        <nk3>nk_value6</nk3>
+      </nestedField1>
+    </field2>
+  </book>
+  <book>
+    <field1>
+      <key1>value5</key1>
+      <key2>value6</key2>
+    </field1>
+    <field2>
+      <key3>k3</key3>
+      <nestedField1>
+        <nk1>nk_value7</nk1>
+        <nk2>nk_value8</nk2>
+        <nk3>nk_value9</nk3>
+      </nestedField1>
+    </field2>
+  </book>
+</books>
\ No newline at end of file
diff --git a/contrib/format-xml/src/test/resources/xml/really-simple-nested.xml b/contrib/format-xml/src/test/resources/xml/really-simple-nested.xml
new file mode 100644
index 0000000..5bb1d18
--- /dev/null
+++ b/contrib/format-xml/src/test/resources/xml/really-simple-nested.xml
@@ -0,0 +1,39 @@
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+-->
+<books>
+  <book>
+    <field1>
+      <key1>value1</key1>
+      <key2>value2</key2>
+    </field1>
+  </book>
+  <book>
+    <field1>
+      <key1>value3</key1>
+      <key2>value4</key2>
+    </field1>
+  </book>
+  <book>
+    <field1>
+      <key1>value5</key1>
+      <key2>value6</key2>
+    </field1>
+  </book>
+</books>
\ No newline at end of file
diff --git a/contrib/format-xml/src/test/resources/xml/simple.xml b/contrib/format-xml/src/test/resources/xml/simple.xml
new file mode 100644
index 0000000..f651ed6
--- /dev/null
+++ b/contrib/format-xml/src/test/resources/xml/simple.xml
@@ -0,0 +1,42 @@
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+-->
+<dependencies>
+  <dependency>
+    <groupId>org.apache.drill.exec</groupId>
+    <artifactId>drill-java-exec</artifactId>
+    <version>${project.version}</version>
+  </dependency>
+
+  <dependency>
+    <groupId>org.apache.drill.exec</groupId>
+    <artifactId>drill-java-exec</artifactId>
+    <classifier>tests</classifier>
+    <version>${project.version}</version>
+    <scope>test</scope>
+  </dependency>
+
+  <dependency>
+    <groupId>org.apache.drill</groupId>
+    <artifactId>drill-common</artifactId>
+    <classifier>tests</classifier>
+    <version>${project.version}</version>
+    <scope>test</scope>
+  </dependency>
+</dependencies>
\ No newline at end of file
diff --git a/contrib/format-xml/src/test/resources/xml/simple_schema.xsd b/contrib/format-xml/src/test/resources/xml/simple_schema.xsd
new file mode 100644
index 0000000..df825b3
--- /dev/null
+++ b/contrib/format-xml/src/test/resources/xml/simple_schema.xsd
@@ -0,0 +1,43 @@
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+-->
+<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema";
+            xmlns:tns="http://tempuri.org/PurchaseOrderSchema.xsd";
+            targetNamespace="http://tempuri.org/PurchaseOrderSchema.xsd";
+            elementFormDefault="qualified">
+  <xsd:element name="PurchaseOrder" type="tns:PurchaseOrderType"/>
+  <xsd:complexType name="PurchaseOrderType">
+    <xsd:sequence>
+      <xsd:element name="ShipTo" type="tns:USAddress" maxOccurs="2"/>
+      <xsd:element name="BillTo" type="tns:USAddress"/>
+    </xsd:sequence>
+    <xsd:attribute name="OrderDate" type="xsd:date"/>
+  </xsd:complexType>
+
+  <xsd:complexType name="USAddress">
+    <xsd:sequence>
+      <xsd:element name="name"   type="xsd:string"/>
+      <xsd:element name="street" type="xsd:string"/>
+      <xsd:element name="city"   type="xsd:string"/>
+      <xsd:element name="state"  type="xsd:string"/>
+      <xsd:element name="zip"    type="xsd:integer"/>
+    </xsd:sequence>
+    <xsd:attribute name="country" type="xsd:NMTOKEN" fixed="US"/>
+  </xsd:complexType>
+</xsd:schema>
\ No newline at end of file
diff --git a/contrib/format-xml/src/test/resources/xml/very-nested.xml b/contrib/format-xml/src/test/resources/xml/very-nested.xml
new file mode 100644
index 0000000..bdf8a0c
--- /dev/null
+++ b/contrib/format-xml/src/test/resources/xml/very-nested.xml
@@ -0,0 +1,38 @@
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+-->
+<book>
+  <field1>
+    <key1>value1</key1>
+    <key2>value2</key2>
+  </field1>
+  <field2>
+    <key3>k1</key3>
+    <nestedField1>
+      <nk1>nk_value1</nk1>
+      <nk2>nk_value2</nk2>
+      <nk3>nk_value3</nk3>
+      <nestedField2>
+        <nk1>nk2_value1</nk1>
+        <nk2>nk2_value2</nk2>
+        <nk3>nk2_value3</nk3>
+      </nestedField2>
+    </nestedField1>
+  </field2>
+</book>
\ No newline at end of file
diff --git a/contrib/native/client/src/protobuf/UserBitShared.pb.cc b/contrib/native/client/src/protobuf/UserBitShared.pb.cc
index 6dbd625..26b2e4f 100644
--- a/contrib/native/client/src/protobuf/UserBitShared.pb.cc
+++ b/contrib/native/client/src/protobuf/UserBitShared.pb.cc
@@ -956,7 +956,7 @@ const char 
descriptor_table_protodef_UserBitShared_2eproto[] PROTOBUF_SECTION_VA
   "ATEMENT\020\005*\207\001\n\rFragmentState\022\013\n\007SENDING\020\000"
   
"\022\027\n\023AWAITING_ALLOCATION\020\001\022\013\n\007RUNNING\020\002\022\014"
   
"\n\010FINISHED\020\003\022\r\n\tCANCELLED\020\004\022\n\n\006FAILED\020\005\022"
-  "\032\n\026CANCELLATION_REQUESTED\020\006*\236\013\n\020CoreOper"
+  "\032\n\026CANCELLATION_REQUESTED\020\006*\260\013\n\020CoreOper"
   "atorType\022\021\n\rSINGLE_SENDER\020\000\022\024\n\020BROADCAST"
   "_SENDER\020\001\022\n\n\006FILTER\020\002\022\022\n\016HASH_AGGREGATE\020"
   
"\003\022\r\n\tHASH_JOIN\020\004\022\016\n\nMERGE_JOIN\020\005\022\031\n\025HASH"
@@ -992,11 +992,11 @@ const char 
descriptor_table_protodef_UserBitShared_2eproto[] PROTOBUF_SECTION_VA
   "CAN\020\?\022\022\n\016EXCEL_SUB_SCAN\020@\022\020\n\014SHP_SUB_SCA"
   "N\020A\022\024\n\020METADATA_HANDLER\020B\022\027\n\023METADATA_CO"
   "NTROLLER\020C\022\022\n\016DRUID_SUB_SCAN\020D\022\021\n\rSPSS_S"
-  "UB_SCAN\020E\022\021\n\rHTTP_SUB_SCAN\020F*g\n\nSaslStat"
-  
"us\022\020\n\014SASL_UNKNOWN\020\000\022\016\n\nSASL_START\020\001\022\024\n\020"
-  "SASL_IN_PROGRESS\020\002\022\020\n\014SASL_SUCCESS\020\003\022\017\n\013"
-  "SASL_FAILED\020\004B.\n\033org.apache.drill.exec.p"
-  "rotoB\rUserBitSharedH\001"
+  "UB_SCAN\020E\022\021\n\rHTTP_SUB_SCAN\020F\022\020\n\014XML_SUB_"
+  "SCAN\020G*g\n\nSaslStatus\022\020\n\014SASL_UNKNOWN\020\000\022\016"
+  "\n\nSASL_START\020\001\022\024\n\020SASL_IN_PROGRESS\020\002\022\020\n\014"
+  "SASL_SUCCESS\020\003\022\017\n\013SASL_FAILED\020\004B.\n\033org.a"
+  "pache.drill.exec.protoB\rUserBitSharedH\001"
   ;
 static const ::PROTOBUF_NAMESPACE_ID::internal::DescriptorTable*const 
descriptor_table_UserBitShared_2eproto_deps[3] = {
   &::descriptor_table_Coordination_2eproto,
@@ -1030,7 +1030,7 @@ static 
::PROTOBUF_NAMESPACE_ID::internal::SCCInfoBase*const descriptor_table_Use
 static ::PROTOBUF_NAMESPACE_ID::internal::once_flag 
descriptor_table_UserBitShared_2eproto_once;
 static bool descriptor_table_UserBitShared_2eproto_initialized = false;
 const ::PROTOBUF_NAMESPACE_ID::internal::DescriptorTable 
descriptor_table_UserBitShared_2eproto = {
-  &descriptor_table_UserBitShared_2eproto_initialized, 
descriptor_table_protodef_UserBitShared_2eproto, "UserBitShared.proto", 5821,
+  &descriptor_table_UserBitShared_2eproto_initialized, 
descriptor_table_protodef_UserBitShared_2eproto, "UserBitShared.proto", 5839,
   &descriptor_table_UserBitShared_2eproto_once, 
descriptor_table_UserBitShared_2eproto_sccs, 
descriptor_table_UserBitShared_2eproto_deps, 22, 3,
   schemas, file_default_instances, TableStruct_UserBitShared_2eproto::offsets,
   file_level_metadata_UserBitShared_2eproto, 22, 
file_level_enum_descriptors_UserBitShared_2eproto, 
file_level_service_descriptors_UserBitShared_2eproto,
@@ -1269,6 +1269,7 @@ bool CoreOperatorType_IsValid(int value) {
     case 68:
     case 69:
     case 70:
+    case 71:
       return true;
     default:
       return false;
diff --git a/contrib/native/client/src/protobuf/UserBitShared.pb.h b/contrib/native/client/src/protobuf/UserBitShared.pb.h
index ae87641..c9afda2 100644
--- a/contrib/native/client/src/protobuf/UserBitShared.pb.h
+++ b/contrib/native/client/src/protobuf/UserBitShared.pb.h
@@ -392,11 +392,12 @@ enum CoreOperatorType : int {
   METADATA_CONTROLLER = 67,
   DRUID_SUB_SCAN = 68,
   SPSS_SUB_SCAN = 69,
-  HTTP_SUB_SCAN = 70
+  HTTP_SUB_SCAN = 70,
+  XML_SUB_SCAN = 71
 };
 bool CoreOperatorType_IsValid(int value);
 constexpr CoreOperatorType CoreOperatorType_MIN = SINGLE_SENDER;
-constexpr CoreOperatorType CoreOperatorType_MAX = HTTP_SUB_SCAN;
+constexpr CoreOperatorType CoreOperatorType_MAX = XML_SUB_SCAN;
 constexpr int CoreOperatorType_ARRAYSIZE = CoreOperatorType_MAX + 1;
 
 const ::PROTOBUF_NAMESPACE_ID::EnumDescriptor* CoreOperatorType_descriptor();
diff --git a/contrib/pom.xml b/contrib/pom.xml
index f5f60ee..d9ea07a 100644
--- a/contrib/pom.xml
+++ b/contrib/pom.xml
@@ -50,6 +50,7 @@
     <module>format-esri</module>
     <module>format-hdf5</module>
     <module>format-spss</module>
+    <module>format-xml</module>
     <module>storage-hive</module>
     <module>storage-mongo</module>
     <module>storage-jdbc</module>
diff --git a/distribution/pom.xml b/distribution/pom.xml
index c6ebecb..8490919 100644
--- a/distribution/pom.xml
+++ b/distribution/pom.xml
@@ -364,6 +364,11 @@
         </dependency>
         <dependency>
           <groupId>org.apache.drill.contrib</groupId>
+          <artifactId>drill-format-xml</artifactId>
+          <version>${project.version}</version>
+        </dependency>
+        <dependency>
+          <groupId>org.apache.drill.contrib</groupId>
           <artifactId>drill-format-esri</artifactId>
           <version>${project.version}</version>
         </dependency>
diff --git a/distribution/src/assemble/component.xml b/distribution/src/assemble/component.xml
index 2148fb8..a7670e3 100644
--- a/distribution/src/assemble/component.xml
+++ b/distribution/src/assemble/component.xml
@@ -44,6 +44,7 @@
         <include>org.apache.drill.contrib:drill-format-mapr:jar</include>
         <include>org.apache.drill.contrib:drill-format-syslog:jar</include>
         <include>org.apache.drill.contrib:drill-format-esri:jar</include>
+        <include>org.apache.drill.contrib:drill-format-xml:jar</include>
         <include>org.apache.drill.contrib:drill-format-hdf5:jar</include>
         <include>org.apache.drill.contrib:drill-format-ltsv:jar</include>
         <include>org.apache.drill.contrib:drill-format-httpd:jar</include>
diff --git a/protocol/src/main/java/org/apache/drill/exec/proto/UserBitShared.java b/protocol/src/main/java/org/apache/drill/exec/proto/UserBitShared.java
index 01a51f0..4292ec1 100644
--- a/protocol/src/main/java/org/apache/drill/exec/proto/UserBitShared.java
+++ b/protocol/src/main/java/org/apache/drill/exec/proto/UserBitShared.java
@@ -697,6 +697,10 @@ public final class UserBitShared {
      * <code>HTTP_SUB_SCAN = 70;</code>
      */
     HTTP_SUB_SCAN(70),
+    /**
+     * <code>XML_SUB_SCAN = 71;</code>
+     */
+    XML_SUB_SCAN(71),
     ;
 
     /**
@@ -983,6 +987,10 @@ public final class UserBitShared {
      * <code>HTTP_SUB_SCAN = 70;</code>
      */
     public static final int HTTP_SUB_SCAN_VALUE = 70;
+    /**
+     * <code>XML_SUB_SCAN = 71;</code>
+     */
+    public static final int XML_SUB_SCAN_VALUE = 71;
 
 
     public final int getNumber() {
@@ -1076,6 +1084,7 @@ public final class UserBitShared {
         case 68: return DRUID_SUB_SCAN;
         case 69: return SPSS_SUB_SCAN;
         case 70: return HTTP_SUB_SCAN;
+        case 71: return XML_SUB_SCAN;
         default: return null;
       }
     }
@@ -29055,7 +29064,7 @@ public final class UserBitShared {
       "ATEMENT\020\005*\207\001\n\rFragmentState\022\013\n\007SENDING\020\000" 
+
       
"\022\027\n\023AWAITING_ALLOCATION\020\001\022\013\n\007RUNNING\020\002\022\014"
 +
       
"\n\010FINISHED\020\003\022\r\n\tCANCELLED\020\004\022\n\n\006FAILED\020\005\022"
 +
-      "\032\n\026CANCELLATION_REQUESTED\020\006*\236\013\n\020CoreOper" +
+      "\032\n\026CANCELLATION_REQUESTED\020\006*\260\013\n\020CoreOper" +
       "atorType\022\021\n\rSINGLE_SENDER\020\000\022\024\n\020BROADCAST" +
       
"_SENDER\020\001\022\n\n\006FILTER\020\002\022\022\n\016HASH_AGGREGATE\020" +
       
"\003\022\r\n\tHASH_JOIN\020\004\022\016\n\nMERGE_JOIN\020\005\022\031\n\025HASH"
 +
@@ -29091,11 +29100,11 @@ public final class UserBitShared {
       "CAN\020?\022\022\n\016EXCEL_SUB_SCAN\020@\022\020\n\014SHP_SUB_SCA" +
       "N\020A\022\024\n\020METADATA_HANDLER\020B\022\027\n\023METADATA_CO" +
       "NTROLLER\020C\022\022\n\016DRUID_SUB_SCAN\020D\022\021\n\rSPSS_S" +
-      "UB_SCAN\020E\022\021\n\rHTTP_SUB_SCAN\020F*g\n\nSaslStat" +
-      
"us\022\020\n\014SASL_UNKNOWN\020\000\022\016\n\nSASL_START\020\001\022\024\n\020"
 +
-      
"SASL_IN_PROGRESS\020\002\022\020\n\014SASL_SUCCESS\020\003\022\017\n\013" +
-      "SASL_FAILED\020\004B.\n\033org.apache.drill.exec.p" +
-      "rotoB\rUserBitSharedH\001"
+      "UB_SCAN\020E\022\021\n\rHTTP_SUB_SCAN\020F\022\020\n\014XML_SUB_" +
+      "SCAN\020G*g\n\nSaslStatus\022\020\n\014SASL_UNKNOWN\020\000\022\016" +
+      
"\n\nSASL_START\020\001\022\024\n\020SASL_IN_PROGRESS\020\002\022\020\n\014" +
+      "SASL_SUCCESS\020\003\022\017\n\013SASL_FAILED\020\004B.\n\033org.a" +
+      "pache.drill.exec.protoB\rUserBitSharedH\001"
     };
     descriptor = com.google.protobuf.Descriptors.FileDescriptor
       .internalBuildGeneratedFileFrom(descriptorData,
diff --git a/protocol/src/main/protobuf/UserBitShared.proto b/protocol/src/main/protobuf/UserBitShared.proto
index f7b7b02..c2304cf 100644
--- a/protocol/src/main/protobuf/UserBitShared.proto
+++ b/protocol/src/main/protobuf/UserBitShared.proto
@@ -382,6 +382,7 @@ enum CoreOperatorType {
   DRUID_SUB_SCAN = 68;
   SPSS_SUB_SCAN = 69;
   HTTP_SUB_SCAN = 70;
+  XML_SUB_SCAN = 71;
 }
 
 /* Registry that contains list of jars, each jar contains its name and list of 
function signatures.

Reply via email to