[jira] [Commented] (DRILL-8453) Add XSD Support to XML Reader (Part 1)
[ https://issues.apache.org/jira/browse/DRILL-8453?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17757618#comment-17757618 ] ASF GitHub Bot commented on DRILL-8453: --- cgivre commented on PR #2824: URL: https://github.com/apache/drill/pull/2824#issuecomment-1688723270 @mbeckerle > Add XSD Support to XML Reader (Part 1) > -- > > Key: DRILL-8453 > URL: https://issues.apache.org/jira/browse/DRILL-8453 > Project: Apache Drill > Issue Type: Improvement > Components: Format - XML > Affects Versions: 1.21.1 > Reporter: Charles Givre > Assignee: Charles Givre > Priority: Major > Fix For: 1.21.2 > > > This PR is a part of a series to add better support for reading XML data to Drill. One of the main challenges is that XML data does not have a way of inferring data types, nor does it have a way of detecting arrays. > The only way to do this really well is to have a schema. Some XML files link a schema definition file to the data. This PR adds the capability for Drill to map XSD schema files into Drill schemas. > The current plan is as follows: Part 1 of this PR simply adds the reader but adds no new user-detectable functionality. Part 2 will include the actual integration with the XML reader. Part 3 will include the ability to read arrays. -- This message was sent by Atlassian Jira (v8.20.10#820010)
[jira] [Commented] (DRILL-8436) Upgrade Hadoop 3.2.4 -> 3.3.6
[ https://issues.apache.org/jira/browse/DRILL-8436?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17757485#comment-17757485 ] ASF GitHub Bot commented on DRILL-8436: --- jnturton commented on PR #2821: URL: https://github.com/apache/drill/pull/2821#issuecomment-1688246753 I've got the JDBC driver working by bundling a core-site.xml file in it that handles the relocation of org.apache.hadoop to oadd.org.apache.hadoop. > Upgrade Hadoop 3.2.4 -> 3.3.6 > - > > Key: DRILL-8436 > URL: https://issues.apache.org/jira/browse/DRILL-8436 > Project: Apache Drill > Issue Type: Improvement > Components: library > Affects Versions: 1.21.1 > Reporter: James Turton > Assignee: James Turton > Priority: Minor > Fix For: 1.22.0 > > > Hadoop is upgraded to 3.3.6. Jetty is upgraded to 9.4.51.v20230217. -- This message was sent by Atlassian Jira (v8.20.10#820010)
[jira] [Commented] (DRILL-8453) Add XSD Support to XML Reader (Part 1)
[ https://issues.apache.org/jira/browse/DRILL-8453?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17757474#comment-17757474 ] ASF GitHub Bot commented on DRILL-8453: --- cgivre commented on code in PR #2824: URL: https://github.com/apache/drill/pull/2824#discussion_r1301658422 ## contrib/format-xml/src/test/java/org/apache/drill/exec/store/xml/xsd/TestXSDSchema.java: ## @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.drill.exec.store.xml.xsd; + +import org.apache.drill.common.types.TypeProtos.MinorType; +import org.apache.drill.common.util.DrillFileUtils; +import org.apache.drill.exec.record.metadata.MapBuilder; +import org.apache.drill.exec.record.metadata.SchemaBuilder; +import org.apache.drill.exec.record.metadata.TupleMetadata; +import org.junit.Test; + +import java.io.File; + +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class TestXSDSchema { + + @Test + public void testSimpleXSD() throws Exception { +File simple_xsd = DrillFileUtils.getResourceAsFile("/xsd/simple.xsd"); +TupleMetadata schema = DrillXSDSchemaUtils.getSchema(simple_xsd.getPath()); + +TupleMetadata expectedSchema = new SchemaBuilder() +.addMap("shiporder") + .addMap("attributes") +.addNullable("orderid", MinorType.VARCHAR) + .resumeMap() + .addNullable("orderperson", MinorType.VARCHAR) + .addMap("shipto") +.addNullable("name", MinorType.VARCHAR) +.addNullable("address", MinorType.VARCHAR) +.addNullable("city", MinorType.VARCHAR) +.addNullable("country", MinorType.VARCHAR) +.resumeMap() + .addMapArray("item") +.addNullable("title", MinorType.VARCHAR) +.addNullable("note", MinorType.VARCHAR) +.addNullable("quantity", MinorType.VARDECIMAL) +.addNullable("price", MinorType.VARDECIMAL) + .resumeMap() +.resumeSchema() + .buildSchema(); +assertTrue(expectedSchema.isEquivalent(schema)); + } + + + @Test + public void testComplexXSD() throws Exception { +File complex_xsd = DrillFileUtils.getResourceAsFile("/xsd/complex.xsd"); +TupleMetadata schema = DrillXSDSchemaUtils.getSchema(complex_xsd.getPath()); + +SchemaBuilder sb1 = new SchemaBuilder(); +MapBuilder sb2 = sb1 +.addNullable("comment", MinorType.VARCHAR) // global comment element +.addMap("infoType") + .addMap("attributes") +.addNullable("kind", MinorType.VARCHAR) + .resumeMap() +.resumeSchema() +.addMap("purchaseOrder") // global purchaseOrder element + .addMap("attributes") +.addNullable("orderDate", 
MinorType.DATE) // an attribute +.addNullable("confirmDate", MinorType.DATE) // an attribute + .resumeMap() + .addMap("shipTo") +.addMap("attributes") + .addNullable("country", MinorType.VARCHAR) // an attribute +.resumeMap() +.addNullable("name", MinorType.VARCHAR) +.addNullable("street", MinorType.VARCHAR) +.addNullable("city", MinorType.VARCHAR) +.addNullable("state", MinorType.VARCHAR) +.addNullable("zip", MinorType.VARDECIMAL) + .resumeMap(); // end shipTo +MapBuilder sb3 = sb2 + .addMap("billTo") +.addMap("attributes") + .addNullable("country", MinorType.VARCHAR) // an attribute +.resumeMap() +.addNullable("name", MinorType.VARCHAR) +.addNullable("street", MinorType.VARCHAR) + .addNullable("city", MinorType.VARCHAR) +.addNullable("state", MinorType.VARCHAR) +.addNullable("zip", MinorType.VARDECIMAL) + .resumeMap(); +MapBuilder sb4 = sb3 + .addNullable("comment", MinorType.VARCHAR) + .addMap("items") +.addMapArray("item") + .addMap("attributes") +.addNullable("partNum", MinorType.VARCHAR) // an attribute + .resumeMap() +
[jira] [Commented] (DRILL-8453) Add XSD Support to XML Reader (Part 1)
[ https://issues.apache.org/jira/browse/DRILL-8453?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17757382#comment-17757382 ] ASF GitHub Bot commented on DRILL-8453: --- jnturton commented on code in PR #2824: URL: https://github.com/apache/drill/pull/2824#discussion_r1301478103 ## contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/xsd/DrillXSDSchemaUtils.java: ## @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.drill.exec.store.xml.xsd; + +import org.apache.drill.common.types.TypeProtos.MinorType; +import org.apache.drill.exec.record.metadata.SchemaBuilder; +import org.apache.drill.exec.record.metadata.TupleMetadata; +import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableMap; +import org.apache.ws.commons.schema.XmlSchema; +import org.apache.ws.commons.schema.XmlSchemaCollection; +import org.apache.ws.commons.schema.XmlSchemaElement; + +import org.apache.ws.commons.schema.XmlSchemaObject; +import org.apache.ws.commons.schema.walker.XmlSchemaWalker; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.xml.transform.stream.StreamSource; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Paths; + +public class DrillXSDSchemaUtils { + private static final MinorType DEFAULT_TYPE = MinorType.VARCHAR; + private static final Logger logger = LoggerFactory.getLogger(DrillXSDSchemaUtils.class); + + /** + * This map maps the data types defined by the XSD definition to Drill data types. + */ + public static final ImmutableMap XML_TYPE_MAPPINGS = ImmutableMap.builder() +.put("BASE64BINARY", MinorType.VARBINARY) +.put("BOOLEAN", MinorType.BIT) +.put("DATE", MinorType.DATE) +.put("DATETIME", MinorType.TIMESTAMP) +.put("DECIMAL", MinorType.VARDECIMAL) +.put("DOUBLE", MinorType.FLOAT8) +.put("DURATION", MinorType.INTERVAL) +.put("FLOAT", MinorType.FLOAT4) +.put("HEXBINARY", MinorType.VARBINARY) +.put("STRING", MinorType.VARCHAR) +.put("TIME", MinorType.TIME) +.build(); + + /** + * This function is only used for testing, but accepts a XSD file as input rather than a {@link InputStream} + * @param filename A {@link String} containing an XSD file. + * @return A {@link TupleMetadata} containing a Drill representation of the XSD schema. + * @throws IOException If anything goes wrong or the file is not found. 
+ */ + public static TupleMetadata getSchema(String filename) throws IOException { Review Comment: ```suggestion @VisibleForTesting public static TupleMetadata getSchema(String filename) throws IOException { ``` ## contrib/format-xml/src/test/java/org/apache/drill/exec/store/xml/xsd/TestXSDSchema.java: ## @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.drill.exec.store.xml.xsd; + +import org.apache.drill.common.types.TypeProtos.MinorType; +import org.apache.drill.common.util.DrillFileUtils; +import org.apache.drill.exec.record.metadata.MapBuilder; +import org.apache.drill.exec.record.metadata.SchemaBuilder; +import org.apache.drill.exec.record.metadata.TupleMetadata; +import org.junit.Test; + +import java.io.File; + +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class TestXSDSchema { + + @Test + public