mbeckerle commented on code in PR #2836: URL: https://github.com/apache/drill/pull/2836#discussion_r1376997202
########## contrib/format-daffodil/src/main/java/org/apache/drill/exec/store/daffodil/schema/DrillDaffodilSchemaVisitor.java: ########## @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.drill.exec.store.daffodil.schema; + +import org.apache.daffodil.runtime1.api.ComplexElementMetadata; +import org.apache.daffodil.runtime1.api.MetadataHandler; +import org.apache.daffodil.runtime1.api.SimpleElementMetadata; +import org.apache.drill.common.types.TypeProtos.MinorType; +import org.apache.drill.exec.record.metadata.MapBuilder; +import org.apache.drill.exec.record.metadata.SchemaBuilder; +import org.apache.drill.exec.record.metadata.TupleMetadata; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Stack; + +/** + * This class transforms a DFDL/Daffodil schema into a Drill Schema. + */ +public class DrillDaffodilSchemaVisitor + extends MetadataHandler +{ + private static final Logger logger = LoggerFactory.getLogger(DrillDaffodilSchemaVisitor.class); + private final SchemaBuilder builder = new SchemaBuilder(); + + private final Stack<MapBuilder> mapBuilderStack = new Stack<MapBuilder>(); + + private MapBuilder mapBuilder() { + return mapBuilderStack.peek(); + } + + /** + * Returns a {@link TupleMetadata} representation of the DFDL schema. + * Should only be called after the walk of the DFDL schema with this visitor has been called. + * @return A {@link TupleMetadata} representation of the DFDL schema. + */ + public TupleMetadata getDrillSchema() { + return builder.build(); + } + + @Override + public void elementSimple(SimpleElementMetadata md) { Review Comment: Daffodil should rename this to simpleMetadata, and below to complexMetadata. They sound too much like data right now. ########## exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FormatCreator.java: ########## @@ -117,7 +117,7 @@ private static Map<Class<?>, Constructor<?>> initConfigConstructors(Collection<C } catch (InstantiationException | IllegalAccessException | IllegalArgumentException | InvocationTargetException e1) { logger.warn("Failure initializing storage config named '{}' of type '{}'.", e.getKey(), e.getValue().getClass().getName(), e1); } - } + }xml Review Comment: ??? Looks like I clobbered this by accident. Revert ########## exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FormatCreator.java: ########## @@ -117,7 +117,7 @@ private static Map<Class<?>, Constructor<?>> initConfigConstructors(Collection<C } catch (InstantiationException | IllegalAccessException | IllegalArgumentException | InvocationTargetException e1) { logger.warn("Failure initializing storage config named '{}' of type '{}'.", e.getKey(), e.getValue().getClass().getName(), e1); } - } + }xml Review Comment: Fixed already. ########## contrib/format-daffodil/src/test/java/org/apache/drill/exec/store/daffodil/TestDaffodilUtils.java: ########## @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.drill.exec.store.daffodil; + +import org.junit.Test; + +import static org.junit.Assert.assertEquals; + +public class TestDaffodilUtils { + +// @Test Review Comment: Not sure I'm going to need this file. Perhaps delete. ########## contrib/format-daffodil/src/test/java/org/apache/drill/exec/store/daffodil/TestDaffodilReader.java: ########## @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.drill.exec.store.daffodil; + +import org.apache.drill.categories.RowSetTest; +import org.apache.drill.common.types.TypeProtos.DataMode; +import org.apache.drill.common.types.TypeProtos.MinorType; +import org.apache.drill.exec.physical.rowSet.RowSet; +import org.apache.drill.exec.record.metadata.SchemaBuilder; +import org.apache.drill.exec.record.metadata.TupleMetadata; +import org.apache.drill.test.ClusterFixture; +import org.apache.drill.test.ClusterTest; +import org.apache.drill.test.QueryBuilder; +import org.apache.drill.test.rowSet.RowSetComparison; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import java.nio.file.Paths; +import java.time.Instant; +import java.time.LocalDate; +import java.time.LocalTime; + +import static org.apache.drill.test.QueryTestUtil.generateCompressedFile; +import static org.apache.drill.test.rowSet.RowSetUtilities.mapArray; +import static org.apache.drill.test.rowSet.RowSetUtilities.objArray; +import static org.apache.drill.test.rowSet.RowSetUtilities.strArray; +import static org.junit.Assert.assertEquals; + +@Category(RowSetTest.class) +public class TestDaffodilReader extends ClusterTest { + + @BeforeClass + public static void setup() throws Exception { + // boilerplate call to start test rig + ClusterTest.startCluster(ClusterFixture.builder(dirTestWatcher)); + + // + // FIXME: isn't this supposed to come from some file location? + // + // FIXME: Where does drill search for these configs? + // FIXME: What kind of file, what naming convention and what format? + DaffodilFormatConfig formatConfig = + new DaffodilFormatConfig(null, + "", + "", + "", + false, + ""); + + cluster.defineFormat("dfs", "daffodil", formatConfig); + + // Needed to test against compressed files. + // Copies data from src/test/resources to the dfs root. + java.io.File newDataDir = dirTestWatcher.copyResourceToRoot(Paths.get("data/")); + java.io.File newSchemaDir = dirTestWatcher.copyResourceToRoot(Paths.get("schema/")); + } + + /** + * This unit test tests a simple data file + * + * @throws Exception Throw exception if anything goes wrong + */ + @Test + public void testSimple1() throws Exception { + Review Comment: @cgivre I could use some help figuring out why I needed absolute URIs here. I thought the `copyResourceToRoot` calls above in the setup would make those dirs available to the test, but paths like 'schema/simple.dfdl.xsd' did not work nor 'data/simple.dat.gz'. Ultimately I want to drop dataURI and have the ``` dfs.`data/simple.dat.gz` ``` find the file, but I had no luck with that so just did this dataURI workaround. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: dev-unsubscr...@drill.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org