mbeckerle commented on code in PR #2836: URL: https://github.com/apache/drill/pull/2836#discussion_r1384188980
########## contrib/format-daffodil/.gitignore: ########## Review Comment: Remove this file from change set. ########## contrib/format-daffodil/src/main/java/org/apache/drill/exec/store/daffodil/DaffodilDrillInfosetOutputter.java: ########## @@ -0,0 +1,180 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.drill.exec.store.daffodil; + +import org.apache.daffodil.lib.xml.NamedQName; +import org.apache.daffodil.runtime1.api.BlobMethodsImpl; +import org.apache.daffodil.runtime1.api.ComplexElementMetadata; +import org.apache.daffodil.runtime1.api.ElementMetadata; +import org.apache.daffodil.runtime1.api.InfosetArray; +import org.apache.daffodil.runtime1.api.InfosetComplexElement; +import org.apache.daffodil.runtime1.api.InfosetOutputter; +import org.apache.daffodil.runtime1.api.InfosetSimpleElement; +import org.apache.daffodil.runtime1.api.Status; +import org.apache.drill.exec.physical.resultSet.RowSetLoader; +import org.apache.drill.exec.record.MaterializedField; +import org.apache.drill.exec.record.metadata.ColumnMetadata; +import org.apache.drill.exec.record.metadata.TupleSchema; +import org.apache.drill.exec.vector.accessor.ArrayWriter; +import org.apache.drill.exec.vector.accessor.ColumnWriter; +import org.apache.drill.exec.vector.accessor.ObjectWriter; +import org.apache.drill.exec.vector.accessor.ScalarWriter; +import org.apache.drill.exec.vector.accessor.TupleWriter; +import org.apache.drill.exec.vector.accessor.writer.AbstractArrayWriter; +import org.apache.drill.exec.vector.accessor.writer.AbstractTupleWriter; +import org.apache.drill.exec.vector.accessor.writer.MapWriter; +import org.apache.drill.exec.record.metadata.TupleMetadata; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import scala.Enumeration; + +import java.util.Stack; + +/** + * Adapts Daffodil parser infoset event calls to Drill writer calls + * to fill in Drill data rows. 
+ */ +public class DaffodilDrillInfosetOutputter + extends BlobMethodsImpl implements InfosetOutputter { + + private final Stack<ColumnWriter> columnWriterStack = new Stack<ColumnWriter>(); + + private ColumnWriter columnWriter() { + return columnWriterStack.peek(); + } + + private static final Logger logger = LoggerFactory.getLogger(DaffodilDrillInfosetOutputter.class); + + private DaffodilDrillInfosetOutputter() {} // no default constructor + + public DaffodilDrillInfosetOutputter(RowSetLoader writer) { + columnWriterStack.push(writer); + } + + @Override + public void reset() { + } + + @Override + public void startDocument() {} + + @Override + public void endDocument() {} + + private String colName(ElementMetadata erd) { + NamedQName nqn = erd.namedQName(); + String colName = nqn.toPrefix_localName(); // just the local name if no prefix/namespace + return colName; + } + + @Override + public void startSimple(InfosetSimpleElement diSimple) { + ElementMetadata erd = diSimple.metadata(); + boolean isNilled = diSimple.isNilled(); + String colName = colName(erd); + // If the column is an array, startArray will have setup columnWriter() to be the array writer. + // Otherwise, it's a TupleMetadata and we need to get this simple element's column. + // or it could be a top-level simple element (which we do use for test cases) + // in which case the columnWriter is the original RowSetWriter. + System.err.println(columnWriter().getClass().getName()); + ColumnWriter cw = columnWriter(); + switch (cw.type()) { + case ARRAY: { + assert(erd.isArray()); + // do nothing startArray has this ready to write.' + break; + } + case TUPLE: { + cw = ((TupleWriter)cw).column(colName); + break; + } + } + ColumnMetadata cm = cw.schema(); + if (isNilled) { + assert(cm.isNullable()); + cw.setNull(); // Can I have a nullable array item. I.e., each item is nullable? + } else { + // + // FIXME: only INT is implemented right now. 
+ // + int value = diSimple.getInt(); // will fail on downcast if not an INT. + cw.setObject(value); // autoboxing for primitive types. + } + } + + @Override + public void endSimple(InfosetSimpleElement diSimple) { + /// nothing to do. + } + + @Override + public void startComplex(InfosetComplexElement diComplex) { + ComplexElementMetadata erd = diComplex.metadata(); + String colName = colName(erd); + ColumnWriter cw = columnWriter(); + switch (cw.type()) { + case ARRAY: { + // Review Comment: This issue is fixed, at least as far as a test that reads 3 records each containing two fields. ########## exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/scan/framework/SchemaNegotiator.java: ########## @@ -78,7 +78,7 @@ * Some readers can determine the source schema at the start of a scan. * For example, a CSV file has headers, a Parquet file has footers, both * of which define a schema. This case is called "early schema." The - * reader fefines the schema by calling + * reader defines the schema by calling Review Comment: Revert this. I need to stay focused on just my part of things. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: dev-unsubscr...@drill.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org