[
https://issues.apache.org/jira/browse/DRILL-8474?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17803334#comment-17803334
]
ASF GitHub Bot commented on DRILL-8474:
---------------------------------------
cgivre commented on code in PR #2836:
URL: https://github.com/apache/drill/pull/2836#discussion_r1442278098
##########
contrib/format-daffodil/src/main/java/org/apache/drill/exec/store/daffodil/DaffodilDrillInfosetOutputter.java:
##########
@@ -0,0 +1,296 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.daffodil;
+
+import org.apache.daffodil.runtime1.api.ComplexElementMetadata;
+import org.apache.daffodil.runtime1.api.ElementMetadata;
+import org.apache.daffodil.runtime1.api.InfosetArray;
+import org.apache.daffodil.runtime1.api.InfosetComplexElement;
+import org.apache.daffodil.japi.infoset.InfosetOutputter;
+import org.apache.daffodil.runtime1.api.InfosetSimpleElement;
+import org.apache.daffodil.runtime1.api.PrimitiveType;
+import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.exec.physical.resultSet.RowSetLoader;
+import org.apache.drill.exec.record.metadata.ColumnMetadata;
+import org.apache.drill.exec.store.daffodil.schema.DrillDaffodilSchemaUtils;
+import org.apache.drill.exec.store.daffodil.schema.DrillDaffodilSchemaVisitor;
+import org.apache.drill.exec.vector.accessor.ArrayWriter;
+import org.apache.drill.exec.vector.accessor.ColumnWriter;
+import org.apache.drill.exec.vector.accessor.ObjectType;
+import org.apache.drill.exec.vector.accessor.TupleWriter;
+import org.apache.drill.exec.vector.complex.writer.BaseWriter;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Stack;
+
+/**
+ * Adapts Daffodil parser infoset event calls to Drill writer calls
+ * to fill in Drill data rows.
+ */
+public class DaffodilDrillInfosetOutputter
+ extends InfosetOutputter {
+
+ private boolean isOriginalRoot() {
+ boolean result = currentTupleWriter() == rowSetWriter;
+ if (result)
+ assert(tupleWriterStack.size() == 1);
+ return result;
+ }
+
+ /**
+ * True if the next startComplex call will be for the
+ * DFDL infoset root element whose children are the columns of
+ * the row set. Cleared by startComplex when it sees the root element,
+ * and set again by endComplex (for the root element) and by reset().
+ */
+ private boolean isRootElement = true;
+
+ /**
+ * Stack of the tuple writers currently being written to. The bottom
+ * entry is the row set writer, pushed by the constructor and by reset().
+ * startComplex pushes a writer for each non-root complex element and
+ * endComplex pops it again, so the stack only grows beyond one entry
+ * when we have sub-structures that are not simple-type fields of the row.
+ */
+ private final Stack<TupleWriter> tupleWriterStack = new Stack<>();
+ /**
+ * Stack of the array writers currently open. startArray pushes a writer
+ * and endArray pops it.
+ */
+ private final Stack<ArrayWriter> arrayWriterStack = new Stack<>();
+
+ private TupleWriter currentTupleWriter() {
+ return tupleWriterStack.peek();
+ }
+
+ private ArrayWriter currentArrayWriter() {
+ return arrayWriterStack.peek();
+ }
+
+
+ // Logger for this class. The continuation line carries the diff marker again
+ // so the whole declaration is part of the added file.
+ private static final Logger logger =
+     LoggerFactory.getLogger(DaffodilDrillInfosetOutputter.class);
+
+ /** Row set writer supplied at construction; also the bottom entry of the tuple writer stack. */
+ private RowSetLoader rowSetWriter;
+
+ private DaffodilDrillInfosetOutputter() {} // no default constructor
+
+ /**
+  * Creates an outputter that writes into the given row set writer.
+  *
+  * @param writer the row set writer; it is pushed as the first entry of the
+  *               tuple writer stack
+  */
+ public DaffodilDrillInfosetOutputter(RowSetLoader writer) {
+   rowSetWriter = writer;
+   tupleWriterStack.push(rowSetWriter);
+ }
+
+ /**
+  * Returns this outputter to its initial state: no open arrays, only the
+  * row set writer on the tuple writer stack, and the root element expected next.
+  */
+ @Override
+ public void reset() {
+   arrayWriterStack.clear();
+   tupleWriterStack.clear();
+   tupleWriterStack.push(rowSetWriter);
+   isRootElement = true;
+   checkCleanState();
+ }
+
+ /**
+  * Asserts that the outputter is in its initial state: the current tuple
+  * writer is the row set writer, no array writer is open, and the next
+  * complex element is expected to be the root.
+  */
+ private void checkCleanState() {
+   assert isOriginalRoot();
+   assert arrayWriterStack.isEmpty();
+   assert isRootElement;
+ }
+
+ /** Start-of-document event: only verifies that the outputter is in its clean state. */
+ @Override
+ public void startDocument() {
+   this.checkCleanState();
+ }
+
+ /** End-of-document event: only verifies that the outputter is back in its clean state. */
+ @Override
+ public void endDocument() {
+   this.checkCleanState();
+ }
+
+ private String colName(ElementMetadata md) {
+ return DrillDaffodilSchemaVisitor.makeColumnName(md);
+ }
+
+ /**
+  * Writes one simple element into its column writer.
+  * <p>
+  * An item of a simple-type array goes to the scalar writer of the current
+  * array writer; any other simple element goes to the column of that name in
+  * the current tuple writer. A nilled element of a nillable type is written
+  * as null; every other value is handed to convertDaffodilValueToDrillValue.
+  *
+  * @param ise the simple element being started
+  */
+ @Override
+ public void startSimple(InfosetSimpleElement ise) {
+   assert !isRootElement;
+   final ElementMetadata elementMd = ise.metadata();
+   final String columnName = colName(elementMd);
+   final ColumnWriter writer;
+   if (!elementMd.isArray()) {
+     // A simple element within a map. The map itself might be an array,
+     // but that does not matter here.
+     writer = currentTupleWriter().column(columnName);
+   } else {
+     // An item of a simple-type array.
+     assert !arrayWriterStack.isEmpty();
+     writer = currentArrayWriter().scalar();
+   }
+   final ColumnMetadata columnMd = writer.schema();
+   assert columnMd.isScalar();
+   final boolean nilled = elementMd.isNillable() && ise.isNilled();
+   if (!nilled) {
+     convertDaffodilValueToDrillValue(ise, columnMd, writer);
+     return;
+   }
+   assert columnMd.isNullable();
+   writer.setNull();
+ }
+
+ @Override
+ public void endSimple(InfosetSimpleElement diSimple) {
+ assert (!isRootElement);
+ // do nothing
+ }
+
+ /**
+  * Handles the start of a complex element.
+  * <p>
+  * The first complex element seen is the DFDL root. It is not a column of
+  * the row set, so only the isRootElement flag is cleared. For any other
+  * complex element a tuple writer is pushed on the tuple writer stack: it is
+  * taken from the current array writer when the element is an array item,
+  * otherwise it is looked up by column name in the current tuple writer.
+  *
+  * @param ce the complex element being started
+  */
+ @Override
+ public void startComplex(InfosetComplexElement ce) {
+   if (isRootElement) {
+     assert isOriginalRoot();
+     // This complex element corresponds to the root element of the
+     // DFDL schema. We don't treat this as a column of the row set.
+     // Rather, its children are the columns of the row set.
+     //
+     // If we do nothing at all here, then we'll start getting
+     // event calls for the children.
+     isRootElement = false;
+     return;
+   }
+   ComplexElementMetadata md = ce.metadata();
+   if (md.isArray()) {
+     assert !arrayWriterStack.isEmpty();
+     // FIXME: is this the way to add a complex array child item (i.e., each
+     // array item is a map)
+     tupleWriterStack.push(currentArrayWriter().tuple());
+   } else {
+     // The column name is only needed to look up a non-array child map.
+     tupleWriterStack.push(currentTupleWriter().tuple(colName(md)));
+   }
+ }
+
+ /**
+  * Handles the end of a complex element.
+  * <p>
+  * When the current tuple writer is the row set writer, the element that
+  * ended is the root, so the isRootElement flag is set again and nothing
+  * else is done. Otherwise the element was a map: its tuple writer is popped,
+  * and if it was an array item the current array writer is saved first.
+  *
+  * @param ce the complex element that ended
+  */
+ @Override
+ public void endComplex(InfosetComplexElement ce) {
+   if (isOriginalRoot()) {
+     isRootElement = true;
+     // Do nothing else. The row gets closed-out in the
+     // DaffodilBatchReader.next() method.
+     return;
+   }
+   // It's a map.
+   // We seem to not need to do anything to end the map. No action taken
+   // here works.
+   ComplexElementMetadata md = ce.metadata();
+   if (md.isArray()) {
+     assert !arrayWriterStack.isEmpty();
+     currentArrayWriter().save(); // required for map array entries.
+   }
+   tupleWriterStack.pop();
+ }
+
+ /**
+  * Handles the start of an array: looks up the array writer for the array's
+  * column in the current tuple writer and pushes it on the array writer stack.
+  *
+  * @param diArray the array being started
+  */
+ @Override
+ public void startArray(InfosetArray diArray) {
+   ElementMetadata md = diArray.metadata();
+   assert md.isArray();
+   // DFDL has no notion of an array directly within another array. A named
+   // field (map) is necessary before you can have another array.
+   TupleWriter enclosingParentTupleWriter = currentTupleWriter();
+   // The parent is a map, or the top level row.
+   assert enclosingParentTupleWriter.type() == ObjectType.TUPLE;
+   ArrayWriter aw = enclosingParentTupleWriter.array(colName(md));
+   arrayWriterStack.push(aw);
+ }
+
+ /**
+  * Handles the end of an array by popping its writer off the array writer stack.
+  *
+  * @param ia the array that ended
+  */
+ @Override
+ public void endArray(InfosetArray ia) {
+   final ElementMetadata arrayMd = ia.metadata();
+   assert arrayMd.isArray();
+   assert !arrayWriterStack.isEmpty();
+   // FIXME: How do we end/close-out an array?
+   // Note that when the array items are maps, save must be called after
+   // each item is written to the array. That happens in endComplex events,
+   // because it has to be called once per array item, not once per array.
+   arrayWriterStack.pop();
+ }
+
+ private void convertDaffodilValueToDrillValue(InfosetSimpleElement ise,
ColumnMetadata cm, ColumnWriter cw) {
+ PrimitiveType dafType = ise.metadata().primitiveType();
+ TypeProtos.MinorType drillType =
DrillDaffodilSchemaUtils.getDrillDataType(dafType);
+ assert(drillType == cm.type());
+ switch (drillType) {
+ case INT: {
+ //
+ // FIXME: Javadoc for setObject says "primarily for testing"
+ // So how are we supposed to assign the column value then?
+ // Is there a way to get from a ColumnWriter to a typed scalar writer
(downcast perhaps?)
+ cw.setObject(ise.getInt());
Review Comment:
<sarcasm>
As a part of Drill's outstanding design there are two classes with the same
name of `ScalarWriter`. We also cleverly configured it so that IDEs will pick
up the incorrect one.
</sarcasm>
> Add Daffodil Format Plugin
> --------------------------
>
> Key: DRILL-8474
> URL: https://issues.apache.org/jira/browse/DRILL-8474
> Project: Apache Drill
> Issue Type: New Feature
> Affects Versions: 1.21.1
> Reporter: Charles Givre
> Priority: Major
> Fix For: 1.22.0
>
>
--
This message was sent by Atlassian Jira
(v8.20.10#820010)