seojangho commented on a change in pull request #104: [NEMO-183] DAG-centric translation from Beam pipeline to IR DAG URL: https://github.com/apache/incubator-nemo/pull/104#discussion_r210797773
########## File path: compiler/frontend/beam/src/main/java/edu/snu/nemo/compiler/frontend/beam/PipelineVisitor.java ########## @@ -0,0 +1,291 @@ +/* + * Copyright (C) 2018 Seoul National University + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package edu.snu.nemo.compiler.frontend.beam; + +import edu.snu.nemo.common.dag.DAG; +import edu.snu.nemo.common.dag.DAGBuilder; +import edu.snu.nemo.common.dag.Edge; +import edu.snu.nemo.common.dag.Vertex; +import org.apache.beam.sdk.Pipeline; +import org.apache.beam.sdk.runners.TransformHierarchy; +import org.apache.beam.sdk.transforms.ParDo; +import org.apache.beam.sdk.transforms.View; +import org.apache.beam.sdk.values.PValue; + +import java.util.*; + +/** + * Traverses through the given Beam pipeline to construct a DAG of Beam Transform, + * while preserving hierarchy of CompositeTransforms. + * This DAG will be later translated by {@link PipelineTranslator} into Nemo IR DAG. 
+ */ +public final class PipelineVisitor extends Pipeline.PipelineVisitor.Defaults { + + private static final String TRANSFORM = "Transform-"; + private static final String DATAFLOW = "Dataflow-"; + + private final Stack<CompositeTransformVertex> compositeTransformVertexStack = new Stack<>(); + private CompositeTransformVertex rootVertex = null; + private int nextIdx = 0; + + @Override + public void visitPrimitiveTransform(final TransformHierarchy.Node node) { + final PrimitiveTransformVertex vertex = new PrimitiveTransformVertex(node, compositeTransformVertexStack.peek()); + compositeTransformVertexStack.peek().addVertex(vertex); + vertex.getPValuesConsumed() + .forEach(pValue -> { + final TransformVertex dst = getDestinationOfDataFlowEdge(vertex, pValue); + dst.parent.addDataFlow(new DataFlowEdge(dst.parent.getProducerOf(pValue), dst)); + }); + } + + @Override + public CompositeBehavior enterCompositeTransform(final TransformHierarchy.Node node) { + final CompositeTransformVertex vertex; + if (compositeTransformVertexStack.isEmpty()) { + // There is always a top-level CompositeTransform that encompasses the entire Beam pipeline. + vertex = new CompositeTransformVertex(node, null); + } else { + vertex = new CompositeTransformVertex(node, compositeTransformVertexStack.peek()); + } + compositeTransformVertexStack.push(vertex); + return CompositeBehavior.ENTER_TRANSFORM; + } + + @Override + public void leaveCompositeTransform(final TransformHierarchy.Node node) { + final CompositeTransformVertex vertex = compositeTransformVertexStack.pop(); + vertex.build(); + if (compositeTransformVertexStack.isEmpty()) { + // The vertex is the root. + if (rootVertex != null) { + throw new RuntimeException("The visitor already have traversed a Beam pipeline. " + + "Re-using a visitor is not allowed."); + } + rootVertex = vertex; + } else { + // The CompositeTransformVertex is ready; adding it to its parent vertex. 
+ compositeTransformVertexStack.peek().addVertex(vertex); + } + } + + /** + * @return A vertex representing the top-level CompositeTransform. + */ + public CompositeTransformVertex getConvertedPipeline() { + if (rootVertex == null) { + throw new RuntimeException("The visitor have not fully traversed through a Beam pipeline."); + } + return rootVertex; + } + + /** + * Represents a {@link org.apache.beam.sdk.transforms.PTransform} as a vertex in DAG. + */ + public abstract class TransformVertex extends Vertex { Review comment: Thanks for the opinion. I prefer to stay with the design decision of 'lazy' translation, not because it's superior to 'hybrid' translation, but because porting the 'mapper-side combiner' to a hybrid translator would require some work. In a hybrid translator, GroupByKey (layer3 primitive transform) should 'repeat itself' if and only if it has been expanded by Combine (layer2 composite transform). Alternatively, the translator for the Combine composite transform should replicate the inner transforms if and only if a GroupByKey is included among them. This PR makes the frontend codebase much more verbose than before, and someday we will have to fuse 'PipelineTranslator' and 'PipelineVisitor' into one. I believe that's not going to be hard work, except for the mapper-side combiner mentioned above. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected] With regards, Apache Git Services
