taegeonum commented on a change in pull request #122: [NEMO-213] Use Beam's 
DoFnRunners to execute DoFn
URL: https://github.com/apache/incubator-nemo/pull/122#discussion_r223928922
 
 

 ##########
 File path: 
compiler/frontend/beam/src/main/java/org/apache/nemo/compiler/frontend/beam/source/BeamBoundedWindowSourceVertex.java
 ##########
 @@ -0,0 +1,148 @@
+/*
+ * Copyright (C) 2018 Seoul National University
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nemo.compiler.frontend.beam.source;
+
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.nemo.common.ir.Readable;
+
+import java.io.IOException;
+import java.lang.reflect.Field;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.nemo.common.ir.vertex.SourceVertex;
+import org.apache.beam.sdk.io.BoundedSource;
+import org.apache.beam.sdk.io.hadoop.inputformat.HadoopInputFormatIO;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * SourceVertex implementation for BoundedSource.
+ * @param <O> output type.
+ */
+public final class BeamBoundedWindowSourceVertex<O> extends 
SourceVertex<WindowedValue<O>> {
+  private static final Logger LOG = 
LoggerFactory.getLogger(BeamBoundedWindowSourceVertex.class.getName());
+  private BoundedSource<O> source;
+  private final String sourceDescription;
+
+  /**
+   * Constructor of BeamBoundedWindowSourceVertex.
+   *
+   * @param source BoundedSource to read from.
+   */
+  public BeamBoundedWindowSourceVertex(final BoundedSource<O> source) {
+    super();
+    this.source = source;
+    this.sourceDescription = source.toString();
+  }
+
+  /**
+   * Constructor of BeamBoundedWindowSourceVertex.
+   *
+   * @param that the source object for copying
+   */
+  public BeamBoundedWindowSourceVertex(final BeamBoundedWindowSourceVertex 
that) {
+    super(that);
+    this.source = that.source;
+    this.sourceDescription = that.source.toString();
+  }
+
+  @Override
+  public BeamBoundedWindowSourceVertex getClone() {
+    return new BeamBoundedWindowSourceVertex(this);
+  }
+
+  @Override
+  public List<Readable<WindowedValue<O>>> getReadables(final int 
desiredNumOfSplits) throws Exception {
+    final List<Readable<WindowedValue<O>>> readables = new ArrayList<>();
+    LOG.info("estimate: {}", source.getEstimatedSizeBytes(null));
+    LOG.info("desired: {}", desiredNumOfSplits);
+    source.split(source.getEstimatedSizeBytes(null) / desiredNumOfSplits, null)
+        .forEach(boundedSource -> readables.add(new 
BoundedSourceReadable<>(boundedSource)));
+    return readables;
+  }
+
+  @Override
+  public void clearInternalStates() {
+    source = null;
+  }
+
+  @Override
+  public ObjectNode getPropertiesAsJsonNode() {
+    final ObjectNode node = getIRVertexPropertiesAsJsonNode();
+    node.put("source", sourceDescription);
+    return node;
+  }
+
+  /**
+   * BoundedSourceReadable class.
+   * @param <T> type.
+   */
+  private static final class BoundedSourceReadable<T> implements 
Readable<WindowedValue<T>> {
+    private final BoundedSource<T> boundedSource;
+
+    /**
+     * Constructor of the BoundedSourceReadable.
+     * @param boundedSource the BoundedSource.
+     */
+    BoundedSourceReadable(final BoundedSource<T> boundedSource) {
+      this.boundedSource = boundedSource;
+    }
+
+    @Override
+    public Iterable<WindowedValue<T>> read() throws IOException {
+      boolean started = false;
+      boolean windowed = false;
+
+      final ArrayList<WindowedValue<T>> elements = new ArrayList<>();
+      try (BoundedSource.BoundedReader<T> reader = 
boundedSource.createReader(null)) {
+        for (boolean available = reader.start(); available; available = 
reader.advance()) {
+          final T elem = reader.getCurrent();
+
+          if (!started) {
+            started = true;
+            if (elem instanceof WindowedValue) {
+              windowed = true;
+            }
+          }
+
+          if (!windowed) {
 
 Review comment:
   I thought this might lead to better performance, because we don't have to 
check every element (checking the first element is enough).

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

Reply via email to