[BEAM-151] Remove dependence on num workers in DatastoreIO. This dependence prevents moving DataflowPipelineOptions into a separate Maven module.
Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/551d0814 Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/551d0814 Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/551d0814 Branch: refs/heads/master Commit: 551d081481450e750a9a95b5902a540d0662db85 Parents: d0db477 Author: Luke Cwik <[email protected]> Authored: Mon Mar 28 13:11:28 2016 -0700 Committer: Luke Cwik <[email protected]> Committed: Thu Apr 7 11:19:49 2016 -0700 ---------------------------------------------------------------------- .../com/google/cloud/dataflow/sdk/io/DatastoreIO.java | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/551d0814/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java ---------------------------------------------------------------------- diff --git a/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java b/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java index 9128585..8d85ab3 100644 --- a/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java +++ b/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java @@ -54,7 +54,6 @@ import com.google.cloud.dataflow.sdk.coders.EntityCoder; import com.google.cloud.dataflow.sdk.coders.SerializableCoder; import com.google.cloud.dataflow.sdk.io.Sink.WriteOperation; import com.google.cloud.dataflow.sdk.io.Sink.Writer; -import com.google.cloud.dataflow.sdk.options.DataflowPipelineWorkerPoolOptions; import com.google.cloud.dataflow.sdk.options.GcpOptions; import com.google.cloud.dataflow.sdk.options.PipelineOptions; import com.google.cloud.dataflow.sdk.util.AttemptBoundedExponentialBackOff; @@ -307,18 +306,7 @@ public class DatastoreIO { numSplits = 
Math.round(((double) getEstimatedSizeBytes(options)) / desiredBundleSizeBytes); } catch (Exception e) { // Fallback in case estimated size is unavailable. TODO: fix this, it's horrible. - - // 1. Try Dataflow's numWorkers, which will be 0 for other workers. - DataflowPipelineWorkerPoolOptions poolOptions = - options.as(DataflowPipelineWorkerPoolOptions.class); - if (poolOptions.getNumWorkers() > 0) { - LOG.warn("Estimated size of unavailable, using the number of workers {}", - poolOptions.getNumWorkers(), e); - numSplits = poolOptions.getNumWorkers(); - } else { - // 2. Default to 12 in the unknown case. - numSplits = 12; - } + numSplits = 12; } // If the desiredBundleSize or number of workers results in 1 split, simply return
