chamikaramj commented on code in PR #34388:
URL: https://github.com/apache/beam/pull/34388#discussion_r2019113752
##########
runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineOptions.java:
##########
@@ -56,202 +58,255 @@ public interface DataflowPipelineOptions
DataflowProfilingOptions,
PubsubOptions {
- @Description(
- "Project id. Required when running a Dataflow in the cloud. "
- + "See https://cloud.google.com/storage/docs/projects for further
details.")
- @Override
- @Validation.Required
- @Default.InstanceFactory(DefaultProjectFactory.class)
- String getProject();
-
- @Override
- void setProject(String value);
-
- /**
- * GCS path for staging local files, e.g. gs://bucket/object
- *
- * <p>Must be a valid Cloud Storage URL, beginning with the prefix "gs://"
- *
- * <p>If {@link #getStagingLocation()} is not set, it will default to {@link
- * GcpOptions#getGcpTempLocation()}. {@link GcpOptions#getGcpTempLocation()} must be a valid GCS
- * path.
- */
- @Description(
- "GCS path for staging local files, e.g. \"gs://bucket/object\". "
- + "Must be a valid Cloud Storage URL, beginning with the prefix
\"gs://\". "
- + "If stagingLocation is unset, defaults to gcpTempLocation with
\"/staging\" suffix.")
- @Default.InstanceFactory(StagingLocationFactory.class)
- String getStagingLocation();
-
- void setStagingLocation(String value);
-
- /** Whether to update the currently running pipeline with the same name as this one. */
- @Description(
- "If set, replace the existing pipeline with the name specified by
--jobName with "
- + "this pipeline, preserving state.")
- boolean isUpdate();
-
- void setUpdate(boolean value);
-
- /** If set, the snapshot from which the job should be created. */
- @Description("If set, the snapshot from which the job should be created.")
- String getCreateFromSnapshot();
-
- void setCreateFromSnapshot(String value);
-
- /** Where the runner should generate a template file. Must either be local or Cloud Storage. */
- @Description(
- "Where the runner should generate a template file. "
- + "Must either be local or Cloud Storage.")
- String getTemplateLocation();
-
- void setTemplateLocation(String value);
-
- /**
- * Service options are set by the user and configure the service. This decouples service side
- * feature availability from the Apache Beam release cycle.
- */
- @Description(
- "Service options are set by the user and configure the service. This "
- + "decouples service side feature availability from the Apache Beam
release cycle. "
- + "For a list of service options, see "
- + "https://cloud.google.com/dataflow/docs/reference/service-options "
- + "in the Dataflow documentation.")
- List<String> getDataflowServiceOptions();
-
- void setDataflowServiceOptions(List<String> options);
-
- /** Run the job as a specific service account, instead of the default GCE robot. */
- @Description("Run the job as a specific service account, instead of the
default GCE robot.")
- String getServiceAccount();
-
- void setServiceAccount(String value);
-
- /**
- * The Google Compute Engine <a
- *
href="https://cloud.google.com/compute/docs/regions-zones/regions-zones">region</a>
for
- * creating Dataflow jobs.
- */
- @Description(
- "The Google Compute Engine region for creating Dataflow jobs. See "
- + "https://cloud.google.com/compute/docs/regions-zones/regions-zones
for a list of valid "
- + "options.")
- @Default.InstanceFactory(DefaultGcpRegionFactory.class)
- String getRegion();
-
- void setRegion(String region);
-
- /**
- * Dataflow endpoint to use.
- *
- * <p>Defaults to the current version of the Google Cloud Dataflow API, at the time the current
- * SDK version was released.
- *
- * <p>If the string contains "://", then this is treated as a URL, otherwise {@link
- * #getApiRootUrl()} is used as the root URL.
- */
- @Description(
- "The URL for the Dataflow API. If the string contains \"://\", this"
- + " will be treated as the entire URL, otherwise will be treated
relative to apiRootUrl.")
- @Override
- @Default.String(Dataflow.DEFAULT_SERVICE_PATH)
- String getDataflowEndpoint();
-
- @Override
- void setDataflowEndpoint(String value);
-
- /** Labels that will be applied to the billing records for this job. */
- @Description("Labels that will be applied to the billing records for this
job.")
- Map<String, String> getLabels();
-
- void setLabels(Map<String, String> labels);
-
- /** The URL of the staged portable pipeline. */
- @Description("The URL of the staged portable pipeline")
- String getPipelineUrl();
-
- void setPipelineUrl(String urlString);
-
- @Description("The customized dataflow worker jar")
- String getDataflowWorkerJar();
-
- void setDataflowWorkerJar(String dataflowWorkerJar);
-
- /** Set of available Flexible Resource Scheduling goals. */
- enum FlexResourceSchedulingGoal {
- /** No goal specified. */
- UNSPECIFIED,
-
- /** Optimize for lower execution time. */
- SPEED_OPTIMIZED,
-
- /** Optimize for lower cost. */
- COST_OPTIMIZED,
- }
-
- /** This option controls Flexible Resource Scheduling mode. */
- @Description("Controls the Flexible Resource Scheduling mode.")
- @Default.Enum("UNSPECIFIED")
- FlexResourceSchedulingGoal getFlexRSGoal();
-
- void setFlexRSGoal(FlexResourceSchedulingGoal goal);
-
- /** Returns a default staging location under {@link GcpOptions#getGcpTempLocation}. */
- class StagingLocationFactory implements DefaultValueFactory<String> {
- private static final Logger LOG = LoggerFactory.getLogger(StagingLocationFactory.class);
+ @Description(
+ "Project id. Required when running a Dataflow in the cloud. "
+ + "See https://cloud.google.com/storage/docs/projects for further
details.")
+ @Override
Review Comment:
There's a very large update to the Dataflow runner here that is not explained in the PR description.
Is this just formatting? If so, please revert it so that the doc updates can be reviewed cleanly.
If there are actual updates to the Dataflow runner, please create a separate PR and a GitHub issue, and discuss on the Beam dev list as appropriate.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]